blob: a7db3f6f1b7b0b964b53465617d0108d3b476376 [file] [log] [blame]
David Sterbac1d7c512018-04-03 19:23:33 +02001// SPDX-License-Identifier: GPL-2.0
Chris Mason6cbd5572007-06-12 09:07:21 -04002/*
Chris Masond352ac62008-09-29 15:18:18 -04003 * Copyright (C) 2007,2008 Oracle. All rights reserved.
Chris Mason6cbd5572007-06-12 09:07:21 -04004 */
5
Chris Masona6b6e752007-10-15 16:22:39 -04006#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +09007#include <linux/slab.h>
Jan Schmidtbd989ba2012-05-16 17:18:50 +02008#include <linux/rbtree.h>
David Sterbaadf02122017-05-31 19:44:31 +02009#include <linux/mm.h>
Christoph Hellwige41d12f2021-09-20 14:33:13 +020010#include <linux/error-injection.h>
Chris Masoneb60cea2007-02-02 09:18:22 -050011#include "ctree.h"
12#include "disk-io.h"
Chris Mason7f5c1512007-03-23 15:56:19 -040013#include "transaction.h"
Chris Mason5f39d392007-10-15 16:14:19 -040014#include "print-tree.h"
Chris Mason925baed2008-06-25 16:01:30 -040015#include "locking.h"
Nikolay Borisovde37aa52018-10-30 16:43:24 +020016#include "volumes.h"
Qu Wenruof616f5c2019-01-23 15:15:17 +080017#include "qgroup.h"
Filipe Mananaf3a84cc2021-03-11 14:31:07 +000018#include "tree-mod-log.h"
Chris Mason9a8dd152007-02-23 08:38:36 -050019
/*
 * Forward declarations for static helpers that are used before their
 * definitions later in this file.
 */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path,
		      int level);
static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      const struct btrfs_key *ins_key,
		      struct btrfs_path *path,
		      int data_size,
		      int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct extent_buffer *dst,
			  struct extent_buffer *src,
			  int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root,
		    struct btrfs_path *path,
		    int level,
		    int slot);
Chris Masond97e63b2007-02-20 16:40:44 -050033
Johannes Thumshirnaf024ed2019-08-30 13:36:09 +020034static const struct btrfs_csums {
35 u16 size;
David Sterba59a0fcd2020-02-27 21:00:45 +010036 const char name[10];
37 const char driver[12];
Johannes Thumshirnaf024ed2019-08-30 13:36:09 +020038} btrfs_csums[] = {
39 [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
Johannes Thumshirn3951e7f2019-10-07 11:11:01 +020040 [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
Johannes Thumshirn3831bf02019-10-07 11:11:02 +020041 [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
David Sterba352ae072019-10-07 11:11:02 +020042 [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
43 .driver = "blake2b-256" },
Johannes Thumshirnaf024ed2019-08-30 13:36:09 +020044};
45
46int btrfs_super_csum_size(const struct btrfs_super_block *s)
47{
48 u16 t = btrfs_super_csum_type(s);
49 /*
50 * csum type is validated at mount time
51 */
52 return btrfs_csums[t].size;
53}
54
55const char *btrfs_super_csum_name(u16 csum_type)
56{
57 /* csum type is validated at mount time */
58 return btrfs_csums[csum_type].name;
59}
60
David Sterbab4e967b2019-10-08 18:41:33 +020061/*
62 * Return driver name if defined, otherwise the name that's also a valid driver
63 * name
64 */
65const char *btrfs_super_csum_driver(u16 csum_type)
66{
67 /* csum type is validated at mount time */
David Sterba59a0fcd2020-02-27 21:00:45 +010068 return btrfs_csums[csum_type].driver[0] ?
69 btrfs_csums[csum_type].driver :
David Sterbab4e967b2019-10-08 18:41:33 +020070 btrfs_csums[csum_type].name;
71}
72
David Sterba604997b2020-07-27 17:38:19 +020073size_t __attribute_const__ btrfs_get_num_csums(void)
David Sterbaf7cea562019-10-07 11:11:03 +020074{
75 return ARRAY_SIZE(btrfs_csums);
76}
77
Chris Mason2c90e5d2007-04-02 10:50:19 -040078struct btrfs_path *btrfs_alloc_path(void)
79{
Masahiro Yamadae2c89902016-09-13 04:35:52 +090080 return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
Chris Mason2c90e5d2007-04-02 10:50:19 -040081}
82
Chris Masond352ac62008-09-29 15:18:18 -040083/* this also releases the path */
Chris Mason2c90e5d2007-04-02 10:50:19 -040084void btrfs_free_path(struct btrfs_path *p)
85{
Jesper Juhlff175d52010-12-25 21:22:30 +000086 if (!p)
87 return;
David Sterbab3b4aa72011-04-21 01:20:15 +020088 btrfs_release_path(p);
Chris Mason2c90e5d2007-04-02 10:50:19 -040089 kmem_cache_free(btrfs_path_cachep, p);
90}
91
Chris Masond352ac62008-09-29 15:18:18 -040092/*
93 * path release drops references on the extent buffers in the path
94 * and it drops any locks held by this path
95 *
96 * It is safe to call this on paths that no locks or extent buffers held.
97 */
David Sterbab3b4aa72011-04-21 01:20:15 +020098noinline void btrfs_release_path(struct btrfs_path *p)
Chris Masoneb60cea2007-02-02 09:18:22 -050099{
100 int i;
Chris Masona2135012008-06-25 16:01:30 -0400101
Chris Mason234b63a2007-03-13 10:46:10 -0400102 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Mason3f157a22008-06-25 16:01:31 -0400103 p->slots[i] = 0;
Chris Masoneb60cea2007-02-02 09:18:22 -0500104 if (!p->nodes[i])
Chris Mason925baed2008-06-25 16:01:30 -0400105 continue;
106 if (p->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -0400107 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -0400108 p->locks[i] = 0;
109 }
Chris Mason5f39d392007-10-15 16:14:19 -0400110 free_extent_buffer(p->nodes[i]);
Chris Mason3f157a22008-06-25 16:01:31 -0400111 p->nodes[i] = NULL;
Chris Masoneb60cea2007-02-02 09:18:22 -0500112 }
113}
114
Chris Masond352ac62008-09-29 15:18:18 -0400115/*
116 * safely gets a reference on the root node of a tree. A lock
117 * is not taken, so a concurrent writer may put a different node
118 * at the root of the tree. See btrfs_lock_root_node for the
119 * looping required.
120 *
121 * The extent buffer returned by this has a reference taken, so
122 * it won't disappear. It may stop being the root of the tree
123 * at any time because there are no locks held.
124 */
Chris Mason925baed2008-06-25 16:01:30 -0400125struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
126{
127 struct extent_buffer *eb;
Chris Mason240f62c2011-03-23 14:54:42 -0400128
Josef Bacik3083ee22012-03-09 16:01:49 -0500129 while (1) {
130 rcu_read_lock();
131 eb = rcu_dereference(root->node);
132
133 /*
134 * RCU really hurts here, we could free up the root node because
Nicholas D Steeves01327612016-05-19 21:18:45 -0400135 * it was COWed but we may not get the new root node yet so do
Josef Bacik3083ee22012-03-09 16:01:49 -0500136 * the inc_not_zero dance and if it doesn't work then
137 * synchronize_rcu and try again.
138 */
139 if (atomic_inc_not_zero(&eb->refs)) {
140 rcu_read_unlock();
141 break;
142 }
143 rcu_read_unlock();
144 synchronize_rcu();
145 }
Chris Mason925baed2008-06-25 16:01:30 -0400146 return eb;
147}
148
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800149/*
150 * Cowonly root (not-shareable trees, everything not subvolume or reloc roots),
151 * just get put onto a simple dirty list. Transaction walks this list to make
152 * sure they get properly updated on disk.
Chris Masond352ac62008-09-29 15:18:18 -0400153 */
Chris Mason0b86a832008-03-24 15:01:56 -0400154static void add_root_to_dirty_list(struct btrfs_root *root)
155{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400156 struct btrfs_fs_info *fs_info = root->fs_info;
157
Josef Bacike7070be2014-12-16 08:54:43 -0800158 if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
159 !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
160 return;
161
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400162 spin_lock(&fs_info->trans_lock);
Josef Bacike7070be2014-12-16 08:54:43 -0800163 if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
164 /* Want the extent tree to be the last on the list */
Misono Tomohiro4fd786e2018-08-06 14:25:24 +0900165 if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
Josef Bacike7070be2014-12-16 08:54:43 -0800166 list_move_tail(&root->dirty_list,
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400167 &fs_info->dirty_cowonly_roots);
Josef Bacike7070be2014-12-16 08:54:43 -0800168 else
169 list_move(&root->dirty_list,
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400170 &fs_info->dirty_cowonly_roots);
Chris Mason0b86a832008-03-24 15:01:56 -0400171 }
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400172 spin_unlock(&fs_info->trans_lock);
Chris Mason0b86a832008-03-24 15:01:56 -0400173}
174
Chris Masond352ac62008-09-29 15:18:18 -0400175/*
176 * used by snapshot creation to make a copy of a root for a tree with
177 * a given objectid. The buffer with the new root node is returned in
178 * cow_ret, and this func returns zero on success or a negative error code.
179 */
Chris Masonbe20aa92007-12-17 20:14:01 -0500180int btrfs_copy_root(struct btrfs_trans_handle *trans,
181 struct btrfs_root *root,
182 struct extent_buffer *buf,
183 struct extent_buffer **cow_ret, u64 new_root_objectid)
184{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400185 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonbe20aa92007-12-17 20:14:01 -0500186 struct extent_buffer *cow;
Chris Masonbe20aa92007-12-17 20:14:01 -0500187 int ret = 0;
188 int level;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400189 struct btrfs_disk_key disk_key;
Chris Masonbe20aa92007-12-17 20:14:01 -0500190
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800191 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400192 trans->transid != fs_info->running_transaction->transid);
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800193 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Miao Xie27cdeb72014-04-02 19:51:05 +0800194 trans->transid != root->last_trans);
Chris Masonbe20aa92007-12-17 20:14:01 -0500195
196 level = btrfs_header_level(buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400197 if (level == 0)
198 btrfs_item_key(buf, &disk_key, 0);
199 else
200 btrfs_node_key(buf, &disk_key, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400201
David Sterba4d75f8a2014-06-15 01:54:12 +0200202 cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
Josef Bacikcf6f34a2020-08-20 11:46:07 -0400203 &disk_key, level, buf->start, 0,
204 BTRFS_NESTING_NEW_ROOT);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400205 if (IS_ERR(cow))
Chris Masonbe20aa92007-12-17 20:14:01 -0500206 return PTR_ERR(cow);
207
David Sterba58e80122016-11-08 18:30:31 +0100208 copy_extent_buffer_full(cow, buf);
Chris Masonbe20aa92007-12-17 20:14:01 -0500209 btrfs_set_header_bytenr(cow, cow->start);
210 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400211 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
212 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
213 BTRFS_HEADER_FLAG_RELOC);
214 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
215 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
216 else
217 btrfs_set_header_owner(cow, new_root_objectid);
Chris Masonbe20aa92007-12-17 20:14:01 -0500218
Nikolay Borisovde37aa52018-10-30 16:43:24 +0200219 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
Yan Zheng2b820322008-11-17 21:11:30 -0500220
Chris Masonbe20aa92007-12-17 20:14:01 -0500221 WARN_ON(btrfs_header_generation(buf) > trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400222 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -0700223 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400224 else
Josef Bacike339a6b2014-07-02 10:54:25 -0700225 ret = btrfs_inc_ref(trans, root, cow, 0);
Josef Bacik867ed322021-01-14 14:02:46 -0500226 if (ret) {
Filipe Manana72c99252021-02-04 14:35:44 +0000227 btrfs_tree_unlock(cow);
228 free_extent_buffer(cow);
Josef Bacik867ed322021-01-14 14:02:46 -0500229 btrfs_abort_transaction(trans, ret);
Chris Masonbe20aa92007-12-17 20:14:01 -0500230 return ret;
Josef Bacik867ed322021-01-14 14:02:46 -0500231 }
Chris Masonbe20aa92007-12-17 20:14:01 -0500232
233 btrfs_mark_buffer_dirty(cow);
234 *cow_ret = cow;
235 return 0;
236}
237
Chris Masond352ac62008-09-29 15:18:18 -0400238/*
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400239 * check if the tree block can be shared by multiple trees
240 */
241int btrfs_block_can_be_shared(struct btrfs_root *root,
242 struct extent_buffer *buf)
243{
244 /*
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800245 * Tree blocks not in shareable trees and tree roots are never shared.
246 * If a block was allocated after the last snapshot and the block was
247 * not allocated by tree relocation, we know the block is not shared.
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400248 */
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800249 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400250 buf != root->node && buf != root->commit_root &&
251 (btrfs_header_generation(buf) <=
252 btrfs_root_last_snapshot(&root->root_item) ||
253 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
254 return 1;
Nikolay Borisova79865c2018-06-21 09:45:00 +0300255
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400256 return 0;
257}
258
259static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
260 struct btrfs_root *root,
261 struct extent_buffer *buf,
Yan, Zhengf0486c62010-05-16 10:46:25 -0400262 struct extent_buffer *cow,
263 int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400264{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400265 struct btrfs_fs_info *fs_info = root->fs_info;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400266 u64 refs;
267 u64 owner;
268 u64 flags;
269 u64 new_flags = 0;
270 int ret;
271
272 /*
273 * Backrefs update rules:
274 *
275 * Always use full backrefs for extent pointers in tree block
276 * allocated by tree relocation.
277 *
278 * If a shared tree block is no longer referenced by its owner
279 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
280 * use full backrefs for extent pointers in tree block.
281 *
282 * If a tree block is been relocating
283 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
284 * use full backrefs for extent pointers in tree block.
285 * The reason for this is some operations (such as drop tree)
286 * are only allowed for blocks use full backrefs.
287 */
288
289 if (btrfs_block_can_be_shared(root, buf)) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400290 ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
Josef Bacik3173a182013-03-07 14:22:04 -0500291 btrfs_header_level(buf), 1,
292 &refs, &flags);
Mark Fashehbe1a5562011-08-08 13:20:18 -0700293 if (ret)
294 return ret;
Mark Fashehe5df9572011-08-29 14:17:04 -0700295 if (refs == 0) {
296 ret = -EROFS;
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400297 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fashehe5df9572011-08-29 14:17:04 -0700298 return ret;
299 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400300 } else {
301 refs = 1;
302 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
303 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
304 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
305 else
306 flags = 0;
307 }
308
309 owner = btrfs_header_owner(buf);
310 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
311 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
312
313 if (refs > 1) {
314 if ((owner == root->root_key.objectid ||
315 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
316 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
Josef Bacike339a6b2014-07-02 10:54:25 -0700317 ret = btrfs_inc_ref(trans, root, buf, 1);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500318 if (ret)
319 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400320
321 if (root->root_key.objectid ==
322 BTRFS_TREE_RELOC_OBJECTID) {
Josef Bacike339a6b2014-07-02 10:54:25 -0700323 ret = btrfs_dec_ref(trans, root, buf, 0);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500324 if (ret)
325 return ret;
Josef Bacike339a6b2014-07-02 10:54:25 -0700326 ret = btrfs_inc_ref(trans, root, cow, 1);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500327 if (ret)
328 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400329 }
330 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
331 } else {
332
333 if (root->root_key.objectid ==
334 BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -0700335 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400336 else
Josef Bacike339a6b2014-07-02 10:54:25 -0700337 ret = btrfs_inc_ref(trans, root, cow, 0);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500338 if (ret)
339 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400340 }
341 if (new_flags != 0) {
Josef Bacikb1c79e02013-05-09 13:49:30 -0400342 int level = btrfs_header_level(buf);
343
David Sterba42c9d0b2019-03-20 11:54:13 +0100344 ret = btrfs_set_disk_extent_flags(trans, buf,
Josef Bacikb1c79e02013-05-09 13:49:30 -0400345 new_flags, level, 0);
Mark Fashehbe1a5562011-08-08 13:20:18 -0700346 if (ret)
347 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400348 }
349 } else {
350 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
351 if (root->root_key.objectid ==
352 BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -0700353 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400354 else
Josef Bacike339a6b2014-07-02 10:54:25 -0700355 ret = btrfs_inc_ref(trans, root, cow, 0);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500356 if (ret)
357 return ret;
Josef Bacike339a6b2014-07-02 10:54:25 -0700358 ret = btrfs_dec_ref(trans, root, buf, 1);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500359 if (ret)
360 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400361 }
David Sterba6a884d7d2019-03-20 14:30:02 +0100362 btrfs_clean_tree_block(buf);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400363 *last_ref = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400364 }
365 return 0;
366}
367
368/*
Chris Masond3977122009-01-05 21:25:51 -0500369 * does the dirty work in cow of a single block. The parent block (if
370 * supplied) is updated to point to the new cow copy. The new buffer is marked
371 * dirty and returned locked. If you modify the block it needs to be marked
372 * dirty again.
Chris Masond352ac62008-09-29 15:18:18 -0400373 *
374 * search_start -- an allocation hint for the new block
375 *
Chris Masond3977122009-01-05 21:25:51 -0500376 * empty_size -- a hint that you plan on doing more cow. This is the size in
377 * bytes the allocator should try to find free next to the block it returns.
378 * This is just a hint and may be ignored by the allocator.
Chris Masond352ac62008-09-29 15:18:18 -0400379 */
Chris Masond3977122009-01-05 21:25:51 -0500380static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -0400381 struct btrfs_root *root,
382 struct extent_buffer *buf,
383 struct extent_buffer *parent, int parent_slot,
384 struct extent_buffer **cow_ret,
Josef Bacik9631e4c2020-08-20 11:46:03 -0400385 u64 search_start, u64 empty_size,
386 enum btrfs_lock_nesting nest)
Chris Mason6702ed42007-08-07 16:15:09 -0400387{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400388 struct btrfs_fs_info *fs_info = root->fs_info;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400389 struct btrfs_disk_key disk_key;
Chris Mason5f39d392007-10-15 16:14:19 -0400390 struct extent_buffer *cow;
Mark Fashehbe1a5562011-08-08 13:20:18 -0700391 int level, ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -0400392 int last_ref = 0;
Chris Mason925baed2008-06-25 16:01:30 -0400393 int unlock_orig = 0;
Goldwyn Rodrigues0f5053e2016-09-22 14:11:34 -0500394 u64 parent_start = 0;
Chris Mason6702ed42007-08-07 16:15:09 -0400395
Chris Mason925baed2008-06-25 16:01:30 -0400396 if (*cow_ret == buf)
397 unlock_orig = 1;
398
Filipe Manana49d0c642021-09-22 10:36:45 +0100399 btrfs_assert_tree_write_locked(buf);
Chris Mason925baed2008-06-25 16:01:30 -0400400
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800401 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400402 trans->transid != fs_info->running_transaction->transid);
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800403 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Miao Xie27cdeb72014-04-02 19:51:05 +0800404 trans->transid != root->last_trans);
Chris Mason5f39d392007-10-15 16:14:19 -0400405
Chris Mason7bb86312007-12-11 09:25:06 -0500406 level = btrfs_header_level(buf);
Zheng Yan31840ae2008-09-23 13:14:14 -0400407
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400408 if (level == 0)
409 btrfs_item_key(buf, &disk_key, 0);
410 else
411 btrfs_node_key(buf, &disk_key, 0);
412
Goldwyn Rodrigues0f5053e2016-09-22 14:11:34 -0500413 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
414 parent_start = parent->start;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400415
Filipe Manana79bd3712021-06-29 14:43:06 +0100416 cow = btrfs_alloc_tree_block(trans, root, parent_start,
417 root->root_key.objectid, &disk_key, level,
418 search_start, empty_size, nest);
Chris Mason6702ed42007-08-07 16:15:09 -0400419 if (IS_ERR(cow))
420 return PTR_ERR(cow);
421
Chris Masonb4ce94d2009-02-04 09:25:08 -0500422 /* cow is set to blocking by btrfs_init_new_buffer */
423
David Sterba58e80122016-11-08 18:30:31 +0100424 copy_extent_buffer_full(cow, buf);
Chris Masondb945352007-10-15 16:15:53 -0400425 btrfs_set_header_bytenr(cow, cow->start);
Chris Mason5f39d392007-10-15 16:14:19 -0400426 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400427 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
428 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
429 BTRFS_HEADER_FLAG_RELOC);
430 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
431 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
432 else
433 btrfs_set_header_owner(cow, root->root_key.objectid);
Chris Mason6702ed42007-08-07 16:15:09 -0400434
Nikolay Borisovde37aa52018-10-30 16:43:24 +0200435 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
Yan Zheng2b820322008-11-17 21:11:30 -0500436
Mark Fashehbe1a5562011-08-08 13:20:18 -0700437 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
Mark Fashehb68dc2a2011-08-29 14:30:39 -0700438 if (ret) {
Josef Bacik572c83a2020-09-29 08:53:54 -0400439 btrfs_tree_unlock(cow);
440 free_extent_buffer(cow);
Jeff Mahoney66642832016-06-10 18:19:25 -0400441 btrfs_abort_transaction(trans, ret);
Mark Fashehb68dc2a2011-08-29 14:30:39 -0700442 return ret;
443 }
Zheng Yan1a40e232008-09-26 10:09:34 -0400444
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800445 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
Josef Bacik83d4cfd2013-08-30 15:09:51 -0400446 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
Zhaolei93314e32015-08-06 21:56:58 +0800447 if (ret) {
Josef Bacik572c83a2020-09-29 08:53:54 -0400448 btrfs_tree_unlock(cow);
449 free_extent_buffer(cow);
Jeff Mahoney66642832016-06-10 18:19:25 -0400450 btrfs_abort_transaction(trans, ret);
Josef Bacik83d4cfd2013-08-30 15:09:51 -0400451 return ret;
Zhaolei93314e32015-08-06 21:56:58 +0800452 }
Josef Bacik83d4cfd2013-08-30 15:09:51 -0400453 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -0400454
Chris Mason6702ed42007-08-07 16:15:09 -0400455 if (buf == root->node) {
Chris Mason925baed2008-06-25 16:01:30 -0400456 WARN_ON(parent && parent != buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400457 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
458 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
459 parent_start = buf->start;
Chris Mason925baed2008-06-25 16:01:30 -0400460
David Sterba67439da2019-10-08 13:28:47 +0200461 atomic_inc(&cow->refs);
Filipe Manana406808a2021-03-11 14:31:08 +0000462 ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
David Sterbad9d19a02018-03-05 16:35:29 +0100463 BUG_ON(ret < 0);
Chris Mason240f62c2011-03-23 14:54:42 -0400464 rcu_assign_pointer(root->node, cow);
Chris Mason925baed2008-06-25 16:01:30 -0400465
Filipe Manana7a1636082021-12-13 08:45:12 +0000466 btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
467 parent_start, last_ref);
Chris Mason5f39d392007-10-15 16:14:19 -0400468 free_extent_buffer(buf);
Chris Mason0b86a832008-03-24 15:01:56 -0400469 add_root_to_dirty_list(root);
Chris Mason6702ed42007-08-07 16:15:09 -0400470 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400471 WARN_ON(trans->transid != btrfs_header_generation(parent));
Filipe Mananaf3a84cc2021-03-11 14:31:07 +0000472 btrfs_tree_mod_log_insert_key(parent, parent_slot,
473 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
Chris Mason5f39d392007-10-15 16:14:19 -0400474 btrfs_set_node_blockptr(parent, parent_slot,
Chris Masondb945352007-10-15 16:15:53 -0400475 cow->start);
Chris Mason74493f72007-12-11 09:25:06 -0500476 btrfs_set_node_ptr_generation(parent, parent_slot,
477 trans->transid);
Chris Mason6702ed42007-08-07 16:15:09 -0400478 btrfs_mark_buffer_dirty(parent);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000479 if (last_ref) {
Filipe Mananaf3a84cc2021-03-11 14:31:07 +0000480 ret = btrfs_tree_mod_log_free_eb(buf);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000481 if (ret) {
Josef Bacik572c83a2020-09-29 08:53:54 -0400482 btrfs_tree_unlock(cow);
483 free_extent_buffer(cow);
Jeff Mahoney66642832016-06-10 18:19:25 -0400484 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000485 return ret;
486 }
487 }
Filipe Manana7a1636082021-12-13 08:45:12 +0000488 btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
489 parent_start, last_ref);
Chris Mason6702ed42007-08-07 16:15:09 -0400490 }
Chris Mason925baed2008-06-25 16:01:30 -0400491 if (unlock_orig)
492 btrfs_tree_unlock(buf);
Josef Bacik3083ee22012-03-09 16:01:49 -0500493 free_extent_buffer_stale(buf);
Chris Mason6702ed42007-08-07 16:15:09 -0400494 btrfs_mark_buffer_dirty(cow);
495 *cow_ret = cow;
496 return 0;
497}
498
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400499static inline int should_cow_block(struct btrfs_trans_handle *trans,
500 struct btrfs_root *root,
501 struct extent_buffer *buf)
502{
Jeff Mahoneyf5ee5c92016-06-21 09:52:41 -0400503 if (btrfs_is_testing(root->fs_info))
Josef Bacikfaa2dbf2014-05-07 17:06:09 -0400504 return 0;
David Sterbafccb84c2014-09-29 23:53:21 +0200505
David Sterbad1980132018-03-16 02:39:40 +0100506 /* Ensure we can see the FORCE_COW bit */
507 smp_mb__before_atomic();
Liu Bof1ebcc72011-11-14 20:48:06 -0500508
509 /*
510 * We do not need to cow a block if
511 * 1) this block is not created or changed in this transaction;
512 * 2) this block does not belong to TREE_RELOC tree;
513 * 3) the root is not forced COW.
514 *
515 * What is forced COW:
Nicholas D Steeves01327612016-05-19 21:18:45 -0400516 * when we create snapshot during committing the transaction,
Andrea Gelmini52042d82018-11-28 12:05:13 +0100517 * after we've finished copying src root, we must COW the shared
Liu Bof1ebcc72011-11-14 20:48:06 -0500518 * block to ensure the metadata consistency.
519 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400520 if (btrfs_header_generation(buf) == trans->transid &&
521 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
522 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
Liu Bof1ebcc72011-11-14 20:48:06 -0500523 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
Miao Xie27cdeb72014-04-02 19:51:05 +0800524 !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400525 return 0;
526 return 1;
527}
528
Chris Masond352ac62008-09-29 15:18:18 -0400529/*
530 * cows a single block, see __btrfs_cow_block for the real work.
Nicholas D Steeves01327612016-05-19 21:18:45 -0400531 * This version of it has extra checks so that a block isn't COWed more than
Chris Masond352ac62008-09-29 15:18:18 -0400532 * once per transaction, as long as it hasn't been written yet
533 */
Chris Masond3977122009-01-05 21:25:51 -0500534noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -0400535 struct btrfs_root *root, struct extent_buffer *buf,
536 struct extent_buffer *parent, int parent_slot,
Josef Bacik9631e4c2020-08-20 11:46:03 -0400537 struct extent_buffer **cow_ret,
538 enum btrfs_lock_nesting nest)
Chris Mason02217ed2007-03-02 16:08:05 -0500539{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400540 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason6702ed42007-08-07 16:15:09 -0400541 u64 search_start;
Chris Masonf510cfe2007-10-15 16:14:48 -0400542 int ret;
Chris Masondc17ff82008-01-08 15:46:30 -0500543
Josef Bacik83354f02018-11-30 11:52:13 -0500544 if (test_bit(BTRFS_ROOT_DELETING, &root->state))
545 btrfs_err(fs_info,
546 "COW'ing blocks on a fs root that's being dropped");
547
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400548 if (trans->transaction != fs_info->running_transaction)
Julia Lawall31b1a2b2012-11-03 10:58:34 +0000549 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +0200550 trans->transid,
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400551 fs_info->running_transaction->transid);
Julia Lawall31b1a2b2012-11-03 10:58:34 +0000552
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400553 if (trans->transid != fs_info->generation)
Julia Lawall31b1a2b2012-11-03 10:58:34 +0000554 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400555 trans->transid, fs_info->generation);
Chris Masondc17ff82008-01-08 15:46:30 -0500556
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400557 if (!should_cow_block(trans, root, buf)) {
Chris Mason02217ed2007-03-02 16:08:05 -0500558 *cow_ret = buf;
559 return 0;
560 }
Chris Masonc4876852009-02-04 09:24:25 -0500561
Byongho Leeee221842015-12-15 01:42:10 +0900562 search_start = buf->start & ~((u64)SZ_1G - 1);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500563
Qu Wenruof616f5c2019-01-23 15:15:17 +0800564 /*
565 * Before CoWing this block for later modification, check if it's
566 * the subtree root and do the delayed subtree trace if needed.
567 *
568 * Also We don't care about the error, as it's handled internally.
569 */
570 btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
Chris Masonf510cfe2007-10-15 16:14:48 -0400571 ret = __btrfs_cow_block(trans, root, buf, parent,
Josef Bacik9631e4c2020-08-20 11:46:03 -0400572 parent_slot, cow_ret, search_start, 0, nest);
liubo1abe9b82011-03-24 11:18:59 +0000573
574 trace_btrfs_cow_block(root, buf, *cow_ret);
575
Chris Masonf510cfe2007-10-15 16:14:48 -0400576 return ret;
Chris Mason6702ed42007-08-07 16:15:09 -0400577}
Josef Bacikf75e2b72020-12-16 11:18:43 -0500578ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
Chris Mason6702ed42007-08-07 16:15:09 -0400579
Chris Masond352ac62008-09-29 15:18:18 -0400580/*
581 * helper function for defrag to decide if two blocks pointed to by a
582 * node are actually close by
583 */
Chris Mason6b800532007-10-15 16:17:34 -0400584static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
Chris Mason6702ed42007-08-07 16:15:09 -0400585{
Chris Mason6b800532007-10-15 16:17:34 -0400586 if (blocknr < other && other - (blocknr + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -0400587 return 1;
Chris Mason6b800532007-10-15 16:17:34 -0400588 if (blocknr > other && blocknr - (other + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -0400589 return 1;
Chris Mason02217ed2007-03-02 16:08:05 -0500590 return 0;
591}
592
David Sterbace6ef5a2020-06-08 16:06:07 +0200593#ifdef __LITTLE_ENDIAN
594
/*
 * Compare a disk key with a CPU key.  On little-endian the disk order is the
 * same as the CPU order, so the disk key is reinterpreted in place and the
 * conversion is avoided.
 */
static int comp_keys(const struct btrfs_disk_key *disk_key,
		     const struct btrfs_key *k2)
{
	return btrfs_comp_cpu_keys((const struct btrfs_key *)disk_key, k2);
}
606
607#else
608
Chris Mason081e9572007-11-06 10:26:24 -0500609/*
610 * compare two keys in a memcmp fashion
611 */
Omar Sandoval310712b2017-01-17 23:24:37 -0800612static int comp_keys(const struct btrfs_disk_key *disk,
613 const struct btrfs_key *k2)
Chris Mason081e9572007-11-06 10:26:24 -0500614{
615 struct btrfs_key k1;
616
617 btrfs_disk_key_to_cpu(&k1, disk);
618
Diego Calleja20736ab2009-07-24 11:06:52 -0400619 return btrfs_comp_cpu_keys(&k1, k2);
Chris Mason081e9572007-11-06 10:26:24 -0500620}
David Sterbace6ef5a2020-06-08 16:06:07 +0200621#endif
Chris Mason081e9572007-11-06 10:26:24 -0500622
Josef Bacikf3465ca2008-11-12 14:19:50 -0500623/*
624 * same as comp_keys only with two btrfs_key's
625 */
David Sterbae1f60a62019-10-01 19:57:39 +0200626int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
Josef Bacikf3465ca2008-11-12 14:19:50 -0500627{
628 if (k1->objectid > k2->objectid)
629 return 1;
630 if (k1->objectid < k2->objectid)
631 return -1;
632 if (k1->type > k2->type)
633 return 1;
634 if (k1->type < k2->type)
635 return -1;
636 if (k1->offset > k2->offset)
637 return 1;
638 if (k1->offset < k2->offset)
639 return -1;
640 return 0;
641}
Chris Mason081e9572007-11-06 10:26:24 -0500642
Chris Masond352ac62008-09-29 15:18:18 -0400643/*
644 * this is used by the defrag code to go through all the
645 * leaves pointed to by a node and reallocate them so that
646 * disk order is close to key order
647 */
Chris Mason6702ed42007-08-07 16:15:09 -0400648int btrfs_realloc_node(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -0400649 struct btrfs_root *root, struct extent_buffer *parent,
Eric Sandeende78b512013-01-31 18:21:12 +0000650 int start_slot, u64 *last_ret,
Chris Masona6b6e752007-10-15 16:22:39 -0400651 struct btrfs_key *progress)
Chris Mason6702ed42007-08-07 16:15:09 -0400652{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400653 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason6b800532007-10-15 16:17:34 -0400654 struct extent_buffer *cur;
Chris Mason6702ed42007-08-07 16:15:09 -0400655 u64 blocknr;
Chris Masone9d0b132007-08-10 14:06:19 -0400656 u64 search_start = *last_ret;
657 u64 last_block = 0;
Chris Mason6702ed42007-08-07 16:15:09 -0400658 u64 other;
659 u32 parent_nritems;
Chris Mason6702ed42007-08-07 16:15:09 -0400660 int end_slot;
661 int i;
662 int err = 0;
Chris Mason6b800532007-10-15 16:17:34 -0400663 u32 blocksize;
Chris Mason081e9572007-11-06 10:26:24 -0500664 int progress_passed = 0;
665 struct btrfs_disk_key disk_key;
Chris Mason6702ed42007-08-07 16:15:09 -0400666
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400667 WARN_ON(trans->transaction != fs_info->running_transaction);
668 WARN_ON(trans->transid != fs_info->generation);
Chris Mason86479a02007-09-10 19:58:16 -0400669
Chris Mason6b800532007-10-15 16:17:34 -0400670 parent_nritems = btrfs_header_nritems(parent);
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400671 blocksize = fs_info->nodesize;
Filipe Manana5dfe2be2015-02-23 19:48:52 +0000672 end_slot = parent_nritems - 1;
Chris Mason6702ed42007-08-07 16:15:09 -0400673
Filipe Manana5dfe2be2015-02-23 19:48:52 +0000674 if (parent_nritems <= 1)
Chris Mason6702ed42007-08-07 16:15:09 -0400675 return 0;
676
Filipe Manana5dfe2be2015-02-23 19:48:52 +0000677 for (i = start_slot; i <= end_slot; i++) {
Chris Mason6702ed42007-08-07 16:15:09 -0400678 int close = 1;
Chris Masona6b6e752007-10-15 16:22:39 -0400679
Chris Mason081e9572007-11-06 10:26:24 -0500680 btrfs_node_key(parent, &disk_key, i);
681 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
682 continue;
683
684 progress_passed = 1;
Chris Mason6b800532007-10-15 16:17:34 -0400685 blocknr = btrfs_node_blockptr(parent, i);
Chris Masone9d0b132007-08-10 14:06:19 -0400686 if (last_block == 0)
687 last_block = blocknr;
Chris Mason5708b952007-10-25 15:43:18 -0400688
Chris Mason6702ed42007-08-07 16:15:09 -0400689 if (i > 0) {
Chris Mason6b800532007-10-15 16:17:34 -0400690 other = btrfs_node_blockptr(parent, i - 1);
691 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -0400692 }
Filipe Manana5dfe2be2015-02-23 19:48:52 +0000693 if (!close && i < end_slot) {
Chris Mason6b800532007-10-15 16:17:34 -0400694 other = btrfs_node_blockptr(parent, i + 1);
695 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -0400696 }
Chris Masone9d0b132007-08-10 14:06:19 -0400697 if (close) {
698 last_block = blocknr;
Chris Mason6702ed42007-08-07 16:15:09 -0400699 continue;
Chris Masone9d0b132007-08-10 14:06:19 -0400700 }
Chris Mason6702ed42007-08-07 16:15:09 -0400701
Josef Bacik206983b2020-11-05 10:45:10 -0500702 cur = btrfs_read_node_slot(parent, i);
703 if (IS_ERR(cur))
704 return PTR_ERR(cur);
Chris Masone9d0b132007-08-10 14:06:19 -0400705 if (search_start == 0)
Chris Mason6b800532007-10-15 16:17:34 -0400706 search_start = last_block;
Chris Masone9d0b132007-08-10 14:06:19 -0400707
Chris Masone7a84562008-06-25 16:01:31 -0400708 btrfs_tree_lock(cur);
Chris Mason6b800532007-10-15 16:17:34 -0400709 err = __btrfs_cow_block(trans, root, cur, parent, i,
Chris Masone7a84562008-06-25 16:01:31 -0400710 &cur, search_start,
Chris Mason6b800532007-10-15 16:17:34 -0400711 min(16 * blocksize,
Josef Bacik9631e4c2020-08-20 11:46:03 -0400712 (end_slot - i) * blocksize),
713 BTRFS_NESTING_COW);
Yan252c38f2007-08-29 09:11:44 -0400714 if (err) {
Chris Masone7a84562008-06-25 16:01:31 -0400715 btrfs_tree_unlock(cur);
Chris Mason6b800532007-10-15 16:17:34 -0400716 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -0400717 break;
Yan252c38f2007-08-29 09:11:44 -0400718 }
Chris Masone7a84562008-06-25 16:01:31 -0400719 search_start = cur->start;
720 last_block = cur->start;
Chris Masonf2183bd2007-08-10 14:42:37 -0400721 *last_ret = search_start;
Chris Masone7a84562008-06-25 16:01:31 -0400722 btrfs_tree_unlock(cur);
723 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -0400724 }
725 return err;
726}
727
Chris Mason74123bd2007-02-02 11:05:29 -0500728/*
Filipe Mananafb812122021-12-02 10:30:35 +0000729 * Search for a key in the given extent_buffer.
Chris Mason5f39d392007-10-15 16:14:19 -0400730 *
Filipe Mananafb812122021-12-02 10:30:35 +0000731 * The lower boundary for the search is specified by the slot number @low. Use a
732 * value of 0 to search over the whole extent buffer.
Chris Mason74123bd2007-02-02 11:05:29 -0500733 *
Filipe Mananafb812122021-12-02 10:30:35 +0000734 * The slot in the extent buffer is returned via @slot. If the key exists in the
735 * extent buffer, then @slot will point to the slot where the key is, otherwise
736 * it points to the slot where you would insert the key.
737 *
738 * Slot may point to the total number of items (i.e. one position beyond the last
739 * key) if the key is bigger than the last key in the extent buffer.
Chris Mason74123bd2007-02-02 11:05:29 -0500740 */
Filipe Mananafb812122021-12-02 10:30:35 +0000741static noinline int generic_bin_search(struct extent_buffer *eb, int low,
Marcos Paulo de Souza67d5e282021-07-06 15:13:25 -0300742 const struct btrfs_key *key, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -0500743{
Filipe Mananafb812122021-12-02 10:30:35 +0000744 unsigned long p;
745 int item_size;
Marcos Paulo de Souza67d5e282021-07-06 15:13:25 -0300746 int high = btrfs_header_nritems(eb);
Chris Masonbe0e5c02007-01-26 15:51:26 -0500747 int ret;
David Sterba5cd17f32020-04-29 23:23:37 +0200748 const int key_size = sizeof(struct btrfs_disk_key);
Chris Masonbe0e5c02007-01-26 15:51:26 -0500749
Liu Bo5e24e9a2016-06-23 16:32:45 -0700750 if (low > high) {
751 btrfs_err(eb->fs_info,
752 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
753 __func__, low, high, eb->start,
754 btrfs_header_owner(eb), btrfs_header_level(eb));
755 return -EINVAL;
756 }
757
Filipe Mananafb812122021-12-02 10:30:35 +0000758 if (btrfs_header_level(eb) == 0) {
759 p = offsetof(struct btrfs_leaf, items);
760 item_size = sizeof(struct btrfs_item);
761 } else {
762 p = offsetof(struct btrfs_node, ptrs);
763 item_size = sizeof(struct btrfs_key_ptr);
764 }
765
Chris Masond3977122009-01-05 21:25:51 -0500766 while (low < high) {
David Sterba5cd17f32020-04-29 23:23:37 +0200767 unsigned long oip;
768 unsigned long offset;
769 struct btrfs_disk_key *tmp;
770 struct btrfs_disk_key unaligned;
771 int mid;
772
Chris Masonbe0e5c02007-01-26 15:51:26 -0500773 mid = (low + high) / 2;
Chris Mason5f39d392007-10-15 16:14:19 -0400774 offset = p + mid * item_size;
David Sterba5cd17f32020-04-29 23:23:37 +0200775 oip = offset_in_page(offset);
Chris Mason5f39d392007-10-15 16:14:19 -0400776
David Sterba5cd17f32020-04-29 23:23:37 +0200777 if (oip + key_size <= PAGE_SIZE) {
Qu Wenruo884b07d2020-12-02 14:48:04 +0800778 const unsigned long idx = get_eb_page_index(offset);
David Sterba5cd17f32020-04-29 23:23:37 +0200779 char *kaddr = page_address(eb->pages[idx]);
Chris Mason934d3752008-12-08 16:43:10 -0500780
Qu Wenruo884b07d2020-12-02 14:48:04 +0800781 oip = get_eb_offset_in_page(eb, offset);
David Sterba5cd17f32020-04-29 23:23:37 +0200782 tmp = (struct btrfs_disk_key *)(kaddr + oip);
Chris Mason5f39d392007-10-15 16:14:19 -0400783 } else {
David Sterba5cd17f32020-04-29 23:23:37 +0200784 read_extent_buffer(eb, &unaligned, offset, key_size);
785 tmp = &unaligned;
Chris Mason5f39d392007-10-15 16:14:19 -0400786 }
David Sterba5cd17f32020-04-29 23:23:37 +0200787
Chris Masonbe0e5c02007-01-26 15:51:26 -0500788 ret = comp_keys(tmp, key);
789
790 if (ret < 0)
791 low = mid + 1;
792 else if (ret > 0)
793 high = mid;
794 else {
795 *slot = mid;
796 return 0;
797 }
798 }
799 *slot = low;
800 return 1;
801}
802
/*
 * Simple binary search on an extent buffer. Works for both leaves and nodes, and
 * always searches over the whole range of keys (slot 0 to slot 'nritems - 1').
 *
 * Thin wrapper around generic_bin_search() with a lower boundary of 0; the
 * result slot and the return value are passed through unchanged.
 */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
		     int *slot)
{
	return generic_bin_search(eb, 0, key, slot);
}
812
Yan, Zhengf0486c62010-05-16 10:46:25 -0400813static void root_add_used(struct btrfs_root *root, u32 size)
814{
815 spin_lock(&root->accounting_lock);
816 btrfs_set_root_used(&root->root_item,
817 btrfs_root_used(&root->root_item) + size);
818 spin_unlock(&root->accounting_lock);
819}
820
821static void root_sub_used(struct btrfs_root *root, u32 size)
822{
823 spin_lock(&root->accounting_lock);
824 btrfs_set_root_used(&root->root_item,
825 btrfs_root_used(&root->root_item) - size);
826 spin_unlock(&root->accounting_lock);
827}
828
Chris Masond352ac62008-09-29 15:18:18 -0400829/* given a node and slot number, this reads the blocks it points to. The
830 * extent buffer is returned with a reference taken (but unlocked).
Chris Masond352ac62008-09-29 15:18:18 -0400831 */
David Sterba4b231ae2019-08-21 19:16:27 +0200832struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
833 int slot)
Chris Masonbb803952007-03-01 12:04:21 -0500834{
Chris Masonca7a79a2008-05-12 12:59:19 -0400835 int level = btrfs_header_level(parent);
Josef Bacik416bc652013-04-23 14:17:42 -0400836 struct extent_buffer *eb;
Qu Wenruo581c1762018-03-29 09:08:11 +0800837 struct btrfs_key first_key;
Josef Bacik416bc652013-04-23 14:17:42 -0400838
Liu Bofb770ae2016-07-05 12:10:14 -0700839 if (slot < 0 || slot >= btrfs_header_nritems(parent))
840 return ERR_PTR(-ENOENT);
Chris Masonca7a79a2008-05-12 12:59:19 -0400841
842 BUG_ON(level == 0);
843
Qu Wenruo581c1762018-03-29 09:08:11 +0800844 btrfs_node_key_to_cpu(parent, &first_key, slot);
David Sterbad0d20b02019-03-20 14:54:01 +0100845 eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
Josef Bacik1b7ec852020-11-05 10:45:18 -0500846 btrfs_header_owner(parent),
Qu Wenruo581c1762018-03-29 09:08:11 +0800847 btrfs_node_ptr_generation(parent, slot),
848 level - 1, &first_key);
Liu Bofb770ae2016-07-05 12:10:14 -0700849 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
850 free_extent_buffer(eb);
851 eb = ERR_PTR(-EIO);
Josef Bacik416bc652013-04-23 14:17:42 -0400852 }
853
854 return eb;
Chris Masonbb803952007-03-01 12:04:21 -0500855}
856
Chris Masond352ac62008-09-29 15:18:18 -0400857/*
858 * node level balancing, used to make sure nodes are in proper order for
859 * item deletion. We balance from the top down, so we have to make sure
860 * that a deletion won't leave an node completely empty later on.
861 */
Chris Masone02119d2008-09-05 16:13:11 -0400862static noinline int balance_level(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -0500863 struct btrfs_root *root,
864 struct btrfs_path *path, int level)
Chris Masonbb803952007-03-01 12:04:21 -0500865{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400866 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -0400867 struct extent_buffer *right = NULL;
868 struct extent_buffer *mid;
869 struct extent_buffer *left = NULL;
870 struct extent_buffer *parent = NULL;
Chris Masonbb803952007-03-01 12:04:21 -0500871 int ret = 0;
872 int wret;
873 int pslot;
Chris Masonbb803952007-03-01 12:04:21 -0500874 int orig_slot = path->slots[level];
Chris Mason79f95c82007-03-01 15:16:26 -0500875 u64 orig_ptr;
Chris Masonbb803952007-03-01 12:04:21 -0500876
Liu Bo98e6b1e2018-09-12 06:06:23 +0800877 ASSERT(level > 0);
Chris Masonbb803952007-03-01 12:04:21 -0500878
Chris Mason5f39d392007-10-15 16:14:19 -0400879 mid = path->nodes[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -0500880
Josef Bacikac5887c2020-08-20 11:46:10 -0400881 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
Chris Mason7bb86312007-12-11 09:25:06 -0500882 WARN_ON(btrfs_header_generation(mid) != trans->transid);
883
Chris Mason1d4f8a02007-03-13 09:28:32 -0400884 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
Chris Mason79f95c82007-03-01 15:16:26 -0500885
Li Zefana05a9bb2011-09-06 16:55:34 +0800886 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -0400887 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +0800888 pslot = path->slots[level + 1];
889 }
Chris Masonbb803952007-03-01 12:04:21 -0500890
Chris Mason40689472007-03-17 14:29:23 -0400891 /*
892 * deal with the case where there is only one pointer in the root
893 * by promoting the node below to a root
894 */
Chris Mason5f39d392007-10-15 16:14:19 -0400895 if (!parent) {
896 struct extent_buffer *child;
Chris Masonbb803952007-03-01 12:04:21 -0500897
Chris Mason5f39d392007-10-15 16:14:19 -0400898 if (btrfs_header_nritems(mid) != 1)
Chris Masonbb803952007-03-01 12:04:21 -0500899 return 0;
900
901 /* promote the child to a root */
David Sterba4b231ae2019-08-21 19:16:27 +0200902 child = btrfs_read_node_slot(mid, 0);
Liu Bofb770ae2016-07-05 12:10:14 -0700903 if (IS_ERR(child)) {
904 ret = PTR_ERR(child);
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400905 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -0700906 goto enospc;
907 }
908
Chris Mason925baed2008-06-25 16:01:30 -0400909 btrfs_tree_lock(child);
Josef Bacik9631e4c2020-08-20 11:46:03 -0400910 ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
911 BTRFS_NESTING_COW);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400912 if (ret) {
913 btrfs_tree_unlock(child);
914 free_extent_buffer(child);
915 goto enospc;
916 }
Yan2f375ab2008-02-01 14:58:07 -0500917
Filipe Manana406808a2021-03-11 14:31:08 +0000918 ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
David Sterbad9d19a02018-03-05 16:35:29 +0100919 BUG_ON(ret < 0);
Chris Mason240f62c2011-03-23 14:54:42 -0400920 rcu_assign_pointer(root->node, child);
Chris Mason925baed2008-06-25 16:01:30 -0400921
Chris Mason0b86a832008-03-24 15:01:56 -0400922 add_root_to_dirty_list(root);
Chris Mason925baed2008-06-25 16:01:30 -0400923 btrfs_tree_unlock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -0500924
Chris Mason925baed2008-06-25 16:01:30 -0400925 path->locks[level] = 0;
Chris Masonbb803952007-03-01 12:04:21 -0500926 path->nodes[level] = NULL;
David Sterba6a884d7d2019-03-20 14:30:02 +0100927 btrfs_clean_tree_block(mid);
Chris Mason925baed2008-06-25 16:01:30 -0400928 btrfs_tree_unlock(mid);
Chris Masonbb803952007-03-01 12:04:21 -0500929 /* once for the path */
Chris Mason5f39d392007-10-15 16:14:19 -0400930 free_extent_buffer(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400931
932 root_sub_used(root, mid->len);
Filipe Manana7a1636082021-12-13 08:45:12 +0000933 btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
Chris Masonbb803952007-03-01 12:04:21 -0500934 /* once for the root ptr */
Josef Bacik3083ee22012-03-09 16:01:49 -0500935 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400936 return 0;
Chris Masonbb803952007-03-01 12:04:21 -0500937 }
Chris Mason5f39d392007-10-15 16:14:19 -0400938 if (btrfs_header_nritems(mid) >
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400939 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
Chris Masonbb803952007-03-01 12:04:21 -0500940 return 0;
941
David Sterba4b231ae2019-08-21 19:16:27 +0200942 left = btrfs_read_node_slot(parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -0700943 if (IS_ERR(left))
944 left = NULL;
945
Chris Mason5f39d392007-10-15 16:14:19 -0400946 if (left) {
Josef Bacikbf774672020-08-20 11:46:04 -0400947 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
Chris Mason5f39d392007-10-15 16:14:19 -0400948 wret = btrfs_cow_block(trans, root, left,
Josef Bacik9631e4c2020-08-20 11:46:03 -0400949 parent, pslot - 1, &left,
Josef Bacikbf59a5a2020-08-20 11:46:05 -0400950 BTRFS_NESTING_LEFT_COW);
Chris Mason54aa1f42007-06-22 14:16:25 -0400951 if (wret) {
952 ret = wret;
953 goto enospc;
954 }
Chris Mason2cc58cf2007-08-27 16:49:44 -0400955 }
Liu Bofb770ae2016-07-05 12:10:14 -0700956
David Sterba4b231ae2019-08-21 19:16:27 +0200957 right = btrfs_read_node_slot(parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -0700958 if (IS_ERR(right))
959 right = NULL;
960
Chris Mason5f39d392007-10-15 16:14:19 -0400961 if (right) {
Josef Bacikbf774672020-08-20 11:46:04 -0400962 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
Chris Mason5f39d392007-10-15 16:14:19 -0400963 wret = btrfs_cow_block(trans, root, right,
Josef Bacik9631e4c2020-08-20 11:46:03 -0400964 parent, pslot + 1, &right,
Josef Bacikbf59a5a2020-08-20 11:46:05 -0400965 BTRFS_NESTING_RIGHT_COW);
Chris Mason2cc58cf2007-08-27 16:49:44 -0400966 if (wret) {
967 ret = wret;
968 goto enospc;
969 }
970 }
971
972 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -0400973 if (left) {
974 orig_slot += btrfs_header_nritems(left);
David Sterbad30a6682019-03-20 14:16:45 +0100975 wret = push_node_left(trans, left, mid, 1);
Chris Mason79f95c82007-03-01 15:16:26 -0500976 if (wret < 0)
977 ret = wret;
Chris Masonbb803952007-03-01 12:04:21 -0500978 }
Chris Mason79f95c82007-03-01 15:16:26 -0500979
980 /*
981 * then try to empty the right most buffer into the middle
982 */
Chris Mason5f39d392007-10-15 16:14:19 -0400983 if (right) {
David Sterbad30a6682019-03-20 14:16:45 +0100984 wret = push_node_left(trans, mid, right, 1);
Chris Mason54aa1f42007-06-22 14:16:25 -0400985 if (wret < 0 && wret != -ENOSPC)
Chris Mason79f95c82007-03-01 15:16:26 -0500986 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -0400987 if (btrfs_header_nritems(right) == 0) {
David Sterba6a884d7d2019-03-20 14:30:02 +0100988 btrfs_clean_tree_block(right);
Chris Mason925baed2008-06-25 16:01:30 -0400989 btrfs_tree_unlock(right);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +0000990 del_ptr(root, path, level + 1, pslot + 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400991 root_sub_used(root, right->len);
Filipe Manana7a1636082021-12-13 08:45:12 +0000992 btrfs_free_tree_block(trans, btrfs_root_id(root), right,
993 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -0500994 free_extent_buffer_stale(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400995 right = NULL;
Chris Masonbb803952007-03-01 12:04:21 -0500996 } else {
Chris Mason5f39d392007-10-15 16:14:19 -0400997 struct btrfs_disk_key right_key;
998 btrfs_node_key(right, &right_key, 0);
Filipe Mananaf3a84cc2021-03-11 14:31:07 +0000999 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
1000 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
David Sterba0e82bcf2018-03-05 16:16:54 +01001001 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04001002 btrfs_set_node_key(parent, &right_key, pslot + 1);
1003 btrfs_mark_buffer_dirty(parent);
Chris Masonbb803952007-03-01 12:04:21 -05001004 }
1005 }
Chris Mason5f39d392007-10-15 16:14:19 -04001006 if (btrfs_header_nritems(mid) == 1) {
Chris Mason79f95c82007-03-01 15:16:26 -05001007 /*
1008 * we're not allowed to leave a node with one item in the
1009 * tree during a delete. A deletion from lower in the tree
1010 * could try to delete the only pointer in this node.
1011 * So, pull some keys from the left.
1012 * There has to be a left pointer at this point because
1013 * otherwise we would have pulled some pointers from the
1014 * right
1015 */
Mark Fasheh305a26a2011-09-01 11:27:57 -07001016 if (!left) {
1017 ret = -EROFS;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001018 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -07001019 goto enospc;
1020 }
David Sterba55d32ed2019-03-20 14:18:06 +01001021 wret = balance_node_right(trans, mid, left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001022 if (wret < 0) {
Chris Mason79f95c82007-03-01 15:16:26 -05001023 ret = wret;
Chris Mason54aa1f42007-06-22 14:16:25 -04001024 goto enospc;
1025 }
Chris Masonbce4eae2008-04-24 14:42:46 -04001026 if (wret == 1) {
David Sterbad30a6682019-03-20 14:16:45 +01001027 wret = push_node_left(trans, left, mid, 1);
Chris Masonbce4eae2008-04-24 14:42:46 -04001028 if (wret < 0)
1029 ret = wret;
1030 }
Chris Mason79f95c82007-03-01 15:16:26 -05001031 BUG_ON(wret == 1);
1032 }
Chris Mason5f39d392007-10-15 16:14:19 -04001033 if (btrfs_header_nritems(mid) == 0) {
David Sterba6a884d7d2019-03-20 14:30:02 +01001034 btrfs_clean_tree_block(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001035 btrfs_tree_unlock(mid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001036 del_ptr(root, path, level + 1, pslot);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001037 root_sub_used(root, mid->len);
Filipe Manana7a1636082021-12-13 08:45:12 +00001038 btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05001039 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001040 mid = NULL;
Chris Mason79f95c82007-03-01 15:16:26 -05001041 } else {
1042 /* update the parent key to reflect our changes */
Chris Mason5f39d392007-10-15 16:14:19 -04001043 struct btrfs_disk_key mid_key;
1044 btrfs_node_key(mid, &mid_key, 0);
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00001045 ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1046 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
David Sterba0e82bcf2018-03-05 16:16:54 +01001047 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04001048 btrfs_set_node_key(parent, &mid_key, pslot);
1049 btrfs_mark_buffer_dirty(parent);
Chris Mason79f95c82007-03-01 15:16:26 -05001050 }
Chris Masonbb803952007-03-01 12:04:21 -05001051
Chris Mason79f95c82007-03-01 15:16:26 -05001052 /* update the path */
Chris Mason5f39d392007-10-15 16:14:19 -04001053 if (left) {
1054 if (btrfs_header_nritems(left) > orig_slot) {
David Sterba67439da2019-10-08 13:28:47 +02001055 atomic_inc(&left->refs);
Chris Mason925baed2008-06-25 16:01:30 -04001056 /* left was locked after cow */
Chris Mason5f39d392007-10-15 16:14:19 -04001057 path->nodes[level] = left;
Chris Masonbb803952007-03-01 12:04:21 -05001058 path->slots[level + 1] -= 1;
1059 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04001060 if (mid) {
1061 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04001062 free_extent_buffer(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001063 }
Chris Masonbb803952007-03-01 12:04:21 -05001064 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001065 orig_slot -= btrfs_header_nritems(left);
Chris Masonbb803952007-03-01 12:04:21 -05001066 path->slots[level] = orig_slot;
1067 }
1068 }
Chris Mason79f95c82007-03-01 15:16:26 -05001069 /* double check we haven't messed things up */
Chris Masone20d96d2007-03-22 12:13:20 -04001070 if (orig_ptr !=
Chris Mason5f39d392007-10-15 16:14:19 -04001071 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
Chris Mason79f95c82007-03-01 15:16:26 -05001072 BUG();
Chris Mason54aa1f42007-06-22 14:16:25 -04001073enospc:
Chris Mason925baed2008-06-25 16:01:30 -04001074 if (right) {
1075 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001076 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04001077 }
1078 if (left) {
1079 if (path->nodes[level] != left)
1080 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001081 free_extent_buffer(left);
Chris Mason925baed2008-06-25 16:01:30 -04001082 }
Chris Masonbb803952007-03-01 12:04:21 -05001083 return ret;
1084}
1085
Chris Masond352ac62008-09-29 15:18:18 -04001086/* Node balancing for insertion. Here we only split or push nodes around
1087 * when they are completely full. This is also done top down, so we
1088 * have to be pessimistic.
1089 */
Chris Masond3977122009-01-05 21:25:51 -05001090static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05001091 struct btrfs_root *root,
1092 struct btrfs_path *path, int level)
Chris Masone66f7092007-04-20 13:16:02 -04001093{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001094 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04001095 struct extent_buffer *right = NULL;
1096 struct extent_buffer *mid;
1097 struct extent_buffer *left = NULL;
1098 struct extent_buffer *parent = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04001099 int ret = 0;
1100 int wret;
1101 int pslot;
1102 int orig_slot = path->slots[level];
Chris Masone66f7092007-04-20 13:16:02 -04001103
1104 if (level == 0)
1105 return 1;
1106
Chris Mason5f39d392007-10-15 16:14:19 -04001107 mid = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05001108 WARN_ON(btrfs_header_generation(mid) != trans->transid);
Chris Masone66f7092007-04-20 13:16:02 -04001109
Li Zefana05a9bb2011-09-06 16:55:34 +08001110 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04001111 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08001112 pslot = path->slots[level + 1];
1113 }
Chris Masone66f7092007-04-20 13:16:02 -04001114
Chris Mason5f39d392007-10-15 16:14:19 -04001115 if (!parent)
Chris Masone66f7092007-04-20 13:16:02 -04001116 return 1;
Chris Masone66f7092007-04-20 13:16:02 -04001117
David Sterba4b231ae2019-08-21 19:16:27 +02001118 left = btrfs_read_node_slot(parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001119 if (IS_ERR(left))
1120 left = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04001121
1122 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04001123 if (left) {
Chris Masone66f7092007-04-20 13:16:02 -04001124 u32 left_nr;
Chris Mason925baed2008-06-25 16:01:30 -04001125
Josef Bacikbf774672020-08-20 11:46:04 -04001126 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001127
Chris Mason5f39d392007-10-15 16:14:19 -04001128 left_nr = btrfs_header_nritems(left);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001129 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
Chris Mason33ade1f2007-04-20 13:48:57 -04001130 wret = 1;
1131 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001132 ret = btrfs_cow_block(trans, root, left, parent,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001133 pslot - 1, &left,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04001134 BTRFS_NESTING_LEFT_COW);
Chris Mason54aa1f42007-06-22 14:16:25 -04001135 if (ret)
1136 wret = 1;
1137 else {
David Sterbad30a6682019-03-20 14:16:45 +01001138 wret = push_node_left(trans, left, mid, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04001139 }
Chris Mason33ade1f2007-04-20 13:48:57 -04001140 }
Chris Masone66f7092007-04-20 13:16:02 -04001141 if (wret < 0)
1142 ret = wret;
1143 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001144 struct btrfs_disk_key disk_key;
Chris Masone66f7092007-04-20 13:16:02 -04001145 orig_slot += left_nr;
Chris Mason5f39d392007-10-15 16:14:19 -04001146 btrfs_node_key(mid, &disk_key, 0);
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00001147 ret = btrfs_tree_mod_log_insert_key(parent, pslot,
1148 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
David Sterba0e82bcf2018-03-05 16:16:54 +01001149 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04001150 btrfs_set_node_key(parent, &disk_key, pslot);
1151 btrfs_mark_buffer_dirty(parent);
1152 if (btrfs_header_nritems(left) > orig_slot) {
1153 path->nodes[level] = left;
Chris Masone66f7092007-04-20 13:16:02 -04001154 path->slots[level + 1] -= 1;
1155 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04001156 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04001157 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04001158 } else {
1159 orig_slot -=
Chris Mason5f39d392007-10-15 16:14:19 -04001160 btrfs_header_nritems(left);
Chris Masone66f7092007-04-20 13:16:02 -04001161 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04001162 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001163 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04001164 }
Chris Masone66f7092007-04-20 13:16:02 -04001165 return 0;
1166 }
Chris Mason925baed2008-06-25 16:01:30 -04001167 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001168 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04001169 }
David Sterba4b231ae2019-08-21 19:16:27 +02001170 right = btrfs_read_node_slot(parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001171 if (IS_ERR(right))
1172 right = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04001173
1174 /*
1175 * then try to empty the right most buffer into the middle
1176 */
Chris Mason5f39d392007-10-15 16:14:19 -04001177 if (right) {
Chris Mason33ade1f2007-04-20 13:48:57 -04001178 u32 right_nr;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001179
Josef Bacikbf774672020-08-20 11:46:04 -04001180 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001181
Chris Mason5f39d392007-10-15 16:14:19 -04001182 right_nr = btrfs_header_nritems(right);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001183 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
Chris Mason33ade1f2007-04-20 13:48:57 -04001184 wret = 1;
1185 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001186 ret = btrfs_cow_block(trans, root, right,
1187 parent, pslot + 1,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04001188 &right, BTRFS_NESTING_RIGHT_COW);
Chris Mason54aa1f42007-06-22 14:16:25 -04001189 if (ret)
1190 wret = 1;
1191 else {
David Sterba55d32ed2019-03-20 14:18:06 +01001192 wret = balance_node_right(trans, right, mid);
Chris Mason54aa1f42007-06-22 14:16:25 -04001193 }
Chris Mason33ade1f2007-04-20 13:48:57 -04001194 }
Chris Masone66f7092007-04-20 13:16:02 -04001195 if (wret < 0)
1196 ret = wret;
1197 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04001198 struct btrfs_disk_key disk_key;
1199
1200 btrfs_node_key(right, &disk_key, 0);
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00001201 ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
1202 BTRFS_MOD_LOG_KEY_REPLACE, GFP_NOFS);
David Sterba0e82bcf2018-03-05 16:16:54 +01001203 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04001204 btrfs_set_node_key(parent, &disk_key, pslot + 1);
1205 btrfs_mark_buffer_dirty(parent);
1206
1207 if (btrfs_header_nritems(mid) <= orig_slot) {
1208 path->nodes[level] = right;
Chris Masone66f7092007-04-20 13:16:02 -04001209 path->slots[level + 1] += 1;
1210 path->slots[level] = orig_slot -
Chris Mason5f39d392007-10-15 16:14:19 -04001211 btrfs_header_nritems(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001212 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04001213 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04001214 } else {
Chris Mason925baed2008-06-25 16:01:30 -04001215 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001216 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04001217 }
Chris Masone66f7092007-04-20 13:16:02 -04001218 return 0;
1219 }
Chris Mason925baed2008-06-25 16:01:30 -04001220 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001221 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04001222 }
Chris Masone66f7092007-04-20 13:16:02 -04001223 return 1;
1224}
1225
Chris Mason74123bd2007-02-02 11:05:29 -05001226/*
Chris Masond352ac62008-09-29 15:18:18 -04001227 * readahead one full node of leaves, finding things that are close
1228 * to the block in 'slot', and triggering ra on them.
Chris Mason3c69fae2007-08-07 15:52:22 -04001229 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001230static void reada_for_search(struct btrfs_fs_info *fs_info,
Chris Masonc8c42862009-04-03 10:14:18 -04001231 struct btrfs_path *path,
1232 int level, int slot, u64 objectid)
Chris Mason3c69fae2007-08-07 15:52:22 -04001233{
Chris Mason5f39d392007-10-15 16:14:19 -04001234 struct extent_buffer *node;
Chris Mason01f46652007-12-21 16:24:26 -05001235 struct btrfs_disk_key disk_key;
Chris Mason3c69fae2007-08-07 15:52:22 -04001236 u32 nritems;
Chris Mason3c69fae2007-08-07 15:52:22 -04001237 u64 search;
Chris Masona7175312009-01-22 09:23:10 -05001238 u64 target;
Chris Mason6b800532007-10-15 16:17:34 -04001239 u64 nread = 0;
Filipe Mananaace75062021-03-31 11:56:21 +01001240 u64 nread_max;
Chris Mason6b800532007-10-15 16:17:34 -04001241 u32 nr;
1242 u32 blocksize;
1243 u32 nscan = 0;
Chris Masondb945352007-10-15 16:15:53 -04001244
Filipe Mananaace75062021-03-31 11:56:21 +01001245 if (level != 1 && path->reada != READA_FORWARD_ALWAYS)
Chris Mason3c69fae2007-08-07 15:52:22 -04001246 return;
1247
Chris Mason6702ed42007-08-07 16:15:09 -04001248 if (!path->nodes[level])
1249 return;
1250
Chris Mason5f39d392007-10-15 16:14:19 -04001251 node = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04001252
Filipe Mananaace75062021-03-31 11:56:21 +01001253 /*
1254 * Since the time between visiting leaves is much shorter than the time
1255 * between visiting nodes, limit read ahead of nodes to 1, to avoid too
1256 * much IO at once (possibly random).
1257 */
1258 if (path->reada == READA_FORWARD_ALWAYS) {
1259 if (level > 1)
1260 nread_max = node->fs_info->nodesize;
1261 else
1262 nread_max = SZ_128K;
1263 } else {
1264 nread_max = SZ_64K;
1265 }
1266
Chris Mason3c69fae2007-08-07 15:52:22 -04001267 search = btrfs_node_blockptr(node, slot);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001268 blocksize = fs_info->nodesize;
Filipe Manana069a2e32021-07-20 16:03:03 +01001269 if (path->reada != READA_FORWARD_ALWAYS) {
1270 struct extent_buffer *eb;
1271
1272 eb = find_extent_buffer(fs_info, search);
1273 if (eb) {
1274 free_extent_buffer(eb);
1275 return;
1276 }
Chris Mason3c69fae2007-08-07 15:52:22 -04001277 }
1278
Chris Masona7175312009-01-22 09:23:10 -05001279 target = search;
Chris Mason6b800532007-10-15 16:17:34 -04001280
Chris Mason5f39d392007-10-15 16:14:19 -04001281 nritems = btrfs_header_nritems(node);
Chris Mason6b800532007-10-15 16:17:34 -04001282 nr = slot;
Josef Bacik25b8b932011-06-08 14:36:54 -04001283
Chris Masond3977122009-01-05 21:25:51 -05001284 while (1) {
David Sterbae4058b52015-11-27 16:31:35 +01001285 if (path->reada == READA_BACK) {
Chris Mason6b800532007-10-15 16:17:34 -04001286 if (nr == 0)
1287 break;
1288 nr--;
Filipe Mananaace75062021-03-31 11:56:21 +01001289 } else if (path->reada == READA_FORWARD ||
1290 path->reada == READA_FORWARD_ALWAYS) {
Chris Mason6b800532007-10-15 16:17:34 -04001291 nr++;
1292 if (nr >= nritems)
1293 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04001294 }
David Sterbae4058b52015-11-27 16:31:35 +01001295 if (path->reada == READA_BACK && objectid) {
Chris Mason01f46652007-12-21 16:24:26 -05001296 btrfs_node_key(node, &disk_key, nr);
1297 if (btrfs_disk_key_objectid(&disk_key) != objectid)
1298 break;
1299 }
Chris Mason6b800532007-10-15 16:17:34 -04001300 search = btrfs_node_blockptr(node, nr);
Filipe Mananaace75062021-03-31 11:56:21 +01001301 if (path->reada == READA_FORWARD_ALWAYS ||
1302 (search <= target && target - search <= 65536) ||
Chris Masona7175312009-01-22 09:23:10 -05001303 (search > target && search - target <= 65536)) {
Josef Bacikbfb484d2020-11-05 10:45:09 -05001304 btrfs_readahead_node_child(node, nr);
Chris Mason6b800532007-10-15 16:17:34 -04001305 nread += blocksize;
1306 }
1307 nscan++;
Filipe Mananaace75062021-03-31 11:56:21 +01001308 if (nread > nread_max || nscan > 32)
Chris Mason6b800532007-10-15 16:17:34 -04001309 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04001310 }
1311}
Chris Mason925baed2008-06-25 16:01:30 -04001312
Josef Bacikbfb484d2020-11-05 10:45:09 -05001313static noinline void reada_for_balance(struct btrfs_path *path, int level)
Chris Masonb4ce94d2009-02-04 09:25:08 -05001314{
Josef Bacikbfb484d2020-11-05 10:45:09 -05001315 struct extent_buffer *parent;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001316 int slot;
1317 int nritems;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001318
Chris Mason8c594ea2009-04-20 15:50:10 -04001319 parent = path->nodes[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001320 if (!parent)
Josef Bacik0b088512013-06-17 14:23:02 -04001321 return;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001322
1323 nritems = btrfs_header_nritems(parent);
Chris Mason8c594ea2009-04-20 15:50:10 -04001324 slot = path->slots[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001325
Josef Bacikbfb484d2020-11-05 10:45:09 -05001326 if (slot > 0)
1327 btrfs_readahead_node_child(parent, slot - 1);
1328 if (slot + 1 < nritems)
1329 btrfs_readahead_node_child(parent, slot + 1);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001330}
1331
1332
1333/*
Chris Masond3977122009-01-05 21:25:51 -05001334 * when we walk down the tree, it is usually safe to unlock the higher layers
1335 * in the tree. The exceptions are when our path goes through slot 0, because
1336 * operations on the tree might require changing key pointers higher up in the
1337 * tree.
Chris Masond352ac62008-09-29 15:18:18 -04001338 *
Chris Masond3977122009-01-05 21:25:51 -05001339 * callers might also have set path->keep_locks, which tells this code to keep
1340 * the lock if the path points to the last slot in the block. This is part of
1341 * walking through the tree, and selecting the next slot in the higher block.
Chris Masond352ac62008-09-29 15:18:18 -04001342 *
Chris Masond3977122009-01-05 21:25:51 -05001343 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
1344 * if lowest_unlock is 1, level 0 won't be unlocked
Chris Masond352ac62008-09-29 15:18:18 -04001345 */
Chris Masone02119d2008-09-05 16:13:11 -04001346static noinline void unlock_up(struct btrfs_path *path, int level,
Chris Masonf7c79f32012-03-19 15:54:38 -04001347 int lowest_unlock, int min_write_lock_level,
1348 int *write_lock_level)
Chris Mason925baed2008-06-25 16:01:30 -04001349{
1350 int i;
1351 int skip_level = level;
Nikolay Borisovc1227992021-12-14 15:39:39 +02001352 bool check_skip = true;
Chris Mason925baed2008-06-25 16:01:30 -04001353
1354 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1355 if (!path->nodes[i])
1356 break;
1357 if (!path->locks[i])
1358 break;
Nikolay Borisovc1227992021-12-14 15:39:39 +02001359
1360 if (check_skip) {
1361 if (path->slots[i] == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04001362 skip_level = i + 1;
1363 continue;
1364 }
Chris Mason051e1b92008-06-25 16:01:30 -04001365
Nikolay Borisovc1227992021-12-14 15:39:39 +02001366 if (path->keep_locks) {
1367 u32 nritems;
1368
1369 nritems = btrfs_header_nritems(path->nodes[i]);
1370 if (nritems < 1 || path->slots[i] >= nritems - 1) {
1371 skip_level = i + 1;
1372 continue;
1373 }
1374 }
1375 }
1376
Liu Bod80bb3f2018-05-18 11:00:24 +08001377 if (i >= lowest_unlock && i > skip_level) {
Nikolay Borisovc1227992021-12-14 15:39:39 +02001378 check_skip = false;
1379 btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -04001380 path->locks[i] = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04001381 if (write_lock_level &&
1382 i > min_write_lock_level &&
1383 i <= *write_lock_level) {
1384 *write_lock_level = i - 1;
1385 }
Chris Mason925baed2008-06-25 16:01:30 -04001386 }
1387 }
1388}
1389
Chris Mason3c69fae2007-08-07 15:52:22 -04001390/*
Chris Masonc8c42862009-04-03 10:14:18 -04001391 * helper function for btrfs_search_slot. The goal is to find a block
1392 * in cache without setting the path to blocking. If we find the block
1393 * we return zero and the path is unchanged.
1394 *
1395 * If we can't find the block, we set the path blocking and do some
1396 * reada. -EAGAIN is returned and the search must be repeated.
1397 */
1398static int
Liu Bod07b8522017-01-30 12:23:42 -08001399read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
1400 struct extent_buffer **eb_ret, int level, int slot,
David Sterbacda79c52017-02-10 18:44:32 +01001401 const struct btrfs_key *key)
Chris Masonc8c42862009-04-03 10:14:18 -04001402{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001403 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonc8c42862009-04-03 10:14:18 -04001404 u64 blocknr;
1405 u64 gen;
Chris Masonc8c42862009-04-03 10:14:18 -04001406 struct extent_buffer *tmp;
Qu Wenruo581c1762018-03-29 09:08:11 +08001407 struct btrfs_key first_key;
Chris Mason76a05b32009-05-14 13:24:30 -04001408 int ret;
Qu Wenruo581c1762018-03-29 09:08:11 +08001409 int parent_level;
Chris Masonc8c42862009-04-03 10:14:18 -04001410
Nikolay Borisov213ff4b2020-05-27 13:10:59 +03001411 blocknr = btrfs_node_blockptr(*eb_ret, slot);
1412 gen = btrfs_node_ptr_generation(*eb_ret, slot);
1413 parent_level = btrfs_header_level(*eb_ret);
1414 btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
Chris Masonc8c42862009-04-03 10:14:18 -04001415
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001416 tmp = find_extent_buffer(fs_info, blocknr);
Chris Masoncb449212010-10-24 11:01:27 -04001417 if (tmp) {
Filipe Mananaace75062021-03-31 11:56:21 +01001418 if (p->reada == READA_FORWARD_ALWAYS)
1419 reada_for_search(fs_info, p, level, slot, key->objectid);
1420
Chris Masonb9fab912012-05-06 07:23:47 -04001421 /* first we do an atomic uptodate check */
Josef Bacikbdf7c002013-06-17 13:44:48 -04001422 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
Qu Wenruo448de472019-03-12 17:10:40 +08001423 /*
1424 * Do extra check for first_key, eb can be stale due to
1425 * being cached, read from scrub, or have multiple
1426 * parents (shared tree blocks).
1427 */
David Sterbae064d5e2019-03-20 14:58:13 +01001428 if (btrfs_verify_level_key(tmp,
Qu Wenruo448de472019-03-12 17:10:40 +08001429 parent_level - 1, &first_key, gen)) {
1430 free_extent_buffer(tmp);
1431 return -EUCLEAN;
1432 }
Josef Bacikbdf7c002013-06-17 13:44:48 -04001433 *eb_ret = tmp;
1434 return 0;
Chris Masoncb449212010-10-24 11:01:27 -04001435 }
Josef Bacikbdf7c002013-06-17 13:44:48 -04001436
Josef Bacikbdf7c002013-06-17 13:44:48 -04001437 /* now we're allowed to do a blocking uptodate check */
Qu Wenruo581c1762018-03-29 09:08:11 +08001438 ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
Josef Bacikbdf7c002013-06-17 13:44:48 -04001439 if (!ret) {
1440 *eb_ret = tmp;
1441 return 0;
1442 }
1443 free_extent_buffer(tmp);
1444 btrfs_release_path(p);
1445 return -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04001446 }
1447
1448 /*
1449 * reduce lock contention at high levels
1450 * of the btree by dropping locks before
Chris Mason76a05b32009-05-14 13:24:30 -04001451 * we read. Don't release the lock on the current
1452 * level because we need to walk this node to figure
1453 * out which blocks to read.
Chris Masonc8c42862009-04-03 10:14:18 -04001454 */
Chris Mason8c594ea2009-04-20 15:50:10 -04001455 btrfs_unlock_up_safe(p, level + 1);
Chris Mason8c594ea2009-04-20 15:50:10 -04001456
David Sterbae4058b52015-11-27 16:31:35 +01001457 if (p->reada != READA_NONE)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04001458 reada_for_search(fs_info, p, level, slot, key->objectid);
Chris Masonc8c42862009-04-03 10:14:18 -04001459
Chris Mason76a05b32009-05-14 13:24:30 -04001460 ret = -EAGAIN;
Josef Bacik1b7ec852020-11-05 10:45:18 -05001461 tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
1462 gen, parent_level - 1, &first_key);
Liu Bo64c043d2015-05-25 17:30:15 +08001463 if (!IS_ERR(tmp)) {
Chris Mason76a05b32009-05-14 13:24:30 -04001464 /*
1465 * If the read above didn't mark this buffer up to date,
1466 * it will never end up being up to date. Set ret to EIO now
1467 * and give up so that our caller doesn't loop forever
1468 * on our EAGAINs.
1469 */
Liu Boe6a1d6f2018-05-18 11:00:20 +08001470 if (!extent_buffer_uptodate(tmp))
Chris Mason76a05b32009-05-14 13:24:30 -04001471 ret = -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04001472 free_extent_buffer(tmp);
Liu Boc871b0f2016-06-06 12:01:23 -07001473 } else {
1474 ret = PTR_ERR(tmp);
Chris Mason76a05b32009-05-14 13:24:30 -04001475 }
Liu Bo02a33072018-05-16 01:37:36 +08001476
1477 btrfs_release_path(p);
Chris Mason76a05b32009-05-14 13:24:30 -04001478 return ret;
Chris Masonc8c42862009-04-03 10:14:18 -04001479}
1480
1481/*
1482 * helper function for btrfs_search_slot. This does all of the checks
1483 * for node-level blocks and does any balancing required based on
1484 * the ins_len.
1485 *
1486 * If no extra work was required, zero is returned. If we had to
1487 * drop the path, -EAGAIN is returned and btrfs_search_slot must
1488 * start over
1489 */
1490static int
1491setup_nodes_for_search(struct btrfs_trans_handle *trans,
1492 struct btrfs_root *root, struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -04001493 struct extent_buffer *b, int level, int ins_len,
1494 int *write_lock_level)
Chris Masonc8c42862009-04-03 10:14:18 -04001495{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001496 struct btrfs_fs_info *fs_info = root->fs_info;
Nikolay Borisov95b982d2020-11-13 09:29:40 +02001497 int ret = 0;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001498
Chris Masonc8c42862009-04-03 10:14:18 -04001499 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001500 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
Chris Masonc8c42862009-04-03 10:14:18 -04001501
Chris Masonbd681512011-07-16 15:23:14 -04001502 if (*write_lock_level < level + 1) {
1503 *write_lock_level = level + 1;
1504 btrfs_release_path(p);
Nikolay Borisov95b982d2020-11-13 09:29:40 +02001505 return -EAGAIN;
Chris Masonbd681512011-07-16 15:23:14 -04001506 }
1507
Josef Bacikbfb484d2020-11-05 10:45:09 -05001508 reada_for_balance(p, level);
Nikolay Borisov95b982d2020-11-13 09:29:40 +02001509 ret = split_node(trans, root, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04001510
Chris Masonc8c42862009-04-03 10:14:18 -04001511 b = p->nodes[level];
1512 } else if (ins_len < 0 && btrfs_header_nritems(b) <
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001513 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
Chris Masonc8c42862009-04-03 10:14:18 -04001514
Chris Masonbd681512011-07-16 15:23:14 -04001515 if (*write_lock_level < level + 1) {
1516 *write_lock_level = level + 1;
1517 btrfs_release_path(p);
Nikolay Borisov95b982d2020-11-13 09:29:40 +02001518 return -EAGAIN;
Chris Masonbd681512011-07-16 15:23:14 -04001519 }
1520
Josef Bacikbfb484d2020-11-05 10:45:09 -05001521 reada_for_balance(p, level);
Nikolay Borisov95b982d2020-11-13 09:29:40 +02001522 ret = balance_level(trans, root, p, level);
1523 if (ret)
1524 return ret;
Chris Masonc8c42862009-04-03 10:14:18 -04001525
Chris Masonc8c42862009-04-03 10:14:18 -04001526 b = p->nodes[level];
1527 if (!b) {
David Sterbab3b4aa72011-04-21 01:20:15 +02001528 btrfs_release_path(p);
Nikolay Borisov95b982d2020-11-13 09:29:40 +02001529 return -EAGAIN;
Chris Masonc8c42862009-04-03 10:14:18 -04001530 }
1531 BUG_ON(btrfs_header_nritems(b) == 1);
1532 }
Chris Masonc8c42862009-04-03 10:14:18 -04001533 return ret;
1534}
1535
David Sterba381cf652015-01-02 18:45:16 +01001536int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
Kelley Nielsene33d5c32013-11-04 19:33:33 -08001537 u64 iobjectid, u64 ioff, u8 key_type,
1538 struct btrfs_key *found_key)
1539{
1540 int ret;
1541 struct btrfs_key key;
1542 struct extent_buffer *eb;
David Sterba381cf652015-01-02 18:45:16 +01001543
1544 ASSERT(path);
David Sterba1d4c08e2015-01-02 19:36:14 +01001545 ASSERT(found_key);
Kelley Nielsene33d5c32013-11-04 19:33:33 -08001546
1547 key.type = key_type;
1548 key.objectid = iobjectid;
1549 key.offset = ioff;
1550
1551 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
David Sterba1d4c08e2015-01-02 19:36:14 +01001552 if (ret < 0)
Kelley Nielsene33d5c32013-11-04 19:33:33 -08001553 return ret;
1554
1555 eb = path->nodes[0];
1556 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
1557 ret = btrfs_next_leaf(fs_root, path);
1558 if (ret)
1559 return ret;
1560 eb = path->nodes[0];
1561 }
1562
1563 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
1564 if (found_key->type != key.type ||
1565 found_key->objectid != key.objectid)
1566 return 1;
1567
1568 return 0;
1569}
1570
Liu Bo1fc28d82018-05-18 11:00:21 +08001571static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
1572 struct btrfs_path *p,
1573 int write_lock_level)
1574{
Liu Bo1fc28d82018-05-18 11:00:21 +08001575 struct extent_buffer *b;
Josef Bacik120de402021-11-24 14:14:24 -05001576 int root_lock = 0;
Liu Bo1fc28d82018-05-18 11:00:21 +08001577 int level = 0;
1578
Liu Bo1fc28d82018-05-18 11:00:21 +08001579 if (p->search_commit_root) {
Filipe Mananad96b3422021-11-22 12:03:38 +00001580 b = root->commit_root;
1581 atomic_inc(&b->refs);
Filipe Mananabe6821f2018-12-11 10:19:45 +00001582 level = btrfs_header_level(b);
Liu Bof9ddfd02018-05-29 21:27:06 +08001583 /*
1584 * Ensure that all callers have set skip_locking when
1585 * p->search_commit_root = 1.
1586 */
1587 ASSERT(p->skip_locking == 1);
Liu Bo1fc28d82018-05-18 11:00:21 +08001588
1589 goto out;
1590 }
1591
1592 if (p->skip_locking) {
1593 b = btrfs_root_node(root);
1594 level = btrfs_header_level(b);
1595 goto out;
1596 }
1597
Josef Bacik120de402021-11-24 14:14:24 -05001598 /* We try very hard to do read locks on the root */
1599 root_lock = BTRFS_READ_LOCK;
1600
Liu Bo1fc28d82018-05-18 11:00:21 +08001601 /*
Liu Bo662c6532018-05-18 11:00:23 +08001602 * If the level is set to maximum, we can skip trying to get the read
1603 * lock.
Liu Bo1fc28d82018-05-18 11:00:21 +08001604 */
Liu Bo662c6532018-05-18 11:00:23 +08001605 if (write_lock_level < BTRFS_MAX_LEVEL) {
1606 /*
1607 * We don't know the level of the root node until we actually
1608 * have it read locked
1609 */
Josef Bacik1bb96592020-11-06 16:27:33 -05001610 b = btrfs_read_lock_root_node(root);
Liu Bo662c6532018-05-18 11:00:23 +08001611 level = btrfs_header_level(b);
1612 if (level > write_lock_level)
1613 goto out;
Liu Bo1fc28d82018-05-18 11:00:21 +08001614
Liu Bo662c6532018-05-18 11:00:23 +08001615 /* Whoops, must trade for write lock */
1616 btrfs_tree_read_unlock(b);
1617 free_extent_buffer(b);
1618 }
1619
Liu Bo1fc28d82018-05-18 11:00:21 +08001620 b = btrfs_lock_root_node(root);
1621 root_lock = BTRFS_WRITE_LOCK;
1622
1623 /* The level might have changed, check again */
1624 level = btrfs_header_level(b);
1625
1626out:
Josef Bacik120de402021-11-24 14:14:24 -05001627 /*
1628 * The root may have failed to write out at some point, and thus is no
1629 * longer valid, return an error in this case.
1630 */
1631 if (!extent_buffer_uptodate(b)) {
1632 if (root_lock)
1633 btrfs_tree_unlock_rw(b, root_lock);
1634 free_extent_buffer(b);
1635 return ERR_PTR(-EIO);
1636 }
1637
Liu Bo1fc28d82018-05-18 11:00:21 +08001638 p->nodes[level] = b;
1639 if (!p->skip_locking)
1640 p->locks[level] = root_lock;
1641 /*
1642 * Callers are responsible for dropping b's references.
1643 */
1644 return b;
1645}
1646
Filipe Mananad96b3422021-11-22 12:03:38 +00001647/*
1648 * Replace the extent buffer at the lowest level of the path with a cloned
1649 * version. The purpose is to be able to use it safely, after releasing the
1650 * commit root semaphore, even if relocation is happening in parallel, the
1651 * transaction used for relocation is committed and the extent buffer is
1652 * reallocated in the next transaction.
1653 *
1654 * This is used in a context where the caller does not prevent transaction
1655 * commits from happening, either by holding a transaction handle or holding
1656 * some lock, while it's doing searches through a commit root.
1657 * At the moment it's only used for send operations.
1658 */
1659static int finish_need_commit_sem_search(struct btrfs_path *path)
1660{
1661 const int i = path->lowest_level;
1662 const int slot = path->slots[i];
1663 struct extent_buffer *lowest = path->nodes[i];
1664 struct extent_buffer *clone;
1665
1666 ASSERT(path->need_commit_sem);
1667
1668 if (!lowest)
1669 return 0;
1670
1671 lockdep_assert_held_read(&lowest->fs_info->commit_root_sem);
1672
1673 clone = btrfs_clone_extent_buffer(lowest);
1674 if (!clone)
1675 return -ENOMEM;
1676
1677 btrfs_release_path(path);
1678 path->nodes[i] = clone;
1679 path->slots[i] = slot;
1680
1681 return 0;
1682}
Liu Bo1fc28d82018-05-18 11:00:21 +08001683
/*
 * Find the slot for 'key' in 'eb', storing it in *slot.
 *
 * prev_cmp is the result of the key search on the parent node.  Returns 0
 * with *slot set to 0 when prev_cmp is 0, otherwise the result of
 * generic_bin_search() starting at search_low_slot.
 */
static inline int search_for_key_slot(struct extent_buffer *eb,
				      int search_low_slot,
				      const struct btrfs_key *key,
				      int prev_cmp,
				      int *slot)
{
	if (prev_cmp != 0)
		return generic_bin_search(eb, search_low_slot, key, slot);

	/*
	 * The search on a parent node found an exact match.  Each key pointer
	 * (struct btrfs_key_ptr) refers to the lowest key accessible from the
	 * subtree it points to, so on every lower level the target key is at
	 * slot 0 and searching can be skipped.
	 */
	*slot = 0;
	return 0;
}
1704
Filipe Manana109324c2021-12-02 10:30:38 +00001705static int search_leaf(struct btrfs_trans_handle *trans,
1706 struct btrfs_root *root,
1707 const struct btrfs_key *key,
1708 struct btrfs_path *path,
1709 int ins_len,
1710 int prev_cmp)
1711{
1712 struct extent_buffer *leaf = path->nodes[0];
1713 int leaf_free_space = -1;
1714 int search_low_slot = 0;
1715 int ret;
1716 bool do_bin_search = true;
1717
1718 /*
1719 * If we are doing an insertion, the leaf has enough free space and the
1720 * destination slot for the key is not slot 0, then we can unlock our
1721 * write lock on the parent, and any other upper nodes, before doing the
1722 * binary search on the leaf (with search_for_key_slot()), allowing other
1723 * tasks to lock the parent and any other upper nodes.
1724 */
1725 if (ins_len > 0) {
1726 /*
1727 * Cache the leaf free space, since we will need it later and it
1728 * will not change until then.
1729 */
1730 leaf_free_space = btrfs_leaf_free_space(leaf);
1731
1732 /*
1733 * !path->locks[1] means we have a single node tree, the leaf is
1734 * the root of the tree.
1735 */
1736 if (path->locks[1] && leaf_free_space >= ins_len) {
1737 struct btrfs_disk_key first_key;
1738
1739 ASSERT(btrfs_header_nritems(leaf) > 0);
1740 btrfs_item_key(leaf, &first_key, 0);
1741
1742 /*
1743 * Doing the extra comparison with the first key is cheap,
1744 * taking into account that the first key is very likely
1745 * already in a cache line because it immediately follows
1746 * the extent buffer's header and we have recently accessed
1747 * the header's level field.
1748 */
1749 ret = comp_keys(&first_key, key);
1750 if (ret < 0) {
1751 /*
1752 * The first key is smaller than the key we want
1753 * to insert, so we are safe to unlock all upper
1754 * nodes and we have to do the binary search.
1755 *
1756 * We do use btrfs_unlock_up_safe() and not
1757 * unlock_up() because the later does not unlock
1758 * nodes with a slot of 0 - we can safely unlock
1759 * any node even if its slot is 0 since in this
1760 * case the key does not end up at slot 0 of the
1761 * leaf and there's no need to split the leaf.
1762 */
1763 btrfs_unlock_up_safe(path, 1);
1764 search_low_slot = 1;
1765 } else {
1766 /*
1767 * The first key is >= then the key we want to
1768 * insert, so we can skip the binary search as
1769 * the target key will be at slot 0.
1770 *
1771 * We can not unlock upper nodes when the key is
1772 * less than the first key, because we will need
1773 * to update the key at slot 0 of the parent node
1774 * and possibly of other upper nodes too.
1775 * If the key matches the first key, then we can
1776 * unlock all the upper nodes, using
1777 * btrfs_unlock_up_safe() instead of unlock_up()
1778 * as stated above.
1779 */
1780 if (ret == 0)
1781 btrfs_unlock_up_safe(path, 1);
1782 /*
1783 * ret is already 0 or 1, matching the result of
1784 * a btrfs_bin_search() call, so there is no need
1785 * to adjust it.
1786 */
1787 do_bin_search = false;
1788 path->slots[0] = 0;
1789 }
1790 }
1791 }
1792
1793 if (do_bin_search) {
1794 ret = search_for_key_slot(leaf, search_low_slot, key,
1795 prev_cmp, &path->slots[0]);
1796 if (ret < 0)
1797 return ret;
1798 }
1799
1800 if (ins_len > 0) {
1801 /*
1802 * Item key already exists. In this case, if we are allowed to
1803 * insert the item (for example, in dir_item case, item key
1804 * collision is allowed), it will be merged with the original
1805 * item. Only the item size grows, no new btrfs item will be
1806 * added. If search_for_extension is not set, ins_len already
1807 * accounts the size btrfs_item, deduct it here so leaf space
1808 * check will be correct.
1809 */
1810 if (ret == 0 && !path->search_for_extension) {
1811 ASSERT(ins_len >= sizeof(struct btrfs_item));
1812 ins_len -= sizeof(struct btrfs_item);
1813 }
1814
1815 ASSERT(leaf_free_space >= 0);
1816
1817 if (leaf_free_space < ins_len) {
1818 int err;
1819
1820 err = split_leaf(trans, root, key, path, ins_len,
1821 (ret == 0));
Filipe Mananabb8e9a62021-12-02 10:30:39 +00001822 ASSERT(err <= 0);
1823 if (WARN_ON(err > 0))
1824 err = -EUCLEAN;
Filipe Manana109324c2021-12-02 10:30:38 +00001825 if (err)
1826 ret = err;
1827 }
1828 }
1829
1830 return ret;
1831}
1832
Chris Masonc8c42862009-04-03 10:14:18 -04001833/*
Nikolay Borisov4271ece2017-12-13 09:38:14 +02001834 * btrfs_search_slot - look for a key in a tree and perform necessary
1835 * modifications to preserve tree invariants.
Chris Mason74123bd2007-02-02 11:05:29 -05001836 *
Nikolay Borisov4271ece2017-12-13 09:38:14 +02001837 * @trans: Handle of transaction, used when modifying the tree
1838 * @p: Holds all btree nodes along the search path
1839 * @root: The root node of the tree
1840 * @key: The key we are looking for
ethanwu9a664972020-12-01 17:25:12 +08001841 * @ins_len: Indicates purpose of search:
1842 * >0 for inserts it's size of item inserted (*)
1843 * <0 for deletions
1844 * 0 for plain searches, not modifying the tree
1845 *
1846 * (*) If size of item inserted doesn't include
1847 * sizeof(struct btrfs_item), then p->search_for_extension must
1848 * be set.
Nikolay Borisov4271ece2017-12-13 09:38:14 +02001849 * @cow: boolean should CoW operations be performed. Must always be 1
1850 * when modifying the tree.
Chris Mason97571fd2007-02-24 13:39:08 -05001851 *
Nikolay Borisov4271ece2017-12-13 09:38:14 +02001852 * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
1853 * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
1854 *
1855 * If @key is found, 0 is returned and you can find the item in the leaf level
1856 * of the path (level 0)
1857 *
1858 * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
1859 * points to the slot where it should be inserted
1860 *
1861 * If an error is encountered while searching the tree a negative error number
1862 * is returned
Chris Mason74123bd2007-02-02 11:05:29 -05001863 */
Omar Sandoval310712b2017-01-17 23:24:37 -08001864int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1865 const struct btrfs_key *key, struct btrfs_path *p,
1866 int ins_len, int cow)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001867{
Filipe Mananad96b3422021-11-22 12:03:38 +00001868 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04001869 struct extent_buffer *b;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001870 int slot;
1871 int ret;
Yan Zheng33c66f42009-07-22 09:59:00 -04001872 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001873 int level;
Chris Mason925baed2008-06-25 16:01:30 -04001874 int lowest_unlock = 1;
Chris Masonbd681512011-07-16 15:23:14 -04001875 /* everything at write_lock_level or lower must be write locked */
1876 int write_lock_level = 0;
Chris Mason9f3a7422007-08-07 15:52:19 -04001877 u8 lowest_level = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04001878 int min_write_lock_level;
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01001879 int prev_cmp;
Chris Mason9f3a7422007-08-07 15:52:19 -04001880
Chris Mason6702ed42007-08-07 16:15:09 -04001881 lowest_level = p->lowest_level;
Chris Mason323ac952008-10-01 19:05:46 -04001882 WARN_ON(lowest_level && ins_len > 0);
Chris Mason22b0ebd2007-03-30 08:47:31 -04001883 WARN_ON(p->nodes[0] != NULL);
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00001884 BUG_ON(!cow && ins_len);
Josef Bacik25179202008-10-29 14:49:05 -04001885
Chris Masonbd681512011-07-16 15:23:14 -04001886 if (ins_len < 0) {
Chris Mason925baed2008-06-25 16:01:30 -04001887 lowest_unlock = 2;
Chris Mason65b51a02008-08-01 15:11:20 -04001888
Chris Masonbd681512011-07-16 15:23:14 -04001889 /* when we are removing items, we might have to go up to level
1890 * two as we update tree pointers Make sure we keep write
1891 * for those levels as well
1892 */
1893 write_lock_level = 2;
1894 } else if (ins_len > 0) {
1895 /*
1896 * for inserting items, make sure we have a write lock on
1897 * level 1 so we can update keys
1898 */
1899 write_lock_level = 1;
1900 }
1901
1902 if (!cow)
1903 write_lock_level = -1;
1904
Josef Bacik09a2a8f92013-04-05 16:51:15 -04001905 if (cow && (p->keep_locks || p->lowest_level))
Chris Masonbd681512011-07-16 15:23:14 -04001906 write_lock_level = BTRFS_MAX_LEVEL;
1907
Chris Masonf7c79f32012-03-19 15:54:38 -04001908 min_write_lock_level = write_lock_level;
1909
Filipe Mananad96b3422021-11-22 12:03:38 +00001910 if (p->need_commit_sem) {
1911 ASSERT(p->search_commit_root);
1912 down_read(&fs_info->commit_root_sem);
1913 }
1914
Chris Masonbb803952007-03-01 12:04:21 -05001915again:
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01001916 prev_cmp = -1;
Liu Bo1fc28d82018-05-18 11:00:21 +08001917 b = btrfs_search_slot_get_root(root, p, write_lock_level);
Filipe Mananabe6821f2018-12-11 10:19:45 +00001918 if (IS_ERR(b)) {
1919 ret = PTR_ERR(b);
1920 goto done;
1921 }
Chris Mason925baed2008-06-25 16:01:30 -04001922
Chris Masoneb60cea2007-02-02 09:18:22 -05001923 while (b) {
Qu Wenruof624d972019-09-10 15:40:17 +08001924 int dec = 0;
1925
Chris Mason5f39d392007-10-15 16:14:19 -04001926 level = btrfs_header_level(b);
Chris Mason65b51a02008-08-01 15:11:20 -04001927
Chris Mason02217ed2007-03-02 16:08:05 -05001928 if (cow) {
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02001929 bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
1930
Chris Masonc8c42862009-04-03 10:14:18 -04001931 /*
1932 * if we don't really need to cow this block
1933 * then we don't want to set the path blocking,
1934 * so we test it here
1935 */
Josef Bacik5963ffca2021-05-20 11:21:31 -04001936 if (!should_cow_block(trans, root, b))
Chris Mason65b51a02008-08-01 15:11:20 -04001937 goto cow_done;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001938
Chris Masonbd681512011-07-16 15:23:14 -04001939 /*
1940 * must have write locks on this node and the
1941 * parent
1942 */
Josef Bacik5124e002012-11-07 13:44:13 -05001943 if (level > write_lock_level ||
1944 (level + 1 > write_lock_level &&
1945 level + 1 < BTRFS_MAX_LEVEL &&
1946 p->nodes[level + 1])) {
Chris Masonbd681512011-07-16 15:23:14 -04001947 write_lock_level = level + 1;
1948 btrfs_release_path(p);
1949 goto again;
1950 }
1951
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02001952 if (last_level)
1953 err = btrfs_cow_block(trans, root, b, NULL, 0,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001954 &b,
1955 BTRFS_NESTING_COW);
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02001956 else
1957 err = btrfs_cow_block(trans, root, b,
1958 p->nodes[level + 1],
Josef Bacik9631e4c2020-08-20 11:46:03 -04001959 p->slots[level + 1], &b,
1960 BTRFS_NESTING_COW);
Yan Zheng33c66f42009-07-22 09:59:00 -04001961 if (err) {
Yan Zheng33c66f42009-07-22 09:59:00 -04001962 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04001963 goto done;
Chris Mason54aa1f42007-06-22 14:16:25 -04001964 }
Chris Mason02217ed2007-03-02 16:08:05 -05001965 }
Chris Mason65b51a02008-08-01 15:11:20 -04001966cow_done:
Chris Masoneb60cea2007-02-02 09:18:22 -05001967 p->nodes[level] = b;
Chris Masonb4ce94d2009-02-04 09:25:08 -05001968
1969 /*
1970 * we have a lock on b and as long as we aren't changing
1971 * the tree, there is no way to for the items in b to change.
1972 * It is safe to drop the lock on our parent before we
1973 * go through the expensive btree search on b.
1974 *
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00001975 * If we're inserting or deleting (ins_len != 0), then we might
1976 * be changing slot zero, which may require changing the parent.
1977 * So, we can't drop the lock until after we know which slot
1978 * we're operating on.
Chris Masonb4ce94d2009-02-04 09:25:08 -05001979 */
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00001980 if (!ins_len && !p->keep_locks) {
1981 int u = level + 1;
1982
1983 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
1984 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
1985 p->locks[u] = 0;
1986 }
1987 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05001988
Filipe Mananae2e58d02021-12-02 10:30:36 +00001989 if (level == 0) {
Filipe Manana109324c2021-12-02 10:30:38 +00001990 if (ins_len > 0)
Filipe Mananae5e1c172021-12-02 10:30:37 +00001991 ASSERT(write_lock_level >= 1);
Chris Masonbd681512011-07-16 15:23:14 -04001992
Filipe Manana109324c2021-12-02 10:30:38 +00001993 ret = search_leaf(trans, root, key, p, ins_len, prev_cmp);
Chris Mason459931e2008-12-10 09:10:46 -05001994 if (!p->search_for_split)
Chris Masonf7c79f32012-03-19 15:54:38 -04001995 unlock_up(p, level, lowest_unlock,
Liu Bo4b6f8e92018-08-14 10:46:53 +08001996 min_write_lock_level, NULL);
Chris Mason65b51a02008-08-01 15:11:20 -04001997 goto done;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001998 }
Filipe Mananae2e58d02021-12-02 10:30:36 +00001999
2000 ret = search_for_key_slot(b, 0, key, prev_cmp, &slot);
2001 if (ret < 0)
2002 goto done;
2003 prev_cmp = ret;
2004
Qu Wenruof624d972019-09-10 15:40:17 +08002005 if (ret && slot > 0) {
2006 dec = 1;
2007 slot--;
2008 }
2009 p->slots[level] = slot;
2010 err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
2011 &write_lock_level);
2012 if (err == -EAGAIN)
2013 goto again;
2014 if (err) {
2015 ret = err;
2016 goto done;
2017 }
2018 b = p->nodes[level];
2019 slot = p->slots[level];
2020
2021 /*
2022 * Slot 0 is special, if we change the key we have to update
2023 * the parent pointer which means we must have a write lock on
2024 * the parent
2025 */
2026 if (slot == 0 && ins_len && write_lock_level < level + 1) {
2027 write_lock_level = level + 1;
2028 btrfs_release_path(p);
2029 goto again;
2030 }
2031
2032 unlock_up(p, level, lowest_unlock, min_write_lock_level,
2033 &write_lock_level);
2034
2035 if (level == lowest_level) {
2036 if (dec)
2037 p->slots[level]++;
2038 goto done;
2039 }
2040
2041 err = read_block_for_search(root, p, &b, level, slot, key);
2042 if (err == -EAGAIN)
2043 goto again;
2044 if (err) {
2045 ret = err;
2046 goto done;
2047 }
2048
2049 if (!p->skip_locking) {
2050 level = btrfs_header_level(b);
2051 if (level <= write_lock_level) {
Josef Bacikac5887c2020-08-20 11:46:10 -04002052 btrfs_tree_lock(b);
Qu Wenruof624d972019-09-10 15:40:17 +08002053 p->locks[level] = BTRFS_WRITE_LOCK;
2054 } else {
Josef Bacikfe596ca2020-11-06 16:27:34 -05002055 btrfs_tree_read_lock(b);
Qu Wenruof624d972019-09-10 15:40:17 +08002056 p->locks[level] = BTRFS_READ_LOCK;
2057 }
2058 p->nodes[level] = b;
2059 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05002060 }
Chris Mason65b51a02008-08-01 15:11:20 -04002061 ret = 1;
2062done:
Filipe Manana5f5bc6b2014-11-09 08:38:39 +00002063 if (ret < 0 && !p->skip_release_on_error)
David Sterbab3b4aa72011-04-21 01:20:15 +02002064 btrfs_release_path(p);
Filipe Mananad96b3422021-11-22 12:03:38 +00002065
2066 if (p->need_commit_sem) {
2067 int ret2;
2068
2069 ret2 = finish_need_commit_sem_search(p);
2070 up_read(&fs_info->commit_root_sem);
2071 if (ret2)
2072 ret = ret2;
2073 }
2074
Chris Mason65b51a02008-08-01 15:11:20 -04002075 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002076}
Josef Bacikf75e2b72020-12-16 11:18:43 -05002077ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002078
Chris Mason74123bd2007-02-02 11:05:29 -05002079/*
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002080 * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
2081 * current state of the tree together with the operations recorded in the tree
2082 * modification log to search for the key in a previous version of this tree, as
2083 * denoted by the time_seq parameter.
2084 *
2085 * Naturally, there is no support for insert, delete or cow operations.
2086 *
2087 * The resulting path and return value will be set up as if we called
2088 * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
2089 */
Omar Sandoval310712b2017-01-17 23:24:37 -08002090int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002091 struct btrfs_path *p, u64 time_seq)
2092{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002093 struct btrfs_fs_info *fs_info = root->fs_info;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002094 struct extent_buffer *b;
2095 int slot;
2096 int ret;
2097 int err;
2098 int level;
2099 int lowest_unlock = 1;
2100 u8 lowest_level = 0;
2101
2102 lowest_level = p->lowest_level;
2103 WARN_ON(p->nodes[0] != NULL);
2104
2105 if (p->search_commit_root) {
2106 BUG_ON(time_seq);
2107 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2108 }
2109
2110again:
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002111 b = btrfs_get_old_root(root, time_seq);
Nikolay Borisov315bed42018-09-13 11:35:10 +03002112 if (!b) {
2113 ret = -EIO;
2114 goto done;
2115 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002116 level = btrfs_header_level(b);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002117 p->locks[level] = BTRFS_READ_LOCK;
2118
2119 while (b) {
Qu Wenruoabe93392019-09-10 15:40:18 +08002120 int dec = 0;
2121
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002122 level = btrfs_header_level(b);
2123 p->nodes[level] = b;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002124
2125 /*
2126 * we have a lock on b and as long as we aren't changing
2127 * the tree, there is no way to for the items in b to change.
2128 * It is safe to drop the lock on our parent before we
2129 * go through the expensive btree search on b.
2130 */
2131 btrfs_unlock_up_safe(p, level + 1);
2132
Nikolay Borisov995e9a12020-05-27 13:10:53 +03002133 ret = btrfs_bin_search(b, key, &slot);
Filipe Mananacbca7d52019-02-18 16:57:26 +00002134 if (ret < 0)
2135 goto done;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002136
Qu Wenruoabe93392019-09-10 15:40:18 +08002137 if (level == 0) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002138 p->slots[level] = slot;
2139 unlock_up(p, level, lowest_unlock, 0, NULL);
2140 goto done;
2141 }
Qu Wenruoabe93392019-09-10 15:40:18 +08002142
2143 if (ret && slot > 0) {
2144 dec = 1;
2145 slot--;
2146 }
2147 p->slots[level] = slot;
2148 unlock_up(p, level, lowest_unlock, 0, NULL);
2149
2150 if (level == lowest_level) {
2151 if (dec)
2152 p->slots[level]++;
2153 goto done;
2154 }
2155
2156 err = read_block_for_search(root, p, &b, level, slot, key);
2157 if (err == -EAGAIN)
2158 goto again;
2159 if (err) {
2160 ret = err;
2161 goto done;
2162 }
2163
2164 level = btrfs_header_level(b);
Josef Bacikac5887c2020-08-20 11:46:10 -04002165 btrfs_tree_read_lock(b);
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002166 b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq);
Qu Wenruoabe93392019-09-10 15:40:18 +08002167 if (!b) {
2168 ret = -ENOMEM;
2169 goto done;
2170 }
2171 p->locks[level] = BTRFS_READ_LOCK;
2172 p->nodes[level] = b;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002173 }
2174 ret = 1;
2175done:
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002176 if (ret < 0)
2177 btrfs_release_path(p);
2178
2179 return ret;
2180}
2181
2182/*
Arne Jansen2f38b3e2011-09-13 11:18:10 +02002183 * helper to use instead of search slot if no exact match is needed but
2184 * instead the next or previous item should be returned.
2185 * When find_higher is true, the next higher item is returned, the next lower
2186 * otherwise.
2187 * When return_any and find_higher are both true, and no higher item is found,
2188 * return the next lower instead.
2189 * When return_any is true and find_higher is false, and no lower item is found,
2190 * return the next higher instead.
2191 * It returns 0 if any item is found, 1 if none is found (tree empty), and
2192 * < 0 on error
2193 */
2194int btrfs_search_slot_for_read(struct btrfs_root *root,
Omar Sandoval310712b2017-01-17 23:24:37 -08002195 const struct btrfs_key *key,
2196 struct btrfs_path *p, int find_higher,
2197 int return_any)
Arne Jansen2f38b3e2011-09-13 11:18:10 +02002198{
2199 int ret;
2200 struct extent_buffer *leaf;
2201
2202again:
2203 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
2204 if (ret <= 0)
2205 return ret;
2206 /*
2207 * a return value of 1 means the path is at the position where the
2208 * item should be inserted. Normally this is the next bigger item,
2209 * but in case the previous item is the last in a leaf, path points
2210 * to the first free slot in the previous leaf, i.e. at an invalid
2211 * item.
2212 */
2213 leaf = p->nodes[0];
2214
2215 if (find_higher) {
2216 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
2217 ret = btrfs_next_leaf(root, p);
2218 if (ret <= 0)
2219 return ret;
2220 if (!return_any)
2221 return 1;
2222 /*
2223 * no higher item found, return the next
2224 * lower instead
2225 */
2226 return_any = 0;
2227 find_higher = 0;
2228 btrfs_release_path(p);
2229 goto again;
2230 }
2231 } else {
Arne Jansene6793762011-09-13 11:18:10 +02002232 if (p->slots[0] == 0) {
2233 ret = btrfs_prev_leaf(root, p);
2234 if (ret < 0)
2235 return ret;
2236 if (!ret) {
Filipe David Borba Manana23c6bf62014-01-11 21:28:54 +00002237 leaf = p->nodes[0];
2238 if (p->slots[0] == btrfs_header_nritems(leaf))
2239 p->slots[0]--;
Arne Jansene6793762011-09-13 11:18:10 +02002240 return 0;
Arne Jansen2f38b3e2011-09-13 11:18:10 +02002241 }
Arne Jansene6793762011-09-13 11:18:10 +02002242 if (!return_any)
2243 return 1;
2244 /*
2245 * no lower item found, return the next
2246 * higher instead
2247 */
2248 return_any = 0;
2249 find_higher = 1;
2250 btrfs_release_path(p);
2251 goto again;
2252 } else {
Arne Jansen2f38b3e2011-09-13 11:18:10 +02002253 --p->slots[0];
2254 }
2255 }
2256 return 0;
2257}
2258
2259/*
Marcos Paulo de Souza0ff40a92021-07-29 05:22:16 -03002260 * Execute search and call btrfs_previous_item to traverse backwards if the item
2261 * was not found.
2262 *
2263 * Return 0 if found, 1 if not found and < 0 if error.
2264 */
2265int btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
2266 struct btrfs_path *path)
2267{
2268 int ret;
2269
2270 ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2271 if (ret > 0)
2272 ret = btrfs_previous_item(root, path, key->objectid, key->type);
2273
2274 if (ret == 0)
2275 btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);
2276
2277 return ret;
2278}
2279
2280/*
Chris Mason74123bd2007-02-02 11:05:29 -05002281 * adjust the pointers going up the tree, starting at level
2282 * making sure the right key of each node is points to 'key'.
2283 * This is used after shifting pointers to the left, so it stops
2284 * fixing up pointers when a given leaf/node is not in slot 0 of the
2285 * higher levels
Chris Masonaa5d6be2007-02-28 16:35:06 -05002286 *
Chris Mason74123bd2007-02-02 11:05:29 -05002287 */
Nikolay Borisovb167fa92018-06-20 15:48:47 +03002288static void fixup_low_keys(struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01002289 struct btrfs_disk_key *key, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002290{
2291 int i;
Chris Mason5f39d392007-10-15 16:14:19 -04002292 struct extent_buffer *t;
David Sterba0e82bcf2018-03-05 16:16:54 +01002293 int ret;
Chris Mason5f39d392007-10-15 16:14:19 -04002294
Chris Mason234b63a2007-03-13 10:46:10 -04002295 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05002296 int tslot = path->slots[i];
David Sterba0e82bcf2018-03-05 16:16:54 +01002297
Chris Masoneb60cea2007-02-02 09:18:22 -05002298 if (!path->nodes[i])
Chris Masonbe0e5c02007-01-26 15:51:26 -05002299 break;
Chris Mason5f39d392007-10-15 16:14:19 -04002300 t = path->nodes[i];
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002301 ret = btrfs_tree_mod_log_insert_key(t, tslot,
2302 BTRFS_MOD_LOG_KEY_REPLACE, GFP_ATOMIC);
David Sterba0e82bcf2018-03-05 16:16:54 +01002303 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002304 btrfs_set_node_key(t, key, tslot);
Chris Masond6025572007-03-30 14:27:56 -04002305 btrfs_mark_buffer_dirty(path->nodes[i]);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002306 if (tslot != 0)
2307 break;
2308 }
2309}
2310
Chris Mason74123bd2007-02-02 11:05:29 -05002311/*
Zheng Yan31840ae2008-09-23 13:14:14 -04002312 * update item key.
2313 *
2314 * This function isn't completely safe. It's the caller's responsibility
2315 * that the new key won't break the order
2316 */
Daniel Dresslerb7a03652014-11-12 13:43:09 +09002317void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
2318 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08002319 const struct btrfs_key *new_key)
Zheng Yan31840ae2008-09-23 13:14:14 -04002320{
2321 struct btrfs_disk_key disk_key;
2322 struct extent_buffer *eb;
2323 int slot;
2324
2325 eb = path->nodes[0];
2326 slot = path->slots[0];
2327 if (slot > 0) {
2328 btrfs_item_key(eb, &disk_key, slot - 1);
Qu Wenruo7c15d412019-04-25 08:55:53 +08002329 if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
2330 btrfs_crit(fs_info,
2331 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
2332 slot, btrfs_disk_key_objectid(&disk_key),
2333 btrfs_disk_key_type(&disk_key),
2334 btrfs_disk_key_offset(&disk_key),
2335 new_key->objectid, new_key->type,
2336 new_key->offset);
2337 btrfs_print_leaf(eb);
2338 BUG();
2339 }
Zheng Yan31840ae2008-09-23 13:14:14 -04002340 }
2341 if (slot < btrfs_header_nritems(eb) - 1) {
2342 btrfs_item_key(eb, &disk_key, slot + 1);
Qu Wenruo7c15d412019-04-25 08:55:53 +08002343 if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
2344 btrfs_crit(fs_info,
2345 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
2346 slot, btrfs_disk_key_objectid(&disk_key),
2347 btrfs_disk_key_type(&disk_key),
2348 btrfs_disk_key_offset(&disk_key),
2349 new_key->objectid, new_key->type,
2350 new_key->offset);
2351 btrfs_print_leaf(eb);
2352 BUG();
2353 }
Zheng Yan31840ae2008-09-23 13:14:14 -04002354 }
2355
2356 btrfs_cpu_key_to_disk(&disk_key, new_key);
2357 btrfs_set_item_key(eb, &disk_key, slot);
2358 btrfs_mark_buffer_dirty(eb);
2359 if (slot == 0)
Nikolay Borisovb167fa92018-06-20 15:48:47 +03002360 fixup_low_keys(path, &disk_key, 1);
Zheng Yan31840ae2008-09-23 13:14:14 -04002361}
2362
2363/*
Qu Wenruod16c7022020-08-19 14:35:50 +08002364 * Check key order of two sibling extent buffers.
2365 *
2366 * Return true if something is wrong.
2367 * Return false if everything is fine.
2368 *
2369 * Tree-checker only works inside one tree block, thus the following
2370 * corruption can not be detected by tree-checker:
2371 *
2372 * Leaf @left | Leaf @right
2373 * --------------------------------------------------------------
2374 * | 1 | 2 | 3 | 4 | 5 | f6 | | 7 | 8 |
2375 *
2376 * Key f6 in leaf @left itself is valid, but not valid when the next
2377 * key in leaf @right is 7.
2378 * This can only be checked at tree block merge time.
2379 * And since tree checker has ensured all key order in each tree block
2380 * is correct, we only need to bother the last key of @left and the first
2381 * key of @right.
2382 */
2383static bool check_sibling_keys(struct extent_buffer *left,
2384 struct extent_buffer *right)
2385{
2386 struct btrfs_key left_last;
2387 struct btrfs_key right_first;
2388 int level = btrfs_header_level(left);
2389 int nr_left = btrfs_header_nritems(left);
2390 int nr_right = btrfs_header_nritems(right);
2391
2392 /* No key to check in one of the tree blocks */
2393 if (!nr_left || !nr_right)
2394 return false;
2395
2396 if (level) {
2397 btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
2398 btrfs_node_key_to_cpu(right, &right_first, 0);
2399 } else {
2400 btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
2401 btrfs_item_key_to_cpu(right, &right_first, 0);
2402 }
2403
2404 if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
2405 btrfs_crit(left->fs_info,
2406"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
2407 left_last.objectid, left_last.type,
2408 left_last.offset, right_first.objectid,
2409 right_first.type, right_first.offset);
2410 return true;
2411 }
2412 return false;
2413}
2414
2415/*
Chris Mason74123bd2007-02-02 11:05:29 -05002416 * try to push data from one node into the next node left in the
Chris Mason79f95c82007-03-01 15:16:26 -05002417 * tree.
Chris Masonaa5d6be2007-02-28 16:35:06 -05002418 *
2419 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
2420 * error, and > 0 if there was no room in the left hand block.
Chris Mason74123bd2007-02-02 11:05:29 -05002421 */
Chris Mason98ed5172008-01-03 10:01:48 -05002422static int push_node_left(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002423 struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -04002424 struct extent_buffer *src, int empty)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002425{
David Sterbad30a6682019-03-20 14:16:45 +01002426 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002427 int push_items = 0;
Chris Masonbb803952007-03-01 12:04:21 -05002428 int src_nritems;
2429 int dst_nritems;
Chris Masonaa5d6be2007-02-28 16:35:06 -05002430 int ret = 0;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002431
Chris Mason5f39d392007-10-15 16:14:19 -04002432 src_nritems = btrfs_header_nritems(src);
2433 dst_nritems = btrfs_header_nritems(dst);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002434 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
Chris Mason7bb86312007-12-11 09:25:06 -05002435 WARN_ON(btrfs_header_generation(src) != trans->transid);
2436 WARN_ON(btrfs_header_generation(dst) != trans->transid);
Chris Mason54aa1f42007-06-22 14:16:25 -04002437
Chris Masonbce4eae2008-04-24 14:42:46 -04002438 if (!empty && src_nritems <= 8)
Chris Mason971a1f62008-04-24 10:54:32 -04002439 return 1;
2440
Chris Masond3977122009-01-05 21:25:51 -05002441 if (push_items <= 0)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002442 return 1;
2443
Chris Masonbce4eae2008-04-24 14:42:46 -04002444 if (empty) {
Chris Mason971a1f62008-04-24 10:54:32 -04002445 push_items = min(src_nritems, push_items);
Chris Masonbce4eae2008-04-24 14:42:46 -04002446 if (push_items < src_nritems) {
2447 /* leave at least 8 pointers in the node if
2448 * we aren't going to empty it
2449 */
2450 if (src_nritems - push_items < 8) {
2451 if (push_items <= 8)
2452 return 1;
2453 push_items -= 8;
2454 }
2455 }
2456 } else
2457 push_items = min(src_nritems - 8, push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05002458
Qu Wenruod16c7022020-08-19 14:35:50 +08002459 /* dst is the left eb, src is the middle eb */
2460 if (check_sibling_keys(dst, src)) {
2461 ret = -EUCLEAN;
2462 btrfs_abort_transaction(trans, ret);
2463 return ret;
2464 }
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002465 ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00002466 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04002467 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00002468 return ret;
2469 }
Chris Mason5f39d392007-10-15 16:14:19 -04002470 copy_extent_buffer(dst, src,
2471 btrfs_node_key_ptr_offset(dst_nritems),
2472 btrfs_node_key_ptr_offset(0),
Chris Masond3977122009-01-05 21:25:51 -05002473 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason5f39d392007-10-15 16:14:19 -04002474
Chris Masonbb803952007-03-01 12:04:21 -05002475 if (push_items < src_nritems) {
Jan Schmidt57911b82012-10-19 09:22:03 +02002476 /*
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002477 * Don't call btrfs_tree_mod_log_insert_move() here, key removal
2478 * was already fully logged by btrfs_tree_mod_log_eb_copy() above.
Jan Schmidt57911b82012-10-19 09:22:03 +02002479 */
Chris Mason5f39d392007-10-15 16:14:19 -04002480 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2481 btrfs_node_key_ptr_offset(push_items),
2482 (src_nritems - push_items) *
2483 sizeof(struct btrfs_key_ptr));
Chris Masonbb803952007-03-01 12:04:21 -05002484 }
Chris Mason5f39d392007-10-15 16:14:19 -04002485 btrfs_set_header_nritems(src, src_nritems - push_items);
2486 btrfs_set_header_nritems(dst, dst_nritems + push_items);
2487 btrfs_mark_buffer_dirty(src);
2488 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04002489
Chris Masonbb803952007-03-01 12:04:21 -05002490 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002491}
2492
Chris Mason97571fd2007-02-24 13:39:08 -05002493/*
Chris Mason79f95c82007-03-01 15:16:26 -05002494 * try to push data from one node into the next node right in the
2495 * tree.
2496 *
2497 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
2498 * error, and > 0 if there was no room in the right hand block.
2499 *
2500 * this will only push up to 1/2 the contents of the left node over
2501 */
Chris Mason5f39d392007-10-15 16:14:19 -04002502static int balance_node_right(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04002503 struct extent_buffer *dst,
2504 struct extent_buffer *src)
Chris Mason79f95c82007-03-01 15:16:26 -05002505{
David Sterba55d32ed2019-03-20 14:18:06 +01002506 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Mason79f95c82007-03-01 15:16:26 -05002507 int push_items = 0;
2508 int max_push;
2509 int src_nritems;
2510 int dst_nritems;
2511 int ret = 0;
Chris Mason79f95c82007-03-01 15:16:26 -05002512
Chris Mason7bb86312007-12-11 09:25:06 -05002513 WARN_ON(btrfs_header_generation(src) != trans->transid);
2514 WARN_ON(btrfs_header_generation(dst) != trans->transid);
2515
Chris Mason5f39d392007-10-15 16:14:19 -04002516 src_nritems = btrfs_header_nritems(src);
2517 dst_nritems = btrfs_header_nritems(dst);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002518 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
Chris Masond3977122009-01-05 21:25:51 -05002519 if (push_items <= 0)
Chris Mason79f95c82007-03-01 15:16:26 -05002520 return 1;
Chris Masonbce4eae2008-04-24 14:42:46 -04002521
Chris Masond3977122009-01-05 21:25:51 -05002522 if (src_nritems < 4)
Chris Masonbce4eae2008-04-24 14:42:46 -04002523 return 1;
Chris Mason79f95c82007-03-01 15:16:26 -05002524
2525 max_push = src_nritems / 2 + 1;
2526 /* don't try to empty the node */
Chris Masond3977122009-01-05 21:25:51 -05002527 if (max_push >= src_nritems)
Chris Mason79f95c82007-03-01 15:16:26 -05002528 return 1;
Yan252c38f2007-08-29 09:11:44 -04002529
Chris Mason79f95c82007-03-01 15:16:26 -05002530 if (max_push < push_items)
2531 push_items = max_push;
2532
Qu Wenruod16c7022020-08-19 14:35:50 +08002533 /* dst is the right eb, src is the middle eb */
2534 if (check_sibling_keys(src, dst)) {
2535 ret = -EUCLEAN;
2536 btrfs_abort_transaction(trans, ret);
2537 return ret;
2538 }
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002539 ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
David Sterbabf1d3422018-03-05 15:47:39 +01002540 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002541 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2542 btrfs_node_key_ptr_offset(0),
2543 (dst_nritems) *
2544 sizeof(struct btrfs_key_ptr));
Chris Masond6025572007-03-30 14:27:56 -04002545
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002546 ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
2547 push_items);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00002548 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04002549 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00002550 return ret;
2551 }
Chris Mason5f39d392007-10-15 16:14:19 -04002552 copy_extent_buffer(dst, src,
2553 btrfs_node_key_ptr_offset(0),
2554 btrfs_node_key_ptr_offset(src_nritems - push_items),
Chris Masond3977122009-01-05 21:25:51 -05002555 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason79f95c82007-03-01 15:16:26 -05002556
Chris Mason5f39d392007-10-15 16:14:19 -04002557 btrfs_set_header_nritems(src, src_nritems - push_items);
2558 btrfs_set_header_nritems(dst, dst_nritems + push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05002559
Chris Mason5f39d392007-10-15 16:14:19 -04002560 btrfs_mark_buffer_dirty(src);
2561 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04002562
Chris Mason79f95c82007-03-01 15:16:26 -05002563 return ret;
2564}
2565
2566/*
Chris Mason97571fd2007-02-24 13:39:08 -05002567 * helper function to insert a new root level in the tree.
2568 * A new node is allocated, and a single item is inserted to
2569 * point to the existing root
Chris Masonaa5d6be2007-02-28 16:35:06 -05002570 *
2571 * returns zero on success or < 0 on failure.
Chris Mason97571fd2007-02-24 13:39:08 -05002572 */
Chris Masond3977122009-01-05 21:25:51 -05002573static noinline int insert_new_root(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04002574 struct btrfs_root *root,
Liu Bofdd99c72013-05-22 12:06:51 +00002575 struct btrfs_path *path, int level)
Chris Mason5c680ed2007-02-22 11:39:13 -05002576{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002577 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason7bb86312007-12-11 09:25:06 -05002578 u64 lower_gen;
Chris Mason5f39d392007-10-15 16:14:19 -04002579 struct extent_buffer *lower;
2580 struct extent_buffer *c;
Chris Mason925baed2008-06-25 16:01:30 -04002581 struct extent_buffer *old;
Chris Mason5f39d392007-10-15 16:14:19 -04002582 struct btrfs_disk_key lower_key;
David Sterbad9d19a02018-03-05 16:35:29 +01002583 int ret;
Chris Mason5c680ed2007-02-22 11:39:13 -05002584
2585 BUG_ON(path->nodes[level]);
2586 BUG_ON(path->nodes[level-1] != root->node);
2587
Chris Mason7bb86312007-12-11 09:25:06 -05002588 lower = path->nodes[level-1];
2589 if (level == 1)
2590 btrfs_item_key(lower, &lower_key, 0);
2591 else
2592 btrfs_node_key(lower, &lower_key, 0);
2593
Filipe Manana79bd3712021-06-29 14:43:06 +01002594 c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
2595 &lower_key, level, root->node->start, 0,
2596 BTRFS_NESTING_NEW_ROOT);
Chris Mason5f39d392007-10-15 16:14:19 -04002597 if (IS_ERR(c))
2598 return PTR_ERR(c);
Chris Mason925baed2008-06-25 16:01:30 -04002599
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002600 root_add_used(root, fs_info->nodesize);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002601
Chris Mason5f39d392007-10-15 16:14:19 -04002602 btrfs_set_header_nritems(c, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04002603 btrfs_set_node_key(c, &lower_key, 0);
Chris Masondb945352007-10-15 16:15:53 -04002604 btrfs_set_node_blockptr(c, 0, lower->start);
Chris Mason7bb86312007-12-11 09:25:06 -05002605 lower_gen = btrfs_header_generation(lower);
Zheng Yan31840ae2008-09-23 13:14:14 -04002606 WARN_ON(lower_gen != trans->transid);
Chris Mason7bb86312007-12-11 09:25:06 -05002607
2608 btrfs_set_node_ptr_generation(c, 0, lower_gen);
Chris Mason5f39d392007-10-15 16:14:19 -04002609
2610 btrfs_mark_buffer_dirty(c);
Chris Masond5719762007-03-23 10:01:08 -04002611
Chris Mason925baed2008-06-25 16:01:30 -04002612 old = root->node;
Filipe Manana406808a2021-03-11 14:31:08 +00002613 ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
David Sterbad9d19a02018-03-05 16:35:29 +01002614 BUG_ON(ret < 0);
Chris Mason240f62c2011-03-23 14:54:42 -04002615 rcu_assign_pointer(root->node, c);
Chris Mason925baed2008-06-25 16:01:30 -04002616
2617 /* the super has an extra ref to root->node */
2618 free_extent_buffer(old);
2619
Chris Mason0b86a832008-03-24 15:01:56 -04002620 add_root_to_dirty_list(root);
David Sterba67439da2019-10-08 13:28:47 +02002621 atomic_inc(&c->refs);
Chris Mason5f39d392007-10-15 16:14:19 -04002622 path->nodes[level] = c;
Josef Bacikac5887c2020-08-20 11:46:10 -04002623 path->locks[level] = BTRFS_WRITE_LOCK;
Chris Mason5c680ed2007-02-22 11:39:13 -05002624 path->slots[level] = 0;
2625 return 0;
2626}
2627
Chris Mason74123bd2007-02-02 11:05:29 -05002628/*
2629 * worker function to insert a single pointer in a node.
2630 * the node should have enough room for the pointer already
Chris Mason97571fd2007-02-24 13:39:08 -05002631 *
Chris Mason74123bd2007-02-02 11:05:29 -05002632 * slot and level indicate where you want the key to go, and
2633 * blocknr is the block the key points to.
2634 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01002635static void insert_ptr(struct btrfs_trans_handle *trans,
David Sterba6ad3cf62019-03-20 14:32:45 +01002636 struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01002637 struct btrfs_disk_key *key, u64 bytenr,
Jan Schmidtc3e06962012-06-21 11:01:06 +02002638 int slot, int level)
Chris Mason74123bd2007-02-02 11:05:29 -05002639{
Chris Mason5f39d392007-10-15 16:14:19 -04002640 struct extent_buffer *lower;
Chris Mason74123bd2007-02-02 11:05:29 -05002641 int nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02002642 int ret;
Chris Mason5c680ed2007-02-22 11:39:13 -05002643
2644 BUG_ON(!path->nodes[level]);
Filipe Manana49d0c642021-09-22 10:36:45 +01002645 btrfs_assert_tree_write_locked(path->nodes[level]);
Chris Mason5f39d392007-10-15 16:14:19 -04002646 lower = path->nodes[level];
2647 nritems = btrfs_header_nritems(lower);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04002648 BUG_ON(slot > nritems);
David Sterba6ad3cf62019-03-20 14:32:45 +01002649 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
Chris Mason74123bd2007-02-02 11:05:29 -05002650 if (slot != nritems) {
David Sterbabf1d3422018-03-05 15:47:39 +01002651 if (level) {
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002652 ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
2653 slot, nritems - slot);
David Sterbabf1d3422018-03-05 15:47:39 +01002654 BUG_ON(ret < 0);
2655 }
Chris Mason5f39d392007-10-15 16:14:19 -04002656 memmove_extent_buffer(lower,
2657 btrfs_node_key_ptr_offset(slot + 1),
2658 btrfs_node_key_ptr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04002659 (nritems - slot) * sizeof(struct btrfs_key_ptr));
Chris Mason74123bd2007-02-02 11:05:29 -05002660 }
Jan Schmidtc3e06962012-06-21 11:01:06 +02002661 if (level) {
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002662 ret = btrfs_tree_mod_log_insert_key(lower, slot,
2663 BTRFS_MOD_LOG_KEY_ADD, GFP_NOFS);
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02002664 BUG_ON(ret < 0);
2665 }
Chris Mason5f39d392007-10-15 16:14:19 -04002666 btrfs_set_node_key(lower, key, slot);
Chris Masondb945352007-10-15 16:15:53 -04002667 btrfs_set_node_blockptr(lower, slot, bytenr);
Chris Mason74493f72007-12-11 09:25:06 -05002668 WARN_ON(trans->transid == 0);
2669 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04002670 btrfs_set_header_nritems(lower, nritems + 1);
2671 btrfs_mark_buffer_dirty(lower);
Chris Mason74123bd2007-02-02 11:05:29 -05002672}
2673
Chris Mason97571fd2007-02-24 13:39:08 -05002674/*
2675 * split the node at the specified level in path in two.
2676 * The path is corrected to point to the appropriate node after the split
2677 *
2678 * Before splitting this tries to make some room in the node by pushing
2679 * left and right, if either one works, it returns right away.
Chris Masonaa5d6be2007-02-28 16:35:06 -05002680 *
2681 * returns 0 on success and < 0 on failure
Chris Mason97571fd2007-02-24 13:39:08 -05002682 */
Chris Masone02119d2008-09-05 16:13:11 -04002683static noinline int split_node(struct btrfs_trans_handle *trans,
2684 struct btrfs_root *root,
2685 struct btrfs_path *path, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002686{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002687 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04002688 struct extent_buffer *c;
2689 struct extent_buffer *split;
2690 struct btrfs_disk_key disk_key;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002691 int mid;
Chris Mason5c680ed2007-02-22 11:39:13 -05002692 int ret;
Chris Mason7518a232007-03-12 12:01:18 -04002693 u32 c_nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002694
Chris Mason5f39d392007-10-15 16:14:19 -04002695 c = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05002696 WARN_ON(btrfs_header_generation(c) != trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04002697 if (c == root->node) {
Jan Schmidtd9abbf12013-03-20 13:49:48 +00002698 /*
Jan Schmidt90f8d622013-04-13 13:19:53 +00002699 * trying to split the root, lets make a new one
2700 *
Liu Bofdd99c72013-05-22 12:06:51 +00002701 * tree mod log: We don't log_removal old root in
Jan Schmidt90f8d622013-04-13 13:19:53 +00002702 * insert_new_root, because that root buffer will be kept as a
2703 * normal node. We are going to log removal of half of the
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002704 * elements below with btrfs_tree_mod_log_eb_copy(). We're
2705 * holding a tree lock on the buffer, which is why we cannot
2706 * race with other tree_mod_log users.
Jan Schmidtd9abbf12013-03-20 13:49:48 +00002707 */
Liu Bofdd99c72013-05-22 12:06:51 +00002708 ret = insert_new_root(trans, root, path, level + 1);
Chris Mason5c680ed2007-02-22 11:39:13 -05002709 if (ret)
2710 return ret;
Chris Masonb3612422009-05-13 19:12:15 -04002711 } else {
Chris Masone66f7092007-04-20 13:16:02 -04002712 ret = push_nodes_for_insert(trans, root, path, level);
Chris Mason5f39d392007-10-15 16:14:19 -04002713 c = path->nodes[level];
2714 if (!ret && btrfs_header_nritems(c) <
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002715 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
Chris Masone66f7092007-04-20 13:16:02 -04002716 return 0;
Chris Mason54aa1f42007-06-22 14:16:25 -04002717 if (ret < 0)
2718 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002719 }
Chris Masone66f7092007-04-20 13:16:02 -04002720
Chris Mason5f39d392007-10-15 16:14:19 -04002721 c_nritems = btrfs_header_nritems(c);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002722 mid = (c_nritems + 1) / 2;
2723 btrfs_node_key(c, &disk_key, mid);
Chris Mason7bb86312007-12-11 09:25:06 -05002724
Filipe Manana79bd3712021-06-29 14:43:06 +01002725 split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
2726 &disk_key, level, c->start, 0,
2727 BTRFS_NESTING_SPLIT);
Chris Mason5f39d392007-10-15 16:14:19 -04002728 if (IS_ERR(split))
2729 return PTR_ERR(split);
Chris Mason54aa1f42007-06-22 14:16:25 -04002730
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002731 root_add_used(root, fs_info->nodesize);
Nikolay Borisovbc877d22018-06-18 14:13:19 +03002732 ASSERT(btrfs_header_level(c) == level);
Chris Mason5f39d392007-10-15 16:14:19 -04002733
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00002734 ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00002735 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04002736 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00002737 return ret;
2738 }
Chris Mason5f39d392007-10-15 16:14:19 -04002739 copy_extent_buffer(split, c,
2740 btrfs_node_key_ptr_offset(0),
2741 btrfs_node_key_ptr_offset(mid),
2742 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2743 btrfs_set_header_nritems(split, c_nritems - mid);
2744 btrfs_set_header_nritems(c, mid);
Chris Masonaa5d6be2007-02-28 16:35:06 -05002745
Chris Mason5f39d392007-10-15 16:14:19 -04002746 btrfs_mark_buffer_dirty(c);
2747 btrfs_mark_buffer_dirty(split);
2748
David Sterba6ad3cf62019-03-20 14:32:45 +01002749 insert_ptr(trans, path, &disk_key, split->start,
Jan Schmidtc3e06962012-06-21 11:01:06 +02002750 path->slots[level + 1] + 1, level + 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05002751
Chris Mason5de08d72007-02-24 06:24:44 -05002752 if (path->slots[level] >= mid) {
Chris Mason5c680ed2007-02-22 11:39:13 -05002753 path->slots[level] -= mid;
Chris Mason925baed2008-06-25 16:01:30 -04002754 btrfs_tree_unlock(c);
Chris Mason5f39d392007-10-15 16:14:19 -04002755 free_extent_buffer(c);
2756 path->nodes[level] = split;
Chris Mason5c680ed2007-02-22 11:39:13 -05002757 path->slots[level + 1] += 1;
2758 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002759 btrfs_tree_unlock(split);
Chris Mason5f39d392007-10-15 16:14:19 -04002760 free_extent_buffer(split);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002761 }
Nikolay Borisovd5286a922020-11-12 13:24:02 +02002762 return 0;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002763}
2764
Chris Mason74123bd2007-02-02 11:05:29 -05002765/*
2766 * how many bytes are required to store the items in a leaf. start
2767 * and nr indicate which items in the leaf to check. This totals up the
2768 * space used both by the item structs and the item data
2769 */
Chris Mason5f39d392007-10-15 16:14:19 -04002770static int leaf_space_used(struct extent_buffer *l, int start, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002771{
2772 int data_len;
Chris Mason5f39d392007-10-15 16:14:19 -04002773 int nritems = btrfs_header_nritems(l);
Chris Masond4dbff92007-04-04 14:08:15 -04002774 int end = min(nritems, start + nr) - 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002775
2776 if (!nr)
2777 return 0;
Josef Bacik3212fa12021-10-21 14:58:35 -04002778 data_len = btrfs_item_offset(l, start) + btrfs_item_size(l, start);
2779 data_len = data_len - btrfs_item_offset(l, end);
Chris Mason0783fcf2007-03-12 20:12:07 -04002780 data_len += sizeof(struct btrfs_item) * nr;
Chris Masond4dbff92007-04-04 14:08:15 -04002781 WARN_ON(data_len < 0);
Chris Masonbe0e5c02007-01-26 15:51:26 -05002782 return data_len;
2783}
2784
Chris Mason74123bd2007-02-02 11:05:29 -05002785/*
Chris Masond4dbff92007-04-04 14:08:15 -04002786 * The space between the end of the leaf items and
2787 * the start of the leaf data. IOW, how much room
2788 * the leaf has left for both items and data
2789 */
David Sterbae902baa2019-03-20 14:36:46 +01002790noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
Chris Masond4dbff92007-04-04 14:08:15 -04002791{
David Sterbae902baa2019-03-20 14:36:46 +01002792 struct btrfs_fs_info *fs_info = leaf->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04002793 int nritems = btrfs_header_nritems(leaf);
2794 int ret;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002795
2796 ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04002797 if (ret < 0) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002798 btrfs_crit(fs_info,
2799 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
2800 ret,
2801 (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
2802 leaf_space_used(leaf, 0, nritems), nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04002803 }
2804 return ret;
Chris Masond4dbff92007-04-04 14:08:15 -04002805}
2806
Chris Mason99d8f832010-07-07 10:51:48 -04002807/*
2808 * min slot controls the lowest index we're willing to push to the
2809 * right. We'll push up to and including min_slot, but no lower
2810 */
David Sterbaf72f0012019-03-20 14:39:45 +01002811static noinline int __push_leaf_right(struct btrfs_path *path,
Chris Mason44871b12009-03-13 10:04:31 -04002812 int data_size, int empty,
2813 struct extent_buffer *right,
Chris Mason99d8f832010-07-07 10:51:48 -04002814 int free_space, u32 left_nritems,
2815 u32 min_slot)
Chris Mason00ec4c52007-02-24 12:47:20 -05002816{
David Sterbaf72f0012019-03-20 14:39:45 +01002817 struct btrfs_fs_info *fs_info = right->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04002818 struct extent_buffer *left = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04002819 struct extent_buffer *upper = path->nodes[1];
Chris Masoncfed81a2012-03-03 07:40:03 -05002820 struct btrfs_map_token token;
Chris Mason5f39d392007-10-15 16:14:19 -04002821 struct btrfs_disk_key disk_key;
Chris Mason00ec4c52007-02-24 12:47:20 -05002822 int slot;
Chris Mason34a38212007-11-07 13:31:03 -05002823 u32 i;
Chris Mason00ec4c52007-02-24 12:47:20 -05002824 int push_space = 0;
2825 int push_items = 0;
Chris Mason34a38212007-11-07 13:31:03 -05002826 u32 nr;
Chris Mason7518a232007-03-12 12:01:18 -04002827 u32 right_nritems;
Chris Mason5f39d392007-10-15 16:14:19 -04002828 u32 data_end;
Chris Masondb945352007-10-15 16:15:53 -04002829 u32 this_item_size;
Chris Mason00ec4c52007-02-24 12:47:20 -05002830
Chris Mason34a38212007-11-07 13:31:03 -05002831 if (empty)
2832 nr = 0;
2833 else
Chris Mason99d8f832010-07-07 10:51:48 -04002834 nr = max_t(u32, 1, min_slot);
Chris Mason34a38212007-11-07 13:31:03 -05002835
Zheng Yan31840ae2008-09-23 13:14:14 -04002836 if (path->slots[0] >= left_nritems)
Yan Zheng87b29b22008-12-17 10:21:48 -05002837 push_space += data_size;
Zheng Yan31840ae2008-09-23 13:14:14 -04002838
Chris Mason44871b12009-03-13 10:04:31 -04002839 slot = path->slots[1];
Chris Mason34a38212007-11-07 13:31:03 -05002840 i = left_nritems - 1;
2841 while (i >= nr) {
Zheng Yan31840ae2008-09-23 13:14:14 -04002842 if (!empty && push_items > 0) {
2843 if (path->slots[0] > i)
2844 break;
2845 if (path->slots[0] == i) {
David Sterbae902baa2019-03-20 14:36:46 +01002846 int space = btrfs_leaf_free_space(left);
2847
Zheng Yan31840ae2008-09-23 13:14:14 -04002848 if (space + push_space * 2 > free_space)
2849 break;
2850 }
2851 }
2852
Chris Mason00ec4c52007-02-24 12:47:20 -05002853 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05002854 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04002855
Josef Bacik3212fa12021-10-21 14:58:35 -04002856 this_item_size = btrfs_item_size(left, i);
Josef Bacik74794202021-10-21 14:58:34 -04002857 if (this_item_size + sizeof(struct btrfs_item) +
2858 push_space > free_space)
Chris Mason00ec4c52007-02-24 12:47:20 -05002859 break;
Zheng Yan31840ae2008-09-23 13:14:14 -04002860
Chris Mason00ec4c52007-02-24 12:47:20 -05002861 push_items++;
Josef Bacik74794202021-10-21 14:58:34 -04002862 push_space += this_item_size + sizeof(struct btrfs_item);
Chris Mason34a38212007-11-07 13:31:03 -05002863 if (i == 0)
2864 break;
2865 i--;
Chris Masondb945352007-10-15 16:15:53 -04002866 }
Chris Mason5f39d392007-10-15 16:14:19 -04002867
Chris Mason925baed2008-06-25 16:01:30 -04002868 if (push_items == 0)
2869 goto out_unlock;
Chris Mason5f39d392007-10-15 16:14:19 -04002870
Julia Lawall6c1500f2012-11-03 20:30:18 +00002871 WARN_ON(!empty && push_items == left_nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04002872
Chris Mason00ec4c52007-02-24 12:47:20 -05002873 /* push left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04002874 right_nritems = btrfs_header_nritems(right);
Chris Mason34a38212007-11-07 13:31:03 -05002875
Josef Bacikdc2e7242021-10-21 14:58:37 -04002876 push_space = btrfs_item_data_end(left, left_nritems - push_items);
David Sterba8f881e82019-03-20 11:33:10 +01002877 push_space -= leaf_data_end(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002878
Chris Mason00ec4c52007-02-24 12:47:20 -05002879 /* make room in the right data area */
David Sterba8f881e82019-03-20 11:33:10 +01002880 data_end = leaf_data_end(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002881 memmove_extent_buffer(right,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03002882 BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
2883 BTRFS_LEAF_DATA_OFFSET + data_end,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002884 BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04002885
Chris Mason00ec4c52007-02-24 12:47:20 -05002886 /* copy from the left data area */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03002887 copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002888 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
David Sterba8f881e82019-03-20 11:33:10 +01002889 BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
Chris Masond6025572007-03-30 14:27:56 -04002890 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04002891
2892 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2893 btrfs_item_nr_offset(0),
2894 right_nritems * sizeof(struct btrfs_item));
2895
Chris Mason00ec4c52007-02-24 12:47:20 -05002896 /* copy the items from left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04002897 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2898 btrfs_item_nr_offset(left_nritems - push_items),
2899 push_items * sizeof(struct btrfs_item));
Chris Mason00ec4c52007-02-24 12:47:20 -05002900
2901 /* update the item pointers */
David Sterbac82f8232019-08-09 17:48:21 +02002902 btrfs_init_map_token(&token, right);
Chris Mason7518a232007-03-12 12:01:18 -04002903 right_nritems += push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04002904 btrfs_set_header_nritems(right, right_nritems);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002905 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
Chris Mason7518a232007-03-12 12:01:18 -04002906 for (i = 0; i < right_nritems; i++) {
Josef Bacik3212fa12021-10-21 14:58:35 -04002907 push_space -= btrfs_token_item_size(&token, i);
2908 btrfs_set_token_item_offset(&token, i, push_space);
Chris Masondb945352007-10-15 16:15:53 -04002909 }
2910
Chris Mason7518a232007-03-12 12:01:18 -04002911 left_nritems -= push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04002912 btrfs_set_header_nritems(left, left_nritems);
Chris Mason00ec4c52007-02-24 12:47:20 -05002913
Chris Mason34a38212007-11-07 13:31:03 -05002914 if (left_nritems)
2915 btrfs_mark_buffer_dirty(left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002916 else
David Sterba6a884d7d2019-03-20 14:30:02 +01002917 btrfs_clean_tree_block(left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002918
Chris Mason5f39d392007-10-15 16:14:19 -04002919 btrfs_mark_buffer_dirty(right);
Chris Masona429e512007-04-18 16:15:28 -04002920
Chris Mason5f39d392007-10-15 16:14:19 -04002921 btrfs_item_key(right, &disk_key, 0);
2922 btrfs_set_node_key(upper, &disk_key, slot + 1);
Chris Masond6025572007-03-30 14:27:56 -04002923 btrfs_mark_buffer_dirty(upper);
Chris Mason02217ed2007-03-02 16:08:05 -05002924
Chris Mason00ec4c52007-02-24 12:47:20 -05002925 /* then fixup the leaf pointer in the path */
Chris Mason7518a232007-03-12 12:01:18 -04002926 if (path->slots[0] >= left_nritems) {
2927 path->slots[0] -= left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04002928 if (btrfs_header_nritems(path->nodes[0]) == 0)
David Sterba6a884d7d2019-03-20 14:30:02 +01002929 btrfs_clean_tree_block(path->nodes[0]);
Chris Mason925baed2008-06-25 16:01:30 -04002930 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04002931 free_extent_buffer(path->nodes[0]);
2932 path->nodes[0] = right;
Chris Mason00ec4c52007-02-24 12:47:20 -05002933 path->slots[1] += 1;
2934 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002935 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002936 free_extent_buffer(right);
Chris Mason00ec4c52007-02-24 12:47:20 -05002937 }
2938 return 0;
Chris Mason925baed2008-06-25 16:01:30 -04002939
2940out_unlock:
2941 btrfs_tree_unlock(right);
2942 free_extent_buffer(right);
2943 return 1;
Chris Mason00ec4c52007-02-24 12:47:20 -05002944}
Chris Mason925baed2008-06-25 16:01:30 -04002945
Chris Mason00ec4c52007-02-24 12:47:20 -05002946/*
Chris Mason44871b12009-03-13 10:04:31 -04002947 * push some data in the path leaf to the right, trying to free up at
2948 * least data_size bytes. returns zero if the push worked, nonzero otherwise
2949 *
2950 * returns 1 if the push failed because the other node didn't have enough
2951 * room, 0 if everything worked out and < 0 if there were major errors.
Chris Mason99d8f832010-07-07 10:51:48 -04002952 *
2953 * this will push starting from min_slot to the end of the leaf. It won't
2954 * push any slot lower than min_slot
Chris Mason44871b12009-03-13 10:04:31 -04002955 */
2956static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04002957 *root, struct btrfs_path *path,
2958 int min_data_size, int data_size,
2959 int empty, u32 min_slot)
Chris Mason44871b12009-03-13 10:04:31 -04002960{
2961 struct extent_buffer *left = path->nodes[0];
2962 struct extent_buffer *right;
2963 struct extent_buffer *upper;
2964 int slot;
2965 int free_space;
2966 u32 left_nritems;
2967 int ret;
2968
2969 if (!path->nodes[1])
2970 return 1;
2971
2972 slot = path->slots[1];
2973 upper = path->nodes[1];
2974 if (slot >= btrfs_header_nritems(upper) - 1)
2975 return 1;
2976
Filipe Manana49d0c642021-09-22 10:36:45 +01002977 btrfs_assert_tree_write_locked(path->nodes[1]);
Chris Mason44871b12009-03-13 10:04:31 -04002978
David Sterba4b231ae2019-08-21 19:16:27 +02002979 right = btrfs_read_node_slot(upper, slot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07002980 /*
2981 * slot + 1 is not valid or we fail to read the right node,
2982 * no big deal, just return.
2983 */
2984 if (IS_ERR(right))
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00002985 return 1;
2986
Josef Bacikbf774672020-08-20 11:46:04 -04002987 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
Chris Mason44871b12009-03-13 10:04:31 -04002988
David Sterbae902baa2019-03-20 14:36:46 +01002989 free_space = btrfs_leaf_free_space(right);
Chris Mason44871b12009-03-13 10:04:31 -04002990 if (free_space < data_size)
2991 goto out_unlock;
2992
2993 /* cow and double check */
2994 ret = btrfs_cow_block(trans, root, right, upper,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04002995 slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
Chris Mason44871b12009-03-13 10:04:31 -04002996 if (ret)
2997 goto out_unlock;
2998
David Sterbae902baa2019-03-20 14:36:46 +01002999 free_space = btrfs_leaf_free_space(right);
Chris Mason44871b12009-03-13 10:04:31 -04003000 if (free_space < data_size)
3001 goto out_unlock;
3002
3003 left_nritems = btrfs_header_nritems(left);
3004 if (left_nritems == 0)
3005 goto out_unlock;
3006
Qu Wenruod16c7022020-08-19 14:35:50 +08003007 if (check_sibling_keys(left, right)) {
3008 ret = -EUCLEAN;
3009 btrfs_tree_unlock(right);
3010 free_extent_buffer(right);
3011 return ret;
3012 }
Filipe David Borba Manana2ef1fed2013-12-04 22:17:39 +00003013 if (path->slots[0] == left_nritems && !empty) {
3014 /* Key greater than all keys in the leaf, right neighbor has
3015 * enough room for it and we're not emptying our leaf to delete
3016 * it, therefore use right neighbor to insert the new item and
Andrea Gelmini52042d82018-11-28 12:05:13 +01003017 * no need to touch/dirty our left leaf. */
Filipe David Borba Manana2ef1fed2013-12-04 22:17:39 +00003018 btrfs_tree_unlock(left);
3019 free_extent_buffer(left);
3020 path->nodes[0] = right;
3021 path->slots[0] = 0;
3022 path->slots[1]++;
3023 return 0;
3024 }
3025
David Sterbaf72f0012019-03-20 14:39:45 +01003026 return __push_leaf_right(path, min_data_size, empty,
Chris Mason99d8f832010-07-07 10:51:48 -04003027 right, free_space, left_nritems, min_slot);
Chris Mason44871b12009-03-13 10:04:31 -04003028out_unlock:
3029 btrfs_tree_unlock(right);
3030 free_extent_buffer(right);
3031 return 1;
3032}
3033
3034/*
Chris Mason74123bd2007-02-02 11:05:29 -05003035 * push some data in the path leaf to the left, trying to free up at
3036 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04003037 *
3038 * max_slot can put a limit on how far into the leaf we'll push items. The
3039 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
3040 * items
Chris Mason74123bd2007-02-02 11:05:29 -05003041 */
David Sterba8087c192019-03-20 14:40:41 +01003042static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
Chris Mason44871b12009-03-13 10:04:31 -04003043 int empty, struct extent_buffer *left,
Chris Mason99d8f832010-07-07 10:51:48 -04003044 int free_space, u32 right_nritems,
3045 u32 max_slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003046{
David Sterba8087c192019-03-20 14:40:41 +01003047 struct btrfs_fs_info *fs_info = left->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04003048 struct btrfs_disk_key disk_key;
3049 struct extent_buffer *right = path->nodes[0];
Chris Masonbe0e5c02007-01-26 15:51:26 -05003050 int i;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003051 int push_space = 0;
3052 int push_items = 0;
Chris Mason7518a232007-03-12 12:01:18 -04003053 u32 old_left_nritems;
Chris Mason34a38212007-11-07 13:31:03 -05003054 u32 nr;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003055 int ret = 0;
Chris Masondb945352007-10-15 16:15:53 -04003056 u32 this_item_size;
3057 u32 old_left_item_size;
Chris Masoncfed81a2012-03-03 07:40:03 -05003058 struct btrfs_map_token token;
3059
Chris Mason34a38212007-11-07 13:31:03 -05003060 if (empty)
Chris Mason99d8f832010-07-07 10:51:48 -04003061 nr = min(right_nritems, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003062 else
Chris Mason99d8f832010-07-07 10:51:48 -04003063 nr = min(right_nritems - 1, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003064
3065 for (i = 0; i < nr; i++) {
Zheng Yan31840ae2008-09-23 13:14:14 -04003066 if (!empty && push_items > 0) {
3067 if (path->slots[0] < i)
3068 break;
3069 if (path->slots[0] == i) {
David Sterbae902baa2019-03-20 14:36:46 +01003070 int space = btrfs_leaf_free_space(right);
3071
Zheng Yan31840ae2008-09-23 13:14:14 -04003072 if (space + push_space * 2 > free_space)
3073 break;
3074 }
3075 }
3076
Chris Masonbe0e5c02007-01-26 15:51:26 -05003077 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05003078 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04003079
Josef Bacik3212fa12021-10-21 14:58:35 -04003080 this_item_size = btrfs_item_size(right, i);
Josef Bacik74794202021-10-21 14:58:34 -04003081 if (this_item_size + sizeof(struct btrfs_item) + push_space >
3082 free_space)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003083 break;
Chris Masondb945352007-10-15 16:15:53 -04003084
Chris Masonbe0e5c02007-01-26 15:51:26 -05003085 push_items++;
Josef Bacik74794202021-10-21 14:58:34 -04003086 push_space += this_item_size + sizeof(struct btrfs_item);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003087 }
Chris Masondb945352007-10-15 16:15:53 -04003088
Chris Masonbe0e5c02007-01-26 15:51:26 -05003089 if (push_items == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04003090 ret = 1;
3091 goto out;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003092 }
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05303093 WARN_ON(!empty && push_items == btrfs_header_nritems(right));
Chris Mason5f39d392007-10-15 16:14:19 -04003094
Chris Masonbe0e5c02007-01-26 15:51:26 -05003095 /* push data from right to left */
Chris Mason5f39d392007-10-15 16:14:19 -04003096 copy_extent_buffer(left, right,
3097 btrfs_item_nr_offset(btrfs_header_nritems(left)),
3098 btrfs_item_nr_offset(0),
3099 push_items * sizeof(struct btrfs_item));
3100
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003101 push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
Josef Bacik3212fa12021-10-21 14:58:35 -04003102 btrfs_item_offset(right, push_items - 1);
Chris Mason5f39d392007-10-15 16:14:19 -04003103
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003104 copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
David Sterba8f881e82019-03-20 11:33:10 +01003105 leaf_data_end(left) - push_space,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003106 BTRFS_LEAF_DATA_OFFSET +
Josef Bacik3212fa12021-10-21 14:58:35 -04003107 btrfs_item_offset(right, push_items - 1),
Chris Masond6025572007-03-30 14:27:56 -04003108 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04003109 old_left_nritems = btrfs_header_nritems(left);
Yan Zheng87b29b22008-12-17 10:21:48 -05003110 BUG_ON(old_left_nritems <= 0);
Chris Masoneb60cea2007-02-02 09:18:22 -05003111
David Sterbac82f8232019-08-09 17:48:21 +02003112 btrfs_init_map_token(&token, left);
Josef Bacik3212fa12021-10-21 14:58:35 -04003113 old_left_item_size = btrfs_item_offset(left, old_left_nritems - 1);
Chris Mason0783fcf2007-03-12 20:12:07 -04003114 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003115 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003116
Josef Bacik3212fa12021-10-21 14:58:35 -04003117 ioff = btrfs_token_item_offset(&token, i);
3118 btrfs_set_token_item_offset(&token, i,
David Sterbacc4c13d2020-04-29 02:15:56 +02003119 ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003120 }
Chris Mason5f39d392007-10-15 16:14:19 -04003121 btrfs_set_header_nritems(left, old_left_nritems + push_items);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003122
3123 /* fixup right node */
Julia Lawall31b1a2b2012-11-03 10:58:34 +00003124 if (push_items > right_nritems)
3125 WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
Chris Masond3977122009-01-05 21:25:51 -05003126 right_nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003127
Chris Mason34a38212007-11-07 13:31:03 -05003128 if (push_items < right_nritems) {
Josef Bacik3212fa12021-10-21 14:58:35 -04003129 push_space = btrfs_item_offset(right, push_items - 1) -
David Sterba8f881e82019-03-20 11:33:10 +01003130 leaf_data_end(right);
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003131 memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003132 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003133 BTRFS_LEAF_DATA_OFFSET +
David Sterba8f881e82019-03-20 11:33:10 +01003134 leaf_data_end(right), push_space);
Chris Mason34a38212007-11-07 13:31:03 -05003135
3136 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
Chris Mason5f39d392007-10-15 16:14:19 -04003137 btrfs_item_nr_offset(push_items),
3138 (btrfs_header_nritems(right) - push_items) *
3139 sizeof(struct btrfs_item));
Chris Mason34a38212007-11-07 13:31:03 -05003140 }
David Sterbac82f8232019-08-09 17:48:21 +02003141
3142 btrfs_init_map_token(&token, right);
Yaneef1c492007-11-26 10:58:13 -05003143 right_nritems -= push_items;
3144 btrfs_set_header_nritems(right, right_nritems);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003145 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
Chris Mason5f39d392007-10-15 16:14:19 -04003146 for (i = 0; i < right_nritems; i++) {
Josef Bacik3212fa12021-10-21 14:58:35 -04003147 push_space = push_space - btrfs_token_item_size(&token, i);
3148 btrfs_set_token_item_offset(&token, i, push_space);
Chris Masondb945352007-10-15 16:15:53 -04003149 }
Chris Masoneb60cea2007-02-02 09:18:22 -05003150
Chris Mason5f39d392007-10-15 16:14:19 -04003151 btrfs_mark_buffer_dirty(left);
Chris Mason34a38212007-11-07 13:31:03 -05003152 if (right_nritems)
3153 btrfs_mark_buffer_dirty(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003154 else
David Sterba6a884d7d2019-03-20 14:30:02 +01003155 btrfs_clean_tree_block(right);
Chris Mason098f59c2007-05-11 11:33:21 -04003156
Chris Mason5f39d392007-10-15 16:14:19 -04003157 btrfs_item_key(right, &disk_key, 0);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03003158 fixup_low_keys(path, &disk_key, 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003159
3160 /* then fixup the leaf pointer in the path */
3161 if (path->slots[0] < push_items) {
3162 path->slots[0] += old_left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04003163 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04003164 free_extent_buffer(path->nodes[0]);
3165 path->nodes[0] = left;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003166 path->slots[1] -= 1;
3167 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003168 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04003169 free_extent_buffer(left);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003170 path->slots[0] -= push_items;
3171 }
Chris Masoneb60cea2007-02-02 09:18:22 -05003172 BUG_ON(path->slots[0] < 0);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003173 return ret;
Chris Mason925baed2008-06-25 16:01:30 -04003174out:
3175 btrfs_tree_unlock(left);
3176 free_extent_buffer(left);
3177 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003178}
3179
Chris Mason74123bd2007-02-02 11:05:29 -05003180/*
Chris Mason44871b12009-03-13 10:04:31 -04003181 * push some data in the path leaf to the left, trying to free up at
3182 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04003183 *
3184 * max_slot can put a limit on how far into the leaf we'll push items. The
3185 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
3186 * items
Chris Mason44871b12009-03-13 10:04:31 -04003187 */
3188static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04003189 *root, struct btrfs_path *path, int min_data_size,
3190 int data_size, int empty, u32 max_slot)
Chris Mason44871b12009-03-13 10:04:31 -04003191{
3192 struct extent_buffer *right = path->nodes[0];
3193 struct extent_buffer *left;
3194 int slot;
3195 int free_space;
3196 u32 right_nritems;
3197 int ret = 0;
3198
3199 slot = path->slots[1];
3200 if (slot == 0)
3201 return 1;
3202 if (!path->nodes[1])
3203 return 1;
3204
3205 right_nritems = btrfs_header_nritems(right);
3206 if (right_nritems == 0)
3207 return 1;
3208
Filipe Manana49d0c642021-09-22 10:36:45 +01003209 btrfs_assert_tree_write_locked(path->nodes[1]);
Chris Mason44871b12009-03-13 10:04:31 -04003210
David Sterba4b231ae2019-08-21 19:16:27 +02003211 left = btrfs_read_node_slot(path->nodes[1], slot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07003212 /*
3213 * slot - 1 is not valid or we fail to read the left node,
3214 * no big deal, just return.
3215 */
3216 if (IS_ERR(left))
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00003217 return 1;
3218
Josef Bacikbf774672020-08-20 11:46:04 -04003219 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
Chris Mason44871b12009-03-13 10:04:31 -04003220
David Sterbae902baa2019-03-20 14:36:46 +01003221 free_space = btrfs_leaf_free_space(left);
Chris Mason44871b12009-03-13 10:04:31 -04003222 if (free_space < data_size) {
3223 ret = 1;
3224 goto out;
3225 }
3226
3227 /* cow and double check */
3228 ret = btrfs_cow_block(trans, root, left,
Josef Bacik9631e4c2020-08-20 11:46:03 -04003229 path->nodes[1], slot - 1, &left,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04003230 BTRFS_NESTING_LEFT_COW);
Chris Mason44871b12009-03-13 10:04:31 -04003231 if (ret) {
3232 /* we hit -ENOSPC, but it isn't fatal here */
Jeff Mahoney79787ea2012-03-12 16:03:00 +01003233 if (ret == -ENOSPC)
3234 ret = 1;
Chris Mason44871b12009-03-13 10:04:31 -04003235 goto out;
3236 }
3237
David Sterbae902baa2019-03-20 14:36:46 +01003238 free_space = btrfs_leaf_free_space(left);
Chris Mason44871b12009-03-13 10:04:31 -04003239 if (free_space < data_size) {
3240 ret = 1;
3241 goto out;
3242 }
3243
Qu Wenruod16c7022020-08-19 14:35:50 +08003244 if (check_sibling_keys(left, right)) {
3245 ret = -EUCLEAN;
3246 goto out;
3247 }
David Sterba8087c192019-03-20 14:40:41 +01003248 return __push_leaf_left(path, min_data_size,
Chris Mason99d8f832010-07-07 10:51:48 -04003249 empty, left, free_space, right_nritems,
3250 max_slot);
Chris Mason44871b12009-03-13 10:04:31 -04003251out:
3252 btrfs_tree_unlock(left);
3253 free_extent_buffer(left);
3254 return ret;
3255}
3256
3257/*
Chris Mason74123bd2007-02-02 11:05:29 -05003258 * split the path's leaf in two, making sure there is at least data_size
3259 * available for the resulting leaf level of the path.
3260 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003261static noinline void copy_for_split(struct btrfs_trans_handle *trans,
Jeff Mahoney143bede2012-03-01 14:56:26 +01003262 struct btrfs_path *path,
3263 struct extent_buffer *l,
3264 struct extent_buffer *right,
3265 int slot, int mid, int nritems)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003266{
David Sterba94f94ad2019-03-20 14:42:33 +01003267 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003268 int data_copy_size;
3269 int rt_data_off;
3270 int i;
Chris Masond4dbff92007-04-04 14:08:15 -04003271 struct btrfs_disk_key disk_key;
Chris Masoncfed81a2012-03-03 07:40:03 -05003272 struct btrfs_map_token token;
3273
Chris Mason5f39d392007-10-15 16:14:19 -04003274 nritems = nritems - mid;
3275 btrfs_set_header_nritems(right, nritems);
Josef Bacikdc2e7242021-10-21 14:58:37 -04003276 data_copy_size = btrfs_item_data_end(l, mid) - leaf_data_end(l);
Chris Mason5f39d392007-10-15 16:14:19 -04003277
3278 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
3279 btrfs_item_nr_offset(mid),
3280 nritems * sizeof(struct btrfs_item));
3281
3282 copy_extent_buffer(right, l,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003283 BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
3284 data_copy_size, BTRFS_LEAF_DATA_OFFSET +
David Sterba8f881e82019-03-20 11:33:10 +01003285 leaf_data_end(l), data_copy_size);
Chris Mason74123bd2007-02-02 11:05:29 -05003286
Josef Bacikdc2e7242021-10-21 14:58:37 -04003287 rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid);
Chris Mason5f39d392007-10-15 16:14:19 -04003288
David Sterbac82f8232019-08-09 17:48:21 +02003289 btrfs_init_map_token(&token, right);
Chris Mason5f39d392007-10-15 16:14:19 -04003290 for (i = 0; i < nritems; i++) {
Chris Masondb945352007-10-15 16:15:53 -04003291 u32 ioff;
3292
Josef Bacik3212fa12021-10-21 14:58:35 -04003293 ioff = btrfs_token_item_offset(&token, i);
3294 btrfs_set_token_item_offset(&token, i, ioff + rt_data_off);
Chris Mason0783fcf2007-03-12 20:12:07 -04003295 }
Chris Mason74123bd2007-02-02 11:05:29 -05003296
Chris Mason5f39d392007-10-15 16:14:19 -04003297 btrfs_set_header_nritems(l, mid);
Chris Mason5f39d392007-10-15 16:14:19 -04003298 btrfs_item_key(right, &disk_key, 0);
David Sterba6ad3cf62019-03-20 14:32:45 +01003299 insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04003300
3301 btrfs_mark_buffer_dirty(right);
3302 btrfs_mark_buffer_dirty(l);
Chris Masoneb60cea2007-02-02 09:18:22 -05003303 BUG_ON(path->slots[0] != slot);
Chris Mason5f39d392007-10-15 16:14:19 -04003304
Chris Masonbe0e5c02007-01-26 15:51:26 -05003305 if (mid <= slot) {
Chris Mason925baed2008-06-25 16:01:30 -04003306 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04003307 free_extent_buffer(path->nodes[0]);
3308 path->nodes[0] = right;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003309 path->slots[0] -= mid;
3310 path->slots[1] += 1;
Chris Mason925baed2008-06-25 16:01:30 -04003311 } else {
3312 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04003313 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04003314 }
Chris Mason5f39d392007-10-15 16:14:19 -04003315
Chris Masoneb60cea2007-02-02 09:18:22 -05003316 BUG_ON(path->slots[0] < 0);
Chris Mason44871b12009-03-13 10:04:31 -04003317}
3318
3319/*
Chris Mason99d8f832010-07-07 10:51:48 -04003320 * double splits happen when we need to insert a big item in the middle
3321 * of a leaf. A double split can leave us with 3 mostly empty leaves:
3322 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
3323 * A B C
3324 *
3325 * We avoid this by trying to push the items on either side of our target
3326 * into the adjacent leaves. If all goes well we can avoid the double split
3327 * completely.
3328 */
3329static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
3330 struct btrfs_root *root,
3331 struct btrfs_path *path,
3332 int data_size)
3333{
3334 int ret;
3335 int progress = 0;
3336 int slot;
3337 u32 nritems;
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00003338 int space_needed = data_size;
Chris Mason99d8f832010-07-07 10:51:48 -04003339
3340 slot = path->slots[0];
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00003341 if (slot < btrfs_header_nritems(path->nodes[0]))
David Sterbae902baa2019-03-20 14:36:46 +01003342 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
Chris Mason99d8f832010-07-07 10:51:48 -04003343
3344 /*
3345 * try to push all the items after our slot into the
3346 * right leaf
3347 */
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00003348 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
Chris Mason99d8f832010-07-07 10:51:48 -04003349 if (ret < 0)
3350 return ret;
3351
3352 if (ret == 0)
3353 progress++;
3354
3355 nritems = btrfs_header_nritems(path->nodes[0]);
3356 /*
3357 * our goal is to get our slot at the start or end of a leaf. If
3358 * we've done so we're done
3359 */
3360 if (path->slots[0] == 0 || path->slots[0] == nritems)
3361 return 0;
3362
David Sterbae902baa2019-03-20 14:36:46 +01003363 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
Chris Mason99d8f832010-07-07 10:51:48 -04003364 return 0;
3365
3366 /* try to push all the items before our slot into the next leaf */
3367 slot = path->slots[0];
Filipe Manana263d3992017-02-17 18:43:57 +00003368 space_needed = data_size;
3369 if (slot > 0)
David Sterbae902baa2019-03-20 14:36:46 +01003370 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00003371 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
Chris Mason99d8f832010-07-07 10:51:48 -04003372 if (ret < 0)
3373 return ret;
3374
3375 if (ret == 0)
3376 progress++;
3377
3378 if (progress)
3379 return 0;
3380 return 1;
3381}
3382
3383/*
Chris Mason44871b12009-03-13 10:04:31 -04003384 * split the path's leaf in two, making sure there is at least data_size
3385 * available for the resulting leaf level of the path.
3386 *
3387 * returns 0 if all went well and < 0 on failure.
3388 */
3389static noinline int split_leaf(struct btrfs_trans_handle *trans,
3390 struct btrfs_root *root,
Omar Sandoval310712b2017-01-17 23:24:37 -08003391 const struct btrfs_key *ins_key,
Chris Mason44871b12009-03-13 10:04:31 -04003392 struct btrfs_path *path, int data_size,
3393 int extend)
3394{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003395 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04003396 struct extent_buffer *l;
3397 u32 nritems;
3398 int mid;
3399 int slot;
3400 struct extent_buffer *right;
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003401 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason44871b12009-03-13 10:04:31 -04003402 int ret = 0;
3403 int wret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003404 int split;
Chris Mason44871b12009-03-13 10:04:31 -04003405 int num_doubles = 0;
Chris Mason99d8f832010-07-07 10:51:48 -04003406 int tried_avoid_double = 0;
Chris Mason44871b12009-03-13 10:04:31 -04003407
Yan, Zhenga5719522009-09-24 09:17:31 -04003408 l = path->nodes[0];
3409 slot = path->slots[0];
Josef Bacik3212fa12021-10-21 14:58:35 -04003410 if (extend && data_size + btrfs_item_size(l, slot) +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003411 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
Yan, Zhenga5719522009-09-24 09:17:31 -04003412 return -EOVERFLOW;
3413
Chris Mason44871b12009-03-13 10:04:31 -04003414 /* first try to make some room by pushing left and right */
Liu Bo33157e02013-05-22 12:07:06 +00003415 if (data_size && path->nodes[1]) {
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00003416 int space_needed = data_size;
3417
3418 if (slot < btrfs_header_nritems(l))
David Sterbae902baa2019-03-20 14:36:46 +01003419 space_needed -= btrfs_leaf_free_space(l);
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00003420
3421 wret = push_leaf_right(trans, root, path, space_needed,
3422 space_needed, 0, 0);
Chris Mason44871b12009-03-13 10:04:31 -04003423 if (wret < 0)
3424 return wret;
3425 if (wret) {
Filipe Manana263d3992017-02-17 18:43:57 +00003426 space_needed = data_size;
3427 if (slot > 0)
David Sterbae902baa2019-03-20 14:36:46 +01003428 space_needed -= btrfs_leaf_free_space(l);
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00003429 wret = push_leaf_left(trans, root, path, space_needed,
3430 space_needed, 0, (u32)-1);
Chris Mason44871b12009-03-13 10:04:31 -04003431 if (wret < 0)
3432 return wret;
3433 }
3434 l = path->nodes[0];
3435
3436 /* did the pushes work? */
David Sterbae902baa2019-03-20 14:36:46 +01003437 if (btrfs_leaf_free_space(l) >= data_size)
Chris Mason44871b12009-03-13 10:04:31 -04003438 return 0;
3439 }
3440
3441 if (!path->nodes[1]) {
Liu Bofdd99c72013-05-22 12:06:51 +00003442 ret = insert_new_root(trans, root, path, 1);
Chris Mason44871b12009-03-13 10:04:31 -04003443 if (ret)
3444 return ret;
3445 }
3446again:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003447 split = 1;
Chris Mason44871b12009-03-13 10:04:31 -04003448 l = path->nodes[0];
3449 slot = path->slots[0];
3450 nritems = btrfs_header_nritems(l);
3451 mid = (nritems + 1) / 2;
3452
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003453 if (mid <= slot) {
3454 if (nritems == 1 ||
3455 leaf_space_used(l, mid, nritems - mid) + data_size >
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003456 BTRFS_LEAF_DATA_SIZE(fs_info)) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003457 if (slot >= nritems) {
3458 split = 0;
3459 } else {
3460 mid = slot;
3461 if (mid != nritems &&
3462 leaf_space_used(l, mid, nritems - mid) +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003463 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
Chris Mason99d8f832010-07-07 10:51:48 -04003464 if (data_size && !tried_avoid_double)
3465 goto push_for_double;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003466 split = 2;
3467 }
3468 }
3469 }
3470 } else {
3471 if (leaf_space_used(l, 0, mid) + data_size >
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003472 BTRFS_LEAF_DATA_SIZE(fs_info)) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003473 if (!extend && data_size && slot == 0) {
3474 split = 0;
3475 } else if ((extend || !data_size) && slot == 0) {
3476 mid = 1;
3477 } else {
3478 mid = slot;
3479 if (mid != nritems &&
3480 leaf_space_used(l, mid, nritems - mid) +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003481 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
Chris Mason99d8f832010-07-07 10:51:48 -04003482 if (data_size && !tried_avoid_double)
3483 goto push_for_double;
Dulshani Gunawardhana67871252013-10-31 10:33:04 +05303484 split = 2;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003485 }
3486 }
3487 }
3488 }
3489
3490 if (split == 0)
3491 btrfs_cpu_key_to_disk(&disk_key, ins_key);
3492 else
3493 btrfs_item_key(l, &disk_key, mid);
3494
Josef Bacikca9d4732020-08-20 11:46:08 -04003495 /*
3496 * We have to about BTRFS_NESTING_NEW_ROOT here if we've done a double
3497 * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES
3498 * subclasses, which is 8 at the time of this patch, and we've maxed it
3499 * out. In the future we could add a
3500 * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just
3501 * use BTRFS_NESTING_NEW_ROOT.
3502 */
Filipe Manana79bd3712021-06-29 14:43:06 +01003503 right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
3504 &disk_key, 0, l->start, 0,
3505 num_doubles ? BTRFS_NESTING_NEW_ROOT :
3506 BTRFS_NESTING_SPLIT);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003507 if (IS_ERR(right))
Chris Mason44871b12009-03-13 10:04:31 -04003508 return PTR_ERR(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003509
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003510 root_add_used(root, fs_info->nodesize);
Chris Mason44871b12009-03-13 10:04:31 -04003511
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003512 if (split == 0) {
3513 if (mid <= slot) {
3514 btrfs_set_header_nritems(right, 0);
David Sterba6ad3cf62019-03-20 14:32:45 +01003515 insert_ptr(trans, path, &disk_key,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003516 right->start, path->slots[1] + 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003517 btrfs_tree_unlock(path->nodes[0]);
3518 free_extent_buffer(path->nodes[0]);
3519 path->nodes[0] = right;
3520 path->slots[0] = 0;
3521 path->slots[1] += 1;
3522 } else {
3523 btrfs_set_header_nritems(right, 0);
David Sterba6ad3cf62019-03-20 14:32:45 +01003524 insert_ptr(trans, path, &disk_key,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003525 right->start, path->slots[1], 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003526 btrfs_tree_unlock(path->nodes[0]);
3527 free_extent_buffer(path->nodes[0]);
3528 path->nodes[0] = right;
3529 path->slots[0] = 0;
Jeff Mahoney143bede2012-03-01 14:56:26 +01003530 if (path->slots[1] == 0)
Nikolay Borisovb167fa92018-06-20 15:48:47 +03003531 fixup_low_keys(path, &disk_key, 1);
Chris Mason44871b12009-03-13 10:04:31 -04003532 }
Liu Bo196e0242016-09-07 14:48:28 -07003533 /*
3534 * We create a new leaf 'right' for the required ins_len and
3535 * we'll do btrfs_mark_buffer_dirty() on this leaf after copying
3536 * the content of ins_len to 'right'.
3537 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003538 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04003539 }
3540
David Sterba94f94ad2019-03-20 14:42:33 +01003541 copy_for_split(trans, path, l, right, slot, mid, nritems);
Chris Mason44871b12009-03-13 10:04:31 -04003542
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003543 if (split == 2) {
Chris Masoncc0c5532007-10-25 15:42:57 -04003544 BUG_ON(num_doubles != 0);
3545 num_doubles++;
3546 goto again;
Chris Mason3326d1b2007-10-15 16:18:25 -04003547 }
Chris Mason44871b12009-03-13 10:04:31 -04003548
Jeff Mahoney143bede2012-03-01 14:56:26 +01003549 return 0;
Chris Mason99d8f832010-07-07 10:51:48 -04003550
3551push_for_double:
3552 push_for_double_split(trans, root, path, data_size);
3553 tried_avoid_double = 1;
David Sterbae902baa2019-03-20 14:36:46 +01003554 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
Chris Mason99d8f832010-07-07 10:51:48 -04003555 return 0;
3556 goto again;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003557}
3558
Yan, Zhengad48fd752009-11-12 09:33:58 +00003559static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3560 struct btrfs_root *root,
3561 struct btrfs_path *path, int ins_len)
Chris Mason459931e2008-12-10 09:10:46 -05003562{
Yan, Zhengad48fd752009-11-12 09:33:58 +00003563 struct btrfs_key key;
Chris Mason459931e2008-12-10 09:10:46 -05003564 struct extent_buffer *leaf;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003565 struct btrfs_file_extent_item *fi;
3566 u64 extent_len = 0;
3567 u32 item_size;
3568 int ret;
Chris Mason459931e2008-12-10 09:10:46 -05003569
3570 leaf = path->nodes[0];
Yan, Zhengad48fd752009-11-12 09:33:58 +00003571 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3572
3573 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
3574 key.type != BTRFS_EXTENT_CSUM_KEY);
3575
David Sterbae902baa2019-03-20 14:36:46 +01003576 if (btrfs_leaf_free_space(leaf) >= ins_len)
Yan, Zhengad48fd752009-11-12 09:33:58 +00003577 return 0;
Chris Mason459931e2008-12-10 09:10:46 -05003578
Josef Bacik3212fa12021-10-21 14:58:35 -04003579 item_size = btrfs_item_size(leaf, path->slots[0]);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003580 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3581 fi = btrfs_item_ptr(leaf, path->slots[0],
3582 struct btrfs_file_extent_item);
3583 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
3584 }
David Sterbab3b4aa72011-04-21 01:20:15 +02003585 btrfs_release_path(path);
Chris Mason459931e2008-12-10 09:10:46 -05003586
Chris Mason459931e2008-12-10 09:10:46 -05003587 path->keep_locks = 1;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003588 path->search_for_split = 1;
3589 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Chris Mason459931e2008-12-10 09:10:46 -05003590 path->search_for_split = 0;
Filipe Mananaa8df6fe2015-01-20 12:40:53 +00003591 if (ret > 0)
3592 ret = -EAGAIN;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003593 if (ret < 0)
3594 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05003595
Yan, Zhengad48fd752009-11-12 09:33:58 +00003596 ret = -EAGAIN;
3597 leaf = path->nodes[0];
Filipe Mananaa8df6fe2015-01-20 12:40:53 +00003598 /* if our item isn't there, return now */
Josef Bacik3212fa12021-10-21 14:58:35 -04003599 if (item_size != btrfs_item_size(leaf, path->slots[0]))
Yan, Zhengad48fd752009-11-12 09:33:58 +00003600 goto err;
3601
Chris Mason109f6ae2010-04-02 09:20:18 -04003602 /* the leaf has changed, it now has room. return now */
David Sterbae902baa2019-03-20 14:36:46 +01003603 if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
Chris Mason109f6ae2010-04-02 09:20:18 -04003604 goto err;
3605
Yan, Zhengad48fd752009-11-12 09:33:58 +00003606 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3607 fi = btrfs_item_ptr(leaf, path->slots[0],
3608 struct btrfs_file_extent_item);
3609 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
3610 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05003611 }
3612
Yan, Zhengad48fd752009-11-12 09:33:58 +00003613 ret = split_leaf(trans, root, &key, path, ins_len, 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003614 if (ret)
3615 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05003616
Yan, Zhengad48fd752009-11-12 09:33:58 +00003617 path->keep_locks = 0;
Chris Masonb9473432009-03-13 11:00:37 -04003618 btrfs_unlock_up_safe(path, 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003619 return 0;
3620err:
3621 path->keep_locks = 0;
3622 return ret;
3623}
3624
David Sterba25263cd2019-03-20 14:44:57 +01003625static noinline int split_item(struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08003626 const struct btrfs_key *new_key,
Yan, Zhengad48fd752009-11-12 09:33:58 +00003627 unsigned long split_offset)
3628{
3629 struct extent_buffer *leaf;
Josef Bacikc91666b2021-10-21 14:58:32 -04003630 int orig_slot, slot;
Yan, Zhengad48fd752009-11-12 09:33:58 +00003631 char *buf;
3632 u32 nritems;
3633 u32 item_size;
3634 u32 orig_offset;
3635 struct btrfs_disk_key disk_key;
3636
Chris Masonb9473432009-03-13 11:00:37 -04003637 leaf = path->nodes[0];
David Sterbae902baa2019-03-20 14:36:46 +01003638 BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
Chris Masonb9473432009-03-13 11:00:37 -04003639
Josef Bacikc91666b2021-10-21 14:58:32 -04003640 orig_slot = path->slots[0];
Josef Bacik3212fa12021-10-21 14:58:35 -04003641 orig_offset = btrfs_item_offset(leaf, path->slots[0]);
3642 item_size = btrfs_item_size(leaf, path->slots[0]);
Chris Mason459931e2008-12-10 09:10:46 -05003643
Chris Mason459931e2008-12-10 09:10:46 -05003644 buf = kmalloc(item_size, GFP_NOFS);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003645 if (!buf)
3646 return -ENOMEM;
3647
Chris Mason459931e2008-12-10 09:10:46 -05003648 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3649 path->slots[0]), item_size);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003650
Chris Mason459931e2008-12-10 09:10:46 -05003651 slot = path->slots[0] + 1;
Chris Mason459931e2008-12-10 09:10:46 -05003652 nritems = btrfs_header_nritems(leaf);
Chris Mason459931e2008-12-10 09:10:46 -05003653 if (slot != nritems) {
3654 /* shift the items */
3655 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
Yan, Zhengad48fd752009-11-12 09:33:58 +00003656 btrfs_item_nr_offset(slot),
3657 (nritems - slot) * sizeof(struct btrfs_item));
Chris Mason459931e2008-12-10 09:10:46 -05003658 }
3659
3660 btrfs_cpu_key_to_disk(&disk_key, new_key);
3661 btrfs_set_item_key(leaf, &disk_key, slot);
3662
Josef Bacik3212fa12021-10-21 14:58:35 -04003663 btrfs_set_item_offset(leaf, slot, orig_offset);
3664 btrfs_set_item_size(leaf, slot, item_size - split_offset);
Chris Mason459931e2008-12-10 09:10:46 -05003665
Josef Bacik3212fa12021-10-21 14:58:35 -04003666 btrfs_set_item_offset(leaf, orig_slot,
Josef Bacikc91666b2021-10-21 14:58:32 -04003667 orig_offset + item_size - split_offset);
Josef Bacik3212fa12021-10-21 14:58:35 -04003668 btrfs_set_item_size(leaf, orig_slot, split_offset);
Chris Mason459931e2008-12-10 09:10:46 -05003669
3670 btrfs_set_header_nritems(leaf, nritems + 1);
3671
3672 /* write the data for the start of the original item */
3673 write_extent_buffer(leaf, buf,
3674 btrfs_item_ptr_offset(leaf, path->slots[0]),
3675 split_offset);
3676
3677 /* write the data for the new item */
3678 write_extent_buffer(leaf, buf + split_offset,
3679 btrfs_item_ptr_offset(leaf, slot),
3680 item_size - split_offset);
3681 btrfs_mark_buffer_dirty(leaf);
3682
David Sterbae902baa2019-03-20 14:36:46 +01003683 BUG_ON(btrfs_leaf_free_space(leaf) < 0);
Chris Mason459931e2008-12-10 09:10:46 -05003684 kfree(buf);
Yan, Zhengad48fd752009-11-12 09:33:58 +00003685 return 0;
3686}
3687
3688/*
3689 * This function splits a single item into two items,
3690 * giving 'new_key' to the new item and splitting the
3691 * old one at split_offset (from the start of the item).
3692 *
3693 * The path may be released by this operation. After
3694 * the split, the path is pointing to the old item. The
3695 * new item is going to be in the same node as the old one.
3696 *
3697 * Note, the item being split must be smaller enough to live alone on
3698 * a tree block with room for one extra struct btrfs_item
3699 *
3700 * This allows us to split the item in place, keeping a lock on the
3701 * leaf the entire time.
3702 */
3703int btrfs_split_item(struct btrfs_trans_handle *trans,
3704 struct btrfs_root *root,
3705 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08003706 const struct btrfs_key *new_key,
Yan, Zhengad48fd752009-11-12 09:33:58 +00003707 unsigned long split_offset)
3708{
3709 int ret;
3710 ret = setup_leaf_for_split(trans, root, path,
3711 sizeof(struct btrfs_item));
3712 if (ret)
3713 return ret;
3714
David Sterba25263cd2019-03-20 14:44:57 +01003715 ret = split_item(path, new_key, split_offset);
Chris Mason459931e2008-12-10 09:10:46 -05003716 return ret;
3717}
3718
3719/*
Chris Masond352ac62008-09-29 15:18:18 -04003720 * make the item pointed to by the path smaller. new_size indicates
3721 * how small to make it, and from_end tells us if we just chop bytes
3722 * off the end of the item or if we shift the item to chop bytes off
3723 * the front.
3724 */
David Sterba78ac4f92019-03-20 14:49:12 +01003725void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
Chris Masonb18c6682007-04-17 13:26:50 -04003726{
Chris Masonb18c6682007-04-17 13:26:50 -04003727 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04003728 struct extent_buffer *leaf;
Chris Masonb18c6682007-04-17 13:26:50 -04003729 u32 nritems;
3730 unsigned int data_end;
3731 unsigned int old_data_start;
3732 unsigned int old_size;
3733 unsigned int size_diff;
3734 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05003735 struct btrfs_map_token token;
3736
Chris Mason5f39d392007-10-15 16:14:19 -04003737 leaf = path->nodes[0];
Chris Mason179e29e2007-11-01 11:28:41 -04003738 slot = path->slots[0];
3739
Josef Bacik3212fa12021-10-21 14:58:35 -04003740 old_size = btrfs_item_size(leaf, slot);
Chris Mason179e29e2007-11-01 11:28:41 -04003741 if (old_size == new_size)
Jeff Mahoney143bede2012-03-01 14:56:26 +01003742 return;
Chris Masonb18c6682007-04-17 13:26:50 -04003743
Chris Mason5f39d392007-10-15 16:14:19 -04003744 nritems = btrfs_header_nritems(leaf);
David Sterba8f881e82019-03-20 11:33:10 +01003745 data_end = leaf_data_end(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003746
Josef Bacik3212fa12021-10-21 14:58:35 -04003747 old_data_start = btrfs_item_offset(leaf, slot);
Chris Mason179e29e2007-11-01 11:28:41 -04003748
Chris Masonb18c6682007-04-17 13:26:50 -04003749 size_diff = old_size - new_size;
3750
3751 BUG_ON(slot < 0);
3752 BUG_ON(slot >= nritems);
3753
3754 /*
3755 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3756 */
3757 /* first correct the data pointers */
David Sterbac82f8232019-08-09 17:48:21 +02003758 btrfs_init_map_token(&token, leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003759 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003760 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003761
Josef Bacik3212fa12021-10-21 14:58:35 -04003762 ioff = btrfs_token_item_offset(&token, i);
3763 btrfs_set_token_item_offset(&token, i, ioff + size_diff);
Chris Masonb18c6682007-04-17 13:26:50 -04003764 }
Chris Masondb945352007-10-15 16:15:53 -04003765
Chris Masonb18c6682007-04-17 13:26:50 -04003766 /* shift the data */
Chris Mason179e29e2007-11-01 11:28:41 -04003767 if (from_end) {
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003768 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3769 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
Chris Mason179e29e2007-11-01 11:28:41 -04003770 data_end, old_data_start + new_size - data_end);
3771 } else {
3772 struct btrfs_disk_key disk_key;
3773 u64 offset;
3774
3775 btrfs_item_key(leaf, &disk_key, slot);
3776
3777 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3778 unsigned long ptr;
3779 struct btrfs_file_extent_item *fi;
3780
3781 fi = btrfs_item_ptr(leaf, slot,
3782 struct btrfs_file_extent_item);
3783 fi = (struct btrfs_file_extent_item *)(
3784 (unsigned long)fi - size_diff);
3785
3786 if (btrfs_file_extent_type(leaf, fi) ==
3787 BTRFS_FILE_EXTENT_INLINE) {
3788 ptr = btrfs_item_ptr_offset(leaf, slot);
3789 memmove_extent_buffer(leaf, ptr,
Chris Masond3977122009-01-05 21:25:51 -05003790 (unsigned long)fi,
David Sterba7ec20af2014-07-24 17:34:58 +02003791 BTRFS_FILE_EXTENT_INLINE_DATA_START);
Chris Mason179e29e2007-11-01 11:28:41 -04003792 }
3793 }
3794
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003795 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3796 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
Chris Mason179e29e2007-11-01 11:28:41 -04003797 data_end, old_data_start - data_end);
3798
3799 offset = btrfs_disk_key_offset(&disk_key);
3800 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3801 btrfs_set_item_key(leaf, &disk_key, slot);
3802 if (slot == 0)
Nikolay Borisovb167fa92018-06-20 15:48:47 +03003803 fixup_low_keys(path, &disk_key, 1);
Chris Mason179e29e2007-11-01 11:28:41 -04003804 }
Chris Mason5f39d392007-10-15 16:14:19 -04003805
Josef Bacik3212fa12021-10-21 14:58:35 -04003806 btrfs_set_item_size(leaf, slot, new_size);
Chris Mason5f39d392007-10-15 16:14:19 -04003807 btrfs_mark_buffer_dirty(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003808
David Sterbae902baa2019-03-20 14:36:46 +01003809 if (btrfs_leaf_free_space(leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02003810 btrfs_print_leaf(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04003811 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003812 }
Chris Masonb18c6682007-04-17 13:26:50 -04003813}
3814
Chris Masond352ac62008-09-29 15:18:18 -04003815/*
Stefan Behrens8f69dbd2013-05-07 10:23:30 +00003816 * make the item pointed to by the path bigger, data_size is the added size.
Chris Masond352ac62008-09-29 15:18:18 -04003817 */
David Sterbac71dd882019-03-20 14:51:10 +01003818void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
Chris Mason6567e832007-04-16 09:22:45 -04003819{
Chris Mason6567e832007-04-16 09:22:45 -04003820 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04003821 struct extent_buffer *leaf;
Chris Mason6567e832007-04-16 09:22:45 -04003822 u32 nritems;
3823 unsigned int data_end;
3824 unsigned int old_data;
3825 unsigned int old_size;
3826 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05003827 struct btrfs_map_token token;
3828
Chris Mason5f39d392007-10-15 16:14:19 -04003829 leaf = path->nodes[0];
Chris Mason6567e832007-04-16 09:22:45 -04003830
Chris Mason5f39d392007-10-15 16:14:19 -04003831 nritems = btrfs_header_nritems(leaf);
David Sterba8f881e82019-03-20 11:33:10 +01003832 data_end = leaf_data_end(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003833
David Sterbae902baa2019-03-20 14:36:46 +01003834 if (btrfs_leaf_free_space(leaf) < data_size) {
David Sterbaa4f78752017-06-29 18:37:49 +02003835 btrfs_print_leaf(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003836 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003837 }
Chris Mason6567e832007-04-16 09:22:45 -04003838 slot = path->slots[0];
Josef Bacikdc2e7242021-10-21 14:58:37 -04003839 old_data = btrfs_item_data_end(leaf, slot);
Chris Mason6567e832007-04-16 09:22:45 -04003840
3841 BUG_ON(slot < 0);
Chris Mason3326d1b2007-10-15 16:18:25 -04003842 if (slot >= nritems) {
David Sterbaa4f78752017-06-29 18:37:49 +02003843 btrfs_print_leaf(leaf);
David Sterbac71dd882019-03-20 14:51:10 +01003844 btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003845 slot, nritems);
Arnd Bergmann290342f2019-03-25 14:02:25 +01003846 BUG();
Chris Mason3326d1b2007-10-15 16:18:25 -04003847 }
Chris Mason6567e832007-04-16 09:22:45 -04003848
3849 /*
3850 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3851 */
3852 /* first correct the data pointers */
David Sterbac82f8232019-08-09 17:48:21 +02003853 btrfs_init_map_token(&token, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003854 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003855 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003856
Josef Bacik3212fa12021-10-21 14:58:35 -04003857 ioff = btrfs_token_item_offset(&token, i);
3858 btrfs_set_token_item_offset(&token, i, ioff - data_size);
Chris Mason6567e832007-04-16 09:22:45 -04003859 }
Chris Mason5f39d392007-10-15 16:14:19 -04003860
Chris Mason6567e832007-04-16 09:22:45 -04003861 /* shift the data */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003862 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
3863 data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
Chris Mason6567e832007-04-16 09:22:45 -04003864 data_end, old_data - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04003865
Chris Mason6567e832007-04-16 09:22:45 -04003866 data_end = old_data;
Josef Bacik3212fa12021-10-21 14:58:35 -04003867 old_size = btrfs_item_size(leaf, slot);
3868 btrfs_set_item_size(leaf, slot, old_size + data_size);
Chris Mason5f39d392007-10-15 16:14:19 -04003869 btrfs_mark_buffer_dirty(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003870
David Sterbae902baa2019-03-20 14:36:46 +01003871 if (btrfs_leaf_free_space(leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02003872 btrfs_print_leaf(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04003873 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003874 }
Chris Mason6567e832007-04-16 09:22:45 -04003875}
3876
Nikolay Borisovda9ffb22020-09-01 17:40:00 +03003877/**
3878 * setup_items_for_insert - Helper called before inserting one or more items
3879 * to a leaf. Main purpose is to save stack depth by doing the bulk of the work
3880 * in a function that doesn't call btrfs_search_slot
3881 *
3882 * @root: root we are inserting items to
3883 * @path: points to the leaf/slot where we are going to insert new items
Filipe Mananab7ef5f32021-09-24 12:28:13 +01003884 * @batch: information about the batch of items to insert
Chris Mason74123bd2007-02-02 11:05:29 -05003885 */
Filipe Mananaf0641652021-09-24 12:28:14 +01003886static void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
3887 const struct btrfs_item_batch *batch)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003888{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003889 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason9c583092008-01-29 15:15:18 -05003890 int i;
Chris Mason7518a232007-03-12 12:01:18 -04003891 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003892 unsigned int data_end;
Chris Masone2fa7222007-03-12 16:22:34 -04003893 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04003894 struct extent_buffer *leaf;
3895 int slot;
Chris Masoncfed81a2012-03-03 07:40:03 -05003896 struct btrfs_map_token token;
Nikolay Borisovfc0d82e2020-09-01 17:39:59 +03003897 u32 total_size;
Nikolay Borisovfc0d82e2020-09-01 17:39:59 +03003898
Filipe Mananab7ef5f32021-09-24 12:28:13 +01003899 /*
3900 * Before anything else, update keys in the parent and other ancestors
3901 * if needed, then release the write locks on them, so that other tasks
3902 * can use them while we modify the leaf.
3903 */
Filipe Manana24cdc842014-07-28 19:34:35 +01003904 if (path->slots[0] == 0) {
Filipe Mananab7ef5f32021-09-24 12:28:13 +01003905 btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03003906 fixup_low_keys(path, &disk_key, 1);
Filipe Manana24cdc842014-07-28 19:34:35 +01003907 }
3908 btrfs_unlock_up_safe(path, 1);
3909
Chris Mason5f39d392007-10-15 16:14:19 -04003910 leaf = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04003911 slot = path->slots[0];
Chris Mason74123bd2007-02-02 11:05:29 -05003912
Chris Mason5f39d392007-10-15 16:14:19 -04003913 nritems = btrfs_header_nritems(leaf);
David Sterba8f881e82019-03-20 11:33:10 +01003914 data_end = leaf_data_end(leaf);
Filipe Mananab7ef5f32021-09-24 12:28:13 +01003915 total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
Chris Masoneb60cea2007-02-02 09:18:22 -05003916
David Sterbae902baa2019-03-20 14:36:46 +01003917 if (btrfs_leaf_free_space(leaf) < total_size) {
David Sterbaa4f78752017-06-29 18:37:49 +02003918 btrfs_print_leaf(leaf);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003919 btrfs_crit(fs_info, "not enough freespace need %u have %d",
David Sterbae902baa2019-03-20 14:36:46 +01003920 total_size, btrfs_leaf_free_space(leaf));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003921 BUG();
Chris Masond4dbff92007-04-04 14:08:15 -04003922 }
Chris Mason5f39d392007-10-15 16:14:19 -04003923
David Sterbac82f8232019-08-09 17:48:21 +02003924 btrfs_init_map_token(&token, leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003925 if (slot != nritems) {
Josef Bacikdc2e7242021-10-21 14:58:37 -04003926 unsigned int old_data = btrfs_item_data_end(leaf, slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003927
Chris Mason5f39d392007-10-15 16:14:19 -04003928 if (old_data < data_end) {
David Sterbaa4f78752017-06-29 18:37:49 +02003929 btrfs_print_leaf(leaf);
Nikolay Borisov7269ddd2020-09-01 17:40:01 +03003930 btrfs_crit(fs_info,
3931 "item at slot %d with data offset %u beyond data end of leaf %u",
Jeff Mahoney5d163e02016-09-20 10:05:00 -04003932 slot, old_data, data_end);
Arnd Bergmann290342f2019-03-25 14:02:25 +01003933 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003934 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05003935 /*
3936 * item0..itemN ... dataN.offset..dataN.size .. data0.size
3937 */
3938 /* first correct the data pointers */
Chris Mason0783fcf2007-03-12 20:12:07 -04003939 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003940 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003941
Josef Bacik3212fa12021-10-21 14:58:35 -04003942 ioff = btrfs_token_item_offset(&token, i);
3943 btrfs_set_token_item_offset(&token, i,
Josef Bacik74794202021-10-21 14:58:34 -04003944 ioff - batch->total_data_size);
Chris Mason0783fcf2007-03-12 20:12:07 -04003945 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05003946 /* shift the items */
Filipe Mananab7ef5f32021-09-24 12:28:13 +01003947 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + batch->nr),
Chris Mason5f39d392007-10-15 16:14:19 -04003948 btrfs_item_nr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04003949 (nritems - slot) * sizeof(struct btrfs_item));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003950
3951 /* shift the data */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003952 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
Filipe Mananab7ef5f32021-09-24 12:28:13 +01003953 data_end - batch->total_data_size,
3954 BTRFS_LEAF_DATA_OFFSET + data_end,
3955 old_data - data_end);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003956 data_end = old_data;
3957 }
Chris Mason5f39d392007-10-15 16:14:19 -04003958
Chris Mason62e27492007-03-15 12:56:47 -04003959 /* setup the item for the new data */
Filipe Mananab7ef5f32021-09-24 12:28:13 +01003960 for (i = 0; i < batch->nr; i++) {
3961 btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
Chris Mason9c583092008-01-29 15:15:18 -05003962 btrfs_set_item_key(leaf, &disk_key, slot + i);
Filipe Mananab7ef5f32021-09-24 12:28:13 +01003963 data_end -= batch->data_sizes[i];
Josef Bacik3212fa12021-10-21 14:58:35 -04003964 btrfs_set_token_item_offset(&token, slot + i, data_end);
3965 btrfs_set_token_item_size(&token, slot + i, batch->data_sizes[i]);
Chris Mason9c583092008-01-29 15:15:18 -05003966 }
Chris Mason44871b12009-03-13 10:04:31 -04003967
Filipe Mananab7ef5f32021-09-24 12:28:13 +01003968 btrfs_set_header_nritems(leaf, nritems + batch->nr);
Chris Masonb9473432009-03-13 11:00:37 -04003969 btrfs_mark_buffer_dirty(leaf);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003970
David Sterbae902baa2019-03-20 14:36:46 +01003971 if (btrfs_leaf_free_space(leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02003972 btrfs_print_leaf(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003973 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04003974 }
Chris Mason44871b12009-03-13 10:04:31 -04003975}
3976
3977/*
Filipe Mananaf0641652021-09-24 12:28:14 +01003978 * Insert a new item into a leaf.
3979 *
3980 * @root: The root of the btree.
3981 * @path: A path pointing to the target leaf and slot.
3982 * @key: The key of the new item.
3983 * @data_size: The size of the data associated with the new key.
3984 */
3985void btrfs_setup_item_for_insert(struct btrfs_root *root,
3986 struct btrfs_path *path,
3987 const struct btrfs_key *key,
3988 u32 data_size)
3989{
3990 struct btrfs_item_batch batch;
3991
3992 batch.keys = key;
3993 batch.data_sizes = &data_size;
3994 batch.total_data_size = data_size;
3995 batch.nr = 1;
3996
3997 setup_items_for_insert(root, path, &batch);
3998}
3999
4000/*
Chris Mason44871b12009-03-13 10:04:31 -04004001 * Given a key and some data, insert items into the tree.
4002 * This does all the path init required, making room in the tree if needed.
4003 */
4004int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4005 struct btrfs_root *root,
4006 struct btrfs_path *path,
Filipe Mananab7ef5f32021-09-24 12:28:13 +01004007 const struct btrfs_item_batch *batch)
Chris Mason44871b12009-03-13 10:04:31 -04004008{
Chris Mason44871b12009-03-13 10:04:31 -04004009 int ret = 0;
4010 int slot;
Filipe Mananab7ef5f32021-09-24 12:28:13 +01004011 u32 total_size;
Chris Mason44871b12009-03-13 10:04:31 -04004012
Filipe Mananab7ef5f32021-09-24 12:28:13 +01004013 total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
4014 ret = btrfs_search_slot(trans, root, &batch->keys[0], path, total_size, 1);
Chris Mason44871b12009-03-13 10:04:31 -04004015 if (ret == 0)
4016 return -EEXIST;
4017 if (ret < 0)
Jeff Mahoney143bede2012-03-01 14:56:26 +01004018 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04004019
Chris Mason44871b12009-03-13 10:04:31 -04004020 slot = path->slots[0];
4021 BUG_ON(slot < 0);
4022
Filipe Mananab7ef5f32021-09-24 12:28:13 +01004023 setup_items_for_insert(root, path, batch);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004024 return 0;
Chris Mason62e27492007-03-15 12:56:47 -04004025}
4026
4027/*
4028 * Given a key and some data, insert an item into the tree.
4029 * This does all the path init required, making room in the tree if needed.
4030 */
Omar Sandoval310712b2017-01-17 23:24:37 -08004031int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4032 const struct btrfs_key *cpu_key, void *data,
4033 u32 data_size)
Chris Mason62e27492007-03-15 12:56:47 -04004034{
4035 int ret = 0;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004036 struct btrfs_path *path;
Chris Mason5f39d392007-10-15 16:14:19 -04004037 struct extent_buffer *leaf;
4038 unsigned long ptr;
Chris Mason62e27492007-03-15 12:56:47 -04004039
Chris Mason2c90e5d2007-04-02 10:50:19 -04004040 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00004041 if (!path)
4042 return -ENOMEM;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004043 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
Chris Mason62e27492007-03-15 12:56:47 -04004044 if (!ret) {
Chris Mason5f39d392007-10-15 16:14:19 -04004045 leaf = path->nodes[0];
4046 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4047 write_extent_buffer(leaf, data, ptr, data_size);
4048 btrfs_mark_buffer_dirty(leaf);
Chris Mason62e27492007-03-15 12:56:47 -04004049 }
Chris Mason2c90e5d2007-04-02 10:50:19 -04004050 btrfs_free_path(path);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004051 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004052}
4053
Chris Mason74123bd2007-02-02 11:05:29 -05004054/*
Filipe Mananaf0641652021-09-24 12:28:14 +01004055 * This function duplicates an item, giving 'new_key' to the new item.
4056 * It guarantees both items live in the same tree leaf and the new item is
4057 * contiguous with the original item.
4058 *
4059 * This allows us to split a file extent in place, keeping a lock on the leaf
4060 * the entire time.
4061 */
4062int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4063 struct btrfs_root *root,
4064 struct btrfs_path *path,
4065 const struct btrfs_key *new_key)
4066{
4067 struct extent_buffer *leaf;
4068 int ret;
4069 u32 item_size;
4070
4071 leaf = path->nodes[0];
Josef Bacik3212fa12021-10-21 14:58:35 -04004072 item_size = btrfs_item_size(leaf, path->slots[0]);
Filipe Mananaf0641652021-09-24 12:28:14 +01004073 ret = setup_leaf_for_split(trans, root, path,
4074 item_size + sizeof(struct btrfs_item));
4075 if (ret)
4076 return ret;
4077
4078 path->slots[0]++;
4079 btrfs_setup_item_for_insert(root, path, new_key, item_size);
4080 leaf = path->nodes[0];
4081 memcpy_extent_buffer(leaf,
4082 btrfs_item_ptr_offset(leaf, path->slots[0]),
4083 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
4084 item_size);
4085 return 0;
4086}
4087
4088/*
Chris Mason5de08d72007-02-24 06:24:44 -05004089 * delete the pointer from a given node.
Chris Mason74123bd2007-02-02 11:05:29 -05004090 *
Chris Masond352ac62008-09-29 15:18:18 -04004091 * the tree should have been previously balanced so the deletion does not
4092 * empty a node.
Chris Mason74123bd2007-02-02 11:05:29 -05004093 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004094static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4095 int level, int slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004096{
Chris Mason5f39d392007-10-15 16:14:19 -04004097 struct extent_buffer *parent = path->nodes[level];
Chris Mason7518a232007-03-12 12:01:18 -04004098 u32 nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004099 int ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004100
Chris Mason5f39d392007-10-15 16:14:19 -04004101 nritems = btrfs_header_nritems(parent);
Chris Masond3977122009-01-05 21:25:51 -05004102 if (slot != nritems - 1) {
David Sterbabf1d3422018-03-05 15:47:39 +01004103 if (level) {
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00004104 ret = btrfs_tree_mod_log_insert_move(parent, slot,
4105 slot + 1, nritems - slot - 1);
David Sterbabf1d3422018-03-05 15:47:39 +01004106 BUG_ON(ret < 0);
4107 }
Chris Mason5f39d392007-10-15 16:14:19 -04004108 memmove_extent_buffer(parent,
4109 btrfs_node_key_ptr_offset(slot),
4110 btrfs_node_key_ptr_offset(slot + 1),
Chris Masond6025572007-03-30 14:27:56 -04004111 sizeof(struct btrfs_key_ptr) *
4112 (nritems - slot - 1));
Chris Mason57ba86c2012-12-18 19:35:32 -05004113 } else if (level) {
Filipe Mananaf3a84cc2021-03-11 14:31:07 +00004114 ret = btrfs_tree_mod_log_insert_key(parent, slot,
4115 BTRFS_MOD_LOG_KEY_REMOVE, GFP_NOFS);
Chris Mason57ba86c2012-12-18 19:35:32 -05004116 BUG_ON(ret < 0);
Chris Masonbb803952007-03-01 12:04:21 -05004117 }
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004118
Chris Mason7518a232007-03-12 12:01:18 -04004119 nritems--;
Chris Mason5f39d392007-10-15 16:14:19 -04004120 btrfs_set_header_nritems(parent, nritems);
Chris Mason7518a232007-03-12 12:01:18 -04004121 if (nritems == 0 && parent == root->node) {
Chris Mason5f39d392007-10-15 16:14:19 -04004122 BUG_ON(btrfs_header_level(root->node) != 1);
Chris Masonbb803952007-03-01 12:04:21 -05004123 /* just turn the root into a leaf and break */
Chris Mason5f39d392007-10-15 16:14:19 -04004124 btrfs_set_header_level(root->node, 0);
Chris Masonbb803952007-03-01 12:04:21 -05004125 } else if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004126 struct btrfs_disk_key disk_key;
4127
4128 btrfs_node_key(parent, &disk_key, 0);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03004129 fixup_low_keys(path, &disk_key, level + 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004130 }
Chris Masond6025572007-03-30 14:27:56 -04004131 btrfs_mark_buffer_dirty(parent);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004132}
4133
Chris Mason74123bd2007-02-02 11:05:29 -05004134/*
Chris Mason323ac952008-10-01 19:05:46 -04004135 * a helper function to delete the leaf pointed to by path->slots[1] and
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004136 * path->nodes[1].
Chris Mason323ac952008-10-01 19:05:46 -04004137 *
4138 * This deletes the pointer in path->nodes[1] and frees the leaf
4139 * block extent. zero is returned if it all worked out, < 0 otherwise.
4140 *
4141 * The path must have already been setup for deleting the leaf, including
4142 * all the proper balancing. path->nodes[1] must be locked.
4143 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01004144static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4145 struct btrfs_root *root,
4146 struct btrfs_path *path,
4147 struct extent_buffer *leaf)
Chris Mason323ac952008-10-01 19:05:46 -04004148{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004149 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004150 del_ptr(root, path, 1, path->slots[1]);
Chris Mason323ac952008-10-01 19:05:46 -04004151
Chris Mason4d081c42009-02-04 09:31:28 -05004152 /*
4153 * btrfs_free_extent is expensive, we want to make sure we
4154 * aren't holding any locks when we call it
4155 */
4156 btrfs_unlock_up_safe(path, 0);
4157
Yan, Zhengf0486c62010-05-16 10:46:25 -04004158 root_sub_used(root, leaf->len);
4159
David Sterba67439da2019-10-08 13:28:47 +02004160 atomic_inc(&leaf->refs);
Filipe Manana7a1636082021-12-13 08:45:12 +00004161 btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05004162 free_extent_buffer_stale(leaf);
Chris Mason323ac952008-10-01 19:05:46 -04004163}
4164/*
Chris Mason74123bd2007-02-02 11:05:29 -05004165 * delete the item at the leaf level in path. If that empties
4166 * the leaf, remove it from the tree
4167 */
Chris Mason85e21ba2008-01-29 15:11:36 -05004168int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4169 struct btrfs_path *path, int slot, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004170{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004171 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04004172 struct extent_buffer *leaf;
Alexandru Moisece0eac22015-08-23 16:01:42 +00004173 u32 last_off;
4174 u32 dsize = 0;
Chris Masonaa5d6be2007-02-28 16:35:06 -05004175 int ret = 0;
4176 int wret;
Chris Mason85e21ba2008-01-29 15:11:36 -05004177 int i;
Chris Mason7518a232007-03-12 12:01:18 -04004178 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004179
Chris Mason5f39d392007-10-15 16:14:19 -04004180 leaf = path->nodes[0];
Josef Bacik3212fa12021-10-21 14:58:35 -04004181 last_off = btrfs_item_offset(leaf, slot + nr - 1);
Chris Mason85e21ba2008-01-29 15:11:36 -05004182
4183 for (i = 0; i < nr; i++)
Josef Bacik3212fa12021-10-21 14:58:35 -04004184 dsize += btrfs_item_size(leaf, slot + i);
Chris Mason85e21ba2008-01-29 15:11:36 -05004185
Chris Mason5f39d392007-10-15 16:14:19 -04004186 nritems = btrfs_header_nritems(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004187
Chris Mason85e21ba2008-01-29 15:11:36 -05004188 if (slot + nr != nritems) {
David Sterba8f881e82019-03-20 11:33:10 +01004189 int data_end = leaf_data_end(leaf);
David Sterbac82f8232019-08-09 17:48:21 +02004190 struct btrfs_map_token token;
Chris Mason5f39d392007-10-15 16:14:19 -04004191
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004192 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
Chris Masond6025572007-03-30 14:27:56 -04004193 data_end + dsize,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004194 BTRFS_LEAF_DATA_OFFSET + data_end,
Chris Mason85e21ba2008-01-29 15:11:36 -05004195 last_off - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04004196
David Sterbac82f8232019-08-09 17:48:21 +02004197 btrfs_init_map_token(&token, leaf);
Chris Mason85e21ba2008-01-29 15:11:36 -05004198 for (i = slot + nr; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004199 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04004200
Josef Bacik3212fa12021-10-21 14:58:35 -04004201 ioff = btrfs_token_item_offset(&token, i);
4202 btrfs_set_token_item_offset(&token, i, ioff + dsize);
Chris Mason0783fcf2007-03-12 20:12:07 -04004203 }
Chris Masondb945352007-10-15 16:15:53 -04004204
Chris Mason5f39d392007-10-15 16:14:19 -04004205 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
Chris Mason85e21ba2008-01-29 15:11:36 -05004206 btrfs_item_nr_offset(slot + nr),
Chris Masond6025572007-03-30 14:27:56 -04004207 sizeof(struct btrfs_item) *
Chris Mason85e21ba2008-01-29 15:11:36 -05004208 (nritems - slot - nr));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004209 }
Chris Mason85e21ba2008-01-29 15:11:36 -05004210 btrfs_set_header_nritems(leaf, nritems - nr);
4211 nritems -= nr;
Chris Mason5f39d392007-10-15 16:14:19 -04004212
Chris Mason74123bd2007-02-02 11:05:29 -05004213 /* delete the leaf if we've emptied it */
Chris Mason7518a232007-03-12 12:01:18 -04004214 if (nritems == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004215 if (leaf == root->node) {
4216 btrfs_set_header_level(leaf, 0);
Chris Mason9a8dd152007-02-23 08:38:36 -05004217 } else {
David Sterba6a884d7d2019-03-20 14:30:02 +01004218 btrfs_clean_tree_block(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004219 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason9a8dd152007-02-23 08:38:36 -05004220 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004221 } else {
Chris Mason7518a232007-03-12 12:01:18 -04004222 int used = leaf_space_used(leaf, 0, nritems);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004223 if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004224 struct btrfs_disk_key disk_key;
4225
4226 btrfs_item_key(leaf, &disk_key, 0);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03004227 fixup_low_keys(path, &disk_key, 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004228 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05004229
Chris Mason74123bd2007-02-02 11:05:29 -05004230 /* delete the leaf if it is mostly empty */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004231 if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05004232 /* push_leaf_left fixes the path.
4233 * make sure the path still points to our leaf
4234 * for possible call to del_ptr below
4235 */
Chris Mason4920c9a2007-01-26 16:38:42 -05004236 slot = path->slots[1];
David Sterba67439da2019-10-08 13:28:47 +02004237 atomic_inc(&leaf->refs);
Chris Mason5f39d392007-10-15 16:14:19 -04004238
Chris Mason99d8f832010-07-07 10:51:48 -04004239 wret = push_leaf_left(trans, root, path, 1, 1,
4240 1, (u32)-1);
Chris Mason54aa1f42007-06-22 14:16:25 -04004241 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05004242 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04004243
4244 if (path->nodes[0] == leaf &&
4245 btrfs_header_nritems(leaf)) {
Chris Mason99d8f832010-07-07 10:51:48 -04004246 wret = push_leaf_right(trans, root, path, 1,
4247 1, 1, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04004248 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05004249 ret = wret;
4250 }
Chris Mason5f39d392007-10-15 16:14:19 -04004251
4252 if (btrfs_header_nritems(leaf) == 0) {
Chris Mason323ac952008-10-01 19:05:46 -04004253 path->slots[1] = slot;
Jeff Mahoney143bede2012-03-01 14:56:26 +01004254 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04004255 free_extent_buffer(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004256 ret = 0;
Chris Mason5de08d72007-02-24 06:24:44 -05004257 } else {
Chris Mason925baed2008-06-25 16:01:30 -04004258 /* if we're still in the path, make sure
4259 * we're dirty. Otherwise, one of the
4260 * push_leaf functions must have already
4261 * dirtied this buffer
4262 */
4263 if (path->nodes[0] == leaf)
4264 btrfs_mark_buffer_dirty(leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04004265 free_extent_buffer(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004266 }
Chris Masond5719762007-03-23 10:01:08 -04004267 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04004268 btrfs_mark_buffer_dirty(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004269 }
4270 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05004271 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004272}
4273
Chris Mason97571fd2007-02-24 13:39:08 -05004274/*
Chris Mason925baed2008-06-25 16:01:30 -04004275 * search the tree again to find a leaf with lesser keys
Chris Mason7bb86312007-12-11 09:25:06 -05004276 * returns 0 if it found something or 1 if there are no lesser leaves.
4277 * returns < 0 on io errors.
Chris Masond352ac62008-09-29 15:18:18 -04004278 *
4279 * This may release the path, and so you may lose any locks held at the
4280 * time you call it.
Chris Mason7bb86312007-12-11 09:25:06 -05004281 */
Josef Bacik16e75492013-10-22 12:18:51 -04004282int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Mason7bb86312007-12-11 09:25:06 -05004283{
Chris Mason925baed2008-06-25 16:01:30 -04004284 struct btrfs_key key;
4285 struct btrfs_disk_key found_key;
4286 int ret;
Chris Mason7bb86312007-12-11 09:25:06 -05004287
Chris Mason925baed2008-06-25 16:01:30 -04004288 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
Chris Mason7bb86312007-12-11 09:25:06 -05004289
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01004290 if (key.offset > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04004291 key.offset--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01004292 } else if (key.type > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04004293 key.type--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01004294 key.offset = (u64)-1;
4295 } else if (key.objectid > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04004296 key.objectid--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01004297 key.type = (u8)-1;
4298 key.offset = (u64)-1;
4299 } else {
Chris Mason925baed2008-06-25 16:01:30 -04004300 return 1;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01004301 }
Chris Mason7bb86312007-12-11 09:25:06 -05004302
David Sterbab3b4aa72011-04-21 01:20:15 +02004303 btrfs_release_path(path);
Chris Mason925baed2008-06-25 16:01:30 -04004304 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4305 if (ret < 0)
4306 return ret;
4307 btrfs_item_key(path->nodes[0], &found_key, 0);
4308 ret = comp_keys(&found_key, &key);
Filipe Manana337c6f62014-06-09 13:22:13 +01004309 /*
4310 * We might have had an item with the previous key in the tree right
4311 * before we released our path. And after we released our path, that
4312 * item might have been pushed to the first slot (0) of the leaf we
4313 * were holding due to a tree balance. Alternatively, an item with the
4314 * previous key can exist as the only element of a leaf (big fat item).
4315 * Therefore account for these 2 cases, so that our callers (like
4316 * btrfs_previous_item) don't miss an existing item with a key matching
4317 * the previous key we computed above.
4318 */
4319 if (ret <= 0)
Chris Mason925baed2008-06-25 16:01:30 -04004320 return 0;
4321 return 1;
Chris Mason7bb86312007-12-11 09:25:06 -05004322}
4323
Chris Mason3f157a22008-06-25 16:01:31 -04004324/*
4325 * A helper function to walk down the tree starting at min_key, and looking
Eric Sandeende78b512013-01-31 18:21:12 +00004326 * for nodes or leaves that are have a minimum transaction id.
4327 * This is used by the btree defrag code, and tree logging
Chris Mason3f157a22008-06-25 16:01:31 -04004328 *
4329 * This does not cow, but it does stuff the starting key it finds back
4330 * into min_key, so you can call btrfs_search_slot with cow=1 on the
4331 * key and get a writable path.
4332 *
Chris Mason3f157a22008-06-25 16:01:31 -04004333 * This honors path->lowest_level to prevent descent past a given level
4334 * of the tree.
4335 *
Chris Masond352ac62008-09-29 15:18:18 -04004336 * min_trans indicates the oldest transaction that you are interested
4337 * in walking through. Any nodes or leaves older than min_trans are
4338 * skipped over (without reading them).
4339 *
Chris Mason3f157a22008-06-25 16:01:31 -04004340 * returns zero if something useful was found, < 0 on error and 1 if there
4341 * was nothing in the tree that matched the search criteria.
4342 */
4343int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
Eric Sandeende78b512013-01-31 18:21:12 +00004344 struct btrfs_path *path,
Chris Mason3f157a22008-06-25 16:01:31 -04004345 u64 min_trans)
4346{
4347 struct extent_buffer *cur;
4348 struct btrfs_key found_key;
4349 int slot;
Yan96524802008-07-24 12:19:49 -04004350 int sret;
Chris Mason3f157a22008-06-25 16:01:31 -04004351 u32 nritems;
4352 int level;
4353 int ret = 1;
Filipe Mananaf98de9b2014-08-04 19:37:21 +01004354 int keep_locks = path->keep_locks;
Chris Mason3f157a22008-06-25 16:01:31 -04004355
Filipe Mananaf98de9b2014-08-04 19:37:21 +01004356 path->keep_locks = 1;
Chris Mason3f157a22008-06-25 16:01:31 -04004357again:
Chris Masonbd681512011-07-16 15:23:14 -04004358 cur = btrfs_read_lock_root_node(root);
Chris Mason3f157a22008-06-25 16:01:31 -04004359 level = btrfs_header_level(cur);
Chris Masone02119d2008-09-05 16:13:11 -04004360 WARN_ON(path->nodes[level]);
Chris Mason3f157a22008-06-25 16:01:31 -04004361 path->nodes[level] = cur;
Chris Masonbd681512011-07-16 15:23:14 -04004362 path->locks[level] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04004363
4364 if (btrfs_header_generation(cur) < min_trans) {
4365 ret = 1;
4366 goto out;
4367 }
Chris Masond3977122009-01-05 21:25:51 -05004368 while (1) {
Chris Mason3f157a22008-06-25 16:01:31 -04004369 nritems = btrfs_header_nritems(cur);
4370 level = btrfs_header_level(cur);
Qu Wenruoe3b83362020-04-17 15:08:21 +08004371 sret = btrfs_bin_search(cur, min_key, &slot);
Filipe Mananacbca7d52019-02-18 16:57:26 +00004372 if (sret < 0) {
4373 ret = sret;
4374 goto out;
4375 }
Chris Mason3f157a22008-06-25 16:01:31 -04004376
Chris Mason323ac952008-10-01 19:05:46 -04004377 /* at the lowest level, we're done, setup the path and exit */
4378 if (level == path->lowest_level) {
Chris Masone02119d2008-09-05 16:13:11 -04004379 if (slot >= nritems)
4380 goto find_next_key;
Chris Mason3f157a22008-06-25 16:01:31 -04004381 ret = 0;
4382 path->slots[level] = slot;
4383 btrfs_item_key_to_cpu(cur, &found_key, slot);
4384 goto out;
4385 }
Yan96524802008-07-24 12:19:49 -04004386 if (sret && slot > 0)
4387 slot--;
Chris Mason3f157a22008-06-25 16:01:31 -04004388 /*
Eric Sandeende78b512013-01-31 18:21:12 +00004389 * check this node pointer against the min_trans parameters.
Randy Dunlap260db432020-08-04 19:48:34 -07004390 * If it is too old, skip to the next one.
Chris Mason3f157a22008-06-25 16:01:31 -04004391 */
Chris Masond3977122009-01-05 21:25:51 -05004392 while (slot < nritems) {
Chris Mason3f157a22008-06-25 16:01:31 -04004393 u64 gen;
Chris Masone02119d2008-09-05 16:13:11 -04004394
Chris Mason3f157a22008-06-25 16:01:31 -04004395 gen = btrfs_node_ptr_generation(cur, slot);
4396 if (gen < min_trans) {
4397 slot++;
4398 continue;
4399 }
Eric Sandeende78b512013-01-31 18:21:12 +00004400 break;
Chris Mason3f157a22008-06-25 16:01:31 -04004401 }
Chris Masone02119d2008-09-05 16:13:11 -04004402find_next_key:
Chris Mason3f157a22008-06-25 16:01:31 -04004403 /*
4404 * we didn't find a candidate key in this node, walk forward
4405 * and find another one
4406 */
4407 if (slot >= nritems) {
Chris Masone02119d2008-09-05 16:13:11 -04004408 path->slots[level] = slot;
4409 sret = btrfs_find_next_key(root, path, min_key, level,
Eric Sandeende78b512013-01-31 18:21:12 +00004410 min_trans);
Chris Masone02119d2008-09-05 16:13:11 -04004411 if (sret == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02004412 btrfs_release_path(path);
Chris Mason3f157a22008-06-25 16:01:31 -04004413 goto again;
4414 } else {
4415 goto out;
4416 }
4417 }
4418 /* save our key for returning back */
4419 btrfs_node_key_to_cpu(cur, &found_key, slot);
4420 path->slots[level] = slot;
4421 if (level == path->lowest_level) {
4422 ret = 0;
Chris Mason3f157a22008-06-25 16:01:31 -04004423 goto out;
4424 }
David Sterba4b231ae2019-08-21 19:16:27 +02004425 cur = btrfs_read_node_slot(cur, slot);
Liu Bofb770ae2016-07-05 12:10:14 -07004426 if (IS_ERR(cur)) {
4427 ret = PTR_ERR(cur);
4428 goto out;
4429 }
Chris Mason3f157a22008-06-25 16:01:31 -04004430
Chris Masonbd681512011-07-16 15:23:14 -04004431 btrfs_tree_read_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -05004432
Chris Masonbd681512011-07-16 15:23:14 -04004433 path->locks[level - 1] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04004434 path->nodes[level - 1] = cur;
Chris Masonf7c79f32012-03-19 15:54:38 -04004435 unlock_up(path, level, 1, 0, NULL);
Chris Mason3f157a22008-06-25 16:01:31 -04004436 }
4437out:
Filipe Mananaf98de9b2014-08-04 19:37:21 +01004438 path->keep_locks = keep_locks;
4439 if (ret == 0) {
4440 btrfs_unlock_up_safe(path, path->lowest_level + 1);
Chris Mason3f157a22008-06-25 16:01:31 -04004441 memcpy(min_key, &found_key, sizeof(found_key));
Filipe Mananaf98de9b2014-08-04 19:37:21 +01004442 }
Chris Mason3f157a22008-06-25 16:01:31 -04004443 return ret;
4444}
4445
4446/*
4447 * this is similar to btrfs_next_leaf, but does not try to preserve
4448 * and fixup the path. It looks for and returns the next key in the
Eric Sandeende78b512013-01-31 18:21:12 +00004449 * tree based on the current path and the min_trans parameters.
Chris Mason3f157a22008-06-25 16:01:31 -04004450 *
4451 * 0 is returned if another key is found, < 0 if there are any errors
4452 * and 1 is returned if there are no higher keys in the tree
4453 *
4454 * path->keep_locks should be set to 1 on the search made before
4455 * calling this function.
4456 */
Chris Masone7a84562008-06-25 16:01:31 -04004457int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
Eric Sandeende78b512013-01-31 18:21:12 +00004458 struct btrfs_key *key, int level, u64 min_trans)
Chris Masone7a84562008-06-25 16:01:31 -04004459{
Chris Masone7a84562008-06-25 16:01:31 -04004460 int slot;
4461 struct extent_buffer *c;
4462
Josef Bacik6a9fb462019-06-20 15:37:52 -04004463 WARN_ON(!path->keep_locks && !path->skip_locking);
Chris Masond3977122009-01-05 21:25:51 -05004464 while (level < BTRFS_MAX_LEVEL) {
Chris Masone7a84562008-06-25 16:01:31 -04004465 if (!path->nodes[level])
4466 return 1;
4467
4468 slot = path->slots[level] + 1;
4469 c = path->nodes[level];
Chris Mason3f157a22008-06-25 16:01:31 -04004470next:
Chris Masone7a84562008-06-25 16:01:31 -04004471 if (slot >= btrfs_header_nritems(c)) {
Yan Zheng33c66f42009-07-22 09:59:00 -04004472 int ret;
4473 int orig_lowest;
4474 struct btrfs_key cur_key;
4475 if (level + 1 >= BTRFS_MAX_LEVEL ||
4476 !path->nodes[level + 1])
Chris Masone7a84562008-06-25 16:01:31 -04004477 return 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04004478
Josef Bacik6a9fb462019-06-20 15:37:52 -04004479 if (path->locks[level + 1] || path->skip_locking) {
Yan Zheng33c66f42009-07-22 09:59:00 -04004480 level++;
4481 continue;
4482 }
4483
4484 slot = btrfs_header_nritems(c) - 1;
4485 if (level == 0)
4486 btrfs_item_key_to_cpu(c, &cur_key, slot);
4487 else
4488 btrfs_node_key_to_cpu(c, &cur_key, slot);
4489
4490 orig_lowest = path->lowest_level;
David Sterbab3b4aa72011-04-21 01:20:15 +02004491 btrfs_release_path(path);
Yan Zheng33c66f42009-07-22 09:59:00 -04004492 path->lowest_level = level;
4493 ret = btrfs_search_slot(NULL, root, &cur_key, path,
4494 0, 0);
4495 path->lowest_level = orig_lowest;
4496 if (ret < 0)
4497 return ret;
4498
4499 c = path->nodes[level];
4500 slot = path->slots[level];
4501 if (ret == 0)
4502 slot++;
4503 goto next;
Chris Masone7a84562008-06-25 16:01:31 -04004504 }
Yan Zheng33c66f42009-07-22 09:59:00 -04004505
Chris Masone7a84562008-06-25 16:01:31 -04004506 if (level == 0)
4507 btrfs_item_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04004508 else {
Chris Mason3f157a22008-06-25 16:01:31 -04004509 u64 gen = btrfs_node_ptr_generation(c, slot);
4510
Chris Mason3f157a22008-06-25 16:01:31 -04004511 if (gen < min_trans) {
4512 slot++;
4513 goto next;
4514 }
Chris Masone7a84562008-06-25 16:01:31 -04004515 btrfs_node_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04004516 }
Chris Masone7a84562008-06-25 16:01:31 -04004517 return 0;
4518 }
4519 return 1;
4520}
4521
Jan Schmidt3d7806e2012-06-11 08:29:29 +02004522int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
4523 u64 time_seq)
4524{
Chris Masond97e63b2007-02-20 16:40:44 -05004525 int slot;
Chris Mason8e73f272009-04-03 10:14:18 -04004526 int level;
Chris Mason5f39d392007-10-15 16:14:19 -04004527 struct extent_buffer *c;
Chris Mason8e73f272009-04-03 10:14:18 -04004528 struct extent_buffer *next;
Filipe Mananad96b3422021-11-22 12:03:38 +00004529 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason925baed2008-06-25 16:01:30 -04004530 struct btrfs_key key;
Filipe Mananad96b3422021-11-22 12:03:38 +00004531 bool need_commit_sem = false;
Chris Mason925baed2008-06-25 16:01:30 -04004532 u32 nritems;
4533 int ret;
Josef Bacik0e463182020-11-06 16:27:30 -05004534 int i;
Chris Mason925baed2008-06-25 16:01:30 -04004535
4536 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Masond3977122009-01-05 21:25:51 -05004537 if (nritems == 0)
Chris Mason925baed2008-06-25 16:01:30 -04004538 return 1;
Chris Mason925baed2008-06-25 16:01:30 -04004539
Chris Mason8e73f272009-04-03 10:14:18 -04004540 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4541again:
4542 level = 1;
4543 next = NULL;
David Sterbab3b4aa72011-04-21 01:20:15 +02004544 btrfs_release_path(path);
Chris Mason8e73f272009-04-03 10:14:18 -04004545
Chris Masona2135012008-06-25 16:01:30 -04004546 path->keep_locks = 1;
Chris Mason8e73f272009-04-03 10:14:18 -04004547
Filipe Mananad96b3422021-11-22 12:03:38 +00004548 if (time_seq) {
Jan Schmidt3d7806e2012-06-11 08:29:29 +02004549 ret = btrfs_search_old_slot(root, &key, path, time_seq);
Filipe Mananad96b3422021-11-22 12:03:38 +00004550 } else {
4551 if (path->need_commit_sem) {
4552 path->need_commit_sem = 0;
4553 need_commit_sem = true;
4554 down_read(&fs_info->commit_root_sem);
4555 }
Jan Schmidt3d7806e2012-06-11 08:29:29 +02004556 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
Filipe Mananad96b3422021-11-22 12:03:38 +00004557 }
Chris Mason925baed2008-06-25 16:01:30 -04004558 path->keep_locks = 0;
4559
4560 if (ret < 0)
Filipe Mananad96b3422021-11-22 12:03:38 +00004561 goto done;
Chris Mason925baed2008-06-25 16:01:30 -04004562
Chris Masona2135012008-06-25 16:01:30 -04004563 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Mason168fd7d2008-06-25 16:01:30 -04004564 /*
4565 * by releasing the path above we dropped all our locks. A balance
4566 * could have added more items next to the key that used to be
4567 * at the very end of the block. So, check again here and
4568 * advance the path if there are now more items available.
4569 */
Chris Masona2135012008-06-25 16:01:30 -04004570 if (nritems > 0 && path->slots[0] < nritems - 1) {
Yan Zhenge457afe2009-07-22 09:59:00 -04004571 if (ret == 0)
4572 path->slots[0]++;
Chris Mason8e73f272009-04-03 10:14:18 -04004573 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04004574 goto done;
4575 }
Liu Bo0b43e042014-06-09 11:04:49 +08004576 /*
4577 * So the above check misses one case:
4578 * - after releasing the path above, someone has removed the item that
4579 * used to be at the very end of the block, and balance between leafs
4580 * gets another one with bigger key.offset to replace it.
4581 *
4582 * This one should be returned as well, or we can get leaf corruption
4583 * later(esp. in __btrfs_drop_extents()).
4584 *
4585 * And a bit more explanation about this check,
4586 * with ret > 0, the key isn't found, the path points to the slot
4587 * where it should be inserted, so the path->slots[0] item must be the
4588 * bigger one.
4589 */
4590 if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
4591 ret = 0;
4592 goto done;
4593 }
Chris Masond97e63b2007-02-20 16:40:44 -05004594
Chris Masond3977122009-01-05 21:25:51 -05004595 while (level < BTRFS_MAX_LEVEL) {
Chris Mason8e73f272009-04-03 10:14:18 -04004596 if (!path->nodes[level]) {
4597 ret = 1;
4598 goto done;
4599 }
Chris Mason5f39d392007-10-15 16:14:19 -04004600
Chris Masond97e63b2007-02-20 16:40:44 -05004601 slot = path->slots[level] + 1;
4602 c = path->nodes[level];
Chris Mason5f39d392007-10-15 16:14:19 -04004603 if (slot >= btrfs_header_nritems(c)) {
Chris Masond97e63b2007-02-20 16:40:44 -05004604 level++;
Chris Mason8e73f272009-04-03 10:14:18 -04004605 if (level == BTRFS_MAX_LEVEL) {
4606 ret = 1;
4607 goto done;
4608 }
Chris Masond97e63b2007-02-20 16:40:44 -05004609 continue;
4610 }
Chris Mason5f39d392007-10-15 16:14:19 -04004611
Josef Bacik0e463182020-11-06 16:27:30 -05004612
4613 /*
4614 * Our current level is where we're going to start from, and to
4615 * make sure lockdep doesn't complain we need to drop our locks
4616 * and nodes from 0 to our current level.
4617 */
4618 for (i = 0; i < level; i++) {
4619 if (path->locks[level]) {
4620 btrfs_tree_read_unlock(path->nodes[i]);
4621 path->locks[i] = 0;
4622 }
4623 free_extent_buffer(path->nodes[i]);
4624 path->nodes[i] = NULL;
Chris Mason925baed2008-06-25 16:01:30 -04004625 }
Chris Mason5f39d392007-10-15 16:14:19 -04004626
Chris Mason8e73f272009-04-03 10:14:18 -04004627 next = c;
Liu Bod07b8522017-01-30 12:23:42 -08004628 ret = read_block_for_search(root, path, &next, level,
David Sterbacda79c52017-02-10 18:44:32 +01004629 slot, &key);
Chris Mason8e73f272009-04-03 10:14:18 -04004630 if (ret == -EAGAIN)
4631 goto again;
Chris Mason5f39d392007-10-15 16:14:19 -04004632
Chris Mason76a05b32009-05-14 13:24:30 -04004633 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02004634 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04004635 goto done;
4636 }
4637
Chris Mason5cd57b22008-06-25 16:01:30 -04004638 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04004639 ret = btrfs_try_tree_read_lock(next);
Jan Schmidtd42244a2012-06-22 14:51:15 +02004640 if (!ret && time_seq) {
4641 /*
4642 * If we don't get the lock, we may be racing
4643 * with push_leaf_left, holding that lock while
4644 * itself waiting for the leaf we've currently
4645 * locked. To solve this situation, we give up
4646 * on our lock and cycle.
4647 */
Jan Schmidtcf538832012-07-04 15:42:48 +02004648 free_extent_buffer(next);
Jan Schmidtd42244a2012-06-22 14:51:15 +02004649 btrfs_release_path(path);
4650 cond_resched();
4651 goto again;
4652 }
Josef Bacik0e463182020-11-06 16:27:30 -05004653 if (!ret)
4654 btrfs_tree_read_lock(next);
Chris Mason5cd57b22008-06-25 16:01:30 -04004655 }
Chris Masond97e63b2007-02-20 16:40:44 -05004656 break;
4657 }
4658 path->slots[level] = slot;
Chris Masond3977122009-01-05 21:25:51 -05004659 while (1) {
Chris Masond97e63b2007-02-20 16:40:44 -05004660 level--;
Chris Masond97e63b2007-02-20 16:40:44 -05004661 path->nodes[level] = next;
4662 path->slots[level] = 0;
Chris Masona74a4b92008-06-25 16:01:31 -04004663 if (!path->skip_locking)
Josef Bacikffeb03c2020-11-06 16:27:29 -05004664 path->locks[level] = BTRFS_READ_LOCK;
Chris Masond97e63b2007-02-20 16:40:44 -05004665 if (!level)
4666 break;
Chris Masonb4ce94d2009-02-04 09:25:08 -05004667
Liu Bod07b8522017-01-30 12:23:42 -08004668 ret = read_block_for_search(root, path, &next, level,
David Sterbacda79c52017-02-10 18:44:32 +01004669 0, &key);
Chris Mason8e73f272009-04-03 10:14:18 -04004670 if (ret == -EAGAIN)
4671 goto again;
4672
Chris Mason76a05b32009-05-14 13:24:30 -04004673 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02004674 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04004675 goto done;
4676 }
4677
Josef Bacikffeb03c2020-11-06 16:27:29 -05004678 if (!path->skip_locking)
Josef Bacik0e463182020-11-06 16:27:30 -05004679 btrfs_tree_read_lock(next);
Chris Masond97e63b2007-02-20 16:40:44 -05004680 }
Chris Mason8e73f272009-04-03 10:14:18 -04004681 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04004682done:
Chris Masonf7c79f32012-03-19 15:54:38 -04004683 unlock_up(path, 0, 1, 0, NULL);
Filipe Mananad96b3422021-11-22 12:03:38 +00004684 if (need_commit_sem) {
4685 int ret2;
4686
4687 path->need_commit_sem = 1;
4688 ret2 = finish_need_commit_sem_search(path);
4689 up_read(&fs_info->commit_root_sem);
4690 if (ret2)
4691 ret = ret2;
4692 }
Chris Mason8e73f272009-04-03 10:14:18 -04004693
4694 return ret;
Chris Masond97e63b2007-02-20 16:40:44 -05004695}
Chris Mason0b86a832008-03-24 15:01:56 -04004696
Chris Mason3f157a22008-06-25 16:01:31 -04004697/*
4698 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4699 * searching until it gets past min_objectid or finds an item of 'type'
4700 *
4701 * returns 0 if something is found, 1 if nothing was found and < 0 on error
4702 */
Chris Mason0b86a832008-03-24 15:01:56 -04004703int btrfs_previous_item(struct btrfs_root *root,
4704 struct btrfs_path *path, u64 min_objectid,
4705 int type)
4706{
4707 struct btrfs_key found_key;
4708 struct extent_buffer *leaf;
Chris Masone02119d2008-09-05 16:13:11 -04004709 u32 nritems;
Chris Mason0b86a832008-03-24 15:01:56 -04004710 int ret;
4711
Chris Masond3977122009-01-05 21:25:51 -05004712 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04004713 if (path->slots[0] == 0) {
4714 ret = btrfs_prev_leaf(root, path);
4715 if (ret != 0)
4716 return ret;
4717 } else {
4718 path->slots[0]--;
4719 }
4720 leaf = path->nodes[0];
Chris Masone02119d2008-09-05 16:13:11 -04004721 nritems = btrfs_header_nritems(leaf);
4722 if (nritems == 0)
4723 return 1;
4724 if (path->slots[0] == nritems)
4725 path->slots[0]--;
4726
Chris Mason0b86a832008-03-24 15:01:56 -04004727 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Chris Masone02119d2008-09-05 16:13:11 -04004728 if (found_key.objectid < min_objectid)
4729 break;
Yan Zheng0a4eefb2009-07-24 11:06:53 -04004730 if (found_key.type == type)
4731 return 0;
Chris Masone02119d2008-09-05 16:13:11 -04004732 if (found_key.objectid == min_objectid &&
4733 found_key.type < type)
4734 break;
Chris Mason0b86a832008-03-24 15:01:56 -04004735 }
4736 return 1;
4737}
Wang Shilongade2e0b2014-01-12 21:38:33 +08004738
4739/*
4740 * search in extent tree to find a previous Metadata/Data extent item with
4741 * min objecitd.
4742 *
4743 * returns 0 if something is found, 1 if nothing was found and < 0 on error
4744 */
4745int btrfs_previous_extent_item(struct btrfs_root *root,
4746 struct btrfs_path *path, u64 min_objectid)
4747{
4748 struct btrfs_key found_key;
4749 struct extent_buffer *leaf;
4750 u32 nritems;
4751 int ret;
4752
4753 while (1) {
4754 if (path->slots[0] == 0) {
Wang Shilongade2e0b2014-01-12 21:38:33 +08004755 ret = btrfs_prev_leaf(root, path);
4756 if (ret != 0)
4757 return ret;
4758 } else {
4759 path->slots[0]--;
4760 }
4761 leaf = path->nodes[0];
4762 nritems = btrfs_header_nritems(leaf);
4763 if (nritems == 0)
4764 return 1;
4765 if (path->slots[0] == nritems)
4766 path->slots[0]--;
4767
4768 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4769 if (found_key.objectid < min_objectid)
4770 break;
4771 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
4772 found_key.type == BTRFS_METADATA_ITEM_KEY)
4773 return 0;
4774 if (found_key.objectid == min_objectid &&
4775 found_key.type < BTRFS_EXTENT_ITEM_KEY)
4776 break;
4777 }
4778 return 1;
4779}