blob: e612db99988916e0b5d33c7c10680efd05b14c47 [file] [log] [blame]
David Sterbac1d7c512018-04-03 19:23:33 +02001// SPDX-License-Identifier: GPL-2.0
Chris Mason6cbd5572007-06-12 09:07:21 -04002/*
Chris Masond352ac62008-09-29 15:18:18 -04003 * Copyright (C) 2007,2008 Oracle. All rights reserved.
Chris Mason6cbd5572007-06-12 09:07:21 -04004 */
5
Chris Masona6b6e752007-10-15 16:22:39 -04006#include <linux/sched.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +09007#include <linux/slab.h>
Jan Schmidtbd989ba2012-05-16 17:18:50 +02008#include <linux/rbtree.h>
David Sterbaadf02122017-05-31 19:44:31 +02009#include <linux/mm.h>
Chris Masoneb60cea2007-02-02 09:18:22 -050010#include "ctree.h"
11#include "disk-io.h"
Chris Mason7f5c1512007-03-23 15:56:19 -040012#include "transaction.h"
Chris Mason5f39d392007-10-15 16:14:19 -040013#include "print-tree.h"
Chris Mason925baed2008-06-25 16:01:30 -040014#include "locking.h"
Nikolay Borisovde37aa52018-10-30 16:43:24 +020015#include "volumes.h"
Qu Wenruof616f5c2019-01-23 15:15:17 +080016#include "qgroup.h"
Chris Mason9a8dd152007-02-23 08:38:36 -050017
/*
 * Prototypes for static helpers that are referenced before their
 * definitions elsewhere in this file.
 */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      const struct btrfs_key *ins_key,
		      struct btrfs_path *path,
		      int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct extent_buffer *dst,
			  struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
		    int level, int slot);
Chris Masond97e63b2007-02-20 16:40:44 -050031
Johannes Thumshirnaf024ed2019-08-30 13:36:09 +020032static const struct btrfs_csums {
33 u16 size;
David Sterba59a0fcd2020-02-27 21:00:45 +010034 const char name[10];
35 const char driver[12];
Johannes Thumshirnaf024ed2019-08-30 13:36:09 +020036} btrfs_csums[] = {
37 [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
Johannes Thumshirn3951e7f2019-10-07 11:11:01 +020038 [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
Johannes Thumshirn3831bf02019-10-07 11:11:02 +020039 [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
David Sterba352ae072019-10-07 11:11:02 +020040 [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
41 .driver = "blake2b-256" },
Johannes Thumshirnaf024ed2019-08-30 13:36:09 +020042};
43
44int btrfs_super_csum_size(const struct btrfs_super_block *s)
45{
46 u16 t = btrfs_super_csum_type(s);
47 /*
48 * csum type is validated at mount time
49 */
50 return btrfs_csums[t].size;
51}
52
53const char *btrfs_super_csum_name(u16 csum_type)
54{
55 /* csum type is validated at mount time */
56 return btrfs_csums[csum_type].name;
57}
58
David Sterbab4e967b2019-10-08 18:41:33 +020059/*
60 * Return driver name if defined, otherwise the name that's also a valid driver
61 * name
62 */
63const char *btrfs_super_csum_driver(u16 csum_type)
64{
65 /* csum type is validated at mount time */
David Sterba59a0fcd2020-02-27 21:00:45 +010066 return btrfs_csums[csum_type].driver[0] ?
67 btrfs_csums[csum_type].driver :
David Sterbab4e967b2019-10-08 18:41:33 +020068 btrfs_csums[csum_type].name;
69}
70
David Sterba604997b2020-07-27 17:38:19 +020071size_t __attribute_const__ btrfs_get_num_csums(void)
David Sterbaf7cea562019-10-07 11:11:03 +020072{
73 return ARRAY_SIZE(btrfs_csums);
74}
75
Chris Mason2c90e5d2007-04-02 10:50:19 -040076struct btrfs_path *btrfs_alloc_path(void)
77{
Masahiro Yamadae2c89902016-09-13 04:35:52 +090078 return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
Chris Mason2c90e5d2007-04-02 10:50:19 -040079}
80
Chris Masond352ac62008-09-29 15:18:18 -040081/* this also releases the path */
Chris Mason2c90e5d2007-04-02 10:50:19 -040082void btrfs_free_path(struct btrfs_path *p)
83{
Jesper Juhlff175d52010-12-25 21:22:30 +000084 if (!p)
85 return;
David Sterbab3b4aa72011-04-21 01:20:15 +020086 btrfs_release_path(p);
Chris Mason2c90e5d2007-04-02 10:50:19 -040087 kmem_cache_free(btrfs_path_cachep, p);
88}
89
Chris Masond352ac62008-09-29 15:18:18 -040090/*
91 * path release drops references on the extent buffers in the path
92 * and it drops any locks held by this path
93 *
94 * It is safe to call this on paths that no locks or extent buffers held.
95 */
David Sterbab3b4aa72011-04-21 01:20:15 +020096noinline void btrfs_release_path(struct btrfs_path *p)
Chris Masoneb60cea2007-02-02 09:18:22 -050097{
98 int i;
Chris Masona2135012008-06-25 16:01:30 -040099
Chris Mason234b63a2007-03-13 10:46:10 -0400100 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
Chris Mason3f157a22008-06-25 16:01:31 -0400101 p->slots[i] = 0;
Chris Masoneb60cea2007-02-02 09:18:22 -0500102 if (!p->nodes[i])
Chris Mason925baed2008-06-25 16:01:30 -0400103 continue;
104 if (p->locks[i]) {
Chris Masonbd681512011-07-16 15:23:14 -0400105 btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -0400106 p->locks[i] = 0;
107 }
Chris Mason5f39d392007-10-15 16:14:19 -0400108 free_extent_buffer(p->nodes[i]);
Chris Mason3f157a22008-06-25 16:01:31 -0400109 p->nodes[i] = NULL;
Chris Masoneb60cea2007-02-02 09:18:22 -0500110 }
111}
112
Chris Masond352ac62008-09-29 15:18:18 -0400113/*
114 * safely gets a reference on the root node of a tree. A lock
115 * is not taken, so a concurrent writer may put a different node
116 * at the root of the tree. See btrfs_lock_root_node for the
117 * looping required.
118 *
119 * The extent buffer returned by this has a reference taken, so
120 * it won't disappear. It may stop being the root of the tree
121 * at any time because there are no locks held.
122 */
Chris Mason925baed2008-06-25 16:01:30 -0400123struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
124{
125 struct extent_buffer *eb;
Chris Mason240f62c2011-03-23 14:54:42 -0400126
Josef Bacik3083ee22012-03-09 16:01:49 -0500127 while (1) {
128 rcu_read_lock();
129 eb = rcu_dereference(root->node);
130
131 /*
132 * RCU really hurts here, we could free up the root node because
Nicholas D Steeves01327612016-05-19 21:18:45 -0400133 * it was COWed but we may not get the new root node yet so do
Josef Bacik3083ee22012-03-09 16:01:49 -0500134 * the inc_not_zero dance and if it doesn't work then
135 * synchronize_rcu and try again.
136 */
137 if (atomic_inc_not_zero(&eb->refs)) {
138 rcu_read_unlock();
139 break;
140 }
141 rcu_read_unlock();
142 synchronize_rcu();
143 }
Chris Mason925baed2008-06-25 16:01:30 -0400144 return eb;
145}
146
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800147/*
148 * Cowonly root (not-shareable trees, everything not subvolume or reloc roots),
149 * just get put onto a simple dirty list. Transaction walks this list to make
150 * sure they get properly updated on disk.
Chris Masond352ac62008-09-29 15:18:18 -0400151 */
Chris Mason0b86a832008-03-24 15:01:56 -0400152static void add_root_to_dirty_list(struct btrfs_root *root)
153{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400154 struct btrfs_fs_info *fs_info = root->fs_info;
155
Josef Bacike7070be2014-12-16 08:54:43 -0800156 if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
157 !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
158 return;
159
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400160 spin_lock(&fs_info->trans_lock);
Josef Bacike7070be2014-12-16 08:54:43 -0800161 if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
162 /* Want the extent tree to be the last on the list */
Misono Tomohiro4fd786e2018-08-06 14:25:24 +0900163 if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
Josef Bacike7070be2014-12-16 08:54:43 -0800164 list_move_tail(&root->dirty_list,
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400165 &fs_info->dirty_cowonly_roots);
Josef Bacike7070be2014-12-16 08:54:43 -0800166 else
167 list_move(&root->dirty_list,
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400168 &fs_info->dirty_cowonly_roots);
Chris Mason0b86a832008-03-24 15:01:56 -0400169 }
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400170 spin_unlock(&fs_info->trans_lock);
Chris Mason0b86a832008-03-24 15:01:56 -0400171}
172
Chris Masond352ac62008-09-29 15:18:18 -0400173/*
174 * used by snapshot creation to make a copy of a root for a tree with
175 * a given objectid. The buffer with the new root node is returned in
176 * cow_ret, and this func returns zero on success or a negative error code.
177 */
Chris Masonbe20aa92007-12-17 20:14:01 -0500178int btrfs_copy_root(struct btrfs_trans_handle *trans,
179 struct btrfs_root *root,
180 struct extent_buffer *buf,
181 struct extent_buffer **cow_ret, u64 new_root_objectid)
182{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400183 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonbe20aa92007-12-17 20:14:01 -0500184 struct extent_buffer *cow;
Chris Masonbe20aa92007-12-17 20:14:01 -0500185 int ret = 0;
186 int level;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400187 struct btrfs_disk_key disk_key;
Chris Masonbe20aa92007-12-17 20:14:01 -0500188
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800189 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400190 trans->transid != fs_info->running_transaction->transid);
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800191 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Miao Xie27cdeb72014-04-02 19:51:05 +0800192 trans->transid != root->last_trans);
Chris Masonbe20aa92007-12-17 20:14:01 -0500193
194 level = btrfs_header_level(buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400195 if (level == 0)
196 btrfs_item_key(buf, &disk_key, 0);
197 else
198 btrfs_node_key(buf, &disk_key, 0);
Zheng Yan31840ae2008-09-23 13:14:14 -0400199
David Sterba4d75f8a2014-06-15 01:54:12 +0200200 cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
Josef Bacikcf6f34a2020-08-20 11:46:07 -0400201 &disk_key, level, buf->start, 0,
202 BTRFS_NESTING_NEW_ROOT);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400203 if (IS_ERR(cow))
Chris Masonbe20aa92007-12-17 20:14:01 -0500204 return PTR_ERR(cow);
205
David Sterba58e80122016-11-08 18:30:31 +0100206 copy_extent_buffer_full(cow, buf);
Chris Masonbe20aa92007-12-17 20:14:01 -0500207 btrfs_set_header_bytenr(cow, cow->start);
208 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400209 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
210 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
211 BTRFS_HEADER_FLAG_RELOC);
212 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
213 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
214 else
215 btrfs_set_header_owner(cow, new_root_objectid);
Chris Masonbe20aa92007-12-17 20:14:01 -0500216
Nikolay Borisovde37aa52018-10-30 16:43:24 +0200217 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
Yan Zheng2b820322008-11-17 21:11:30 -0500218
Chris Masonbe20aa92007-12-17 20:14:01 -0500219 WARN_ON(btrfs_header_generation(buf) > trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400220 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -0700221 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400222 else
Josef Bacike339a6b2014-07-02 10:54:25 -0700223 ret = btrfs_inc_ref(trans, root, cow, 0);
Josef Bacik4d3edf72021-01-14 14:02:46 -0500224 if (ret) {
225 btrfs_abort_transaction(trans, ret);
Chris Masonbe20aa92007-12-17 20:14:01 -0500226 return ret;
Josef Bacik4d3edf72021-01-14 14:02:46 -0500227 }
Chris Masonbe20aa92007-12-17 20:14:01 -0500228
229 btrfs_mark_buffer_dirty(cow);
230 *cow_ret = cow;
231 return 0;
232}
233
/* Kinds of modification that can be recorded in the tree mod log. */
enum mod_log_op {
	/* operations on a single key/pointer slot */
	MOD_LOG_KEY_REPLACE = 0,
	MOD_LOG_KEY_ADD,
	MOD_LOG_KEY_REMOVE,
	MOD_LOG_KEY_REMOVE_WHILE_FREEING,
	MOD_LOG_KEY_REMOVE_WHILE_MOVING,
	/* a range of keys moved within one buffer */
	MOD_LOG_MOVE_KEYS,
	/* the root node of a tree was replaced */
	MOD_LOG_ROOT_REPLACE,
};
243
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200244struct tree_mod_root {
245 u64 logical;
246 u8 level;
247};
248
249struct tree_mod_elem {
250 struct rb_node node;
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530251 u64 logical;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200252 u64 seq;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200253 enum mod_log_op op;
254
255 /* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
256 int slot;
257
258 /* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
259 u64 generation;
260
261 /* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
262 struct btrfs_disk_key key;
263 u64 blockptr;
264
265 /* this is used for op == MOD_LOG_MOVE_KEYS */
David Sterbab6dfa352018-03-05 15:31:18 +0100266 struct {
267 int dst_slot;
268 int nr_items;
269 } move;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200270
271 /* this is used for op == MOD_LOG_ROOT_REPLACE */
272 struct tree_mod_root old_root;
273};
274
Jan Schmidt097b8a72012-06-21 11:08:04 +0200275/*
Josef Bacikfcebe452014-05-13 17:30:47 -0700276 * Pull a new tree mod seq number for our operation.
Jan Schmidtfc36ed7e2013-04-24 16:57:33 +0000277 */
Josef Bacikfcebe452014-05-13 17:30:47 -0700278static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
Jan Schmidtfc36ed7e2013-04-24 16:57:33 +0000279{
280 return atomic64_inc_return(&fs_info->tree_mod_seq);
281}
282
283/*
Jan Schmidt097b8a72012-06-21 11:08:04 +0200284 * This adds a new blocker to the tree mod log's blocker list if the @elem
285 * passed does not already have a sequence number set. So when a caller expects
286 * to record tree modifications, it should ensure to set elem->seq to zero
287 * before calling btrfs_get_tree_mod_seq.
288 * Returns a fresh, unused tree log modification sequence number, even if no new
289 * blocker was added.
290 */
291u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
292 struct seq_list *elem)
293{
David Sterbab1a09f12018-03-05 15:43:41 +0100294 write_lock(&fs_info->tree_mod_log_lock);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200295 if (!elem->seq) {
Josef Bacikfcebe452014-05-13 17:30:47 -0700296 elem->seq = btrfs_inc_tree_mod_seq(fs_info);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200297 list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
298 }
David Sterbab1a09f12018-03-05 15:43:41 +0100299 write_unlock(&fs_info->tree_mod_log_lock);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200300
Josef Bacikfcebe452014-05-13 17:30:47 -0700301 return elem->seq;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200302}
303
304void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
305 struct seq_list *elem)
306{
307 struct rb_root *tm_root;
308 struct rb_node *node;
309 struct rb_node *next;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200310 struct tree_mod_elem *tm;
311 u64 min_seq = (u64)-1;
312 u64 seq_putting = elem->seq;
313
314 if (!seq_putting)
315 return;
316
Filipe Manana7227ff42020-01-22 12:23:20 +0000317 write_lock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200318 list_del(&elem->list);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200319 elem->seq = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200320
Filipe Manana42836cf2020-01-22 12:23:54 +0000321 if (!list_empty(&fs_info->tree_mod_seq_list)) {
322 struct seq_list *first;
323
324 first = list_first_entry(&fs_info->tree_mod_seq_list,
325 struct seq_list, list);
326 if (seq_putting > first->seq) {
327 /*
328 * Blocker with lower sequence number exists, we
329 * cannot remove anything from the log.
330 */
331 write_unlock(&fs_info->tree_mod_log_lock);
332 return;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200333 }
Filipe Manana42836cf2020-01-22 12:23:54 +0000334 min_seq = first->seq;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200335 }
Jan Schmidt097b8a72012-06-21 11:08:04 +0200336
337 /*
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200338 * anything that's lower than the lowest existing (read: blocked)
339 * sequence number can be removed from the tree.
340 */
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200341 tm_root = &fs_info->tree_mod_log;
342 for (node = rb_first(tm_root); node; node = next) {
343 next = rb_next(node);
Geliang Tang6b4df8b2016-12-19 22:53:41 +0800344 tm = rb_entry(node, struct tree_mod_elem, node);
Filipe Manana6609fee2019-12-06 12:27:39 +0000345 if (tm->seq >= min_seq)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200346 continue;
347 rb_erase(node, tm_root);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200348 kfree(tm);
349 }
David Sterbab1a09f12018-03-05 15:43:41 +0100350 write_unlock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200351}
352
353/*
354 * key order of the log:
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530355 * node/leaf start address -> sequence
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200356 *
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530357 * The 'start address' is the logical address of the *new* root node
358 * for root replace operations, or the logical address of the affected
359 * block for all other operations.
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200360 */
361static noinline int
362__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
363{
364 struct rb_root *tm_root;
365 struct rb_node **new;
366 struct rb_node *parent = NULL;
367 struct tree_mod_elem *cur;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200368
David Sterba73e82fe2019-03-27 16:19:55 +0100369 lockdep_assert_held_write(&fs_info->tree_mod_log_lock);
370
Josef Bacikfcebe452014-05-13 17:30:47 -0700371 tm->seq = btrfs_inc_tree_mod_seq(fs_info);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200372
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200373 tm_root = &fs_info->tree_mod_log;
374 new = &tm_root->rb_node;
375 while (*new) {
Geliang Tang6b4df8b2016-12-19 22:53:41 +0800376 cur = rb_entry(*new, struct tree_mod_elem, node);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200377 parent = *new;
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530378 if (cur->logical < tm->logical)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200379 new = &((*new)->rb_left);
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530380 else if (cur->logical > tm->logical)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200381 new = &((*new)->rb_right);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200382 else if (cur->seq < tm->seq)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200383 new = &((*new)->rb_left);
Jan Schmidt097b8a72012-06-21 11:08:04 +0200384 else if (cur->seq > tm->seq)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200385 new = &((*new)->rb_right);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000386 else
387 return -EEXIST;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200388 }
389
390 rb_link_node(&tm->node, parent, new);
391 rb_insert_color(&tm->node, tm_root);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000392 return 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200393}
394
Jan Schmidt097b8a72012-06-21 11:08:04 +0200395/*
396 * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
397 * returns zero with the tree_mod_log_lock acquired. The caller must hold
398 * this until all tree mod log insertions are recorded in the rb tree and then
David Sterbab1a09f12018-03-05 15:43:41 +0100399 * write unlock fs_info::tree_mod_log_lock.
Jan Schmidt097b8a72012-06-21 11:08:04 +0200400 */
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200401static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
402 struct extent_buffer *eb) {
403 smp_mb();
404 if (list_empty(&(fs_info)->tree_mod_seq_list))
405 return 1;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200406 if (eb && btrfs_header_level(eb) == 0)
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200407 return 1;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000408
David Sterbab1a09f12018-03-05 15:43:41 +0100409 write_lock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000410 if (list_empty(&(fs_info)->tree_mod_seq_list)) {
David Sterbab1a09f12018-03-05 15:43:41 +0100411 write_unlock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000412 return 1;
413 }
414
Jan Schmidte9b7fd42012-05-31 14:59:09 +0200415 return 0;
416}
417
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000418/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
419static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
420 struct extent_buffer *eb)
421{
422 smp_mb();
423 if (list_empty(&(fs_info)->tree_mod_seq_list))
424 return 0;
425 if (eb && btrfs_header_level(eb) == 0)
426 return 0;
427
428 return 1;
429}
430
431static struct tree_mod_elem *
432alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
433 enum mod_log_op op, gfp_t flags)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200434{
Jan Schmidt097b8a72012-06-21 11:08:04 +0200435 struct tree_mod_elem *tm;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200436
Josef Bacikc8cc6342013-07-01 16:18:19 -0400437 tm = kzalloc(sizeof(*tm), flags);
438 if (!tm)
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000439 return NULL;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200440
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530441 tm->logical = eb->start;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200442 if (op != MOD_LOG_KEY_ADD) {
443 btrfs_node_key(eb, &tm->key, slot);
444 tm->blockptr = btrfs_node_blockptr(eb, slot);
445 }
446 tm->op = op;
447 tm->slot = slot;
448 tm->generation = btrfs_node_ptr_generation(eb, slot);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000449 RB_CLEAR_NODE(&tm->node);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200450
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000451 return tm;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200452}
453
David Sterbae09c2ef2018-03-05 15:09:03 +0100454static noinline int tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
455 enum mod_log_op op, gfp_t flags)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200456{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000457 struct tree_mod_elem *tm;
458 int ret;
459
David Sterbae09c2ef2018-03-05 15:09:03 +0100460 if (!tree_mod_need_log(eb->fs_info, eb))
Jan Schmidt097b8a72012-06-21 11:08:04 +0200461 return 0;
462
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000463 tm = alloc_tree_mod_elem(eb, slot, op, flags);
464 if (!tm)
465 return -ENOMEM;
466
David Sterbae09c2ef2018-03-05 15:09:03 +0100467 if (tree_mod_dont_log(eb->fs_info, eb)) {
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000468 kfree(tm);
469 return 0;
470 }
471
David Sterbae09c2ef2018-03-05 15:09:03 +0100472 ret = __tree_mod_log_insert(eb->fs_info, tm);
David Sterbab1a09f12018-03-05 15:43:41 +0100473 write_unlock(&eb->fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000474 if (ret)
475 kfree(tm);
476
477 return ret;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200478}
479
David Sterba6074d452018-03-05 15:03:52 +0100480static noinline int tree_mod_log_insert_move(struct extent_buffer *eb,
481 int dst_slot, int src_slot, int nr_items)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200482{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000483 struct tree_mod_elem *tm = NULL;
484 struct tree_mod_elem **tm_list = NULL;
485 int ret = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200486 int i;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000487 int locked = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200488
David Sterba6074d452018-03-05 15:03:52 +0100489 if (!tree_mod_need_log(eb->fs_info, eb))
Jan Schmidtf3956942012-05-31 15:02:32 +0200490 return 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200491
David Sterba176ef8f2017-03-28 14:35:01 +0200492 tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000493 if (!tm_list)
494 return -ENOMEM;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200495
David Sterba176ef8f2017-03-28 14:35:01 +0200496 tm = kzalloc(sizeof(*tm), GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000497 if (!tm) {
498 ret = -ENOMEM;
499 goto free_tms;
500 }
Jan Schmidtf3956942012-05-31 15:02:32 +0200501
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530502 tm->logical = eb->start;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200503 tm->slot = src_slot;
504 tm->move.dst_slot = dst_slot;
505 tm->move.nr_items = nr_items;
506 tm->op = MOD_LOG_MOVE_KEYS;
507
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000508 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
509 tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
David Sterba176ef8f2017-03-28 14:35:01 +0200510 MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000511 if (!tm_list[i]) {
512 ret = -ENOMEM;
513 goto free_tms;
514 }
515 }
516
David Sterba6074d452018-03-05 15:03:52 +0100517 if (tree_mod_dont_log(eb->fs_info, eb))
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000518 goto free_tms;
519 locked = 1;
520
521 /*
522 * When we override something during the move, we log these removals.
523 * This can only happen when we move towards the beginning of the
524 * buffer, i.e. dst_slot < src_slot.
525 */
526 for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
David Sterba6074d452018-03-05 15:03:52 +0100527 ret = __tree_mod_log_insert(eb->fs_info, tm_list[i]);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000528 if (ret)
529 goto free_tms;
530 }
531
David Sterba6074d452018-03-05 15:03:52 +0100532 ret = __tree_mod_log_insert(eb->fs_info, tm);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000533 if (ret)
534 goto free_tms;
David Sterbab1a09f12018-03-05 15:43:41 +0100535 write_unlock(&eb->fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000536 kfree(tm_list);
537
538 return 0;
539free_tms:
540 for (i = 0; i < nr_items; i++) {
541 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
David Sterba6074d452018-03-05 15:03:52 +0100542 rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000543 kfree(tm_list[i]);
544 }
545 if (locked)
David Sterbab1a09f12018-03-05 15:43:41 +0100546 write_unlock(&eb->fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000547 kfree(tm_list);
548 kfree(tm);
549
550 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200551}
552
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000553static inline int
554__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
555 struct tree_mod_elem **tm_list,
556 int nritems)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200557{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000558 int i, j;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200559 int ret;
560
Jan Schmidt097b8a72012-06-21 11:08:04 +0200561 for (i = nritems - 1; i >= 0; i--) {
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000562 ret = __tree_mod_log_insert(fs_info, tm_list[i]);
563 if (ret) {
564 for (j = nritems - 1; j > i; j--)
565 rb_erase(&tm_list[j]->node,
566 &fs_info->tree_mod_log);
567 return ret;
568 }
Jan Schmidt097b8a72012-06-21 11:08:04 +0200569 }
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000570
571 return 0;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200572}
573
David Sterba95b757c2018-03-05 15:22:30 +0100574static noinline int tree_mod_log_insert_root(struct extent_buffer *old_root,
575 struct extent_buffer *new_root, int log_removal)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200576{
David Sterba95b757c2018-03-05 15:22:30 +0100577 struct btrfs_fs_info *fs_info = old_root->fs_info;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000578 struct tree_mod_elem *tm = NULL;
579 struct tree_mod_elem **tm_list = NULL;
580 int nritems = 0;
581 int ret = 0;
582 int i;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200583
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000584 if (!tree_mod_need_log(fs_info, NULL))
Jan Schmidt097b8a72012-06-21 11:08:04 +0200585 return 0;
586
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000587 if (log_removal && btrfs_header_level(old_root) > 0) {
588 nritems = btrfs_header_nritems(old_root);
David Sterba31e818f2015-02-20 18:00:26 +0100589 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
David Sterbabcc8e072017-03-28 14:35:42 +0200590 GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000591 if (!tm_list) {
592 ret = -ENOMEM;
593 goto free_tms;
594 }
595 for (i = 0; i < nritems; i++) {
596 tm_list[i] = alloc_tree_mod_elem(old_root, i,
David Sterbabcc8e072017-03-28 14:35:42 +0200597 MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000598 if (!tm_list[i]) {
599 ret = -ENOMEM;
600 goto free_tms;
601 }
602 }
603 }
Jan Schmidtd9abbf12013-03-20 13:49:48 +0000604
David Sterbabcc8e072017-03-28 14:35:42 +0200605 tm = kzalloc(sizeof(*tm), GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000606 if (!tm) {
607 ret = -ENOMEM;
608 goto free_tms;
609 }
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200610
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530611 tm->logical = new_root->start;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200612 tm->old_root.logical = old_root->start;
613 tm->old_root.level = btrfs_header_level(old_root);
614 tm->generation = btrfs_header_generation(old_root);
615 tm->op = MOD_LOG_ROOT_REPLACE;
616
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000617 if (tree_mod_dont_log(fs_info, NULL))
618 goto free_tms;
619
620 if (tm_list)
621 ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
622 if (!ret)
623 ret = __tree_mod_log_insert(fs_info, tm);
624
David Sterbab1a09f12018-03-05 15:43:41 +0100625 write_unlock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000626 if (ret)
627 goto free_tms;
628 kfree(tm_list);
629
630 return ret;
631
632free_tms:
633 if (tm_list) {
634 for (i = 0; i < nritems; i++)
635 kfree(tm_list[i]);
636 kfree(tm_list);
637 }
638 kfree(tm);
639
640 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200641}
642
643static struct tree_mod_elem *
644__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
645 int smallest)
646{
647 struct rb_root *tm_root;
648 struct rb_node *node;
649 struct tree_mod_elem *cur = NULL;
650 struct tree_mod_elem *found = NULL;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200651
David Sterbab1a09f12018-03-05 15:43:41 +0100652 read_lock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200653 tm_root = &fs_info->tree_mod_log;
654 node = tm_root->rb_node;
655 while (node) {
Geliang Tang6b4df8b2016-12-19 22:53:41 +0800656 cur = rb_entry(node, struct tree_mod_elem, node);
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530657 if (cur->logical < start) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200658 node = node->rb_left;
Chandan Rajendra298cfd32016-01-21 15:55:59 +0530659 } else if (cur->logical > start) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200660 node = node->rb_right;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200661 } else if (cur->seq < min_seq) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200662 node = node->rb_left;
663 } else if (!smallest) {
664 /* we want the node with the highest seq */
665 if (found)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200666 BUG_ON(found->seq > cur->seq);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200667 found = cur;
668 node = node->rb_left;
Jan Schmidt097b8a72012-06-21 11:08:04 +0200669 } else if (cur->seq > min_seq) {
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200670 /* we want the node with the smallest seq */
671 if (found)
Jan Schmidt097b8a72012-06-21 11:08:04 +0200672 BUG_ON(found->seq < cur->seq);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200673 found = cur;
674 node = node->rb_right;
675 } else {
676 found = cur;
677 break;
678 }
679 }
David Sterbab1a09f12018-03-05 15:43:41 +0100680 read_unlock(&fs_info->tree_mod_log_lock);
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200681
682 return found;
683}
684
685/*
686 * this returns the element from the log with the smallest time sequence
687 * value that's in the log (the oldest log item). any element with a time
688 * sequence lower than min_seq will be ignored.
689 */
690static struct tree_mod_elem *
691tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
692 u64 min_seq)
693{
694 return __tree_mod_log_search(fs_info, start, min_seq, 1);
695}
696
697/*
698 * this returns the element from the log with the largest time sequence
699 * value that's in the log (the most recent log item). any element with
700 * a time sequence lower than min_seq will be ignored.
701 */
702static struct tree_mod_elem *
703tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
704{
705 return __tree_mod_log_search(fs_info, start, min_seq, 0);
706}
707
David Sterbaed874f02019-03-20 14:22:04 +0100708static noinline int tree_mod_log_eb_copy(struct extent_buffer *dst,
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200709 struct extent_buffer *src, unsigned long dst_offset,
Jan Schmidt90f8d622013-04-13 13:19:53 +0000710 unsigned long src_offset, int nr_items)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200711{
David Sterbaed874f02019-03-20 14:22:04 +0100712 struct btrfs_fs_info *fs_info = dst->fs_info;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000713 int ret = 0;
714 struct tree_mod_elem **tm_list = NULL;
715 struct tree_mod_elem **tm_list_add, **tm_list_rem;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200716 int i;
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000717 int locked = 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200718
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000719 if (!tree_mod_need_log(fs_info, NULL))
720 return 0;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200721
Josef Bacikc8cc6342013-07-01 16:18:19 -0400722 if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000723 return 0;
724
David Sterba31e818f2015-02-20 18:00:26 +0100725 tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000726 GFP_NOFS);
727 if (!tm_list)
728 return -ENOMEM;
729
730 tm_list_add = tm_list;
731 tm_list_rem = tm_list + nr_items;
732 for (i = 0; i < nr_items; i++) {
733 tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
734 MOD_LOG_KEY_REMOVE, GFP_NOFS);
735 if (!tm_list_rem[i]) {
736 ret = -ENOMEM;
737 goto free_tms;
738 }
739
740 tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
741 MOD_LOG_KEY_ADD, GFP_NOFS);
742 if (!tm_list_add[i]) {
743 ret = -ENOMEM;
744 goto free_tms;
745 }
746 }
747
748 if (tree_mod_dont_log(fs_info, NULL))
749 goto free_tms;
750 locked = 1;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200751
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200752 for (i = 0; i < nr_items; i++) {
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000753 ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
754 if (ret)
755 goto free_tms;
756 ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
757 if (ret)
758 goto free_tms;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200759 }
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000760
David Sterbab1a09f12018-03-05 15:43:41 +0100761 write_unlock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000762 kfree(tm_list);
763
764 return 0;
765
766free_tms:
767 for (i = 0; i < nr_items * 2; i++) {
768 if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
769 rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
770 kfree(tm_list[i]);
771 }
772 if (locked)
David Sterbab1a09f12018-03-05 15:43:41 +0100773 write_unlock(&fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000774 kfree(tm_list);
775
776 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200777}
778
David Sterbadb7279a2018-03-05 15:14:25 +0100779static noinline int tree_mod_log_free_eb(struct extent_buffer *eb)
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200780{
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000781 struct tree_mod_elem **tm_list = NULL;
782 int nritems = 0;
783 int i;
784 int ret = 0;
785
786 if (btrfs_header_level(eb) == 0)
787 return 0;
788
David Sterbadb7279a2018-03-05 15:14:25 +0100789 if (!tree_mod_need_log(eb->fs_info, NULL))
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000790 return 0;
791
792 nritems = btrfs_header_nritems(eb);
David Sterba31e818f2015-02-20 18:00:26 +0100793 tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000794 if (!tm_list)
795 return -ENOMEM;
796
797 for (i = 0; i < nritems; i++) {
798 tm_list[i] = alloc_tree_mod_elem(eb, i,
799 MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
800 if (!tm_list[i]) {
801 ret = -ENOMEM;
802 goto free_tms;
803 }
804 }
805
David Sterbadb7279a2018-03-05 15:14:25 +0100806 if (tree_mod_dont_log(eb->fs_info, eb))
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000807 goto free_tms;
808
David Sterbadb7279a2018-03-05 15:14:25 +0100809 ret = __tree_mod_log_free_eb(eb->fs_info, tm_list, nritems);
David Sterbab1a09f12018-03-05 15:43:41 +0100810 write_unlock(&eb->fs_info->tree_mod_log_lock);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +0000811 if (ret)
812 goto free_tms;
813 kfree(tm_list);
814
815 return 0;
816
817free_tms:
818 for (i = 0; i < nritems; i++)
819 kfree(tm_list[i]);
820 kfree(tm_list);
821
822 return ret;
Jan Schmidtbd989ba2012-05-16 17:18:50 +0200823}
824
Chris Masond352ac62008-09-29 15:18:18 -0400825/*
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400826 * check if the tree block can be shared by multiple trees
827 */
828int btrfs_block_can_be_shared(struct btrfs_root *root,
829 struct extent_buffer *buf)
830{
831 /*
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800832 * Tree blocks not in shareable trees and tree roots are never shared.
833 * If a block was allocated after the last snapshot and the block was
834 * not allocated by tree relocation, we know the block is not shared.
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400835 */
Qu Wenruo92a7cc42020-05-15 14:01:40 +0800836 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400837 buf != root->node && buf != root->commit_root &&
838 (btrfs_header_generation(buf) <=
839 btrfs_root_last_snapshot(&root->root_item) ||
840 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
841 return 1;
Nikolay Borisova79865c2018-06-21 09:45:00 +0300842
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400843 return 0;
844}
845
846static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
847 struct btrfs_root *root,
848 struct extent_buffer *buf,
Yan, Zhengf0486c62010-05-16 10:46:25 -0400849 struct extent_buffer *cow,
850 int *last_ref)
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400851{
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400852 struct btrfs_fs_info *fs_info = root->fs_info;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400853 u64 refs;
854 u64 owner;
855 u64 flags;
856 u64 new_flags = 0;
857 int ret;
858
859 /*
860 * Backrefs update rules:
861 *
862 * Always use full backrefs for extent pointers in tree block
863 * allocated by tree relocation.
864 *
865 * If a shared tree block is no longer referenced by its owner
866 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
867 * use full backrefs for extent pointers in tree block.
868 *
869 * If a tree block is been relocating
870 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
871 * use full backrefs for extent pointers in tree block.
872 * The reason for this is some operations (such as drop tree)
873 * are only allowed for blocks use full backrefs.
874 */
875
876 if (btrfs_block_can_be_shared(root, buf)) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -0400877 ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
Josef Bacik3173a182013-03-07 14:22:04 -0500878 btrfs_header_level(buf), 1,
879 &refs, &flags);
Mark Fashehbe1a5562011-08-08 13:20:18 -0700880 if (ret)
881 return ret;
Mark Fashehe5df9572011-08-29 14:17:04 -0700882 if (refs == 0) {
883 ret = -EROFS;
Jeff Mahoney0b246af2016-06-22 18:54:23 -0400884 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fashehe5df9572011-08-29 14:17:04 -0700885 return ret;
886 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400887 } else {
888 refs = 1;
889 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
890 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
891 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
892 else
893 flags = 0;
894 }
895
896 owner = btrfs_header_owner(buf);
897 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
898 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
899
900 if (refs > 1) {
901 if ((owner == root->root_key.objectid ||
902 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
903 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
Josef Bacike339a6b2014-07-02 10:54:25 -0700904 ret = btrfs_inc_ref(trans, root, buf, 1);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500905 if (ret)
906 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400907
908 if (root->root_key.objectid ==
909 BTRFS_TREE_RELOC_OBJECTID) {
Josef Bacike339a6b2014-07-02 10:54:25 -0700910 ret = btrfs_dec_ref(trans, root, buf, 0);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500911 if (ret)
912 return ret;
Josef Bacike339a6b2014-07-02 10:54:25 -0700913 ret = btrfs_inc_ref(trans, root, cow, 1);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500914 if (ret)
915 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400916 }
917 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
918 } else {
919
920 if (root->root_key.objectid ==
921 BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -0700922 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400923 else
Josef Bacike339a6b2014-07-02 10:54:25 -0700924 ret = btrfs_inc_ref(trans, root, cow, 0);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500925 if (ret)
926 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400927 }
928 if (new_flags != 0) {
Josef Bacikb1c79e02013-05-09 13:49:30 -0400929 int level = btrfs_header_level(buf);
930
David Sterba42c9d0b2019-03-20 11:54:13 +0100931 ret = btrfs_set_disk_extent_flags(trans, buf,
Josef Bacikb1c79e02013-05-09 13:49:30 -0400932 new_flags, level, 0);
Mark Fashehbe1a5562011-08-08 13:20:18 -0700933 if (ret)
934 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400935 }
936 } else {
937 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
938 if (root->root_key.objectid ==
939 BTRFS_TREE_RELOC_OBJECTID)
Josef Bacike339a6b2014-07-02 10:54:25 -0700940 ret = btrfs_inc_ref(trans, root, cow, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400941 else
Josef Bacike339a6b2014-07-02 10:54:25 -0700942 ret = btrfs_inc_ref(trans, root, cow, 0);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500943 if (ret)
944 return ret;
Josef Bacike339a6b2014-07-02 10:54:25 -0700945 ret = btrfs_dec_ref(trans, root, buf, 1);
Jeff Mahoney692826b2017-11-21 13:58:49 -0500946 if (ret)
947 return ret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400948 }
David Sterba6a884d7d2019-03-20 14:30:02 +0100949 btrfs_clean_tree_block(buf);
Yan, Zhengf0486c62010-05-16 10:46:25 -0400950 *last_ref = 1;
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400951 }
952 return 0;
953}
954
Filipe Mananaa6279472019-01-25 11:48:51 +0000955static struct extent_buffer *alloc_tree_block_no_bg_flush(
956 struct btrfs_trans_handle *trans,
957 struct btrfs_root *root,
958 u64 parent_start,
959 const struct btrfs_disk_key *disk_key,
960 int level,
961 u64 hint,
Josef Bacik9631e4c2020-08-20 11:46:03 -0400962 u64 empty_size,
963 enum btrfs_lock_nesting nest)
Filipe Mananaa6279472019-01-25 11:48:51 +0000964{
965 struct btrfs_fs_info *fs_info = root->fs_info;
966 struct extent_buffer *ret;
967
968 /*
969 * If we are COWing a node/leaf from the extent, chunk, device or free
970 * space trees, make sure that we do not finish block group creation of
971 * pending block groups. We do this to avoid a deadlock.
972 * COWing can result in allocation of a new chunk, and flushing pending
973 * block groups (btrfs_create_pending_block_groups()) can be triggered
974 * when finishing allocation of a new chunk. Creation of a pending block
975 * group modifies the extent, chunk, device and free space trees,
976 * therefore we could deadlock with ourselves since we are holding a
977 * lock on an extent buffer that btrfs_create_pending_block_groups() may
978 * try to COW later.
979 * For similar reasons, we also need to delay flushing pending block
980 * groups when splitting a leaf or node, from one of those trees, since
981 * we are holding a write lock on it and its parent or when inserting a
982 * new root node for one of those trees.
983 */
984 if (root == fs_info->extent_root ||
985 root == fs_info->chunk_root ||
986 root == fs_info->dev_root ||
987 root == fs_info->free_space_root)
988 trans->can_flush_pending_bgs = false;
989
990 ret = btrfs_alloc_tree_block(trans, root, parent_start,
991 root->root_key.objectid, disk_key, level,
Josef Bacik9631e4c2020-08-20 11:46:03 -0400992 hint, empty_size, nest);
Filipe Mananaa6279472019-01-25 11:48:51 +0000993 trans->can_flush_pending_bgs = true;
994
995 return ret;
996}
997
Yan Zheng5d4f98a2009-06-10 10:45:14 -0400998/*
Chris Masond3977122009-01-05 21:25:51 -0500999 * does the dirty work in cow of a single block. The parent block (if
1000 * supplied) is updated to point to the new cow copy. The new buffer is marked
1001 * dirty and returned locked. If you modify the block it needs to be marked
1002 * dirty again.
Chris Masond352ac62008-09-29 15:18:18 -04001003 *
1004 * search_start -- an allocation hint for the new block
1005 *
Chris Masond3977122009-01-05 21:25:51 -05001006 * empty_size -- a hint that you plan on doing more cow. This is the size in
1007 * bytes the allocator should try to find free next to the block it returns.
1008 * This is just a hint and may be ignored by the allocator.
Chris Masond352ac62008-09-29 15:18:18 -04001009 */
Chris Masond3977122009-01-05 21:25:51 -05001010static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001011 struct btrfs_root *root,
1012 struct extent_buffer *buf,
1013 struct extent_buffer *parent, int parent_slot,
1014 struct extent_buffer **cow_ret,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001015 u64 search_start, u64 empty_size,
1016 enum btrfs_lock_nesting nest)
Chris Mason6702ed42007-08-07 16:15:09 -04001017{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001018 struct btrfs_fs_info *fs_info = root->fs_info;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001019 struct btrfs_disk_key disk_key;
Chris Mason5f39d392007-10-15 16:14:19 -04001020 struct extent_buffer *cow;
Mark Fashehbe1a5562011-08-08 13:20:18 -07001021 int level, ret;
Yan, Zhengf0486c62010-05-16 10:46:25 -04001022 int last_ref = 0;
Chris Mason925baed2008-06-25 16:01:30 -04001023 int unlock_orig = 0;
Goldwyn Rodrigues0f5053e2016-09-22 14:11:34 -05001024 u64 parent_start = 0;
Chris Mason6702ed42007-08-07 16:15:09 -04001025
Chris Mason925baed2008-06-25 16:01:30 -04001026 if (*cow_ret == buf)
1027 unlock_orig = 1;
1028
Chris Masonb9447ef82009-03-09 11:45:38 -04001029 btrfs_assert_tree_locked(buf);
Chris Mason925baed2008-06-25 16:01:30 -04001030
Qu Wenruo92a7cc42020-05-15 14:01:40 +08001031 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001032 trans->transid != fs_info->running_transaction->transid);
Qu Wenruo92a7cc42020-05-15 14:01:40 +08001033 WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
Miao Xie27cdeb72014-04-02 19:51:05 +08001034 trans->transid != root->last_trans);
Chris Mason5f39d392007-10-15 16:14:19 -04001035
Chris Mason7bb86312007-12-11 09:25:06 -05001036 level = btrfs_header_level(buf);
Zheng Yan31840ae2008-09-23 13:14:14 -04001037
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001038 if (level == 0)
1039 btrfs_item_key(buf, &disk_key, 0);
1040 else
1041 btrfs_node_key(buf, &disk_key, 0);
1042
Goldwyn Rodrigues0f5053e2016-09-22 14:11:34 -05001043 if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
1044 parent_start = parent->start;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001045
Filipe Mananaa6279472019-01-25 11:48:51 +00001046 cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001047 level, search_start, empty_size, nest);
Chris Mason6702ed42007-08-07 16:15:09 -04001048 if (IS_ERR(cow))
1049 return PTR_ERR(cow);
1050
Chris Masonb4ce94d2009-02-04 09:25:08 -05001051 /* cow is set to blocking by btrfs_init_new_buffer */
1052
David Sterba58e80122016-11-08 18:30:31 +01001053 copy_extent_buffer_full(cow, buf);
Chris Masondb945352007-10-15 16:15:53 -04001054 btrfs_set_header_bytenr(cow, cow->start);
Chris Mason5f39d392007-10-15 16:14:19 -04001055 btrfs_set_header_generation(cow, trans->transid);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001056 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
1057 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
1058 BTRFS_HEADER_FLAG_RELOC);
1059 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1060 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
1061 else
1062 btrfs_set_header_owner(cow, root->root_key.objectid);
Chris Mason6702ed42007-08-07 16:15:09 -04001063
Nikolay Borisovde37aa52018-10-30 16:43:24 +02001064 write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
Yan Zheng2b820322008-11-17 21:11:30 -05001065
Mark Fashehbe1a5562011-08-08 13:20:18 -07001066 ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
Mark Fashehb68dc2a2011-08-29 14:30:39 -07001067 if (ret) {
Josef Bacik572c83a2020-09-29 08:53:54 -04001068 btrfs_tree_unlock(cow);
1069 free_extent_buffer(cow);
Jeff Mahoney66642832016-06-10 18:19:25 -04001070 btrfs_abort_transaction(trans, ret);
Mark Fashehb68dc2a2011-08-29 14:30:39 -07001071 return ret;
1072 }
Zheng Yan1a40e232008-09-26 10:09:34 -04001073
Qu Wenruo92a7cc42020-05-15 14:01:40 +08001074 if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
Josef Bacik83d4cfd2013-08-30 15:09:51 -04001075 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
Zhaolei93314e32015-08-06 21:56:58 +08001076 if (ret) {
Josef Bacik572c83a2020-09-29 08:53:54 -04001077 btrfs_tree_unlock(cow);
1078 free_extent_buffer(cow);
Jeff Mahoney66642832016-06-10 18:19:25 -04001079 btrfs_abort_transaction(trans, ret);
Josef Bacik83d4cfd2013-08-30 15:09:51 -04001080 return ret;
Zhaolei93314e32015-08-06 21:56:58 +08001081 }
Josef Bacik83d4cfd2013-08-30 15:09:51 -04001082 }
Yan, Zheng3fd0a552010-05-16 10:49:59 -04001083
Chris Mason6702ed42007-08-07 16:15:09 -04001084 if (buf == root->node) {
Chris Mason925baed2008-06-25 16:01:30 -04001085 WARN_ON(parent && parent != buf);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001086 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
1087 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
1088 parent_start = buf->start;
Chris Mason925baed2008-06-25 16:01:30 -04001089
David Sterba67439da2019-10-08 13:28:47 +02001090 atomic_inc(&cow->refs);
David Sterbad9d19a02018-03-05 16:35:29 +01001091 ret = tree_mod_log_insert_root(root->node, cow, 1);
1092 BUG_ON(ret < 0);
Chris Mason240f62c2011-03-23 14:54:42 -04001093 rcu_assign_pointer(root->node, cow);
Chris Mason925baed2008-06-25 16:01:30 -04001094
Yan, Zhengf0486c62010-05-16 10:46:25 -04001095 btrfs_free_tree_block(trans, root, buf, parent_start,
Jan Schmidt5581a512012-05-16 17:04:52 +02001096 last_ref);
Chris Mason5f39d392007-10-15 16:14:19 -04001097 free_extent_buffer(buf);
Chris Mason0b86a832008-03-24 15:01:56 -04001098 add_root_to_dirty_list(root);
Chris Mason6702ed42007-08-07 16:15:09 -04001099 } else {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001100 WARN_ON(trans->transid != btrfs_header_generation(parent));
David Sterbae09c2ef2018-03-05 15:09:03 +01001101 tree_mod_log_insert_key(parent, parent_slot,
Josef Bacikc8cc6342013-07-01 16:18:19 -04001102 MOD_LOG_KEY_REPLACE, GFP_NOFS);
Chris Mason5f39d392007-10-15 16:14:19 -04001103 btrfs_set_node_blockptr(parent, parent_slot,
Chris Masondb945352007-10-15 16:15:53 -04001104 cow->start);
Chris Mason74493f72007-12-11 09:25:06 -05001105 btrfs_set_node_ptr_generation(parent, parent_slot,
1106 trans->transid);
Chris Mason6702ed42007-08-07 16:15:09 -04001107 btrfs_mark_buffer_dirty(parent);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00001108 if (last_ref) {
David Sterbadb7279a2018-03-05 15:14:25 +01001109 ret = tree_mod_log_free_eb(buf);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00001110 if (ret) {
Josef Bacik572c83a2020-09-29 08:53:54 -04001111 btrfs_tree_unlock(cow);
1112 free_extent_buffer(cow);
Jeff Mahoney66642832016-06-10 18:19:25 -04001113 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00001114 return ret;
1115 }
1116 }
Yan, Zhengf0486c62010-05-16 10:46:25 -04001117 btrfs_free_tree_block(trans, root, buf, parent_start,
Jan Schmidt5581a512012-05-16 17:04:52 +02001118 last_ref);
Chris Mason6702ed42007-08-07 16:15:09 -04001119 }
Chris Mason925baed2008-06-25 16:01:30 -04001120 if (unlock_orig)
1121 btrfs_tree_unlock(buf);
Josef Bacik3083ee22012-03-09 16:01:49 -05001122 free_extent_buffer_stale(buf);
Chris Mason6702ed42007-08-07 16:15:09 -04001123 btrfs_mark_buffer_dirty(cow);
1124 *cow_ret = cow;
1125 return 0;
1126}
1127
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001128/*
1129 * returns the logical address of the oldest predecessor of the given root.
1130 * entries older than time_seq are ignored.
1131 */
David Sterbabcd24da2018-03-05 15:33:18 +01001132static struct tree_mod_elem *__tree_mod_log_oldest_root(
1133 struct extent_buffer *eb_root, u64 time_seq)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001134{
1135 struct tree_mod_elem *tm;
1136 struct tree_mod_elem *found = NULL;
Jan Schmidt30b04632013-04-13 13:19:54 +00001137 u64 root_logical = eb_root->start;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001138 int looped = 0;
1139
1140 if (!time_seq)
Stefan Behrens35a36212013-08-14 18:12:25 +02001141 return NULL;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001142
1143 /*
Chandan Rajendra298cfd32016-01-21 15:55:59 +05301144 * the very last operation that's logged for a root is the
1145 * replacement operation (if it is replaced at all). this has
1146 * the logical address of the *new* root, making it the very
1147 * first operation that's logged for this root.
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001148 */
1149 while (1) {
David Sterbabcd24da2018-03-05 15:33:18 +01001150 tm = tree_mod_log_search_oldest(eb_root->fs_info, root_logical,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001151 time_seq);
1152 if (!looped && !tm)
Stefan Behrens35a36212013-08-14 18:12:25 +02001153 return NULL;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001154 /*
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001155 * if there are no tree operation for the oldest root, we simply
1156 * return it. this should only happen if that (old) root is at
1157 * level 0.
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001158 */
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001159 if (!tm)
1160 break;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001161
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001162 /*
1163 * if there's an operation that's not a root replacement, we
1164 * found the oldest version of our root. normally, we'll find a
1165 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
1166 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001167 if (tm->op != MOD_LOG_ROOT_REPLACE)
1168 break;
1169
1170 found = tm;
1171 root_logical = tm->old_root.logical;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001172 looped = 1;
1173 }
1174
Jan Schmidta95236d2012-06-05 16:41:24 +02001175 /* if there's no old root to return, return what we found instead */
1176 if (!found)
1177 found = tm;
1178
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001179 return found;
1180}
1181
1182/*
1183 * tm is a pointer to the first operation to rewind within eb. then, all
Nicholas D Steeves01327612016-05-19 21:18:45 -04001184 * previous operations will be rewound (until we reach something older than
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001185 * time_seq).
1186 */
1187static void
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001188__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1189 u64 time_seq, struct tree_mod_elem *first_tm)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001190{
1191 u32 n;
1192 struct rb_node *next;
1193 struct tree_mod_elem *tm = first_tm;
1194 unsigned long o_dst;
1195 unsigned long o_src;
1196 unsigned long p_size = sizeof(struct btrfs_key_ptr);
1197
1198 n = btrfs_header_nritems(eb);
David Sterbab1a09f12018-03-05 15:43:41 +01001199 read_lock(&fs_info->tree_mod_log_lock);
Jan Schmidt097b8a72012-06-21 11:08:04 +02001200 while (tm && tm->seq >= time_seq) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001201 /*
1202 * all the operations are recorded with the operator used for
1203 * the modification. as we're going backwards, we do the
1204 * opposite of each operation here.
1205 */
1206 switch (tm->op) {
1207 case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
1208 BUG_ON(tm->slot < n);
Marcos Paulo de Souzac730ae02020-06-16 15:54:29 -03001209 fallthrough;
Liu Bo95c80bb2012-10-19 09:50:52 +00001210 case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
Chris Mason4c3e6962012-12-18 15:43:18 -05001211 case MOD_LOG_KEY_REMOVE:
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001212 btrfs_set_node_key(eb, &tm->key, tm->slot);
1213 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1214 btrfs_set_node_ptr_generation(eb, tm->slot,
1215 tm->generation);
Chris Mason4c3e6962012-12-18 15:43:18 -05001216 n++;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001217 break;
1218 case MOD_LOG_KEY_REPLACE:
1219 BUG_ON(tm->slot >= n);
1220 btrfs_set_node_key(eb, &tm->key, tm->slot);
1221 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1222 btrfs_set_node_ptr_generation(eb, tm->slot,
1223 tm->generation);
1224 break;
1225 case MOD_LOG_KEY_ADD:
Jan Schmidt19956c72012-06-22 14:52:13 +02001226 /* if a move operation is needed it's in the log */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001227 n--;
1228 break;
1229 case MOD_LOG_MOVE_KEYS:
Jan Schmidtc3193102012-05-31 19:24:36 +02001230 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1231 o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
1232 memmove_extent_buffer(eb, o_dst, o_src,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001233 tm->move.nr_items * p_size);
1234 break;
1235 case MOD_LOG_ROOT_REPLACE:
1236 /*
1237 * this operation is special. for roots, this must be
1238 * handled explicitly before rewinding.
1239 * for non-roots, this operation may exist if the node
1240 * was a root: root A -> child B; then A gets empty and
1241 * B is promoted to the new root. in the mod log, we'll
1242 * have a root-replace operation for B, a tree block
1243 * that is no root. we simply ignore that operation.
1244 */
1245 break;
1246 }
1247 next = rb_next(&tm->node);
1248 if (!next)
1249 break;
Geliang Tang6b4df8b2016-12-19 22:53:41 +08001250 tm = rb_entry(next, struct tree_mod_elem, node);
Chandan Rajendra298cfd32016-01-21 15:55:59 +05301251 if (tm->logical != first_tm->logical)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001252 break;
1253 }
David Sterbab1a09f12018-03-05 15:43:41 +01001254 read_unlock(&fs_info->tree_mod_log_lock);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001255 btrfs_set_header_nritems(eb, n);
1256}
1257
Jan Schmidt47fb0912013-04-13 13:19:55 +00001258/*
Nicholas D Steeves01327612016-05-19 21:18:45 -04001259 * Called with eb read locked. If the buffer cannot be rewound, the same buffer
Jan Schmidt47fb0912013-04-13 13:19:55 +00001260 * is returned. If rewind operations happen, a fresh buffer is returned. The
1261 * returned buffer is always read-locked. If the returned buffer is not the
1262 * input buffer, the lock on the input buffer is released and the input buffer
1263 * is freed (its refcount is decremented).
1264 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001265static struct extent_buffer *
Josef Bacik9ec72672013-08-07 16:57:23 -04001266tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1267 struct extent_buffer *eb, u64 time_seq)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001268{
1269 struct extent_buffer *eb_rewin;
1270 struct tree_mod_elem *tm;
1271
1272 if (!time_seq)
1273 return eb;
1274
1275 if (btrfs_header_level(eb) == 0)
1276 return eb;
1277
1278 tm = tree_mod_log_search(fs_info, eb->start, time_seq);
1279 if (!tm)
1280 return eb;
1281
Josef Bacik9ec72672013-08-07 16:57:23 -04001282 btrfs_set_path_blocking(path);
David Sterba300aa892018-04-04 02:00:17 +02001283 btrfs_set_lock_blocking_read(eb);
Josef Bacik9ec72672013-08-07 16:57:23 -04001284
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001285 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1286 BUG_ON(tm->slot != 0);
Jeff Mahoneyda170662016-06-15 09:22:56 -04001287 eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001288 if (!eb_rewin) {
Josef Bacik9ec72672013-08-07 16:57:23 -04001289 btrfs_tree_read_unlock_blocking(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001290 free_extent_buffer(eb);
1291 return NULL;
1292 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001293 btrfs_set_header_bytenr(eb_rewin, eb->start);
1294 btrfs_set_header_backref_rev(eb_rewin,
1295 btrfs_header_backref_rev(eb));
1296 btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
Jan Schmidtc3193102012-05-31 19:24:36 +02001297 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001298 } else {
1299 eb_rewin = btrfs_clone_extent_buffer(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001300 if (!eb_rewin) {
Josef Bacik9ec72672013-08-07 16:57:23 -04001301 btrfs_tree_read_unlock_blocking(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001302 free_extent_buffer(eb);
1303 return NULL;
1304 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001305 }
1306
Josef Bacik9ec72672013-08-07 16:57:23 -04001307 btrfs_tree_read_unlock_blocking(eb);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001308 free_extent_buffer(eb);
1309
Josef Bacikd3beaa22020-08-10 11:42:31 -04001310 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
1311 eb_rewin, btrfs_header_level(eb_rewin));
Jan Schmidt47fb0912013-04-13 13:19:55 +00001312 btrfs_tree_read_lock(eb_rewin);
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001313 __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
Jan Schmidt57911b82012-10-19 09:22:03 +02001314 WARN_ON(btrfs_header_nritems(eb_rewin) >
Jeff Mahoneyda170662016-06-15 09:22:56 -04001315 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001316
1317 return eb_rewin;
1318}
1319
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001320/*
1321 * get_old_root() rewinds the state of @root's root node to the given @time_seq
1322 * value. If there are no changes, the current root->root_node is returned. If
1323 * anything changed in between, there's a fresh buffer allocated on which the
1324 * rewind operations are done. In any case, the returned buffer is read locked.
1325 * Returns NULL on error (with no locks held).
1326 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001327static inline struct extent_buffer *
1328get_old_root(struct btrfs_root *root, u64 time_seq)
1329{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001330 struct btrfs_fs_info *fs_info = root->fs_info;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001331 struct tree_mod_elem *tm;
Jan Schmidt30b04632013-04-13 13:19:54 +00001332 struct extent_buffer *eb = NULL;
1333 struct extent_buffer *eb_root;
Filipe Mananaefad8a82019-08-12 19:14:29 +01001334 u64 eb_root_owner = 0;
Liu Bo7bfdcf72012-10-25 07:30:19 -06001335 struct extent_buffer *old;
Jan Schmidta95236d2012-06-05 16:41:24 +02001336 struct tree_mod_root *old_root = NULL;
Chris Mason4325edd2012-06-15 20:02:02 -04001337 u64 old_generation = 0;
Jan Schmidta95236d2012-06-05 16:41:24 +02001338 u64 logical;
Qu Wenruo581c1762018-03-29 09:08:11 +08001339 int level;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001340
Jan Schmidt30b04632013-04-13 13:19:54 +00001341 eb_root = btrfs_read_lock_root_node(root);
David Sterbabcd24da2018-03-05 15:33:18 +01001342 tm = __tree_mod_log_oldest_root(eb_root, time_seq);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001343 if (!tm)
Jan Schmidt30b04632013-04-13 13:19:54 +00001344 return eb_root;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001345
Jan Schmidta95236d2012-06-05 16:41:24 +02001346 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1347 old_root = &tm->old_root;
1348 old_generation = tm->generation;
1349 logical = old_root->logical;
Qu Wenruo581c1762018-03-29 09:08:11 +08001350 level = old_root->level;
Jan Schmidta95236d2012-06-05 16:41:24 +02001351 } else {
Jan Schmidt30b04632013-04-13 13:19:54 +00001352 logical = eb_root->start;
Qu Wenruo581c1762018-03-29 09:08:11 +08001353 level = btrfs_header_level(eb_root);
Jan Schmidta95236d2012-06-05 16:41:24 +02001354 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001355
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001356 tm = tree_mod_log_search(fs_info, logical, time_seq);
Jan Schmidt834328a2012-10-23 11:27:33 +02001357 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
Jan Schmidt30b04632013-04-13 13:19:54 +00001358 btrfs_tree_read_unlock(eb_root);
1359 free_extent_buffer(eb_root);
Qu Wenruo581c1762018-03-29 09:08:11 +08001360 old = read_tree_block(fs_info, logical, 0, level, NULL);
Liu Bo64c043d2015-05-25 17:30:15 +08001361 if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
1362 if (!IS_ERR(old))
1363 free_extent_buffer(old);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001364 btrfs_warn(fs_info,
1365 "failed to read tree block %llu from get_old_root",
1366 logical);
Jan Schmidt834328a2012-10-23 11:27:33 +02001367 } else {
Liu Bo7bfdcf72012-10-25 07:30:19 -06001368 eb = btrfs_clone_extent_buffer(old);
1369 free_extent_buffer(old);
Jan Schmidt834328a2012-10-23 11:27:33 +02001370 }
1371 } else if (old_root) {
Filipe Mananaefad8a82019-08-12 19:14:29 +01001372 eb_root_owner = btrfs_header_owner(eb_root);
Jan Schmidt30b04632013-04-13 13:19:54 +00001373 btrfs_tree_read_unlock(eb_root);
1374 free_extent_buffer(eb_root);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001375 eb = alloc_dummy_extent_buffer(fs_info, logical);
Jan Schmidt834328a2012-10-23 11:27:33 +02001376 } else {
David Sterba300aa892018-04-04 02:00:17 +02001377 btrfs_set_lock_blocking_read(eb_root);
Jan Schmidt30b04632013-04-13 13:19:54 +00001378 eb = btrfs_clone_extent_buffer(eb_root);
Josef Bacik9ec72672013-08-07 16:57:23 -04001379 btrfs_tree_read_unlock_blocking(eb_root);
Jan Schmidt30b04632013-04-13 13:19:54 +00001380 free_extent_buffer(eb_root);
Jan Schmidt834328a2012-10-23 11:27:33 +02001381 }
1382
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001383 if (!eb)
1384 return NULL;
Jan Schmidta95236d2012-06-05 16:41:24 +02001385 if (old_root) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001386 btrfs_set_header_bytenr(eb, eb->start);
1387 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
Filipe Mananaefad8a82019-08-12 19:14:29 +01001388 btrfs_set_header_owner(eb, eb_root_owner);
Jan Schmidta95236d2012-06-05 16:41:24 +02001389 btrfs_set_header_level(eb, old_root->level);
1390 btrfs_set_header_generation(eb, old_generation);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001391 }
Josef Bacikd3beaa22020-08-10 11:42:31 -04001392 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
1393 btrfs_header_level(eb));
1394 btrfs_tree_read_lock(eb);
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001395 if (tm)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001396 __tree_mod_log_rewind(fs_info, eb, time_seq, tm);
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001397 else
1398 WARN_ON(btrfs_header_level(eb) != 0);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001399 WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(fs_info));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001400
1401 return eb;
1402}
1403
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001404int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
1405{
1406 struct tree_mod_elem *tm;
1407 int level;
Jan Schmidt30b04632013-04-13 13:19:54 +00001408 struct extent_buffer *eb_root = btrfs_root_node(root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001409
David Sterbabcd24da2018-03-05 15:33:18 +01001410 tm = __tree_mod_log_oldest_root(eb_root, time_seq);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001411 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
1412 level = tm->old_root.level;
1413 } else {
Jan Schmidt30b04632013-04-13 13:19:54 +00001414 level = btrfs_header_level(eb_root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001415 }
Jan Schmidt30b04632013-04-13 13:19:54 +00001416 free_extent_buffer(eb_root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001417
1418 return level;
1419}
1420
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001421static inline int should_cow_block(struct btrfs_trans_handle *trans,
1422 struct btrfs_root *root,
1423 struct extent_buffer *buf)
1424{
Jeff Mahoneyf5ee5c92016-06-21 09:52:41 -04001425 if (btrfs_is_testing(root->fs_info))
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04001426 return 0;
David Sterbafccb84c2014-09-29 23:53:21 +02001427
David Sterbad1980132018-03-16 02:39:40 +01001428 /* Ensure we can see the FORCE_COW bit */
1429 smp_mb__before_atomic();
Liu Bof1ebcc72011-11-14 20:48:06 -05001430
1431 /*
1432 * We do not need to cow a block if
1433 * 1) this block is not created or changed in this transaction;
1434 * 2) this block does not belong to TREE_RELOC tree;
1435 * 3) the root is not forced COW.
1436 *
1437 * What is forced COW:
Nicholas D Steeves01327612016-05-19 21:18:45 -04001438 * when we create snapshot during committing the transaction,
Andrea Gelmini52042d82018-11-28 12:05:13 +01001439 * after we've finished copying src root, we must COW the shared
Liu Bof1ebcc72011-11-14 20:48:06 -05001440 * block to ensure the metadata consistency.
1441 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001442 if (btrfs_header_generation(buf) == trans->transid &&
1443 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
1444 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
Liu Bof1ebcc72011-11-14 20:48:06 -05001445 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
Miao Xie27cdeb72014-04-02 19:51:05 +08001446 !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001447 return 0;
1448 return 1;
1449}
1450
Chris Masond352ac62008-09-29 15:18:18 -04001451/*
1452 * cows a single block, see __btrfs_cow_block for the real work.
Nicholas D Steeves01327612016-05-19 21:18:45 -04001453 * This version of it has extra checks so that a block isn't COWed more than
Chris Masond352ac62008-09-29 15:18:18 -04001454 * once per transaction, as long as it hasn't been written yet
1455 */
Chris Masond3977122009-01-05 21:25:51 -05001456noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001457 struct btrfs_root *root, struct extent_buffer *buf,
1458 struct extent_buffer *parent, int parent_slot,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001459 struct extent_buffer **cow_ret,
1460 enum btrfs_lock_nesting nest)
Chris Mason02217ed2007-03-02 16:08:05 -05001461{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001462 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason6702ed42007-08-07 16:15:09 -04001463 u64 search_start;
Chris Masonf510cfe2007-10-15 16:14:48 -04001464 int ret;
Chris Masondc17ff82008-01-08 15:46:30 -05001465
Josef Bacik83354f02018-11-30 11:52:13 -05001466 if (test_bit(BTRFS_ROOT_DELETING, &root->state))
1467 btrfs_err(fs_info,
1468 "COW'ing blocks on a fs root that's being dropped");
1469
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001470 if (trans->transaction != fs_info->running_transaction)
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001471 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02001472 trans->transid,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001473 fs_info->running_transaction->transid);
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001474
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001475 if (trans->transid != fs_info->generation)
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001476 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001477 trans->transid, fs_info->generation);
Chris Masondc17ff82008-01-08 15:46:30 -05001478
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001479 if (!should_cow_block(trans, root, buf)) {
Jeff Mahoney64c12922016-06-08 00:36:38 -04001480 trans->dirty = true;
Chris Mason02217ed2007-03-02 16:08:05 -05001481 *cow_ret = buf;
1482 return 0;
1483 }
Chris Masonc4876852009-02-04 09:24:25 -05001484
Byongho Leeee221842015-12-15 01:42:10 +09001485 search_start = buf->start & ~((u64)SZ_1G - 1);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001486
1487 if (parent)
David Sterba8bead252018-04-04 02:03:48 +02001488 btrfs_set_lock_blocking_write(parent);
1489 btrfs_set_lock_blocking_write(buf);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001490
Qu Wenruof616f5c2019-01-23 15:15:17 +08001491 /*
1492 * Before CoWing this block for later modification, check if it's
1493 * the subtree root and do the delayed subtree trace if needed.
1494 *
1495 * Also We don't care about the error, as it's handled internally.
1496 */
1497 btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
Chris Masonf510cfe2007-10-15 16:14:48 -04001498 ret = __btrfs_cow_block(trans, root, buf, parent,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001499 parent_slot, cow_ret, search_start, 0, nest);
liubo1abe9b82011-03-24 11:18:59 +00001500
1501 trace_btrfs_cow_block(root, buf, *cow_ret);
1502
Chris Masonf510cfe2007-10-15 16:14:48 -04001503 return ret;
Chris Mason6702ed42007-08-07 16:15:09 -04001504}
1505
Chris Masond352ac62008-09-29 15:18:18 -04001506/*
1507 * helper function for defrag to decide if two blocks pointed to by a
1508 * node are actually close by
1509 */
Chris Mason6b800532007-10-15 16:17:34 -04001510static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
Chris Mason6702ed42007-08-07 16:15:09 -04001511{
Chris Mason6b800532007-10-15 16:17:34 -04001512 if (blocknr < other && other - (blocknr + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -04001513 return 1;
Chris Mason6b800532007-10-15 16:17:34 -04001514 if (blocknr > other && blocknr - (other + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -04001515 return 1;
Chris Mason02217ed2007-03-02 16:08:05 -05001516 return 0;
1517}
1518
David Sterbace6ef5a2020-06-08 16:06:07 +02001519#ifdef __LITTLE_ENDIAN
1520
1521/*
1522 * Compare two keys, on little-endian the disk order is same as CPU order and
1523 * we can avoid the conversion.
1524 */
1525static int comp_keys(const struct btrfs_disk_key *disk_key,
1526 const struct btrfs_key *k2)
1527{
1528 const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
1529
1530 return btrfs_comp_cpu_keys(k1, k2);
1531}
1532
1533#else
1534
Chris Mason081e9572007-11-06 10:26:24 -05001535/*
1536 * compare two keys in a memcmp fashion
1537 */
Omar Sandoval310712b2017-01-17 23:24:37 -08001538static int comp_keys(const struct btrfs_disk_key *disk,
1539 const struct btrfs_key *k2)
Chris Mason081e9572007-11-06 10:26:24 -05001540{
1541 struct btrfs_key k1;
1542
1543 btrfs_disk_key_to_cpu(&k1, disk);
1544
Diego Calleja20736ab2009-07-24 11:06:52 -04001545 return btrfs_comp_cpu_keys(&k1, k2);
Chris Mason081e9572007-11-06 10:26:24 -05001546}
David Sterbace6ef5a2020-06-08 16:06:07 +02001547#endif
Chris Mason081e9572007-11-06 10:26:24 -05001548
Josef Bacikf3465ca2008-11-12 14:19:50 -05001549/*
1550 * same as comp_keys only with two btrfs_key's
1551 */
David Sterbae1f60a62019-10-01 19:57:39 +02001552int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
Josef Bacikf3465ca2008-11-12 14:19:50 -05001553{
1554 if (k1->objectid > k2->objectid)
1555 return 1;
1556 if (k1->objectid < k2->objectid)
1557 return -1;
1558 if (k1->type > k2->type)
1559 return 1;
1560 if (k1->type < k2->type)
1561 return -1;
1562 if (k1->offset > k2->offset)
1563 return 1;
1564 if (k1->offset < k2->offset)
1565 return -1;
1566 return 0;
1567}
Chris Mason081e9572007-11-06 10:26:24 -05001568
Chris Masond352ac62008-09-29 15:18:18 -04001569/*
1570 * this is used by the defrag code to go through all the
1571 * leaves pointed to by a node and reallocate them so that
1572 * disk order is close to key order
1573 */
Chris Mason6702ed42007-08-07 16:15:09 -04001574int btrfs_realloc_node(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001575 struct btrfs_root *root, struct extent_buffer *parent,
Eric Sandeende78b512013-01-31 18:21:12 +00001576 int start_slot, u64 *last_ret,
Chris Masona6b6e752007-10-15 16:22:39 -04001577 struct btrfs_key *progress)
Chris Mason6702ed42007-08-07 16:15:09 -04001578{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001579 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason6b800532007-10-15 16:17:34 -04001580 struct extent_buffer *cur;
Chris Mason6702ed42007-08-07 16:15:09 -04001581 u64 blocknr;
Chris Masonca7a79a2008-05-12 12:59:19 -04001582 u64 gen;
Chris Masone9d0b132007-08-10 14:06:19 -04001583 u64 search_start = *last_ret;
1584 u64 last_block = 0;
Chris Mason6702ed42007-08-07 16:15:09 -04001585 u64 other;
1586 u32 parent_nritems;
Chris Mason6702ed42007-08-07 16:15:09 -04001587 int end_slot;
1588 int i;
1589 int err = 0;
Chris Masonf2183bd2007-08-10 14:42:37 -04001590 int parent_level;
Chris Mason6b800532007-10-15 16:17:34 -04001591 int uptodate;
1592 u32 blocksize;
Chris Mason081e9572007-11-06 10:26:24 -05001593 int progress_passed = 0;
1594 struct btrfs_disk_key disk_key;
Chris Mason6702ed42007-08-07 16:15:09 -04001595
Chris Mason5708b952007-10-25 15:43:18 -04001596 parent_level = btrfs_header_level(parent);
Chris Mason5708b952007-10-25 15:43:18 -04001597
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001598 WARN_ON(trans->transaction != fs_info->running_transaction);
1599 WARN_ON(trans->transid != fs_info->generation);
Chris Mason86479a02007-09-10 19:58:16 -04001600
Chris Mason6b800532007-10-15 16:17:34 -04001601 parent_nritems = btrfs_header_nritems(parent);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001602 blocksize = fs_info->nodesize;
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001603 end_slot = parent_nritems - 1;
Chris Mason6702ed42007-08-07 16:15:09 -04001604
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001605 if (parent_nritems <= 1)
Chris Mason6702ed42007-08-07 16:15:09 -04001606 return 0;
1607
David Sterba8bead252018-04-04 02:03:48 +02001608 btrfs_set_lock_blocking_write(parent);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001609
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001610 for (i = start_slot; i <= end_slot; i++) {
Qu Wenruo581c1762018-03-29 09:08:11 +08001611 struct btrfs_key first_key;
Chris Mason6702ed42007-08-07 16:15:09 -04001612 int close = 1;
Chris Masona6b6e752007-10-15 16:22:39 -04001613
Chris Mason081e9572007-11-06 10:26:24 -05001614 btrfs_node_key(parent, &disk_key, i);
1615 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
1616 continue;
1617
1618 progress_passed = 1;
Chris Mason6b800532007-10-15 16:17:34 -04001619 blocknr = btrfs_node_blockptr(parent, i);
Chris Masonca7a79a2008-05-12 12:59:19 -04001620 gen = btrfs_node_ptr_generation(parent, i);
Qu Wenruo581c1762018-03-29 09:08:11 +08001621 btrfs_node_key_to_cpu(parent, &first_key, i);
Chris Masone9d0b132007-08-10 14:06:19 -04001622 if (last_block == 0)
1623 last_block = blocknr;
Chris Mason5708b952007-10-25 15:43:18 -04001624
Chris Mason6702ed42007-08-07 16:15:09 -04001625 if (i > 0) {
Chris Mason6b800532007-10-15 16:17:34 -04001626 other = btrfs_node_blockptr(parent, i - 1);
1627 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -04001628 }
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001629 if (!close && i < end_slot) {
Chris Mason6b800532007-10-15 16:17:34 -04001630 other = btrfs_node_blockptr(parent, i + 1);
1631 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -04001632 }
Chris Masone9d0b132007-08-10 14:06:19 -04001633 if (close) {
1634 last_block = blocknr;
Chris Mason6702ed42007-08-07 16:15:09 -04001635 continue;
Chris Masone9d0b132007-08-10 14:06:19 -04001636 }
Chris Mason6702ed42007-08-07 16:15:09 -04001637
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001638 cur = find_extent_buffer(fs_info, blocknr);
Chris Mason6b800532007-10-15 16:17:34 -04001639 if (cur)
Chris Masonb9fab912012-05-06 07:23:47 -04001640 uptodate = btrfs_buffer_uptodate(cur, gen, 0);
Chris Mason6b800532007-10-15 16:17:34 -04001641 else
1642 uptodate = 0;
Chris Mason5708b952007-10-25 15:43:18 -04001643 if (!cur || !uptodate) {
Chris Mason6b800532007-10-15 16:17:34 -04001644 if (!cur) {
Qu Wenruo581c1762018-03-29 09:08:11 +08001645 cur = read_tree_block(fs_info, blocknr, gen,
1646 parent_level - 1,
1647 &first_key);
Liu Bo64c043d2015-05-25 17:30:15 +08001648 if (IS_ERR(cur)) {
1649 return PTR_ERR(cur);
1650 } else if (!extent_buffer_uptodate(cur)) {
Josef Bacik416bc652013-04-23 14:17:42 -04001651 free_extent_buffer(cur);
Tsutomu Itoh97d9a8a2011-03-24 06:33:21 +00001652 return -EIO;
Josef Bacik416bc652013-04-23 14:17:42 -04001653 }
Chris Mason6b800532007-10-15 16:17:34 -04001654 } else if (!uptodate) {
Qu Wenruo581c1762018-03-29 09:08:11 +08001655 err = btrfs_read_buffer(cur, gen,
1656 parent_level - 1,&first_key);
Tsutomu Itoh018642a2012-05-29 18:10:13 +09001657 if (err) {
1658 free_extent_buffer(cur);
1659 return err;
1660 }
Chris Masonf2183bd2007-08-10 14:42:37 -04001661 }
Chris Mason6702ed42007-08-07 16:15:09 -04001662 }
Chris Masone9d0b132007-08-10 14:06:19 -04001663 if (search_start == 0)
Chris Mason6b800532007-10-15 16:17:34 -04001664 search_start = last_block;
Chris Masone9d0b132007-08-10 14:06:19 -04001665
Chris Masone7a84562008-06-25 16:01:31 -04001666 btrfs_tree_lock(cur);
David Sterba8bead252018-04-04 02:03:48 +02001667 btrfs_set_lock_blocking_write(cur);
Chris Mason6b800532007-10-15 16:17:34 -04001668 err = __btrfs_cow_block(trans, root, cur, parent, i,
Chris Masone7a84562008-06-25 16:01:31 -04001669 &cur, search_start,
Chris Mason6b800532007-10-15 16:17:34 -04001670 min(16 * blocksize,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001671 (end_slot - i) * blocksize),
1672 BTRFS_NESTING_COW);
Yan252c38f2007-08-29 09:11:44 -04001673 if (err) {
Chris Masone7a84562008-06-25 16:01:31 -04001674 btrfs_tree_unlock(cur);
Chris Mason6b800532007-10-15 16:17:34 -04001675 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -04001676 break;
Yan252c38f2007-08-29 09:11:44 -04001677 }
Chris Masone7a84562008-06-25 16:01:31 -04001678 search_start = cur->start;
1679 last_block = cur->start;
Chris Masonf2183bd2007-08-10 14:42:37 -04001680 *last_ret = search_start;
Chris Masone7a84562008-06-25 16:01:31 -04001681 btrfs_tree_unlock(cur);
1682 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -04001683 }
1684 return err;
1685}
1686
Chris Mason74123bd2007-02-02 11:05:29 -05001687/*
Chris Mason5f39d392007-10-15 16:14:19 -04001688 * search for key in the extent_buffer. The items start at offset p,
1689 * and they are item_size apart. There are 'max' items in p.
1690 *
Chris Mason74123bd2007-02-02 11:05:29 -05001691 * the slot in the array is returned via slot, and it points to
1692 * the place where you would insert key if it is not found in
1693 * the array.
1694 *
1695 * slot may point to max if the key is bigger than all of the keys
1696 */
Chris Masone02119d2008-09-05 16:13:11 -04001697static noinline int generic_bin_search(struct extent_buffer *eb,
Omar Sandoval310712b2017-01-17 23:24:37 -08001698 unsigned long p, int item_size,
1699 const struct btrfs_key *key,
Chris Masone02119d2008-09-05 16:13:11 -04001700 int max, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001701{
1702 int low = 0;
1703 int high = max;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001704 int ret;
David Sterba5cd17f32020-04-29 23:23:37 +02001705 const int key_size = sizeof(struct btrfs_disk_key);
Chris Masonbe0e5c02007-01-26 15:51:26 -05001706
Liu Bo5e24e9a2016-06-23 16:32:45 -07001707 if (low > high) {
1708 btrfs_err(eb->fs_info,
1709 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
1710 __func__, low, high, eb->start,
1711 btrfs_header_owner(eb), btrfs_header_level(eb));
1712 return -EINVAL;
1713 }
1714
Chris Masond3977122009-01-05 21:25:51 -05001715 while (low < high) {
David Sterba5cd17f32020-04-29 23:23:37 +02001716 unsigned long oip;
1717 unsigned long offset;
1718 struct btrfs_disk_key *tmp;
1719 struct btrfs_disk_key unaligned;
1720 int mid;
1721
Chris Masonbe0e5c02007-01-26 15:51:26 -05001722 mid = (low + high) / 2;
Chris Mason5f39d392007-10-15 16:14:19 -04001723 offset = p + mid * item_size;
David Sterba5cd17f32020-04-29 23:23:37 +02001724 oip = offset_in_page(offset);
Chris Mason5f39d392007-10-15 16:14:19 -04001725
David Sterba5cd17f32020-04-29 23:23:37 +02001726 if (oip + key_size <= PAGE_SIZE) {
1727 const unsigned long idx = offset >> PAGE_SHIFT;
1728 char *kaddr = page_address(eb->pages[idx]);
Chris Mason934d3752008-12-08 16:43:10 -05001729
David Sterba5cd17f32020-04-29 23:23:37 +02001730 tmp = (struct btrfs_disk_key *)(kaddr + oip);
Chris Mason5f39d392007-10-15 16:14:19 -04001731 } else {
David Sterba5cd17f32020-04-29 23:23:37 +02001732 read_extent_buffer(eb, &unaligned, offset, key_size);
1733 tmp = &unaligned;
Chris Mason5f39d392007-10-15 16:14:19 -04001734 }
David Sterba5cd17f32020-04-29 23:23:37 +02001735
Chris Masonbe0e5c02007-01-26 15:51:26 -05001736 ret = comp_keys(tmp, key);
1737
1738 if (ret < 0)
1739 low = mid + 1;
1740 else if (ret > 0)
1741 high = mid;
1742 else {
1743 *slot = mid;
1744 return 0;
1745 }
1746 }
1747 *slot = low;
1748 return 1;
1749}
1750
Chris Mason97571fd2007-02-24 13:39:08 -05001751/*
1752 * simple bin_search frontend that does the right thing for
1753 * leaves vs nodes
1754 */
Nikolay Borisova74b35e2017-12-08 16:27:43 +02001755int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
Qu Wenruoe3b83362020-04-17 15:08:21 +08001756 int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001757{
Qu Wenruoe3b83362020-04-17 15:08:21 +08001758 if (btrfs_header_level(eb) == 0)
Chris Mason5f39d392007-10-15 16:14:19 -04001759 return generic_bin_search(eb,
1760 offsetof(struct btrfs_leaf, items),
Chris Mason0783fcf2007-03-12 20:12:07 -04001761 sizeof(struct btrfs_item),
Chris Mason5f39d392007-10-15 16:14:19 -04001762 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -04001763 slot);
Wang Sheng-Huif7757382012-03-30 15:14:27 +08001764 else
Chris Mason5f39d392007-10-15 16:14:19 -04001765 return generic_bin_search(eb,
1766 offsetof(struct btrfs_node, ptrs),
Chris Mason123abc82007-03-14 14:14:43 -04001767 sizeof(struct btrfs_key_ptr),
Chris Mason5f39d392007-10-15 16:14:19 -04001768 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -04001769 slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05001770}
1771
Yan, Zhengf0486c62010-05-16 10:46:25 -04001772static void root_add_used(struct btrfs_root *root, u32 size)
1773{
1774 spin_lock(&root->accounting_lock);
1775 btrfs_set_root_used(&root->root_item,
1776 btrfs_root_used(&root->root_item) + size);
1777 spin_unlock(&root->accounting_lock);
1778}
1779
1780static void root_sub_used(struct btrfs_root *root, u32 size)
1781{
1782 spin_lock(&root->accounting_lock);
1783 btrfs_set_root_used(&root->root_item,
1784 btrfs_root_used(&root->root_item) - size);
1785 spin_unlock(&root->accounting_lock);
1786}
1787
Chris Masond352ac62008-09-29 15:18:18 -04001788/* given a node and slot number, this reads the blocks it points to. The
1789 * extent buffer is returned with a reference taken (but unlocked).
Chris Masond352ac62008-09-29 15:18:18 -04001790 */
David Sterba4b231ae2019-08-21 19:16:27 +02001791struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
1792 int slot)
Chris Masonbb803952007-03-01 12:04:21 -05001793{
Chris Masonca7a79a2008-05-12 12:59:19 -04001794 int level = btrfs_header_level(parent);
Josef Bacik416bc652013-04-23 14:17:42 -04001795 struct extent_buffer *eb;
Qu Wenruo581c1762018-03-29 09:08:11 +08001796 struct btrfs_key first_key;
Josef Bacik416bc652013-04-23 14:17:42 -04001797
Liu Bofb770ae2016-07-05 12:10:14 -07001798 if (slot < 0 || slot >= btrfs_header_nritems(parent))
1799 return ERR_PTR(-ENOENT);
Chris Masonca7a79a2008-05-12 12:59:19 -04001800
1801 BUG_ON(level == 0);
1802
Qu Wenruo581c1762018-03-29 09:08:11 +08001803 btrfs_node_key_to_cpu(parent, &first_key, slot);
David Sterbad0d20b02019-03-20 14:54:01 +01001804 eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
Qu Wenruo581c1762018-03-29 09:08:11 +08001805 btrfs_node_ptr_generation(parent, slot),
1806 level - 1, &first_key);
Liu Bofb770ae2016-07-05 12:10:14 -07001807 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
1808 free_extent_buffer(eb);
1809 eb = ERR_PTR(-EIO);
Josef Bacik416bc652013-04-23 14:17:42 -04001810 }
1811
1812 return eb;
Chris Masonbb803952007-03-01 12:04:21 -05001813}
1814
Chris Masond352ac62008-09-29 15:18:18 -04001815/*
1816 * node level balancing, used to make sure nodes are in proper order for
1817 * item deletion. We balance from the top down, so we have to make sure
1818 * that a deletion won't leave an node completely empty later on.
1819 */
Chris Masone02119d2008-09-05 16:13:11 -04001820static noinline int balance_level(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05001821 struct btrfs_root *root,
1822 struct btrfs_path *path, int level)
Chris Masonbb803952007-03-01 12:04:21 -05001823{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001824 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04001825 struct extent_buffer *right = NULL;
1826 struct extent_buffer *mid;
1827 struct extent_buffer *left = NULL;
1828 struct extent_buffer *parent = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05001829 int ret = 0;
1830 int wret;
1831 int pslot;
Chris Masonbb803952007-03-01 12:04:21 -05001832 int orig_slot = path->slots[level];
Chris Mason79f95c82007-03-01 15:16:26 -05001833 u64 orig_ptr;
Chris Masonbb803952007-03-01 12:04:21 -05001834
Liu Bo98e6b1e2018-09-12 06:06:23 +08001835 ASSERT(level > 0);
Chris Masonbb803952007-03-01 12:04:21 -05001836
Chris Mason5f39d392007-10-15 16:14:19 -04001837 mid = path->nodes[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001838
Chris Masonbd681512011-07-16 15:23:14 -04001839 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
1840 path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
Chris Mason7bb86312007-12-11 09:25:06 -05001841 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1842
Chris Mason1d4f8a02007-03-13 09:28:32 -04001843 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
Chris Mason79f95c82007-03-01 15:16:26 -05001844
Li Zefana05a9bb2011-09-06 16:55:34 +08001845 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04001846 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08001847 pslot = path->slots[level + 1];
1848 }
Chris Masonbb803952007-03-01 12:04:21 -05001849
Chris Mason40689472007-03-17 14:29:23 -04001850 /*
1851 * deal with the case where there is only one pointer in the root
1852 * by promoting the node below to a root
1853 */
Chris Mason5f39d392007-10-15 16:14:19 -04001854 if (!parent) {
1855 struct extent_buffer *child;
Chris Masonbb803952007-03-01 12:04:21 -05001856
Chris Mason5f39d392007-10-15 16:14:19 -04001857 if (btrfs_header_nritems(mid) != 1)
Chris Masonbb803952007-03-01 12:04:21 -05001858 return 0;
1859
1860 /* promote the child to a root */
David Sterba4b231ae2019-08-21 19:16:27 +02001861 child = btrfs_read_node_slot(mid, 0);
Liu Bofb770ae2016-07-05 12:10:14 -07001862 if (IS_ERR(child)) {
1863 ret = PTR_ERR(child);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001864 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -07001865 goto enospc;
1866 }
1867
Chris Mason925baed2008-06-25 16:01:30 -04001868 btrfs_tree_lock(child);
David Sterba8bead252018-04-04 02:03:48 +02001869 btrfs_set_lock_blocking_write(child);
Josef Bacik9631e4c2020-08-20 11:46:03 -04001870 ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
1871 BTRFS_NESTING_COW);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001872 if (ret) {
1873 btrfs_tree_unlock(child);
1874 free_extent_buffer(child);
1875 goto enospc;
1876 }
Yan2f375ab2008-02-01 14:58:07 -05001877
David Sterbad9d19a02018-03-05 16:35:29 +01001878 ret = tree_mod_log_insert_root(root->node, child, 1);
1879 BUG_ON(ret < 0);
Chris Mason240f62c2011-03-23 14:54:42 -04001880 rcu_assign_pointer(root->node, child);
Chris Mason925baed2008-06-25 16:01:30 -04001881
Chris Mason0b86a832008-03-24 15:01:56 -04001882 add_root_to_dirty_list(root);
Chris Mason925baed2008-06-25 16:01:30 -04001883 btrfs_tree_unlock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001884
Chris Mason925baed2008-06-25 16:01:30 -04001885 path->locks[level] = 0;
Chris Masonbb803952007-03-01 12:04:21 -05001886 path->nodes[level] = NULL;
David Sterba6a884d7d2019-03-20 14:30:02 +01001887 btrfs_clean_tree_block(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001888 btrfs_tree_unlock(mid);
Chris Masonbb803952007-03-01 12:04:21 -05001889 /* once for the path */
Chris Mason5f39d392007-10-15 16:14:19 -04001890 free_extent_buffer(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001891
1892 root_sub_used(root, mid->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001893 btrfs_free_tree_block(trans, root, mid, 0, 1);
Chris Masonbb803952007-03-01 12:04:21 -05001894 /* once for the root ptr */
Josef Bacik3083ee22012-03-09 16:01:49 -05001895 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001896 return 0;
Chris Masonbb803952007-03-01 12:04:21 -05001897 }
Chris Mason5f39d392007-10-15 16:14:19 -04001898 if (btrfs_header_nritems(mid) >
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001899 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
Chris Masonbb803952007-03-01 12:04:21 -05001900 return 0;
1901
David Sterba4b231ae2019-08-21 19:16:27 +02001902 left = btrfs_read_node_slot(parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001903 if (IS_ERR(left))
1904 left = NULL;
1905
Chris Mason5f39d392007-10-15 16:14:19 -04001906 if (left) {
Josef Bacikbf774672020-08-20 11:46:04 -04001907 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
David Sterba8bead252018-04-04 02:03:48 +02001908 btrfs_set_lock_blocking_write(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001909 wret = btrfs_cow_block(trans, root, left,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001910 parent, pslot - 1, &left,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04001911 BTRFS_NESTING_LEFT_COW);
Chris Mason54aa1f42007-06-22 14:16:25 -04001912 if (wret) {
1913 ret = wret;
1914 goto enospc;
1915 }
Chris Mason2cc58cf2007-08-27 16:49:44 -04001916 }
Liu Bofb770ae2016-07-05 12:10:14 -07001917
David Sterba4b231ae2019-08-21 19:16:27 +02001918 right = btrfs_read_node_slot(parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001919 if (IS_ERR(right))
1920 right = NULL;
1921
Chris Mason5f39d392007-10-15 16:14:19 -04001922 if (right) {
Josef Bacikbf774672020-08-20 11:46:04 -04001923 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
David Sterba8bead252018-04-04 02:03:48 +02001924 btrfs_set_lock_blocking_write(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001925 wret = btrfs_cow_block(trans, root, right,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001926 parent, pslot + 1, &right,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04001927 BTRFS_NESTING_RIGHT_COW);
Chris Mason2cc58cf2007-08-27 16:49:44 -04001928 if (wret) {
1929 ret = wret;
1930 goto enospc;
1931 }
1932 }
1933
1934 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04001935 if (left) {
1936 orig_slot += btrfs_header_nritems(left);
David Sterbad30a6682019-03-20 14:16:45 +01001937 wret = push_node_left(trans, left, mid, 1);
Chris Mason79f95c82007-03-01 15:16:26 -05001938 if (wret < 0)
1939 ret = wret;
Chris Masonbb803952007-03-01 12:04:21 -05001940 }
Chris Mason79f95c82007-03-01 15:16:26 -05001941
1942 /*
1943 * then try to empty the right most buffer into the middle
1944 */
Chris Mason5f39d392007-10-15 16:14:19 -04001945 if (right) {
David Sterbad30a6682019-03-20 14:16:45 +01001946 wret = push_node_left(trans, mid, right, 1);
Chris Mason54aa1f42007-06-22 14:16:25 -04001947 if (wret < 0 && wret != -ENOSPC)
Chris Mason79f95c82007-03-01 15:16:26 -05001948 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04001949 if (btrfs_header_nritems(right) == 0) {
David Sterba6a884d7d2019-03-20 14:30:02 +01001950 btrfs_clean_tree_block(right);
Chris Mason925baed2008-06-25 16:01:30 -04001951 btrfs_tree_unlock(right);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001952 del_ptr(root, path, level + 1, pslot + 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001953 root_sub_used(root, right->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001954 btrfs_free_tree_block(trans, root, right, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05001955 free_extent_buffer_stale(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001956 right = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05001957 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001958 struct btrfs_disk_key right_key;
1959 btrfs_node_key(right, &right_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01001960 ret = tree_mod_log_insert_key(parent, pslot + 1,
1961 MOD_LOG_KEY_REPLACE, GFP_NOFS);
1962 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04001963 btrfs_set_node_key(parent, &right_key, pslot + 1);
1964 btrfs_mark_buffer_dirty(parent);
Chris Masonbb803952007-03-01 12:04:21 -05001965 }
1966 }
Chris Mason5f39d392007-10-15 16:14:19 -04001967 if (btrfs_header_nritems(mid) == 1) {
Chris Mason79f95c82007-03-01 15:16:26 -05001968 /*
1969 * we're not allowed to leave a node with one item in the
1970 * tree during a delete. A deletion from lower in the tree
1971 * could try to delete the only pointer in this node.
1972 * So, pull some keys from the left.
1973 * There has to be a left pointer at this point because
1974 * otherwise we would have pulled some pointers from the
1975 * right
1976 */
Mark Fasheh305a26a2011-09-01 11:27:57 -07001977 if (!left) {
1978 ret = -EROFS;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001979 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -07001980 goto enospc;
1981 }
David Sterba55d32ed2019-03-20 14:18:06 +01001982 wret = balance_node_right(trans, mid, left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001983 if (wret < 0) {
Chris Mason79f95c82007-03-01 15:16:26 -05001984 ret = wret;
Chris Mason54aa1f42007-06-22 14:16:25 -04001985 goto enospc;
1986 }
Chris Masonbce4eae2008-04-24 14:42:46 -04001987 if (wret == 1) {
David Sterbad30a6682019-03-20 14:16:45 +01001988 wret = push_node_left(trans, left, mid, 1);
Chris Masonbce4eae2008-04-24 14:42:46 -04001989 if (wret < 0)
1990 ret = wret;
1991 }
Chris Mason79f95c82007-03-01 15:16:26 -05001992 BUG_ON(wret == 1);
1993 }
Chris Mason5f39d392007-10-15 16:14:19 -04001994 if (btrfs_header_nritems(mid) == 0) {
David Sterba6a884d7d2019-03-20 14:30:02 +01001995 btrfs_clean_tree_block(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001996 btrfs_tree_unlock(mid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001997 del_ptr(root, path, level + 1, pslot);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001998 root_sub_used(root, mid->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001999 btrfs_free_tree_block(trans, root, mid, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05002000 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04002001 mid = NULL;
Chris Mason79f95c82007-03-01 15:16:26 -05002002 } else {
2003 /* update the parent key to reflect our changes */
Chris Mason5f39d392007-10-15 16:14:19 -04002004 struct btrfs_disk_key mid_key;
2005 btrfs_node_key(mid, &mid_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01002006 ret = tree_mod_log_insert_key(parent, pslot,
2007 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2008 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002009 btrfs_set_node_key(parent, &mid_key, pslot);
2010 btrfs_mark_buffer_dirty(parent);
Chris Mason79f95c82007-03-01 15:16:26 -05002011 }
Chris Masonbb803952007-03-01 12:04:21 -05002012
Chris Mason79f95c82007-03-01 15:16:26 -05002013 /* update the path */
Chris Mason5f39d392007-10-15 16:14:19 -04002014 if (left) {
2015 if (btrfs_header_nritems(left) > orig_slot) {
David Sterba67439da2019-10-08 13:28:47 +02002016 atomic_inc(&left->refs);
Chris Mason925baed2008-06-25 16:01:30 -04002017 /* left was locked after cow */
Chris Mason5f39d392007-10-15 16:14:19 -04002018 path->nodes[level] = left;
Chris Masonbb803952007-03-01 12:04:21 -05002019 path->slots[level + 1] -= 1;
2020 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002021 if (mid) {
2022 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002023 free_extent_buffer(mid);
Chris Mason925baed2008-06-25 16:01:30 -04002024 }
Chris Masonbb803952007-03-01 12:04:21 -05002025 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002026 orig_slot -= btrfs_header_nritems(left);
Chris Masonbb803952007-03-01 12:04:21 -05002027 path->slots[level] = orig_slot;
2028 }
2029 }
Chris Mason79f95c82007-03-01 15:16:26 -05002030 /* double check we haven't messed things up */
Chris Masone20d96d2007-03-22 12:13:20 -04002031 if (orig_ptr !=
Chris Mason5f39d392007-10-15 16:14:19 -04002032 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
Chris Mason79f95c82007-03-01 15:16:26 -05002033 BUG();
Chris Mason54aa1f42007-06-22 14:16:25 -04002034enospc:
Chris Mason925baed2008-06-25 16:01:30 -04002035 if (right) {
2036 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002037 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04002038 }
2039 if (left) {
2040 if (path->nodes[level] != left)
2041 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002042 free_extent_buffer(left);
Chris Mason925baed2008-06-25 16:01:30 -04002043 }
Chris Masonbb803952007-03-01 12:04:21 -05002044 return ret;
2045}
2046
Chris Masond352ac62008-09-29 15:18:18 -04002047/* Node balancing for insertion. Here we only split or push nodes around
2048 * when they are completely full. This is also done top down, so we
2049 * have to be pessimistic.
2050 */
Chris Masond3977122009-01-05 21:25:51 -05002051static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05002052 struct btrfs_root *root,
2053 struct btrfs_path *path, int level)
Chris Masone66f7092007-04-20 13:16:02 -04002054{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002055 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04002056 struct extent_buffer *right = NULL;
2057 struct extent_buffer *mid;
2058 struct extent_buffer *left = NULL;
2059 struct extent_buffer *parent = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002060 int ret = 0;
2061 int wret;
2062 int pslot;
2063 int orig_slot = path->slots[level];
Chris Masone66f7092007-04-20 13:16:02 -04002064
2065 if (level == 0)
2066 return 1;
2067
Chris Mason5f39d392007-10-15 16:14:19 -04002068 mid = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05002069 WARN_ON(btrfs_header_generation(mid) != trans->transid);
Chris Masone66f7092007-04-20 13:16:02 -04002070
Li Zefana05a9bb2011-09-06 16:55:34 +08002071 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04002072 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08002073 pslot = path->slots[level + 1];
2074 }
Chris Masone66f7092007-04-20 13:16:02 -04002075
Chris Mason5f39d392007-10-15 16:14:19 -04002076 if (!parent)
Chris Masone66f7092007-04-20 13:16:02 -04002077 return 1;
Chris Masone66f7092007-04-20 13:16:02 -04002078
David Sterba4b231ae2019-08-21 19:16:27 +02002079 left = btrfs_read_node_slot(parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07002080 if (IS_ERR(left))
2081 left = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002082
2083 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04002084 if (left) {
Chris Masone66f7092007-04-20 13:16:02 -04002085 u32 left_nr;
Chris Mason925baed2008-06-25 16:01:30 -04002086
Josef Bacikbf774672020-08-20 11:46:04 -04002087 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
David Sterba8bead252018-04-04 02:03:48 +02002088 btrfs_set_lock_blocking_write(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002089
Chris Mason5f39d392007-10-15 16:14:19 -04002090 left_nr = btrfs_header_nritems(left);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002091 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002092 wret = 1;
2093 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002094 ret = btrfs_cow_block(trans, root, left, parent,
Josef Bacik9631e4c2020-08-20 11:46:03 -04002095 pslot - 1, &left,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04002096 BTRFS_NESTING_LEFT_COW);
Chris Mason54aa1f42007-06-22 14:16:25 -04002097 if (ret)
2098 wret = 1;
2099 else {
David Sterbad30a6682019-03-20 14:16:45 +01002100 wret = push_node_left(trans, left, mid, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04002101 }
Chris Mason33ade1f2007-04-20 13:48:57 -04002102 }
Chris Masone66f7092007-04-20 13:16:02 -04002103 if (wret < 0)
2104 ret = wret;
2105 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04002106 struct btrfs_disk_key disk_key;
Chris Masone66f7092007-04-20 13:16:02 -04002107 orig_slot += left_nr;
Chris Mason5f39d392007-10-15 16:14:19 -04002108 btrfs_node_key(mid, &disk_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01002109 ret = tree_mod_log_insert_key(parent, pslot,
2110 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2111 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002112 btrfs_set_node_key(parent, &disk_key, pslot);
2113 btrfs_mark_buffer_dirty(parent);
2114 if (btrfs_header_nritems(left) > orig_slot) {
2115 path->nodes[level] = left;
Chris Masone66f7092007-04-20 13:16:02 -04002116 path->slots[level + 1] -= 1;
2117 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002118 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002119 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04002120 } else {
2121 orig_slot -=
Chris Mason5f39d392007-10-15 16:14:19 -04002122 btrfs_header_nritems(left);
Chris Masone66f7092007-04-20 13:16:02 -04002123 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002124 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002125 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04002126 }
Chris Masone66f7092007-04-20 13:16:02 -04002127 return 0;
2128 }
Chris Mason925baed2008-06-25 16:01:30 -04002129 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002130 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04002131 }
David Sterba4b231ae2019-08-21 19:16:27 +02002132 right = btrfs_read_node_slot(parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07002133 if (IS_ERR(right))
2134 right = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002135
2136 /*
2137 * then try to empty the right most buffer into the middle
2138 */
Chris Mason5f39d392007-10-15 16:14:19 -04002139 if (right) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002140 u32 right_nr;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002141
Josef Bacikbf774672020-08-20 11:46:04 -04002142 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
David Sterba8bead252018-04-04 02:03:48 +02002143 btrfs_set_lock_blocking_write(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002144
Chris Mason5f39d392007-10-15 16:14:19 -04002145 right_nr = btrfs_header_nritems(right);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002146 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002147 wret = 1;
2148 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002149 ret = btrfs_cow_block(trans, root, right,
2150 parent, pslot + 1,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04002151 &right, BTRFS_NESTING_RIGHT_COW);
Chris Mason54aa1f42007-06-22 14:16:25 -04002152 if (ret)
2153 wret = 1;
2154 else {
David Sterba55d32ed2019-03-20 14:18:06 +01002155 wret = balance_node_right(trans, right, mid);
Chris Mason54aa1f42007-06-22 14:16:25 -04002156 }
Chris Mason33ade1f2007-04-20 13:48:57 -04002157 }
Chris Masone66f7092007-04-20 13:16:02 -04002158 if (wret < 0)
2159 ret = wret;
2160 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04002161 struct btrfs_disk_key disk_key;
2162
2163 btrfs_node_key(right, &disk_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01002164 ret = tree_mod_log_insert_key(parent, pslot + 1,
2165 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2166 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002167 btrfs_set_node_key(parent, &disk_key, pslot + 1);
2168 btrfs_mark_buffer_dirty(parent);
2169
2170 if (btrfs_header_nritems(mid) <= orig_slot) {
2171 path->nodes[level] = right;
Chris Masone66f7092007-04-20 13:16:02 -04002172 path->slots[level + 1] += 1;
2173 path->slots[level] = orig_slot -
Chris Mason5f39d392007-10-15 16:14:19 -04002174 btrfs_header_nritems(mid);
Chris Mason925baed2008-06-25 16:01:30 -04002175 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002176 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04002177 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002178 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002179 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04002180 }
Chris Masone66f7092007-04-20 13:16:02 -04002181 return 0;
2182 }
Chris Mason925baed2008-06-25 16:01:30 -04002183 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002184 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04002185 }
Chris Masone66f7092007-04-20 13:16:02 -04002186 return 1;
2187}
2188
Chris Mason74123bd2007-02-02 11:05:29 -05002189/*
Chris Masond352ac62008-09-29 15:18:18 -04002190 * readahead one full node of leaves, finding things that are close
2191 * to the block in 'slot', and triggering ra on them.
Chris Mason3c69fae2007-08-07 15:52:22 -04002192 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002193static void reada_for_search(struct btrfs_fs_info *fs_info,
Chris Masonc8c42862009-04-03 10:14:18 -04002194 struct btrfs_path *path,
2195 int level, int slot, u64 objectid)
Chris Mason3c69fae2007-08-07 15:52:22 -04002196{
Chris Mason5f39d392007-10-15 16:14:19 -04002197 struct extent_buffer *node;
Chris Mason01f46652007-12-21 16:24:26 -05002198 struct btrfs_disk_key disk_key;
Chris Mason3c69fae2007-08-07 15:52:22 -04002199 u32 nritems;
Chris Mason3c69fae2007-08-07 15:52:22 -04002200 u64 search;
Chris Masona7175312009-01-22 09:23:10 -05002201 u64 target;
Chris Mason6b800532007-10-15 16:17:34 -04002202 u64 nread = 0;
Chris Mason5f39d392007-10-15 16:14:19 -04002203 struct extent_buffer *eb;
Chris Mason6b800532007-10-15 16:17:34 -04002204 u32 nr;
2205 u32 blocksize;
2206 u32 nscan = 0;
Chris Masondb945352007-10-15 16:15:53 -04002207
Chris Masona6b6e752007-10-15 16:22:39 -04002208 if (level != 1)
Chris Mason3c69fae2007-08-07 15:52:22 -04002209 return;
2210
Chris Mason6702ed42007-08-07 16:15:09 -04002211 if (!path->nodes[level])
2212 return;
2213
Chris Mason5f39d392007-10-15 16:14:19 -04002214 node = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04002215
Chris Mason3c69fae2007-08-07 15:52:22 -04002216 search = btrfs_node_blockptr(node, slot);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002217 blocksize = fs_info->nodesize;
2218 eb = find_extent_buffer(fs_info, search);
Chris Mason5f39d392007-10-15 16:14:19 -04002219 if (eb) {
2220 free_extent_buffer(eb);
Chris Mason3c69fae2007-08-07 15:52:22 -04002221 return;
2222 }
2223
Chris Masona7175312009-01-22 09:23:10 -05002224 target = search;
Chris Mason6b800532007-10-15 16:17:34 -04002225
Chris Mason5f39d392007-10-15 16:14:19 -04002226 nritems = btrfs_header_nritems(node);
Chris Mason6b800532007-10-15 16:17:34 -04002227 nr = slot;
Josef Bacik25b8b932011-06-08 14:36:54 -04002228
Chris Masond3977122009-01-05 21:25:51 -05002229 while (1) {
David Sterbae4058b52015-11-27 16:31:35 +01002230 if (path->reada == READA_BACK) {
Chris Mason6b800532007-10-15 16:17:34 -04002231 if (nr == 0)
2232 break;
2233 nr--;
David Sterbae4058b52015-11-27 16:31:35 +01002234 } else if (path->reada == READA_FORWARD) {
Chris Mason6b800532007-10-15 16:17:34 -04002235 nr++;
2236 if (nr >= nritems)
2237 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04002238 }
David Sterbae4058b52015-11-27 16:31:35 +01002239 if (path->reada == READA_BACK && objectid) {
Chris Mason01f46652007-12-21 16:24:26 -05002240 btrfs_node_key(node, &disk_key, nr);
2241 if (btrfs_disk_key_objectid(&disk_key) != objectid)
2242 break;
2243 }
Chris Mason6b800532007-10-15 16:17:34 -04002244 search = btrfs_node_blockptr(node, nr);
Chris Masona7175312009-01-22 09:23:10 -05002245 if ((search <= target && target - search <= 65536) ||
2246 (search > target && search - target <= 65536)) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002247 readahead_tree_block(fs_info, search);
Chris Mason6b800532007-10-15 16:17:34 -04002248 nread += blocksize;
2249 }
2250 nscan++;
Chris Masona7175312009-01-22 09:23:10 -05002251 if ((nread > 65536 || nscan > 32))
Chris Mason6b800532007-10-15 16:17:34 -04002252 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04002253 }
2254}
Chris Mason925baed2008-06-25 16:01:30 -04002255
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002256static noinline void reada_for_balance(struct btrfs_fs_info *fs_info,
Josef Bacik0b088512013-06-17 14:23:02 -04002257 struct btrfs_path *path, int level)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002258{
2259 int slot;
2260 int nritems;
2261 struct extent_buffer *parent;
2262 struct extent_buffer *eb;
2263 u64 gen;
2264 u64 block1 = 0;
2265 u64 block2 = 0;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002266
Chris Mason8c594ea2009-04-20 15:50:10 -04002267 parent = path->nodes[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002268 if (!parent)
Josef Bacik0b088512013-06-17 14:23:02 -04002269 return;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002270
2271 nritems = btrfs_header_nritems(parent);
Chris Mason8c594ea2009-04-20 15:50:10 -04002272 slot = path->slots[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002273
2274 if (slot > 0) {
2275 block1 = btrfs_node_blockptr(parent, slot - 1);
2276 gen = btrfs_node_ptr_generation(parent, slot - 1);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002277 eb = find_extent_buffer(fs_info, block1);
Chris Masonb9fab912012-05-06 07:23:47 -04002278 /*
2279 * if we get -eagain from btrfs_buffer_uptodate, we
2280 * don't want to return eagain here. That will loop
2281 * forever
2282 */
2283 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002284 block1 = 0;
2285 free_extent_buffer(eb);
2286 }
Chris Mason8c594ea2009-04-20 15:50:10 -04002287 if (slot + 1 < nritems) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05002288 block2 = btrfs_node_blockptr(parent, slot + 1);
2289 gen = btrfs_node_ptr_generation(parent, slot + 1);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002290 eb = find_extent_buffer(fs_info, block2);
Chris Masonb9fab912012-05-06 07:23:47 -04002291 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002292 block2 = 0;
2293 free_extent_buffer(eb);
2294 }
Chris Mason8c594ea2009-04-20 15:50:10 -04002295
Josef Bacik0b088512013-06-17 14:23:02 -04002296 if (block1)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002297 readahead_tree_block(fs_info, block1);
Josef Bacik0b088512013-06-17 14:23:02 -04002298 if (block2)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002299 readahead_tree_block(fs_info, block2);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002300}
2301
2302
2303/*
Chris Masond3977122009-01-05 21:25:51 -05002304 * when we walk down the tree, it is usually safe to unlock the higher layers
2305 * in the tree. The exceptions are when our path goes through slot 0, because
2306 * operations on the tree might require changing key pointers higher up in the
2307 * tree.
Chris Masond352ac62008-09-29 15:18:18 -04002308 *
Chris Masond3977122009-01-05 21:25:51 -05002309 * callers might also have set path->keep_locks, which tells this code to keep
2310 * the lock if the path points to the last slot in the block. This is part of
2311 * walking through the tree, and selecting the next slot in the higher block.
Chris Masond352ac62008-09-29 15:18:18 -04002312 *
Chris Masond3977122009-01-05 21:25:51 -05002313 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so
2314 * if lowest_unlock is 1, level 0 won't be unlocked
Chris Masond352ac62008-09-29 15:18:18 -04002315 */
Chris Masone02119d2008-09-05 16:13:11 -04002316static noinline void unlock_up(struct btrfs_path *path, int level,
Chris Masonf7c79f32012-03-19 15:54:38 -04002317 int lowest_unlock, int min_write_lock_level,
2318 int *write_lock_level)
Chris Mason925baed2008-06-25 16:01:30 -04002319{
2320 int i;
2321 int skip_level = level;
Chris Mason051e1b92008-06-25 16:01:30 -04002322 int no_skips = 0;
Chris Mason925baed2008-06-25 16:01:30 -04002323 struct extent_buffer *t;
2324
2325 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2326 if (!path->nodes[i])
2327 break;
2328 if (!path->locks[i])
2329 break;
Chris Mason051e1b92008-06-25 16:01:30 -04002330 if (!no_skips && path->slots[i] == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002331 skip_level = i + 1;
2332 continue;
2333 }
Chris Mason051e1b92008-06-25 16:01:30 -04002334 if (!no_skips && path->keep_locks) {
Chris Mason925baed2008-06-25 16:01:30 -04002335 u32 nritems;
2336 t = path->nodes[i];
2337 nritems = btrfs_header_nritems(t);
Chris Mason051e1b92008-06-25 16:01:30 -04002338 if (nritems < 1 || path->slots[i] >= nritems - 1) {
Chris Mason925baed2008-06-25 16:01:30 -04002339 skip_level = i + 1;
2340 continue;
2341 }
2342 }
Chris Mason051e1b92008-06-25 16:01:30 -04002343 if (skip_level < i && i >= lowest_unlock)
2344 no_skips = 1;
2345
Chris Mason925baed2008-06-25 16:01:30 -04002346 t = path->nodes[i];
Liu Bod80bb3f2018-05-18 11:00:24 +08002347 if (i >= lowest_unlock && i > skip_level) {
Chris Masonbd681512011-07-16 15:23:14 -04002348 btrfs_tree_unlock_rw(t, path->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -04002349 path->locks[i] = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04002350 if (write_lock_level &&
2351 i > min_write_lock_level &&
2352 i <= *write_lock_level) {
2353 *write_lock_level = i - 1;
2354 }
Chris Mason925baed2008-06-25 16:01:30 -04002355 }
2356 }
2357}
2358
Chris Mason3c69fae2007-08-07 15:52:22 -04002359/*
Chris Masonc8c42862009-04-03 10:14:18 -04002360 * helper function for btrfs_search_slot. The goal is to find a block
2361 * in cache without setting the path to blocking. If we find the block
2362 * we return zero and the path is unchanged.
2363 *
2364 * If we can't find the block, we set the path blocking and do some
2365 * reada. -EAGAIN is returned and the search must be repeated.
2366 */
2367static int
Liu Bod07b8522017-01-30 12:23:42 -08002368read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
2369 struct extent_buffer **eb_ret, int level, int slot,
David Sterbacda79c52017-02-10 18:44:32 +01002370 const struct btrfs_key *key)
Chris Masonc8c42862009-04-03 10:14:18 -04002371{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002372 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonc8c42862009-04-03 10:14:18 -04002373 u64 blocknr;
2374 u64 gen;
Chris Masonc8c42862009-04-03 10:14:18 -04002375 struct extent_buffer *tmp;
Qu Wenruo581c1762018-03-29 09:08:11 +08002376 struct btrfs_key first_key;
Chris Mason76a05b32009-05-14 13:24:30 -04002377 int ret;
Qu Wenruo581c1762018-03-29 09:08:11 +08002378 int parent_level;
Chris Masonc8c42862009-04-03 10:14:18 -04002379
Nikolay Borisov213ff4b2020-05-27 13:10:59 +03002380 blocknr = btrfs_node_blockptr(*eb_ret, slot);
2381 gen = btrfs_node_ptr_generation(*eb_ret, slot);
2382 parent_level = btrfs_header_level(*eb_ret);
2383 btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
Chris Masonc8c42862009-04-03 10:14:18 -04002384
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002385 tmp = find_extent_buffer(fs_info, blocknr);
Chris Masoncb449212010-10-24 11:01:27 -04002386 if (tmp) {
Chris Masonb9fab912012-05-06 07:23:47 -04002387 /* first we do an atomic uptodate check */
Josef Bacikbdf7c002013-06-17 13:44:48 -04002388 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
Qu Wenruo448de472019-03-12 17:10:40 +08002389 /*
2390 * Do extra check for first_key, eb can be stale due to
2391 * being cached, read from scrub, or have multiple
2392 * parents (shared tree blocks).
2393 */
David Sterbae064d5e2019-03-20 14:58:13 +01002394 if (btrfs_verify_level_key(tmp,
Qu Wenruo448de472019-03-12 17:10:40 +08002395 parent_level - 1, &first_key, gen)) {
2396 free_extent_buffer(tmp);
2397 return -EUCLEAN;
2398 }
Josef Bacikbdf7c002013-06-17 13:44:48 -04002399 *eb_ret = tmp;
2400 return 0;
Chris Masoncb449212010-10-24 11:01:27 -04002401 }
Josef Bacikbdf7c002013-06-17 13:44:48 -04002402
2403 /* the pages were up to date, but we failed
2404 * the generation number check. Do a full
2405 * read for the generation number that is correct.
2406 * We must do this without dropping locks so
2407 * we can trust our generation number
2408 */
2409 btrfs_set_path_blocking(p);
2410
2411 /* now we're allowed to do a blocking uptodate check */
Qu Wenruo581c1762018-03-29 09:08:11 +08002412 ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
Josef Bacikbdf7c002013-06-17 13:44:48 -04002413 if (!ret) {
2414 *eb_ret = tmp;
2415 return 0;
2416 }
2417 free_extent_buffer(tmp);
2418 btrfs_release_path(p);
2419 return -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04002420 }
2421
2422 /*
2423 * reduce lock contention at high levels
2424 * of the btree by dropping locks before
Chris Mason76a05b32009-05-14 13:24:30 -04002425 * we read. Don't release the lock on the current
2426 * level because we need to walk this node to figure
2427 * out which blocks to read.
Chris Masonc8c42862009-04-03 10:14:18 -04002428 */
Chris Mason8c594ea2009-04-20 15:50:10 -04002429 btrfs_unlock_up_safe(p, level + 1);
2430 btrfs_set_path_blocking(p);
2431
David Sterbae4058b52015-11-27 16:31:35 +01002432 if (p->reada != READA_NONE)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002433 reada_for_search(fs_info, p, level, slot, key->objectid);
Chris Masonc8c42862009-04-03 10:14:18 -04002434
Chris Mason76a05b32009-05-14 13:24:30 -04002435 ret = -EAGAIN;
Liu Bo02a33072018-05-16 01:37:36 +08002436 tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
Qu Wenruo581c1762018-03-29 09:08:11 +08002437 &first_key);
Liu Bo64c043d2015-05-25 17:30:15 +08002438 if (!IS_ERR(tmp)) {
Chris Mason76a05b32009-05-14 13:24:30 -04002439 /*
2440 * If the read above didn't mark this buffer up to date,
2441 * it will never end up being up to date. Set ret to EIO now
2442 * and give up so that our caller doesn't loop forever
2443 * on our EAGAINs.
2444 */
Liu Boe6a1d6f2018-05-18 11:00:20 +08002445 if (!extent_buffer_uptodate(tmp))
Chris Mason76a05b32009-05-14 13:24:30 -04002446 ret = -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04002447 free_extent_buffer(tmp);
Liu Boc871b0f2016-06-06 12:01:23 -07002448 } else {
2449 ret = PTR_ERR(tmp);
Chris Mason76a05b32009-05-14 13:24:30 -04002450 }
Liu Bo02a33072018-05-16 01:37:36 +08002451
2452 btrfs_release_path(p);
Chris Mason76a05b32009-05-14 13:24:30 -04002453 return ret;
Chris Masonc8c42862009-04-03 10:14:18 -04002454}
2455
2456/*
2457 * helper function for btrfs_search_slot. This does all of the checks
2458 * for node-level blocks and does any balancing required based on
2459 * the ins_len.
2460 *
2461 * If no extra work was required, zero is returned. If we had to
2462 * drop the path, -EAGAIN is returned and btrfs_search_slot must
2463 * start over
2464 */
2465static int
2466setup_nodes_for_search(struct btrfs_trans_handle *trans,
2467 struct btrfs_root *root, struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -04002468 struct extent_buffer *b, int level, int ins_len,
2469 int *write_lock_level)
Chris Masonc8c42862009-04-03 10:14:18 -04002470{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002471 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonc8c42862009-04-03 10:14:18 -04002472 int ret;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002473
Chris Masonc8c42862009-04-03 10:14:18 -04002474 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002475 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
Chris Masonc8c42862009-04-03 10:14:18 -04002476 int sret;
2477
Chris Masonbd681512011-07-16 15:23:14 -04002478 if (*write_lock_level < level + 1) {
2479 *write_lock_level = level + 1;
2480 btrfs_release_path(p);
2481 goto again;
2482 }
2483
Chris Masonc8c42862009-04-03 10:14:18 -04002484 btrfs_set_path_blocking(p);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002485 reada_for_balance(fs_info, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002486 sret = split_node(trans, root, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002487
2488 BUG_ON(sret > 0);
2489 if (sret) {
2490 ret = sret;
2491 goto done;
2492 }
2493 b = p->nodes[level];
2494 } else if (ins_len < 0 && btrfs_header_nritems(b) <
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002495 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
Chris Masonc8c42862009-04-03 10:14:18 -04002496 int sret;
2497
Chris Masonbd681512011-07-16 15:23:14 -04002498 if (*write_lock_level < level + 1) {
2499 *write_lock_level = level + 1;
2500 btrfs_release_path(p);
2501 goto again;
2502 }
2503
Chris Masonc8c42862009-04-03 10:14:18 -04002504 btrfs_set_path_blocking(p);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002505 reada_for_balance(fs_info, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002506 sret = balance_level(trans, root, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002507
2508 if (sret) {
2509 ret = sret;
2510 goto done;
2511 }
2512 b = p->nodes[level];
2513 if (!b) {
David Sterbab3b4aa72011-04-21 01:20:15 +02002514 btrfs_release_path(p);
Chris Masonc8c42862009-04-03 10:14:18 -04002515 goto again;
2516 }
2517 BUG_ON(btrfs_header_nritems(b) == 1);
2518 }
2519 return 0;
2520
2521again:
2522 ret = -EAGAIN;
2523done:
2524 return ret;
2525}
2526
David Sterba381cf652015-01-02 18:45:16 +01002527int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002528 u64 iobjectid, u64 ioff, u8 key_type,
2529 struct btrfs_key *found_key)
2530{
2531 int ret;
2532 struct btrfs_key key;
2533 struct extent_buffer *eb;
David Sterba381cf652015-01-02 18:45:16 +01002534
2535 ASSERT(path);
David Sterba1d4c08e2015-01-02 19:36:14 +01002536 ASSERT(found_key);
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002537
2538 key.type = key_type;
2539 key.objectid = iobjectid;
2540 key.offset = ioff;
2541
2542 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
David Sterba1d4c08e2015-01-02 19:36:14 +01002543 if (ret < 0)
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002544 return ret;
2545
2546 eb = path->nodes[0];
2547 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
2548 ret = btrfs_next_leaf(fs_root, path);
2549 if (ret)
2550 return ret;
2551 eb = path->nodes[0];
2552 }
2553
2554 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
2555 if (found_key->type != key.type ||
2556 found_key->objectid != key.objectid)
2557 return 1;
2558
2559 return 0;
2560}
2561
Liu Bo1fc28d82018-05-18 11:00:21 +08002562static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
2563 struct btrfs_path *p,
2564 int write_lock_level)
2565{
2566 struct btrfs_fs_info *fs_info = root->fs_info;
2567 struct extent_buffer *b;
2568 int root_lock;
2569 int level = 0;
2570
2571 /* We try very hard to do read locks on the root */
2572 root_lock = BTRFS_READ_LOCK;
2573
2574 if (p->search_commit_root) {
Filipe Mananabe6821f2018-12-11 10:19:45 +00002575 /*
2576 * The commit roots are read only so we always do read locks,
2577 * and we always must hold the commit_root_sem when doing
2578 * searches on them, the only exception is send where we don't
2579 * want to block transaction commits for a long time, so
2580 * we need to clone the commit root in order to avoid races
2581 * with transaction commits that create a snapshot of one of
2582 * the roots used by a send operation.
2583 */
2584 if (p->need_commit_sem) {
Liu Bo1fc28d82018-05-18 11:00:21 +08002585 down_read(&fs_info->commit_root_sem);
Filipe Mananabe6821f2018-12-11 10:19:45 +00002586 b = btrfs_clone_extent_buffer(root->commit_root);
Liu Bo1fc28d82018-05-18 11:00:21 +08002587 up_read(&fs_info->commit_root_sem);
Filipe Mananabe6821f2018-12-11 10:19:45 +00002588 if (!b)
2589 return ERR_PTR(-ENOMEM);
2590
2591 } else {
2592 b = root->commit_root;
David Sterba67439da2019-10-08 13:28:47 +02002593 atomic_inc(&b->refs);
Filipe Mananabe6821f2018-12-11 10:19:45 +00002594 }
2595 level = btrfs_header_level(b);
Liu Bof9ddfd02018-05-29 21:27:06 +08002596 /*
2597 * Ensure that all callers have set skip_locking when
2598 * p->search_commit_root = 1.
2599 */
2600 ASSERT(p->skip_locking == 1);
Liu Bo1fc28d82018-05-18 11:00:21 +08002601
2602 goto out;
2603 }
2604
2605 if (p->skip_locking) {
2606 b = btrfs_root_node(root);
2607 level = btrfs_header_level(b);
2608 goto out;
2609 }
2610
2611 /*
Liu Bo662c6532018-05-18 11:00:23 +08002612 * If the level is set to maximum, we can skip trying to get the read
2613 * lock.
Liu Bo1fc28d82018-05-18 11:00:21 +08002614 */
Liu Bo662c6532018-05-18 11:00:23 +08002615 if (write_lock_level < BTRFS_MAX_LEVEL) {
2616 /*
2617 * We don't know the level of the root node until we actually
2618 * have it read locked
2619 */
Josef Bacik51899412020-08-20 11:46:01 -04002620 b = __btrfs_read_lock_root_node(root, p->recurse);
Liu Bo662c6532018-05-18 11:00:23 +08002621 level = btrfs_header_level(b);
2622 if (level > write_lock_level)
2623 goto out;
Liu Bo1fc28d82018-05-18 11:00:21 +08002624
Liu Bo662c6532018-05-18 11:00:23 +08002625 /* Whoops, must trade for write lock */
2626 btrfs_tree_read_unlock(b);
2627 free_extent_buffer(b);
2628 }
2629
Liu Bo1fc28d82018-05-18 11:00:21 +08002630 b = btrfs_lock_root_node(root);
2631 root_lock = BTRFS_WRITE_LOCK;
2632
2633 /* The level might have changed, check again */
2634 level = btrfs_header_level(b);
2635
2636out:
2637 p->nodes[level] = b;
2638 if (!p->skip_locking)
2639 p->locks[level] = root_lock;
2640 /*
2641 * Callers are responsible for dropping b's references.
2642 */
2643 return b;
2644}
2645
2646
Chris Masonc8c42862009-04-03 10:14:18 -04002647/*
Nikolay Borisov4271ece2017-12-13 09:38:14 +02002648 * btrfs_search_slot - look for a key in a tree and perform necessary
2649 * modifications to preserve tree invariants.
Chris Mason74123bd2007-02-02 11:05:29 -05002650 *
Nikolay Borisov4271ece2017-12-13 09:38:14 +02002651 * @trans: Handle of transaction, used when modifying the tree
2652 * @p: Holds all btree nodes along the search path
2653 * @root: The root node of the tree
2654 * @key: The key we are looking for
2655 * @ins_len: Indicates purpose of search, for inserts it is 1, for
2656 * deletions it's -1. 0 for plain searches
2657 * @cow: boolean should CoW operations be performed. Must always be 1
2658 * when modifying the tree.
Chris Mason97571fd2007-02-24 13:39:08 -05002659 *
Nikolay Borisov4271ece2017-12-13 09:38:14 +02002660 * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
2661 * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
2662 *
2663 * If @key is found, 0 is returned and you can find the item in the leaf level
2664 * of the path (level 0)
2665 *
2666 * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
2667 * points to the slot where it should be inserted
2668 *
2669 * If an error is encountered while searching the tree a negative error number
2670 * is returned
Chris Mason74123bd2007-02-02 11:05:29 -05002671 */
Omar Sandoval310712b2017-01-17 23:24:37 -08002672int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2673 const struct btrfs_key *key, struct btrfs_path *p,
2674 int ins_len, int cow)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002675{
Chris Mason5f39d392007-10-15 16:14:19 -04002676 struct extent_buffer *b;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002677 int slot;
2678 int ret;
Yan Zheng33c66f42009-07-22 09:59:00 -04002679 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002680 int level;
Chris Mason925baed2008-06-25 16:01:30 -04002681 int lowest_unlock = 1;
Chris Masonbd681512011-07-16 15:23:14 -04002682 /* everything at write_lock_level or lower must be write locked */
2683 int write_lock_level = 0;
Chris Mason9f3a7422007-08-07 15:52:19 -04002684 u8 lowest_level = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04002685 int min_write_lock_level;
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002686 int prev_cmp;
Chris Mason9f3a7422007-08-07 15:52:19 -04002687
Chris Mason6702ed42007-08-07 16:15:09 -04002688 lowest_level = p->lowest_level;
Chris Mason323ac952008-10-01 19:05:46 -04002689 WARN_ON(lowest_level && ins_len > 0);
Chris Mason22b0ebd2007-03-30 08:47:31 -04002690 WARN_ON(p->nodes[0] != NULL);
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002691 BUG_ON(!cow && ins_len);
Josef Bacik25179202008-10-29 14:49:05 -04002692
Chris Masonbd681512011-07-16 15:23:14 -04002693 if (ins_len < 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002694 lowest_unlock = 2;
Chris Mason65b51a02008-08-01 15:11:20 -04002695
Chris Masonbd681512011-07-16 15:23:14 -04002696 /* when we are removing items, we might have to go up to level
2697 * two as we update tree pointers Make sure we keep write
2698 * for those levels as well
2699 */
2700 write_lock_level = 2;
2701 } else if (ins_len > 0) {
2702 /*
2703 * for inserting items, make sure we have a write lock on
2704 * level 1 so we can update keys
2705 */
2706 write_lock_level = 1;
2707 }
2708
2709 if (!cow)
2710 write_lock_level = -1;
2711
Josef Bacik09a2a8f92013-04-05 16:51:15 -04002712 if (cow && (p->keep_locks || p->lowest_level))
Chris Masonbd681512011-07-16 15:23:14 -04002713 write_lock_level = BTRFS_MAX_LEVEL;
2714
Chris Masonf7c79f32012-03-19 15:54:38 -04002715 min_write_lock_level = write_lock_level;
2716
Chris Masonbb803952007-03-01 12:04:21 -05002717again:
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002718 prev_cmp = -1;
Liu Bo1fc28d82018-05-18 11:00:21 +08002719 b = btrfs_search_slot_get_root(root, p, write_lock_level);
Filipe Mananabe6821f2018-12-11 10:19:45 +00002720 if (IS_ERR(b)) {
2721 ret = PTR_ERR(b);
2722 goto done;
2723 }
Chris Mason925baed2008-06-25 16:01:30 -04002724
Chris Masoneb60cea2007-02-02 09:18:22 -05002725 while (b) {
Qu Wenruof624d972019-09-10 15:40:17 +08002726 int dec = 0;
2727
Chris Mason5f39d392007-10-15 16:14:19 -04002728 level = btrfs_header_level(b);
Chris Mason65b51a02008-08-01 15:11:20 -04002729
Chris Mason02217ed2007-03-02 16:08:05 -05002730 if (cow) {
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02002731 bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
2732
Chris Masonc8c42862009-04-03 10:14:18 -04002733 /*
2734 * if we don't really need to cow this block
2735 * then we don't want to set the path blocking,
2736 * so we test it here
2737 */
Jeff Mahoney64c12922016-06-08 00:36:38 -04002738 if (!should_cow_block(trans, root, b)) {
2739 trans->dirty = true;
Chris Mason65b51a02008-08-01 15:11:20 -04002740 goto cow_done;
Jeff Mahoney64c12922016-06-08 00:36:38 -04002741 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002742
Chris Masonbd681512011-07-16 15:23:14 -04002743 /*
2744 * must have write locks on this node and the
2745 * parent
2746 */
Josef Bacik5124e002012-11-07 13:44:13 -05002747 if (level > write_lock_level ||
2748 (level + 1 > write_lock_level &&
2749 level + 1 < BTRFS_MAX_LEVEL &&
2750 p->nodes[level + 1])) {
Chris Masonbd681512011-07-16 15:23:14 -04002751 write_lock_level = level + 1;
2752 btrfs_release_path(p);
2753 goto again;
2754 }
2755
Filipe Manana160f4082014-07-28 19:37:17 +01002756 btrfs_set_path_blocking(p);
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02002757 if (last_level)
2758 err = btrfs_cow_block(trans, root, b, NULL, 0,
Josef Bacik9631e4c2020-08-20 11:46:03 -04002759 &b,
2760 BTRFS_NESTING_COW);
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02002761 else
2762 err = btrfs_cow_block(trans, root, b,
2763 p->nodes[level + 1],
Josef Bacik9631e4c2020-08-20 11:46:03 -04002764 p->slots[level + 1], &b,
2765 BTRFS_NESTING_COW);
Yan Zheng33c66f42009-07-22 09:59:00 -04002766 if (err) {
Yan Zheng33c66f42009-07-22 09:59:00 -04002767 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04002768 goto done;
Chris Mason54aa1f42007-06-22 14:16:25 -04002769 }
Chris Mason02217ed2007-03-02 16:08:05 -05002770 }
Chris Mason65b51a02008-08-01 15:11:20 -04002771cow_done:
Chris Masoneb60cea2007-02-02 09:18:22 -05002772 p->nodes[level] = b;
Liu Bo52398342018-08-22 05:54:37 +08002773 /*
2774 * Leave path with blocking locks to avoid massive
2775 * lock context switch, this is made on purpose.
2776 */
Chris Masonb4ce94d2009-02-04 09:25:08 -05002777
2778 /*
2779 * we have a lock on b and as long as we aren't changing
2780 * the tree, there is no way to for the items in b to change.
2781 * It is safe to drop the lock on our parent before we
2782 * go through the expensive btree search on b.
2783 *
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002784 * If we're inserting or deleting (ins_len != 0), then we might
2785 * be changing slot zero, which may require changing the parent.
2786 * So, we can't drop the lock until after we know which slot
2787 * we're operating on.
Chris Masonb4ce94d2009-02-04 09:25:08 -05002788 */
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002789 if (!ins_len && !p->keep_locks) {
2790 int u = level + 1;
2791
2792 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
2793 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
2794 p->locks[u] = 0;
2795 }
2796 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05002797
Nikolay Borisov995e9a12020-05-27 13:10:53 +03002798 /*
2799 * If btrfs_bin_search returns an exact match (prev_cmp == 0)
2800 * we can safely assume the target key will always be in slot 0
2801 * on lower levels due to the invariants BTRFS' btree provides,
2802 * namely that a btrfs_key_ptr entry always points to the
2803 * lowest key in the child node, thus we can skip searching
2804 * lower levels
2805 */
2806 if (prev_cmp == 0) {
2807 slot = 0;
2808 ret = 0;
2809 } else {
2810 ret = btrfs_bin_search(b, key, &slot);
2811 prev_cmp = ret;
2812 if (ret < 0)
2813 goto done;
2814 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05002815
Qu Wenruof624d972019-09-10 15:40:17 +08002816 if (level == 0) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05002817 p->slots[level] = slot;
Yan Zheng87b29b22008-12-17 10:21:48 -05002818 if (ins_len > 0 &&
David Sterbae902baa2019-03-20 14:36:46 +01002819 btrfs_leaf_free_space(b) < ins_len) {
Chris Masonbd681512011-07-16 15:23:14 -04002820 if (write_lock_level < 1) {
2821 write_lock_level = 1;
2822 btrfs_release_path(p);
2823 goto again;
2824 }
2825
Chris Masonb4ce94d2009-02-04 09:25:08 -05002826 btrfs_set_path_blocking(p);
Yan Zheng33c66f42009-07-22 09:59:00 -04002827 err = split_leaf(trans, root, key,
2828 p, ins_len, ret == 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002829
Yan Zheng33c66f42009-07-22 09:59:00 -04002830 BUG_ON(err > 0);
2831 if (err) {
2832 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04002833 goto done;
2834 }
Chris Mason5c680ed2007-02-22 11:39:13 -05002835 }
Chris Mason459931e2008-12-10 09:10:46 -05002836 if (!p->search_for_split)
Chris Masonf7c79f32012-03-19 15:54:38 -04002837 unlock_up(p, level, lowest_unlock,
Liu Bo4b6f8e92018-08-14 10:46:53 +08002838 min_write_lock_level, NULL);
Chris Mason65b51a02008-08-01 15:11:20 -04002839 goto done;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002840 }
Qu Wenruof624d972019-09-10 15:40:17 +08002841 if (ret && slot > 0) {
2842 dec = 1;
2843 slot--;
2844 }
2845 p->slots[level] = slot;
2846 err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
2847 &write_lock_level);
2848 if (err == -EAGAIN)
2849 goto again;
2850 if (err) {
2851 ret = err;
2852 goto done;
2853 }
2854 b = p->nodes[level];
2855 slot = p->slots[level];
2856
2857 /*
2858 * Slot 0 is special, if we change the key we have to update
2859 * the parent pointer which means we must have a write lock on
2860 * the parent
2861 */
2862 if (slot == 0 && ins_len && write_lock_level < level + 1) {
2863 write_lock_level = level + 1;
2864 btrfs_release_path(p);
2865 goto again;
2866 }
2867
2868 unlock_up(p, level, lowest_unlock, min_write_lock_level,
2869 &write_lock_level);
2870
2871 if (level == lowest_level) {
2872 if (dec)
2873 p->slots[level]++;
2874 goto done;
2875 }
2876
2877 err = read_block_for_search(root, p, &b, level, slot, key);
2878 if (err == -EAGAIN)
2879 goto again;
2880 if (err) {
2881 ret = err;
2882 goto done;
2883 }
2884
2885 if (!p->skip_locking) {
2886 level = btrfs_header_level(b);
2887 if (level <= write_lock_level) {
2888 if (!btrfs_try_tree_write_lock(b)) {
2889 btrfs_set_path_blocking(p);
2890 btrfs_tree_lock(b);
2891 }
2892 p->locks[level] = BTRFS_WRITE_LOCK;
2893 } else {
2894 if (!btrfs_tree_read_lock_atomic(b)) {
2895 btrfs_set_path_blocking(p);
Josef Bacikfd7ba1c2020-08-20 11:46:02 -04002896 __btrfs_tree_read_lock(b, BTRFS_NESTING_NORMAL,
2897 p->recurse);
Qu Wenruof624d972019-09-10 15:40:17 +08002898 }
2899 p->locks[level] = BTRFS_READ_LOCK;
2900 }
2901 p->nodes[level] = b;
2902 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05002903 }
Chris Mason65b51a02008-08-01 15:11:20 -04002904 ret = 1;
2905done:
Chris Masonb4ce94d2009-02-04 09:25:08 -05002906 /*
2907 * we don't really know what they plan on doing with the path
2908 * from here on, so for now just mark it as blocking
2909 */
Chris Masonb9473432009-03-13 11:00:37 -04002910 if (!p->leave_spinning)
2911 btrfs_set_path_blocking(p);
Filipe Manana5f5bc6b2014-11-09 08:38:39 +00002912 if (ret < 0 && !p->skip_release_on_error)
David Sterbab3b4aa72011-04-21 01:20:15 +02002913 btrfs_release_path(p);
Chris Mason65b51a02008-08-01 15:11:20 -04002914 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002915}
2916
Chris Mason74123bd2007-02-02 11:05:29 -05002917/*
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002918 * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
2919 * current state of the tree together with the operations recorded in the tree
2920 * modification log to search for the key in a previous version of this tree, as
2921 * denoted by the time_seq parameter.
2922 *
2923 * Naturally, there is no support for insert, delete or cow operations.
2924 *
2925 * The resulting path and return value will be set up as if we called
2926 * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
2927 */
Omar Sandoval310712b2017-01-17 23:24:37 -08002928int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002929 struct btrfs_path *p, u64 time_seq)
2930{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002931 struct btrfs_fs_info *fs_info = root->fs_info;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002932 struct extent_buffer *b;
2933 int slot;
2934 int ret;
2935 int err;
2936 int level;
2937 int lowest_unlock = 1;
2938 u8 lowest_level = 0;
2939
2940 lowest_level = p->lowest_level;
2941 WARN_ON(p->nodes[0] != NULL);
2942
2943 if (p->search_commit_root) {
2944 BUG_ON(time_seq);
2945 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2946 }
2947
2948again:
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002949 b = get_old_root(root, time_seq);
Nikolay Borisov315bed42018-09-13 11:35:10 +03002950 if (!b) {
2951 ret = -EIO;
2952 goto done;
2953 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002954 level = btrfs_header_level(b);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002955 p->locks[level] = BTRFS_READ_LOCK;
2956
2957 while (b) {
Qu Wenruoabe93392019-09-10 15:40:18 +08002958 int dec = 0;
2959
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002960 level = btrfs_header_level(b);
2961 p->nodes[level] = b;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002962
2963 /*
2964 * we have a lock on b and as long as we aren't changing
2965 * the tree, there is no way to for the items in b to change.
2966 * It is safe to drop the lock on our parent before we
2967 * go through the expensive btree search on b.
2968 */
2969 btrfs_unlock_up_safe(p, level + 1);
2970
Nikolay Borisov995e9a12020-05-27 13:10:53 +03002971 ret = btrfs_bin_search(b, key, &slot);
Filipe Mananacbca7d52019-02-18 16:57:26 +00002972 if (ret < 0)
2973 goto done;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002974
Qu Wenruoabe93392019-09-10 15:40:18 +08002975 if (level == 0) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002976 p->slots[level] = slot;
2977 unlock_up(p, level, lowest_unlock, 0, NULL);
2978 goto done;
2979 }
Qu Wenruoabe93392019-09-10 15:40:18 +08002980
2981 if (ret && slot > 0) {
2982 dec = 1;
2983 slot--;
2984 }
2985 p->slots[level] = slot;
2986 unlock_up(p, level, lowest_unlock, 0, NULL);
2987
2988 if (level == lowest_level) {
2989 if (dec)
2990 p->slots[level]++;
2991 goto done;
2992 }
2993
2994 err = read_block_for_search(root, p, &b, level, slot, key);
2995 if (err == -EAGAIN)
2996 goto again;
2997 if (err) {
2998 ret = err;
2999 goto done;
3000 }
3001
3002 level = btrfs_header_level(b);
3003 if (!btrfs_tree_read_lock_atomic(b)) {
3004 btrfs_set_path_blocking(p);
3005 btrfs_tree_read_lock(b);
3006 }
3007 b = tree_mod_log_rewind(fs_info, p, b, time_seq);
3008 if (!b) {
3009 ret = -ENOMEM;
3010 goto done;
3011 }
3012 p->locks[level] = BTRFS_READ_LOCK;
3013 p->nodes[level] = b;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02003014 }
3015 ret = 1;
3016done:
3017 if (!p->leave_spinning)
3018 btrfs_set_path_blocking(p);
3019 if (ret < 0)
3020 btrfs_release_path(p);
3021
3022 return ret;
3023}
3024
3025/*
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003026 * helper to use instead of search slot if no exact match is needed but
3027 * instead the next or previous item should be returned.
3028 * When find_higher is true, the next higher item is returned, the next lower
3029 * otherwise.
3030 * When return_any and find_higher are both true, and no higher item is found,
3031 * return the next lower instead.
3032 * When return_any is true and find_higher is false, and no lower item is found,
3033 * return the next higher instead.
3034 * It returns 0 if any item is found, 1 if none is found (tree empty), and
3035 * < 0 on error
3036 */
3037int btrfs_search_slot_for_read(struct btrfs_root *root,
Omar Sandoval310712b2017-01-17 23:24:37 -08003038 const struct btrfs_key *key,
3039 struct btrfs_path *p, int find_higher,
3040 int return_any)
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003041{
3042 int ret;
3043 struct extent_buffer *leaf;
3044
3045again:
3046 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
3047 if (ret <= 0)
3048 return ret;
3049 /*
3050 * a return value of 1 means the path is at the position where the
3051 * item should be inserted. Normally this is the next bigger item,
3052 * but in case the previous item is the last in a leaf, path points
3053 * to the first free slot in the previous leaf, i.e. at an invalid
3054 * item.
3055 */
3056 leaf = p->nodes[0];
3057
3058 if (find_higher) {
3059 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
3060 ret = btrfs_next_leaf(root, p);
3061 if (ret <= 0)
3062 return ret;
3063 if (!return_any)
3064 return 1;
3065 /*
3066 * no higher item found, return the next
3067 * lower instead
3068 */
3069 return_any = 0;
3070 find_higher = 0;
3071 btrfs_release_path(p);
3072 goto again;
3073 }
3074 } else {
Arne Jansene6793762011-09-13 11:18:10 +02003075 if (p->slots[0] == 0) {
3076 ret = btrfs_prev_leaf(root, p);
3077 if (ret < 0)
3078 return ret;
3079 if (!ret) {
Filipe David Borba Manana23c6bf62014-01-11 21:28:54 +00003080 leaf = p->nodes[0];
3081 if (p->slots[0] == btrfs_header_nritems(leaf))
3082 p->slots[0]--;
Arne Jansene6793762011-09-13 11:18:10 +02003083 return 0;
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003084 }
Arne Jansene6793762011-09-13 11:18:10 +02003085 if (!return_any)
3086 return 1;
3087 /*
3088 * no lower item found, return the next
3089 * higher instead
3090 */
3091 return_any = 0;
3092 find_higher = 1;
3093 btrfs_release_path(p);
3094 goto again;
3095 } else {
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003096 --p->slots[0];
3097 }
3098 }
3099 return 0;
3100}
3101
3102/*
Chris Mason74123bd2007-02-02 11:05:29 -05003103 * adjust the pointers going up the tree, starting at level
3104 * making sure the right key of each node is points to 'key'.
3105 * This is used after shifting pointers to the left, so it stops
3106 * fixing up pointers when a given leaf/node is not in slot 0 of the
3107 * higher levels
Chris Masonaa5d6be2007-02-28 16:35:06 -05003108 *
Chris Mason74123bd2007-02-02 11:05:29 -05003109 */
Nikolay Borisovb167fa92018-06-20 15:48:47 +03003110static void fixup_low_keys(struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01003111 struct btrfs_disk_key *key, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003112{
3113 int i;
Chris Mason5f39d392007-10-15 16:14:19 -04003114 struct extent_buffer *t;
David Sterba0e82bcf2018-03-05 16:16:54 +01003115 int ret;
Chris Mason5f39d392007-10-15 16:14:19 -04003116
Chris Mason234b63a2007-03-13 10:46:10 -04003117 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05003118 int tslot = path->slots[i];
David Sterba0e82bcf2018-03-05 16:16:54 +01003119
Chris Masoneb60cea2007-02-02 09:18:22 -05003120 if (!path->nodes[i])
Chris Masonbe0e5c02007-01-26 15:51:26 -05003121 break;
Chris Mason5f39d392007-10-15 16:14:19 -04003122 t = path->nodes[i];
David Sterba0e82bcf2018-03-05 16:16:54 +01003123 ret = tree_mod_log_insert_key(t, tslot, MOD_LOG_KEY_REPLACE,
3124 GFP_ATOMIC);
3125 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003126 btrfs_set_node_key(t, key, tslot);
Chris Masond6025572007-03-30 14:27:56 -04003127 btrfs_mark_buffer_dirty(path->nodes[i]);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003128 if (tslot != 0)
3129 break;
3130 }
3131}
3132
Chris Mason74123bd2007-02-02 11:05:29 -05003133/*
Zheng Yan31840ae2008-09-23 13:14:14 -04003134 * update item key.
3135 *
3136 * This function isn't completely safe. It's the caller's responsibility
3137 * that the new key won't break the order
3138 */
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003139void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3140 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08003141 const struct btrfs_key *new_key)
Zheng Yan31840ae2008-09-23 13:14:14 -04003142{
3143 struct btrfs_disk_key disk_key;
3144 struct extent_buffer *eb;
3145 int slot;
3146
3147 eb = path->nodes[0];
3148 slot = path->slots[0];
3149 if (slot > 0) {
3150 btrfs_item_key(eb, &disk_key, slot - 1);
Qu Wenruo7c15d412019-04-25 08:55:53 +08003151 if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
3152 btrfs_crit(fs_info,
3153 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
3154 slot, btrfs_disk_key_objectid(&disk_key),
3155 btrfs_disk_key_type(&disk_key),
3156 btrfs_disk_key_offset(&disk_key),
3157 new_key->objectid, new_key->type,
3158 new_key->offset);
3159 btrfs_print_leaf(eb);
3160 BUG();
3161 }
Zheng Yan31840ae2008-09-23 13:14:14 -04003162 }
3163 if (slot < btrfs_header_nritems(eb) - 1) {
3164 btrfs_item_key(eb, &disk_key, slot + 1);
Qu Wenruo7c15d412019-04-25 08:55:53 +08003165 if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
3166 btrfs_crit(fs_info,
3167 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
3168 slot, btrfs_disk_key_objectid(&disk_key),
3169 btrfs_disk_key_type(&disk_key),
3170 btrfs_disk_key_offset(&disk_key),
3171 new_key->objectid, new_key->type,
3172 new_key->offset);
3173 btrfs_print_leaf(eb);
3174 BUG();
3175 }
Zheng Yan31840ae2008-09-23 13:14:14 -04003176 }
3177
3178 btrfs_cpu_key_to_disk(&disk_key, new_key);
3179 btrfs_set_item_key(eb, &disk_key, slot);
3180 btrfs_mark_buffer_dirty(eb);
3181 if (slot == 0)
Nikolay Borisovb167fa92018-06-20 15:48:47 +03003182 fixup_low_keys(path, &disk_key, 1);
Zheng Yan31840ae2008-09-23 13:14:14 -04003183}
3184
3185/*
Qu Wenruod16c7022020-08-19 14:35:50 +08003186 * Check key order of two sibling extent buffers.
3187 *
3188 * Return true if something is wrong.
3189 * Return false if everything is fine.
3190 *
3191 * Tree-checker only works inside one tree block, thus the following
3192 * corruption can not be detected by tree-checker:
3193 *
3194 * Leaf @left | Leaf @right
3195 * --------------------------------------------------------------
3196 * | 1 | 2 | 3 | 4 | 5 | f6 | | 7 | 8 |
3197 *
3198 * Key f6 in leaf @left itself is valid, but not valid when the next
3199 * key in leaf @right is 7.
3200 * This can only be checked at tree block merge time.
3201 * And since tree checker has ensured all key order in each tree block
3202 * is correct, we only need to bother the last key of @left and the first
3203 * key of @right.
3204 */
3205static bool check_sibling_keys(struct extent_buffer *left,
3206 struct extent_buffer *right)
3207{
3208 struct btrfs_key left_last;
3209 struct btrfs_key right_first;
3210 int level = btrfs_header_level(left);
3211 int nr_left = btrfs_header_nritems(left);
3212 int nr_right = btrfs_header_nritems(right);
3213
3214 /* No key to check in one of the tree blocks */
3215 if (!nr_left || !nr_right)
3216 return false;
3217
3218 if (level) {
3219 btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
3220 btrfs_node_key_to_cpu(right, &right_first, 0);
3221 } else {
3222 btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
3223 btrfs_item_key_to_cpu(right, &right_first, 0);
3224 }
3225
3226 if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
3227 btrfs_crit(left->fs_info,
3228"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
3229 left_last.objectid, left_last.type,
3230 left_last.offset, right_first.objectid,
3231 right_first.type, right_first.offset);
3232 return true;
3233 }
3234 return false;
3235}
3236
3237/*
Chris Mason74123bd2007-02-02 11:05:29 -05003238 * try to push data from one node into the next node left in the
Chris Mason79f95c82007-03-01 15:16:26 -05003239 * tree.
Chris Masonaa5d6be2007-02-28 16:35:06 -05003240 *
3241 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
3242 * error, and > 0 if there was no room in the left hand block.
Chris Mason74123bd2007-02-02 11:05:29 -05003243 */
Chris Mason98ed5172008-01-03 10:01:48 -05003244static int push_node_left(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003245 struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -04003246 struct extent_buffer *src, int empty)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003247{
David Sterbad30a6682019-03-20 14:16:45 +01003248 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003249 int push_items = 0;
Chris Masonbb803952007-03-01 12:04:21 -05003250 int src_nritems;
3251 int dst_nritems;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003252 int ret = 0;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003253
Chris Mason5f39d392007-10-15 16:14:19 -04003254 src_nritems = btrfs_header_nritems(src);
3255 dst_nritems = btrfs_header_nritems(dst);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003256 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
Chris Mason7bb86312007-12-11 09:25:06 -05003257 WARN_ON(btrfs_header_generation(src) != trans->transid);
3258 WARN_ON(btrfs_header_generation(dst) != trans->transid);
Chris Mason54aa1f42007-06-22 14:16:25 -04003259
Chris Masonbce4eae2008-04-24 14:42:46 -04003260 if (!empty && src_nritems <= 8)
Chris Mason971a1f62008-04-24 10:54:32 -04003261 return 1;
3262
Chris Masond3977122009-01-05 21:25:51 -05003263 if (push_items <= 0)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003264 return 1;
3265
Chris Masonbce4eae2008-04-24 14:42:46 -04003266 if (empty) {
Chris Mason971a1f62008-04-24 10:54:32 -04003267 push_items = min(src_nritems, push_items);
Chris Masonbce4eae2008-04-24 14:42:46 -04003268 if (push_items < src_nritems) {
3269 /* leave at least 8 pointers in the node if
3270 * we aren't going to empty it
3271 */
3272 if (src_nritems - push_items < 8) {
3273 if (push_items <= 8)
3274 return 1;
3275 push_items -= 8;
3276 }
3277 }
3278 } else
3279 push_items = min(src_nritems - 8, push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05003280
Qu Wenruod16c7022020-08-19 14:35:50 +08003281 /* dst is the left eb, src is the middle eb */
3282 if (check_sibling_keys(dst, src)) {
3283 ret = -EUCLEAN;
3284 btrfs_abort_transaction(trans, ret);
3285 return ret;
3286 }
David Sterbaed874f02019-03-20 14:22:04 +01003287 ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003288 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003289 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003290 return ret;
3291 }
Chris Mason5f39d392007-10-15 16:14:19 -04003292 copy_extent_buffer(dst, src,
3293 btrfs_node_key_ptr_offset(dst_nritems),
3294 btrfs_node_key_ptr_offset(0),
Chris Masond3977122009-01-05 21:25:51 -05003295 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason5f39d392007-10-15 16:14:19 -04003296
Chris Masonbb803952007-03-01 12:04:21 -05003297 if (push_items < src_nritems) {
Jan Schmidt57911b82012-10-19 09:22:03 +02003298 /*
David Sterbabf1d3422018-03-05 15:47:39 +01003299 * Don't call tree_mod_log_insert_move here, key removal was
3300 * already fully logged by tree_mod_log_eb_copy above.
Jan Schmidt57911b82012-10-19 09:22:03 +02003301 */
Chris Mason5f39d392007-10-15 16:14:19 -04003302 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
3303 btrfs_node_key_ptr_offset(push_items),
3304 (src_nritems - push_items) *
3305 sizeof(struct btrfs_key_ptr));
Chris Masonbb803952007-03-01 12:04:21 -05003306 }
Chris Mason5f39d392007-10-15 16:14:19 -04003307 btrfs_set_header_nritems(src, src_nritems - push_items);
3308 btrfs_set_header_nritems(dst, dst_nritems + push_items);
3309 btrfs_mark_buffer_dirty(src);
3310 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04003311
Chris Masonbb803952007-03-01 12:04:21 -05003312 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003313}
3314
Chris Mason97571fd2007-02-24 13:39:08 -05003315/*
Chris Mason79f95c82007-03-01 15:16:26 -05003316 * try to push data from one node into the next node right in the
3317 * tree.
3318 *
3319 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
3320 * error, and > 0 if there was no room in the right hand block.
3321 *
3322 * this will only push up to 1/2 the contents of the left node over
3323 */
Chris Mason5f39d392007-10-15 16:14:19 -04003324static int balance_node_right(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04003325 struct extent_buffer *dst,
3326 struct extent_buffer *src)
Chris Mason79f95c82007-03-01 15:16:26 -05003327{
David Sterba55d32ed2019-03-20 14:18:06 +01003328 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Mason79f95c82007-03-01 15:16:26 -05003329 int push_items = 0;
3330 int max_push;
3331 int src_nritems;
3332 int dst_nritems;
3333 int ret = 0;
Chris Mason79f95c82007-03-01 15:16:26 -05003334
Chris Mason7bb86312007-12-11 09:25:06 -05003335 WARN_ON(btrfs_header_generation(src) != trans->transid);
3336 WARN_ON(btrfs_header_generation(dst) != trans->transid);
3337
Chris Mason5f39d392007-10-15 16:14:19 -04003338 src_nritems = btrfs_header_nritems(src);
3339 dst_nritems = btrfs_header_nritems(dst);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003340 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
Chris Masond3977122009-01-05 21:25:51 -05003341 if (push_items <= 0)
Chris Mason79f95c82007-03-01 15:16:26 -05003342 return 1;
Chris Masonbce4eae2008-04-24 14:42:46 -04003343
Chris Masond3977122009-01-05 21:25:51 -05003344 if (src_nritems < 4)
Chris Masonbce4eae2008-04-24 14:42:46 -04003345 return 1;
Chris Mason79f95c82007-03-01 15:16:26 -05003346
3347 max_push = src_nritems / 2 + 1;
3348 /* don't try to empty the node */
Chris Masond3977122009-01-05 21:25:51 -05003349 if (max_push >= src_nritems)
Chris Mason79f95c82007-03-01 15:16:26 -05003350 return 1;
Yan252c38f2007-08-29 09:11:44 -04003351
Chris Mason79f95c82007-03-01 15:16:26 -05003352 if (max_push < push_items)
3353 push_items = max_push;
3354
Qu Wenruod16c7022020-08-19 14:35:50 +08003355 /* dst is the right eb, src is the middle eb */
3356 if (check_sibling_keys(src, dst)) {
3357 ret = -EUCLEAN;
3358 btrfs_abort_transaction(trans, ret);
3359 return ret;
3360 }
David Sterbabf1d3422018-03-05 15:47:39 +01003361 ret = tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
3362 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003363 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
3364 btrfs_node_key_ptr_offset(0),
3365 (dst_nritems) *
3366 sizeof(struct btrfs_key_ptr));
Chris Masond6025572007-03-30 14:27:56 -04003367
David Sterbaed874f02019-03-20 14:22:04 +01003368 ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
3369 push_items);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003370 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003371 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003372 return ret;
3373 }
Chris Mason5f39d392007-10-15 16:14:19 -04003374 copy_extent_buffer(dst, src,
3375 btrfs_node_key_ptr_offset(0),
3376 btrfs_node_key_ptr_offset(src_nritems - push_items),
Chris Masond3977122009-01-05 21:25:51 -05003377 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason79f95c82007-03-01 15:16:26 -05003378
Chris Mason5f39d392007-10-15 16:14:19 -04003379 btrfs_set_header_nritems(src, src_nritems - push_items);
3380 btrfs_set_header_nritems(dst, dst_nritems + push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05003381
Chris Mason5f39d392007-10-15 16:14:19 -04003382 btrfs_mark_buffer_dirty(src);
3383 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04003384
Chris Mason79f95c82007-03-01 15:16:26 -05003385 return ret;
3386}
3387
3388/*
Chris Mason97571fd2007-02-24 13:39:08 -05003389 * helper function to insert a new root level in the tree.
3390 * A new node is allocated, and a single item is inserted to
3391 * point to the existing root
Chris Masonaa5d6be2007-02-28 16:35:06 -05003392 *
3393 * returns zero on success or < 0 on failure.
Chris Mason97571fd2007-02-24 13:39:08 -05003394 */
Chris Masond3977122009-01-05 21:25:51 -05003395static noinline int insert_new_root(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04003396 struct btrfs_root *root,
Liu Bofdd99c72013-05-22 12:06:51 +00003397 struct btrfs_path *path, int level)
Chris Mason5c680ed2007-02-22 11:39:13 -05003398{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003399 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason7bb86312007-12-11 09:25:06 -05003400 u64 lower_gen;
Chris Mason5f39d392007-10-15 16:14:19 -04003401 struct extent_buffer *lower;
3402 struct extent_buffer *c;
Chris Mason925baed2008-06-25 16:01:30 -04003403 struct extent_buffer *old;
Chris Mason5f39d392007-10-15 16:14:19 -04003404 struct btrfs_disk_key lower_key;
David Sterbad9d19a02018-03-05 16:35:29 +01003405 int ret;
Chris Mason5c680ed2007-02-22 11:39:13 -05003406
3407 BUG_ON(path->nodes[level]);
3408 BUG_ON(path->nodes[level-1] != root->node);
3409
Chris Mason7bb86312007-12-11 09:25:06 -05003410 lower = path->nodes[level-1];
3411 if (level == 1)
3412 btrfs_item_key(lower, &lower_key, 0);
3413 else
3414 btrfs_node_key(lower, &lower_key, 0);
3415
Filipe Mananaa6279472019-01-25 11:48:51 +00003416 c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
Josef Bacik9631e4c2020-08-20 11:46:03 -04003417 root->node->start, 0,
Josef Bacikcf6f34a2020-08-20 11:46:07 -04003418 BTRFS_NESTING_NEW_ROOT);
Chris Mason5f39d392007-10-15 16:14:19 -04003419 if (IS_ERR(c))
3420 return PTR_ERR(c);
Chris Mason925baed2008-06-25 16:01:30 -04003421
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003422 root_add_used(root, fs_info->nodesize);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003423
Chris Mason5f39d392007-10-15 16:14:19 -04003424 btrfs_set_header_nritems(c, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04003425 btrfs_set_node_key(c, &lower_key, 0);
Chris Masondb945352007-10-15 16:15:53 -04003426 btrfs_set_node_blockptr(c, 0, lower->start);
Chris Mason7bb86312007-12-11 09:25:06 -05003427 lower_gen = btrfs_header_generation(lower);
Zheng Yan31840ae2008-09-23 13:14:14 -04003428 WARN_ON(lower_gen != trans->transid);
Chris Mason7bb86312007-12-11 09:25:06 -05003429
3430 btrfs_set_node_ptr_generation(c, 0, lower_gen);
Chris Mason5f39d392007-10-15 16:14:19 -04003431
3432 btrfs_mark_buffer_dirty(c);
Chris Masond5719762007-03-23 10:01:08 -04003433
Chris Mason925baed2008-06-25 16:01:30 -04003434 old = root->node;
David Sterbad9d19a02018-03-05 16:35:29 +01003435 ret = tree_mod_log_insert_root(root->node, c, 0);
3436 BUG_ON(ret < 0);
Chris Mason240f62c2011-03-23 14:54:42 -04003437 rcu_assign_pointer(root->node, c);
Chris Mason925baed2008-06-25 16:01:30 -04003438
3439 /* the super has an extra ref to root->node */
3440 free_extent_buffer(old);
3441
Chris Mason0b86a832008-03-24 15:01:56 -04003442 add_root_to_dirty_list(root);
David Sterba67439da2019-10-08 13:28:47 +02003443 atomic_inc(&c->refs);
Chris Mason5f39d392007-10-15 16:14:19 -04003444 path->nodes[level] = c;
chandan95449a12015-01-15 12:22:03 +05303445 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Chris Mason5c680ed2007-02-22 11:39:13 -05003446 path->slots[level] = 0;
3447 return 0;
3448}
3449
Chris Mason74123bd2007-02-02 11:05:29 -05003450/*
3451 * worker function to insert a single pointer in a node.
3452 * the node should have enough room for the pointer already
Chris Mason97571fd2007-02-24 13:39:08 -05003453 *
Chris Mason74123bd2007-02-02 11:05:29 -05003454 * slot and level indicate where you want the key to go, and
3455 * blocknr is the block the key points to.
3456 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003457static void insert_ptr(struct btrfs_trans_handle *trans,
David Sterba6ad3cf62019-03-20 14:32:45 +01003458 struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01003459 struct btrfs_disk_key *key, u64 bytenr,
Jan Schmidtc3e06962012-06-21 11:01:06 +02003460 int slot, int level)
Chris Mason74123bd2007-02-02 11:05:29 -05003461{
Chris Mason5f39d392007-10-15 16:14:19 -04003462 struct extent_buffer *lower;
Chris Mason74123bd2007-02-02 11:05:29 -05003463 int nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003464 int ret;
Chris Mason5c680ed2007-02-22 11:39:13 -05003465
3466 BUG_ON(!path->nodes[level]);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003467 btrfs_assert_tree_locked(path->nodes[level]);
Chris Mason5f39d392007-10-15 16:14:19 -04003468 lower = path->nodes[level];
3469 nritems = btrfs_header_nritems(lower);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04003470 BUG_ON(slot > nritems);
David Sterba6ad3cf62019-03-20 14:32:45 +01003471 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
Chris Mason74123bd2007-02-02 11:05:29 -05003472 if (slot != nritems) {
David Sterbabf1d3422018-03-05 15:47:39 +01003473 if (level) {
3474 ret = tree_mod_log_insert_move(lower, slot + 1, slot,
David Sterbaa446a972018-03-05 15:26:29 +01003475 nritems - slot);
David Sterbabf1d3422018-03-05 15:47:39 +01003476 BUG_ON(ret < 0);
3477 }
Chris Mason5f39d392007-10-15 16:14:19 -04003478 memmove_extent_buffer(lower,
3479 btrfs_node_key_ptr_offset(slot + 1),
3480 btrfs_node_key_ptr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04003481 (nritems - slot) * sizeof(struct btrfs_key_ptr));
Chris Mason74123bd2007-02-02 11:05:29 -05003482 }
Jan Schmidtc3e06962012-06-21 11:01:06 +02003483 if (level) {
David Sterbae09c2ef2018-03-05 15:09:03 +01003484 ret = tree_mod_log_insert_key(lower, slot, MOD_LOG_KEY_ADD,
3485 GFP_NOFS);
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003486 BUG_ON(ret < 0);
3487 }
Chris Mason5f39d392007-10-15 16:14:19 -04003488 btrfs_set_node_key(lower, key, slot);
Chris Masondb945352007-10-15 16:15:53 -04003489 btrfs_set_node_blockptr(lower, slot, bytenr);
Chris Mason74493f72007-12-11 09:25:06 -05003490 WARN_ON(trans->transid == 0);
3491 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04003492 btrfs_set_header_nritems(lower, nritems + 1);
3493 btrfs_mark_buffer_dirty(lower);
Chris Mason74123bd2007-02-02 11:05:29 -05003494}
3495
Chris Mason97571fd2007-02-24 13:39:08 -05003496/*
3497 * split the node at the specified level in path in two.
3498 * The path is corrected to point to the appropriate node after the split
3499 *
3500 * Before splitting this tries to make some room in the node by pushing
3501 * left and right, if either one works, it returns right away.
Chris Masonaa5d6be2007-02-28 16:35:06 -05003502 *
3503 * returns 0 on success and < 0 on failure
Chris Mason97571fd2007-02-24 13:39:08 -05003504 */
Chris Masone02119d2008-09-05 16:13:11 -04003505static noinline int split_node(struct btrfs_trans_handle *trans,
3506 struct btrfs_root *root,
3507 struct btrfs_path *path, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003508{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003509 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04003510 struct extent_buffer *c;
3511 struct extent_buffer *split;
3512 struct btrfs_disk_key disk_key;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003513 int mid;
Chris Mason5c680ed2007-02-22 11:39:13 -05003514 int ret;
Chris Mason7518a232007-03-12 12:01:18 -04003515 u32 c_nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003516
Chris Mason5f39d392007-10-15 16:14:19 -04003517 c = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05003518 WARN_ON(btrfs_header_generation(c) != trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04003519 if (c == root->node) {
Jan Schmidtd9abbf12013-03-20 13:49:48 +00003520 /*
Jan Schmidt90f8d622013-04-13 13:19:53 +00003521 * trying to split the root, lets make a new one
3522 *
Liu Bofdd99c72013-05-22 12:06:51 +00003523 * tree mod log: We don't log_removal old root in
Jan Schmidt90f8d622013-04-13 13:19:53 +00003524 * insert_new_root, because that root buffer will be kept as a
3525 * normal node. We are going to log removal of half of the
3526 * elements below with tree_mod_log_eb_copy. We're holding a
3527 * tree lock on the buffer, which is why we cannot race with
3528 * other tree_mod_log users.
Jan Schmidtd9abbf12013-03-20 13:49:48 +00003529 */
Liu Bofdd99c72013-05-22 12:06:51 +00003530 ret = insert_new_root(trans, root, path, level + 1);
Chris Mason5c680ed2007-02-22 11:39:13 -05003531 if (ret)
3532 return ret;
Chris Masonb3612422009-05-13 19:12:15 -04003533 } else {
Chris Masone66f7092007-04-20 13:16:02 -04003534 ret = push_nodes_for_insert(trans, root, path, level);
Chris Mason5f39d392007-10-15 16:14:19 -04003535 c = path->nodes[level];
3536 if (!ret && btrfs_header_nritems(c) <
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003537 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
Chris Masone66f7092007-04-20 13:16:02 -04003538 return 0;
Chris Mason54aa1f42007-06-22 14:16:25 -04003539 if (ret < 0)
3540 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003541 }
Chris Masone66f7092007-04-20 13:16:02 -04003542
Chris Mason5f39d392007-10-15 16:14:19 -04003543 c_nritems = btrfs_header_nritems(c);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003544 mid = (c_nritems + 1) / 2;
3545 btrfs_node_key(c, &disk_key, mid);
Chris Mason7bb86312007-12-11 09:25:06 -05003546
Filipe Mananaa6279472019-01-25 11:48:51 +00003547 split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
Josef Bacik4dff97e2020-08-20 11:46:06 -04003548 c->start, 0, BTRFS_NESTING_SPLIT);
Chris Mason5f39d392007-10-15 16:14:19 -04003549 if (IS_ERR(split))
3550 return PTR_ERR(split);
Chris Mason54aa1f42007-06-22 14:16:25 -04003551
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003552 root_add_used(root, fs_info->nodesize);
Nikolay Borisovbc877d22018-06-18 14:13:19 +03003553 ASSERT(btrfs_header_level(c) == level);
Chris Mason5f39d392007-10-15 16:14:19 -04003554
David Sterbaed874f02019-03-20 14:22:04 +01003555 ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003556 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003557 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003558 return ret;
3559 }
Chris Mason5f39d392007-10-15 16:14:19 -04003560 copy_extent_buffer(split, c,
3561 btrfs_node_key_ptr_offset(0),
3562 btrfs_node_key_ptr_offset(mid),
3563 (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
3564 btrfs_set_header_nritems(split, c_nritems - mid);
3565 btrfs_set_header_nritems(c, mid);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003566 ret = 0;
3567
Chris Mason5f39d392007-10-15 16:14:19 -04003568 btrfs_mark_buffer_dirty(c);
3569 btrfs_mark_buffer_dirty(split);
3570
David Sterba6ad3cf62019-03-20 14:32:45 +01003571 insert_ptr(trans, path, &disk_key, split->start,
Jan Schmidtc3e06962012-06-21 11:01:06 +02003572 path->slots[level + 1] + 1, level + 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05003573
Chris Mason5de08d72007-02-24 06:24:44 -05003574 if (path->slots[level] >= mid) {
Chris Mason5c680ed2007-02-22 11:39:13 -05003575 path->slots[level] -= mid;
Chris Mason925baed2008-06-25 16:01:30 -04003576 btrfs_tree_unlock(c);
Chris Mason5f39d392007-10-15 16:14:19 -04003577 free_extent_buffer(c);
3578 path->nodes[level] = split;
Chris Mason5c680ed2007-02-22 11:39:13 -05003579 path->slots[level + 1] += 1;
3580 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003581 btrfs_tree_unlock(split);
Chris Mason5f39d392007-10-15 16:14:19 -04003582 free_extent_buffer(split);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003583 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05003584 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003585}
3586
Chris Mason74123bd2007-02-02 11:05:29 -05003587/*
3588 * how many bytes are required to store the items in a leaf. start
3589 * and nr indicate which items in the leaf to check. This totals up the
3590 * space used both by the item structs and the item data
3591 */
Chris Mason5f39d392007-10-15 16:14:19 -04003592static int leaf_space_used(struct extent_buffer *l, int start, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003593{
Josef Bacik41be1f32012-10-15 13:43:18 -04003594 struct btrfs_item *start_item;
3595 struct btrfs_item *end_item;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003596 int data_len;
Chris Mason5f39d392007-10-15 16:14:19 -04003597 int nritems = btrfs_header_nritems(l);
Chris Masond4dbff92007-04-04 14:08:15 -04003598 int end = min(nritems, start + nr) - 1;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003599
3600 if (!nr)
3601 return 0;
Ross Kirkdd3cc162013-09-16 15:58:09 +01003602 start_item = btrfs_item_nr(start);
3603 end_item = btrfs_item_nr(end);
David Sterbaa31356b2020-04-29 22:56:01 +02003604 data_len = btrfs_item_offset(l, start_item) +
3605 btrfs_item_size(l, start_item);
3606 data_len = data_len - btrfs_item_offset(l, end_item);
Chris Mason0783fcf2007-03-12 20:12:07 -04003607 data_len += sizeof(struct btrfs_item) * nr;
Chris Masond4dbff92007-04-04 14:08:15 -04003608 WARN_ON(data_len < 0);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003609 return data_len;
3610}
3611
Chris Mason74123bd2007-02-02 11:05:29 -05003612/*
Chris Masond4dbff92007-04-04 14:08:15 -04003613 * The space between the end of the leaf items and
3614 * the start of the leaf data. IOW, how much room
3615 * the leaf has left for both items and data
3616 */
David Sterbae902baa2019-03-20 14:36:46 +01003617noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
Chris Masond4dbff92007-04-04 14:08:15 -04003618{
David Sterbae902baa2019-03-20 14:36:46 +01003619 struct btrfs_fs_info *fs_info = leaf->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04003620 int nritems = btrfs_header_nritems(leaf);
3621 int ret;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003622
3623 ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003624 if (ret < 0) {
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003625 btrfs_crit(fs_info,
3626 "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
3627 ret,
3628 (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
3629 leaf_space_used(leaf, 0, nritems), nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003630 }
3631 return ret;
Chris Masond4dbff92007-04-04 14:08:15 -04003632}
3633
Chris Mason99d8f832010-07-07 10:51:48 -04003634/*
3635 * min slot controls the lowest index we're willing to push to the
3636 * right. We'll push up to and including min_slot, but no lower
3637 */
David Sterbaf72f0012019-03-20 14:39:45 +01003638static noinline int __push_leaf_right(struct btrfs_path *path,
Chris Mason44871b12009-03-13 10:04:31 -04003639 int data_size, int empty,
3640 struct extent_buffer *right,
Chris Mason99d8f832010-07-07 10:51:48 -04003641 int free_space, u32 left_nritems,
3642 u32 min_slot)
Chris Mason00ec4c52007-02-24 12:47:20 -05003643{
David Sterbaf72f0012019-03-20 14:39:45 +01003644 struct btrfs_fs_info *fs_info = right->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04003645 struct extent_buffer *left = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04003646 struct extent_buffer *upper = path->nodes[1];
Chris Masoncfed81a2012-03-03 07:40:03 -05003647 struct btrfs_map_token token;
Chris Mason5f39d392007-10-15 16:14:19 -04003648 struct btrfs_disk_key disk_key;
Chris Mason00ec4c52007-02-24 12:47:20 -05003649 int slot;
Chris Mason34a38212007-11-07 13:31:03 -05003650 u32 i;
Chris Mason00ec4c52007-02-24 12:47:20 -05003651 int push_space = 0;
3652 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04003653 struct btrfs_item *item;
Chris Mason34a38212007-11-07 13:31:03 -05003654 u32 nr;
Chris Mason7518a232007-03-12 12:01:18 -04003655 u32 right_nritems;
Chris Mason5f39d392007-10-15 16:14:19 -04003656 u32 data_end;
Chris Masondb945352007-10-15 16:15:53 -04003657 u32 this_item_size;
Chris Mason00ec4c52007-02-24 12:47:20 -05003658
Chris Mason34a38212007-11-07 13:31:03 -05003659 if (empty)
3660 nr = 0;
3661 else
Chris Mason99d8f832010-07-07 10:51:48 -04003662 nr = max_t(u32, 1, min_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003663
Zheng Yan31840ae2008-09-23 13:14:14 -04003664 if (path->slots[0] >= left_nritems)
Yan Zheng87b29b22008-12-17 10:21:48 -05003665 push_space += data_size;
Zheng Yan31840ae2008-09-23 13:14:14 -04003666
Chris Mason44871b12009-03-13 10:04:31 -04003667 slot = path->slots[1];
Chris Mason34a38212007-11-07 13:31:03 -05003668 i = left_nritems - 1;
3669 while (i >= nr) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003670 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003671
Zheng Yan31840ae2008-09-23 13:14:14 -04003672 if (!empty && push_items > 0) {
3673 if (path->slots[0] > i)
3674 break;
3675 if (path->slots[0] == i) {
David Sterbae902baa2019-03-20 14:36:46 +01003676 int space = btrfs_leaf_free_space(left);
3677
Zheng Yan31840ae2008-09-23 13:14:14 -04003678 if (space + push_space * 2 > free_space)
3679 break;
3680 }
3681 }
3682
Chris Mason00ec4c52007-02-24 12:47:20 -05003683 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05003684 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04003685
Chris Masondb945352007-10-15 16:15:53 -04003686 this_item_size = btrfs_item_size(left, item);
3687 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Mason00ec4c52007-02-24 12:47:20 -05003688 break;
Zheng Yan31840ae2008-09-23 13:14:14 -04003689
Chris Mason00ec4c52007-02-24 12:47:20 -05003690 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04003691 push_space += this_item_size + sizeof(*item);
Chris Mason34a38212007-11-07 13:31:03 -05003692 if (i == 0)
3693 break;
3694 i--;
Chris Masondb945352007-10-15 16:15:53 -04003695 }
Chris Mason5f39d392007-10-15 16:14:19 -04003696
Chris Mason925baed2008-06-25 16:01:30 -04003697 if (push_items == 0)
3698 goto out_unlock;
Chris Mason5f39d392007-10-15 16:14:19 -04003699
Julia Lawall6c1500f2012-11-03 20:30:18 +00003700 WARN_ON(!empty && push_items == left_nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003701
Chris Mason00ec4c52007-02-24 12:47:20 -05003702 /* push left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04003703 right_nritems = btrfs_header_nritems(right);
Chris Mason34a38212007-11-07 13:31:03 -05003704
Chris Mason5f39d392007-10-15 16:14:19 -04003705 push_space = btrfs_item_end_nr(left, left_nritems - push_items);
David Sterba8f881e82019-03-20 11:33:10 +01003706 push_space -= leaf_data_end(left);
Chris Mason5f39d392007-10-15 16:14:19 -04003707
Chris Mason00ec4c52007-02-24 12:47:20 -05003708 /* make room in the right data area */
David Sterba8f881e82019-03-20 11:33:10 +01003709 data_end = leaf_data_end(right);
Chris Mason5f39d392007-10-15 16:14:19 -04003710 memmove_extent_buffer(right,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003711 BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
3712 BTRFS_LEAF_DATA_OFFSET + data_end,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003713 BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04003714
Chris Mason00ec4c52007-02-24 12:47:20 -05003715 /* copy from the left data area */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003716 copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003717 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
David Sterba8f881e82019-03-20 11:33:10 +01003718 BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
Chris Masond6025572007-03-30 14:27:56 -04003719 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04003720
3721 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
3722 btrfs_item_nr_offset(0),
3723 right_nritems * sizeof(struct btrfs_item));
3724
Chris Mason00ec4c52007-02-24 12:47:20 -05003725 /* copy the items from left to right */
Chris Mason5f39d392007-10-15 16:14:19 -04003726 copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
3727 btrfs_item_nr_offset(left_nritems - push_items),
3728 push_items * sizeof(struct btrfs_item));
Chris Mason00ec4c52007-02-24 12:47:20 -05003729
3730 /* update the item pointers */
David Sterbac82f8232019-08-09 17:48:21 +02003731 btrfs_init_map_token(&token, right);
Chris Mason7518a232007-03-12 12:01:18 -04003732 right_nritems += push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04003733 btrfs_set_header_nritems(right, right_nritems);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003734 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
Chris Mason7518a232007-03-12 12:01:18 -04003735 for (i = 0; i < right_nritems; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003736 item = btrfs_item_nr(i);
David Sterbacc4c13d2020-04-29 02:15:56 +02003737 push_space -= btrfs_token_item_size(&token, item);
3738 btrfs_set_token_item_offset(&token, item, push_space);
Chris Masondb945352007-10-15 16:15:53 -04003739 }
3740
Chris Mason7518a232007-03-12 12:01:18 -04003741 left_nritems -= push_items;
Chris Mason5f39d392007-10-15 16:14:19 -04003742 btrfs_set_header_nritems(left, left_nritems);
Chris Mason00ec4c52007-02-24 12:47:20 -05003743
Chris Mason34a38212007-11-07 13:31:03 -05003744 if (left_nritems)
3745 btrfs_mark_buffer_dirty(left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003746 else
David Sterba6a884d7d2019-03-20 14:30:02 +01003747 btrfs_clean_tree_block(left);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003748
Chris Mason5f39d392007-10-15 16:14:19 -04003749 btrfs_mark_buffer_dirty(right);
Chris Masona429e512007-04-18 16:15:28 -04003750
Chris Mason5f39d392007-10-15 16:14:19 -04003751 btrfs_item_key(right, &disk_key, 0);
3752 btrfs_set_node_key(upper, &disk_key, slot + 1);
Chris Masond6025572007-03-30 14:27:56 -04003753 btrfs_mark_buffer_dirty(upper);
Chris Mason02217ed2007-03-02 16:08:05 -05003754
Chris Mason00ec4c52007-02-24 12:47:20 -05003755 /* then fixup the leaf pointer in the path */
Chris Mason7518a232007-03-12 12:01:18 -04003756 if (path->slots[0] >= left_nritems) {
3757 path->slots[0] -= left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04003758 if (btrfs_header_nritems(path->nodes[0]) == 0)
David Sterba6a884d7d2019-03-20 14:30:02 +01003759 btrfs_clean_tree_block(path->nodes[0]);
Chris Mason925baed2008-06-25 16:01:30 -04003760 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04003761 free_extent_buffer(path->nodes[0]);
3762 path->nodes[0] = right;
Chris Mason00ec4c52007-02-24 12:47:20 -05003763 path->slots[1] += 1;
3764 } else {
Chris Mason925baed2008-06-25 16:01:30 -04003765 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04003766 free_extent_buffer(right);
Chris Mason00ec4c52007-02-24 12:47:20 -05003767 }
3768 return 0;
Chris Mason925baed2008-06-25 16:01:30 -04003769
3770out_unlock:
3771 btrfs_tree_unlock(right);
3772 free_extent_buffer(right);
3773 return 1;
Chris Mason00ec4c52007-02-24 12:47:20 -05003774}
Chris Mason925baed2008-06-25 16:01:30 -04003775
Chris Mason00ec4c52007-02-24 12:47:20 -05003776/*
Chris Mason44871b12009-03-13 10:04:31 -04003777 * push some data in the path leaf to the right, trying to free up at
3778 * least data_size bytes. returns zero if the push worked, nonzero otherwise
3779 *
3780 * returns 1 if the push failed because the other node didn't have enough
3781 * room, 0 if everything worked out and < 0 if there were major errors.
Chris Mason99d8f832010-07-07 10:51:48 -04003782 *
3783 * this will push starting from min_slot to the end of the leaf. It won't
3784 * push any slot lower than min_slot
Chris Mason44871b12009-03-13 10:04:31 -04003785 */
3786static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04003787 *root, struct btrfs_path *path,
3788 int min_data_size, int data_size,
3789 int empty, u32 min_slot)
Chris Mason44871b12009-03-13 10:04:31 -04003790{
3791 struct extent_buffer *left = path->nodes[0];
3792 struct extent_buffer *right;
3793 struct extent_buffer *upper;
3794 int slot;
3795 int free_space;
3796 u32 left_nritems;
3797 int ret;
3798
3799 if (!path->nodes[1])
3800 return 1;
3801
3802 slot = path->slots[1];
3803 upper = path->nodes[1];
3804 if (slot >= btrfs_header_nritems(upper) - 1)
3805 return 1;
3806
3807 btrfs_assert_tree_locked(path->nodes[1]);
3808
David Sterba4b231ae2019-08-21 19:16:27 +02003809 right = btrfs_read_node_slot(upper, slot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07003810 /*
3811 * slot + 1 is not valid or we fail to read the right node,
3812 * no big deal, just return.
3813 */
3814 if (IS_ERR(right))
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00003815 return 1;
3816
Josef Bacikbf774672020-08-20 11:46:04 -04003817 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
David Sterba8bead252018-04-04 02:03:48 +02003818 btrfs_set_lock_blocking_write(right);
Chris Mason44871b12009-03-13 10:04:31 -04003819
David Sterbae902baa2019-03-20 14:36:46 +01003820 free_space = btrfs_leaf_free_space(right);
Chris Mason44871b12009-03-13 10:04:31 -04003821 if (free_space < data_size)
3822 goto out_unlock;
3823
3824 /* cow and double check */
3825 ret = btrfs_cow_block(trans, root, right, upper,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04003826 slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
Chris Mason44871b12009-03-13 10:04:31 -04003827 if (ret)
3828 goto out_unlock;
3829
David Sterbae902baa2019-03-20 14:36:46 +01003830 free_space = btrfs_leaf_free_space(right);
Chris Mason44871b12009-03-13 10:04:31 -04003831 if (free_space < data_size)
3832 goto out_unlock;
3833
3834 left_nritems = btrfs_header_nritems(left);
3835 if (left_nritems == 0)
3836 goto out_unlock;
3837
Qu Wenruod16c7022020-08-19 14:35:50 +08003838 if (check_sibling_keys(left, right)) {
3839 ret = -EUCLEAN;
3840 btrfs_tree_unlock(right);
3841 free_extent_buffer(right);
3842 return ret;
3843 }
Filipe David Borba Manana2ef1fed2013-12-04 22:17:39 +00003844 if (path->slots[0] == left_nritems && !empty) {
3845 /* Key greater than all keys in the leaf, right neighbor has
3846 * enough room for it and we're not emptying our leaf to delete
3847 * it, therefore use right neighbor to insert the new item and
Andrea Gelmini52042d82018-11-28 12:05:13 +01003848 * no need to touch/dirty our left leaf. */
Filipe David Borba Manana2ef1fed2013-12-04 22:17:39 +00003849 btrfs_tree_unlock(left);
3850 free_extent_buffer(left);
3851 path->nodes[0] = right;
3852 path->slots[0] = 0;
3853 path->slots[1]++;
3854 return 0;
3855 }
3856
David Sterbaf72f0012019-03-20 14:39:45 +01003857 return __push_leaf_right(path, min_data_size, empty,
Chris Mason99d8f832010-07-07 10:51:48 -04003858 right, free_space, left_nritems, min_slot);
Chris Mason44871b12009-03-13 10:04:31 -04003859out_unlock:
3860 btrfs_tree_unlock(right);
3861 free_extent_buffer(right);
3862 return 1;
3863}
3864
3865/*
Chris Mason74123bd2007-02-02 11:05:29 -05003866 * push some data in the path leaf to the left, trying to free up at
3867 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04003868 *
3869 * max_slot can put a limit on how far into the leaf we'll push items. The
3870 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
3871 * items
Chris Mason74123bd2007-02-02 11:05:29 -05003872 */
David Sterba8087c192019-03-20 14:40:41 +01003873static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
Chris Mason44871b12009-03-13 10:04:31 -04003874 int empty, struct extent_buffer *left,
Chris Mason99d8f832010-07-07 10:51:48 -04003875 int free_space, u32 right_nritems,
3876 u32 max_slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003877{
David Sterba8087c192019-03-20 14:40:41 +01003878 struct btrfs_fs_info *fs_info = left->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04003879 struct btrfs_disk_key disk_key;
3880 struct extent_buffer *right = path->nodes[0];
Chris Masonbe0e5c02007-01-26 15:51:26 -05003881 int i;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003882 int push_space = 0;
3883 int push_items = 0;
Chris Mason0783fcf2007-03-12 20:12:07 -04003884 struct btrfs_item *item;
Chris Mason7518a232007-03-12 12:01:18 -04003885 u32 old_left_nritems;
Chris Mason34a38212007-11-07 13:31:03 -05003886 u32 nr;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003887 int ret = 0;
Chris Masondb945352007-10-15 16:15:53 -04003888 u32 this_item_size;
3889 u32 old_left_item_size;
Chris Masoncfed81a2012-03-03 07:40:03 -05003890 struct btrfs_map_token token;
3891
Chris Mason34a38212007-11-07 13:31:03 -05003892 if (empty)
Chris Mason99d8f832010-07-07 10:51:48 -04003893 nr = min(right_nritems, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003894 else
Chris Mason99d8f832010-07-07 10:51:48 -04003895 nr = min(right_nritems - 1, max_slot);
Chris Mason34a38212007-11-07 13:31:03 -05003896
3897 for (i = 0; i < nr; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003898 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003899
Zheng Yan31840ae2008-09-23 13:14:14 -04003900 if (!empty && push_items > 0) {
3901 if (path->slots[0] < i)
3902 break;
3903 if (path->slots[0] == i) {
David Sterbae902baa2019-03-20 14:36:46 +01003904 int space = btrfs_leaf_free_space(right);
3905
Zheng Yan31840ae2008-09-23 13:14:14 -04003906 if (space + push_space * 2 > free_space)
3907 break;
3908 }
3909 }
3910
Chris Masonbe0e5c02007-01-26 15:51:26 -05003911 if (path->slots[0] == i)
Yan Zheng87b29b22008-12-17 10:21:48 -05003912 push_space += data_size;
Chris Masondb945352007-10-15 16:15:53 -04003913
3914 this_item_size = btrfs_item_size(right, item);
3915 if (this_item_size + sizeof(*item) + push_space > free_space)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003916 break;
Chris Masondb945352007-10-15 16:15:53 -04003917
Chris Masonbe0e5c02007-01-26 15:51:26 -05003918 push_items++;
Chris Masondb945352007-10-15 16:15:53 -04003919 push_space += this_item_size + sizeof(*item);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003920 }
Chris Masondb945352007-10-15 16:15:53 -04003921
Chris Masonbe0e5c02007-01-26 15:51:26 -05003922 if (push_items == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04003923 ret = 1;
3924 goto out;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003925 }
Dulshani Gunawardhanafae7f212013-10-31 10:30:08 +05303926 WARN_ON(!empty && push_items == btrfs_header_nritems(right));
Chris Mason5f39d392007-10-15 16:14:19 -04003927
Chris Masonbe0e5c02007-01-26 15:51:26 -05003928 /* push data from right to left */
Chris Mason5f39d392007-10-15 16:14:19 -04003929 copy_extent_buffer(left, right,
3930 btrfs_item_nr_offset(btrfs_header_nritems(left)),
3931 btrfs_item_nr_offset(0),
3932 push_items * sizeof(struct btrfs_item));
3933
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003934 push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
Chris Masond3977122009-01-05 21:25:51 -05003935 btrfs_item_offset_nr(right, push_items - 1);
Chris Mason5f39d392007-10-15 16:14:19 -04003936
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003937 copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
David Sterba8f881e82019-03-20 11:33:10 +01003938 leaf_data_end(left) - push_space,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003939 BTRFS_LEAF_DATA_OFFSET +
Chris Mason5f39d392007-10-15 16:14:19 -04003940 btrfs_item_offset_nr(right, push_items - 1),
Chris Masond6025572007-03-30 14:27:56 -04003941 push_space);
Chris Mason5f39d392007-10-15 16:14:19 -04003942 old_left_nritems = btrfs_header_nritems(left);
Yan Zheng87b29b22008-12-17 10:21:48 -05003943 BUG_ON(old_left_nritems <= 0);
Chris Masoneb60cea2007-02-02 09:18:22 -05003944
David Sterbac82f8232019-08-09 17:48:21 +02003945 btrfs_init_map_token(&token, left);
Chris Masondb945352007-10-15 16:15:53 -04003946 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
Chris Mason0783fcf2007-03-12 20:12:07 -04003947 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04003948 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04003949
Ross Kirkdd3cc162013-09-16 15:58:09 +01003950 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003951
David Sterbacc4c13d2020-04-29 02:15:56 +02003952 ioff = btrfs_token_item_offset(&token, item);
3953 btrfs_set_token_item_offset(&token, item,
3954 ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
Chris Masonbe0e5c02007-01-26 15:51:26 -05003955 }
Chris Mason5f39d392007-10-15 16:14:19 -04003956 btrfs_set_header_nritems(left, old_left_nritems + push_items);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003957
3958 /* fixup right node */
Julia Lawall31b1a2b2012-11-03 10:58:34 +00003959 if (push_items > right_nritems)
3960 WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
Chris Masond3977122009-01-05 21:25:51 -05003961 right_nritems);
Chris Mason5f39d392007-10-15 16:14:19 -04003962
Chris Mason34a38212007-11-07 13:31:03 -05003963 if (push_items < right_nritems) {
3964 push_space = btrfs_item_offset_nr(right, push_items - 1) -
David Sterba8f881e82019-03-20 11:33:10 +01003965 leaf_data_end(right);
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003966 memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003967 BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03003968 BTRFS_LEAF_DATA_OFFSET +
David Sterba8f881e82019-03-20 11:33:10 +01003969 leaf_data_end(right), push_space);
Chris Mason34a38212007-11-07 13:31:03 -05003970
3971 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
Chris Mason5f39d392007-10-15 16:14:19 -04003972 btrfs_item_nr_offset(push_items),
3973 (btrfs_header_nritems(right) - push_items) *
3974 sizeof(struct btrfs_item));
Chris Mason34a38212007-11-07 13:31:03 -05003975 }
David Sterbac82f8232019-08-09 17:48:21 +02003976
3977 btrfs_init_map_token(&token, right);
Yaneef1c492007-11-26 10:58:13 -05003978 right_nritems -= push_items;
3979 btrfs_set_header_nritems(right, right_nritems);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003980 push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
Chris Mason5f39d392007-10-15 16:14:19 -04003981 for (i = 0; i < right_nritems; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01003982 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04003983
David Sterbacc4c13d2020-04-29 02:15:56 +02003984 push_space = push_space - btrfs_token_item_size(&token, item);
3985 btrfs_set_token_item_offset(&token, item, push_space);
Chris Masondb945352007-10-15 16:15:53 -04003986 }
Chris Masoneb60cea2007-02-02 09:18:22 -05003987
Chris Mason5f39d392007-10-15 16:14:19 -04003988 btrfs_mark_buffer_dirty(left);
Chris Mason34a38212007-11-07 13:31:03 -05003989 if (right_nritems)
3990 btrfs_mark_buffer_dirty(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003991 else
David Sterba6a884d7d2019-03-20 14:30:02 +01003992 btrfs_clean_tree_block(right);
Chris Mason098f59c2007-05-11 11:33:21 -04003993
Chris Mason5f39d392007-10-15 16:14:19 -04003994 btrfs_item_key(right, &disk_key, 0);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03003995 fixup_low_keys(path, &disk_key, 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003996
3997 /* then fixup the leaf pointer in the path */
3998 if (path->slots[0] < push_items) {
3999 path->slots[0] += old_left_nritems;
Chris Mason925baed2008-06-25 16:01:30 -04004000 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04004001 free_extent_buffer(path->nodes[0]);
4002 path->nodes[0] = left;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004003 path->slots[1] -= 1;
4004 } else {
Chris Mason925baed2008-06-25 16:01:30 -04004005 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04004006 free_extent_buffer(left);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004007 path->slots[0] -= push_items;
4008 }
Chris Masoneb60cea2007-02-02 09:18:22 -05004009 BUG_ON(path->slots[0] < 0);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004010 return ret;
Chris Mason925baed2008-06-25 16:01:30 -04004011out:
4012 btrfs_tree_unlock(left);
4013 free_extent_buffer(left);
4014 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004015}
4016
Chris Mason74123bd2007-02-02 11:05:29 -05004017/*
Chris Mason44871b12009-03-13 10:04:31 -04004018 * push some data in the path leaf to the left, trying to free up at
4019 * least data_size bytes. returns zero if the push worked, nonzero otherwise
Chris Mason99d8f832010-07-07 10:51:48 -04004020 *
4021 * max_slot can put a limit on how far into the leaf we'll push items. The
4022 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
4023 * items
Chris Mason44871b12009-03-13 10:04:31 -04004024 */
4025static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
Chris Mason99d8f832010-07-07 10:51:48 -04004026 *root, struct btrfs_path *path, int min_data_size,
4027 int data_size, int empty, u32 max_slot)
Chris Mason44871b12009-03-13 10:04:31 -04004028{
4029 struct extent_buffer *right = path->nodes[0];
4030 struct extent_buffer *left;
4031 int slot;
4032 int free_space;
4033 u32 right_nritems;
4034 int ret = 0;
4035
4036 slot = path->slots[1];
4037 if (slot == 0)
4038 return 1;
4039 if (!path->nodes[1])
4040 return 1;
4041
4042 right_nritems = btrfs_header_nritems(right);
4043 if (right_nritems == 0)
4044 return 1;
4045
4046 btrfs_assert_tree_locked(path->nodes[1]);
4047
David Sterba4b231ae2019-08-21 19:16:27 +02004048 left = btrfs_read_node_slot(path->nodes[1], slot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07004049 /*
4050 * slot - 1 is not valid or we fail to read the left node,
4051 * no big deal, just return.
4052 */
4053 if (IS_ERR(left))
Tsutomu Itoh91ca3382011-01-05 02:32:22 +00004054 return 1;
4055
Josef Bacikbf774672020-08-20 11:46:04 -04004056 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
David Sterba8bead252018-04-04 02:03:48 +02004057 btrfs_set_lock_blocking_write(left);
Chris Mason44871b12009-03-13 10:04:31 -04004058
David Sterbae902baa2019-03-20 14:36:46 +01004059 free_space = btrfs_leaf_free_space(left);
Chris Mason44871b12009-03-13 10:04:31 -04004060 if (free_space < data_size) {
4061 ret = 1;
4062 goto out;
4063 }
4064
4065 /* cow and double check */
4066 ret = btrfs_cow_block(trans, root, left,
Josef Bacik9631e4c2020-08-20 11:46:03 -04004067 path->nodes[1], slot - 1, &left,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04004068 BTRFS_NESTING_LEFT_COW);
Chris Mason44871b12009-03-13 10:04:31 -04004069 if (ret) {
4070 /* we hit -ENOSPC, but it isn't fatal here */
Jeff Mahoney79787ea2012-03-12 16:03:00 +01004071 if (ret == -ENOSPC)
4072 ret = 1;
Chris Mason44871b12009-03-13 10:04:31 -04004073 goto out;
4074 }
4075
David Sterbae902baa2019-03-20 14:36:46 +01004076 free_space = btrfs_leaf_free_space(left);
Chris Mason44871b12009-03-13 10:04:31 -04004077 if (free_space < data_size) {
4078 ret = 1;
4079 goto out;
4080 }
4081
Qu Wenruod16c7022020-08-19 14:35:50 +08004082 if (check_sibling_keys(left, right)) {
4083 ret = -EUCLEAN;
4084 goto out;
4085 }
David Sterba8087c192019-03-20 14:40:41 +01004086 return __push_leaf_left(path, min_data_size,
Chris Mason99d8f832010-07-07 10:51:48 -04004087 empty, left, free_space, right_nritems,
4088 max_slot);
Chris Mason44871b12009-03-13 10:04:31 -04004089out:
4090 btrfs_tree_unlock(left);
4091 free_extent_buffer(left);
4092 return ret;
4093}
4094
4095/*
Chris Mason74123bd2007-02-02 11:05:29 -05004096 * split the path's leaf in two, making sure there is at least data_size
4097 * available for the resulting leaf level of the path.
4098 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01004099static noinline void copy_for_split(struct btrfs_trans_handle *trans,
Jeff Mahoney143bede2012-03-01 14:56:26 +01004100 struct btrfs_path *path,
4101 struct extent_buffer *l,
4102 struct extent_buffer *right,
4103 int slot, int mid, int nritems)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004104{
David Sterba94f94ad2019-03-20 14:42:33 +01004105 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004106 int data_copy_size;
4107 int rt_data_off;
4108 int i;
Chris Masond4dbff92007-04-04 14:08:15 -04004109 struct btrfs_disk_key disk_key;
Chris Masoncfed81a2012-03-03 07:40:03 -05004110 struct btrfs_map_token token;
4111
Chris Mason5f39d392007-10-15 16:14:19 -04004112 nritems = nritems - mid;
4113 btrfs_set_header_nritems(right, nritems);
David Sterba8f881e82019-03-20 11:33:10 +01004114 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);
Chris Mason5f39d392007-10-15 16:14:19 -04004115
4116 copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
4117 btrfs_item_nr_offset(mid),
4118 nritems * sizeof(struct btrfs_item));
4119
4120 copy_extent_buffer(right, l,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004121 BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
4122 data_copy_size, BTRFS_LEAF_DATA_OFFSET +
David Sterba8f881e82019-03-20 11:33:10 +01004123 leaf_data_end(l), data_copy_size);
Chris Mason74123bd2007-02-02 11:05:29 -05004124
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004125 rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
Chris Mason5f39d392007-10-15 16:14:19 -04004126
David Sterbac82f8232019-08-09 17:48:21 +02004127 btrfs_init_map_token(&token, right);
Chris Mason5f39d392007-10-15 16:14:19 -04004128 for (i = 0; i < nritems; i++) {
Ross Kirkdd3cc162013-09-16 15:58:09 +01004129 struct btrfs_item *item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004130 u32 ioff;
4131
David Sterbacc4c13d2020-04-29 02:15:56 +02004132 ioff = btrfs_token_item_offset(&token, item);
4133 btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
Chris Mason0783fcf2007-03-12 20:12:07 -04004134 }
Chris Mason74123bd2007-02-02 11:05:29 -05004135
Chris Mason5f39d392007-10-15 16:14:19 -04004136 btrfs_set_header_nritems(l, mid);
Chris Mason5f39d392007-10-15 16:14:19 -04004137 btrfs_item_key(right, &disk_key, 0);
David Sterba6ad3cf62019-03-20 14:32:45 +01004138 insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04004139
4140 btrfs_mark_buffer_dirty(right);
4141 btrfs_mark_buffer_dirty(l);
Chris Masoneb60cea2007-02-02 09:18:22 -05004142 BUG_ON(path->slots[0] != slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004143
Chris Masonbe0e5c02007-01-26 15:51:26 -05004144 if (mid <= slot) {
Chris Mason925baed2008-06-25 16:01:30 -04004145 btrfs_tree_unlock(path->nodes[0]);
Chris Mason5f39d392007-10-15 16:14:19 -04004146 free_extent_buffer(path->nodes[0]);
4147 path->nodes[0] = right;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004148 path->slots[0] -= mid;
4149 path->slots[1] += 1;
Chris Mason925baed2008-06-25 16:01:30 -04004150 } else {
4151 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04004152 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04004153 }
Chris Mason5f39d392007-10-15 16:14:19 -04004154
Chris Masoneb60cea2007-02-02 09:18:22 -05004155 BUG_ON(path->slots[0] < 0);
Chris Mason44871b12009-03-13 10:04:31 -04004156}
4157
4158/*
Chris Mason99d8f832010-07-07 10:51:48 -04004159 * double splits happen when we need to insert a big item in the middle
4160 * of a leaf. A double split can leave us with 3 mostly empty leaves:
4161 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
4162 * A B C
4163 *
4164 * We avoid this by trying to push the items on either side of our target
4165 * into the adjacent leaves. If all goes well we can avoid the double split
4166 * completely.
4167 */
4168static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
4169 struct btrfs_root *root,
4170 struct btrfs_path *path,
4171 int data_size)
4172{
4173 int ret;
4174 int progress = 0;
4175 int slot;
4176 u32 nritems;
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004177 int space_needed = data_size;
Chris Mason99d8f832010-07-07 10:51:48 -04004178
4179 slot = path->slots[0];
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004180 if (slot < btrfs_header_nritems(path->nodes[0]))
David Sterbae902baa2019-03-20 14:36:46 +01004181 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
Chris Mason99d8f832010-07-07 10:51:48 -04004182
4183 /*
4184 * try to push all the items after our slot into the
4185 * right leaf
4186 */
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004187 ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
Chris Mason99d8f832010-07-07 10:51:48 -04004188 if (ret < 0)
4189 return ret;
4190
4191 if (ret == 0)
4192 progress++;
4193
4194 nritems = btrfs_header_nritems(path->nodes[0]);
4195 /*
4196 * our goal is to get our slot at the start or end of a leaf. If
4197 * we've done so we're done
4198 */
4199 if (path->slots[0] == 0 || path->slots[0] == nritems)
4200 return 0;
4201
David Sterbae902baa2019-03-20 14:36:46 +01004202 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
Chris Mason99d8f832010-07-07 10:51:48 -04004203 return 0;
4204
4205 /* try to push all the items before our slot into the next leaf */
4206 slot = path->slots[0];
Filipe Manana263d3992017-02-17 18:43:57 +00004207 space_needed = data_size;
4208 if (slot > 0)
David Sterbae902baa2019-03-20 14:36:46 +01004209 space_needed -= btrfs_leaf_free_space(path->nodes[0]);
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004210 ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
Chris Mason99d8f832010-07-07 10:51:48 -04004211 if (ret < 0)
4212 return ret;
4213
4214 if (ret == 0)
4215 progress++;
4216
4217 if (progress)
4218 return 0;
4219 return 1;
4220}
4221
4222/*
Chris Mason44871b12009-03-13 10:04:31 -04004223 * split the path's leaf in two, making sure there is at least data_size
4224 * available for the resulting leaf level of the path.
4225 *
4226 * returns 0 if all went well and < 0 on failure.
4227 */
4228static noinline int split_leaf(struct btrfs_trans_handle *trans,
4229 struct btrfs_root *root,
Omar Sandoval310712b2017-01-17 23:24:37 -08004230 const struct btrfs_key *ins_key,
Chris Mason44871b12009-03-13 10:04:31 -04004231 struct btrfs_path *path, int data_size,
4232 int extend)
4233{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004234 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04004235 struct extent_buffer *l;
4236 u32 nritems;
4237 int mid;
4238 int slot;
4239 struct extent_buffer *right;
Daniel Dresslerb7a03652014-11-12 13:43:09 +09004240 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason44871b12009-03-13 10:04:31 -04004241 int ret = 0;
4242 int wret;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004243 int split;
Chris Mason44871b12009-03-13 10:04:31 -04004244 int num_doubles = 0;
Chris Mason99d8f832010-07-07 10:51:48 -04004245 int tried_avoid_double = 0;
Chris Mason44871b12009-03-13 10:04:31 -04004246
Yan, Zhenga5719522009-09-24 09:17:31 -04004247 l = path->nodes[0];
4248 slot = path->slots[0];
4249 if (extend && data_size + btrfs_item_size_nr(l, slot) +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004250 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
Yan, Zhenga5719522009-09-24 09:17:31 -04004251 return -EOVERFLOW;
4252
Chris Mason44871b12009-03-13 10:04:31 -04004253 /* first try to make some room by pushing left and right */
Liu Bo33157e02013-05-22 12:07:06 +00004254 if (data_size && path->nodes[1]) {
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004255 int space_needed = data_size;
4256
4257 if (slot < btrfs_header_nritems(l))
David Sterbae902baa2019-03-20 14:36:46 +01004258 space_needed -= btrfs_leaf_free_space(l);
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004259
4260 wret = push_leaf_right(trans, root, path, space_needed,
4261 space_needed, 0, 0);
Chris Mason44871b12009-03-13 10:04:31 -04004262 if (wret < 0)
4263 return wret;
4264 if (wret) {
Filipe Manana263d3992017-02-17 18:43:57 +00004265 space_needed = data_size;
4266 if (slot > 0)
David Sterbae902baa2019-03-20 14:36:46 +01004267 space_needed -= btrfs_leaf_free_space(l);
Filipe David Borba Manana5a4267c2013-11-25 03:20:46 +00004268 wret = push_leaf_left(trans, root, path, space_needed,
4269 space_needed, 0, (u32)-1);
Chris Mason44871b12009-03-13 10:04:31 -04004270 if (wret < 0)
4271 return wret;
4272 }
4273 l = path->nodes[0];
4274
4275 /* did the pushes work? */
David Sterbae902baa2019-03-20 14:36:46 +01004276 if (btrfs_leaf_free_space(l) >= data_size)
Chris Mason44871b12009-03-13 10:04:31 -04004277 return 0;
4278 }
4279
4280 if (!path->nodes[1]) {
Liu Bofdd99c72013-05-22 12:06:51 +00004281 ret = insert_new_root(trans, root, path, 1);
Chris Mason44871b12009-03-13 10:04:31 -04004282 if (ret)
4283 return ret;
4284 }
4285again:
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004286 split = 1;
Chris Mason44871b12009-03-13 10:04:31 -04004287 l = path->nodes[0];
4288 slot = path->slots[0];
4289 nritems = btrfs_header_nritems(l);
4290 mid = (nritems + 1) / 2;
4291
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004292 if (mid <= slot) {
4293 if (nritems == 1 ||
4294 leaf_space_used(l, mid, nritems - mid) + data_size >
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004295 BTRFS_LEAF_DATA_SIZE(fs_info)) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004296 if (slot >= nritems) {
4297 split = 0;
4298 } else {
4299 mid = slot;
4300 if (mid != nritems &&
4301 leaf_space_used(l, mid, nritems - mid) +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004302 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
Chris Mason99d8f832010-07-07 10:51:48 -04004303 if (data_size && !tried_avoid_double)
4304 goto push_for_double;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004305 split = 2;
4306 }
4307 }
4308 }
4309 } else {
4310 if (leaf_space_used(l, 0, mid) + data_size >
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004311 BTRFS_LEAF_DATA_SIZE(fs_info)) {
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004312 if (!extend && data_size && slot == 0) {
4313 split = 0;
4314 } else if ((extend || !data_size) && slot == 0) {
4315 mid = 1;
4316 } else {
4317 mid = slot;
4318 if (mid != nritems &&
4319 leaf_space_used(l, mid, nritems - mid) +
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004320 data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
Chris Mason99d8f832010-07-07 10:51:48 -04004321 if (data_size && !tried_avoid_double)
4322 goto push_for_double;
Dulshani Gunawardhana67871252013-10-31 10:33:04 +05304323 split = 2;
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004324 }
4325 }
4326 }
4327 }
4328
4329 if (split == 0)
4330 btrfs_cpu_key_to_disk(&disk_key, ins_key);
4331 else
4332 btrfs_item_key(l, &disk_key, mid);
4333
Josef Bacikca9d4732020-08-20 11:46:08 -04004334 /*
4335 * We have to about BTRFS_NESTING_NEW_ROOT here if we've done a double
4336 * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES
4337 * subclasses, which is 8 at the time of this patch, and we've maxed it
4338 * out. In the future we could add a
4339 * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just
4340 * use BTRFS_NESTING_NEW_ROOT.
4341 */
Filipe Mananaa6279472019-01-25 11:48:51 +00004342 right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
Josef Bacikca9d4732020-08-20 11:46:08 -04004343 l->start, 0, num_doubles ?
4344 BTRFS_NESTING_NEW_ROOT :
4345 BTRFS_NESTING_SPLIT);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004346 if (IS_ERR(right))
Chris Mason44871b12009-03-13 10:04:31 -04004347 return PTR_ERR(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004348
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004349 root_add_used(root, fs_info->nodesize);
Chris Mason44871b12009-03-13 10:04:31 -04004350
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004351 if (split == 0) {
4352 if (mid <= slot) {
4353 btrfs_set_header_nritems(right, 0);
David Sterba6ad3cf62019-03-20 14:32:45 +01004354 insert_ptr(trans, path, &disk_key,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004355 right->start, path->slots[1] + 1, 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004356 btrfs_tree_unlock(path->nodes[0]);
4357 free_extent_buffer(path->nodes[0]);
4358 path->nodes[0] = right;
4359 path->slots[0] = 0;
4360 path->slots[1] += 1;
4361 } else {
4362 btrfs_set_header_nritems(right, 0);
David Sterba6ad3cf62019-03-20 14:32:45 +01004363 insert_ptr(trans, path, &disk_key,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04004364 right->start, path->slots[1], 1);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004365 btrfs_tree_unlock(path->nodes[0]);
4366 free_extent_buffer(path->nodes[0]);
4367 path->nodes[0] = right;
4368 path->slots[0] = 0;
Jeff Mahoney143bede2012-03-01 14:56:26 +01004369 if (path->slots[1] == 0)
Nikolay Borisovb167fa92018-06-20 15:48:47 +03004370 fixup_low_keys(path, &disk_key, 1);
Chris Mason44871b12009-03-13 10:04:31 -04004371 }
Liu Bo196e0242016-09-07 14:48:28 -07004372 /*
4373 * We create a new leaf 'right' for the required ins_len and
4374 * we'll do btrfs_mark_buffer_dirty() on this leaf after copying
4375 * the content of ins_len to 'right'.
4376 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004377 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04004378 }
4379
David Sterba94f94ad2019-03-20 14:42:33 +01004380 copy_for_split(trans, path, l, right, slot, mid, nritems);
Chris Mason44871b12009-03-13 10:04:31 -04004381
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004382 if (split == 2) {
Chris Masoncc0c5532007-10-25 15:42:57 -04004383 BUG_ON(num_doubles != 0);
4384 num_doubles++;
4385 goto again;
Chris Mason3326d1b2007-10-15 16:18:25 -04004386 }
Chris Mason44871b12009-03-13 10:04:31 -04004387
Jeff Mahoney143bede2012-03-01 14:56:26 +01004388 return 0;
Chris Mason99d8f832010-07-07 10:51:48 -04004389
4390push_for_double:
4391 push_for_double_split(trans, root, path, data_size);
4392 tried_avoid_double = 1;
David Sterbae902baa2019-03-20 14:36:46 +01004393 if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
Chris Mason99d8f832010-07-07 10:51:48 -04004394 return 0;
4395 goto again;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004396}
4397
Yan, Zhengad48fd752009-11-12 09:33:58 +00004398static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
4399 struct btrfs_root *root,
4400 struct btrfs_path *path, int ins_len)
Chris Mason459931e2008-12-10 09:10:46 -05004401{
Yan, Zhengad48fd752009-11-12 09:33:58 +00004402 struct btrfs_key key;
Chris Mason459931e2008-12-10 09:10:46 -05004403 struct extent_buffer *leaf;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004404 struct btrfs_file_extent_item *fi;
4405 u64 extent_len = 0;
4406 u32 item_size;
4407 int ret;
Chris Mason459931e2008-12-10 09:10:46 -05004408
4409 leaf = path->nodes[0];
Yan, Zhengad48fd752009-11-12 09:33:58 +00004410 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4411
4412 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
4413 key.type != BTRFS_EXTENT_CSUM_KEY);
4414
David Sterbae902baa2019-03-20 14:36:46 +01004415 if (btrfs_leaf_free_space(leaf) >= ins_len)
Yan, Zhengad48fd752009-11-12 09:33:58 +00004416 return 0;
Chris Mason459931e2008-12-10 09:10:46 -05004417
4418 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004419 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4420 fi = btrfs_item_ptr(leaf, path->slots[0],
4421 struct btrfs_file_extent_item);
4422 extent_len = btrfs_file_extent_num_bytes(leaf, fi);
4423 }
David Sterbab3b4aa72011-04-21 01:20:15 +02004424 btrfs_release_path(path);
Chris Mason459931e2008-12-10 09:10:46 -05004425
Chris Mason459931e2008-12-10 09:10:46 -05004426 path->keep_locks = 1;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004427 path->search_for_split = 1;
4428 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
Chris Mason459931e2008-12-10 09:10:46 -05004429 path->search_for_split = 0;
Filipe Mananaa8df6fe2015-01-20 12:40:53 +00004430 if (ret > 0)
4431 ret = -EAGAIN;
Yan, Zhengad48fd752009-11-12 09:33:58 +00004432 if (ret < 0)
4433 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05004434
Yan, Zhengad48fd752009-11-12 09:33:58 +00004435 ret = -EAGAIN;
4436 leaf = path->nodes[0];
Filipe Mananaa8df6fe2015-01-20 12:40:53 +00004437 /* if our item isn't there, return now */
4438 if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
Yan, Zhengad48fd752009-11-12 09:33:58 +00004439 goto err;
4440
Chris Mason109f6ae2010-04-02 09:20:18 -04004441 /* the leaf has changed, it now has room. return now */
David Sterbae902baa2019-03-20 14:36:46 +01004442 if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
Chris Mason109f6ae2010-04-02 09:20:18 -04004443 goto err;
4444
Yan, Zhengad48fd752009-11-12 09:33:58 +00004445 if (key.type == BTRFS_EXTENT_DATA_KEY) {
4446 fi = btrfs_item_ptr(leaf, path->slots[0],
4447 struct btrfs_file_extent_item);
4448 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
4449 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05004450 }
4451
Chris Masonb9473432009-03-13 11:00:37 -04004452 btrfs_set_path_blocking(path);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004453 ret = split_leaf(trans, root, &key, path, ins_len, 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04004454 if (ret)
4455 goto err;
Chris Mason459931e2008-12-10 09:10:46 -05004456
Yan, Zhengad48fd752009-11-12 09:33:58 +00004457 path->keep_locks = 0;
Chris Masonb9473432009-03-13 11:00:37 -04004458 btrfs_unlock_up_safe(path, 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004459 return 0;
4460err:
4461 path->keep_locks = 0;
4462 return ret;
4463}
4464
David Sterba25263cd2019-03-20 14:44:57 +01004465static noinline int split_item(struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004466 const struct btrfs_key *new_key,
Yan, Zhengad48fd752009-11-12 09:33:58 +00004467 unsigned long split_offset)
4468{
4469 struct extent_buffer *leaf;
4470 struct btrfs_item *item;
4471 struct btrfs_item *new_item;
4472 int slot;
4473 char *buf;
4474 u32 nritems;
4475 u32 item_size;
4476 u32 orig_offset;
4477 struct btrfs_disk_key disk_key;
4478
Chris Masonb9473432009-03-13 11:00:37 -04004479 leaf = path->nodes[0];
David Sterbae902baa2019-03-20 14:36:46 +01004480 BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
Chris Masonb9473432009-03-13 11:00:37 -04004481
Chris Masonb4ce94d2009-02-04 09:25:08 -05004482 btrfs_set_path_blocking(path);
4483
Ross Kirkdd3cc162013-09-16 15:58:09 +01004484 item = btrfs_item_nr(path->slots[0]);
Chris Mason459931e2008-12-10 09:10:46 -05004485 orig_offset = btrfs_item_offset(leaf, item);
4486 item_size = btrfs_item_size(leaf, item);
4487
Chris Mason459931e2008-12-10 09:10:46 -05004488 buf = kmalloc(item_size, GFP_NOFS);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004489 if (!buf)
4490 return -ENOMEM;
4491
Chris Mason459931e2008-12-10 09:10:46 -05004492 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
4493 path->slots[0]), item_size);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004494
Chris Mason459931e2008-12-10 09:10:46 -05004495 slot = path->slots[0] + 1;
Chris Mason459931e2008-12-10 09:10:46 -05004496 nritems = btrfs_header_nritems(leaf);
Chris Mason459931e2008-12-10 09:10:46 -05004497 if (slot != nritems) {
4498 /* shift the items */
4499 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
Yan, Zhengad48fd752009-11-12 09:33:58 +00004500 btrfs_item_nr_offset(slot),
4501 (nritems - slot) * sizeof(struct btrfs_item));
Chris Mason459931e2008-12-10 09:10:46 -05004502 }
4503
4504 btrfs_cpu_key_to_disk(&disk_key, new_key);
4505 btrfs_set_item_key(leaf, &disk_key, slot);
4506
Ross Kirkdd3cc162013-09-16 15:58:09 +01004507 new_item = btrfs_item_nr(slot);
Chris Mason459931e2008-12-10 09:10:46 -05004508
4509 btrfs_set_item_offset(leaf, new_item, orig_offset);
4510 btrfs_set_item_size(leaf, new_item, item_size - split_offset);
4511
4512 btrfs_set_item_offset(leaf, item,
4513 orig_offset + item_size - split_offset);
4514 btrfs_set_item_size(leaf, item, split_offset);
4515
4516 btrfs_set_header_nritems(leaf, nritems + 1);
4517
4518 /* write the data for the start of the original item */
4519 write_extent_buffer(leaf, buf,
4520 btrfs_item_ptr_offset(leaf, path->slots[0]),
4521 split_offset);
4522
4523 /* write the data for the new item */
4524 write_extent_buffer(leaf, buf + split_offset,
4525 btrfs_item_ptr_offset(leaf, slot),
4526 item_size - split_offset);
4527 btrfs_mark_buffer_dirty(leaf);
4528
David Sterbae902baa2019-03-20 14:36:46 +01004529 BUG_ON(btrfs_leaf_free_space(leaf) < 0);
Chris Mason459931e2008-12-10 09:10:46 -05004530 kfree(buf);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004531 return 0;
4532}
4533
4534/*
4535 * This function splits a single item into two items,
4536 * giving 'new_key' to the new item and splitting the
4537 * old one at split_offset (from the start of the item).
4538 *
4539 * The path may be released by this operation. After
4540 * the split, the path is pointing to the old item. The
4541 * new item is going to be in the same node as the old one.
4542 *
4543 * Note, the item being split must be smaller enough to live alone on
4544 * a tree block with room for one extra struct btrfs_item
4545 *
4546 * This allows us to split the item in place, keeping a lock on the
4547 * leaf the entire time.
4548 */
4549int btrfs_split_item(struct btrfs_trans_handle *trans,
4550 struct btrfs_root *root,
4551 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004552 const struct btrfs_key *new_key,
Yan, Zhengad48fd752009-11-12 09:33:58 +00004553 unsigned long split_offset)
4554{
4555 int ret;
4556 ret = setup_leaf_for_split(trans, root, path,
4557 sizeof(struct btrfs_item));
4558 if (ret)
4559 return ret;
4560
David Sterba25263cd2019-03-20 14:44:57 +01004561 ret = split_item(path, new_key, split_offset);
Chris Mason459931e2008-12-10 09:10:46 -05004562 return ret;
4563}
4564
4565/*
Yan, Zhengad48fd752009-11-12 09:33:58 +00004566 * This function duplicate a item, giving 'new_key' to the new item.
4567 * It guarantees both items live in the same tree leaf and the new item
4568 * is contiguous with the original item.
4569 *
4570 * This allows us to split file extent in place, keeping a lock on the
4571 * leaf the entire time.
4572 */
4573int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
4574 struct btrfs_root *root,
4575 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004576 const struct btrfs_key *new_key)
Yan, Zhengad48fd752009-11-12 09:33:58 +00004577{
4578 struct extent_buffer *leaf;
4579 int ret;
4580 u32 item_size;
4581
4582 leaf = path->nodes[0];
4583 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
4584 ret = setup_leaf_for_split(trans, root, path,
4585 item_size + sizeof(struct btrfs_item));
4586 if (ret)
4587 return ret;
4588
4589 path->slots[0]++;
Nikolay Borisovfc0d82e2020-09-01 17:39:59 +03004590 setup_items_for_insert(root, path, new_key, &item_size, 1);
Yan, Zhengad48fd752009-11-12 09:33:58 +00004591 leaf = path->nodes[0];
4592 memcpy_extent_buffer(leaf,
4593 btrfs_item_ptr_offset(leaf, path->slots[0]),
4594 btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
4595 item_size);
4596 return 0;
4597}
4598
4599/*
Chris Masond352ac62008-09-29 15:18:18 -04004600 * make the item pointed to by the path smaller. new_size indicates
4601 * how small to make it, and from_end tells us if we just chop bytes
4602 * off the end of the item or if we shift the item to chop bytes off
4603 * the front.
4604 */
David Sterba78ac4f92019-03-20 14:49:12 +01004605void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
Chris Masonb18c6682007-04-17 13:26:50 -04004606{
Chris Masonb18c6682007-04-17 13:26:50 -04004607 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04004608 struct extent_buffer *leaf;
4609 struct btrfs_item *item;
Chris Masonb18c6682007-04-17 13:26:50 -04004610 u32 nritems;
4611 unsigned int data_end;
4612 unsigned int old_data_start;
4613 unsigned int old_size;
4614 unsigned int size_diff;
4615 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05004616 struct btrfs_map_token token;
4617
Chris Mason5f39d392007-10-15 16:14:19 -04004618 leaf = path->nodes[0];
Chris Mason179e29e2007-11-01 11:28:41 -04004619 slot = path->slots[0];
4620
4621 old_size = btrfs_item_size_nr(leaf, slot);
4622 if (old_size == new_size)
Jeff Mahoney143bede2012-03-01 14:56:26 +01004623 return;
Chris Masonb18c6682007-04-17 13:26:50 -04004624
Chris Mason5f39d392007-10-15 16:14:19 -04004625 nritems = btrfs_header_nritems(leaf);
David Sterba8f881e82019-03-20 11:33:10 +01004626 data_end = leaf_data_end(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004627
Chris Mason5f39d392007-10-15 16:14:19 -04004628 old_data_start = btrfs_item_offset_nr(leaf, slot);
Chris Mason179e29e2007-11-01 11:28:41 -04004629
Chris Masonb18c6682007-04-17 13:26:50 -04004630 size_diff = old_size - new_size;
4631
4632 BUG_ON(slot < 0);
4633 BUG_ON(slot >= nritems);
4634
4635 /*
4636 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4637 */
4638 /* first correct the data pointers */
David Sterbac82f8232019-08-09 17:48:21 +02004639 btrfs_init_map_token(&token, leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004640 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004641 u32 ioff;
Ross Kirkdd3cc162013-09-16 15:58:09 +01004642 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004643
David Sterbacc4c13d2020-04-29 02:15:56 +02004644 ioff = btrfs_token_item_offset(&token, item);
4645 btrfs_set_token_item_offset(&token, item, ioff + size_diff);
Chris Masonb18c6682007-04-17 13:26:50 -04004646 }
Chris Masondb945352007-10-15 16:15:53 -04004647
Chris Masonb18c6682007-04-17 13:26:50 -04004648 /* shift the data */
Chris Mason179e29e2007-11-01 11:28:41 -04004649 if (from_end) {
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004650 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4651 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
Chris Mason179e29e2007-11-01 11:28:41 -04004652 data_end, old_data_start + new_size - data_end);
4653 } else {
4654 struct btrfs_disk_key disk_key;
4655 u64 offset;
4656
4657 btrfs_item_key(leaf, &disk_key, slot);
4658
4659 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
4660 unsigned long ptr;
4661 struct btrfs_file_extent_item *fi;
4662
4663 fi = btrfs_item_ptr(leaf, slot,
4664 struct btrfs_file_extent_item);
4665 fi = (struct btrfs_file_extent_item *)(
4666 (unsigned long)fi - size_diff);
4667
4668 if (btrfs_file_extent_type(leaf, fi) ==
4669 BTRFS_FILE_EXTENT_INLINE) {
4670 ptr = btrfs_item_ptr_offset(leaf, slot);
4671 memmove_extent_buffer(leaf, ptr,
Chris Masond3977122009-01-05 21:25:51 -05004672 (unsigned long)fi,
David Sterba7ec20af2014-07-24 17:34:58 +02004673 BTRFS_FILE_EXTENT_INLINE_DATA_START);
Chris Mason179e29e2007-11-01 11:28:41 -04004674 }
4675 }
4676
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004677 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4678 data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
Chris Mason179e29e2007-11-01 11:28:41 -04004679 data_end, old_data_start - data_end);
4680
4681 offset = btrfs_disk_key_offset(&disk_key);
4682 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4683 btrfs_set_item_key(leaf, &disk_key, slot);
4684 if (slot == 0)
Nikolay Borisovb167fa92018-06-20 15:48:47 +03004685 fixup_low_keys(path, &disk_key, 1);
Chris Mason179e29e2007-11-01 11:28:41 -04004686 }
Chris Mason5f39d392007-10-15 16:14:19 -04004687
Ross Kirkdd3cc162013-09-16 15:58:09 +01004688 item = btrfs_item_nr(slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004689 btrfs_set_item_size(leaf, item, new_size);
4690 btrfs_mark_buffer_dirty(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004691
David Sterbae902baa2019-03-20 14:36:46 +01004692 if (btrfs_leaf_free_space(leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02004693 btrfs_print_leaf(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004694 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004695 }
Chris Masonb18c6682007-04-17 13:26:50 -04004696}
4697
Chris Masond352ac62008-09-29 15:18:18 -04004698/*
Stefan Behrens8f69dbd2013-05-07 10:23:30 +00004699 * make the item pointed to by the path bigger, data_size is the added size.
Chris Masond352ac62008-09-29 15:18:18 -04004700 */
David Sterbac71dd882019-03-20 14:51:10 +01004701void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
Chris Mason6567e832007-04-16 09:22:45 -04004702{
Chris Mason6567e832007-04-16 09:22:45 -04004703 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04004704 struct extent_buffer *leaf;
4705 struct btrfs_item *item;
Chris Mason6567e832007-04-16 09:22:45 -04004706 u32 nritems;
4707 unsigned int data_end;
4708 unsigned int old_data;
4709 unsigned int old_size;
4710 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05004711 struct btrfs_map_token token;
4712
Chris Mason5f39d392007-10-15 16:14:19 -04004713 leaf = path->nodes[0];
Chris Mason6567e832007-04-16 09:22:45 -04004714
Chris Mason5f39d392007-10-15 16:14:19 -04004715 nritems = btrfs_header_nritems(leaf);
David Sterba8f881e82019-03-20 11:33:10 +01004716 data_end = leaf_data_end(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004717
David Sterbae902baa2019-03-20 14:36:46 +01004718 if (btrfs_leaf_free_space(leaf) < data_size) {
David Sterbaa4f78752017-06-29 18:37:49 +02004719 btrfs_print_leaf(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004720 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004721 }
Chris Mason6567e832007-04-16 09:22:45 -04004722 slot = path->slots[0];
Chris Mason5f39d392007-10-15 16:14:19 -04004723 old_data = btrfs_item_end_nr(leaf, slot);
Chris Mason6567e832007-04-16 09:22:45 -04004724
4725 BUG_ON(slot < 0);
Chris Mason3326d1b2007-10-15 16:18:25 -04004726 if (slot >= nritems) {
David Sterbaa4f78752017-06-29 18:37:49 +02004727 btrfs_print_leaf(leaf);
David Sterbac71dd882019-03-20 14:51:10 +01004728 btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004729 slot, nritems);
Arnd Bergmann290342f2019-03-25 14:02:25 +01004730 BUG();
Chris Mason3326d1b2007-10-15 16:18:25 -04004731 }
Chris Mason6567e832007-04-16 09:22:45 -04004732
4733 /*
4734 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4735 */
4736 /* first correct the data pointers */
David Sterbac82f8232019-08-09 17:48:21 +02004737 btrfs_init_map_token(&token, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004738 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004739 u32 ioff;
Ross Kirkdd3cc162013-09-16 15:58:09 +01004740 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004741
David Sterbacc4c13d2020-04-29 02:15:56 +02004742 ioff = btrfs_token_item_offset(&token, item);
4743 btrfs_set_token_item_offset(&token, item, ioff - data_size);
Chris Mason6567e832007-04-16 09:22:45 -04004744 }
Chris Mason5f39d392007-10-15 16:14:19 -04004745
Chris Mason6567e832007-04-16 09:22:45 -04004746 /* shift the data */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004747 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4748 data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
Chris Mason6567e832007-04-16 09:22:45 -04004749 data_end, old_data - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04004750
Chris Mason6567e832007-04-16 09:22:45 -04004751 data_end = old_data;
Chris Mason5f39d392007-10-15 16:14:19 -04004752 old_size = btrfs_item_size_nr(leaf, slot);
Ross Kirkdd3cc162013-09-16 15:58:09 +01004753 item = btrfs_item_nr(slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004754 btrfs_set_item_size(leaf, item, old_size + data_size);
4755 btrfs_mark_buffer_dirty(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004756
David Sterbae902baa2019-03-20 14:36:46 +01004757 if (btrfs_leaf_free_space(leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02004758 btrfs_print_leaf(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004759 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004760 }
Chris Mason6567e832007-04-16 09:22:45 -04004761}
4762
Nikolay Borisovda9ffb22020-09-01 17:40:00 +03004763/**
4764 * setup_items_for_insert - Helper called before inserting one or more items
4765 * to a leaf. Main purpose is to save stack depth by doing the bulk of the work
4766 * in a function that doesn't call btrfs_search_slot
4767 *
4768 * @root: root we are inserting items to
4769 * @path: points to the leaf/slot where we are going to insert new items
4770 * @cpu_key: array of keys for items to be inserted
4771 * @data_size: size of the body of each item we are going to insert
4772 * @nr: size of @cpu_key/@data_size arrays
Chris Mason74123bd2007-02-02 11:05:29 -05004773 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004774void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004775 const struct btrfs_key *cpu_key, u32 *data_size,
Nikolay Borisovfc0d82e2020-09-01 17:39:59 +03004776 int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004777{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004778 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04004779 struct btrfs_item *item;
Chris Mason9c583092008-01-29 15:15:18 -05004780 int i;
Chris Mason7518a232007-03-12 12:01:18 -04004781 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004782 unsigned int data_end;
Chris Masone2fa7222007-03-12 16:22:34 -04004783 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04004784 struct extent_buffer *leaf;
4785 int slot;
Chris Masoncfed81a2012-03-03 07:40:03 -05004786 struct btrfs_map_token token;
Nikolay Borisovfc0d82e2020-09-01 17:39:59 +03004787 u32 total_size;
4788 u32 total_data = 0;
4789
4790 for (i = 0; i < nr; i++)
4791 total_data += data_size[i];
4792 total_size = total_data + (nr * sizeof(struct btrfs_item));
Chris Masoncfed81a2012-03-03 07:40:03 -05004793
Filipe Manana24cdc842014-07-28 19:34:35 +01004794 if (path->slots[0] == 0) {
4795 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03004796 fixup_low_keys(path, &disk_key, 1);
Filipe Manana24cdc842014-07-28 19:34:35 +01004797 }
4798 btrfs_unlock_up_safe(path, 1);
4799
Chris Mason5f39d392007-10-15 16:14:19 -04004800 leaf = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04004801 slot = path->slots[0];
Chris Mason74123bd2007-02-02 11:05:29 -05004802
Chris Mason5f39d392007-10-15 16:14:19 -04004803 nritems = btrfs_header_nritems(leaf);
David Sterba8f881e82019-03-20 11:33:10 +01004804 data_end = leaf_data_end(leaf);
Chris Masoneb60cea2007-02-02 09:18:22 -05004805
David Sterbae902baa2019-03-20 14:36:46 +01004806 if (btrfs_leaf_free_space(leaf) < total_size) {
David Sterbaa4f78752017-06-29 18:37:49 +02004807 btrfs_print_leaf(leaf);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004808 btrfs_crit(fs_info, "not enough freespace need %u have %d",
David Sterbae902baa2019-03-20 14:36:46 +01004809 total_size, btrfs_leaf_free_space(leaf));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004810 BUG();
Chris Masond4dbff92007-04-04 14:08:15 -04004811 }
Chris Mason5f39d392007-10-15 16:14:19 -04004812
David Sterbac82f8232019-08-09 17:48:21 +02004813 btrfs_init_map_token(&token, leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004814 if (slot != nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04004815 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004816
Chris Mason5f39d392007-10-15 16:14:19 -04004817 if (old_data < data_end) {
David Sterbaa4f78752017-06-29 18:37:49 +02004818 btrfs_print_leaf(leaf);
Nikolay Borisov7269ddd2020-09-01 17:40:01 +03004819 btrfs_crit(fs_info,
4820 "item at slot %d with data offset %u beyond data end of leaf %u",
Jeff Mahoney5d163e02016-09-20 10:05:00 -04004821 slot, old_data, data_end);
Arnd Bergmann290342f2019-03-25 14:02:25 +01004822 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004823 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004824 /*
4825 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4826 */
4827 /* first correct the data pointers */
Chris Mason0783fcf2007-03-12 20:12:07 -04004828 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004829 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04004830
Jeff Mahoney62e85572016-09-20 10:05:01 -04004831 item = btrfs_item_nr(i);
David Sterbacc4c13d2020-04-29 02:15:56 +02004832 ioff = btrfs_token_item_offset(&token, item);
4833 btrfs_set_token_item_offset(&token, item,
4834 ioff - total_data);
Chris Mason0783fcf2007-03-12 20:12:07 -04004835 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004836 /* shift the items */
Chris Mason9c583092008-01-29 15:15:18 -05004837 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
Chris Mason5f39d392007-10-15 16:14:19 -04004838 btrfs_item_nr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04004839 (nritems - slot) * sizeof(struct btrfs_item));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004840
4841 /* shift the data */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004842 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4843 data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
Chris Masond6025572007-03-30 14:27:56 -04004844 data_end, old_data - data_end);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004845 data_end = old_data;
4846 }
Chris Mason5f39d392007-10-15 16:14:19 -04004847
Chris Mason62e27492007-03-15 12:56:47 -04004848 /* setup the item for the new data */
Chris Mason9c583092008-01-29 15:15:18 -05004849 for (i = 0; i < nr; i++) {
4850 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
4851 btrfs_set_item_key(leaf, &disk_key, slot + i);
Ross Kirkdd3cc162013-09-16 15:58:09 +01004852 item = btrfs_item_nr(slot + i);
Chris Mason9c583092008-01-29 15:15:18 -05004853 data_end -= data_size[i];
Nikolay Borisovfc0716c2020-09-01 17:39:57 +03004854 btrfs_set_token_item_offset(&token, item, data_end);
David Sterbacc4c13d2020-04-29 02:15:56 +02004855 btrfs_set_token_item_size(&token, item, data_size[i]);
Chris Mason9c583092008-01-29 15:15:18 -05004856 }
Chris Mason44871b12009-03-13 10:04:31 -04004857
Chris Mason9c583092008-01-29 15:15:18 -05004858 btrfs_set_header_nritems(leaf, nritems + nr);
Chris Masonb9473432009-03-13 11:00:37 -04004859 btrfs_mark_buffer_dirty(leaf);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004860
David Sterbae902baa2019-03-20 14:36:46 +01004861 if (btrfs_leaf_free_space(leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02004862 btrfs_print_leaf(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004863 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004864 }
Chris Mason44871b12009-03-13 10:04:31 -04004865}
4866
4867/*
4868 * Given a key and some data, insert items into the tree.
4869 * This does all the path init required, making room in the tree if needed.
4870 */
4871int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4872 struct btrfs_root *root,
4873 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004874 const struct btrfs_key *cpu_key, u32 *data_size,
Chris Mason44871b12009-03-13 10:04:31 -04004875 int nr)
4876{
Chris Mason44871b12009-03-13 10:04:31 -04004877 int ret = 0;
4878 int slot;
4879 int i;
4880 u32 total_size = 0;
4881 u32 total_data = 0;
4882
4883 for (i = 0; i < nr; i++)
4884 total_data += data_size[i];
4885
4886 total_size = total_data + (nr * sizeof(struct btrfs_item));
4887 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
4888 if (ret == 0)
4889 return -EEXIST;
4890 if (ret < 0)
Jeff Mahoney143bede2012-03-01 14:56:26 +01004891 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04004892
Chris Mason44871b12009-03-13 10:04:31 -04004893 slot = path->slots[0];
4894 BUG_ON(slot < 0);
4895
Nikolay Borisovfc0d82e2020-09-01 17:39:59 +03004896 setup_items_for_insert(root, path, cpu_key, data_size, nr);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004897 return 0;
Chris Mason62e27492007-03-15 12:56:47 -04004898}
4899
4900/*
4901 * Given a key and some data, insert an item into the tree.
4902 * This does all the path init required, making room in the tree if needed.
4903 */
Omar Sandoval310712b2017-01-17 23:24:37 -08004904int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4905 const struct btrfs_key *cpu_key, void *data,
4906 u32 data_size)
Chris Mason62e27492007-03-15 12:56:47 -04004907{
4908 int ret = 0;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004909 struct btrfs_path *path;
Chris Mason5f39d392007-10-15 16:14:19 -04004910 struct extent_buffer *leaf;
4911 unsigned long ptr;
Chris Mason62e27492007-03-15 12:56:47 -04004912
Chris Mason2c90e5d2007-04-02 10:50:19 -04004913 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00004914 if (!path)
4915 return -ENOMEM;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004916 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
Chris Mason62e27492007-03-15 12:56:47 -04004917 if (!ret) {
Chris Mason5f39d392007-10-15 16:14:19 -04004918 leaf = path->nodes[0];
4919 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4920 write_extent_buffer(leaf, data, ptr, data_size);
4921 btrfs_mark_buffer_dirty(leaf);
Chris Mason62e27492007-03-15 12:56:47 -04004922 }
Chris Mason2c90e5d2007-04-02 10:50:19 -04004923 btrfs_free_path(path);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004924 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004925}
4926
Chris Mason74123bd2007-02-02 11:05:29 -05004927/*
Chris Mason5de08d72007-02-24 06:24:44 -05004928 * delete the pointer from a given node.
Chris Mason74123bd2007-02-02 11:05:29 -05004929 *
Chris Masond352ac62008-09-29 15:18:18 -04004930 * the tree should have been previously balanced so the deletion does not
4931 * empty a node.
Chris Mason74123bd2007-02-02 11:05:29 -05004932 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004933static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4934 int level, int slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004935{
Chris Mason5f39d392007-10-15 16:14:19 -04004936 struct extent_buffer *parent = path->nodes[level];
Chris Mason7518a232007-03-12 12:01:18 -04004937 u32 nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004938 int ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004939
Chris Mason5f39d392007-10-15 16:14:19 -04004940 nritems = btrfs_header_nritems(parent);
Chris Masond3977122009-01-05 21:25:51 -05004941 if (slot != nritems - 1) {
David Sterbabf1d3422018-03-05 15:47:39 +01004942 if (level) {
4943 ret = tree_mod_log_insert_move(parent, slot, slot + 1,
David Sterbaa446a972018-03-05 15:26:29 +01004944 nritems - slot - 1);
David Sterbabf1d3422018-03-05 15:47:39 +01004945 BUG_ON(ret < 0);
4946 }
Chris Mason5f39d392007-10-15 16:14:19 -04004947 memmove_extent_buffer(parent,
4948 btrfs_node_key_ptr_offset(slot),
4949 btrfs_node_key_ptr_offset(slot + 1),
Chris Masond6025572007-03-30 14:27:56 -04004950 sizeof(struct btrfs_key_ptr) *
4951 (nritems - slot - 1));
Chris Mason57ba86c2012-12-18 19:35:32 -05004952 } else if (level) {
David Sterbae09c2ef2018-03-05 15:09:03 +01004953 ret = tree_mod_log_insert_key(parent, slot, MOD_LOG_KEY_REMOVE,
4954 GFP_NOFS);
Chris Mason57ba86c2012-12-18 19:35:32 -05004955 BUG_ON(ret < 0);
Chris Masonbb803952007-03-01 12:04:21 -05004956 }
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004957
Chris Mason7518a232007-03-12 12:01:18 -04004958 nritems--;
Chris Mason5f39d392007-10-15 16:14:19 -04004959 btrfs_set_header_nritems(parent, nritems);
Chris Mason7518a232007-03-12 12:01:18 -04004960 if (nritems == 0 && parent == root->node) {
Chris Mason5f39d392007-10-15 16:14:19 -04004961 BUG_ON(btrfs_header_level(root->node) != 1);
Chris Masonbb803952007-03-01 12:04:21 -05004962 /* just turn the root into a leaf and break */
Chris Mason5f39d392007-10-15 16:14:19 -04004963 btrfs_set_header_level(root->node, 0);
Chris Masonbb803952007-03-01 12:04:21 -05004964 } else if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004965 struct btrfs_disk_key disk_key;
4966
4967 btrfs_node_key(parent, &disk_key, 0);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03004968 fixup_low_keys(path, &disk_key, level + 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004969 }
Chris Masond6025572007-03-30 14:27:56 -04004970 btrfs_mark_buffer_dirty(parent);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004971}
4972
Chris Mason74123bd2007-02-02 11:05:29 -05004973/*
Chris Mason323ac952008-10-01 19:05:46 -04004974 * a helper function to delete the leaf pointed to by path->slots[1] and
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004975 * path->nodes[1].
Chris Mason323ac952008-10-01 19:05:46 -04004976 *
4977 * This deletes the pointer in path->nodes[1] and frees the leaf
4978 * block extent. zero is returned if it all worked out, < 0 otherwise.
4979 *
4980 * The path must have already been setup for deleting the leaf, including
4981 * all the proper balancing. path->nodes[1] must be locked.
4982 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01004983static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4984 struct btrfs_root *root,
4985 struct btrfs_path *path,
4986 struct extent_buffer *leaf)
Chris Mason323ac952008-10-01 19:05:46 -04004987{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004988 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004989 del_ptr(root, path, 1, path->slots[1]);
Chris Mason323ac952008-10-01 19:05:46 -04004990
Chris Mason4d081c42009-02-04 09:31:28 -05004991 /*
4992 * btrfs_free_extent is expensive, we want to make sure we
4993 * aren't holding any locks when we call it
4994 */
4995 btrfs_unlock_up_safe(path, 0);
4996
Yan, Zhengf0486c62010-05-16 10:46:25 -04004997 root_sub_used(root, leaf->len);
4998
David Sterba67439da2019-10-08 13:28:47 +02004999 atomic_inc(&leaf->refs);
Jan Schmidt5581a512012-05-16 17:04:52 +02005000 btrfs_free_tree_block(trans, root, leaf, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05005001 free_extent_buffer_stale(leaf);
Chris Mason323ac952008-10-01 19:05:46 -04005002}
5003/*
Chris Mason74123bd2007-02-02 11:05:29 -05005004 * delete the item at the leaf level in path. If that empties
5005 * the leaf, remove it from the tree
5006 */
Chris Mason85e21ba2008-01-29 15:11:36 -05005007int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
5008 struct btrfs_path *path, int slot, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05005009{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005010 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04005011 struct extent_buffer *leaf;
5012 struct btrfs_item *item;
Alexandru Moisece0eac22015-08-23 16:01:42 +00005013 u32 last_off;
5014 u32 dsize = 0;
Chris Masonaa5d6be2007-02-28 16:35:06 -05005015 int ret = 0;
5016 int wret;
Chris Mason85e21ba2008-01-29 15:11:36 -05005017 int i;
Chris Mason7518a232007-03-12 12:01:18 -04005018 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05005019
Chris Mason5f39d392007-10-15 16:14:19 -04005020 leaf = path->nodes[0];
Chris Mason85e21ba2008-01-29 15:11:36 -05005021 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
5022
5023 for (i = 0; i < nr; i++)
5024 dsize += btrfs_item_size_nr(leaf, slot + i);
5025
Chris Mason5f39d392007-10-15 16:14:19 -04005026 nritems = btrfs_header_nritems(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005027
Chris Mason85e21ba2008-01-29 15:11:36 -05005028 if (slot + nr != nritems) {
David Sterba8f881e82019-03-20 11:33:10 +01005029 int data_end = leaf_data_end(leaf);
David Sterbac82f8232019-08-09 17:48:21 +02005030 struct btrfs_map_token token;
Chris Mason5f39d392007-10-15 16:14:19 -04005031
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03005032 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
Chris Masond6025572007-03-30 14:27:56 -04005033 data_end + dsize,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03005034 BTRFS_LEAF_DATA_OFFSET + data_end,
Chris Mason85e21ba2008-01-29 15:11:36 -05005035 last_off - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04005036
David Sterbac82f8232019-08-09 17:48:21 +02005037 btrfs_init_map_token(&token, leaf);
Chris Mason85e21ba2008-01-29 15:11:36 -05005038 for (i = slot + nr; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04005039 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04005040
Ross Kirkdd3cc162013-09-16 15:58:09 +01005041 item = btrfs_item_nr(i);
David Sterbacc4c13d2020-04-29 02:15:56 +02005042 ioff = btrfs_token_item_offset(&token, item);
5043 btrfs_set_token_item_offset(&token, item, ioff + dsize);
Chris Mason0783fcf2007-03-12 20:12:07 -04005044 }
Chris Masondb945352007-10-15 16:15:53 -04005045
Chris Mason5f39d392007-10-15 16:14:19 -04005046 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
Chris Mason85e21ba2008-01-29 15:11:36 -05005047 btrfs_item_nr_offset(slot + nr),
Chris Masond6025572007-03-30 14:27:56 -04005048 sizeof(struct btrfs_item) *
Chris Mason85e21ba2008-01-29 15:11:36 -05005049 (nritems - slot - nr));
Chris Masonbe0e5c02007-01-26 15:51:26 -05005050 }
Chris Mason85e21ba2008-01-29 15:11:36 -05005051 btrfs_set_header_nritems(leaf, nritems - nr);
5052 nritems -= nr;
Chris Mason5f39d392007-10-15 16:14:19 -04005053
Chris Mason74123bd2007-02-02 11:05:29 -05005054 /* delete the leaf if we've emptied it */
Chris Mason7518a232007-03-12 12:01:18 -04005055 if (nritems == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04005056 if (leaf == root->node) {
5057 btrfs_set_header_level(leaf, 0);
Chris Mason9a8dd152007-02-23 08:38:36 -05005058 } else {
Yan, Zhengf0486c62010-05-16 10:46:25 -04005059 btrfs_set_path_blocking(path);
David Sterba6a884d7d2019-03-20 14:30:02 +01005060 btrfs_clean_tree_block(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01005061 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason9a8dd152007-02-23 08:38:36 -05005062 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05005063 } else {
Chris Mason7518a232007-03-12 12:01:18 -04005064 int used = leaf_space_used(leaf, 0, nritems);
Chris Masonaa5d6be2007-02-28 16:35:06 -05005065 if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04005066 struct btrfs_disk_key disk_key;
5067
5068 btrfs_item_key(leaf, &disk_key, 0);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03005069 fixup_low_keys(path, &disk_key, 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05005070 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05005071
Chris Mason74123bd2007-02-02 11:05:29 -05005072 /* delete the leaf if it is mostly empty */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005073 if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05005074 /* push_leaf_left fixes the path.
5075 * make sure the path still points to our leaf
5076 * for possible call to del_ptr below
5077 */
Chris Mason4920c9a2007-01-26 16:38:42 -05005078 slot = path->slots[1];
David Sterba67439da2019-10-08 13:28:47 +02005079 atomic_inc(&leaf->refs);
Chris Mason5f39d392007-10-15 16:14:19 -04005080
Chris Masonb9473432009-03-13 11:00:37 -04005081 btrfs_set_path_blocking(path);
Chris Mason99d8f832010-07-07 10:51:48 -04005082 wret = push_leaf_left(trans, root, path, 1, 1,
5083 1, (u32)-1);
Chris Mason54aa1f42007-06-22 14:16:25 -04005084 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05005085 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04005086
5087 if (path->nodes[0] == leaf &&
5088 btrfs_header_nritems(leaf)) {
Chris Mason99d8f832010-07-07 10:51:48 -04005089 wret = push_leaf_right(trans, root, path, 1,
5090 1, 1, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04005091 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05005092 ret = wret;
5093 }
Chris Mason5f39d392007-10-15 16:14:19 -04005094
5095 if (btrfs_header_nritems(leaf) == 0) {
Chris Mason323ac952008-10-01 19:05:46 -04005096 path->slots[1] = slot;
Jeff Mahoney143bede2012-03-01 14:56:26 +01005097 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04005098 free_extent_buffer(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01005099 ret = 0;
Chris Mason5de08d72007-02-24 06:24:44 -05005100 } else {
Chris Mason925baed2008-06-25 16:01:30 -04005101 /* if we're still in the path, make sure
5102 * we're dirty. Otherwise, one of the
5103 * push_leaf functions must have already
5104 * dirtied this buffer
5105 */
5106 if (path->nodes[0] == leaf)
5107 btrfs_mark_buffer_dirty(leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04005108 free_extent_buffer(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005109 }
Chris Masond5719762007-03-23 10:01:08 -04005110 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04005111 btrfs_mark_buffer_dirty(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005112 }
5113 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05005114 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05005115}
5116
Chris Mason97571fd2007-02-24 13:39:08 -05005117/*
Chris Mason925baed2008-06-25 16:01:30 -04005118 * search the tree again to find a leaf with lesser keys
Chris Mason7bb86312007-12-11 09:25:06 -05005119 * returns 0 if it found something or 1 if there are no lesser leaves.
5120 * returns < 0 on io errors.
Chris Masond352ac62008-09-29 15:18:18 -04005121 *
5122 * This may release the path, and so you may lose any locks held at the
5123 * time you call it.
Chris Mason7bb86312007-12-11 09:25:06 -05005124 */
Josef Bacik16e75492013-10-22 12:18:51 -04005125int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Mason7bb86312007-12-11 09:25:06 -05005126{
Chris Mason925baed2008-06-25 16:01:30 -04005127 struct btrfs_key key;
5128 struct btrfs_disk_key found_key;
5129 int ret;
Chris Mason7bb86312007-12-11 09:25:06 -05005130
Chris Mason925baed2008-06-25 16:01:30 -04005131 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
Chris Mason7bb86312007-12-11 09:25:06 -05005132
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005133 if (key.offset > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005134 key.offset--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005135 } else if (key.type > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005136 key.type--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005137 key.offset = (u64)-1;
5138 } else if (key.objectid > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005139 key.objectid--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005140 key.type = (u8)-1;
5141 key.offset = (u64)-1;
5142 } else {
Chris Mason925baed2008-06-25 16:01:30 -04005143 return 1;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005144 }
Chris Mason7bb86312007-12-11 09:25:06 -05005145
David Sterbab3b4aa72011-04-21 01:20:15 +02005146 btrfs_release_path(path);
Chris Mason925baed2008-06-25 16:01:30 -04005147 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5148 if (ret < 0)
5149 return ret;
5150 btrfs_item_key(path->nodes[0], &found_key, 0);
5151 ret = comp_keys(&found_key, &key);
Filipe Manana337c6f62014-06-09 13:22:13 +01005152 /*
5153 * We might have had an item with the previous key in the tree right
5154 * before we released our path. And after we released our path, that
5155 * item might have been pushed to the first slot (0) of the leaf we
5156 * were holding due to a tree balance. Alternatively, an item with the
5157 * previous key can exist as the only element of a leaf (big fat item).
5158 * Therefore account for these 2 cases, so that our callers (like
5159 * btrfs_previous_item) don't miss an existing item with a key matching
5160 * the previous key we computed above.
5161 */
5162 if (ret <= 0)
Chris Mason925baed2008-06-25 16:01:30 -04005163 return 0;
5164 return 1;
Chris Mason7bb86312007-12-11 09:25:06 -05005165}
5166
/*
 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that have a minimum transaction id.
 * This is used by the btree defrag code, and tree logging
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * min_trans indicates the oldest transaction that you are interested
 * in walking through.  Any nodes or leaves older than min_trans are
 * skipped over (without reading them).
 *
 * returns zero if something useful was found, < 0 on error and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_path *path,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
	int sret;
	u32 nritems;
	int level;
	int ret = 1;
	/* Caller's setting, restored at 'out' */
	int keep_locks = path->keep_locks;

	/* Forced on for the duration of the walk */
	path->keep_locks = 1;
again:
	/* (Re)start the descent from a read locked root node */
	cur = btrfs_read_lock_root_node(root);
	level = btrfs_header_level(cur);
	WARN_ON(path->nodes[level]);
	path->nodes[level] = cur;
	path->locks[level] = BTRFS_READ_LOCK;

	/* The root itself is older than min_trans, nothing to look at */
	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		sret = btrfs_bin_search(cur, min_key, &slot);
		if (sret < 0) {
			ret = sret;
			goto out;
		}

		/* at the lowest level, we're done, setup the path and exit */
		if (level == path->lowest_level) {
			if (slot >= nritems)
				goto find_next_key;
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
		/* No exact match in this node: step back to the preceding slot */
		if (sret && slot > 0)
			slot--;
		/*
		 * check this node pointer against the min_trans parameters.
		 * If it is too old, skip to the next one.
		 */
		while (slot < nritems) {
			u64 gen;

			gen = btrfs_node_ptr_generation(cur, slot);
			if (gen < min_trans) {
				slot++;
				continue;
			}
			break;
		}
find_next_key:
		/*
		 * we didn't find a candidate key in this node, walk forward
		 * and find another one
		 */
		if (slot >= nritems) {
			path->slots[level] = slot;
			btrfs_set_path_blocking(path);
			sret = btrfs_find_next_key(root, path, min_key, level,
						  min_trans);
			if (sret == 0) {
				/*
				 * min_key was advanced by btrfs_find_next_key,
				 * search again from the root with it.
				 */
				btrfs_release_path(path);
				goto again;
			} else {
				/*
				 * NOTE(review): 'ret' is not updated from
				 * 'sret' here, so a negative return from
				 * btrfs_find_next_key is reported to the
				 * caller as 1 - confirm this is intended.
				 */
				goto out;
			}
		}
		/* save our key for returning back */
		btrfs_node_key_to_cpu(cur, &found_key, slot);
		path->slots[level] = slot;
		if (level == path->lowest_level) {
			ret = 0;
			goto out;
		}
		btrfs_set_path_blocking(path);
		cur = btrfs_read_node_slot(cur, slot);
		if (IS_ERR(cur)) {
			ret = PTR_ERR(cur);
			goto out;
		}

		btrfs_tree_read_lock(cur);

		/* Record the read locked child in the path, one level down */
		path->locks[level - 1] = BTRFS_READ_LOCK;
		path->nodes[level - 1] = cur;
		unlock_up(path, level, 1, 0, NULL);
	}
out:
	path->keep_locks = keep_locks;
	if (ret == 0) {
		btrfs_unlock_up_safe(path, path->lowest_level + 1);
		btrfs_set_path_blocking(path);
		/* Hand the key we stopped at back to the caller */
		memcpy(min_key, &found_key, sizeof(found_key));
	}
	return ret;
}
5291
/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fixup the path.  It looks for and returns the next key in the
 * tree based on the current path and the min_trans parameters.
 *
 * The search starts at 'level' of the path and moves up towards the root
 * whenever the current node has no further slots.  At levels above 0, node
 * pointers with a generation lower than min_trans are skipped.  The key that
 * is found is stored in 'key'.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
			struct btrfs_key *key, int level, u64 min_trans)
{
	int slot;
	struct extent_buffer *c;

	WARN_ON(!path->keep_locks && !path->skip_locking);
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		/* Candidate is the slot right after the one the path uses */
		slot = path->slots[level] + 1;
		c = path->nodes[level];
next:
		if (slot >= btrfs_header_nritems(c)) {
			int ret;
			int orig_lowest;
			struct btrfs_key cur_key;
			/* No parent to move up to, so there is no next key */
			if (level + 1 >= BTRFS_MAX_LEVEL ||
			    !path->nodes[level + 1])
				return 1;

			/*
			 * The parent is still locked (or locking is not used
			 * at all), continue the search one level up.
			 */
			if (path->locks[level + 1] || path->skip_locking) {
				level++;
				continue;
			}

			/*
			 * The parent is not locked.  Remember the last key of
			 * this node, drop the path and search for that key
			 * again, stopping at this same level.
			 */
			slot = btrfs_header_nritems(c) - 1;
			if (level == 0)
				btrfs_item_key_to_cpu(c, &cur_key, slot);
			else
				btrfs_node_key_to_cpu(c, &cur_key, slot);

			orig_lowest = path->lowest_level;
			btrfs_release_path(path);
			path->lowest_level = level;
			ret = btrfs_search_slot(NULL, root, &cur_key, path,
						0, 0);
			path->lowest_level = orig_lowest;
			if (ret < 0)
				return ret;

			c = path->nodes[level];
			slot = path->slots[level];
			/* Exact match: the next key is in the following slot */
			if (ret == 0)
				slot++;
			goto next;
		}

		if (level == 0)
			btrfs_item_key_to_cpu(c, key, slot);
		else {
			u64 gen = btrfs_node_ptr_generation(c, slot);

			/* Pointer is older than min_trans, try the next slot */
			if (gen < min_trans) {
				slot++;
				goto next;
			}
			btrfs_node_key_to_cpu(c, key, slot);
		}
		return 0;
	}
	return 1;
}
5367
Chris Mason7bb86312007-12-11 09:25:06 -05005368/*
Chris Mason925baed2008-06-25 16:01:30 -04005369 * search the tree again to find a leaf with greater keys
Chris Mason0f70abe2007-02-28 16:46:22 -05005370 * returns 0 if it found something or 1 if there are no greater leaves.
5371 * returns < 0 on io errors.
Chris Mason97571fd2007-02-24 13:39:08 -05005372 */
Chris Mason234b63a2007-03-13 10:46:10 -04005373int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Masond97e63b2007-02-20 16:40:44 -05005374{
Jan Schmidt3d7806e2012-06-11 08:29:29 +02005375 return btrfs_next_old_leaf(root, path, 0);
5376}
5377
/*
 * Search the tree again for the last key of the leaf the path points to and
 * move the path to what follows that key.
 *
 * When time_seq is not zero the search is done with btrfs_search_old_slot()
 * using that sequence number, otherwise with btrfs_search_slot().
 *
 * Returns 0 if the path now points to a following item, 1 if there is
 * nothing after the current leaf and < 0 on error.
 */
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
			u64 time_seq)
{
	int slot;
	int level;
	struct extent_buffer *c;
	struct extent_buffer *next;
	struct btrfs_key key;
	u32 nritems;
	int ret;
	/* Caller's setting, restored at 'done' */
	int old_spinning = path->leave_spinning;
	int next_rw_lock = 0;

	nritems = btrfs_header_nritems(path->nodes[0]);
	if (nritems == 0)
		return 1;

	/* The last key of the current leaf is what we search for again */
	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
	level = 1;
	next = NULL;
	next_rw_lock = 0;
	btrfs_release_path(path);

	path->keep_locks = 1;
	path->leave_spinning = 1;

	if (time_seq)
		ret = btrfs_search_old_slot(root, &key, path, time_seq);
	else
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->keep_locks = 0;

	/*
	 * NOTE(review): this early return leaves path->leave_spinning set to
	 * 1 instead of restoring old_spinning - confirm callers do not depend
	 * on the previous value after an error.
	 */
	if (ret < 0)
		return ret;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * by releasing the path above we dropped all our locks.  A balance
	 * could have added more items next to the key that used to be
	 * at the very end of the block.  So, check again here and
	 * advance the path if there are now more items available.
	 */
	if (nritems > 0 && path->slots[0] < nritems - 1) {
		if (ret == 0)
			path->slots[0]++;
		ret = 0;
		goto done;
	}
	/*
	 * So the above check misses one case:
	 * - after releasing the path above, someone has removed the item that
	 *   used to be at the very end of the block, and balance between leafs
	 *   gets another one with bigger key.offset to replace it.
	 *
	 * This one should be returned as well, or we can get leaf corruption
	 * later(esp. in __btrfs_drop_extents()).
	 *
	 * And a bit more explanation about this check,
	 * with ret > 0, the key isn't found, the path points to the slot
	 * where it should be inserted, so the path->slots[0] item must be the
	 * bigger one.
	 */
	if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
		ret = 0;
		goto done;
	}

	/*
	 * Walk up from level 1 until a node is found that has a slot after
	 * the one the path uses, then read the block behind that slot.
	 */
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level]) {
			ret = 1;
			goto done;
		}

		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= btrfs_header_nritems(c)) {
			level++;
			if (level == BTRFS_MAX_LEVEL) {
				ret = 1;
				goto done;
			}
			continue;
		}

		if (next) {
			btrfs_tree_unlock_rw(next, next_rw_lock);
			free_extent_buffer(next);
		}

		next = c;
		next_rw_lock = path->locks[level];
		ret = read_block_for_search(root, path, &next, level,
					    slot, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking) {
			ret = btrfs_try_tree_read_lock(next);
			if (!ret && time_seq) {
				/*
				 * If we don't get the lock, we may be racing
				 * with push_leaf_left, holding that lock while
				 * itself waiting for the leaf we've currently
				 * locked. To solve this situation, we give up
				 * on our lock and cycle.
				 */
				free_extent_buffer(next);
				btrfs_release_path(path);
				cond_resched();
				goto again;
			}
			if (!ret) {
				/* Try lock failed, fall back to a blocking lock */
				btrfs_set_path_blocking(path);
				__btrfs_tree_read_lock(next,
						       BTRFS_NESTING_RIGHT,
						       path->recurse);
			}
			next_rw_lock = BTRFS_READ_LOCK;
		}
		break;
	}
	path->slots[level] = slot;
	/*
	 * Walk back down to level 0: at each level replace the old node in
	 * the path with 'next' at slot 0, then read the block behind slot 0
	 * for the level below.
	 */
	while (1) {
		level--;
		c = path->nodes[level];
		if (path->locks[level])
			btrfs_tree_unlock_rw(c, path->locks[level]);

		free_extent_buffer(c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!path->skip_locking)
			path->locks[level] = next_rw_lock;
		if (!level)
			break;

		ret = read_block_for_search(root, path, &next, level,
					    0, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking) {
			ret = btrfs_try_tree_read_lock(next);
			if (!ret) {
				/* Try lock failed, fall back to a blocking lock */
				btrfs_set_path_blocking(path);
				__btrfs_tree_read_lock(next,
						       BTRFS_NESTING_RIGHT,
						       path->recurse);
			}
			next_rw_lock = BTRFS_READ_LOCK;
		}
	}
	ret = 0;
done:
	unlock_up(path, 0, 1, 0, NULL);
	path->leave_spinning = old_spinning;
	if (!old_spinning)
		btrfs_set_path_blocking(path);

	return ret;
}
Chris Mason0b86a832008-03-24 15:01:56 -04005550
Chris Mason3f157a22008-06-25 16:01:31 -04005551/*
5552 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
5553 * searching until it gets past min_objectid or finds an item of 'type'
5554 *
5555 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5556 */
Chris Mason0b86a832008-03-24 15:01:56 -04005557int btrfs_previous_item(struct btrfs_root *root,
5558 struct btrfs_path *path, u64 min_objectid,
5559 int type)
5560{
5561 struct btrfs_key found_key;
5562 struct extent_buffer *leaf;
Chris Masone02119d2008-09-05 16:13:11 -04005563 u32 nritems;
Chris Mason0b86a832008-03-24 15:01:56 -04005564 int ret;
5565
Chris Masond3977122009-01-05 21:25:51 -05005566 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04005567 if (path->slots[0] == 0) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05005568 btrfs_set_path_blocking(path);
Chris Mason0b86a832008-03-24 15:01:56 -04005569 ret = btrfs_prev_leaf(root, path);
5570 if (ret != 0)
5571 return ret;
5572 } else {
5573 path->slots[0]--;
5574 }
5575 leaf = path->nodes[0];
Chris Masone02119d2008-09-05 16:13:11 -04005576 nritems = btrfs_header_nritems(leaf);
5577 if (nritems == 0)
5578 return 1;
5579 if (path->slots[0] == nritems)
5580 path->slots[0]--;
5581
Chris Mason0b86a832008-03-24 15:01:56 -04005582 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Chris Masone02119d2008-09-05 16:13:11 -04005583 if (found_key.objectid < min_objectid)
5584 break;
Yan Zheng0a4eefb2009-07-24 11:06:53 -04005585 if (found_key.type == type)
5586 return 0;
Chris Masone02119d2008-09-05 16:13:11 -04005587 if (found_key.objectid == min_objectid &&
5588 found_key.type < type)
5589 break;
Chris Mason0b86a832008-03-24 15:01:56 -04005590 }
5591 return 1;
5592}
Wang Shilongade2e0b2014-01-12 21:38:33 +08005593
5594/*
5595 * search in extent tree to find a previous Metadata/Data extent item with
5596 * min objecitd.
5597 *
5598 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5599 */
5600int btrfs_previous_extent_item(struct btrfs_root *root,
5601 struct btrfs_path *path, u64 min_objectid)
5602{
5603 struct btrfs_key found_key;
5604 struct extent_buffer *leaf;
5605 u32 nritems;
5606 int ret;
5607
5608 while (1) {
5609 if (path->slots[0] == 0) {
5610 btrfs_set_path_blocking(path);
5611 ret = btrfs_prev_leaf(root, path);
5612 if (ret != 0)
5613 return ret;
5614 } else {
5615 path->slots[0]--;
5616 }
5617 leaf = path->nodes[0];
5618 nritems = btrfs_header_nritems(leaf);
5619 if (nritems == 0)
5620 return 1;
5621 if (path->slots[0] == nritems)
5622 path->slots[0]--;
5623
5624 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5625 if (found_key.objectid < min_objectid)
5626 break;
5627 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
5628 found_key.type == BTRFS_METADATA_ITEM_KEY)
5629 return 0;
5630 if (found_key.objectid == min_objectid &&
5631 found_key.type < BTRFS_EXTENT_ITEM_KEY)
5632 break;
5633 }
5634 return 1;
5635}