// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007,2008 Oracle.  All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "locking.h"
#include "volumes.h"
#include "qgroup.h"

static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		      const struct btrfs_key *ins_key, struct btrfs_path *path,
		      int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct extent_buffer *dst,
			  struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
		    int level, int slot);

static const struct btrfs_csums {
	u16		size;
	const char	name[10];
	const char	driver[12];
} btrfs_csums[] = {
	[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
	[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
	[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
				     .driver = "blake2b-256" },
};

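/*
 * Return the checksum size in bytes for the checksum type recorded in the
 * super block.
 */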
int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
	u16 t = btrfs_super_csum_type(s);
	/*
	 * csum type is validated at mount time
	 */
	return btrfs_csums[t].size;
}

const char *btrfs_super_csum_name(u16 csum_type)
{
	/* csum type is validated at mount time */
	return btrfs_csums[csum_type].name;
}

/*
 * Return driver name if defined, otherwise the name that's also a valid driver
 * name
 */
const char *btrfs_super_csum_driver(u16 csum_type)
{
	/* csum type is validated at mount time */
	return btrfs_csums[csum_type].driver[0] ?
		btrfs_csums[csum_type].driver :
		btrfs_csums[csum_type].name;
}

size_t __attribute_const__ btrfs_get_num_csums(void)
{
	return ARRAY_SIZE(btrfs_csums);
}

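/* Allocate a zeroed path from the btrfs_path slab cache. */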
struct btrfs_path *btrfs_alloc_path(void)
{
	return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
}

/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
	if (!p)
		return;
	btrfs_release_path(p);
	kmem_cache_free(btrfs_path_cachep, p);
}

/*
 * path release drops references on the extent buffers in the path
 * and it drops any locks held by this path
 *
 * It is safe to call this on paths that have no locks or extent buffers held.
 */
noinline void btrfs_release_path(struct btrfs_path *p)
{
	int i;

	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		p->slots[i] = 0;
		if (!p->nodes[i])
			continue;
		if (p->locks[i]) {
			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
			p->locks[i] = 0;
		}
		free_extent_buffer(p->nodes[i]);
		p->nodes[i] = NULL;
	}
}

/*
 * safely gets a reference on the root node of a tree.  A lock
 * is not taken, so a concurrent writer may put a different node
 * at the root of the tree.  See btrfs_lock_root_node for the
 * looping required.
 *
 * The extent buffer returned by this has a reference taken, so
 * it won't disappear.  It may stop being the root of the tree
 * at any time because there are no locks held.
 */
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		rcu_read_lock();
		eb = rcu_dereference(root->node);

		/*
		 * RCU really hurts here, we could free up the root node because
		 * it was COWed but we may not get the new root node yet so do
		 * the inc_not_zero dance and if it doesn't work then
		 * synchronize_rcu and try again.
		 */
		if (atomic_inc_not_zero(&eb->refs)) {
			rcu_read_unlock();
			break;
		}
		rcu_read_unlock();
		synchronize_rcu();
	}
	return eb;
}

/*
 * Cowonly roots (not-shareable trees, everything that is not a subvolume or
 * reloc root) just get put onto a simple dirty list.  The transaction walks
 * this list to make sure they get properly updated on disk.
 */
static void add_root_to_dirty_list(struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;

	if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
	    !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
		return;

	spin_lock(&fs_info->trans_lock);
	if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
		/* Want the extent tree to be the last on the list */
		if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
			list_move_tail(&root->dirty_list,
				       &fs_info->dirty_cowonly_roots);
		else
			list_move(&root->dirty_list,
				  &fs_info->dirty_cowonly_roots);
	}
	spin_unlock(&fs_info->trans_lock);
}

/*
 * used by snapshot creation to make a copy of a root for a tree with
 * a given objectid.  The buffer with the new root node is returned in
 * cow_ret, and this func returns zero on success or a negative error code.
 */
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *cow;
	int ret = 0;
	int level;
	struct btrfs_disk_key disk_key;

	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
		trans->transid != fs_info->running_transaction->transid);
	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
		trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
				     &disk_key, level, buf->start, 0,
				     BTRFS_NESTING_NEW_ROOT);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	copy_extent_buffer_full(cow, buf);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, new_root_objectid);

	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		ret = btrfs_inc_ref(trans, root, cow, 1);
	else
		ret = btrfs_inc_ref(trans, root, cow, 0);

	if (ret)
		return ret;

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

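/*
 * Operation types recorded in the tree modification log.  Each entry describes
 * a single change to a tree block so it can be undone when rewinding a block
 * to an older tree mod log sequence number.
 */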
enum mod_log_op {
	MOD_LOG_KEY_REPLACE,
	MOD_LOG_KEY_ADD,
	MOD_LOG_KEY_REMOVE,
	MOD_LOG_KEY_REMOVE_WHILE_FREEING,
	MOD_LOG_KEY_REMOVE_WHILE_MOVING,
	MOD_LOG_MOVE_KEYS,
	MOD_LOG_ROOT_REPLACE,
};

struct tree_mod_root {
	u64 logical;
	u8 level;
};

struct tree_mod_elem {
	struct rb_node node;
	u64 logical;
	u64 seq;
	enum mod_log_op op;

	/* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
	int slot;

	/* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
	u64 generation;

	/* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
	struct btrfs_disk_key key;
	u64 blockptr;

	/* this is used for op == MOD_LOG_MOVE_KEYS */
	struct {
		int dst_slot;
		int nr_items;
	} move;

	/* this is used for op == MOD_LOG_ROOT_REPLACE */
	struct tree_mod_root old_root;
};

/*
 * Pull a new tree mod seq number for our operation.
 */
static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
	return atomic64_inc_return(&fs_info->tree_mod_seq);
}

/*
 * This adds a new blocker to the tree mod log's blocker list if the @elem
 * passed does not already have a sequence number set. So when a caller expects
 * to record tree modifications, it should ensure to set elem->seq to zero
 * before calling btrfs_get_tree_mod_seq.
 * Returns a fresh, unused tree log modification sequence number, even if no new
 * blocker was added.
 */
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
			   struct seq_list *elem)
{
	write_lock(&fs_info->tree_mod_log_lock);
	if (!elem->seq) {
		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
	}
	write_unlock(&fs_info->tree_mod_log_lock);

	return elem->seq;
}

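/*
 * Drop a blocker from the tree mod log's blocker list and remove all log
 * entries that are older than the oldest sequence number still blocked.
 */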
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
			    struct seq_list *elem)
{
	struct rb_root *tm_root;
	struct rb_node *node;
	struct rb_node *next;
	struct tree_mod_elem *tm;
	u64 min_seq = (u64)-1;
	u64 seq_putting = elem->seq;

	if (!seq_putting)
		return;

	write_lock(&fs_info->tree_mod_log_lock);
	list_del(&elem->list);
	elem->seq = 0;

	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		struct seq_list *first;

		first = list_first_entry(&fs_info->tree_mod_seq_list,
					 struct seq_list, list);
		if (seq_putting > first->seq) {
			/*
			 * Blocker with lower sequence number exists, we
			 * cannot remove anything from the log.
			 */
			write_unlock(&fs_info->tree_mod_log_lock);
			return;
		}
		min_seq = first->seq;
	}

	/*
	 * anything that's lower than the lowest existing (read: blocked)
	 * sequence number can be removed from the tree.
	 */
	tm_root = &fs_info->tree_mod_log;
	for (node = rb_first(tm_root); node; node = next) {
		next = rb_next(node);
		tm = rb_entry(node, struct tree_mod_elem, node);
		if (tm->seq >= min_seq)
			continue;
		rb_erase(node, tm_root);
		kfree(tm);
	}
	write_unlock(&fs_info->tree_mod_log_lock);
}

/*
 * key order of the log:
 *       node/leaf start address -> sequence
 *
 * The 'start address' is the logical address of the *new* root node
 * for root replace operations, or the logical address of the affected
 * block for all other operations.
 */
static noinline int
__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
{
	struct rb_root *tm_root;
	struct rb_node **new;
	struct rb_node *parent = NULL;
	struct tree_mod_elem *cur;

	lockdep_assert_held_write(&fs_info->tree_mod_log_lock);

	tm->seq = btrfs_inc_tree_mod_seq(fs_info);

	tm_root = &fs_info->tree_mod_log;
	new = &tm_root->rb_node;
	while (*new) {
		cur = rb_entry(*new, struct tree_mod_elem, node);
		parent = *new;
		if (cur->logical < tm->logical)
			new = &((*new)->rb_left);
		else if (cur->logical > tm->logical)
			new = &((*new)->rb_right);
		else if (cur->seq < tm->seq)
			new = &((*new)->rb_left);
		else if (cur->seq > tm->seq)
			new = &((*new)->rb_right);
		else
			return -EEXIST;
	}

	rb_link_node(&tm->node, parent, new);
	rb_insert_color(&tm->node, tm_root);
	return 0;
}

/*
 * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
 * returns zero with the tree_mod_log_lock acquired. The caller must hold
 * this until all tree mod log insertions are recorded in the rb tree and then
 * write unlock fs_info::tree_mod_log_lock.
 */
static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
				    struct extent_buffer *eb) {
	smp_mb();
	if (list_empty(&(fs_info)->tree_mod_seq_list))
		return 1;
	if (eb && btrfs_header_level(eb) == 0)
		return 1;

	write_lock(&fs_info->tree_mod_log_lock);
	if (list_empty(&(fs_info)->tree_mod_seq_list)) {
		write_unlock(&fs_info->tree_mod_log_lock);
		return 1;
	}

	return 0;
}

/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
				    struct extent_buffer *eb)
{
	smp_mb();
	if (list_empty(&(fs_info)->tree_mod_seq_list))
		return 0;
	if (eb && btrfs_header_level(eb) == 0)
		return 0;

	return 1;
}

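/* Allocate and fill a tree mod log element for one slot of a tree block. */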
static struct tree_mod_elem *
alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
		    enum mod_log_op op, gfp_t flags)
{
	struct tree_mod_elem *tm;

	tm = kzalloc(sizeof(*tm), flags);
	if (!tm)
		return NULL;

	tm->logical = eb->start;
	if (op != MOD_LOG_KEY_ADD) {
		btrfs_node_key(eb, &tm->key, slot);
		tm->blockptr = btrfs_node_blockptr(eb, slot);
	}
	tm->op = op;
	tm->slot = slot;
	tm->generation = btrfs_node_ptr_generation(eb, slot);
	RB_CLEAR_NODE(&tm->node);

	return tm;
}

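/* Log a single key add/remove/replace at @slot of @eb in the tree mod log. */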
static noinline int tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
		enum mod_log_op op, gfp_t flags)
{
	struct tree_mod_elem *tm;
	int ret;

	if (!tree_mod_need_log(eb->fs_info, eb))
		return 0;

	tm = alloc_tree_mod_elem(eb, slot, op, flags);
	if (!tm)
		return -ENOMEM;

	if (tree_mod_dont_log(eb->fs_info, eb)) {
		kfree(tm);
		return 0;
	}

	ret = __tree_mod_log_insert(eb->fs_info, tm);
	write_unlock(&eb->fs_info->tree_mod_log_lock);
	if (ret)
		kfree(tm);

	return ret;
}

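/*
 * Log the move of @nr_items key pointers from @src_slot to @dst_slot within
 * the same tree block.
 */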
static noinline int tree_mod_log_insert_move(struct extent_buffer *eb,
		int dst_slot, int src_slot, int nr_items)
{
	struct tree_mod_elem *tm = NULL;
	struct tree_mod_elem **tm_list = NULL;
	int ret = 0;
	int i;
	int locked = 0;

	if (!tree_mod_need_log(eb->fs_info, eb))
		return 0;

	tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
	if (!tm_list)
		return -ENOMEM;

	tm = kzalloc(sizeof(*tm), GFP_NOFS);
	if (!tm) {
		ret = -ENOMEM;
		goto free_tms;
	}

	tm->logical = eb->start;
	tm->slot = src_slot;
	tm->move.dst_slot = dst_slot;
	tm->move.nr_items = nr_items;
	tm->op = MOD_LOG_MOVE_KEYS;

	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
		tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
		    MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
		if (!tm_list[i]) {
			ret = -ENOMEM;
			goto free_tms;
		}
	}

	if (tree_mod_dont_log(eb->fs_info, eb))
		goto free_tms;
	locked = 1;

	/*
	 * When we overwrite something during the move, we log these removals.
	 * This can only happen when we move towards the beginning of the
	 * buffer, i.e. dst_slot < src_slot.
	 */
	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
		ret = __tree_mod_log_insert(eb->fs_info, tm_list[i]);
		if (ret)
			goto free_tms;
	}

	ret = __tree_mod_log_insert(eb->fs_info, tm);
	if (ret)
		goto free_tms;
	write_unlock(&eb->fs_info->tree_mod_log_lock);
	kfree(tm_list);

	return 0;
free_tms:
	for (i = 0; i < nr_items; i++) {
		if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
			rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
		kfree(tm_list[i]);
	}
	if (locked)
		write_unlock(&eb->fs_info->tree_mod_log_lock);
	kfree(tm_list);
	kfree(tm);

	return ret;
}

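/*
 * Insert an array of prepared tree mod log elements, rolling back the ones
 * already inserted if any insertion fails.
 */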
static inline int
__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
		       struct tree_mod_elem **tm_list,
		       int nritems)
{
	int i, j;
	int ret;

	for (i = nritems - 1; i >= 0; i--) {
		ret = __tree_mod_log_insert(fs_info, tm_list[i]);
		if (ret) {
			for (j = nritems - 1; j > i; j--)
				rb_erase(&tm_list[j]->node,
					 &fs_info->tree_mod_log);
			return ret;
		}
	}

	return 0;
}

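/*
 * Log the replacement of the root node (old_root -> new_root), optionally
 * logging the removal of all key pointers from the old root first.
 */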
static noinline int tree_mod_log_insert_root(struct extent_buffer *old_root,
			 struct extent_buffer *new_root, int log_removal)
{
	struct btrfs_fs_info *fs_info = old_root->fs_info;
	struct tree_mod_elem *tm = NULL;
	struct tree_mod_elem **tm_list = NULL;
	int nritems = 0;
	int ret = 0;
	int i;

	if (!tree_mod_need_log(fs_info, NULL))
		return 0;

	if (log_removal && btrfs_header_level(old_root) > 0) {
		nritems = btrfs_header_nritems(old_root);
		tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
				  GFP_NOFS);
		if (!tm_list) {
			ret = -ENOMEM;
			goto free_tms;
		}
		for (i = 0; i < nritems; i++) {
			tm_list[i] = alloc_tree_mod_elem(old_root, i,
			    MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
			if (!tm_list[i]) {
				ret = -ENOMEM;
				goto free_tms;
			}
		}
	}

	tm = kzalloc(sizeof(*tm), GFP_NOFS);
	if (!tm) {
		ret = -ENOMEM;
		goto free_tms;
	}

	tm->logical = new_root->start;
	tm->old_root.logical = old_root->start;
	tm->old_root.level = btrfs_header_level(old_root);
	tm->generation = btrfs_header_generation(old_root);
	tm->op = MOD_LOG_ROOT_REPLACE;

	if (tree_mod_dont_log(fs_info, NULL))
		goto free_tms;

	if (tm_list)
		ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
	if (!ret)
		ret = __tree_mod_log_insert(fs_info, tm);

	write_unlock(&fs_info->tree_mod_log_lock);
	if (ret)
		goto free_tms;
	kfree(tm_list);

	return ret;

free_tms:
	if (tm_list) {
		for (i = 0; i < nritems; i++)
			kfree(tm_list[i]);
		kfree(tm_list);
	}
	kfree(tm);

	return ret;
}

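/*
 * Search the log for an operation on the block at @start: return the entry
 * with the smallest sequence number >= @min_seq when @smallest is set, or the
 * entry with the largest sequence number otherwise.
 */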
static struct tree_mod_elem *
__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
		      int smallest)
{
	struct rb_root *tm_root;
	struct rb_node *node;
	struct tree_mod_elem *cur = NULL;
	struct tree_mod_elem *found = NULL;

	read_lock(&fs_info->tree_mod_log_lock);
	tm_root = &fs_info->tree_mod_log;
	node = tm_root->rb_node;
	while (node) {
		cur = rb_entry(node, struct tree_mod_elem, node);
		if (cur->logical < start) {
			node = node->rb_left;
		} else if (cur->logical > start) {
			node = node->rb_right;
		} else if (cur->seq < min_seq) {
			node = node->rb_left;
		} else if (!smallest) {
			/* we want the node with the highest seq */
			if (found)
				BUG_ON(found->seq > cur->seq);
			found = cur;
			node = node->rb_left;
		} else if (cur->seq > min_seq) {
			/* we want the node with the smallest seq */
			if (found)
				BUG_ON(found->seq < cur->seq);
			found = cur;
			node = node->rb_right;
		} else {
			found = cur;
			break;
		}
	}
	read_unlock(&fs_info->tree_mod_log_lock);

	return found;
}

/*
 * this returns the element from the log with the smallest time sequence
 * value that's in the log (the oldest log item). any element with a time
 * sequence lower than min_seq will be ignored.
 */
static struct tree_mod_elem *
tree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
			   u64 min_seq)
{
	return __tree_mod_log_search(fs_info, start, min_seq, 1);
}

/*
 * this returns the element from the log with the largest time sequence
 * value that's in the log (the most recent log item). any element with
 * a time sequence lower than min_seq will be ignored.
 */
static struct tree_mod_elem *
tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
{
	return __tree_mod_log_search(fs_info, start, min_seq, 0);
}

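/*
 * Log the copy of @nr_items key pointers from @src to @dst as removals from
 * the source block and additions to the destination block.
 */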
static noinline int tree_mod_log_eb_copy(struct extent_buffer *dst,
		     struct extent_buffer *src, unsigned long dst_offset,
		     unsigned long src_offset, int nr_items)
{
	struct btrfs_fs_info *fs_info = dst->fs_info;
	int ret = 0;
	struct tree_mod_elem **tm_list = NULL;
	struct tree_mod_elem **tm_list_add, **tm_list_rem;
	int i;
	int locked = 0;

	if (!tree_mod_need_log(fs_info, NULL))
		return 0;

	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
		return 0;

	tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
			  GFP_NOFS);
	if (!tm_list)
		return -ENOMEM;

	tm_list_add = tm_list;
	tm_list_rem = tm_list + nr_items;
	for (i = 0; i < nr_items; i++) {
		tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
		    MOD_LOG_KEY_REMOVE, GFP_NOFS);
		if (!tm_list_rem[i]) {
			ret = -ENOMEM;
			goto free_tms;
		}

		tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
		    MOD_LOG_KEY_ADD, GFP_NOFS);
		if (!tm_list_add[i]) {
			ret = -ENOMEM;
			goto free_tms;
		}
	}

	if (tree_mod_dont_log(fs_info, NULL))
		goto free_tms;
	locked = 1;

	for (i = 0; i < nr_items; i++) {
		ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
		if (ret)
			goto free_tms;
		ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
		if (ret)
			goto free_tms;
	}

	write_unlock(&fs_info->tree_mod_log_lock);
	kfree(tm_list);

	return 0;

free_tms:
	for (i = 0; i < nr_items * 2; i++) {
		if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
			rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
		kfree(tm_list[i]);
	}
	if (locked)
		write_unlock(&fs_info->tree_mod_log_lock);
	kfree(tm_list);

	return ret;
}

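/*
 * Log the freeing of a whole node by recording the removal of every key
 * pointer it holds.  Leaves are not logged.
 */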
static noinline int tree_mod_log_free_eb(struct extent_buffer *eb)
{
	struct tree_mod_elem **tm_list = NULL;
	int nritems = 0;
	int i;
	int ret = 0;

	if (btrfs_header_level(eb) == 0)
		return 0;

	if (!tree_mod_need_log(eb->fs_info, NULL))
		return 0;

	nritems = btrfs_header_nritems(eb);
	tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
	if (!tm_list)
		return -ENOMEM;

	for (i = 0; i < nritems; i++) {
		tm_list[i] = alloc_tree_mod_elem(eb, i,
		    MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
		if (!tm_list[i]) {
			ret = -ENOMEM;
			goto free_tms;
		}
	}

	if (tree_mod_dont_log(eb->fs_info, eb))
		goto free_tms;

	ret = __tree_mod_log_free_eb(eb->fs_info, tm_list, nritems);
	write_unlock(&eb->fs_info->tree_mod_log_lock);
	if (ret)
		goto free_tms;
	kfree(tm_list);

	return 0;

free_tms:
	for (i = 0; i < nritems; i++)
		kfree(tm_list[i]);
	kfree(tm_list);

	return ret;
}

/*
 * check if the tree block can be shared by multiple trees
 */
int btrfs_block_can_be_shared(struct btrfs_root *root,
			      struct extent_buffer *buf)
{
	/*
	 * Tree blocks not in shareable trees and tree roots are never shared.
	 * If a block was allocated after the last snapshot and the block was
	 * not allocated by tree relocation, we know the block is not shared.
	 */
	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
	    buf != root->node && buf != root->commit_root &&
	    (btrfs_header_generation(buf) <=
	     btrfs_root_last_snapshot(&root->root_item) ||
	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
		return 1;

	return 0;
}

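/*
 * Adjust backrefs when @buf is COWed into @cow: bump or drop references on the
 * old and new blocks according to the rules below and report through @last_ref
 * whether the last reference to the old block was dropped.
 */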
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct extent_buffer *buf,
				       struct extent_buffer *cow,
				       int *last_ref)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 refs;
	u64 owner;
	u64 flags;
	u64 new_flags = 0;
	int ret;

	/*
	 * Backrefs update rules:
	 *
	 * Always use full backrefs for extent pointers in tree block
	 * allocated by tree relocation.
	 *
	 * If a shared tree block is no longer referenced by its owner
	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
	 * use full backrefs for extent pointers in tree block.
	 *
	 * If a tree block is being relocated
	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
	 * use full backrefs for extent pointers in tree block.
	 * The reason for this is that some operations (such as drop tree)
	 * are only allowed for blocks that use full backrefs.
	 */

	if (btrfs_block_can_be_shared(root, buf)) {
		ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
					       btrfs_header_level(buf), 1,
					       &refs, &flags);
		if (ret)
			return ret;
		if (refs == 0) {
			ret = -EROFS;
			btrfs_handle_fs_error(fs_info, ret, NULL);
			return ret;
		}
	} else {
		refs = 1;
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
		else
			flags = 0;
	}

	owner = btrfs_header_owner(buf);
	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));

	if (refs > 1) {
		if ((owner == root->root_key.objectid ||
		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
			ret = btrfs_inc_ref(trans, root, buf, 1);
			if (ret)
				return ret;

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID) {
				ret = btrfs_dec_ref(trans, root, buf, 0);
				if (ret)
					return ret;
				ret = btrfs_inc_ref(trans, root, cow, 1);
				if (ret)
					return ret;
			}
			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		} else {

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			if (ret)
				return ret;
		}
		if (new_flags != 0) {
			int level = btrfs_header_level(buf);

			ret = btrfs_set_disk_extent_flags(trans, buf,
							  new_flags, level, 0);
			if (ret)
				return ret;
		}
	} else {
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			if (ret)
				return ret;
			ret = btrfs_dec_ref(trans, root, buf, 1);
			if (ret)
				return ret;
		}
		btrfs_clean_tree_block(buf);
		*last_ref = 1;
	}
	return 0;
}

static struct extent_buffer *alloc_tree_block_no_bg_flush(
					  struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  u64 parent_start,
					  const struct btrfs_disk_key *disk_key,
					  int level,
					  u64 hint,
					  u64 empty_size,
					  enum btrfs_lock_nesting nest)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *ret;

	/*
	 * If we are COWing a node/leaf from the extent, chunk, device or free
	 * space trees, make sure that we do not finish block group creation of
	 * pending block groups. We do this to avoid a deadlock.
	 * COWing can result in allocation of a new chunk, and flushing pending
	 * block groups (btrfs_create_pending_block_groups()) can be triggered
	 * when finishing allocation of a new chunk. Creation of a pending block
	 * group modifies the extent, chunk, device and free space trees,
	 * therefore we could deadlock with ourselves since we are holding a
	 * lock on an extent buffer that btrfs_create_pending_block_groups() may
	 * try to COW later.
	 * For similar reasons, we also need to delay flushing pending block
	 * groups when splitting a leaf or node, from one of those trees, since
	 * we are holding a write lock on it and its parent or when inserting a
	 * new root node for one of those trees.
	 */
	if (root == fs_info->extent_root ||
	    root == fs_info->chunk_root ||
	    root == fs_info->dev_root ||
	    root == fs_info->free_space_root)
		trans->can_flush_pending_bgs = false;

	ret = btrfs_alloc_tree_block(trans, root, parent_start,
				     root->root_key.objectid, disk_key, level,
				     hint, empty_size, nest);
	trans->can_flush_pending_bgs = true;

	return ret;
}

/*
 * does the dirty work in cow of a single block.  The parent block (if
 * supplied) is updated to point to the new cow copy.  The new buffer is marked
 * dirty and returned locked.  If you modify the block it needs to be marked
 * dirty again.
 *
 * search_start -- an allocation hint for the new block
 *
 * empty_size -- a hint that you plan on doing more cow.  This is the size in
 * bytes the allocator should try to find free next to the block it returns.
 * This is just a hint and may be ignored by the allocator.
 */
static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
			     u64 search_start, u64 empty_size,
			     enum btrfs_lock_nesting nest)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_disk_key disk_key;
	struct extent_buffer *cow;
	int level, ret;
	int last_ref = 0;
	int unlock_orig = 0;
	u64 parent_start = 0;

	if (*cow_ret == buf)
		unlock_orig = 1;

	btrfs_assert_tree_locked(buf);

	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
		trans->transid != fs_info->running_transaction->transid);
	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
		trans->transid != root->last_trans);

	level = btrfs_header_level(buf);

	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
		parent_start = parent->start;

	cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
					   level, search_start, empty_size, nest);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	/* cow is set to blocking by btrfs_init_new_buffer */

	copy_extent_buffer_full(cow, buf);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, root->root_key.objectid);

	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);

	ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
		ret = btrfs_reloc_cow_block(trans, root, buf, cow);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			return ret;
		}
	}

	if (buf == root->node) {
		WARN_ON(parent && parent != buf);
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			parent_start = buf->start;

		atomic_inc(&cow->refs);
		ret = tree_mod_log_insert_root(root->node, cow, 1);
		BUG_ON(ret < 0);
		rcu_assign_pointer(root->node, cow);

		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
		free_extent_buffer(buf);
		add_root_to_dirty_list(root);
	} else {
		WARN_ON(trans->transid != btrfs_header_generation(parent));
		tree_mod_log_insert_key(parent, parent_slot,
					MOD_LOG_KEY_REPLACE, GFP_NOFS);
		btrfs_set_node_blockptr(parent, parent_slot,
					cow->start);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
		btrfs_mark_buffer_dirty(parent);
		if (last_ref) {
			ret = tree_mod_log_free_eb(buf);
			if (ret) {
				btrfs_abort_transaction(trans, ret);
				return ret;
			}
		}
		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
	}
	if (unlock_orig)
		btrfs_tree_unlock(buf);
	free_extent_buffer_stale(buf);
	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

/*
 * returns the logical address of the oldest predecessor of the given root.
 * entries older than time_seq are ignored.
 */
static struct tree_mod_elem *__tree_mod_log_oldest_root(
		struct extent_buffer *eb_root, u64 time_seq)
{
	struct tree_mod_elem *tm;
	struct tree_mod_elem *found = NULL;
	u64 root_logical = eb_root->start;
	int looped = 0;

	if (!time_seq)
		return NULL;

	/*
	 * the very last operation that's logged for a root is the
	 * replacement operation (if it is replaced at all). this has
	 * the logical address of the *new* root, making it the very
	 * first operation that's logged for this root.
	 */
	while (1) {
		tm = tree_mod_log_search_oldest(eb_root->fs_info, root_logical,
						time_seq);
		if (!looped && !tm)
			return NULL;
		/*
		 * if there are no tree operations logged for the oldest root,
		 * we simply return it. this should only happen if that (old)
		 * root is at level 0.
		 */
		if (!tm)
			break;

		/*
		 * if there's an operation that's not a root replacement, we
		 * found the oldest version of our root. normally, we'll find a
		 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
		 */
		if (tm->op != MOD_LOG_ROOT_REPLACE)
			break;

		found = tm;
		root_logical = tm->old_root.logical;
		looped = 1;
	}

	/* if there's no old root to return, return what we found instead */
	if (!found)
		found = tm;

	return found;
}

1175/*
1176 * tm is a pointer to the first operation to rewind within eb. then, all
Nicholas D Steeves01327612016-05-19 21:18:45 -04001177 * previous operations will be rewound (until we reach something older than
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001178 * time_seq).
1179 */
1180static void
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001181__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
1182 u64 time_seq, struct tree_mod_elem *first_tm)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001183{
1184 u32 n;
1185 struct rb_node *next;
1186 struct tree_mod_elem *tm = first_tm;
1187 unsigned long o_dst;
1188 unsigned long o_src;
1189 unsigned long p_size = sizeof(struct btrfs_key_ptr);
1190
1191 n = btrfs_header_nritems(eb);
David Sterbab1a09f12018-03-05 15:43:41 +01001192 read_lock(&fs_info->tree_mod_log_lock);
Jan Schmidt097b8a72012-06-21 11:08:04 +02001193 while (tm && tm->seq >= time_seq) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001194 /*
1195 * all the operations are recorded with the operator used for
1196 * the modification. as we're going backwards, we do the
1197 * opposite of each operation here.
1198 */
1199 switch (tm->op) {
1200 case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
1201 BUG_ON(tm->slot < n);
Marcos Paulo de Souzac730ae02020-06-16 15:54:29 -03001202 fallthrough;
Liu Bo95c80bb2012-10-19 09:50:52 +00001203 case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
Chris Mason4c3e6962012-12-18 15:43:18 -05001204 case MOD_LOG_KEY_REMOVE:
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001205 btrfs_set_node_key(eb, &tm->key, tm->slot);
1206 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1207 btrfs_set_node_ptr_generation(eb, tm->slot,
1208 tm->generation);
Chris Mason4c3e6962012-12-18 15:43:18 -05001209 n++;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001210 break;
1211 case MOD_LOG_KEY_REPLACE:
1212 BUG_ON(tm->slot >= n);
1213 btrfs_set_node_key(eb, &tm->key, tm->slot);
1214 btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
1215 btrfs_set_node_ptr_generation(eb, tm->slot,
1216 tm->generation);
1217 break;
1218 case MOD_LOG_KEY_ADD:
Jan Schmidt19956c72012-06-22 14:52:13 +02001219 /* if a move operation is needed it's in the log */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001220 n--;
1221 break;
1222 case MOD_LOG_MOVE_KEYS:
Jan Schmidtc3193102012-05-31 19:24:36 +02001223 o_dst = btrfs_node_key_ptr_offset(tm->slot);
1224 o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
1225 memmove_extent_buffer(eb, o_dst, o_src,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001226 tm->move.nr_items * p_size);
1227 break;
1228 case MOD_LOG_ROOT_REPLACE:
1229 /*
1230 * this operation is special. for roots, this must be
1231 * handled explicitly before rewinding.
1232 * for non-roots, this operation may exist if the node
1233 * was a root: root A -> child B; then A gets empty and
1234 * B is promoted to the new root. in the mod log, we'll
1235 * have a root-replace operation for B, a tree block
1236		 * that is no longer a root. we simply ignore that operation.
1237 */
1238 break;
1239 }
1240 next = rb_next(&tm->node);
1241 if (!next)
1242 break;
Geliang Tang6b4df8b2016-12-19 22:53:41 +08001243 tm = rb_entry(next, struct tree_mod_elem, node);
Chandan Rajendra298cfd32016-01-21 15:55:59 +05301244 if (tm->logical != first_tm->logical)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001245 break;
1246 }
David Sterbab1a09f12018-03-05 15:43:41 +01001247 read_unlock(&fs_info->tree_mod_log_lock);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001248 btrfs_set_header_nritems(eb, n);
1249}
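/*
 * Summary added for clarity (not part of the original source): the undo
 * applied above for each logged operation while rewinding is
 *
 *   MOD_LOG_KEY_ADD          drop the added slot (nritems--)
 *   MOD_LOG_KEY_REMOVE*      restore key/blockptr/generation, nritems++
 *   MOD_LOG_KEY_REPLACE      put the old key/blockptr/generation back
 *   MOD_LOG_MOVE_KEYS        move the key pointers back to their old slots
 *   MOD_LOG_ROOT_REPLACE     ignored here; roots are handled by
 *                            get_old_root() before rewinding
 */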
1250
Jan Schmidt47fb0912013-04-13 13:19:55 +00001251/*
Nicholas D Steeves01327612016-05-19 21:18:45 -04001252 * Called with eb read locked. If the buffer cannot be rewound, the same buffer
Jan Schmidt47fb0912013-04-13 13:19:55 +00001253 * is returned. If rewind operations happen, a fresh buffer is returned. The
1254 * returned buffer is always read-locked. If the returned buffer is not the
1255 * input buffer, the lock on the input buffer is released and the input buffer
1256 * is freed (its refcount is decremented).
1257 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001258static struct extent_buffer *
Josef Bacik9ec72672013-08-07 16:57:23 -04001259tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
1260 struct extent_buffer *eb, u64 time_seq)
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001261{
1262 struct extent_buffer *eb_rewin;
1263 struct tree_mod_elem *tm;
1264
1265 if (!time_seq)
1266 return eb;
1267
1268 if (btrfs_header_level(eb) == 0)
1269 return eb;
1270
1271 tm = tree_mod_log_search(fs_info, eb->start, time_seq);
1272 if (!tm)
1273 return eb;
1274
Josef Bacik9ec72672013-08-07 16:57:23 -04001275 btrfs_set_path_blocking(path);
David Sterba300aa892018-04-04 02:00:17 +02001276 btrfs_set_lock_blocking_read(eb);
Josef Bacik9ec72672013-08-07 16:57:23 -04001277
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001278 if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
1279 BUG_ON(tm->slot != 0);
Jeff Mahoneyda170662016-06-15 09:22:56 -04001280 eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001281 if (!eb_rewin) {
Josef Bacik9ec72672013-08-07 16:57:23 -04001282 btrfs_tree_read_unlock_blocking(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001283 free_extent_buffer(eb);
1284 return NULL;
1285 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001286 btrfs_set_header_bytenr(eb_rewin, eb->start);
1287 btrfs_set_header_backref_rev(eb_rewin,
1288 btrfs_header_backref_rev(eb));
1289 btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
Jan Schmidtc3193102012-05-31 19:24:36 +02001290 btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001291 } else {
1292 eb_rewin = btrfs_clone_extent_buffer(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001293 if (!eb_rewin) {
Josef Bacik9ec72672013-08-07 16:57:23 -04001294 btrfs_tree_read_unlock_blocking(eb);
Josef Bacikdb7f3432013-08-07 14:54:37 -04001295 free_extent_buffer(eb);
1296 return NULL;
1297 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001298 }
1299
Josef Bacik9ec72672013-08-07 16:57:23 -04001300 btrfs_tree_read_unlock_blocking(eb);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001301 free_extent_buffer(eb);
1302
Josef Bacikd3beaa22020-08-10 11:42:31 -04001303 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
1304 eb_rewin, btrfs_header_level(eb_rewin));
Jan Schmidt47fb0912013-04-13 13:19:55 +00001305 btrfs_tree_read_lock(eb_rewin);
Josef Bacikf1ca7e982013-06-29 23:15:19 -04001306 __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
Jan Schmidt57911b82012-10-19 09:22:03 +02001307 WARN_ON(btrfs_header_nritems(eb_rewin) >
Jeff Mahoneyda170662016-06-15 09:22:56 -04001308 BTRFS_NODEPTRS_PER_BLOCK(fs_info));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001309
1310 return eb_rewin;
1311}
1312
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001313/*
1314 * get_old_root() rewinds the state of @root's root node to the given @time_seq
1315 * value. If there are no changes, the current root->root_node is returned. If
1316 * anything changed in between, there's a fresh buffer allocated on which the
1317 * rewind operations are done. In any case, the returned buffer is read locked.
1318 * Returns NULL on error (with no locks held).
1319 */
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001320static inline struct extent_buffer *
1321get_old_root(struct btrfs_root *root, u64 time_seq)
1322{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001323 struct btrfs_fs_info *fs_info = root->fs_info;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001324 struct tree_mod_elem *tm;
Jan Schmidt30b04632013-04-13 13:19:54 +00001325 struct extent_buffer *eb = NULL;
1326 struct extent_buffer *eb_root;
Filipe Mananaefad8a82019-08-12 19:14:29 +01001327 u64 eb_root_owner = 0;
Liu Bo7bfdcf72012-10-25 07:30:19 -06001328 struct extent_buffer *old;
Jan Schmidta95236d2012-06-05 16:41:24 +02001329 struct tree_mod_root *old_root = NULL;
Chris Mason4325edd2012-06-15 20:02:02 -04001330 u64 old_generation = 0;
Jan Schmidta95236d2012-06-05 16:41:24 +02001331 u64 logical;
Qu Wenruo581c1762018-03-29 09:08:11 +08001332 int level;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001333
Jan Schmidt30b04632013-04-13 13:19:54 +00001334 eb_root = btrfs_read_lock_root_node(root);
David Sterbabcd24da2018-03-05 15:33:18 +01001335 tm = __tree_mod_log_oldest_root(eb_root, time_seq);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001336 if (!tm)
Jan Schmidt30b04632013-04-13 13:19:54 +00001337 return eb_root;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001338
Jan Schmidta95236d2012-06-05 16:41:24 +02001339 if (tm->op == MOD_LOG_ROOT_REPLACE) {
1340 old_root = &tm->old_root;
1341 old_generation = tm->generation;
1342 logical = old_root->logical;
Qu Wenruo581c1762018-03-29 09:08:11 +08001343 level = old_root->level;
Jan Schmidta95236d2012-06-05 16:41:24 +02001344 } else {
Jan Schmidt30b04632013-04-13 13:19:54 +00001345 logical = eb_root->start;
Qu Wenruo581c1762018-03-29 09:08:11 +08001346 level = btrfs_header_level(eb_root);
Jan Schmidta95236d2012-06-05 16:41:24 +02001347 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001348
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001349 tm = tree_mod_log_search(fs_info, logical, time_seq);
Jan Schmidt834328a2012-10-23 11:27:33 +02001350 if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
Jan Schmidt30b04632013-04-13 13:19:54 +00001351 btrfs_tree_read_unlock(eb_root);
1352 free_extent_buffer(eb_root);
Qu Wenruo581c1762018-03-29 09:08:11 +08001353 old = read_tree_block(fs_info, logical, 0, level, NULL);
Liu Bo64c043d2015-05-25 17:30:15 +08001354 if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
1355 if (!IS_ERR(old))
1356 free_extent_buffer(old);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001357 btrfs_warn(fs_info,
1358 "failed to read tree block %llu from get_old_root",
1359 logical);
Jan Schmidt834328a2012-10-23 11:27:33 +02001360 } else {
Liu Bo7bfdcf72012-10-25 07:30:19 -06001361 eb = btrfs_clone_extent_buffer(old);
1362 free_extent_buffer(old);
Jan Schmidt834328a2012-10-23 11:27:33 +02001363 }
1364 } else if (old_root) {
Filipe Mananaefad8a82019-08-12 19:14:29 +01001365 eb_root_owner = btrfs_header_owner(eb_root);
Jan Schmidt30b04632013-04-13 13:19:54 +00001366 btrfs_tree_read_unlock(eb_root);
1367 free_extent_buffer(eb_root);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001368 eb = alloc_dummy_extent_buffer(fs_info, logical);
Jan Schmidt834328a2012-10-23 11:27:33 +02001369 } else {
David Sterba300aa892018-04-04 02:00:17 +02001370 btrfs_set_lock_blocking_read(eb_root);
Jan Schmidt30b04632013-04-13 13:19:54 +00001371 eb = btrfs_clone_extent_buffer(eb_root);
Josef Bacik9ec72672013-08-07 16:57:23 -04001372 btrfs_tree_read_unlock_blocking(eb_root);
Jan Schmidt30b04632013-04-13 13:19:54 +00001373 free_extent_buffer(eb_root);
Jan Schmidt834328a2012-10-23 11:27:33 +02001374 }
1375
Jan Schmidt8ba97a12012-06-04 16:54:57 +02001376 if (!eb)
1377 return NULL;
Jan Schmidta95236d2012-06-05 16:41:24 +02001378 if (old_root) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001379 btrfs_set_header_bytenr(eb, eb->start);
1380 btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
Filipe Mananaefad8a82019-08-12 19:14:29 +01001381 btrfs_set_header_owner(eb, eb_root_owner);
Jan Schmidta95236d2012-06-05 16:41:24 +02001382 btrfs_set_header_level(eb, old_root->level);
1383 btrfs_set_header_generation(eb, old_generation);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001384 }
Josef Bacikd3beaa22020-08-10 11:42:31 -04001385 btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
1386 btrfs_header_level(eb));
1387 btrfs_tree_read_lock(eb);
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001388 if (tm)
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001389 __tree_mod_log_rewind(fs_info, eb, time_seq, tm);
Jan Schmidt28da9fb2012-06-21 10:59:13 +02001390 else
1391 WARN_ON(btrfs_header_level(eb) != 0);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001392 WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(fs_info));
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02001393
1394 return eb;
1395}
1396
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001397int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
1398{
1399 struct tree_mod_elem *tm;
1400 int level;
Jan Schmidt30b04632013-04-13 13:19:54 +00001401 struct extent_buffer *eb_root = btrfs_root_node(root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001402
David Sterbabcd24da2018-03-05 15:33:18 +01001403 tm = __tree_mod_log_oldest_root(eb_root, time_seq);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001404 if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
1405 level = tm->old_root.level;
1406 } else {
Jan Schmidt30b04632013-04-13 13:19:54 +00001407 level = btrfs_header_level(eb_root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001408 }
Jan Schmidt30b04632013-04-13 13:19:54 +00001409 free_extent_buffer(eb_root);
Jan Schmidt5b6602e2012-10-23 11:28:27 +02001410
1411 return level;
1412}
1413
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001414static inline int should_cow_block(struct btrfs_trans_handle *trans,
1415 struct btrfs_root *root,
1416 struct extent_buffer *buf)
1417{
Jeff Mahoneyf5ee5c92016-06-21 09:52:41 -04001418 if (btrfs_is_testing(root->fs_info))
Josef Bacikfaa2dbf2014-05-07 17:06:09 -04001419 return 0;
David Sterbafccb84c2014-09-29 23:53:21 +02001420
David Sterbad1980132018-03-16 02:39:40 +01001421 /* Ensure we can see the FORCE_COW bit */
1422 smp_mb__before_atomic();
Liu Bof1ebcc72011-11-14 20:48:06 -05001423
1424 /*
1425 * We do not need to cow a block if
1426 * 1) this block is not created or changed in this transaction;
1427	 * 2) this block does not belong to the TREE_RELOC tree;
1428 * 3) the root is not forced COW.
1429 *
1430 * What is forced COW:
Nicholas D Steeves01327612016-05-19 21:18:45 -04001431	 * when we create a snapshot while committing the transaction,
Andrea Gelmini52042d82018-11-28 12:05:13 +01001432 * after we've finished copying src root, we must COW the shared
Liu Bof1ebcc72011-11-14 20:48:06 -05001433	 * block to ensure metadata consistency.
1434 */
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001435 if (btrfs_header_generation(buf) == trans->transid &&
1436 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
1437 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
Liu Bof1ebcc72011-11-14 20:48:06 -05001438 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
Miao Xie27cdeb72014-04-02 19:51:05 +08001439 !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001440 return 0;
1441 return 1;
1442}
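/*
 * Illustration (not part of the original source): a block whose header
 * generation equals trans->transid, that has not been written out yet,
 * that does not carry the RELOC flag in a non-relocation root, and whose
 * root does not set BTRFS_ROOT_FORCE_COW satisfies all the conditions
 * above, so should_cow_block() returns 0 and btrfs_cow_block() hands the
 * same buffer back through *cow_ret without copying it.
 */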
1443
Chris Masond352ac62008-09-29 15:18:18 -04001444/*
1445 * cows a single block, see __btrfs_cow_block for the real work.
Nicholas D Steeves01327612016-05-19 21:18:45 -04001446 * This version of it has extra checks so that a block isn't COWed more than
Chris Masond352ac62008-09-29 15:18:18 -04001447 * once per transaction, as long as it hasn't been written yet
1448 */
Chris Masond3977122009-01-05 21:25:51 -05001449noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001450 struct btrfs_root *root, struct extent_buffer *buf,
1451 struct extent_buffer *parent, int parent_slot,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001452 struct extent_buffer **cow_ret,
1453 enum btrfs_lock_nesting nest)
Chris Mason02217ed2007-03-02 16:08:05 -05001454{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001455 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason6702ed42007-08-07 16:15:09 -04001456 u64 search_start;
Chris Masonf510cfe2007-10-15 16:14:48 -04001457 int ret;
Chris Masondc17ff82008-01-08 15:46:30 -05001458
Josef Bacik83354f02018-11-30 11:52:13 -05001459 if (test_bit(BTRFS_ROOT_DELETING, &root->state))
1460 btrfs_err(fs_info,
1461 "COW'ing blocks on a fs root that's being dropped");
1462
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001463 if (trans->transaction != fs_info->running_transaction)
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001464 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Geert Uytterhoevenc1c9ff72013-08-20 13:20:07 +02001465 trans->transid,
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001466 fs_info->running_transaction->transid);
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001467
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001468 if (trans->transid != fs_info->generation)
Julia Lawall31b1a2b2012-11-03 10:58:34 +00001469 WARN(1, KERN_CRIT "trans %llu running %llu\n",
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001470 trans->transid, fs_info->generation);
Chris Masondc17ff82008-01-08 15:46:30 -05001471
Yan Zheng5d4f98a2009-06-10 10:45:14 -04001472 if (!should_cow_block(trans, root, buf)) {
Jeff Mahoney64c12922016-06-08 00:36:38 -04001473 trans->dirty = true;
Chris Mason02217ed2007-03-02 16:08:05 -05001474 *cow_ret = buf;
1475 return 0;
1476 }
Chris Masonc4876852009-02-04 09:24:25 -05001477
Byongho Leeee221842015-12-15 01:42:10 +09001478 search_start = buf->start & ~((u64)SZ_1G - 1);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001479
1480 if (parent)
David Sterba8bead252018-04-04 02:03:48 +02001481 btrfs_set_lock_blocking_write(parent);
1482 btrfs_set_lock_blocking_write(buf);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001483
Qu Wenruof616f5c2019-01-23 15:15:17 +08001484 /*
1485 * Before CoWing this block for later modification, check if it's
1486 * the subtree root and do the delayed subtree trace if needed.
1487 *
1488	 * Also, we don't care about the error, as it's handled internally.
1489 */
1490 btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
Chris Masonf510cfe2007-10-15 16:14:48 -04001491 ret = __btrfs_cow_block(trans, root, buf, parent,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001492 parent_slot, cow_ret, search_start, 0, nest);
liubo1abe9b82011-03-24 11:18:59 +00001493
1494 trace_btrfs_cow_block(root, buf, *cow_ret);
1495
Chris Masonf510cfe2007-10-15 16:14:48 -04001496 return ret;
Chris Mason6702ed42007-08-07 16:15:09 -04001497}
1498
Chris Masond352ac62008-09-29 15:18:18 -04001499/*
1500 * helper function for defrag to decide if two blocks pointed to by a
1501 * node are actually close by
1502 */
Chris Mason6b800532007-10-15 16:17:34 -04001503static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
Chris Mason6702ed42007-08-07 16:15:09 -04001504{
Chris Mason6b800532007-10-15 16:17:34 -04001505 if (blocknr < other && other - (blocknr + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -04001506 return 1;
Chris Mason6b800532007-10-15 16:17:34 -04001507 if (blocknr > other && blocknr - (other + blocksize) < 32768)
Chris Mason6702ed42007-08-07 16:15:09 -04001508 return 1;
Chris Mason02217ed2007-03-02 16:08:05 -05001509 return 0;
1510}
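/*
 * Worked example (illustrative, assuming the default 16KiB nodesize):
 * a neighbour starting 40KiB past blocknr leaves a gap of
 * 40KiB - 16KiB = 24KiB past the end of the block, which is below the
 * 32KiB threshold, so the blocks count as close; a neighbour starting
 * 64KiB past blocknr leaves a 48KiB gap and does not.
 */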
1511
David Sterbace6ef5a2020-06-08 16:06:07 +02001512#ifdef __LITTLE_ENDIAN
1513
1514/*
1515 * Compare two keys, on little-endian the disk order is same as CPU order and
1516 * we can avoid the conversion.
1517 */
1518static int comp_keys(const struct btrfs_disk_key *disk_key,
1519 const struct btrfs_key *k2)
1520{
1521 const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
1522
1523 return btrfs_comp_cpu_keys(k1, k2);
1524}
1525
1526#else
1527
Chris Mason081e9572007-11-06 10:26:24 -05001528/*
1529 * compare two keys in a memcmp fashion
1530 */
Omar Sandoval310712b2017-01-17 23:24:37 -08001531static int comp_keys(const struct btrfs_disk_key *disk,
1532 const struct btrfs_key *k2)
Chris Mason081e9572007-11-06 10:26:24 -05001533{
1534 struct btrfs_key k1;
1535
1536 btrfs_disk_key_to_cpu(&k1, disk);
1537
Diego Calleja20736ab2009-07-24 11:06:52 -04001538 return btrfs_comp_cpu_keys(&k1, k2);
Chris Mason081e9572007-11-06 10:26:24 -05001539}
David Sterbace6ef5a2020-06-08 16:06:07 +02001540#endif
Chris Mason081e9572007-11-06 10:26:24 -05001541
Josef Bacikf3465ca2008-11-12 14:19:50 -05001542/*
1543	 * same as comp_keys, only taking two struct btrfs_key arguments
1544 */
David Sterbae1f60a62019-10-01 19:57:39 +02001545int __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
Josef Bacikf3465ca2008-11-12 14:19:50 -05001546{
1547 if (k1->objectid > k2->objectid)
1548 return 1;
1549 if (k1->objectid < k2->objectid)
1550 return -1;
1551 if (k1->type > k2->type)
1552 return 1;
1553 if (k1->type < k2->type)
1554 return -1;
1555 if (k1->offset > k2->offset)
1556 return 1;
1557 if (k1->offset < k2->offset)
1558 return -1;
1559 return 0;
1560}
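/*
 * Illustrative sketch (not in the original source): keys order by
 * objectid, then type, then offset, exactly as implemented above.  The
 * hypothetical self-test below is kept under '#if 0' because it is an
 * example only.
 */
#if 0
static void comp_cpu_keys_example(void)
{
	struct btrfs_key a = { .objectid = 256, .type = 1, .offset = 0 };
	struct btrfs_key b = { .objectid = 256, .type = 2, .offset = 0 };

	/* same objectid, smaller type: a sorts before b */
	WARN_ON(btrfs_comp_cpu_keys(&a, &b) != -1);
	/* and the comparison is antisymmetric */
	WARN_ON(btrfs_comp_cpu_keys(&b, &a) != 1);
	/* a key always compares equal to itself */
	WARN_ON(btrfs_comp_cpu_keys(&a, &a) != 0);
}
#endif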
Chris Mason081e9572007-11-06 10:26:24 -05001561
Chris Masond352ac62008-09-29 15:18:18 -04001562/*
1563 * this is used by the defrag code to go through all the
1564 * leaves pointed to by a node and reallocate them so that
1565 * disk order is close to key order
1566 */
Chris Mason6702ed42007-08-07 16:15:09 -04001567int btrfs_realloc_node(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04001568 struct btrfs_root *root, struct extent_buffer *parent,
Eric Sandeende78b512013-01-31 18:21:12 +00001569 int start_slot, u64 *last_ret,
Chris Masona6b6e752007-10-15 16:22:39 -04001570 struct btrfs_key *progress)
Chris Mason6702ed42007-08-07 16:15:09 -04001571{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001572 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason6b800532007-10-15 16:17:34 -04001573 struct extent_buffer *cur;
Chris Mason6702ed42007-08-07 16:15:09 -04001574 u64 blocknr;
Chris Masonca7a79a2008-05-12 12:59:19 -04001575 u64 gen;
Chris Masone9d0b132007-08-10 14:06:19 -04001576 u64 search_start = *last_ret;
1577 u64 last_block = 0;
Chris Mason6702ed42007-08-07 16:15:09 -04001578 u64 other;
1579 u32 parent_nritems;
Chris Mason6702ed42007-08-07 16:15:09 -04001580 int end_slot;
1581 int i;
1582 int err = 0;
Chris Masonf2183bd2007-08-10 14:42:37 -04001583 int parent_level;
Chris Mason6b800532007-10-15 16:17:34 -04001584 int uptodate;
1585 u32 blocksize;
Chris Mason081e9572007-11-06 10:26:24 -05001586 int progress_passed = 0;
1587 struct btrfs_disk_key disk_key;
Chris Mason6702ed42007-08-07 16:15:09 -04001588
Chris Mason5708b952007-10-25 15:43:18 -04001589 parent_level = btrfs_header_level(parent);
Chris Mason5708b952007-10-25 15:43:18 -04001590
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001591 WARN_ON(trans->transaction != fs_info->running_transaction);
1592 WARN_ON(trans->transid != fs_info->generation);
Chris Mason86479a02007-09-10 19:58:16 -04001593
Chris Mason6b800532007-10-15 16:17:34 -04001594 parent_nritems = btrfs_header_nritems(parent);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001595 blocksize = fs_info->nodesize;
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001596 end_slot = parent_nritems - 1;
Chris Mason6702ed42007-08-07 16:15:09 -04001597
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001598 if (parent_nritems <= 1)
Chris Mason6702ed42007-08-07 16:15:09 -04001599 return 0;
1600
David Sterba8bead252018-04-04 02:03:48 +02001601 btrfs_set_lock_blocking_write(parent);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001602
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001603 for (i = start_slot; i <= end_slot; i++) {
Qu Wenruo581c1762018-03-29 09:08:11 +08001604 struct btrfs_key first_key;
Chris Mason6702ed42007-08-07 16:15:09 -04001605 int close = 1;
Chris Masona6b6e752007-10-15 16:22:39 -04001606
Chris Mason081e9572007-11-06 10:26:24 -05001607 btrfs_node_key(parent, &disk_key, i);
1608 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
1609 continue;
1610
1611 progress_passed = 1;
Chris Mason6b800532007-10-15 16:17:34 -04001612 blocknr = btrfs_node_blockptr(parent, i);
Chris Masonca7a79a2008-05-12 12:59:19 -04001613 gen = btrfs_node_ptr_generation(parent, i);
Qu Wenruo581c1762018-03-29 09:08:11 +08001614 btrfs_node_key_to_cpu(parent, &first_key, i);
Chris Masone9d0b132007-08-10 14:06:19 -04001615 if (last_block == 0)
1616 last_block = blocknr;
Chris Mason5708b952007-10-25 15:43:18 -04001617
Chris Mason6702ed42007-08-07 16:15:09 -04001618 if (i > 0) {
Chris Mason6b800532007-10-15 16:17:34 -04001619 other = btrfs_node_blockptr(parent, i - 1);
1620 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -04001621 }
Filipe Manana5dfe2be2015-02-23 19:48:52 +00001622 if (!close && i < end_slot) {
Chris Mason6b800532007-10-15 16:17:34 -04001623 other = btrfs_node_blockptr(parent, i + 1);
1624 close = close_blocks(blocknr, other, blocksize);
Chris Mason6702ed42007-08-07 16:15:09 -04001625 }
Chris Masone9d0b132007-08-10 14:06:19 -04001626 if (close) {
1627 last_block = blocknr;
Chris Mason6702ed42007-08-07 16:15:09 -04001628 continue;
Chris Masone9d0b132007-08-10 14:06:19 -04001629 }
Chris Mason6702ed42007-08-07 16:15:09 -04001630
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001631 cur = find_extent_buffer(fs_info, blocknr);
Chris Mason6b800532007-10-15 16:17:34 -04001632 if (cur)
Chris Masonb9fab912012-05-06 07:23:47 -04001633 uptodate = btrfs_buffer_uptodate(cur, gen, 0);
Chris Mason6b800532007-10-15 16:17:34 -04001634 else
1635 uptodate = 0;
Chris Mason5708b952007-10-25 15:43:18 -04001636 if (!cur || !uptodate) {
Chris Mason6b800532007-10-15 16:17:34 -04001637 if (!cur) {
Qu Wenruo581c1762018-03-29 09:08:11 +08001638 cur = read_tree_block(fs_info, blocknr, gen,
1639 parent_level - 1,
1640 &first_key);
Liu Bo64c043d2015-05-25 17:30:15 +08001641 if (IS_ERR(cur)) {
1642 return PTR_ERR(cur);
1643 } else if (!extent_buffer_uptodate(cur)) {
Josef Bacik416bc652013-04-23 14:17:42 -04001644 free_extent_buffer(cur);
Tsutomu Itoh97d9a8a2011-03-24 06:33:21 +00001645 return -EIO;
Josef Bacik416bc652013-04-23 14:17:42 -04001646 }
Chris Mason6b800532007-10-15 16:17:34 -04001647 } else if (!uptodate) {
Qu Wenruo581c1762018-03-29 09:08:11 +08001648 err = btrfs_read_buffer(cur, gen,
1649						parent_level - 1, &first_key);
Tsutomu Itoh018642a2012-05-29 18:10:13 +09001650 if (err) {
1651 free_extent_buffer(cur);
1652 return err;
1653 }
Chris Masonf2183bd2007-08-10 14:42:37 -04001654 }
Chris Mason6702ed42007-08-07 16:15:09 -04001655 }
Chris Masone9d0b132007-08-10 14:06:19 -04001656 if (search_start == 0)
Chris Mason6b800532007-10-15 16:17:34 -04001657 search_start = last_block;
Chris Masone9d0b132007-08-10 14:06:19 -04001658
Chris Masone7a84562008-06-25 16:01:31 -04001659 btrfs_tree_lock(cur);
David Sterba8bead252018-04-04 02:03:48 +02001660 btrfs_set_lock_blocking_write(cur);
Chris Mason6b800532007-10-15 16:17:34 -04001661 err = __btrfs_cow_block(trans, root, cur, parent, i,
Chris Masone7a84562008-06-25 16:01:31 -04001662 &cur, search_start,
Chris Mason6b800532007-10-15 16:17:34 -04001663 min(16 * blocksize,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001664 (end_slot - i) * blocksize),
1665 BTRFS_NESTING_COW);
Yan252c38f2007-08-29 09:11:44 -04001666 if (err) {
Chris Masone7a84562008-06-25 16:01:31 -04001667 btrfs_tree_unlock(cur);
Chris Mason6b800532007-10-15 16:17:34 -04001668 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -04001669 break;
Yan252c38f2007-08-29 09:11:44 -04001670 }
Chris Masone7a84562008-06-25 16:01:31 -04001671 search_start = cur->start;
1672 last_block = cur->start;
Chris Masonf2183bd2007-08-10 14:42:37 -04001673 *last_ret = search_start;
Chris Masone7a84562008-06-25 16:01:31 -04001674 btrfs_tree_unlock(cur);
1675 free_extent_buffer(cur);
Chris Mason6702ed42007-08-07 16:15:09 -04001676 }
1677 return err;
1678}
1679
Chris Mason74123bd2007-02-02 11:05:29 -05001680/*
Chris Mason5f39d392007-10-15 16:14:19 -04001681 * search for key in the extent_buffer. The items start at offset p,
1682 * and they are item_size apart. There are 'max' items in p.
1683 *
Chris Mason74123bd2007-02-02 11:05:29 -05001684 * the slot in the array is returned via slot, and it points to
1685 * the place where you would insert key if it is not found in
1686 * the array.
1687 *
1688 * slot may point to max if the key is bigger than all of the keys
1689 */
Chris Masone02119d2008-09-05 16:13:11 -04001690static noinline int generic_bin_search(struct extent_buffer *eb,
Omar Sandoval310712b2017-01-17 23:24:37 -08001691 unsigned long p, int item_size,
1692 const struct btrfs_key *key,
Chris Masone02119d2008-09-05 16:13:11 -04001693 int max, int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001694{
1695 int low = 0;
1696 int high = max;
Chris Masonbe0e5c02007-01-26 15:51:26 -05001697 int ret;
David Sterba5cd17f32020-04-29 23:23:37 +02001698 const int key_size = sizeof(struct btrfs_disk_key);
Chris Masonbe0e5c02007-01-26 15:51:26 -05001699
Liu Bo5e24e9a2016-06-23 16:32:45 -07001700 if (low > high) {
1701 btrfs_err(eb->fs_info,
1702 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
1703 __func__, low, high, eb->start,
1704 btrfs_header_owner(eb), btrfs_header_level(eb));
1705 return -EINVAL;
1706 }
1707
Chris Masond3977122009-01-05 21:25:51 -05001708 while (low < high) {
David Sterba5cd17f32020-04-29 23:23:37 +02001709 unsigned long oip;
1710 unsigned long offset;
1711 struct btrfs_disk_key *tmp;
1712 struct btrfs_disk_key unaligned;
1713 int mid;
1714
Chris Masonbe0e5c02007-01-26 15:51:26 -05001715 mid = (low + high) / 2;
Chris Mason5f39d392007-10-15 16:14:19 -04001716 offset = p + mid * item_size;
David Sterba5cd17f32020-04-29 23:23:37 +02001717 oip = offset_in_page(offset);
Chris Mason5f39d392007-10-15 16:14:19 -04001718
David Sterba5cd17f32020-04-29 23:23:37 +02001719 if (oip + key_size <= PAGE_SIZE) {
1720 const unsigned long idx = offset >> PAGE_SHIFT;
1721 char *kaddr = page_address(eb->pages[idx]);
Chris Mason934d3752008-12-08 16:43:10 -05001722
David Sterba5cd17f32020-04-29 23:23:37 +02001723 tmp = (struct btrfs_disk_key *)(kaddr + oip);
Chris Mason5f39d392007-10-15 16:14:19 -04001724 } else {
David Sterba5cd17f32020-04-29 23:23:37 +02001725 read_extent_buffer(eb, &unaligned, offset, key_size);
1726 tmp = &unaligned;
Chris Mason5f39d392007-10-15 16:14:19 -04001727 }
David Sterba5cd17f32020-04-29 23:23:37 +02001728
Chris Masonbe0e5c02007-01-26 15:51:26 -05001729 ret = comp_keys(tmp, key);
1730
1731 if (ret < 0)
1732 low = mid + 1;
1733 else if (ret > 0)
1734 high = mid;
1735 else {
1736 *slot = mid;
1737 return 0;
1738 }
1739 }
1740 *slot = low;
1741 return 1;
1742}
1743
Chris Mason97571fd2007-02-24 13:39:08 -05001744/*
1745 * simple bin_search frontend that does the right thing for
1746 * leaves vs nodes
1747 */
Nikolay Borisova74b35e2017-12-08 16:27:43 +02001748int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
Qu Wenruoe3b83362020-04-17 15:08:21 +08001749 int *slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05001750{
Qu Wenruoe3b83362020-04-17 15:08:21 +08001751 if (btrfs_header_level(eb) == 0)
Chris Mason5f39d392007-10-15 16:14:19 -04001752 return generic_bin_search(eb,
1753 offsetof(struct btrfs_leaf, items),
Chris Mason0783fcf2007-03-12 20:12:07 -04001754 sizeof(struct btrfs_item),
Chris Mason5f39d392007-10-15 16:14:19 -04001755 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -04001756 slot);
Wang Sheng-Huif7757382012-03-30 15:14:27 +08001757 else
Chris Mason5f39d392007-10-15 16:14:19 -04001758 return generic_bin_search(eb,
1759 offsetof(struct btrfs_node, ptrs),
Chris Mason123abc82007-03-14 14:14:43 -04001760 sizeof(struct btrfs_key_ptr),
Chris Mason5f39d392007-10-15 16:14:19 -04001761 key, btrfs_header_nritems(eb),
Chris Mason7518a232007-03-12 12:01:18 -04001762 slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05001763}
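/*
 * Illustrative sketch (not in the original source): how a caller
 * interprets btrfs_bin_search().  'eb' is assumed to be a node or leaf
 * the caller has already read and locked; the helper name is
 * hypothetical.  Kept under '#if 0' because it is an example only.
 */
#if 0
static int bin_search_example(struct extent_buffer *eb,
			      const struct btrfs_key *key)
{
	int slot;
	int ret;

	ret = btrfs_bin_search(eb, key, &slot);
	if (ret < 0)
		return ret;	/* the search itself failed (-EINVAL) */
	if (ret == 0)
		return slot;	/* exact match lives at 'slot' */
	/* ret == 1: not found, 'slot' is where the key would be inserted */
	return slot;
}
#endif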
1764
Yan, Zhengf0486c62010-05-16 10:46:25 -04001765static void root_add_used(struct btrfs_root *root, u32 size)
1766{
1767 spin_lock(&root->accounting_lock);
1768 btrfs_set_root_used(&root->root_item,
1769 btrfs_root_used(&root->root_item) + size);
1770 spin_unlock(&root->accounting_lock);
1771}
1772
1773static void root_sub_used(struct btrfs_root *root, u32 size)
1774{
1775 spin_lock(&root->accounting_lock);
1776 btrfs_set_root_used(&root->root_item,
1777 btrfs_root_used(&root->root_item) - size);
1778 spin_unlock(&root->accounting_lock);
1779}
1780
Chris Masond352ac62008-09-29 15:18:18 -04001781/* given a node and slot number, this reads the block it points to. The
1782 * extent buffer is returned with a reference taken (but unlocked).
Chris Masond352ac62008-09-29 15:18:18 -04001783 */
David Sterba4b231ae2019-08-21 19:16:27 +02001784struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
1785 int slot)
Chris Masonbb803952007-03-01 12:04:21 -05001786{
Chris Masonca7a79a2008-05-12 12:59:19 -04001787 int level = btrfs_header_level(parent);
Josef Bacik416bc652013-04-23 14:17:42 -04001788 struct extent_buffer *eb;
Qu Wenruo581c1762018-03-29 09:08:11 +08001789 struct btrfs_key first_key;
Josef Bacik416bc652013-04-23 14:17:42 -04001790
Liu Bofb770ae2016-07-05 12:10:14 -07001791 if (slot < 0 || slot >= btrfs_header_nritems(parent))
1792 return ERR_PTR(-ENOENT);
Chris Masonca7a79a2008-05-12 12:59:19 -04001793
1794 BUG_ON(level == 0);
1795
Qu Wenruo581c1762018-03-29 09:08:11 +08001796 btrfs_node_key_to_cpu(parent, &first_key, slot);
David Sterbad0d20b02019-03-20 14:54:01 +01001797 eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
Qu Wenruo581c1762018-03-29 09:08:11 +08001798 btrfs_node_ptr_generation(parent, slot),
1799 level - 1, &first_key);
Liu Bofb770ae2016-07-05 12:10:14 -07001800 if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
1801 free_extent_buffer(eb);
1802 eb = ERR_PTR(-EIO);
Josef Bacik416bc652013-04-23 14:17:42 -04001803 }
1804
1805 return eb;
Chris Masonbb803952007-03-01 12:04:21 -05001806}
1807
Chris Masond352ac62008-09-29 15:18:18 -04001808/*
1809 * node level balancing, used to make sure nodes are in proper order for
1810 * item deletion. We balance from the top down, so we have to make sure
1811	 * that a deletion won't leave a node completely empty later on.
1812 */
Chris Masone02119d2008-09-05 16:13:11 -04001813static noinline int balance_level(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05001814 struct btrfs_root *root,
1815 struct btrfs_path *path, int level)
Chris Masonbb803952007-03-01 12:04:21 -05001816{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001817 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04001818 struct extent_buffer *right = NULL;
1819 struct extent_buffer *mid;
1820 struct extent_buffer *left = NULL;
1821 struct extent_buffer *parent = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05001822 int ret = 0;
1823 int wret;
1824 int pslot;
Chris Masonbb803952007-03-01 12:04:21 -05001825 int orig_slot = path->slots[level];
Chris Mason79f95c82007-03-01 15:16:26 -05001826 u64 orig_ptr;
Chris Masonbb803952007-03-01 12:04:21 -05001827
Liu Bo98e6b1e2018-09-12 06:06:23 +08001828 ASSERT(level > 0);
Chris Masonbb803952007-03-01 12:04:21 -05001829
Chris Mason5f39d392007-10-15 16:14:19 -04001830 mid = path->nodes[level];
Chris Masonb4ce94d2009-02-04 09:25:08 -05001831
Chris Masonbd681512011-07-16 15:23:14 -04001832 WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
1833 path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
Chris Mason7bb86312007-12-11 09:25:06 -05001834 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1835
Chris Mason1d4f8a02007-03-13 09:28:32 -04001836 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
Chris Mason79f95c82007-03-01 15:16:26 -05001837
Li Zefana05a9bb2011-09-06 16:55:34 +08001838 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04001839 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08001840 pslot = path->slots[level + 1];
1841 }
Chris Masonbb803952007-03-01 12:04:21 -05001842
Chris Mason40689472007-03-17 14:29:23 -04001843 /*
1844 * deal with the case where there is only one pointer in the root
1845 * by promoting the node below to a root
1846 */
Chris Mason5f39d392007-10-15 16:14:19 -04001847 if (!parent) {
1848 struct extent_buffer *child;
Chris Masonbb803952007-03-01 12:04:21 -05001849
Chris Mason5f39d392007-10-15 16:14:19 -04001850 if (btrfs_header_nritems(mid) != 1)
Chris Masonbb803952007-03-01 12:04:21 -05001851 return 0;
1852
1853 /* promote the child to a root */
David Sterba4b231ae2019-08-21 19:16:27 +02001854 child = btrfs_read_node_slot(mid, 0);
Liu Bofb770ae2016-07-05 12:10:14 -07001855 if (IS_ERR(child)) {
1856 ret = PTR_ERR(child);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001857 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -07001858 goto enospc;
1859 }
1860
Chris Mason925baed2008-06-25 16:01:30 -04001861 btrfs_tree_lock(child);
David Sterba8bead252018-04-04 02:03:48 +02001862 btrfs_set_lock_blocking_write(child);
Josef Bacik9631e4c2020-08-20 11:46:03 -04001863 ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
1864 BTRFS_NESTING_COW);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001865 if (ret) {
1866 btrfs_tree_unlock(child);
1867 free_extent_buffer(child);
1868 goto enospc;
1869 }
Yan2f375ab2008-02-01 14:58:07 -05001870
David Sterbad9d19a02018-03-05 16:35:29 +01001871 ret = tree_mod_log_insert_root(root->node, child, 1);
1872 BUG_ON(ret < 0);
Chris Mason240f62c2011-03-23 14:54:42 -04001873 rcu_assign_pointer(root->node, child);
Chris Mason925baed2008-06-25 16:01:30 -04001874
Chris Mason0b86a832008-03-24 15:01:56 -04001875 add_root_to_dirty_list(root);
Chris Mason925baed2008-06-25 16:01:30 -04001876 btrfs_tree_unlock(child);
Chris Masonb4ce94d2009-02-04 09:25:08 -05001877
Chris Mason925baed2008-06-25 16:01:30 -04001878 path->locks[level] = 0;
Chris Masonbb803952007-03-01 12:04:21 -05001879 path->nodes[level] = NULL;
David Sterba6a884d7d2019-03-20 14:30:02 +01001880 btrfs_clean_tree_block(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001881 btrfs_tree_unlock(mid);
Chris Masonbb803952007-03-01 12:04:21 -05001882 /* once for the path */
Chris Mason5f39d392007-10-15 16:14:19 -04001883 free_extent_buffer(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001884
1885 root_sub_used(root, mid->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001886 btrfs_free_tree_block(trans, root, mid, 0, 1);
Chris Masonbb803952007-03-01 12:04:21 -05001887 /* once for the root ptr */
Josef Bacik3083ee22012-03-09 16:01:49 -05001888 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001889 return 0;
Chris Masonbb803952007-03-01 12:04:21 -05001890 }
Chris Mason5f39d392007-10-15 16:14:19 -04001891 if (btrfs_header_nritems(mid) >
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001892 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
Chris Masonbb803952007-03-01 12:04:21 -05001893 return 0;
1894
David Sterba4b231ae2019-08-21 19:16:27 +02001895 left = btrfs_read_node_slot(parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001896 if (IS_ERR(left))
1897 left = NULL;
1898
Chris Mason5f39d392007-10-15 16:14:19 -04001899 if (left) {
Josef Bacikbf774672020-08-20 11:46:04 -04001900 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
David Sterba8bead252018-04-04 02:03:48 +02001901 btrfs_set_lock_blocking_write(left);
Chris Mason5f39d392007-10-15 16:14:19 -04001902 wret = btrfs_cow_block(trans, root, left,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001903 parent, pslot - 1, &left,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04001904 BTRFS_NESTING_LEFT_COW);
Chris Mason54aa1f42007-06-22 14:16:25 -04001905 if (wret) {
1906 ret = wret;
1907 goto enospc;
1908 }
Chris Mason2cc58cf2007-08-27 16:49:44 -04001909 }
Liu Bofb770ae2016-07-05 12:10:14 -07001910
David Sterba4b231ae2019-08-21 19:16:27 +02001911 right = btrfs_read_node_slot(parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07001912 if (IS_ERR(right))
1913 right = NULL;
1914
Chris Mason5f39d392007-10-15 16:14:19 -04001915 if (right) {
Josef Bacikbf774672020-08-20 11:46:04 -04001916 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
David Sterba8bead252018-04-04 02:03:48 +02001917 btrfs_set_lock_blocking_write(right);
Chris Mason5f39d392007-10-15 16:14:19 -04001918 wret = btrfs_cow_block(trans, root, right,
Josef Bacik9631e4c2020-08-20 11:46:03 -04001919 parent, pslot + 1, &right,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04001920 BTRFS_NESTING_RIGHT_COW);
Chris Mason2cc58cf2007-08-27 16:49:44 -04001921 if (wret) {
1922 ret = wret;
1923 goto enospc;
1924 }
1925 }
1926
1927 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04001928 if (left) {
1929 orig_slot += btrfs_header_nritems(left);
David Sterbad30a6682019-03-20 14:16:45 +01001930 wret = push_node_left(trans, left, mid, 1);
Chris Mason79f95c82007-03-01 15:16:26 -05001931 if (wret < 0)
1932 ret = wret;
Chris Masonbb803952007-03-01 12:04:21 -05001933 }
Chris Mason79f95c82007-03-01 15:16:26 -05001934
1935 /*
1936 * then try to empty the right most buffer into the middle
1937 */
Chris Mason5f39d392007-10-15 16:14:19 -04001938 if (right) {
David Sterbad30a6682019-03-20 14:16:45 +01001939 wret = push_node_left(trans, mid, right, 1);
Chris Mason54aa1f42007-06-22 14:16:25 -04001940 if (wret < 0 && wret != -ENOSPC)
Chris Mason79f95c82007-03-01 15:16:26 -05001941 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04001942 if (btrfs_header_nritems(right) == 0) {
David Sterba6a884d7d2019-03-20 14:30:02 +01001943 btrfs_clean_tree_block(right);
Chris Mason925baed2008-06-25 16:01:30 -04001944 btrfs_tree_unlock(right);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001945 del_ptr(root, path, level + 1, pslot + 1);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001946 root_sub_used(root, right->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001947 btrfs_free_tree_block(trans, root, right, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05001948 free_extent_buffer_stale(right);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001949 right = NULL;
Chris Masonbb803952007-03-01 12:04:21 -05001950 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04001951 struct btrfs_disk_key right_key;
1952 btrfs_node_key(right, &right_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01001953 ret = tree_mod_log_insert_key(parent, pslot + 1,
1954 MOD_LOG_KEY_REPLACE, GFP_NOFS);
1955 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04001956 btrfs_set_node_key(parent, &right_key, pslot + 1);
1957 btrfs_mark_buffer_dirty(parent);
Chris Masonbb803952007-03-01 12:04:21 -05001958 }
1959 }
Chris Mason5f39d392007-10-15 16:14:19 -04001960 if (btrfs_header_nritems(mid) == 1) {
Chris Mason79f95c82007-03-01 15:16:26 -05001961 /*
1962 * we're not allowed to leave a node with one item in the
1963 * tree during a delete. A deletion from lower in the tree
1964 * could try to delete the only pointer in this node.
1965 * So, pull some keys from the left.
1966 * There has to be a left pointer at this point because
1967 * otherwise we would have pulled some pointers from the
1968 * right
1969 */
Mark Fasheh305a26a2011-09-01 11:27:57 -07001970 if (!left) {
1971 ret = -EROFS;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04001972 btrfs_handle_fs_error(fs_info, ret, NULL);
Mark Fasheh305a26a2011-09-01 11:27:57 -07001973 goto enospc;
1974 }
David Sterba55d32ed2019-03-20 14:18:06 +01001975 wret = balance_node_right(trans, mid, left);
Chris Mason54aa1f42007-06-22 14:16:25 -04001976 if (wret < 0) {
Chris Mason79f95c82007-03-01 15:16:26 -05001977 ret = wret;
Chris Mason54aa1f42007-06-22 14:16:25 -04001978 goto enospc;
1979 }
Chris Masonbce4eae2008-04-24 14:42:46 -04001980 if (wret == 1) {
David Sterbad30a6682019-03-20 14:16:45 +01001981 wret = push_node_left(trans, left, mid, 1);
Chris Masonbce4eae2008-04-24 14:42:46 -04001982 if (wret < 0)
1983 ret = wret;
1984 }
Chris Mason79f95c82007-03-01 15:16:26 -05001985 BUG_ON(wret == 1);
1986 }
Chris Mason5f39d392007-10-15 16:14:19 -04001987 if (btrfs_header_nritems(mid) == 0) {
David Sterba6a884d7d2019-03-20 14:30:02 +01001988 btrfs_clean_tree_block(mid);
Chris Mason925baed2008-06-25 16:01:30 -04001989 btrfs_tree_unlock(mid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00001990 del_ptr(root, path, level + 1, pslot);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001991 root_sub_used(root, mid->len);
Jan Schmidt5581a512012-05-16 17:04:52 +02001992 btrfs_free_tree_block(trans, root, mid, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05001993 free_extent_buffer_stale(mid);
Yan, Zhengf0486c62010-05-16 10:46:25 -04001994 mid = NULL;
Chris Mason79f95c82007-03-01 15:16:26 -05001995 } else {
1996 /* update the parent key to reflect our changes */
Chris Mason5f39d392007-10-15 16:14:19 -04001997 struct btrfs_disk_key mid_key;
1998 btrfs_node_key(mid, &mid_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01001999 ret = tree_mod_log_insert_key(parent, pslot,
2000 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2001 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002002 btrfs_set_node_key(parent, &mid_key, pslot);
2003 btrfs_mark_buffer_dirty(parent);
Chris Mason79f95c82007-03-01 15:16:26 -05002004 }
Chris Masonbb803952007-03-01 12:04:21 -05002005
Chris Mason79f95c82007-03-01 15:16:26 -05002006 /* update the path */
Chris Mason5f39d392007-10-15 16:14:19 -04002007 if (left) {
2008 if (btrfs_header_nritems(left) > orig_slot) {
David Sterba67439da2019-10-08 13:28:47 +02002009 atomic_inc(&left->refs);
Chris Mason925baed2008-06-25 16:01:30 -04002010 /* left was locked after cow */
Chris Mason5f39d392007-10-15 16:14:19 -04002011 path->nodes[level] = left;
Chris Masonbb803952007-03-01 12:04:21 -05002012 path->slots[level + 1] -= 1;
2013 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002014 if (mid) {
2015 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002016 free_extent_buffer(mid);
Chris Mason925baed2008-06-25 16:01:30 -04002017 }
Chris Masonbb803952007-03-01 12:04:21 -05002018 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002019 orig_slot -= btrfs_header_nritems(left);
Chris Masonbb803952007-03-01 12:04:21 -05002020 path->slots[level] = orig_slot;
2021 }
2022 }
Chris Mason79f95c82007-03-01 15:16:26 -05002023 /* double check we haven't messed things up */
Chris Masone20d96d2007-03-22 12:13:20 -04002024 if (orig_ptr !=
Chris Mason5f39d392007-10-15 16:14:19 -04002025 btrfs_node_blockptr(path->nodes[level], path->slots[level]))
Chris Mason79f95c82007-03-01 15:16:26 -05002026 BUG();
Chris Mason54aa1f42007-06-22 14:16:25 -04002027enospc:
Chris Mason925baed2008-06-25 16:01:30 -04002028 if (right) {
2029 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002030 free_extent_buffer(right);
Chris Mason925baed2008-06-25 16:01:30 -04002031 }
2032 if (left) {
2033 if (path->nodes[level] != left)
2034 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002035 free_extent_buffer(left);
Chris Mason925baed2008-06-25 16:01:30 -04002036 }
Chris Masonbb803952007-03-01 12:04:21 -05002037 return ret;
2038}
2039
Chris Masond352ac62008-09-29 15:18:18 -04002040/* Node balancing for insertion. Here we only split or push nodes around
2041 * when they are completely full. This is also done top down, so we
2042 * have to be pessimistic.
2043 */
Chris Masond3977122009-01-05 21:25:51 -05002044static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
Chris Mason98ed5172008-01-03 10:01:48 -05002045 struct btrfs_root *root,
2046 struct btrfs_path *path, int level)
Chris Masone66f7092007-04-20 13:16:02 -04002047{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002048 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04002049 struct extent_buffer *right = NULL;
2050 struct extent_buffer *mid;
2051 struct extent_buffer *left = NULL;
2052 struct extent_buffer *parent = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002053 int ret = 0;
2054 int wret;
2055 int pslot;
2056 int orig_slot = path->slots[level];
Chris Masone66f7092007-04-20 13:16:02 -04002057
2058 if (level == 0)
2059 return 1;
2060
Chris Mason5f39d392007-10-15 16:14:19 -04002061 mid = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05002062 WARN_ON(btrfs_header_generation(mid) != trans->transid);
Chris Masone66f7092007-04-20 13:16:02 -04002063
Li Zefana05a9bb2011-09-06 16:55:34 +08002064 if (level < BTRFS_MAX_LEVEL - 1) {
Chris Mason5f39d392007-10-15 16:14:19 -04002065 parent = path->nodes[level + 1];
Li Zefana05a9bb2011-09-06 16:55:34 +08002066 pslot = path->slots[level + 1];
2067 }
Chris Masone66f7092007-04-20 13:16:02 -04002068
Chris Mason5f39d392007-10-15 16:14:19 -04002069 if (!parent)
Chris Masone66f7092007-04-20 13:16:02 -04002070 return 1;
Chris Masone66f7092007-04-20 13:16:02 -04002071
David Sterba4b231ae2019-08-21 19:16:27 +02002072 left = btrfs_read_node_slot(parent, pslot - 1);
Liu Bofb770ae2016-07-05 12:10:14 -07002073 if (IS_ERR(left))
2074 left = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002075
2076 /* first, try to make some room in the middle buffer */
Chris Mason5f39d392007-10-15 16:14:19 -04002077 if (left) {
Chris Masone66f7092007-04-20 13:16:02 -04002078 u32 left_nr;
Chris Mason925baed2008-06-25 16:01:30 -04002079
Josef Bacikbf774672020-08-20 11:46:04 -04002080 __btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
David Sterba8bead252018-04-04 02:03:48 +02002081 btrfs_set_lock_blocking_write(left);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002082
Chris Mason5f39d392007-10-15 16:14:19 -04002083 left_nr = btrfs_header_nritems(left);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002084 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002085 wret = 1;
2086 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002087 ret = btrfs_cow_block(trans, root, left, parent,
Josef Bacik9631e4c2020-08-20 11:46:03 -04002088 pslot - 1, &left,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04002089 BTRFS_NESTING_LEFT_COW);
Chris Mason54aa1f42007-06-22 14:16:25 -04002090 if (ret)
2091 wret = 1;
2092 else {
David Sterbad30a6682019-03-20 14:16:45 +01002093 wret = push_node_left(trans, left, mid, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04002094 }
Chris Mason33ade1f2007-04-20 13:48:57 -04002095 }
Chris Masone66f7092007-04-20 13:16:02 -04002096 if (wret < 0)
2097 ret = wret;
2098 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04002099 struct btrfs_disk_key disk_key;
Chris Masone66f7092007-04-20 13:16:02 -04002100 orig_slot += left_nr;
Chris Mason5f39d392007-10-15 16:14:19 -04002101 btrfs_node_key(mid, &disk_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01002102 ret = tree_mod_log_insert_key(parent, pslot,
2103 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2104 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002105 btrfs_set_node_key(parent, &disk_key, pslot);
2106 btrfs_mark_buffer_dirty(parent);
2107 if (btrfs_header_nritems(left) > orig_slot) {
2108 path->nodes[level] = left;
Chris Masone66f7092007-04-20 13:16:02 -04002109 path->slots[level + 1] -= 1;
2110 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002111 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002112 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04002113 } else {
2114 orig_slot -=
Chris Mason5f39d392007-10-15 16:14:19 -04002115 btrfs_header_nritems(left);
Chris Masone66f7092007-04-20 13:16:02 -04002116 path->slots[level] = orig_slot;
Chris Mason925baed2008-06-25 16:01:30 -04002117 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002118 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04002119 }
Chris Masone66f7092007-04-20 13:16:02 -04002120 return 0;
2121 }
Chris Mason925baed2008-06-25 16:01:30 -04002122 btrfs_tree_unlock(left);
Chris Mason5f39d392007-10-15 16:14:19 -04002123 free_extent_buffer(left);
Chris Masone66f7092007-04-20 13:16:02 -04002124 }
David Sterba4b231ae2019-08-21 19:16:27 +02002125 right = btrfs_read_node_slot(parent, pslot + 1);
Liu Bofb770ae2016-07-05 12:10:14 -07002126 if (IS_ERR(right))
2127 right = NULL;
Chris Masone66f7092007-04-20 13:16:02 -04002128
2129 /*
2130 * then try to empty the right most buffer into the middle
2131 */
Chris Mason5f39d392007-10-15 16:14:19 -04002132 if (right) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002133 u32 right_nr;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002134
Josef Bacikbf774672020-08-20 11:46:04 -04002135 __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
David Sterba8bead252018-04-04 02:03:48 +02002136 btrfs_set_lock_blocking_write(right);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002137
Chris Mason5f39d392007-10-15 16:14:19 -04002138 right_nr = btrfs_header_nritems(right);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002139 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
Chris Mason33ade1f2007-04-20 13:48:57 -04002140 wret = 1;
2141 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04002142 ret = btrfs_cow_block(trans, root, right,
2143 parent, pslot + 1,
Josef Bacikbf59a5a2020-08-20 11:46:05 -04002144 &right, BTRFS_NESTING_RIGHT_COW);
Chris Mason54aa1f42007-06-22 14:16:25 -04002145 if (ret)
2146 wret = 1;
2147 else {
David Sterba55d32ed2019-03-20 14:18:06 +01002148 wret = balance_node_right(trans, right, mid);
Chris Mason54aa1f42007-06-22 14:16:25 -04002149 }
Chris Mason33ade1f2007-04-20 13:48:57 -04002150 }
Chris Masone66f7092007-04-20 13:16:02 -04002151 if (wret < 0)
2152 ret = wret;
2153 if (wret == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04002154 struct btrfs_disk_key disk_key;
2155
2156 btrfs_node_key(right, &disk_key, 0);
David Sterba0e82bcf2018-03-05 16:16:54 +01002157 ret = tree_mod_log_insert_key(parent, pslot + 1,
2158 MOD_LOG_KEY_REPLACE, GFP_NOFS);
2159 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04002160 btrfs_set_node_key(parent, &disk_key, pslot + 1);
2161 btrfs_mark_buffer_dirty(parent);
2162
2163 if (btrfs_header_nritems(mid) <= orig_slot) {
2164 path->nodes[level] = right;
Chris Masone66f7092007-04-20 13:16:02 -04002165 path->slots[level + 1] += 1;
2166 path->slots[level] = orig_slot -
Chris Mason5f39d392007-10-15 16:14:19 -04002167 btrfs_header_nritems(mid);
Chris Mason925baed2008-06-25 16:01:30 -04002168 btrfs_tree_unlock(mid);
Chris Mason5f39d392007-10-15 16:14:19 -04002169 free_extent_buffer(mid);
Chris Masone66f7092007-04-20 13:16:02 -04002170 } else {
Chris Mason925baed2008-06-25 16:01:30 -04002171 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002172 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04002173 }
Chris Masone66f7092007-04-20 13:16:02 -04002174 return 0;
2175 }
Chris Mason925baed2008-06-25 16:01:30 -04002176 btrfs_tree_unlock(right);
Chris Mason5f39d392007-10-15 16:14:19 -04002177 free_extent_buffer(right);
Chris Masone66f7092007-04-20 13:16:02 -04002178 }
Chris Masone66f7092007-04-20 13:16:02 -04002179 return 1;
2180}
2181
Chris Mason74123bd2007-02-02 11:05:29 -05002182/*
Chris Masond352ac62008-09-29 15:18:18 -04002183 * readahead one full node of leaves, finding things that are close
2184	 * to the block in 'slot', and triggering readahead on them.
Chris Mason3c69fae2007-08-07 15:52:22 -04002185 */
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002186static void reada_for_search(struct btrfs_fs_info *fs_info,
Chris Masonc8c42862009-04-03 10:14:18 -04002187 struct btrfs_path *path,
2188 int level, int slot, u64 objectid)
Chris Mason3c69fae2007-08-07 15:52:22 -04002189{
Chris Mason5f39d392007-10-15 16:14:19 -04002190 struct extent_buffer *node;
Chris Mason01f46652007-12-21 16:24:26 -05002191 struct btrfs_disk_key disk_key;
Chris Mason3c69fae2007-08-07 15:52:22 -04002192 u32 nritems;
Chris Mason3c69fae2007-08-07 15:52:22 -04002193 u64 search;
Chris Masona7175312009-01-22 09:23:10 -05002194 u64 target;
Chris Mason6b800532007-10-15 16:17:34 -04002195 u64 nread = 0;
Chris Mason5f39d392007-10-15 16:14:19 -04002196 struct extent_buffer *eb;
Chris Mason6b800532007-10-15 16:17:34 -04002197 u32 nr;
2198 u32 blocksize;
2199 u32 nscan = 0;
Chris Masondb945352007-10-15 16:15:53 -04002200
Chris Masona6b6e752007-10-15 16:22:39 -04002201 if (level != 1)
Chris Mason3c69fae2007-08-07 15:52:22 -04002202 return;
2203
Chris Mason6702ed42007-08-07 16:15:09 -04002204 if (!path->nodes[level])
2205 return;
2206
Chris Mason5f39d392007-10-15 16:14:19 -04002207 node = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04002208
Chris Mason3c69fae2007-08-07 15:52:22 -04002209 search = btrfs_node_blockptr(node, slot);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002210 blocksize = fs_info->nodesize;
2211 eb = find_extent_buffer(fs_info, search);
Chris Mason5f39d392007-10-15 16:14:19 -04002212 if (eb) {
2213 free_extent_buffer(eb);
Chris Mason3c69fae2007-08-07 15:52:22 -04002214 return;
2215 }
2216
Chris Masona7175312009-01-22 09:23:10 -05002217 target = search;
Chris Mason6b800532007-10-15 16:17:34 -04002218
Chris Mason5f39d392007-10-15 16:14:19 -04002219 nritems = btrfs_header_nritems(node);
Chris Mason6b800532007-10-15 16:17:34 -04002220 nr = slot;
Josef Bacik25b8b932011-06-08 14:36:54 -04002221
Chris Masond3977122009-01-05 21:25:51 -05002222 while (1) {
David Sterbae4058b52015-11-27 16:31:35 +01002223 if (path->reada == READA_BACK) {
Chris Mason6b800532007-10-15 16:17:34 -04002224 if (nr == 0)
2225 break;
2226 nr--;
David Sterbae4058b52015-11-27 16:31:35 +01002227 } else if (path->reada == READA_FORWARD) {
Chris Mason6b800532007-10-15 16:17:34 -04002228 nr++;
2229 if (nr >= nritems)
2230 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04002231 }
David Sterbae4058b52015-11-27 16:31:35 +01002232 if (path->reada == READA_BACK && objectid) {
Chris Mason01f46652007-12-21 16:24:26 -05002233 btrfs_node_key(node, &disk_key, nr);
2234 if (btrfs_disk_key_objectid(&disk_key) != objectid)
2235 break;
2236 }
Chris Mason6b800532007-10-15 16:17:34 -04002237 search = btrfs_node_blockptr(node, nr);
Chris Masona7175312009-01-22 09:23:10 -05002238 if ((search <= target && target - search <= 65536) ||
2239 (search > target && search - target <= 65536)) {
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002240 readahead_tree_block(fs_info, search);
Chris Mason6b800532007-10-15 16:17:34 -04002241 nread += blocksize;
2242 }
2243 nscan++;
Chris Masona7175312009-01-22 09:23:10 -05002244 if ((nread > 65536 || nscan > 32))
Chris Mason6b800532007-10-15 16:17:34 -04002245 break;
Chris Mason3c69fae2007-08-07 15:52:22 -04002246 }
2247}
Chris Mason925baed2008-06-25 16:01:30 -04002248
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002249static noinline void reada_for_balance(struct btrfs_fs_info *fs_info,
Josef Bacik0b088512013-06-17 14:23:02 -04002250 struct btrfs_path *path, int level)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002251{
2252 int slot;
2253 int nritems;
2254 struct extent_buffer *parent;
2255 struct extent_buffer *eb;
2256 u64 gen;
2257 u64 block1 = 0;
2258 u64 block2 = 0;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002259
Chris Mason8c594ea2009-04-20 15:50:10 -04002260 parent = path->nodes[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002261 if (!parent)
Josef Bacik0b088512013-06-17 14:23:02 -04002262 return;
Chris Masonb4ce94d2009-02-04 09:25:08 -05002263
2264 nritems = btrfs_header_nritems(parent);
Chris Mason8c594ea2009-04-20 15:50:10 -04002265 slot = path->slots[level + 1];
Chris Masonb4ce94d2009-02-04 09:25:08 -05002266
2267 if (slot > 0) {
2268 block1 = btrfs_node_blockptr(parent, slot - 1);
2269 gen = btrfs_node_ptr_generation(parent, slot - 1);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002270 eb = find_extent_buffer(fs_info, block1);
Chris Masonb9fab912012-05-06 07:23:47 -04002271 /*
 2272		 * If we get -EAGAIN from btrfs_buffer_uptodate(), we
 2273		 * don't want to return it here, that would loop
 2274		 * forever.
2275 */
2276 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002277 block1 = 0;
2278 free_extent_buffer(eb);
2279 }
Chris Mason8c594ea2009-04-20 15:50:10 -04002280 if (slot + 1 < nritems) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05002281 block2 = btrfs_node_blockptr(parent, slot + 1);
2282 gen = btrfs_node_ptr_generation(parent, slot + 1);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002283 eb = find_extent_buffer(fs_info, block2);
Chris Masonb9fab912012-05-06 07:23:47 -04002284 if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
Chris Masonb4ce94d2009-02-04 09:25:08 -05002285 block2 = 0;
2286 free_extent_buffer(eb);
2287 }
Chris Mason8c594ea2009-04-20 15:50:10 -04002288
Josef Bacik0b088512013-06-17 14:23:02 -04002289 if (block1)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002290 readahead_tree_block(fs_info, block1);
Josef Bacik0b088512013-06-17 14:23:02 -04002291 if (block2)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002292 readahead_tree_block(fs_info, block2);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002293}
2294
2295
2296/*
Chris Masond3977122009-01-05 21:25:51 -05002297 * when we walk down the tree, it is usually safe to unlock the higher layers
2298 * in the tree. The exceptions are when our path goes through slot 0, because
2299 * operations on the tree might require changing key pointers higher up in the
2300 * tree.
Chris Masond352ac62008-09-29 15:18:18 -04002301 *
Chris Masond3977122009-01-05 21:25:51 -05002302 * callers might also have set path->keep_locks, which tells this code to keep
2303 * the lock if the path points to the last slot in the block. This is part of
2304 * walking through the tree, and selecting the next slot in the higher block.
Chris Masond352ac62008-09-29 15:18:18 -04002305 *
Chris Masond3977122009-01-05 21:25:51 -05002306 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. So
 2307 * if lowest_unlock is 1, level 0 won't be unlocked.
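 *
 * if write_lock_level is not NULL, then whenever a level above
 * min_write_lock_level that was counted as write locked (i.e. at or below
 * *write_lock_level) gets unlocked here, *write_lock_level is lowered so the
 * caller knows it no longer holds write locks that high in the tree.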
Chris Masond352ac62008-09-29 15:18:18 -04002308 */
Chris Masone02119d2008-09-05 16:13:11 -04002309static noinline void unlock_up(struct btrfs_path *path, int level,
Chris Masonf7c79f32012-03-19 15:54:38 -04002310 int lowest_unlock, int min_write_lock_level,
2311 int *write_lock_level)
Chris Mason925baed2008-06-25 16:01:30 -04002312{
2313 int i;
2314 int skip_level = level;
Chris Mason051e1b92008-06-25 16:01:30 -04002315 int no_skips = 0;
Chris Mason925baed2008-06-25 16:01:30 -04002316 struct extent_buffer *t;
2317
2318 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2319 if (!path->nodes[i])
2320 break;
2321 if (!path->locks[i])
2322 break;
Chris Mason051e1b92008-06-25 16:01:30 -04002323 if (!no_skips && path->slots[i] == 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002324 skip_level = i + 1;
2325 continue;
2326 }
Chris Mason051e1b92008-06-25 16:01:30 -04002327 if (!no_skips && path->keep_locks) {
Chris Mason925baed2008-06-25 16:01:30 -04002328 u32 nritems;
2329 t = path->nodes[i];
2330 nritems = btrfs_header_nritems(t);
Chris Mason051e1b92008-06-25 16:01:30 -04002331 if (nritems < 1 || path->slots[i] >= nritems - 1) {
Chris Mason925baed2008-06-25 16:01:30 -04002332 skip_level = i + 1;
2333 continue;
2334 }
2335 }
Chris Mason051e1b92008-06-25 16:01:30 -04002336 if (skip_level < i && i >= lowest_unlock)
2337 no_skips = 1;
2338
Chris Mason925baed2008-06-25 16:01:30 -04002339 t = path->nodes[i];
Liu Bod80bb3f2018-05-18 11:00:24 +08002340 if (i >= lowest_unlock && i > skip_level) {
Chris Masonbd681512011-07-16 15:23:14 -04002341 btrfs_tree_unlock_rw(t, path->locks[i]);
Chris Mason925baed2008-06-25 16:01:30 -04002342 path->locks[i] = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04002343 if (write_lock_level &&
2344 i > min_write_lock_level &&
2345 i <= *write_lock_level) {
2346 *write_lock_level = i - 1;
2347 }
Chris Mason925baed2008-06-25 16:01:30 -04002348 }
2349 }
2350}
2351
Chris Mason3c69fae2007-08-07 15:52:22 -04002352/*
Chris Masonc8c42862009-04-03 10:14:18 -04002353 * helper function for btrfs_search_slot. The goal is to find a block
2354 * in cache without setting the path to blocking. If we find the block
2355 * we return zero and the path is unchanged.
2356 *
2357 * If we can't find the block, we set the path blocking and do some
2358 * reada. -EAGAIN is returned and the search must be repeated.
2359 */
2360static int
Liu Bod07b8522017-01-30 12:23:42 -08002361read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
2362 struct extent_buffer **eb_ret, int level, int slot,
David Sterbacda79c52017-02-10 18:44:32 +01002363 const struct btrfs_key *key)
Chris Masonc8c42862009-04-03 10:14:18 -04002364{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002365 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonc8c42862009-04-03 10:14:18 -04002366 u64 blocknr;
2367 u64 gen;
Chris Masonc8c42862009-04-03 10:14:18 -04002368 struct extent_buffer *tmp;
Qu Wenruo581c1762018-03-29 09:08:11 +08002369 struct btrfs_key first_key;
Chris Mason76a05b32009-05-14 13:24:30 -04002370 int ret;
Qu Wenruo581c1762018-03-29 09:08:11 +08002371 int parent_level;
Chris Masonc8c42862009-04-03 10:14:18 -04002372
Nikolay Borisov213ff4b2020-05-27 13:10:59 +03002373 blocknr = btrfs_node_blockptr(*eb_ret, slot);
2374 gen = btrfs_node_ptr_generation(*eb_ret, slot);
2375 parent_level = btrfs_header_level(*eb_ret);
2376 btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
Chris Masonc8c42862009-04-03 10:14:18 -04002377
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002378 tmp = find_extent_buffer(fs_info, blocknr);
Chris Masoncb449212010-10-24 11:01:27 -04002379 if (tmp) {
Chris Masonb9fab912012-05-06 07:23:47 -04002380 /* first we do an atomic uptodate check */
Josef Bacikbdf7c002013-06-17 13:44:48 -04002381 if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
Qu Wenruo448de472019-03-12 17:10:40 +08002382 /*
2383 * Do extra check for first_key, eb can be stale due to
2384 * being cached, read from scrub, or have multiple
2385 * parents (shared tree blocks).
2386 */
David Sterbae064d5e2019-03-20 14:58:13 +01002387 if (btrfs_verify_level_key(tmp,
Qu Wenruo448de472019-03-12 17:10:40 +08002388 parent_level - 1, &first_key, gen)) {
2389 free_extent_buffer(tmp);
2390 return -EUCLEAN;
2391 }
Josef Bacikbdf7c002013-06-17 13:44:48 -04002392 *eb_ret = tmp;
2393 return 0;
Chris Masoncb449212010-10-24 11:01:27 -04002394 }
Josef Bacikbdf7c002013-06-17 13:44:48 -04002395
 2396		/*
 2397		 * The pages were up to date, but we failed the
 2398		 * generation number check. Do a full read for the
 2399		 * correct generation number. We must do this without
 2400		 * dropping locks so we can trust our generation number.
2401 */
2402 btrfs_set_path_blocking(p);
2403
2404 /* now we're allowed to do a blocking uptodate check */
Qu Wenruo581c1762018-03-29 09:08:11 +08002405 ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
Josef Bacikbdf7c002013-06-17 13:44:48 -04002406 if (!ret) {
2407 *eb_ret = tmp;
2408 return 0;
2409 }
2410 free_extent_buffer(tmp);
2411 btrfs_release_path(p);
2412 return -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04002413 }
2414
2415 /*
2416 * reduce lock contention at high levels
2417 * of the btree by dropping locks before
Chris Mason76a05b32009-05-14 13:24:30 -04002418 * we read. Don't release the lock on the current
2419 * level because we need to walk this node to figure
2420 * out which blocks to read.
Chris Masonc8c42862009-04-03 10:14:18 -04002421 */
Chris Mason8c594ea2009-04-20 15:50:10 -04002422 btrfs_unlock_up_safe(p, level + 1);
2423 btrfs_set_path_blocking(p);
2424
David Sterbae4058b52015-11-27 16:31:35 +01002425 if (p->reada != READA_NONE)
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002426 reada_for_search(fs_info, p, level, slot, key->objectid);
Chris Masonc8c42862009-04-03 10:14:18 -04002427
Chris Mason76a05b32009-05-14 13:24:30 -04002428 ret = -EAGAIN;
Liu Bo02a33072018-05-16 01:37:36 +08002429 tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
Qu Wenruo581c1762018-03-29 09:08:11 +08002430 &first_key);
Liu Bo64c043d2015-05-25 17:30:15 +08002431 if (!IS_ERR(tmp)) {
Chris Mason76a05b32009-05-14 13:24:30 -04002432 /*
2433 * If the read above didn't mark this buffer up to date,
2434 * it will never end up being up to date. Set ret to EIO now
2435 * and give up so that our caller doesn't loop forever
2436 * on our EAGAINs.
2437 */
Liu Boe6a1d6f2018-05-18 11:00:20 +08002438 if (!extent_buffer_uptodate(tmp))
Chris Mason76a05b32009-05-14 13:24:30 -04002439 ret = -EIO;
Chris Masonc8c42862009-04-03 10:14:18 -04002440 free_extent_buffer(tmp);
Liu Boc871b0f2016-06-06 12:01:23 -07002441 } else {
2442 ret = PTR_ERR(tmp);
Chris Mason76a05b32009-05-14 13:24:30 -04002443 }
Liu Bo02a33072018-05-16 01:37:36 +08002444
2445 btrfs_release_path(p);
Chris Mason76a05b32009-05-14 13:24:30 -04002446 return ret;
Chris Masonc8c42862009-04-03 10:14:18 -04002447}
2448
2449/*
2450 * helper function for btrfs_search_slot. This does all of the checks
2451 * for node-level blocks and does any balancing required based on
2452 * the ins_len.
2453 *
2454 * If no extra work was required, zero is returned. If we had to
2455 * drop the path, -EAGAIN is returned and btrfs_search_slot must
2456 * start over
2457 */
2458static int
2459setup_nodes_for_search(struct btrfs_trans_handle *trans,
2460 struct btrfs_root *root, struct btrfs_path *p,
Chris Masonbd681512011-07-16 15:23:14 -04002461 struct extent_buffer *b, int level, int ins_len,
2462 int *write_lock_level)
Chris Masonc8c42862009-04-03 10:14:18 -04002463{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002464 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Masonc8c42862009-04-03 10:14:18 -04002465 int ret;
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002466
Chris Masonc8c42862009-04-03 10:14:18 -04002467 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002468 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
Chris Masonc8c42862009-04-03 10:14:18 -04002469 int sret;
2470
Chris Masonbd681512011-07-16 15:23:14 -04002471 if (*write_lock_level < level + 1) {
2472 *write_lock_level = level + 1;
2473 btrfs_release_path(p);
2474 goto again;
2475 }
2476
Chris Masonc8c42862009-04-03 10:14:18 -04002477 btrfs_set_path_blocking(p);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002478 reada_for_balance(fs_info, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002479 sret = split_node(trans, root, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002480
2481 BUG_ON(sret > 0);
2482 if (sret) {
2483 ret = sret;
2484 goto done;
2485 }
2486 b = p->nodes[level];
2487 } else if (ins_len < 0 && btrfs_header_nritems(b) <
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002488 BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
Chris Masonc8c42862009-04-03 10:14:18 -04002489 int sret;
2490
Chris Masonbd681512011-07-16 15:23:14 -04002491 if (*write_lock_level < level + 1) {
2492 *write_lock_level = level + 1;
2493 btrfs_release_path(p);
2494 goto again;
2495 }
2496
Chris Masonc8c42862009-04-03 10:14:18 -04002497 btrfs_set_path_blocking(p);
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04002498 reada_for_balance(fs_info, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002499 sret = balance_level(trans, root, p, level);
Chris Masonc8c42862009-04-03 10:14:18 -04002500
2501 if (sret) {
2502 ret = sret;
2503 goto done;
2504 }
2505 b = p->nodes[level];
2506 if (!b) {
David Sterbab3b4aa72011-04-21 01:20:15 +02002507 btrfs_release_path(p);
Chris Masonc8c42862009-04-03 10:14:18 -04002508 goto again;
2509 }
2510 BUG_ON(btrfs_header_nritems(b) == 1);
2511 }
2512 return 0;
2513
2514again:
2515 ret = -EAGAIN;
2516done:
2517 return ret;
2518}
2519
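/*
 * Search @fs_root for the key (@iobjectid, @key_type, @ioff).  If an item with
 * a matching objectid and type exists at or after that position, its key is
 * copied into @found_key and 0 is returned.  Returns 1 if no such item is
 * found and < 0 on error.
 */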
David Sterba381cf652015-01-02 18:45:16 +01002520int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002521 u64 iobjectid, u64 ioff, u8 key_type,
2522 struct btrfs_key *found_key)
2523{
2524 int ret;
2525 struct btrfs_key key;
2526 struct extent_buffer *eb;
David Sterba381cf652015-01-02 18:45:16 +01002527
2528 ASSERT(path);
David Sterba1d4c08e2015-01-02 19:36:14 +01002529 ASSERT(found_key);
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002530
2531 key.type = key_type;
2532 key.objectid = iobjectid;
2533 key.offset = ioff;
2534
2535 ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
David Sterba1d4c08e2015-01-02 19:36:14 +01002536 if (ret < 0)
Kelley Nielsene33d5c32013-11-04 19:33:33 -08002537 return ret;
2538
2539 eb = path->nodes[0];
2540 if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
2541 ret = btrfs_next_leaf(fs_root, path);
2542 if (ret)
2543 return ret;
2544 eb = path->nodes[0];
2545 }
2546
2547 btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
2548 if (found_key->type != key.type ||
2549 found_key->objectid != key.objectid)
2550 return 1;
2551
2552 return 0;
2553}
2554
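/*
 * Return the root node to start a search of @root from, with the appropriate
 * lock held and recorded in @p: a read lock when possible, a write lock when
 * @write_lock_level demands it, or no lock at all for skip_locking and
 * commit-root searches (where a clone of the commit root may be returned).
 */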
Liu Bo1fc28d82018-05-18 11:00:21 +08002555static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
2556 struct btrfs_path *p,
2557 int write_lock_level)
2558{
2559 struct btrfs_fs_info *fs_info = root->fs_info;
2560 struct extent_buffer *b;
2561 int root_lock;
2562 int level = 0;
2563
2564 /* We try very hard to do read locks on the root */
2565 root_lock = BTRFS_READ_LOCK;
2566
2567 if (p->search_commit_root) {
Filipe Mananabe6821f2018-12-11 10:19:45 +00002568 /*
2569 * The commit roots are read only so we always do read locks,
2570 * and we always must hold the commit_root_sem when doing
2571 * searches on them, the only exception is send where we don't
2572 * want to block transaction commits for a long time, so
2573 * we need to clone the commit root in order to avoid races
2574 * with transaction commits that create a snapshot of one of
2575 * the roots used by a send operation.
2576 */
2577 if (p->need_commit_sem) {
Liu Bo1fc28d82018-05-18 11:00:21 +08002578 down_read(&fs_info->commit_root_sem);
Filipe Mananabe6821f2018-12-11 10:19:45 +00002579 b = btrfs_clone_extent_buffer(root->commit_root);
Liu Bo1fc28d82018-05-18 11:00:21 +08002580 up_read(&fs_info->commit_root_sem);
Filipe Mananabe6821f2018-12-11 10:19:45 +00002581 if (!b)
2582 return ERR_PTR(-ENOMEM);
2583
2584 } else {
2585 b = root->commit_root;
David Sterba67439da2019-10-08 13:28:47 +02002586 atomic_inc(&b->refs);
Filipe Mananabe6821f2018-12-11 10:19:45 +00002587 }
2588 level = btrfs_header_level(b);
Liu Bof9ddfd02018-05-29 21:27:06 +08002589 /*
2590 * Ensure that all callers have set skip_locking when
2591 * p->search_commit_root = 1.
2592 */
2593 ASSERT(p->skip_locking == 1);
Liu Bo1fc28d82018-05-18 11:00:21 +08002594
2595 goto out;
2596 }
2597
2598 if (p->skip_locking) {
2599 b = btrfs_root_node(root);
2600 level = btrfs_header_level(b);
2601 goto out;
2602 }
2603
2604 /*
Liu Bo662c6532018-05-18 11:00:23 +08002605 * If write_lock_level is already at the maximum, we can skip trying to get
 2606 * the read lock and take the write lock on the root directly.
Liu Bo1fc28d82018-05-18 11:00:21 +08002607 */
Liu Bo662c6532018-05-18 11:00:23 +08002608 if (write_lock_level < BTRFS_MAX_LEVEL) {
2609 /*
2610 * We don't know the level of the root node until we actually
2611 * have it read locked
2612 */
Josef Bacik51899412020-08-20 11:46:01 -04002613 b = __btrfs_read_lock_root_node(root, p->recurse);
Liu Bo662c6532018-05-18 11:00:23 +08002614 level = btrfs_header_level(b);
2615 if (level > write_lock_level)
2616 goto out;
Liu Bo1fc28d82018-05-18 11:00:21 +08002617
Liu Bo662c6532018-05-18 11:00:23 +08002618 /* Whoops, must trade for write lock */
2619 btrfs_tree_read_unlock(b);
2620 free_extent_buffer(b);
2621 }
2622
Liu Bo1fc28d82018-05-18 11:00:21 +08002623 b = btrfs_lock_root_node(root);
2624 root_lock = BTRFS_WRITE_LOCK;
2625
2626 /* The level might have changed, check again */
2627 level = btrfs_header_level(b);
2628
2629out:
2630 p->nodes[level] = b;
2631 if (!p->skip_locking)
2632 p->locks[level] = root_lock;
2633 /*
2634 * Callers are responsible for dropping b's references.
2635 */
2636 return b;
2637}
2638
2639
Chris Masonc8c42862009-04-03 10:14:18 -04002640/*
Nikolay Borisov4271ece2017-12-13 09:38:14 +02002641 * btrfs_search_slot - look for a key in a tree and perform necessary
2642 * modifications to preserve tree invariants.
Chris Mason74123bd2007-02-02 11:05:29 -05002643 *
Nikolay Borisov4271ece2017-12-13 09:38:14 +02002644 * @trans: Handle of transaction, used when modifying the tree
2645 * @p: Holds all btree nodes along the search path
2646 * @root: The root node of the tree
2647 * @key: The key we are looking for
 2648 * @ins_len: Indicates purpose of search: > 0 for inserts (the amount of leaf
 2649 * space required), < 0 for deletions, 0 for plain searches
 2650 * @cow: boolean indicating whether CoW operations should be performed. Must
 2651 * always be 1 when modifying the tree.
Chris Mason97571fd2007-02-24 13:39:08 -05002652 *
Nikolay Borisov4271ece2017-12-13 09:38:14 +02002653 * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
2654 * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
2655 *
2656 * If @key is found, 0 is returned and you can find the item in the leaf level
2657 * of the path (level 0)
2658 *
2659 * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
2660 * points to the slot where it should be inserted
2661 *
2662 * If an error is encountered while searching the tree a negative error number
2663 * is returned
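 *
 * A minimal usage sketch for a plain read-only lookup (illustrative only;
 * 'ino' is a placeholder and this is not copied from a real caller):
 *
 *	struct btrfs_path *path = btrfs_alloc_path();
 *	struct btrfs_key key;
 *	int ret;
 *
 *	if (!path)
 *		return -ENOMEM;
 *	key.objectid = ino;
 *	key.type = BTRFS_INODE_ITEM_KEY;
 *	key.offset = 0;
 *	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 *	if (ret == 0)
 *		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
 *	btrfs_free_path(path);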
Chris Mason74123bd2007-02-02 11:05:29 -05002664 */
Omar Sandoval310712b2017-01-17 23:24:37 -08002665int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2666 const struct btrfs_key *key, struct btrfs_path *p,
2667 int ins_len, int cow)
Chris Masonbe0e5c02007-01-26 15:51:26 -05002668{
Chris Mason5f39d392007-10-15 16:14:19 -04002669 struct extent_buffer *b;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002670 int slot;
2671 int ret;
Yan Zheng33c66f42009-07-22 09:59:00 -04002672 int err;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002673 int level;
Chris Mason925baed2008-06-25 16:01:30 -04002674 int lowest_unlock = 1;
Chris Masonbd681512011-07-16 15:23:14 -04002675 /* everything at write_lock_level or lower must be write locked */
2676 int write_lock_level = 0;
Chris Mason9f3a7422007-08-07 15:52:19 -04002677 u8 lowest_level = 0;
Chris Masonf7c79f32012-03-19 15:54:38 -04002678 int min_write_lock_level;
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002679 int prev_cmp;
Chris Mason9f3a7422007-08-07 15:52:19 -04002680
Chris Mason6702ed42007-08-07 16:15:09 -04002681 lowest_level = p->lowest_level;
Chris Mason323ac952008-10-01 19:05:46 -04002682 WARN_ON(lowest_level && ins_len > 0);
Chris Mason22b0ebd2007-03-30 08:47:31 -04002683 WARN_ON(p->nodes[0] != NULL);
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002684 BUG_ON(!cow && ins_len);
Josef Bacik25179202008-10-29 14:49:05 -04002685
Chris Masonbd681512011-07-16 15:23:14 -04002686 if (ins_len < 0) {
Chris Mason925baed2008-06-25 16:01:30 -04002687 lowest_unlock = 2;
Chris Mason65b51a02008-08-01 15:11:20 -04002688
Chris Masonbd681512011-07-16 15:23:14 -04002689 /* when we are removing items, we might have to go up to level
2690 * two as we update tree pointers Make sure we keep write
2691 * for those levels as well
2692 */
2693 write_lock_level = 2;
2694 } else if (ins_len > 0) {
2695 /*
2696 * for inserting items, make sure we have a write lock on
2697 * level 1 so we can update keys
2698 */
2699 write_lock_level = 1;
2700 }
2701
2702 if (!cow)
2703 write_lock_level = -1;
2704
Josef Bacik09a2a8f92013-04-05 16:51:15 -04002705 if (cow && (p->keep_locks || p->lowest_level))
Chris Masonbd681512011-07-16 15:23:14 -04002706 write_lock_level = BTRFS_MAX_LEVEL;
2707
Chris Masonf7c79f32012-03-19 15:54:38 -04002708 min_write_lock_level = write_lock_level;
2709
Chris Masonbb803952007-03-01 12:04:21 -05002710again:
Filipe David Borba Mananad7396f02013-08-30 15:46:43 +01002711 prev_cmp = -1;
Liu Bo1fc28d82018-05-18 11:00:21 +08002712 b = btrfs_search_slot_get_root(root, p, write_lock_level);
Filipe Mananabe6821f2018-12-11 10:19:45 +00002713 if (IS_ERR(b)) {
2714 ret = PTR_ERR(b);
2715 goto done;
2716 }
Chris Mason925baed2008-06-25 16:01:30 -04002717
Chris Masoneb60cea2007-02-02 09:18:22 -05002718 while (b) {
Qu Wenruof624d972019-09-10 15:40:17 +08002719 int dec = 0;
2720
Chris Mason5f39d392007-10-15 16:14:19 -04002721 level = btrfs_header_level(b);
Chris Mason65b51a02008-08-01 15:11:20 -04002722
Chris Mason02217ed2007-03-02 16:08:05 -05002723 if (cow) {
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02002724 bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
2725
Chris Masonc8c42862009-04-03 10:14:18 -04002726 /*
2727 * if we don't really need to cow this block
2728 * then we don't want to set the path blocking,
2729 * so we test it here
2730 */
Jeff Mahoney64c12922016-06-08 00:36:38 -04002731 if (!should_cow_block(trans, root, b)) {
2732 trans->dirty = true;
Chris Mason65b51a02008-08-01 15:11:20 -04002733 goto cow_done;
Jeff Mahoney64c12922016-06-08 00:36:38 -04002734 }
Yan Zheng5d4f98a2009-06-10 10:45:14 -04002735
Chris Masonbd681512011-07-16 15:23:14 -04002736 /*
2737 * must have write locks on this node and the
2738 * parent
2739 */
Josef Bacik5124e002012-11-07 13:44:13 -05002740 if (level > write_lock_level ||
2741 (level + 1 > write_lock_level &&
2742 level + 1 < BTRFS_MAX_LEVEL &&
2743 p->nodes[level + 1])) {
Chris Masonbd681512011-07-16 15:23:14 -04002744 write_lock_level = level + 1;
2745 btrfs_release_path(p);
2746 goto again;
2747 }
2748
Filipe Manana160f4082014-07-28 19:37:17 +01002749 btrfs_set_path_blocking(p);
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02002750 if (last_level)
2751 err = btrfs_cow_block(trans, root, b, NULL, 0,
Josef Bacik9631e4c2020-08-20 11:46:03 -04002752 &b,
2753 BTRFS_NESTING_COW);
Nikolay Borisov9ea2c7c2017-12-12 11:14:49 +02002754 else
2755 err = btrfs_cow_block(trans, root, b,
2756 p->nodes[level + 1],
Josef Bacik9631e4c2020-08-20 11:46:03 -04002757 p->slots[level + 1], &b,
2758 BTRFS_NESTING_COW);
Yan Zheng33c66f42009-07-22 09:59:00 -04002759 if (err) {
Yan Zheng33c66f42009-07-22 09:59:00 -04002760 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04002761 goto done;
Chris Mason54aa1f42007-06-22 14:16:25 -04002762 }
Chris Mason02217ed2007-03-02 16:08:05 -05002763 }
Chris Mason65b51a02008-08-01 15:11:20 -04002764cow_done:
Chris Masoneb60cea2007-02-02 09:18:22 -05002765 p->nodes[level] = b;
Liu Bo52398342018-08-22 05:54:37 +08002766 /*
 2767		 * Leave the path with blocking locks to avoid a massive
 2768		 * number of lock context switches; this is done on purpose.
2769 */
Chris Masonb4ce94d2009-02-04 09:25:08 -05002770
2771 /*
2772 * we have a lock on b and as long as we aren't changing
 2773		 * the tree, there is no way for the items in b to change.
2774 * It is safe to drop the lock on our parent before we
2775 * go through the expensive btree search on b.
2776 *
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002777 * If we're inserting or deleting (ins_len != 0), then we might
2778 * be changing slot zero, which may require changing the parent.
2779 * So, we can't drop the lock until after we know which slot
2780 * we're operating on.
Chris Masonb4ce94d2009-02-04 09:25:08 -05002781 */
Filipe David Borba Mananaeb653de2013-12-23 11:53:02 +00002782 if (!ins_len && !p->keep_locks) {
2783 int u = level + 1;
2784
2785 if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
2786 btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
2787 p->locks[u] = 0;
2788 }
2789 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05002790
Nikolay Borisov995e9a12020-05-27 13:10:53 +03002791 /*
2792 * If btrfs_bin_search returns an exact match (prev_cmp == 0)
2793 * we can safely assume the target key will always be in slot 0
2794 * on lower levels due to the invariants BTRFS' btree provides,
2795 * namely that a btrfs_key_ptr entry always points to the
2796 * lowest key in the child node, thus we can skip searching
2797 * lower levels
2798 */
2799 if (prev_cmp == 0) {
2800 slot = 0;
2801 ret = 0;
2802 } else {
2803 ret = btrfs_bin_search(b, key, &slot);
2804 prev_cmp = ret;
2805 if (ret < 0)
2806 goto done;
2807 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05002808
Qu Wenruof624d972019-09-10 15:40:17 +08002809 if (level == 0) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05002810 p->slots[level] = slot;
Yan Zheng87b29b22008-12-17 10:21:48 -05002811 if (ins_len > 0 &&
David Sterbae902baa2019-03-20 14:36:46 +01002812 btrfs_leaf_free_space(b) < ins_len) {
Chris Masonbd681512011-07-16 15:23:14 -04002813 if (write_lock_level < 1) {
2814 write_lock_level = 1;
2815 btrfs_release_path(p);
2816 goto again;
2817 }
2818
Chris Masonb4ce94d2009-02-04 09:25:08 -05002819 btrfs_set_path_blocking(p);
Yan Zheng33c66f42009-07-22 09:59:00 -04002820 err = split_leaf(trans, root, key,
2821 p, ins_len, ret == 0);
Chris Masonb4ce94d2009-02-04 09:25:08 -05002822
Yan Zheng33c66f42009-07-22 09:59:00 -04002823 BUG_ON(err > 0);
2824 if (err) {
2825 ret = err;
Chris Mason65b51a02008-08-01 15:11:20 -04002826 goto done;
2827 }
Chris Mason5c680ed2007-02-22 11:39:13 -05002828 }
Chris Mason459931e2008-12-10 09:10:46 -05002829 if (!p->search_for_split)
Chris Masonf7c79f32012-03-19 15:54:38 -04002830 unlock_up(p, level, lowest_unlock,
Liu Bo4b6f8e92018-08-14 10:46:53 +08002831 min_write_lock_level, NULL);
Chris Mason65b51a02008-08-01 15:11:20 -04002832 goto done;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002833 }
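		/*
		 * The key was not found in this node: btrfs_bin_search
		 * returned the slot where it would be inserted, i.e. the
		 * first slot with a greater key.  Step back one slot so we
		 * descend into the child that covers the key, and remember
		 * (dec) to bump the slot back up if this turns out to be the
		 * lowest level the caller asked for.
		 */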
Qu Wenruof624d972019-09-10 15:40:17 +08002834 if (ret && slot > 0) {
2835 dec = 1;
2836 slot--;
2837 }
2838 p->slots[level] = slot;
2839 err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
2840 &write_lock_level);
2841 if (err == -EAGAIN)
2842 goto again;
2843 if (err) {
2844 ret = err;
2845 goto done;
2846 }
2847 b = p->nodes[level];
2848 slot = p->slots[level];
2849
2850 /*
2851 * Slot 0 is special, if we change the key we have to update
2852 * the parent pointer which means we must have a write lock on
2853 * the parent
2854 */
2855 if (slot == 0 && ins_len && write_lock_level < level + 1) {
2856 write_lock_level = level + 1;
2857 btrfs_release_path(p);
2858 goto again;
2859 }
2860
2861 unlock_up(p, level, lowest_unlock, min_write_lock_level,
2862 &write_lock_level);
2863
2864 if (level == lowest_level) {
2865 if (dec)
2866 p->slots[level]++;
2867 goto done;
2868 }
2869
2870 err = read_block_for_search(root, p, &b, level, slot, key);
2871 if (err == -EAGAIN)
2872 goto again;
2873 if (err) {
2874 ret = err;
2875 goto done;
2876 }
2877
2878 if (!p->skip_locking) {
2879 level = btrfs_header_level(b);
2880 if (level <= write_lock_level) {
2881 if (!btrfs_try_tree_write_lock(b)) {
2882 btrfs_set_path_blocking(p);
2883 btrfs_tree_lock(b);
2884 }
2885 p->locks[level] = BTRFS_WRITE_LOCK;
2886 } else {
2887 if (!btrfs_tree_read_lock_atomic(b)) {
2888 btrfs_set_path_blocking(p);
Josef Bacikfd7ba1c2020-08-20 11:46:02 -04002889 __btrfs_tree_read_lock(b, BTRFS_NESTING_NORMAL,
2890 p->recurse);
Qu Wenruof624d972019-09-10 15:40:17 +08002891 }
2892 p->locks[level] = BTRFS_READ_LOCK;
2893 }
2894 p->nodes[level] = b;
2895 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05002896 }
Chris Mason65b51a02008-08-01 15:11:20 -04002897 ret = 1;
2898done:
Chris Masonb4ce94d2009-02-04 09:25:08 -05002899 /*
2900 * we don't really know what they plan on doing with the path
2901 * from here on, so for now just mark it as blocking
2902 */
Chris Masonb9473432009-03-13 11:00:37 -04002903 if (!p->leave_spinning)
2904 btrfs_set_path_blocking(p);
Filipe Manana5f5bc6b2014-11-09 08:38:39 +00002905 if (ret < 0 && !p->skip_release_on_error)
David Sterbab3b4aa72011-04-21 01:20:15 +02002906 btrfs_release_path(p);
Chris Mason65b51a02008-08-01 15:11:20 -04002907 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05002908}
2909
Chris Mason74123bd2007-02-02 11:05:29 -05002910/*
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002911 * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
2912 * current state of the tree together with the operations recorded in the tree
2913 * modification log to search for the key in a previous version of this tree, as
2914 * denoted by the time_seq parameter.
2915 *
2916 * Naturally, there is no support for insert, delete or cow operations.
2917 *
2918 * The resulting path and return value will be set up as if we called
2919 * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
2920 */
Omar Sandoval310712b2017-01-17 23:24:37 -08002921int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002922 struct btrfs_path *p, u64 time_seq)
2923{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04002924 struct btrfs_fs_info *fs_info = root->fs_info;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002925 struct extent_buffer *b;
2926 int slot;
2927 int ret;
2928 int err;
2929 int level;
2930 int lowest_unlock = 1;
2931 u8 lowest_level = 0;
2932
2933 lowest_level = p->lowest_level;
2934 WARN_ON(p->nodes[0] != NULL);
2935
2936 if (p->search_commit_root) {
2937 BUG_ON(time_seq);
2938 return btrfs_search_slot(NULL, root, key, p, 0, 0);
2939 }
2940
2941again:
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002942 b = get_old_root(root, time_seq);
Nikolay Borisov315bed42018-09-13 11:35:10 +03002943 if (!b) {
2944 ret = -EIO;
2945 goto done;
2946 }
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002947 level = btrfs_header_level(b);
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002948 p->locks[level] = BTRFS_READ_LOCK;
2949
2950 while (b) {
Qu Wenruoabe93392019-09-10 15:40:18 +08002951 int dec = 0;
2952
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002953 level = btrfs_header_level(b);
2954 p->nodes[level] = b;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002955
2956 /*
2957 * we have a lock on b and as long as we aren't changing
 2958	 * the tree, there is no way for the items in b to change.
2959 * It is safe to drop the lock on our parent before we
2960 * go through the expensive btree search on b.
2961 */
2962 btrfs_unlock_up_safe(p, level + 1);
2963
Nikolay Borisov995e9a12020-05-27 13:10:53 +03002964 ret = btrfs_bin_search(b, key, &slot);
Filipe Mananacbca7d52019-02-18 16:57:26 +00002965 if (ret < 0)
2966 goto done;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002967
Qu Wenruoabe93392019-09-10 15:40:18 +08002968 if (level == 0) {
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02002969 p->slots[level] = slot;
2970 unlock_up(p, level, lowest_unlock, 0, NULL);
2971 goto done;
2972 }
Qu Wenruoabe93392019-09-10 15:40:18 +08002973
2974 if (ret && slot > 0) {
2975 dec = 1;
2976 slot--;
2977 }
2978 p->slots[level] = slot;
2979 unlock_up(p, level, lowest_unlock, 0, NULL);
2980
2981 if (level == lowest_level) {
2982 if (dec)
2983 p->slots[level]++;
2984 goto done;
2985 }
2986
2987 err = read_block_for_search(root, p, &b, level, slot, key);
2988 if (err == -EAGAIN)
2989 goto again;
2990 if (err) {
2991 ret = err;
2992 goto done;
2993 }
2994
2995 level = btrfs_header_level(b);
2996 if (!btrfs_tree_read_lock_atomic(b)) {
2997 btrfs_set_path_blocking(p);
2998 btrfs_tree_read_lock(b);
2999 }
3000 b = tree_mod_log_rewind(fs_info, p, b, time_seq);
3001 if (!b) {
3002 ret = -ENOMEM;
3003 goto done;
3004 }
3005 p->locks[level] = BTRFS_READ_LOCK;
3006 p->nodes[level] = b;
Jan Schmidt5d9e75c42012-05-16 18:25:47 +02003007 }
3008 ret = 1;
3009done:
3010 if (!p->leave_spinning)
3011 btrfs_set_path_blocking(p);
3012 if (ret < 0)
3013 btrfs_release_path(p);
3014
3015 return ret;
3016}
3017
3018/*
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003019 * helper to use instead of search slot if no exact match is needed but
3020 * instead the next or previous item should be returned.
3021 * When find_higher is true, the next higher item is returned, the next lower
3022 * otherwise.
3023 * When return_any and find_higher are both true, and no higher item is found,
3024 * return the next lower instead.
3025 * When return_any is true and find_higher is false, and no lower item is found,
3026 * return the next higher instead.
3027 * It returns 0 if any item is found, 1 if none is found (tree empty), and
3028 * < 0 on error
3029 */
3030int btrfs_search_slot_for_read(struct btrfs_root *root,
Omar Sandoval310712b2017-01-17 23:24:37 -08003031 const struct btrfs_key *key,
3032 struct btrfs_path *p, int find_higher,
3033 int return_any)
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003034{
3035 int ret;
3036 struct extent_buffer *leaf;
3037
3038again:
3039 ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
3040 if (ret <= 0)
3041 return ret;
3042 /*
3043 * a return value of 1 means the path is at the position where the
3044 * item should be inserted. Normally this is the next bigger item,
3045 * but in case the previous item is the last in a leaf, path points
3046 * to the first free slot in the previous leaf, i.e. at an invalid
3047 * item.
3048 */
3049 leaf = p->nodes[0];
3050
3051 if (find_higher) {
3052 if (p->slots[0] >= btrfs_header_nritems(leaf)) {
3053 ret = btrfs_next_leaf(root, p);
3054 if (ret <= 0)
3055 return ret;
3056 if (!return_any)
3057 return 1;
3058 /*
3059 * no higher item found, return the next
3060 * lower instead
3061 */
3062 return_any = 0;
3063 find_higher = 0;
3064 btrfs_release_path(p);
3065 goto again;
3066 }
3067 } else {
Arne Jansene6793762011-09-13 11:18:10 +02003068 if (p->slots[0] == 0) {
3069 ret = btrfs_prev_leaf(root, p);
3070 if (ret < 0)
3071 return ret;
3072 if (!ret) {
Filipe David Borba Manana23c6bf62014-01-11 21:28:54 +00003073 leaf = p->nodes[0];
3074 if (p->slots[0] == btrfs_header_nritems(leaf))
3075 p->slots[0]--;
Arne Jansene6793762011-09-13 11:18:10 +02003076 return 0;
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003077 }
Arne Jansene6793762011-09-13 11:18:10 +02003078 if (!return_any)
3079 return 1;
3080 /*
3081 * no lower item found, return the next
3082 * higher instead
3083 */
3084 return_any = 0;
3085 find_higher = 1;
3086 btrfs_release_path(p);
3087 goto again;
3088 } else {
Arne Jansen2f38b3e2011-09-13 11:18:10 +02003089 --p->slots[0];
3090 }
3091 }
3092 return 0;
3093}
3094
3095/*
Chris Mason74123bd2007-02-02 11:05:29 -05003096 * adjust the pointers going up the tree, starting at level
3097 * making sure the right key of each node is points to 'key'.
3098 * This is used after shifting pointers to the left, so it stops
3099 * fixing up pointers when a given leaf/node is not in slot 0 of the
3100 * higher levels
Chris Masonaa5d6be2007-02-28 16:35:06 -05003101 *
Chris Mason74123bd2007-02-02 11:05:29 -05003102 */
Nikolay Borisovb167fa92018-06-20 15:48:47 +03003103static void fixup_low_keys(struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01003104 struct btrfs_disk_key *key, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003105{
3106 int i;
Chris Mason5f39d392007-10-15 16:14:19 -04003107 struct extent_buffer *t;
David Sterba0e82bcf2018-03-05 16:16:54 +01003108 int ret;
Chris Mason5f39d392007-10-15 16:14:19 -04003109
Chris Mason234b63a2007-03-13 10:46:10 -04003110 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05003111 int tslot = path->slots[i];
David Sterba0e82bcf2018-03-05 16:16:54 +01003112
Chris Masoneb60cea2007-02-02 09:18:22 -05003113 if (!path->nodes[i])
Chris Masonbe0e5c02007-01-26 15:51:26 -05003114 break;
Chris Mason5f39d392007-10-15 16:14:19 -04003115 t = path->nodes[i];
David Sterba0e82bcf2018-03-05 16:16:54 +01003116 ret = tree_mod_log_insert_key(t, tslot, MOD_LOG_KEY_REPLACE,
3117 GFP_ATOMIC);
3118 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003119 btrfs_set_node_key(t, key, tslot);
Chris Masond6025572007-03-30 14:27:56 -04003120 btrfs_mark_buffer_dirty(path->nodes[i]);
Chris Masonbe0e5c02007-01-26 15:51:26 -05003121 if (tslot != 0)
3122 break;
3123 }
3124}
3125
Chris Mason74123bd2007-02-02 11:05:29 -05003126/*
Zheng Yan31840ae2008-09-23 13:14:14 -04003127 * update item key.
3128 *
3129 * This function isn't completely safe. It's the caller's responsibility
 3130 * to ensure that the new key won't break the key ordering.
3131 */
Daniel Dresslerb7a03652014-11-12 13:43:09 +09003132void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
3133 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08003134 const struct btrfs_key *new_key)
Zheng Yan31840ae2008-09-23 13:14:14 -04003135{
3136 struct btrfs_disk_key disk_key;
3137 struct extent_buffer *eb;
3138 int slot;
3139
3140 eb = path->nodes[0];
3141 slot = path->slots[0];
3142 if (slot > 0) {
3143 btrfs_item_key(eb, &disk_key, slot - 1);
Qu Wenruo7c15d412019-04-25 08:55:53 +08003144 if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
3145 btrfs_crit(fs_info,
3146 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
3147 slot, btrfs_disk_key_objectid(&disk_key),
3148 btrfs_disk_key_type(&disk_key),
3149 btrfs_disk_key_offset(&disk_key),
3150 new_key->objectid, new_key->type,
3151 new_key->offset);
3152 btrfs_print_leaf(eb);
3153 BUG();
3154 }
Zheng Yan31840ae2008-09-23 13:14:14 -04003155 }
3156 if (slot < btrfs_header_nritems(eb) - 1) {
3157 btrfs_item_key(eb, &disk_key, slot + 1);
Qu Wenruo7c15d412019-04-25 08:55:53 +08003158 if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
3159 btrfs_crit(fs_info,
3160 "slot %u key (%llu %u %llu) new key (%llu %u %llu)",
3161 slot, btrfs_disk_key_objectid(&disk_key),
3162 btrfs_disk_key_type(&disk_key),
3163 btrfs_disk_key_offset(&disk_key),
3164 new_key->objectid, new_key->type,
3165 new_key->offset);
3166 btrfs_print_leaf(eb);
3167 BUG();
3168 }
Zheng Yan31840ae2008-09-23 13:14:14 -04003169 }
3170
3171 btrfs_cpu_key_to_disk(&disk_key, new_key);
3172 btrfs_set_item_key(eb, &disk_key, slot);
3173 btrfs_mark_buffer_dirty(eb);
3174 if (slot == 0)
Nikolay Borisovb167fa92018-06-20 15:48:47 +03003175 fixup_low_keys(path, &disk_key, 1);
Zheng Yan31840ae2008-09-23 13:14:14 -04003176}
3177
3178/*
Qu Wenruod16c7022020-08-19 14:35:50 +08003179 * Check key order of two sibling extent buffers.
3180 *
3181 * Return true if something is wrong.
3182 * Return false if everything is fine.
3183 *
3184 * Tree-checker only works inside one tree block, thus the following
3185 * corruption can not be detected by tree-checker:
3186 *
3187 * Leaf @left | Leaf @right
3188 * --------------------------------------------------------------
3189 * | 1 | 2 | 3 | 4 | 5 | f6 | | 7 | 8 |
3190 *
3191 * Key f6 in leaf @left itself is valid, but not valid when the next
3192 * key in leaf @right is 7.
3193 * This can only be checked at tree block merge time.
3194 * And since tree checker has ensured all key order in each tree block
3195 * is correct, we only need to bother the last key of @left and the first
3196 * key of @right.
3197 */
3198static bool check_sibling_keys(struct extent_buffer *left,
3199 struct extent_buffer *right)
3200{
3201 struct btrfs_key left_last;
3202 struct btrfs_key right_first;
3203 int level = btrfs_header_level(left);
3204 int nr_left = btrfs_header_nritems(left);
3205 int nr_right = btrfs_header_nritems(right);
3206
3207 /* No key to check in one of the tree blocks */
3208 if (!nr_left || !nr_right)
3209 return false;
3210
3211 if (level) {
3212 btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
3213 btrfs_node_key_to_cpu(right, &right_first, 0);
3214 } else {
3215 btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
3216 btrfs_item_key_to_cpu(right, &right_first, 0);
3217 }
3218
3219 if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
3220 btrfs_crit(left->fs_info,
3221"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
3222 left_last.objectid, left_last.type,
3223 left_last.offset, right_first.objectid,
3224 right_first.type, right_first.offset);
3225 return true;
3226 }
3227 return false;
3228}
3229
3230/*
Chris Mason74123bd2007-02-02 11:05:29 -05003231 * try to push data from one node into the next node left in the
Chris Mason79f95c82007-03-01 15:16:26 -05003232 * tree.
Chris Masonaa5d6be2007-02-28 16:35:06 -05003233 *
3234 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
3235 * error, and > 0 if there was no room in the left hand block.
Chris Mason74123bd2007-02-02 11:05:29 -05003236 */
Chris Mason98ed5172008-01-03 10:01:48 -05003237static int push_node_left(struct btrfs_trans_handle *trans,
Jeff Mahoney2ff7e612016-06-22 18:54:24 -04003238 struct extent_buffer *dst,
Chris Mason971a1f62008-04-24 10:54:32 -04003239 struct extent_buffer *src, int empty)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003240{
David Sterbad30a6682019-03-20 14:16:45 +01003241 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003242 int push_items = 0;
Chris Masonbb803952007-03-01 12:04:21 -05003243 int src_nritems;
3244 int dst_nritems;
Chris Masonaa5d6be2007-02-28 16:35:06 -05003245 int ret = 0;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003246
Chris Mason5f39d392007-10-15 16:14:19 -04003247 src_nritems = btrfs_header_nritems(src);
3248 dst_nritems = btrfs_header_nritems(dst);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003249 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
Chris Mason7bb86312007-12-11 09:25:06 -05003250 WARN_ON(btrfs_header_generation(src) != trans->transid);
3251 WARN_ON(btrfs_header_generation(dst) != trans->transid);
Chris Mason54aa1f42007-06-22 14:16:25 -04003252
Chris Masonbce4eae2008-04-24 14:42:46 -04003253 if (!empty && src_nritems <= 8)
Chris Mason971a1f62008-04-24 10:54:32 -04003254 return 1;
3255
Chris Masond3977122009-01-05 21:25:51 -05003256 if (push_items <= 0)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003257 return 1;
3258
Chris Masonbce4eae2008-04-24 14:42:46 -04003259 if (empty) {
Chris Mason971a1f62008-04-24 10:54:32 -04003260 push_items = min(src_nritems, push_items);
Chris Masonbce4eae2008-04-24 14:42:46 -04003261 if (push_items < src_nritems) {
3262 /* leave at least 8 pointers in the node if
3263 * we aren't going to empty it
3264 */
3265 if (src_nritems - push_items < 8) {
3266 if (push_items <= 8)
3267 return 1;
3268 push_items -= 8;
3269 }
3270 }
3271 } else
3272 push_items = min(src_nritems - 8, push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05003273
Qu Wenruod16c7022020-08-19 14:35:50 +08003274 /* dst is the left eb, src is the middle eb */
3275 if (check_sibling_keys(dst, src)) {
3276 ret = -EUCLEAN;
3277 btrfs_abort_transaction(trans, ret);
3278 return ret;
3279 }
David Sterbaed874f02019-03-20 14:22:04 +01003280 ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003281 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003282 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003283 return ret;
3284 }
Chris Mason5f39d392007-10-15 16:14:19 -04003285 copy_extent_buffer(dst, src,
3286 btrfs_node_key_ptr_offset(dst_nritems),
3287 btrfs_node_key_ptr_offset(0),
Chris Masond3977122009-01-05 21:25:51 -05003288 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason5f39d392007-10-15 16:14:19 -04003289
Chris Masonbb803952007-03-01 12:04:21 -05003290 if (push_items < src_nritems) {
Jan Schmidt57911b82012-10-19 09:22:03 +02003291 /*
David Sterbabf1d3422018-03-05 15:47:39 +01003292 * Don't call tree_mod_log_insert_move here, key removal was
3293 * already fully logged by tree_mod_log_eb_copy above.
Jan Schmidt57911b82012-10-19 09:22:03 +02003294 */
Chris Mason5f39d392007-10-15 16:14:19 -04003295 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
3296 btrfs_node_key_ptr_offset(push_items),
3297 (src_nritems - push_items) *
3298 sizeof(struct btrfs_key_ptr));
Chris Masonbb803952007-03-01 12:04:21 -05003299 }
Chris Mason5f39d392007-10-15 16:14:19 -04003300 btrfs_set_header_nritems(src, src_nritems - push_items);
3301 btrfs_set_header_nritems(dst, dst_nritems + push_items);
3302 btrfs_mark_buffer_dirty(src);
3303 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04003304
Chris Masonbb803952007-03-01 12:04:21 -05003305 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003306}
3307
Chris Mason97571fd2007-02-24 13:39:08 -05003308/*
Chris Mason79f95c82007-03-01 15:16:26 -05003309 * try to push data from one node into the next node right in the
3310 * tree.
3311 *
3312 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
3313 * error, and > 0 if there was no room in the right hand block.
3314 *
3315 * this will only push up to 1/2 the contents of the left node over
3316 */
Chris Mason5f39d392007-10-15 16:14:19 -04003317static int balance_node_right(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04003318 struct extent_buffer *dst,
3319 struct extent_buffer *src)
Chris Mason79f95c82007-03-01 15:16:26 -05003320{
David Sterba55d32ed2019-03-20 14:18:06 +01003321 struct btrfs_fs_info *fs_info = trans->fs_info;
Chris Mason79f95c82007-03-01 15:16:26 -05003322 int push_items = 0;
3323 int max_push;
3324 int src_nritems;
3325 int dst_nritems;
3326 int ret = 0;
Chris Mason79f95c82007-03-01 15:16:26 -05003327
Chris Mason7bb86312007-12-11 09:25:06 -05003328 WARN_ON(btrfs_header_generation(src) != trans->transid);
3329 WARN_ON(btrfs_header_generation(dst) != trans->transid);
3330
Chris Mason5f39d392007-10-15 16:14:19 -04003331 src_nritems = btrfs_header_nritems(src);
3332 dst_nritems = btrfs_header_nritems(dst);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003333 push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
Chris Masond3977122009-01-05 21:25:51 -05003334 if (push_items <= 0)
Chris Mason79f95c82007-03-01 15:16:26 -05003335 return 1;
Chris Masonbce4eae2008-04-24 14:42:46 -04003336
Chris Masond3977122009-01-05 21:25:51 -05003337 if (src_nritems < 4)
Chris Masonbce4eae2008-04-24 14:42:46 -04003338 return 1;
Chris Mason79f95c82007-03-01 15:16:26 -05003339
3340 max_push = src_nritems / 2 + 1;
3341 /* don't try to empty the node */
Chris Masond3977122009-01-05 21:25:51 -05003342 if (max_push >= src_nritems)
Chris Mason79f95c82007-03-01 15:16:26 -05003343 return 1;
Yan252c38f2007-08-29 09:11:44 -04003344
Chris Mason79f95c82007-03-01 15:16:26 -05003345 if (max_push < push_items)
3346 push_items = max_push;
3347
Qu Wenruod16c7022020-08-19 14:35:50 +08003348 /* dst is the right eb, src is the middle eb */
3349 if (check_sibling_keys(src, dst)) {
3350 ret = -EUCLEAN;
3351 btrfs_abort_transaction(trans, ret);
3352 return ret;
3353 }
David Sterbabf1d3422018-03-05 15:47:39 +01003354 ret = tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
3355 BUG_ON(ret < 0);
Chris Mason5f39d392007-10-15 16:14:19 -04003356 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
3357 btrfs_node_key_ptr_offset(0),
3358 (dst_nritems) *
3359 sizeof(struct btrfs_key_ptr));
Chris Masond6025572007-03-30 14:27:56 -04003360
David Sterbaed874f02019-03-20 14:22:04 +01003361 ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
3362 push_items);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003363 if (ret) {
Jeff Mahoney66642832016-06-10 18:19:25 -04003364 btrfs_abort_transaction(trans, ret);
Filipe David Borba Manana5de865e2013-12-20 15:17:46 +00003365 return ret;
3366 }
Chris Mason5f39d392007-10-15 16:14:19 -04003367 copy_extent_buffer(dst, src,
3368 btrfs_node_key_ptr_offset(0),
3369 btrfs_node_key_ptr_offset(src_nritems - push_items),
Chris Masond3977122009-01-05 21:25:51 -05003370 push_items * sizeof(struct btrfs_key_ptr));
Chris Mason79f95c82007-03-01 15:16:26 -05003371
Chris Mason5f39d392007-10-15 16:14:19 -04003372 btrfs_set_header_nritems(src, src_nritems - push_items);
3373 btrfs_set_header_nritems(dst, dst_nritems + push_items);
Chris Mason79f95c82007-03-01 15:16:26 -05003374
Chris Mason5f39d392007-10-15 16:14:19 -04003375 btrfs_mark_buffer_dirty(src);
3376 btrfs_mark_buffer_dirty(dst);
Zheng Yan31840ae2008-09-23 13:14:14 -04003377
Chris Mason79f95c82007-03-01 15:16:26 -05003378 return ret;
3379}
3380
3381/*
Chris Mason97571fd2007-02-24 13:39:08 -05003382 * helper function to insert a new root level in the tree.
3383 * A new node is allocated, and a single item is inserted to
3384 * point to the existing root
Chris Masonaa5d6be2007-02-28 16:35:06 -05003385 *
3386 * returns zero on success or < 0 on failure.
Chris Mason97571fd2007-02-24 13:39:08 -05003387 */
Chris Masond3977122009-01-05 21:25:51 -05003388static noinline int insert_new_root(struct btrfs_trans_handle *trans,
Chris Mason5f39d392007-10-15 16:14:19 -04003389 struct btrfs_root *root,
Liu Bofdd99c72013-05-22 12:06:51 +00003390 struct btrfs_path *path, int level)
Chris Mason5c680ed2007-02-22 11:39:13 -05003391{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003392 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason7bb86312007-12-11 09:25:06 -05003393 u64 lower_gen;
Chris Mason5f39d392007-10-15 16:14:19 -04003394 struct extent_buffer *lower;
3395 struct extent_buffer *c;
Chris Mason925baed2008-06-25 16:01:30 -04003396 struct extent_buffer *old;
Chris Mason5f39d392007-10-15 16:14:19 -04003397 struct btrfs_disk_key lower_key;
David Sterbad9d19a02018-03-05 16:35:29 +01003398 int ret;
Chris Mason5c680ed2007-02-22 11:39:13 -05003399
3400 BUG_ON(path->nodes[level]);
3401 BUG_ON(path->nodes[level-1] != root->node);
3402
Chris Mason7bb86312007-12-11 09:25:06 -05003403 lower = path->nodes[level-1];
3404 if (level == 1)
3405 btrfs_item_key(lower, &lower_key, 0);
3406 else
3407 btrfs_node_key(lower, &lower_key, 0);
3408
Filipe Mananaa6279472019-01-25 11:48:51 +00003409 c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
Josef Bacik9631e4c2020-08-20 11:46:03 -04003410 root->node->start, 0,
Josef Bacikcf6f34a2020-08-20 11:46:07 -04003411 BTRFS_NESTING_NEW_ROOT);
Chris Mason5f39d392007-10-15 16:14:19 -04003412 if (IS_ERR(c))
3413 return PTR_ERR(c);
Chris Mason925baed2008-06-25 16:01:30 -04003414
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003415 root_add_used(root, fs_info->nodesize);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003416
Chris Mason5f39d392007-10-15 16:14:19 -04003417 btrfs_set_header_nritems(c, 1);
Chris Mason5f39d392007-10-15 16:14:19 -04003418 btrfs_set_node_key(c, &lower_key, 0);
Chris Masondb945352007-10-15 16:15:53 -04003419 btrfs_set_node_blockptr(c, 0, lower->start);
Chris Mason7bb86312007-12-11 09:25:06 -05003420 lower_gen = btrfs_header_generation(lower);
Zheng Yan31840ae2008-09-23 13:14:14 -04003421 WARN_ON(lower_gen != trans->transid);
Chris Mason7bb86312007-12-11 09:25:06 -05003422
3423 btrfs_set_node_ptr_generation(c, 0, lower_gen);
Chris Mason5f39d392007-10-15 16:14:19 -04003424
3425 btrfs_mark_buffer_dirty(c);
Chris Masond5719762007-03-23 10:01:08 -04003426
Chris Mason925baed2008-06-25 16:01:30 -04003427 old = root->node;
David Sterbad9d19a02018-03-05 16:35:29 +01003428 ret = tree_mod_log_insert_root(root->node, c, 0);
3429 BUG_ON(ret < 0);
Chris Mason240f62c2011-03-23 14:54:42 -04003430 rcu_assign_pointer(root->node, c);
Chris Mason925baed2008-06-25 16:01:30 -04003431
3432 /* the super has an extra ref to root->node */
3433 free_extent_buffer(old);
3434
Chris Mason0b86a832008-03-24 15:01:56 -04003435 add_root_to_dirty_list(root);
David Sterba67439da2019-10-08 13:28:47 +02003436 atomic_inc(&c->refs);
Chris Mason5f39d392007-10-15 16:14:19 -04003437 path->nodes[level] = c;
chandan95449a12015-01-15 12:22:03 +05303438 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
Chris Mason5c680ed2007-02-22 11:39:13 -05003439 path->slots[level] = 0;
3440 return 0;
3441}
3442
Chris Mason74123bd2007-02-02 11:05:29 -05003443/*
3444 * worker function to insert a single pointer in a node.
3445 * the node should have enough room for the pointer already
Chris Mason97571fd2007-02-24 13:39:08 -05003446 *
Chris Mason74123bd2007-02-02 11:05:29 -05003447 * slot and level indicate where you want the key to go, and
 3448 * bytenr is the disk byte offset of the block the key points to.
3449 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01003450static void insert_ptr(struct btrfs_trans_handle *trans,
David Sterba6ad3cf62019-03-20 14:32:45 +01003451 struct btrfs_path *path,
Jeff Mahoney143bede2012-03-01 14:56:26 +01003452 struct btrfs_disk_key *key, u64 bytenr,
Jan Schmidtc3e06962012-06-21 11:01:06 +02003453 int slot, int level)
Chris Mason74123bd2007-02-02 11:05:29 -05003454{
Chris Mason5f39d392007-10-15 16:14:19 -04003455 struct extent_buffer *lower;
Chris Mason74123bd2007-02-02 11:05:29 -05003456 int nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003457 int ret;
Chris Mason5c680ed2007-02-22 11:39:13 -05003458
3459 BUG_ON(!path->nodes[level]);
Yan, Zhengf0486c62010-05-16 10:46:25 -04003460 btrfs_assert_tree_locked(path->nodes[level]);
Chris Mason5f39d392007-10-15 16:14:19 -04003461 lower = path->nodes[level];
3462 nritems = btrfs_header_nritems(lower);
Stoyan Gaydarovc2934982009-04-02 17:05:11 -04003463 BUG_ON(slot > nritems);
David Sterba6ad3cf62019-03-20 14:32:45 +01003464 BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
Chris Mason74123bd2007-02-02 11:05:29 -05003465 if (slot != nritems) {
David Sterbabf1d3422018-03-05 15:47:39 +01003466 if (level) {
3467 ret = tree_mod_log_insert_move(lower, slot + 1, slot,
David Sterbaa446a972018-03-05 15:26:29 +01003468 nritems - slot);
David Sterbabf1d3422018-03-05 15:47:39 +01003469 BUG_ON(ret < 0);
3470 }
Chris Mason5f39d392007-10-15 16:14:19 -04003471 memmove_extent_buffer(lower,
3472 btrfs_node_key_ptr_offset(slot + 1),
3473 btrfs_node_key_ptr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04003474 (nritems - slot) * sizeof(struct btrfs_key_ptr));
Chris Mason74123bd2007-02-02 11:05:29 -05003475 }
Jan Schmidtc3e06962012-06-21 11:01:06 +02003476 if (level) {
David Sterbae09c2ef2018-03-05 15:09:03 +01003477 ret = tree_mod_log_insert_key(lower, slot, MOD_LOG_KEY_ADD,
3478 GFP_NOFS);
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02003479 BUG_ON(ret < 0);
3480 }
Chris Mason5f39d392007-10-15 16:14:19 -04003481 btrfs_set_node_key(lower, key, slot);
Chris Masondb945352007-10-15 16:15:53 -04003482 btrfs_set_node_blockptr(lower, slot, bytenr);
Chris Mason74493f72007-12-11 09:25:06 -05003483 WARN_ON(trans->transid == 0);
3484 btrfs_set_node_ptr_generation(lower, slot, trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04003485 btrfs_set_header_nritems(lower, nritems + 1);
3486 btrfs_mark_buffer_dirty(lower);
Chris Mason74123bd2007-02-02 11:05:29 -05003487}
3488
Chris Mason97571fd2007-02-24 13:39:08 -05003489/*
3490 * split the node at the specified level in path in two.
3491 * The path is corrected to point to the appropriate node after the split
3492 *
3493 * Before splitting this tries to make some room in the node by pushing
3494 * left and right, if either one works, it returns right away.
Chris Masonaa5d6be2007-02-28 16:35:06 -05003495 *
3496 * returns 0 on success and < 0 on failure
Chris Mason97571fd2007-02-24 13:39:08 -05003497 */
Chris Masone02119d2008-09-05 16:13:11 -04003498static noinline int split_node(struct btrfs_trans_handle *trans,
3499 struct btrfs_root *root,
3500 struct btrfs_path *path, int level)
Chris Masonbe0e5c02007-01-26 15:51:26 -05003501{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003502 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04003503 struct extent_buffer *c;
3504 struct extent_buffer *split;
3505 struct btrfs_disk_key disk_key;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003506 int mid;
Chris Mason5c680ed2007-02-22 11:39:13 -05003507 int ret;
Chris Mason7518a232007-03-12 12:01:18 -04003508 u32 c_nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003509
Chris Mason5f39d392007-10-15 16:14:19 -04003510 c = path->nodes[level];
Chris Mason7bb86312007-12-11 09:25:06 -05003511 WARN_ON(btrfs_header_generation(c) != trans->transid);
Chris Mason5f39d392007-10-15 16:14:19 -04003512 if (c == root->node) {
Jan Schmidtd9abbf12013-03-20 13:49:48 +00003513 /*
Jan Schmidt90f8d622013-04-13 13:19:53 +00003514		 * trying to split the root, let's make a new one
 3515		 *
Liu Bofdd99c72013-05-22 12:06:51 +00003516		 * tree mod log: We don't log the removal of the old root in
Jan Schmidt90f8d622013-04-13 13:19:53 +00003517 * insert_new_root, because that root buffer will be kept as a
3518 * normal node. We are going to log removal of half of the
3519 * elements below with tree_mod_log_eb_copy. We're holding a
3520 * tree lock on the buffer, which is why we cannot race with
3521 * other tree_mod_log users.
Jan Schmidtd9abbf12013-03-20 13:49:48 +00003522 */
Liu Bofdd99c72013-05-22 12:06:51 +00003523 ret = insert_new_root(trans, root, path, level + 1);
Chris Mason5c680ed2007-02-22 11:39:13 -05003524 if (ret)
3525 return ret;
Chris Masonb3612422009-05-13 19:12:15 -04003526 } else {
Chris Masone66f7092007-04-20 13:16:02 -04003527 ret = push_nodes_for_insert(trans, root, path, level);
Chris Mason5f39d392007-10-15 16:14:19 -04003528 c = path->nodes[level];
3529 if (!ret && btrfs_header_nritems(c) <
Jeff Mahoney0b246af2016-06-22 18:54:23 -04003530 BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
Chris Masone66f7092007-04-20 13:16:02 -04003531 return 0;
Chris Mason54aa1f42007-06-22 14:16:25 -04003532 if (ret < 0)
3533 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05003534 }
Chris Masone66f7092007-04-20 13:16:02 -04003535
Chris Mason5f39d392007-10-15 16:14:19 -04003536 c_nritems = btrfs_header_nritems(c);
Yan Zheng5d4f98a2009-06-10 10:45:14 -04003537 mid = (c_nritems + 1) / 2;
3538 btrfs_node_key(c, &disk_key, mid);
Chris Mason7bb86312007-12-11 09:25:06 -05003539
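	/*
	 * Allocate the new node and move the upper half of the pointers,
	 * [mid, c_nritems), into it; 'c' keeps the lower half.  The key at
	 * 'mid' becomes the new node's key in the parent.
	 */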
	split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
					     c->start, 0, BTRFS_NESTING_SPLIT);
	if (IS_ERR(split))
		return PTR_ERR(split);

	root_add_used(root, fs_info->nodesize);
	ASSERT(btrfs_header_level(c) == level);

	ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		return ret;
	}
	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);
	ret = 0;

	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	insert_ptr(trans, path, &disk_key, split->start,
		   path->slots[level + 1] + 1, level + 1);

	if (path->slots[level] >= mid) {
		path->slots[level] -= mid;
		btrfs_tree_unlock(c);
		free_extent_buffer(c);
		path->nodes[level] = split;
		path->slots[level + 1] += 1;
	} else {
		btrfs_tree_unlock(split);
		free_extent_buffer(split);
	}
	return ret;
}

/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
{
	struct btrfs_item *start_item;
	struct btrfs_item *end_item;
	int data_len;
	int nritems = btrfs_header_nritems(l);
	int end = min(nritems, start + nr) - 1;

	if (!nr)
		return 0;
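	/*
	 * Item data is laid out back to front, so the bytes used by items
	 * start..end run from the end of the first item's data down to the
	 * offset of the last one, plus nr struct btrfs_item headers.
	 */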
	start_item = btrfs_item_nr(start);
	end_item = btrfs_item_nr(end);
	data_len = btrfs_item_offset(l, start_item) +
		   btrfs_item_size(l, start_item);
	data_len = data_len - btrfs_item_offset(l, end_item);
	data_len += sizeof(struct btrfs_item) * nr;
	WARN_ON(data_len < 0);
	return data_len;
}

/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	int nritems = btrfs_header_nritems(leaf);
	int ret;

	ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
	if (ret < 0) {
		btrfs_crit(fs_info,
			   "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
			   ret,
			   (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
			   leaf_space_used(leaf, 0, nritems), nritems);
	}
	return ret;
}

/*
 * min slot controls the lowest index we're willing to push to the
 * right.  We'll push up to and including min_slot, but no lower
 */
static noinline int __push_leaf_right(struct btrfs_path *path,
				      int data_size, int empty,
				      struct extent_buffer *right,
				      int free_space, u32 left_nritems,
				      u32 min_slot)
{
	struct btrfs_fs_info *fs_info = right->fs_info;
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *upper = path->nodes[1];
	struct btrfs_map_token token;
	struct btrfs_disk_key disk_key;
	int slot;
	u32 i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 nr;
	u32 right_nritems;
	u32 data_end;
	u32 this_item_size;

	if (empty)
		nr = 0;
	else
		nr = max_t(u32, 1, min_slot);

	if (path->slots[0] >= left_nritems)
		push_space += data_size;

	slot = path->slots[1];
	i = left_nritems - 1;
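	/*
	 * Walk backwards from the last item in 'left' and count how many
	 * items (headers plus data) still fit into the free space of 'right',
	 * also accounting for the data_size bytes of the pending insertion
	 * at path->slots[0].
	 */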
	while (i >= nr) {
		item = btrfs_item_nr(i);

		if (!empty && push_items > 0) {
			if (path->slots[0] > i)
				break;
			if (path->slots[0] == i) {
				int space = btrfs_leaf_free_space(left);

				if (space + push_space * 2 > free_space)
					break;
			}
		}

		if (path->slots[0] == i)
			push_space += data_size;

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
		if (i == 0)
			break;
		i--;
	}

	if (push_items == 0)
		goto out_unlock;

	WARN_ON(!empty && push_items == left_nritems);

	/* push left to right */
	right_nritems = btrfs_header_nritems(right);

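	/*
	 * The data of the pushed items occupies the low end of the left
	 * leaf's data area: it runs from the current data end up to the end
	 * offset of the first pushed item.
	 */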
	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
	push_space -= leaf_data_end(left);

	/* make room in the right data area */
	data_end = leaf_data_end(right);
	memmove_extent_buffer(right,
			      BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
			      BTRFS_LEAF_DATA_OFFSET + data_end,
			      BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);

	/* copy from the left data area */
	copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
			   BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
			   BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
			   push_space);

	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

	/* copy the items from left to right */
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(left_nritems - push_items),
			   push_items * sizeof(struct btrfs_item));

	/* update the item pointers */
	btrfs_init_map_token(&token, right);
	right_nritems += push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(i);
		push_space -= btrfs_token_item_size(&token, item);
		btrfs_set_token_item_offset(&token, item, push_space);
	}

	left_nritems -= push_items;
	btrfs_set_header_nritems(left, left_nritems);

	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
	else
		btrfs_clean_tree_block(left);

	btrfs_mark_buffer_dirty(right);

	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
	btrfs_mark_buffer_dirty(upper);

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			btrfs_clean_tree_block(path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 0;

out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}

/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 *
 * returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
 *
 * this will push starting from min_slot to the end of the leaf.  It won't
 * push any slot lower than min_slot
 */
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
			   *root, struct btrfs_path *path,
			   int min_data_size, int data_size,
			   int empty, u32 min_slot)
{
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	int slot;
	int free_space;
	u32 left_nritems;
	int ret;

	if (!path->nodes[1])
		return 1;

	slot = path->slots[1];
	upper = path->nodes[1];
	if (slot >= btrfs_header_nritems(upper) - 1)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	right = btrfs_read_node_slot(upper, slot + 1);
	/*
	 * If slot + 1 is not valid or we fail to read the right node, it's
	 * no big deal, just return.
	 */
	if (IS_ERR(right))
		return 1;

	__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
	btrfs_set_lock_blocking_write(right);

	free_space = btrfs_leaf_free_space(right);
	if (free_space < data_size)
		goto out_unlock;

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, right, upper,
			      slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
	if (ret)
		goto out_unlock;

	free_space = btrfs_leaf_free_space(right);
	if (free_space < data_size)
		goto out_unlock;

	left_nritems = btrfs_header_nritems(left);
	if (left_nritems == 0)
		goto out_unlock;

	if (check_sibling_keys(left, right)) {
		ret = -EUCLEAN;
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
		return ret;
	}
	if (path->slots[0] == left_nritems && !empty) {
		/*
		 * Key greater than all keys in the leaf, right neighbor has
		 * enough room for it and we're not emptying our leaf to delete
		 * it, therefore use right neighbor to insert the new item and
		 * no need to touch/dirty our left leaf.
		 */
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
		path->nodes[0] = right;
		path->slots[0] = 0;
		path->slots[1]++;
		return 0;
	}

	return __push_leaf_right(path, min_data_size, empty,
				 right, free_space, left_nritems, min_slot);
out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}

/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 *
 * max_slot can put a limit on how far into the leaf we'll push items.  The
 * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
 * items
 */
static noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
				     int empty, struct extent_buffer *left,
				     int free_space, u32 right_nritems,
				     u32 max_slot)
{
	struct btrfs_fs_info *fs_info = left->fs_info;
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	int i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 old_left_nritems;
	u32 nr;
	int ret = 0;
	u32 this_item_size;
	u32 old_left_item_size;
	struct btrfs_map_token token;

	if (empty)
		nr = min(right_nritems, max_slot);
	else
		nr = min(right_nritems - 1, max_slot);

	for (i = 0; i < nr; i++) {
		item = btrfs_item_nr(i);

		if (!empty && push_items > 0) {
			if (path->slots[0] < i)
				break;
			if (path->slots[0] == i) {
				int space = btrfs_leaf_free_space(right);

				if (space + push_space * 2 > free_space)
					break;
			}
		}

		if (path->slots[0] == i)
			push_space += data_size;

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
	}

	if (push_items == 0) {
		ret = 1;
		goto out;
	}
	WARN_ON(!empty && push_items == btrfs_header_nritems(right));

	/* push data from right to left */
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

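	/*
	 * The data of right's items 0..push_items-1 ends at
	 * BTRFS_LEAF_DATA_SIZE, so its length is that size minus the offset
	 * of the last pushed item.
	 */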
	push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
		     btrfs_item_offset_nr(right, push_items - 1);

	copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
		     leaf_data_end(left) - push_space,
		     BTRFS_LEAF_DATA_OFFSET +
		     btrfs_item_offset_nr(right, push_items - 1),
		     push_space);
	old_left_nritems = btrfs_header_nritems(left);
	BUG_ON(old_left_nritems <= 0);

	btrfs_init_map_token(&token, left);
	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		u32 ioff;

		item = btrfs_item_nr(i);

		ioff = btrfs_token_item_offset(&token, item);
		btrfs_set_token_item_offset(&token, item,
		      ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
	}
	btrfs_set_header_nritems(left, old_left_nritems + push_items);

	/* fixup right node */
	if (push_items > right_nritems)
		WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
		     right_nritems);

	if (push_items < right_nritems) {
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
						  leaf_data_end(right);
		memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
				      BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
				      BTRFS_LEAF_DATA_OFFSET +
				      leaf_data_end(right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
			      btrfs_item_nr_offset(push_items),
			     (btrfs_header_nritems(right) - push_items) *
			     sizeof(struct btrfs_item));
	}

	btrfs_init_map_token(&token, right);
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(i);

		push_space = push_space - btrfs_token_item_size(&token, item);
		btrfs_set_token_item_offset(&token, item, push_space);
	}

	btrfs_mark_buffer_dirty(left);
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
	else
		btrfs_clean_tree_block(right);

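	/*
	 * Items were removed from the front of the right leaf, so its first
	 * key changed and must be propagated up the path.
	 */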
	btrfs_item_key(right, &disk_key, 0);
	fixup_low_keys(path, &disk_key, 1);

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
		path->slots[1] -= 1;
	} else {
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
		path->slots[0] -= push_items;
	}
	BUG_ON(path->slots[0] < 0);
	return ret;
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}

/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 *
 * max_slot can put a limit on how far into the leaf we'll push items.  The
 * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
 * items
 */
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int min_data_size,
			  int data_size, int empty, u32 max_slot)
{
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
	int slot;
	int free_space;
	u32 right_nritems;
	int ret = 0;

	slot = path->slots[1];
	if (slot == 0)
		return 1;
	if (!path->nodes[1])
		return 1;

	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	left = btrfs_read_node_slot(path->nodes[1], slot - 1);
	/*
	 * If slot - 1 is not valid or we fail to read the left node, it's
	 * no big deal, just return.
	 */
	if (IS_ERR(left))
		return 1;

	__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
	btrfs_set_lock_blocking_write(left);

	free_space = btrfs_leaf_free_space(left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, left,
			      path->nodes[1], slot - 1, &left,
			      BTRFS_NESTING_LEFT_COW);
	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
		if (ret == -ENOSPC)
			ret = 1;
		goto out;
	}

	free_space = btrfs_leaf_free_space(left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	if (check_sibling_keys(left, right)) {
		ret = -EUCLEAN;
		goto out;
	}
	return __push_leaf_left(path, min_data_size,
			       empty, left, free_space, right_nritems,
			       max_slot);
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}

/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 */
static noinline void copy_for_split(struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct extent_buffer *l,
				    struct extent_buffer *right,
				    int slot, int mid, int nritems)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	int data_copy_size;
	int rt_data_off;
	int i;
	struct btrfs_disk_key disk_key;
	struct btrfs_map_token token;

	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);

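	/*
	 * Copy the item headers from 'mid' onwards to the front of the new
	 * right leaf and their data to the end of its data area; the copied
	 * item offsets are then rebased by rt_data_off below.
	 */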
	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	copy_extent_buffer(right, l,
		     BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
		     data_copy_size, BTRFS_LEAF_DATA_OFFSET +
		     leaf_data_end(l), data_copy_size);

	rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);

	btrfs_init_map_token(&token, right);
	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(i);
		u32 ioff;

		ioff = btrfs_token_item_offset(&token, item);
		btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
	}

	btrfs_set_header_nritems(l, mid);
	btrfs_item_key(right, &disk_key, 0);
	insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
	BUG_ON(path->slots[0] != slot);

	if (mid <= slot) {
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[0] -= mid;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}

	BUG_ON(path->slots[0] < 0);
}

/*
 * double splits happen when we need to insert a big item in the middle
 * of a leaf.  A double split can leave us with 3 mostly empty leaves:
 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
 *          A                 B                 C
 *
 * We avoid this by trying to push the items on either side of our target
 * into the adjacent leaves.  If all goes well we can avoid the double split
 * completely.
 */
static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  int data_size)
{
	int ret;
	int progress = 0;
	int slot;
	u32 nritems;
	int space_needed = data_size;

	slot = path->slots[0];
	if (slot < btrfs_header_nritems(path->nodes[0]))
		space_needed -= btrfs_leaf_free_space(path->nodes[0]);

	/*
	 * try to push all the items after our slot into the
	 * right leaf
	 */
	ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
	if (ret < 0)
		return ret;

	if (ret == 0)
		progress++;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * our goal is to get our slot at the start or end of a leaf.  If
	 * we've done so we're done
	 */
	if (path->slots[0] == 0 || path->slots[0] == nritems)
		return 0;

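	/* the push to the right may already have freed up enough room */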
	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
		return 0;

	/* try to push all the items before our slot into the next leaf */
	slot = path->slots[0];
	space_needed = data_size;
	if (slot > 0)
		space_needed -= btrfs_leaf_free_space(path->nodes[0]);
	ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
	if (ret < 0)
		return ret;

	if (ret == 0)
		progress++;

	if (progress)
		return 0;
	return 1;
}

/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 *
 * returns 0 if all went well and < 0 on failure.
 */
static noinline int split_leaf(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       const struct btrfs_key *ins_key,
			       struct btrfs_path *path, int data_size,
			       int extend)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *l;
	u32 nritems;
	int mid;
	int slot;
	struct extent_buffer *right;
	struct btrfs_fs_info *fs_info = root->fs_info;
	int ret = 0;
	int wret;
	int split;
	int num_doubles = 0;
	int tried_avoid_double = 0;

	l = path->nodes[0];
	slot = path->slots[0];
	if (extend && data_size + btrfs_item_size_nr(l, slot) +
	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
		return -EOVERFLOW;

	/* first try to make some room by pushing left and right */
	if (data_size && path->nodes[1]) {
		int space_needed = data_size;

		if (slot < btrfs_header_nritems(l))
			space_needed -= btrfs_leaf_free_space(l);

		wret = push_leaf_right(trans, root, path, space_needed,
				       space_needed, 0, 0);
		if (wret < 0)
			return wret;
		if (wret) {
			space_needed = data_size;
			if (slot > 0)
				space_needed -= btrfs_leaf_free_space(l);
			wret = push_leaf_left(trans, root, path, space_needed,
					      space_needed, 0, (u32)-1);
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];

		/* did the pushes work? */
		if (btrfs_leaf_free_space(l) >= data_size)
			return 0;
	}

	if (!path->nodes[1]) {
		ret = insert_new_root(trans, root, path, 1);
		if (ret)
			return ret;
	}
again:
	split = 1;
	l = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(l);
	mid = (nritems + 1) / 2;

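	/*
	 * Decide how to split: 1 means a normal split at 'mid', 0 means the
	 * new item will go alone into the new empty leaf, and 2 means a
	 * second split will be needed after this one.
	 */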
	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + data_size >
			BTRFS_LEAF_DATA_SIZE(fs_info)) {
			if (slot >= nritems) {
				split = 0;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2;
				}
			}
		}
	} else {
		if (leaf_space_used(l, 0, mid) + data_size >
			BTRFS_LEAF_DATA_SIZE(fs_info)) {
			if (!extend && data_size && slot == 0) {
				split = 0;
			} else if ((extend || !data_size) && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2;
				}
			}
		}
	}

	if (split == 0)
		btrfs_cpu_key_to_disk(&disk_key, ins_key);
	else
		btrfs_item_key(l, &disk_key, mid);

	/*
	 * We have to use BTRFS_NESTING_NEW_ROOT here if we've done a double
	 * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES
	 * subclasses, which is 8 at the time of this patch, and we've maxed it
	 * out.  In the future we could add a
	 * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just
	 * use BTRFS_NESTING_NEW_ROOT.
	 */
	right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
					     l->start, 0, num_doubles ?
					     BTRFS_NESTING_NEW_ROOT :
					     BTRFS_NESTING_SPLIT);
	if (IS_ERR(right))
		return PTR_ERR(right);

	root_add_used(root, fs_info->nodesize);

	if (split == 0) {
		if (mid <= slot) {
			btrfs_set_header_nritems(right, 0);
			insert_ptr(trans, path, &disk_key,
				   right->start, path->slots[1] + 1, 1);
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			path->slots[1] += 1;
		} else {
			btrfs_set_header_nritems(right, 0);
			insert_ptr(trans, path, &disk_key,
				   right->start, path->slots[1], 1);
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			if (path->slots[1] == 0)
				fixup_low_keys(path, &disk_key, 1);
		}
		/*
		 * We create a new leaf 'right' for the required ins_len and
		 * we'll do btrfs_mark_buffer_dirty() on this leaf after copying
		 * the content of ins_len to 'right'.
		 */
		return ret;
	}

	copy_for_split(trans, path, l, right, slot, mid, nritems);

	if (split == 2) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
	}

	return 0;

push_for_double:
	push_for_double_split(trans, root, path, data_size);
	tried_avoid_double = 1;
	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
		return 0;
	goto again;
}

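/*
 * Make sure the leaf containing the csum or file extent item at
 * path->slots[0] has at least 'ins_len' bytes of free space, splitting the
 * leaf if necessary.  The path may be released and re-searched; if the item
 * moved, changed size, or the leaf gained enough room in the meantime,
 * -EAGAIN is returned.
 */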
static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root,
					 struct btrfs_path *path, int ins_len)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	u64 extent_len = 0;
	u32 item_size;
	int ret;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
	       key.type != BTRFS_EXTENT_CSUM_KEY);

	if (btrfs_leaf_free_space(leaf) >= ins_len)
		return 0;

	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	if (key.type == BTRFS_EXTENT_DATA_KEY) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
	}
	btrfs_release_path(path);

	path->keep_locks = 1;
	path->search_for_split = 1;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	path->search_for_split = 0;
	if (ret > 0)
		ret = -EAGAIN;
	if (ret < 0)
		goto err;

	ret = -EAGAIN;
	leaf = path->nodes[0];
	/* if our item isn't there, return now */
	if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
		goto err;

	/* the leaf has changed, it now has room.  return now */
	if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
		goto err;

	if (key.type == BTRFS_EXTENT_DATA_KEY) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
			goto err;
	}

	btrfs_set_path_blocking(path);
	ret = split_leaf(trans, root, &key, path, ins_len, 1);
	if (ret)
		goto err;

	path->keep_locks = 0;
	btrfs_unlock_up_safe(path, 1);
	return 0;
err:
	path->keep_locks = 0;
	return ret;
}

static noinline int split_item(struct btrfs_path *path,
			       const struct btrfs_key *new_key,
			       unsigned long split_offset)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	struct btrfs_item *new_item;
	int slot;
	char *buf;
	u32 nritems;
	u32 item_size;
	u32 orig_offset;
	struct btrfs_disk_key disk_key;

	leaf = path->nodes[0];
	BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));

	btrfs_set_path_blocking(path);

	item = btrfs_item_nr(path->slots[0]);
	orig_offset = btrfs_item_offset(leaf, item);
	item_size = btrfs_item_size(leaf, item);

	buf = kmalloc(item_size, GFP_NOFS);
	if (!buf)
		return -ENOMEM;

	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
			   path->slots[0]), item_size);

	slot = path->slots[0] + 1;
	nritems = btrfs_header_nritems(leaf);
	if (slot != nritems) {
		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
				btrfs_item_nr_offset(slot),
				(nritems - slot) * sizeof(struct btrfs_item));
	}

	btrfs_cpu_key_to_disk(&disk_key, new_key);
	btrfs_set_item_key(leaf, &disk_key, slot);

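	/*
	 * The new item at 'slot' keeps the original data offset and receives
	 * the tail (item_size - split_offset bytes); the original item is
	 * shrunk to split_offset bytes and its data offset moves up by the
	 * same amount.
	 */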
	new_item = btrfs_item_nr(slot);

	btrfs_set_item_offset(leaf, new_item, orig_offset);
	btrfs_set_item_size(leaf, new_item, item_size - split_offset);

	btrfs_set_item_offset(leaf, item,
			      orig_offset + item_size - split_offset);
	btrfs_set_item_size(leaf, item, split_offset);

	btrfs_set_header_nritems(leaf, nritems + 1);

	/* write the data for the start of the original item */
	write_extent_buffer(leaf, buf,
			    btrfs_item_ptr_offset(leaf, path->slots[0]),
			    split_offset);

	/* write the data for the new item */
	write_extent_buffer(leaf, buf + split_offset,
			    btrfs_item_ptr_offset(leaf, slot),
			    item_size - split_offset);
	btrfs_mark_buffer_dirty(leaf);

	BUG_ON(btrfs_leaf_free_space(leaf) < 0);
	kfree(buf);
	return 0;
}

/*
 * This function splits a single item into two items,
 * giving 'new_key' to the new item and splitting the
 * old one at split_offset (from the start of the item).
 *
 * The path may be released by this operation.  After
 * the split, the path is pointing to the old item.  The
 * new item is going to be in the same node as the old one.
 *
 * Note, the item being split must be small enough to live alone on
 * a tree block with room for one extra struct btrfs_item
 *
 * This allows us to split the item in place, keeping a lock on the
 * leaf the entire time.
 */
int btrfs_split_item(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root,
		     struct btrfs_path *path,
		     const struct btrfs_key *new_key,
		     unsigned long split_offset)
{
	int ret;
	ret = setup_leaf_for_split(trans, root, path,
				   sizeof(struct btrfs_item));
	if (ret)
		return ret;

	ret = split_item(path, new_key, split_offset);
	return ret;
}

/*
 * This function duplicates an item, giving 'new_key' to the new item.
 * It guarantees both items live in the same tree leaf and the new item
 * is contiguous with the original item.
 *
 * This allows us to split a file extent in place, keeping a lock on the
 * leaf the entire time.
 */
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path,
			 const struct btrfs_key *new_key)
{
	struct extent_buffer *leaf;
	int ret;
	u32 item_size;

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	ret = setup_leaf_for_split(trans, root, path,
				   item_size + sizeof(struct btrfs_item));
	if (ret)
		return ret;

	path->slots[0]++;
	setup_items_for_insert(root, path, new_key, &item_size, 1);
	leaf = path->nodes[0];
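	/* copy the data of the original item into the new, adjacent item */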
	memcpy_extent_buffer(leaf,
			     btrfs_item_ptr_offset(leaf, path->slots[0]),
			     btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
			     item_size);
	return 0;
}

/*
 * make the item pointed to by the path smaller.  new_size indicates
 * how small to make it, and from_end tells us if we just chop bytes
 * off the end of the item or if we shift the item to chop bytes off
 * the front.
 */
void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;
	struct btrfs_map_token token;

	leaf = path->nodes[0];
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return;

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(leaf);

	old_data_start = btrfs_item_offset_nr(leaf, slot);

	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	btrfs_init_map_token(&token, leaf);
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(i);

		ioff = btrfs_token_item_offset(&token, item);
		btrfs_set_token_item_offset(&token, item, ioff + size_diff);
	}

	/* shift the data */
	if (from_end) {
		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				      (unsigned long)fi,
				      BTRFS_FILE_EXTENT_INLINE_DATA_START);
			}
		}

		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
			      data_end, old_data_start - data_end);

4674 offset = btrfs_disk_key_offset(&disk_key);
4675 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
4676 btrfs_set_item_key(leaf, &disk_key, slot);
4677 if (slot == 0)
Nikolay Borisovb167fa92018-06-20 15:48:47 +03004678 fixup_low_keys(path, &disk_key, 1);
Chris Mason179e29e2007-11-01 11:28:41 -04004679 }
Chris Mason5f39d392007-10-15 16:14:19 -04004680
Ross Kirkdd3cc162013-09-16 15:58:09 +01004681 item = btrfs_item_nr(slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004682 btrfs_set_item_size(leaf, item, new_size);
4683 btrfs_mark_buffer_dirty(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004684
David Sterbae902baa2019-03-20 14:36:46 +01004685 if (btrfs_leaf_free_space(leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02004686 btrfs_print_leaf(leaf);
Chris Masonb18c6682007-04-17 13:26:50 -04004687 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004688 }
Chris Masonb18c6682007-04-17 13:26:50 -04004689}
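
/*
 * A minimal usage sketch for the helper above, assuming the caller holds
 * a transaction handle and the item exists; the helper name and the
 * decision to chop from the end are illustrative, not taken from any
 * in-tree caller.
 */
static int example_shrink_item(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       const struct btrfs_key *key, u32 new_size)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* cow = 1 so the leaf is writable before it is modified */
	ret = btrfs_search_slot(trans, root, key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;
	if (ret == 0)
		/* from_end = 1: drop bytes from the end of the item data */
		btrfs_truncate_item(path, new_size, 1);

	btrfs_free_path(path);
	return ret;
}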
4690
Chris Masond352ac62008-09-29 15:18:18 -04004691/*
Stefan Behrens8f69dbd2013-05-07 10:23:30 +00004692 * make the item pointed to by the path bigger; data_size is the added size.
Chris Masond352ac62008-09-29 15:18:18 -04004693 */
David Sterbac71dd882019-03-20 14:51:10 +01004694void btrfs_extend_item(struct btrfs_path *path, u32 data_size)
Chris Mason6567e832007-04-16 09:22:45 -04004695{
Chris Mason6567e832007-04-16 09:22:45 -04004696 int slot;
Chris Mason5f39d392007-10-15 16:14:19 -04004697 struct extent_buffer *leaf;
4698 struct btrfs_item *item;
Chris Mason6567e832007-04-16 09:22:45 -04004699 u32 nritems;
4700 unsigned int data_end;
4701 unsigned int old_data;
4702 unsigned int old_size;
4703 int i;
Chris Masoncfed81a2012-03-03 07:40:03 -05004704 struct btrfs_map_token token;
4705
Chris Mason5f39d392007-10-15 16:14:19 -04004706 leaf = path->nodes[0];
Chris Mason6567e832007-04-16 09:22:45 -04004707
Chris Mason5f39d392007-10-15 16:14:19 -04004708 nritems = btrfs_header_nritems(leaf);
David Sterba8f881e82019-03-20 11:33:10 +01004709 data_end = leaf_data_end(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004710
David Sterbae902baa2019-03-20 14:36:46 +01004711 if (btrfs_leaf_free_space(leaf) < data_size) {
David Sterbaa4f78752017-06-29 18:37:49 +02004712 btrfs_print_leaf(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004713 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004714 }
Chris Mason6567e832007-04-16 09:22:45 -04004715 slot = path->slots[0];
Chris Mason5f39d392007-10-15 16:14:19 -04004716 old_data = btrfs_item_end_nr(leaf, slot);
Chris Mason6567e832007-04-16 09:22:45 -04004717
4718 BUG_ON(slot < 0);
Chris Mason3326d1b2007-10-15 16:18:25 -04004719 if (slot >= nritems) {
David Sterbaa4f78752017-06-29 18:37:49 +02004720 btrfs_print_leaf(leaf);
David Sterbac71dd882019-03-20 14:51:10 +01004721 btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004722 slot, nritems);
Arnd Bergmann290342f2019-03-25 14:02:25 +01004723 BUG();
Chris Mason3326d1b2007-10-15 16:18:25 -04004724 }
Chris Mason6567e832007-04-16 09:22:45 -04004725
4726 /*
4727 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4728 */
4729 /* first correct the data pointers */
David Sterbac82f8232019-08-09 17:48:21 +02004730 btrfs_init_map_token(&token, leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004731 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004732 u32 ioff;
Ross Kirkdd3cc162013-09-16 15:58:09 +01004733 item = btrfs_item_nr(i);
Chris Masondb945352007-10-15 16:15:53 -04004734
David Sterbacc4c13d2020-04-29 02:15:56 +02004735 ioff = btrfs_token_item_offset(&token, item);
4736 btrfs_set_token_item_offset(&token, item, ioff - data_size);
Chris Mason6567e832007-04-16 09:22:45 -04004737 }
Chris Mason5f39d392007-10-15 16:14:19 -04004738
Chris Mason6567e832007-04-16 09:22:45 -04004739 /* shift the data */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004740 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4741 data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
Chris Mason6567e832007-04-16 09:22:45 -04004742 data_end, old_data - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04004743
Chris Mason6567e832007-04-16 09:22:45 -04004744 data_end = old_data;
Chris Mason5f39d392007-10-15 16:14:19 -04004745 old_size = btrfs_item_size_nr(leaf, slot);
Ross Kirkdd3cc162013-09-16 15:58:09 +01004746 item = btrfs_item_nr(slot);
Chris Mason5f39d392007-10-15 16:14:19 -04004747 btrfs_set_item_size(leaf, item, old_size + data_size);
4748 btrfs_mark_buffer_dirty(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004749
David Sterbae902baa2019-03-20 14:36:46 +01004750 if (btrfs_leaf_free_space(leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02004751 btrfs_print_leaf(leaf);
Chris Mason6567e832007-04-16 09:22:45 -04004752 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004753 }
Chris Mason6567e832007-04-16 09:22:45 -04004754}
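
/*
 * A minimal sketch of growing an item in place, assuming the caller
 * searched to the item with cow = 1 and reserved 'len' extra bytes via
 * the ins_len argument of btrfs_search_slot(); the helper name is
 * illustrative.  btrfs_extend_item() moves the existing payload toward
 * the start of the data area, so the new bytes appear at the end of
 * the item.
 */
static void example_append_to_item(struct btrfs_path *path,
				   const void *data, u32 len)
{
	struct extent_buffer *leaf = path->nodes[0];
	int slot = path->slots[0];
	u32 old_size = btrfs_item_size_nr(leaf, slot);
	unsigned long ptr;

	btrfs_extend_item(path, len);

	/* the freshly added space sits after the old payload */
	ptr = btrfs_item_ptr_offset(leaf, slot) + old_size;
	write_extent_buffer(leaf, data, ptr, len);
	btrfs_mark_buffer_dirty(leaf);
}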
4755
Chris Mason74123bd2007-02-02 11:05:29 -05004756/*
Chris Mason44871b12009-03-13 10:04:31 -04004757 * this is a helper for btrfs_insert_empty_items; the main goal here is
4758 * to save stack depth by doing the bulk of the work in a function
4759 * that doesn't call btrfs_search_slot
Chris Mason74123bd2007-02-02 11:05:29 -05004760 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004761void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004762 const struct btrfs_key *cpu_key, u32 *data_size,
Nikolay Borisovfc0d82e2020-09-01 17:39:59 +03004763 int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004764{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004765 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04004766 struct btrfs_item *item;
Chris Mason9c583092008-01-29 15:15:18 -05004767 int i;
Chris Mason7518a232007-03-12 12:01:18 -04004768 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004769 unsigned int data_end;
Chris Masone2fa7222007-03-12 16:22:34 -04004770 struct btrfs_disk_key disk_key;
Chris Mason44871b12009-03-13 10:04:31 -04004771 struct extent_buffer *leaf;
4772 int slot;
Chris Masoncfed81a2012-03-03 07:40:03 -05004773 struct btrfs_map_token token;
Nikolay Borisovfc0d82e2020-09-01 17:39:59 +03004774 u32 total_size;
4775 u32 total_data = 0;
4776
4777 for (i = 0; i < nr; i++)
4778 total_data += data_size[i];
4779 total_size = total_data + (nr * sizeof(struct btrfs_item));
Chris Masoncfed81a2012-03-03 07:40:03 -05004780
Filipe Manana24cdc842014-07-28 19:34:35 +01004781 if (path->slots[0] == 0) {
4782 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03004783 fixup_low_keys(path, &disk_key, 1);
Filipe Manana24cdc842014-07-28 19:34:35 +01004784 }
4785 btrfs_unlock_up_safe(path, 1);
4786
Chris Mason5f39d392007-10-15 16:14:19 -04004787 leaf = path->nodes[0];
Chris Mason44871b12009-03-13 10:04:31 -04004788 slot = path->slots[0];
Chris Mason74123bd2007-02-02 11:05:29 -05004789
Chris Mason5f39d392007-10-15 16:14:19 -04004790 nritems = btrfs_header_nritems(leaf);
David Sterba8f881e82019-03-20 11:33:10 +01004791 data_end = leaf_data_end(leaf);
Chris Masoneb60cea2007-02-02 09:18:22 -05004792
David Sterbae902baa2019-03-20 14:36:46 +01004793 if (btrfs_leaf_free_space(leaf) < total_size) {
David Sterbaa4f78752017-06-29 18:37:49 +02004794 btrfs_print_leaf(leaf);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004795 btrfs_crit(fs_info, "not enough freespace need %u have %d",
David Sterbae902baa2019-03-20 14:36:46 +01004796 total_size, btrfs_leaf_free_space(leaf));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004797 BUG();
Chris Masond4dbff92007-04-04 14:08:15 -04004798 }
Chris Mason5f39d392007-10-15 16:14:19 -04004799
David Sterbac82f8232019-08-09 17:48:21 +02004800 btrfs_init_map_token(&token, leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004801 if (slot != nritems) {
Chris Mason5f39d392007-10-15 16:14:19 -04004802 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004803
Chris Mason5f39d392007-10-15 16:14:19 -04004804 if (old_data < data_end) {
David Sterbaa4f78752017-06-29 18:37:49 +02004805 btrfs_print_leaf(leaf);
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004806 btrfs_crit(fs_info, "slot %d old_data %d data_end %d",
Jeff Mahoney5d163e02016-09-20 10:05:00 -04004807 slot, old_data, data_end);
Arnd Bergmann290342f2019-03-25 14:02:25 +01004808 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004809 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004810 /*
4811 * item0..itemN ... dataN.offset..dataN.size .. data0.size
4812 */
4813 /* first correct the data pointers */
Chris Mason0783fcf2007-03-12 20:12:07 -04004814 for (i = slot; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04004815 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04004816
Jeff Mahoney62e85572016-09-20 10:05:01 -04004817 item = btrfs_item_nr(i);
David Sterbacc4c13d2020-04-29 02:15:56 +02004818 ioff = btrfs_token_item_offset(&token, item);
4819 btrfs_set_token_item_offset(&token, item,
4820 ioff - total_data);
Chris Mason0783fcf2007-03-12 20:12:07 -04004821 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05004822 /* shift the items */
Chris Mason9c583092008-01-29 15:15:18 -05004823 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
Chris Mason5f39d392007-10-15 16:14:19 -04004824 btrfs_item_nr_offset(slot),
Chris Masond6025572007-03-30 14:27:56 -04004825 (nritems - slot) * sizeof(struct btrfs_item));
Chris Masonbe0e5c02007-01-26 15:51:26 -05004826
4827 /* shift the data */
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03004828 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
4829 data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
Chris Masond6025572007-03-30 14:27:56 -04004830 data_end, old_data - data_end);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004831 data_end = old_data;
4832 }
Chris Mason5f39d392007-10-15 16:14:19 -04004833
Chris Mason62e27492007-03-15 12:56:47 -04004834 /* setup the item for the new data */
Chris Mason9c583092008-01-29 15:15:18 -05004835 for (i = 0; i < nr; i++) {
4836 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
4837 btrfs_set_item_key(leaf, &disk_key, slot + i);
Ross Kirkdd3cc162013-09-16 15:58:09 +01004838 item = btrfs_item_nr(slot + i);
Chris Mason9c583092008-01-29 15:15:18 -05004839 data_end -= data_size[i];
Nikolay Borisovfc0716c2020-09-01 17:39:57 +03004840 btrfs_set_token_item_offset(&token, item, data_end);
David Sterbacc4c13d2020-04-29 02:15:56 +02004841 btrfs_set_token_item_size(&token, item, data_size[i]);
Chris Mason9c583092008-01-29 15:15:18 -05004842 }
Chris Mason44871b12009-03-13 10:04:31 -04004843
Chris Mason9c583092008-01-29 15:15:18 -05004844 btrfs_set_header_nritems(leaf, nritems + nr);
Chris Masonb9473432009-03-13 11:00:37 -04004845 btrfs_mark_buffer_dirty(leaf);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004846
David Sterbae902baa2019-03-20 14:36:46 +01004847 if (btrfs_leaf_free_space(leaf) < 0) {
David Sterbaa4f78752017-06-29 18:37:49 +02004848 btrfs_print_leaf(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004849 BUG();
Chris Mason5f39d392007-10-15 16:14:19 -04004850 }
Chris Mason44871b12009-03-13 10:04:31 -04004851}
4852
4853/*
4854 * Given a key and some data, insert items into the tree.
4855 * This does all the path init required, making room in the tree if needed.
4856 */
4857int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
4858 struct btrfs_root *root,
4859 struct btrfs_path *path,
Omar Sandoval310712b2017-01-17 23:24:37 -08004860 const struct btrfs_key *cpu_key, u32 *data_size,
Chris Mason44871b12009-03-13 10:04:31 -04004861 int nr)
4862{
Chris Mason44871b12009-03-13 10:04:31 -04004863 int ret = 0;
4864 int slot;
4865 int i;
4866 u32 total_size = 0;
4867 u32 total_data = 0;
4868
4869 for (i = 0; i < nr; i++)
4870 total_data += data_size[i];
4871
4872 total_size = total_data + (nr * sizeof(struct btrfs_item));
4873 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
4874 if (ret == 0)
4875 return -EEXIST;
4876 if (ret < 0)
Jeff Mahoney143bede2012-03-01 14:56:26 +01004877 return ret;
Chris Mason44871b12009-03-13 10:04:31 -04004878
Chris Mason44871b12009-03-13 10:04:31 -04004879 slot = path->slots[0];
4880 BUG_ON(slot < 0);
4881
Nikolay Borisovfc0d82e2020-09-01 17:39:59 +03004882 setup_items_for_insert(root, path, cpu_key, data_size, nr);
Jeff Mahoney143bede2012-03-01 14:56:26 +01004883 return 0;
Chris Mason62e27492007-03-15 12:56:47 -04004884}
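
/*
 * A minimal sketch of a batched insert, assuming an open transaction and
 * two keys in strictly ascending order that do not yet exist; the keys,
 * sizes and payloads are illustrative.  Both items land in the same
 * leaf, which costs one tree search instead of two.
 */
static int example_insert_pair(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       const struct btrfs_key *keys,
			       void *bufs[2], u32 sizes[2])
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	int slot;
	int i;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_insert_empty_items(trans, root, path, keys, sizes, 2);
	if (ret)
		goto out;

	/* the reserved item space is uninitialized; fill it now */
	leaf = path->nodes[0];
	slot = path->slots[0];
	for (i = 0; i < 2; i++)
		write_extent_buffer(leaf, bufs[i],
				    btrfs_item_ptr_offset(leaf, slot + i),
				    sizes[i]);
	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return ret;
}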
4885
4886/*
4887 * Given a key and some data, insert an item into the tree.
4888 * This does all the path init required, making room in the tree if needed.
4889 */
Omar Sandoval310712b2017-01-17 23:24:37 -08004890int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4891 const struct btrfs_key *cpu_key, void *data,
4892 u32 data_size)
Chris Mason62e27492007-03-15 12:56:47 -04004893{
4894 int ret = 0;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004895 struct btrfs_path *path;
Chris Mason5f39d392007-10-15 16:14:19 -04004896 struct extent_buffer *leaf;
4897 unsigned long ptr;
Chris Mason62e27492007-03-15 12:56:47 -04004898
Chris Mason2c90e5d2007-04-02 10:50:19 -04004899 path = btrfs_alloc_path();
Tsutomu Itohdb5b4932011-03-23 08:14:16 +00004900 if (!path)
4901 return -ENOMEM;
Chris Mason2c90e5d2007-04-02 10:50:19 -04004902 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
Chris Mason62e27492007-03-15 12:56:47 -04004903 if (!ret) {
Chris Mason5f39d392007-10-15 16:14:19 -04004904 leaf = path->nodes[0];
4905 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
4906 write_extent_buffer(leaf, data, ptr, data_size);
4907 btrfs_mark_buffer_dirty(leaf);
Chris Mason62e27492007-03-15 12:56:47 -04004908 }
Chris Mason2c90e5d2007-04-02 10:50:19 -04004909 btrfs_free_path(path);
Chris Masonaa5d6be2007-02-28 16:35:06 -05004910 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004911}
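
/*
 * A minimal caller sketch, assuming an open transaction; treating an
 * already existing key as success is a policy of this hypothetical
 * caller, not of btrfs_insert_item() itself.  Callers that need the
 * path afterwards should use btrfs_insert_empty_item() instead and
 * fill the item through the returned leaf.
 */
static int example_insert_once(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       const struct btrfs_key *key,
			       void *data, u32 len)
{
	int ret = btrfs_insert_item(trans, root, key, data, len);

	if (ret == -EEXIST)
		ret = 0;
	return ret;
}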
4912
Chris Mason74123bd2007-02-02 11:05:29 -05004913/*
Chris Mason5de08d72007-02-24 06:24:44 -05004914 * delete the pointer from a given node.
Chris Mason74123bd2007-02-02 11:05:29 -05004915 *
Chris Masond352ac62008-09-29 15:18:18 -04004916 * the tree should have been previously balanced so the deletion does not
4917 * empty a node.
Chris Mason74123bd2007-02-02 11:05:29 -05004918 */
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004919static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
4920 int level, int slot)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004921{
Chris Mason5f39d392007-10-15 16:14:19 -04004922 struct extent_buffer *parent = path->nodes[level];
Chris Mason7518a232007-03-12 12:01:18 -04004923 u32 nritems;
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004924 int ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05004925
Chris Mason5f39d392007-10-15 16:14:19 -04004926 nritems = btrfs_header_nritems(parent);
Chris Masond3977122009-01-05 21:25:51 -05004927 if (slot != nritems - 1) {
David Sterbabf1d3422018-03-05 15:47:39 +01004928 if (level) {
4929 ret = tree_mod_log_insert_move(parent, slot, slot + 1,
David Sterbaa446a972018-03-05 15:26:29 +01004930 nritems - slot - 1);
David Sterbabf1d3422018-03-05 15:47:39 +01004931 BUG_ON(ret < 0);
4932 }
Chris Mason5f39d392007-10-15 16:14:19 -04004933 memmove_extent_buffer(parent,
4934 btrfs_node_key_ptr_offset(slot),
4935 btrfs_node_key_ptr_offset(slot + 1),
Chris Masond6025572007-03-30 14:27:56 -04004936 sizeof(struct btrfs_key_ptr) *
4937 (nritems - slot - 1));
Chris Mason57ba86c2012-12-18 19:35:32 -05004938 } else if (level) {
David Sterbae09c2ef2018-03-05 15:09:03 +01004939 ret = tree_mod_log_insert_key(parent, slot, MOD_LOG_KEY_REMOVE,
4940 GFP_NOFS);
Chris Mason57ba86c2012-12-18 19:35:32 -05004941 BUG_ON(ret < 0);
Chris Masonbb803952007-03-01 12:04:21 -05004942 }
Jan Schmidtf3ea38d2012-05-26 11:45:21 +02004943
Chris Mason7518a232007-03-12 12:01:18 -04004944 nritems--;
Chris Mason5f39d392007-10-15 16:14:19 -04004945 btrfs_set_header_nritems(parent, nritems);
Chris Mason7518a232007-03-12 12:01:18 -04004946 if (nritems == 0 && parent == root->node) {
Chris Mason5f39d392007-10-15 16:14:19 -04004947 BUG_ON(btrfs_header_level(root->node) != 1);
Chris Masonbb803952007-03-01 12:04:21 -05004948 /* just turn the root into a leaf and break */
Chris Mason5f39d392007-10-15 16:14:19 -04004949 btrfs_set_header_level(root->node, 0);
Chris Masonbb803952007-03-01 12:04:21 -05004950 } else if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04004951 struct btrfs_disk_key disk_key;
4952
4953 btrfs_node_key(parent, &disk_key, 0);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03004954 fixup_low_keys(path, &disk_key, level + 1);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004955 }
Chris Masond6025572007-03-30 14:27:56 -04004956 btrfs_mark_buffer_dirty(parent);
Chris Masonbe0e5c02007-01-26 15:51:26 -05004957}
4958
Chris Mason74123bd2007-02-02 11:05:29 -05004959/*
Chris Mason323ac952008-10-01 19:05:46 -04004960 * a helper function to delete the leaf pointed to by path->slots[1] and
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004961 * path->nodes[1].
Chris Mason323ac952008-10-01 19:05:46 -04004962 *
4963 * This deletes the pointer in path->nodes[1] and frees the leaf
4964 * block extent. zero is returned if it all worked out, < 0 otherwise.
4965 *
4966 * The path must have already been setup for deleting the leaf, including
4967 * all the proper balancing. path->nodes[1] must be locked.
4968 */
Jeff Mahoney143bede2012-03-01 14:56:26 +01004969static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
4970 struct btrfs_root *root,
4971 struct btrfs_path *path,
4972 struct extent_buffer *leaf)
Chris Mason323ac952008-10-01 19:05:46 -04004973{
Yan Zheng5d4f98a2009-06-10 10:45:14 -04004974 WARN_ON(btrfs_header_generation(leaf) != trans->transid);
Tsutomu Itohafe5fea2013-04-16 05:18:22 +00004975 del_ptr(root, path, 1, path->slots[1]);
Chris Mason323ac952008-10-01 19:05:46 -04004976
Chris Mason4d081c42009-02-04 09:31:28 -05004977 /*
4978 * btrfs_free_extent is expensive, we want to make sure we
4979 * aren't holding any locks when we call it
4980 */
4981 btrfs_unlock_up_safe(path, 0);
4982
Yan, Zhengf0486c62010-05-16 10:46:25 -04004983 root_sub_used(root, leaf->len);
4984
David Sterba67439da2019-10-08 13:28:47 +02004985 atomic_inc(&leaf->refs);
Jan Schmidt5581a512012-05-16 17:04:52 +02004986 btrfs_free_tree_block(trans, root, leaf, 0, 1);
Josef Bacik3083ee22012-03-09 16:01:49 -05004987 free_extent_buffer_stale(leaf);
Chris Mason323ac952008-10-01 19:05:46 -04004988}
4989/*
Chris Mason74123bd2007-02-02 11:05:29 -05004990 * delete the item at the leaf level in path. If that empties
4991 * the leaf, remove it from the tree
4992 */
Chris Mason85e21ba2008-01-29 15:11:36 -05004993int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
4994 struct btrfs_path *path, int slot, int nr)
Chris Masonbe0e5c02007-01-26 15:51:26 -05004995{
Jeff Mahoney0b246af2016-06-22 18:54:23 -04004996 struct btrfs_fs_info *fs_info = root->fs_info;
Chris Mason5f39d392007-10-15 16:14:19 -04004997 struct extent_buffer *leaf;
4998 struct btrfs_item *item;
Alexandru Moisece0eac22015-08-23 16:01:42 +00004999 u32 last_off;
5000 u32 dsize = 0;
Chris Masonaa5d6be2007-02-28 16:35:06 -05005001 int ret = 0;
5002 int wret;
Chris Mason85e21ba2008-01-29 15:11:36 -05005003 int i;
Chris Mason7518a232007-03-12 12:01:18 -04005004 u32 nritems;
Chris Masonbe0e5c02007-01-26 15:51:26 -05005005
Chris Mason5f39d392007-10-15 16:14:19 -04005006 leaf = path->nodes[0];
Chris Mason85e21ba2008-01-29 15:11:36 -05005007 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
5008
5009 for (i = 0; i < nr; i++)
5010 dsize += btrfs_item_size_nr(leaf, slot + i);
5011
Chris Mason5f39d392007-10-15 16:14:19 -04005012 nritems = btrfs_header_nritems(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005013
Chris Mason85e21ba2008-01-29 15:11:36 -05005014 if (slot + nr != nritems) {
David Sterba8f881e82019-03-20 11:33:10 +01005015 int data_end = leaf_data_end(leaf);
David Sterbac82f8232019-08-09 17:48:21 +02005016 struct btrfs_map_token token;
Chris Mason5f39d392007-10-15 16:14:19 -04005017
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03005018 memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
Chris Masond6025572007-03-30 14:27:56 -04005019 data_end + dsize,
Nikolay Borisov3d9ec8c2017-05-29 09:43:43 +03005020 BTRFS_LEAF_DATA_OFFSET + data_end,
Chris Mason85e21ba2008-01-29 15:11:36 -05005021 last_off - data_end);
Chris Mason5f39d392007-10-15 16:14:19 -04005022
David Sterbac82f8232019-08-09 17:48:21 +02005023 btrfs_init_map_token(&token, leaf);
Chris Mason85e21ba2008-01-29 15:11:36 -05005024 for (i = slot + nr; i < nritems; i++) {
Chris Mason5f39d392007-10-15 16:14:19 -04005025 u32 ioff;
Chris Masondb945352007-10-15 16:15:53 -04005026
Ross Kirkdd3cc162013-09-16 15:58:09 +01005027 item = btrfs_item_nr(i);
David Sterbacc4c13d2020-04-29 02:15:56 +02005028 ioff = btrfs_token_item_offset(&token, item);
5029 btrfs_set_token_item_offset(&token, item, ioff + dsize);
Chris Mason0783fcf2007-03-12 20:12:07 -04005030 }
Chris Masondb945352007-10-15 16:15:53 -04005031
Chris Mason5f39d392007-10-15 16:14:19 -04005032 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
Chris Mason85e21ba2008-01-29 15:11:36 -05005033 btrfs_item_nr_offset(slot + nr),
Chris Masond6025572007-03-30 14:27:56 -04005034 sizeof(struct btrfs_item) *
Chris Mason85e21ba2008-01-29 15:11:36 -05005035 (nritems - slot - nr));
Chris Masonbe0e5c02007-01-26 15:51:26 -05005036 }
Chris Mason85e21ba2008-01-29 15:11:36 -05005037 btrfs_set_header_nritems(leaf, nritems - nr);
5038 nritems -= nr;
Chris Mason5f39d392007-10-15 16:14:19 -04005039
Chris Mason74123bd2007-02-02 11:05:29 -05005040 /* delete the leaf if we've emptied it */
Chris Mason7518a232007-03-12 12:01:18 -04005041 if (nritems == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04005042 if (leaf == root->node) {
5043 btrfs_set_header_level(leaf, 0);
Chris Mason9a8dd152007-02-23 08:38:36 -05005044 } else {
Yan, Zhengf0486c62010-05-16 10:46:25 -04005045 btrfs_set_path_blocking(path);
David Sterba6a884d7d2019-03-20 14:30:02 +01005046 btrfs_clean_tree_block(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01005047 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason9a8dd152007-02-23 08:38:36 -05005048 }
Chris Masonbe0e5c02007-01-26 15:51:26 -05005049 } else {
Chris Mason7518a232007-03-12 12:01:18 -04005050 int used = leaf_space_used(leaf, 0, nritems);
Chris Masonaa5d6be2007-02-28 16:35:06 -05005051 if (slot == 0) {
Chris Mason5f39d392007-10-15 16:14:19 -04005052 struct btrfs_disk_key disk_key;
5053
5054 btrfs_item_key(leaf, &disk_key, 0);
Nikolay Borisovb167fa92018-06-20 15:48:47 +03005055 fixup_low_keys(path, &disk_key, 1);
Chris Masonaa5d6be2007-02-28 16:35:06 -05005056 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05005057
Chris Mason74123bd2007-02-02 11:05:29 -05005058 /* delete the leaf if it is mostly empty */
Jeff Mahoney0b246af2016-06-22 18:54:23 -04005059 if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
Chris Masonbe0e5c02007-01-26 15:51:26 -05005060 /* push_leaf_left fixes the path.
5061 * make sure the path still points to our leaf
5062 * for a possible call to del_ptr below
5063 */
Chris Mason4920c9a2007-01-26 16:38:42 -05005064 slot = path->slots[1];
David Sterba67439da2019-10-08 13:28:47 +02005065 atomic_inc(&leaf->refs);
Chris Mason5f39d392007-10-15 16:14:19 -04005066
Chris Masonb9473432009-03-13 11:00:37 -04005067 btrfs_set_path_blocking(path);
Chris Mason99d8f832010-07-07 10:51:48 -04005068 wret = push_leaf_left(trans, root, path, 1, 1,
5069 1, (u32)-1);
Chris Mason54aa1f42007-06-22 14:16:25 -04005070 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05005071 ret = wret;
Chris Mason5f39d392007-10-15 16:14:19 -04005072
5073 if (path->nodes[0] == leaf &&
5074 btrfs_header_nritems(leaf)) {
Chris Mason99d8f832010-07-07 10:51:48 -04005075 wret = push_leaf_right(trans, root, path, 1,
5076 1, 1, 0);
Chris Mason54aa1f42007-06-22 14:16:25 -04005077 if (wret < 0 && wret != -ENOSPC)
Chris Masonaa5d6be2007-02-28 16:35:06 -05005078 ret = wret;
5079 }
Chris Mason5f39d392007-10-15 16:14:19 -04005080
5081 if (btrfs_header_nritems(leaf) == 0) {
Chris Mason323ac952008-10-01 19:05:46 -04005082 path->slots[1] = slot;
Jeff Mahoney143bede2012-03-01 14:56:26 +01005083 btrfs_del_leaf(trans, root, path, leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04005084 free_extent_buffer(leaf);
Jeff Mahoney143bede2012-03-01 14:56:26 +01005085 ret = 0;
Chris Mason5de08d72007-02-24 06:24:44 -05005086 } else {
Chris Mason925baed2008-06-25 16:01:30 -04005087 /* if we're still in the path, make sure
5088 * we're dirty. Otherwise, one of the
5089 * push_leaf functions must have already
5090 * dirtied this buffer
5091 */
5092 if (path->nodes[0] == leaf)
5093 btrfs_mark_buffer_dirty(leaf);
Chris Mason5f39d392007-10-15 16:14:19 -04005094 free_extent_buffer(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005095 }
Chris Masond5719762007-03-23 10:01:08 -04005096 } else {
Chris Mason5f39d392007-10-15 16:14:19 -04005097 btrfs_mark_buffer_dirty(leaf);
Chris Masonbe0e5c02007-01-26 15:51:26 -05005098 }
5099 }
Chris Masonaa5d6be2007-02-28 16:35:06 -05005100 return ret;
Chris Masonbe0e5c02007-01-26 15:51:26 -05005101}
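
/*
 * A minimal sketch of deleting a contiguous run of items, assuming the
 * caller searched to the first one with cow = 1 and knows that 'nr'
 * consecutive slots of this leaf belong to it; the helper name and the
 * bounds check are illustrative.
 */
static int example_del_run(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_path *path, int nr)
{
	struct extent_buffer *leaf = path->nodes[0];

	if (path->slots[0] + nr > btrfs_header_nritems(leaf))
		return -EINVAL;
	return btrfs_del_items(trans, root, path, path->slots[0], nr);
}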
5102
Chris Mason97571fd2007-02-24 13:39:08 -05005103/*
Chris Mason925baed2008-06-25 16:01:30 -04005104 * search the tree again to find a leaf with lesser keys
Chris Mason7bb86312007-12-11 09:25:06 -05005105 * returns 0 if it found something or 1 if there are no lesser leaves.
5106 * returns < 0 on io errors.
Chris Masond352ac62008-09-29 15:18:18 -04005107 *
5108 * This may release the path, and so you may lose any locks held at the
5109 * time you call it.
Chris Mason7bb86312007-12-11 09:25:06 -05005110 */
Josef Bacik16e75492013-10-22 12:18:51 -04005111int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Mason7bb86312007-12-11 09:25:06 -05005112{
Chris Mason925baed2008-06-25 16:01:30 -04005113 struct btrfs_key key;
5114 struct btrfs_disk_key found_key;
5115 int ret;
Chris Mason7bb86312007-12-11 09:25:06 -05005116
Chris Mason925baed2008-06-25 16:01:30 -04005117 btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
Chris Mason7bb86312007-12-11 09:25:06 -05005118
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005119 if (key.offset > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005120 key.offset--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005121 } else if (key.type > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005122 key.type--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005123 key.offset = (u64)-1;
5124 } else if (key.objectid > 0) {
Chris Mason925baed2008-06-25 16:01:30 -04005125 key.objectid--;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005126 key.type = (u8)-1;
5127 key.offset = (u64)-1;
5128 } else {
Chris Mason925baed2008-06-25 16:01:30 -04005129 return 1;
Filipe David Borba Mananae8b0d7242013-10-15 00:12:27 +01005130 }
Chris Mason7bb86312007-12-11 09:25:06 -05005131
David Sterbab3b4aa72011-04-21 01:20:15 +02005132 btrfs_release_path(path);
Chris Mason925baed2008-06-25 16:01:30 -04005133 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5134 if (ret < 0)
5135 return ret;
5136 btrfs_item_key(path->nodes[0], &found_key, 0);
5137 ret = comp_keys(&found_key, &key);
Filipe Manana337c6f62014-06-09 13:22:13 +01005138 /*
5139 * We might have had an item with the previous key in the tree right
5140 * before we released our path. And after we released our path, that
5141 * item might have been pushed to the first slot (0) of the leaf we
5142 * were holding due to a tree balance. Alternatively, an item with the
5143 * previous key can exist as the only element of a leaf (big fat item).
5144 * Therefore account for these 2 cases, so that our callers (like
5145 * btrfs_previous_item) don't miss an existing item with a key matching
5146 * the previous key we computed above.
5147 */
5148 if (ret <= 0)
Chris Mason925baed2008-06-25 16:01:30 -04005149 return 0;
5150 return 1;
Chris Mason7bb86312007-12-11 09:25:06 -05005151}
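
/*
 * A minimal sketch of stepping one item backwards across a leaf
 * boundary, mirroring what btrfs_previous_item() further below does
 * without its key filtering; the helper name is illustrative.  Remember
 * that btrfs_prev_leaf() releases and re-searches the path, so locks
 * held on the old leaf are dropped.
 */
static int example_prev_item(struct btrfs_root *root, struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	int ret;

	if (path->slots[0] > 0) {
		path->slots[0]--;
		return 0;
	}

	ret = btrfs_prev_leaf(root, path);
	if (ret != 0)
		return ret;	/* 1: no lesser leaf, < 0: error */

	leaf = path->nodes[0];
	if (btrfs_header_nritems(leaf) == 0)
		return 1;
	/* the re-search may leave the slot just past the last item */
	if (path->slots[0] == btrfs_header_nritems(leaf))
		path->slots[0]--;
	return 0;
}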
5152
Chris Mason3f157a22008-06-25 16:01:31 -04005153/*
5154 * A helper function to walk down the tree starting at min_key, and looking
Eric Sandeende78b512013-01-31 18:21:12 +00005155 * for nodes or leaves that are not older than min_trans.
5156 * This is used by the btree defrag code and by tree logging.
Chris Mason3f157a22008-06-25 16:01:31 -04005157 *
5158 * This does not cow, but it does stuff the starting key it finds back
5159 * into min_key, so you can call btrfs_search_slot with cow=1 on the
5160 * key and get a writable path.
5161 *
Chris Mason3f157a22008-06-25 16:01:31 -04005162 * This honors path->lowest_level to prevent descent past a given level
5163 * of the tree.
5164 *
Chris Masond352ac62008-09-29 15:18:18 -04005165 * min_trans indicates the oldest transaction that you are interested
5166 * in walking through. Any nodes or leaves older than min_trans are
5167 * skipped over (without reading them).
5168 *
Chris Mason3f157a22008-06-25 16:01:31 -04005169 * returns zero if something useful was found, < 0 on error and 1 if there
5170 * was nothing in the tree that matched the search criteria.
5171 */
5172int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
Eric Sandeende78b512013-01-31 18:21:12 +00005173 struct btrfs_path *path,
Chris Mason3f157a22008-06-25 16:01:31 -04005174 u64 min_trans)
5175{
5176 struct extent_buffer *cur;
5177 struct btrfs_key found_key;
5178 int slot;
Yan96524802008-07-24 12:19:49 -04005179 int sret;
Chris Mason3f157a22008-06-25 16:01:31 -04005180 u32 nritems;
5181 int level;
5182 int ret = 1;
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005183 int keep_locks = path->keep_locks;
Chris Mason3f157a22008-06-25 16:01:31 -04005184
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005185 path->keep_locks = 1;
Chris Mason3f157a22008-06-25 16:01:31 -04005186again:
Chris Masonbd681512011-07-16 15:23:14 -04005187 cur = btrfs_read_lock_root_node(root);
Chris Mason3f157a22008-06-25 16:01:31 -04005188 level = btrfs_header_level(cur);
Chris Masone02119d2008-09-05 16:13:11 -04005189 WARN_ON(path->nodes[level]);
Chris Mason3f157a22008-06-25 16:01:31 -04005190 path->nodes[level] = cur;
Chris Masonbd681512011-07-16 15:23:14 -04005191 path->locks[level] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04005192
5193 if (btrfs_header_generation(cur) < min_trans) {
5194 ret = 1;
5195 goto out;
5196 }
Chris Masond3977122009-01-05 21:25:51 -05005197 while (1) {
Chris Mason3f157a22008-06-25 16:01:31 -04005198 nritems = btrfs_header_nritems(cur);
5199 level = btrfs_header_level(cur);
Qu Wenruoe3b83362020-04-17 15:08:21 +08005200 sret = btrfs_bin_search(cur, min_key, &slot);
Filipe Mananacbca7d52019-02-18 16:57:26 +00005201 if (sret < 0) {
5202 ret = sret;
5203 goto out;
5204 }
Chris Mason3f157a22008-06-25 16:01:31 -04005205
Chris Mason323ac952008-10-01 19:05:46 -04005206 /* at the lowest level, we're done, setup the path and exit */
5207 if (level == path->lowest_level) {
Chris Masone02119d2008-09-05 16:13:11 -04005208 if (slot >= nritems)
5209 goto find_next_key;
Chris Mason3f157a22008-06-25 16:01:31 -04005210 ret = 0;
5211 path->slots[level] = slot;
5212 btrfs_item_key_to_cpu(cur, &found_key, slot);
5213 goto out;
5214 }
Yan96524802008-07-24 12:19:49 -04005215 if (sret && slot > 0)
5216 slot--;
Chris Mason3f157a22008-06-25 16:01:31 -04005217 /*
Eric Sandeende78b512013-01-31 18:21:12 +00005218 * check this node pointer against the min_trans parameter.
Randy Dunlap260db432020-08-04 19:48:34 -07005219 * If it is too old, skip to the next one.
Chris Mason3f157a22008-06-25 16:01:31 -04005220 */
Chris Masond3977122009-01-05 21:25:51 -05005221 while (slot < nritems) {
Chris Mason3f157a22008-06-25 16:01:31 -04005222 u64 gen;
Chris Masone02119d2008-09-05 16:13:11 -04005223
Chris Mason3f157a22008-06-25 16:01:31 -04005224 gen = btrfs_node_ptr_generation(cur, slot);
5225 if (gen < min_trans) {
5226 slot++;
5227 continue;
5228 }
Eric Sandeende78b512013-01-31 18:21:12 +00005229 break;
Chris Mason3f157a22008-06-25 16:01:31 -04005230 }
Chris Masone02119d2008-09-05 16:13:11 -04005231find_next_key:
Chris Mason3f157a22008-06-25 16:01:31 -04005232 /*
5233 * we didn't find a candidate key in this node, walk forward
5234 * and find another one
5235 */
5236 if (slot >= nritems) {
Chris Masone02119d2008-09-05 16:13:11 -04005237 path->slots[level] = slot;
Chris Masonb4ce94d2009-02-04 09:25:08 -05005238 btrfs_set_path_blocking(path);
Chris Masone02119d2008-09-05 16:13:11 -04005239 sret = btrfs_find_next_key(root, path, min_key, level,
Eric Sandeende78b512013-01-31 18:21:12 +00005240 min_trans);
Chris Masone02119d2008-09-05 16:13:11 -04005241 if (sret == 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02005242 btrfs_release_path(path);
Chris Mason3f157a22008-06-25 16:01:31 -04005243 goto again;
5244 } else {
5245 goto out;
5246 }
5247 }
5248 /* save our key for returning back */
5249 btrfs_node_key_to_cpu(cur, &found_key, slot);
5250 path->slots[level] = slot;
5251 if (level == path->lowest_level) {
5252 ret = 0;
Chris Mason3f157a22008-06-25 16:01:31 -04005253 goto out;
5254 }
Chris Masonb4ce94d2009-02-04 09:25:08 -05005255 btrfs_set_path_blocking(path);
David Sterba4b231ae2019-08-21 19:16:27 +02005256 cur = btrfs_read_node_slot(cur, slot);
Liu Bofb770ae2016-07-05 12:10:14 -07005257 if (IS_ERR(cur)) {
5258 ret = PTR_ERR(cur);
5259 goto out;
5260 }
Chris Mason3f157a22008-06-25 16:01:31 -04005261
Chris Masonbd681512011-07-16 15:23:14 -04005262 btrfs_tree_read_lock(cur);
Chris Masonb4ce94d2009-02-04 09:25:08 -05005263
Chris Masonbd681512011-07-16 15:23:14 -04005264 path->locks[level - 1] = BTRFS_READ_LOCK;
Chris Mason3f157a22008-06-25 16:01:31 -04005265 path->nodes[level - 1] = cur;
Chris Masonf7c79f32012-03-19 15:54:38 -04005266 unlock_up(path, level, 1, 0, NULL);
Chris Mason3f157a22008-06-25 16:01:31 -04005267 }
5268out:
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005269 path->keep_locks = keep_locks;
5270 if (ret == 0) {
5271 btrfs_unlock_up_safe(path, path->lowest_level + 1);
5272 btrfs_set_path_blocking(path);
Chris Mason3f157a22008-06-25 16:01:31 -04005273 memcpy(min_key, &found_key, sizeof(found_key));
Filipe Mananaf98de9b2014-08-04 19:37:21 +01005274 }
Chris Mason3f157a22008-06-25 16:01:31 -04005275 return ret;
5276}
5277
5278/*
5279 * this is similar to btrfs_next_leaf, but does not try to preserve
5280 * and fixup the path. It looks for and returns the next key in the
Eric Sandeende78b512013-01-31 18:21:12 +00005281 * tree based on the current path and the min_trans parameter.
Chris Mason3f157a22008-06-25 16:01:31 -04005282 *
5283 * 0 is returned if another key is found, < 0 if there are any errors
5284 * and 1 is returned if there are no higher keys in the tree
5285 *
5286 * path->keep_locks should be set to 1 on the search made before
5287 * calling this function.
5288 */
Chris Masone7a84562008-06-25 16:01:31 -04005289int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
Eric Sandeende78b512013-01-31 18:21:12 +00005290 struct btrfs_key *key, int level, u64 min_trans)
Chris Masone7a84562008-06-25 16:01:31 -04005291{
Chris Masone7a84562008-06-25 16:01:31 -04005292 int slot;
5293 struct extent_buffer *c;
5294
Josef Bacik6a9fb462019-06-20 15:37:52 -04005295 WARN_ON(!path->keep_locks && !path->skip_locking);
Chris Masond3977122009-01-05 21:25:51 -05005296 while (level < BTRFS_MAX_LEVEL) {
Chris Masone7a84562008-06-25 16:01:31 -04005297 if (!path->nodes[level])
5298 return 1;
5299
5300 slot = path->slots[level] + 1;
5301 c = path->nodes[level];
Chris Mason3f157a22008-06-25 16:01:31 -04005302next:
Chris Masone7a84562008-06-25 16:01:31 -04005303 if (slot >= btrfs_header_nritems(c)) {
Yan Zheng33c66f42009-07-22 09:59:00 -04005304 int ret;
5305 int orig_lowest;
5306 struct btrfs_key cur_key;
5307 if (level + 1 >= BTRFS_MAX_LEVEL ||
5308 !path->nodes[level + 1])
Chris Masone7a84562008-06-25 16:01:31 -04005309 return 1;
Yan Zheng33c66f42009-07-22 09:59:00 -04005310
Josef Bacik6a9fb462019-06-20 15:37:52 -04005311 if (path->locks[level + 1] || path->skip_locking) {
Yan Zheng33c66f42009-07-22 09:59:00 -04005312 level++;
5313 continue;
5314 }
5315
5316 slot = btrfs_header_nritems(c) - 1;
5317 if (level == 0)
5318 btrfs_item_key_to_cpu(c, &cur_key, slot);
5319 else
5320 btrfs_node_key_to_cpu(c, &cur_key, slot);
5321
5322 orig_lowest = path->lowest_level;
David Sterbab3b4aa72011-04-21 01:20:15 +02005323 btrfs_release_path(path);
Yan Zheng33c66f42009-07-22 09:59:00 -04005324 path->lowest_level = level;
5325 ret = btrfs_search_slot(NULL, root, &cur_key, path,
5326 0, 0);
5327 path->lowest_level = orig_lowest;
5328 if (ret < 0)
5329 return ret;
5330
5331 c = path->nodes[level];
5332 slot = path->slots[level];
5333 if (ret == 0)
5334 slot++;
5335 goto next;
Chris Masone7a84562008-06-25 16:01:31 -04005336 }
Yan Zheng33c66f42009-07-22 09:59:00 -04005337
Chris Masone7a84562008-06-25 16:01:31 -04005338 if (level == 0)
5339 btrfs_item_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04005340 else {
Chris Mason3f157a22008-06-25 16:01:31 -04005341 u64 gen = btrfs_node_ptr_generation(c, slot);
5342
Chris Mason3f157a22008-06-25 16:01:31 -04005343 if (gen < min_trans) {
5344 slot++;
5345 goto next;
5346 }
Chris Masone7a84562008-06-25 16:01:31 -04005347 btrfs_node_key_to_cpu(c, key, slot);
Chris Mason3f157a22008-06-25 16:01:31 -04005348 }
Chris Masone7a84562008-06-25 16:01:31 -04005349 return 0;
5350 }
5351 return 1;
5352}
5353
Chris Mason7bb86312007-12-11 09:25:06 -05005354/*
Chris Mason925baed2008-06-25 16:01:30 -04005355 * search the tree again to find a leaf with greater keys
Chris Mason0f70abe2007-02-28 16:46:22 -05005356 * returns 0 if it found something or 1 if there are no greater leaves.
5357 * returns < 0 on io errors.
Chris Mason97571fd2007-02-24 13:39:08 -05005358 */
Chris Mason234b63a2007-03-13 10:46:10 -04005359int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
Chris Masond97e63b2007-02-20 16:40:44 -05005360{
Jan Schmidt3d7806e2012-06-11 08:29:29 +02005361 return btrfs_next_old_leaf(root, path, 0);
5362}
5363
5364int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
5365 u64 time_seq)
5366{
Chris Masond97e63b2007-02-20 16:40:44 -05005367 int slot;
Chris Mason8e73f272009-04-03 10:14:18 -04005368 int level;
Chris Mason5f39d392007-10-15 16:14:19 -04005369 struct extent_buffer *c;
Chris Mason8e73f272009-04-03 10:14:18 -04005370 struct extent_buffer *next;
Chris Mason925baed2008-06-25 16:01:30 -04005371 struct btrfs_key key;
5372 u32 nritems;
5373 int ret;
Chris Mason8e73f272009-04-03 10:14:18 -04005374 int old_spinning = path->leave_spinning;
Chris Masonbd681512011-07-16 15:23:14 -04005375 int next_rw_lock = 0;
Chris Mason925baed2008-06-25 16:01:30 -04005376
5377 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Masond3977122009-01-05 21:25:51 -05005378 if (nritems == 0)
Chris Mason925baed2008-06-25 16:01:30 -04005379 return 1;
Chris Mason925baed2008-06-25 16:01:30 -04005380
Chris Mason8e73f272009-04-03 10:14:18 -04005381 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
5382again:
5383 level = 1;
5384 next = NULL;
Chris Masonbd681512011-07-16 15:23:14 -04005385 next_rw_lock = 0;
David Sterbab3b4aa72011-04-21 01:20:15 +02005386 btrfs_release_path(path);
Chris Mason8e73f272009-04-03 10:14:18 -04005387
Chris Masona2135012008-06-25 16:01:30 -04005388 path->keep_locks = 1;
Chris Mason31533fb2011-07-26 16:01:59 -04005389 path->leave_spinning = 1;
Chris Mason8e73f272009-04-03 10:14:18 -04005390
Jan Schmidt3d7806e2012-06-11 08:29:29 +02005391 if (time_seq)
5392 ret = btrfs_search_old_slot(root, &key, path, time_seq);
5393 else
5394 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
Chris Mason925baed2008-06-25 16:01:30 -04005395 path->keep_locks = 0;
5396
5397 if (ret < 0)
5398 return ret;
5399
Chris Masona2135012008-06-25 16:01:30 -04005400 nritems = btrfs_header_nritems(path->nodes[0]);
Chris Mason168fd7d2008-06-25 16:01:30 -04005401 /*
5402 * by releasing the path above we dropped all our locks. A balance
5403 * could have added more items next to the key that used to be
5404 * at the very end of the block. So, check again here and
5405 * advance the path if there are now more items available.
5406 */
Chris Masona2135012008-06-25 16:01:30 -04005407 if (nritems > 0 && path->slots[0] < nritems - 1) {
Yan Zhenge457afe2009-07-22 09:59:00 -04005408 if (ret == 0)
5409 path->slots[0]++;
Chris Mason8e73f272009-04-03 10:14:18 -04005410 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04005411 goto done;
5412 }
Liu Bo0b43e042014-06-09 11:04:49 +08005413 /*
5414 * So the above check misses one case:
5415 * - after releasing the path above, someone has removed the item that
5416 * used to be at the very end of the block, and balance between leaves
5417 * gets another one with bigger key.offset to replace it.
5418 *
5419 * This one should be returned as well, or we can get leaf corruption
5420 * later (esp. in __btrfs_drop_extents()).
5421 *
5422 * A bit more explanation about this check: with ret > 0 the key
5423 * wasn't found, and the path points to the slot where it would be
5424 * inserted, so the item at path->slots[0] must be the
5425 * bigger one.
5426 */
5427 if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
5428 ret = 0;
5429 goto done;
5430 }
Chris Masond97e63b2007-02-20 16:40:44 -05005431
Chris Masond3977122009-01-05 21:25:51 -05005432 while (level < BTRFS_MAX_LEVEL) {
Chris Mason8e73f272009-04-03 10:14:18 -04005433 if (!path->nodes[level]) {
5434 ret = 1;
5435 goto done;
5436 }
Chris Mason5f39d392007-10-15 16:14:19 -04005437
Chris Masond97e63b2007-02-20 16:40:44 -05005438 slot = path->slots[level] + 1;
5439 c = path->nodes[level];
Chris Mason5f39d392007-10-15 16:14:19 -04005440 if (slot >= btrfs_header_nritems(c)) {
Chris Masond97e63b2007-02-20 16:40:44 -05005441 level++;
Chris Mason8e73f272009-04-03 10:14:18 -04005442 if (level == BTRFS_MAX_LEVEL) {
5443 ret = 1;
5444 goto done;
5445 }
Chris Masond97e63b2007-02-20 16:40:44 -05005446 continue;
5447 }
Chris Mason5f39d392007-10-15 16:14:19 -04005448
Chris Mason925baed2008-06-25 16:01:30 -04005449 if (next) {
Chris Masonbd681512011-07-16 15:23:14 -04005450 btrfs_tree_unlock_rw(next, next_rw_lock);
Chris Mason5f39d392007-10-15 16:14:19 -04005451 free_extent_buffer(next);
Chris Mason925baed2008-06-25 16:01:30 -04005452 }
Chris Mason5f39d392007-10-15 16:14:19 -04005453
Chris Mason8e73f272009-04-03 10:14:18 -04005454 next = c;
Chris Masonbd681512011-07-16 15:23:14 -04005455 next_rw_lock = path->locks[level];
Liu Bod07b8522017-01-30 12:23:42 -08005456 ret = read_block_for_search(root, path, &next, level,
David Sterbacda79c52017-02-10 18:44:32 +01005457 slot, &key);
Chris Mason8e73f272009-04-03 10:14:18 -04005458 if (ret == -EAGAIN)
5459 goto again;
Chris Mason5f39d392007-10-15 16:14:19 -04005460
Chris Mason76a05b32009-05-14 13:24:30 -04005461 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02005462 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04005463 goto done;
5464 }
5465
Chris Mason5cd57b22008-06-25 16:01:30 -04005466 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04005467 ret = btrfs_try_tree_read_lock(next);
Jan Schmidtd42244a2012-06-22 14:51:15 +02005468 if (!ret && time_seq) {
5469 /*
5470 * If we don't get the lock, we may be racing
5471 * with push_leaf_left, holding that lock while
5472 * itself waiting for the leaf we've currently
5473 * locked. To solve this situation, we give up
5474 * on our lock and cycle.
5475 */
Jan Schmidtcf538832012-07-04 15:42:48 +02005476 free_extent_buffer(next);
Jan Schmidtd42244a2012-06-22 14:51:15 +02005477 btrfs_release_path(path);
5478 cond_resched();
5479 goto again;
5480 }
Chris Mason8e73f272009-04-03 10:14:18 -04005481 if (!ret) {
5482 btrfs_set_path_blocking(path);
Josef Bacikfd7ba1c2020-08-20 11:46:02 -04005483 __btrfs_tree_read_lock(next,
Josef Bacikbf774672020-08-20 11:46:04 -04005484 BTRFS_NESTING_RIGHT,
Josef Bacikfd7ba1c2020-08-20 11:46:02 -04005485 path->recurse);
Chris Mason8e73f272009-04-03 10:14:18 -04005486 }
Chris Mason31533fb2011-07-26 16:01:59 -04005487 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04005488 }
Chris Masond97e63b2007-02-20 16:40:44 -05005489 break;
5490 }
5491 path->slots[level] = slot;
Chris Masond3977122009-01-05 21:25:51 -05005492 while (1) {
Chris Masond97e63b2007-02-20 16:40:44 -05005493 level--;
5494 c = path->nodes[level];
Chris Mason925baed2008-06-25 16:01:30 -04005495 if (path->locks[level])
Chris Masonbd681512011-07-16 15:23:14 -04005496 btrfs_tree_unlock_rw(c, path->locks[level]);
Chris Mason8e73f272009-04-03 10:14:18 -04005497
Chris Mason5f39d392007-10-15 16:14:19 -04005498 free_extent_buffer(c);
Chris Masond97e63b2007-02-20 16:40:44 -05005499 path->nodes[level] = next;
5500 path->slots[level] = 0;
Chris Masona74a4b92008-06-25 16:01:31 -04005501 if (!path->skip_locking)
Chris Masonbd681512011-07-16 15:23:14 -04005502 path->locks[level] = next_rw_lock;
Chris Masond97e63b2007-02-20 16:40:44 -05005503 if (!level)
5504 break;
Chris Masonb4ce94d2009-02-04 09:25:08 -05005505
Liu Bod07b8522017-01-30 12:23:42 -08005506 ret = read_block_for_search(root, path, &next, level,
David Sterbacda79c52017-02-10 18:44:32 +01005507 0, &key);
Chris Mason8e73f272009-04-03 10:14:18 -04005508 if (ret == -EAGAIN)
5509 goto again;
5510
Chris Mason76a05b32009-05-14 13:24:30 -04005511 if (ret < 0) {
David Sterbab3b4aa72011-04-21 01:20:15 +02005512 btrfs_release_path(path);
Chris Mason76a05b32009-05-14 13:24:30 -04005513 goto done;
5514 }
5515
Chris Mason5cd57b22008-06-25 16:01:30 -04005516 if (!path->skip_locking) {
Chris Masonbd681512011-07-16 15:23:14 -04005517 ret = btrfs_try_tree_read_lock(next);
Chris Mason8e73f272009-04-03 10:14:18 -04005518 if (!ret) {
5519 btrfs_set_path_blocking(path);
Josef Bacikfd7ba1c2020-08-20 11:46:02 -04005520 __btrfs_tree_read_lock(next,
Josef Bacikbf774672020-08-20 11:46:04 -04005521 BTRFS_NESTING_RIGHT,
Josef Bacikfd7ba1c2020-08-20 11:46:02 -04005522 path->recurse);
Chris Mason8e73f272009-04-03 10:14:18 -04005523 }
Chris Mason31533fb2011-07-26 16:01:59 -04005524 next_rw_lock = BTRFS_READ_LOCK;
Chris Mason5cd57b22008-06-25 16:01:30 -04005525 }
Chris Masond97e63b2007-02-20 16:40:44 -05005526 }
Chris Mason8e73f272009-04-03 10:14:18 -04005527 ret = 0;
Chris Mason925baed2008-06-25 16:01:30 -04005528done:
Chris Masonf7c79f32012-03-19 15:54:38 -04005529 unlock_up(path, 0, 1, 0, NULL);
Chris Mason8e73f272009-04-03 10:14:18 -04005530 path->leave_spinning = old_spinning;
5531 if (!old_spinning)
5532 btrfs_set_path_blocking(path);
5533
5534 return ret;
Chris Masond97e63b2007-02-20 16:40:44 -05005535}
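
/*
 * A minimal sketch of the standard forward iteration loop, assuming a
 * read-only search; the objectid filter and the processing step are
 * illustrative.  btrfs_next_leaf() may re-search the tree, so nritems
 * is re-read from the (possibly new) leaf on every pass.
 */
static int example_walk_objectid(struct btrfs_root *root, u64 objectid)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = objectid;
	key.type = 0;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)		/* no more leaves */
				break;
			continue;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid != objectid)
			break;

		/* process the item at (leaf, slot) here */

		path->slots[0]++;
	}
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}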
Chris Mason0b86a832008-03-24 15:01:56 -04005536
Chris Mason3f157a22008-06-25 16:01:31 -04005537/*
5538 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
5539 * searching until it gets past min_objectid or finds an item of 'type'
5540 *
5541 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5542 */
Chris Mason0b86a832008-03-24 15:01:56 -04005543int btrfs_previous_item(struct btrfs_root *root,
5544 struct btrfs_path *path, u64 min_objectid,
5545 int type)
5546{
5547 struct btrfs_key found_key;
5548 struct extent_buffer *leaf;
Chris Masone02119d2008-09-05 16:13:11 -04005549 u32 nritems;
Chris Mason0b86a832008-03-24 15:01:56 -04005550 int ret;
5551
Chris Masond3977122009-01-05 21:25:51 -05005552 while (1) {
Chris Mason0b86a832008-03-24 15:01:56 -04005553 if (path->slots[0] == 0) {
Chris Masonb4ce94d2009-02-04 09:25:08 -05005554 btrfs_set_path_blocking(path);
Chris Mason0b86a832008-03-24 15:01:56 -04005555 ret = btrfs_prev_leaf(root, path);
5556 if (ret != 0)
5557 return ret;
5558 } else {
5559 path->slots[0]--;
5560 }
5561 leaf = path->nodes[0];
Chris Masone02119d2008-09-05 16:13:11 -04005562 nritems = btrfs_header_nritems(leaf);
5563 if (nritems == 0)
5564 return 1;
5565 if (path->slots[0] == nritems)
5566 path->slots[0]--;
5567
Chris Mason0b86a832008-03-24 15:01:56 -04005568 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Chris Masone02119d2008-09-05 16:13:11 -04005569 if (found_key.objectid < min_objectid)
5570 break;
Yan Zheng0a4eefb2009-07-24 11:06:53 -04005571 if (found_key.type == type)
5572 return 0;
Chris Masone02119d2008-09-05 16:13:11 -04005573 if (found_key.objectid == min_objectid &&
5574 found_key.type < type)
5575 break;
Chris Mason0b86a832008-03-24 15:01:56 -04005576 }
5577 return 1;
5578}
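
/*
 * A minimal sketch of the usual way the helper above is consumed,
 * assuming a prior read-only search left the path just past the region
 * of interest; the wrapper name is illustrative.
 */
static int example_find_prev_of_type(struct btrfs_root *root,
				     struct btrfs_path *path,
				     u64 min_objectid, int type,
				     struct btrfs_key *found)
{
	int ret = btrfs_previous_item(root, path, min_objectid, type);

	if (ret)	/* 1: nothing found, < 0: error */
		return ret;
	btrfs_item_key_to_cpu(path->nodes[0], found, path->slots[0]);
	return 0;
}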
Wang Shilongade2e0b2014-01-12 21:38:33 +08005579
5580/*
5581 * search in extent tree to find a previous Metadata/Data extent item with
5582 * min objecitd.
5583 *
5584 * returns 0 if something is found, 1 if nothing was found and < 0 on error
5585 */
5586int btrfs_previous_extent_item(struct btrfs_root *root,
5587 struct btrfs_path *path, u64 min_objectid)
5588{
5589 struct btrfs_key found_key;
5590 struct extent_buffer *leaf;
5591 u32 nritems;
5592 int ret;
5593
5594 while (1) {
5595 if (path->slots[0] == 0) {
5596 btrfs_set_path_blocking(path);
5597 ret = btrfs_prev_leaf(root, path);
5598 if (ret != 0)
5599 return ret;
5600 } else {
5601 path->slots[0]--;
5602 }
5603 leaf = path->nodes[0];
5604 nritems = btrfs_header_nritems(leaf);
5605 if (nritems == 0)
5606 return 1;
5607 if (path->slots[0] == nritems)
5608 path->slots[0]--;
5609
5610 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5611 if (found_key.objectid < min_objectid)
5612 break;
5613 if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
5614 found_key.type == BTRFS_METADATA_ITEM_KEY)
5615 return 0;
5616 if (found_key.objectid == min_objectid &&
5617 found_key.type < BTRFS_EXTENT_ITEM_KEY)
5618 break;
5619 }
5620 return 1;
5621}