blob: f30d74d807e96ab65bb2a3cffb10d09443e756d7 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
Nathan Scott7b718762005-11-02 14:58:39 +11002 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 *
Nathan Scott7b718762005-11-02 14:58:39 +11005 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 * published by the Free Software Foundation.
8 *
Nathan Scott7b718762005-11-02 14:58:39 +11009 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 *
Nathan Scott7b718762005-11-02 14:58:39 +110014 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include "xfs.h"
Nathan Scotta844f452005-11-02 14:38:42 +110019#include "xfs_fs.h"
Dave Chinner70a98832013-10-23 10:36:05 +110020#include "xfs_shared.h"
Dave Chinnera4fbe6a2013-10-23 10:51:50 +110021#include "xfs_format.h"
Dave Chinner239880e2013-10-23 10:50:10 +110022#include "xfs_log_format.h"
23#include "xfs_trans_resv.h"
Nathan Scotta844f452005-11-02 14:38:42 +110024#include "xfs_bit.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070025#include "xfs_mount.h"
Darrick J. Wong3ab78df2016-08-03 11:15:38 +100026#include "xfs_defer.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include "xfs_inode.h"
Dave Chinner239880e2013-10-23 10:50:10 +110028#include "xfs_trans.h"
Christoph Hellwig38bb7422008-10-30 16:56:22 +110029#include "xfs_inode_item.h"
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050030#include "xfs_buf_item.h"
Nathan Scotta844f452005-11-02 14:38:42 +110031#include "xfs_btree.h"
Darrick J. Wonge9e899a2017-10-31 12:04:49 -070032#include "xfs_errortag.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070033#include "xfs_error.h"
Christoph Hellwig0b1b2132009-12-14 23:14:59 +000034#include "xfs_trace.h"
Christoph Hellwigee1a47a2013-04-21 14:53:46 -050035#include "xfs_cksum.h"
Dave Chinnercf11da92014-07-15 07:08:24 +100036#include "xfs_alloc.h"
Brian Fostera45086e2015-10-12 15:59:25 +110037#include "xfs_log.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
/*
 * Cursor allocation zone: slab cache backing struct xfs_btree_cur
 * allocations for all btree types.
 */
kmem_zone_t	*xfs_btree_cur_zone;
43
/*
 * Btree magic numbers.
 *
 * Indexed as [crc][btnum]: row 0 holds the non-CRC on-disk magics, row 1
 * the CRC-enabled variants.  A zero entry means that btree type has no
 * magic in that format (e.g. rmap/refcount exist only with CRCs).
 */
static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
	  XFS_FIBT_MAGIC, 0 },
	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
	  XFS_REFC_CRC_MAGIC }
};
Eric Sandeenaf7d20f2017-01-27 23:16:38 -080054
Darrick J. Wongc8ce5402017-06-16 11:00:05 -070055uint32_t
Eric Sandeenaf7d20f2017-01-27 23:16:38 -080056xfs_btree_magic(
57 int crc,
58 xfs_btnum_t btnum)
59{
Darrick J. Wongc8ce5402017-06-16 11:00:05 -070060 uint32_t magic = xfs_magics[crc][btnum];
Eric Sandeenaf7d20f2017-01-27 23:16:38 -080061
62 /* Ensure we asked for crc for crc-only magics. */
63 ASSERT(magic != 0);
64 return magic;
65}
Linus Torvalds1da177e2005-04-16 15:20:36 -070066
/*
 * Check a long btree block header.  Return the address of the failing check,
 * or NULL if everything is ok.
 */
xfs_failaddr_t
__xfs_btree_check_lblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_btnum_t		btnum = cur->bc_btnum;
	int			crc = xfs_sb_version_hascrc(&mp->m_sb);

	if (crc) {
		/* CRC blocks self-describe with uuid, blkno and pad fields. */
		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		/* bp == NULL means an incore block with no disk address. */
		if (block->bb_u.l.bb_blkno !=
		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
			return __this_address;
		if (block->bb_u.l.bb_pad != cpu_to_be32(0))
			return __this_address;
	}

	/* Magic, level and record count checks apply to all formats. */
	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
		return __this_address;
	if (be16_to_cpu(block->bb_level) != level)
		return __this_address;
	if (be16_to_cpu(block->bb_numrecs) >
	    cur->bc_ops->get_maxrecs(cur, level))
		return __this_address;
	/* Sibling pointers must be null or point at valid fs blocks. */
	if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib),
			level + 1))
		return __this_address;
	if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib),
			level + 1))
		return __this_address;

	return NULL;
}
110
/*
 * Check a long btree block header.  Reports the corruption and returns
 * -EFSCORRUPTED when the check fails (or when the matching error-injection
 * tag fires); returns 0 on success.
 */
static int
xfs_btree_check_lblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_failaddr_t		fa;

	fa = __xfs_btree_check_lblock(cur, block, level, bp);
	if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
			XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}
132
/*
 * Check a short btree block header.  Return the address of the failing check,
 * or NULL if everything is ok.
 */
xfs_failaddr_t
__xfs_btree_check_sblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_btnum_t		btnum = cur->bc_btnum;
	int			crc = xfs_sb_version_hascrc(&mp->m_sb);

	if (crc) {
		/* CRC blocks self-describe with uuid and blkno fields. */
		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		/* bp == NULL means an incore block with no disk address. */
		if (block->bb_u.s.bb_blkno !=
		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
			return __this_address;
	}

	/* Magic, level and record count checks apply to all formats. */
	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
		return __this_address;
	if (be16_to_cpu(block->bb_level) != level)
		return __this_address;
	if (be16_to_cpu(block->bb_numrecs) >
	    cur->bc_ops->get_maxrecs(cur, level))
		return __this_address;
	/* Sibling pointers must be null or point within this AG. */
	if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
	    !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib),
			level + 1))
		return __this_address;
	if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
	    !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib),
			level + 1))
		return __this_address;

	return NULL;
}
174
/*
 * Check a short btree block header.  Reports the corruption and returns
 * -EFSCORRUPTED when the check fails (or when the matching error-injection
 * tag fires); returns 0 on success.
 */
STATIC int
xfs_btree_check_sblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_failaddr_t		fa;

	fa = __xfs_btree_check_sblock(cur, block, level, bp);
	if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
			XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}
196
197/*
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100198 * Debug routine: check that block header is ok.
199 */
200int
201xfs_btree_check_block(
202 struct xfs_btree_cur *cur, /* btree cursor */
203 struct xfs_btree_block *block, /* generic btree block pointer */
204 int level, /* level of the btree block */
205 struct xfs_buf *bp) /* buffer containing block, if any */
206{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100207 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
208 return xfs_btree_check_lblock(cur, block, level, bp);
209 else
210 return xfs_btree_check_sblock(cur, block, level, bp);
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100211}
212
Darrick J. Wongf1357612017-10-17 21:37:33 -0700213/* Check that this long pointer is valid and points within the fs. */
214bool
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100215xfs_btree_check_lptr(
Darrick J. Wongf1357612017-10-17 21:37:33 -0700216 struct xfs_btree_cur *cur,
217 xfs_fsblock_t fsbno,
218 int level)
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100219{
Darrick J. Wongf1357612017-10-17 21:37:33 -0700220 if (level <= 0)
221 return false;
222 return xfs_verify_fsbno(cur->bc_mp, fsbno);
223}
224
225/* Check that this short pointer is valid and points within the AG. */
226bool
227xfs_btree_check_sptr(
228 struct xfs_btree_cur *cur,
229 xfs_agblock_t agbno,
230 int level)
231{
232 if (level <= 0)
233 return false;
234 return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno);
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100235}
236
/*
 * Check that a given (indexed) btree pointer at a certain level of a
 * btree is valid and doesn't point past where it should.
 */
static int
xfs_btree_check_ptr(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			index,
	int			level)
{
	/*
	 * NOTE: XFS_WANT_CORRUPTED_RETURN embeds an early error return on
	 * failure, so control may leave this function inside the macro.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
				xfs_btree_check_lptr(cur,
					be64_to_cpu((&ptr->l)[index]), level));
	} else {
		XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
				xfs_btree_check_sptr(cur,
					be32_to_cpu((&ptr->s)[index]), level));
	}

	return 0;
}
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -0700260
261#ifdef DEBUG
262# define xfs_btree_debug_check_ptr xfs_btree_check_ptr
263#else
264# define xfs_btree_debug_check_ptr(...) (0)
Lachlan McIlroy24ee0e42008-10-30 17:05:26 +1100265#endif
Christoph Hellwiga23f6ef2008-10-30 16:54:53 +1100266
/*
 * Calculate CRC on the whole btree block and stuff it into the
 * long-form btree header.
 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item and
 * put it into the buffer so recovery knows what the last modification was
 * that made it to disk.
 */
void
xfs_btree_lblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	/* Nothing to do if the filesystem doesn't use CRCs. */
	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return;
	/* bip may be NULL when the buffer isn't attached to a log item. */
	if (bip)
		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
}
288
289bool
290xfs_btree_lblock_verify_crc(
291 struct xfs_buf *bp)
292{
Brian Fostera45086e2015-10-12 15:59:25 +1100293 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
294 struct xfs_mount *mp = bp->b_target->bt_mount;
295
296 if (xfs_sb_version_hascrc(&mp->m_sb)) {
297 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
298 return false;
Eric Sandeen51582172014-02-27 15:17:27 +1100299 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
Brian Fostera45086e2015-10-12 15:59:25 +1100300 }
Eric Sandeen51582172014-02-27 15:17:27 +1100301
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500302 return true;
303}
304
/*
 * Calculate CRC on the whole btree block and stuff it into the
 * short-form btree header.
 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item and
 * put it into the buffer so recovery knows what the last modification was
 * that made it to disk.
 */
void
xfs_btree_sblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	/* Nothing to do if the filesystem doesn't use CRCs. */
	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return;
	/* bip may be NULL when the buffer isn't attached to a log item. */
	if (bip)
		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}
326
327bool
328xfs_btree_sblock_verify_crc(
329 struct xfs_buf *bp)
330{
Brian Fostera45086e2015-10-12 15:59:25 +1100331 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
332 struct xfs_mount *mp = bp->b_target->bt_mount;
333
334 if (xfs_sb_version_hascrc(&mp->m_sb)) {
335 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
Darrick J. Wonga6a781a2018-01-08 10:51:03 -0800336 return __this_address;
Eric Sandeen51582172014-02-27 15:17:27 +1100337 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
Brian Fostera45086e2015-10-12 15:59:25 +1100338 }
Eric Sandeen51582172014-02-27 15:17:27 +1100339
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500340 return true;
341}
342
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +1100343static int
344xfs_btree_free_block(
345 struct xfs_btree_cur *cur,
346 struct xfs_buf *bp)
347{
348 int error;
349
350 error = cur->bc_ops->free_block(cur, bp);
Christoph Hellwigedfd9dd2016-02-08 14:58:07 +1100351 if (!error) {
352 xfs_trans_binval(cur->bc_tp, bp);
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +1100353 XFS_BTREE_STATS_INC(cur, free);
Christoph Hellwigedfd9dd2016-02-08 14:58:07 +1100354 }
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +1100355 return error;
356}
357
/*
 * Delete the btree cursor, releasing any held buffers.
 */
void
xfs_btree_del_cursor(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		error)		/* del because of error */
{
	int		i;		/* btree level */

	/*
	 * Clear the buffer pointers, and release the buffers.
	 * If we're doing this in the face of an error, we
	 * need to make sure to inspect all of the entries
	 * in the bc_bufs array for buffers to be unlocked.
	 * This is because some of the btree code works from
	 * level n down to 0, and if we get an error along
	 * the way we won't have initialized all the entries
	 * down to 0.
	 */
	for (i = 0; i < cur->bc_nlevels; i++) {
		if (cur->bc_bufs[i])
			xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
		else if (!error)
			break;
	}
	/*
	 * Can't free a bmap cursor without having dealt with the
	 * allocated indirect blocks' accounting.
	 */
	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
	       cur->bc_private.b.allocated == 0);
	/*
	 * Free the cursor.
	 */
	kmem_zone_free(xfs_btree_cur_zone, cur);
}
395
/*
 * Duplicate the btree cursor.
 * Allocate a new one, copy the record, re-get the buffers.
 *
 * On failure the partially-built duplicate is torn down and *ncur is
 * set to NULL.
 */
int					/* error */
xfs_btree_dup_cursor(
	xfs_btree_cur_t	*cur,		/* input cursor */
	xfs_btree_cur_t	**ncur)		/* output cursor */
{
	xfs_buf_t	*bp;		/* btree block's buffer pointer */
	int		error;		/* error return value */
	int		i;		/* level number of btree block */
	xfs_mount_t	*mp;		/* mount structure for filesystem */
	xfs_btree_cur_t	*new;		/* new cursor value */
	xfs_trans_t	*tp;		/* transaction pointer, can be NULL */

	tp = cur->bc_tp;
	mp = cur->bc_mp;

	/*
	 * Allocate a new cursor like the old one.
	 */
	new = cur->bc_ops->dup_cursor(cur);

	/*
	 * Copy the record currently in the cursor.
	 */
	new->bc_rec = cur->bc_rec;

	/*
	 * For each level current, re-get the buffer and copy the ptr value.
	 */
	for (i = 0; i < new->bc_nlevels; i++) {
		new->bc_ptrs[i] = cur->bc_ptrs[i];
		new->bc_ra[i] = cur->bc_ra[i];
		bp = cur->bc_bufs[i];
		if (bp) {
			/* Re-read so the new cursor holds its own reference. */
			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
						   XFS_BUF_ADDR(bp), mp->m_bsize,
						   0, &bp,
						   cur->bc_ops->buf_ops);
			if (error) {
				xfs_btree_del_cursor(new, error);
				*ncur = NULL;
				return error;
			}
		}
		new->bc_bufs[i] = bp;
	}
	*ncur = new;
	return 0;
}
448
449/*
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100450 * XFS btree block layout and addressing:
451 *
452 * There are two types of blocks in the btree: leaf and non-leaf blocks.
453 *
454 * The leaf record start with a header then followed by records containing
455 * the values. A non-leaf block also starts with the same header, and
456 * then first contains lookup keys followed by an equal number of pointers
457 * to the btree blocks at the previous level.
458 *
459 * +--------+-------+-------+-------+-------+-------+-------+
460 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
461 * +--------+-------+-------+-------+-------+-------+-------+
462 *
463 * +--------+-------+-------+-------+-------+-------+-------+
464 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
465 * +--------+-------+-------+-------+-------+-------+-------+
466 *
467 * The header is called struct xfs_btree_block for reasons better left unknown
468 * and comes in different versions for short (32bit) and long (64bit) block
469 * pointers. The record and key structures are defined by the btree instances
470 * and opaque to the btree core. The block pointers are simple disk endian
471 * integers, available in a short (32bit) and long (64bit) variant.
472 *
473 * The helpers below calculate the offset of a given record, key or pointer
474 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
475 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
476 * inside the btree block is done using indices starting at one, not zero!
Darrick J. Wong2c813ad2016-08-03 11:08:36 +1000477 *
478 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
479 * overlapping intervals. In such a tree, records are still sorted lowest to
480 * highest and indexed by the smallest key value that refers to the record.
481 * However, nodes are different: each pointer has two associated keys -- one
482 * indexing the lowest key available in the block(s) below (the same behavior
483 * as the key in a regular btree) and another indexing the highest key
484 * available in the block(s) below. Because records are /not/ sorted by the
485 * highest key, all leaf block updates require us to compute the highest key
486 * that matches any record in the leaf and to recursively update the high keys
487 * in the nodes going further up in the tree, if necessary. Nodes look like
488 * this:
489 *
490 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
491 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
492 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+
493 *
494 * To perform an interval query on an overlapped tree, perform the usual
495 * depth-first search and use the low and high keys to decide if we can skip
496 * that particular node. If a leaf node is reached, return the records that
497 * intersect the interval. Note that an interval query may return numerous
498 * entries. For a non-overlapped tree, simply search for the record associated
499 * with the lowest key and iterate forward until a non-matching record is
500 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
501 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in
502 * more detail.
503 *
504 * Why do we care about overlapping intervals? Let's say you have a bunch of
505 * reverse mapping records on a reflink filesystem:
506 *
507 * 1: +- file A startblock B offset C length D -----------+
508 * 2: +- file E startblock F offset G length H --------------+
509 * 3: +- file I startblock F offset J length K --+
510 * 4: +- file L... --+
511 *
512 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
513 * we'd simply increment the length of record 1. But how do we find the record
514 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
515 * record 3 because the keys are ordered first by startblock. An interval
516 * query would return records 1 and 2 because they both overlap (B+D-1), and
517 * from that we can pick out record 1 as the appropriate left neighbor.
518 *
519 * In the non-overlapped case you can do a LE lookup and decrement the cursor
520 * because a record's interval must end before the next record.
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100521 */
522
523/*
524 * Return size of the btree block header for this btree instance.
525 */
526static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
527{
Christoph Hellwigee1a47a2013-04-21 14:53:46 -0500528 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
529 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
530 return XFS_BTREE_LBLOCK_CRC_LEN;
531 return XFS_BTREE_LBLOCK_LEN;
532 }
533 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
534 return XFS_BTREE_SBLOCK_CRC_LEN;
535 return XFS_BTREE_SBLOCK_LEN;
Christoph Hellwig65f1eae2008-10-30 16:55:34 +1100536}
537
538/*
539 * Return size of btree block pointers for this btree instance.
540 */
541static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
542{
543 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
544 sizeof(__be64) : sizeof(__be32);
545}
546
/*
 * Calculate offset of the n-th record in a btree block.
 * Note that indices start at one, not zero.
 */
STATIC size_t
xfs_btree_rec_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->rec_len;
}
558
/*
 * Calculate offset of the n-th key in a btree block.
 * Note that indices start at one, not zero.
 */
STATIC size_t
xfs_btree_key_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->key_len;
}
570
/*
 * Calculate offset of the n-th high key in a btree block.
 *
 * In overlapping-interval btrees each key slot holds a (low, high) pair;
 * the high half starts key_len/2 bytes into the slot.
 */
STATIC size_t
xfs_btree_high_key_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
}
582
/*
 * Calculate offset of the n-th block pointer in a btree block.
 * Pointers follow the full array of keys for this level.
 */
STATIC size_t
xfs_btree_ptr_offset(
	struct xfs_btree_cur	*cur,
	int			n,
	int			level)
{
	return xfs_btree_block_len(cur) +
		cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
		(n - 1) * xfs_btree_ptr_len(cur);
}
596
/*
 * Return a pointer to the n-th record in the btree block.
 */
union xfs_btree_rec *
xfs_btree_rec_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	return (union xfs_btree_rec *)
		((char *)block + xfs_btree_rec_offset(cur, n));
}
609
/*
 * Return a pointer to the n-th key in the btree block.
 */
union xfs_btree_key *
xfs_btree_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	return (union xfs_btree_key *)
		((char *)block + xfs_btree_key_offset(cur, n));
}
622
/*
 * Return a pointer to the n-th high key in the btree block
 * (overlapping-interval btrees only).
 */
union xfs_btree_key *
xfs_btree_high_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	return (union xfs_btree_key *)
		((char *)block + xfs_btree_high_key_offset(cur, n));
}
635
/*
 * Return a pointer to the n-th block pointer in the btree block.
 * Only valid for non-leaf blocks (level != 0), since leaves hold
 * records rather than pointers.
 */
union xfs_btree_ptr *
xfs_btree_ptr_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	int			level = xfs_btree_get_level(block);

	ASSERT(block->bb_level != 0);

	return (union xfs_btree_ptr *)
		((char *)block + xfs_btree_ptr_offset(cur, n, level));
}
652
/*
 * Get the root block which is stored in the inode.
 *
 * For now this btree implementation assumes the btree root is always
 * stored in the if_broot field of an inode fork.
 */
STATIC struct xfs_btree_block *
xfs_btree_get_iroot(
	struct xfs_btree_cur	*cur)
{
	struct xfs_ifork	*ifp;

	ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
	return (struct xfs_btree_block *)ifp->if_broot;
}
668
/*
 * Retrieve the block pointer from the cursor at the given level.
 * This may be an inode btree root or from a buffer.
 */
struct xfs_btree_block *		/* generic btree block pointer */
xfs_btree_get_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level in btree */
	struct xfs_buf		**bpp)	/* buffer containing the block */
{
	/* The root of an inode-rooted btree lives in the fork, not a buffer. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1)) {
		*bpp = NULL;
		return xfs_btree_get_iroot(cur);
	}

	*bpp = cur->bc_bufs[level];
	return XFS_BUF_TO_BLOCK(*bpp);
}
688
/*
 * Get a buffer for the block, return it with no data read.
 * Long-form addressing.
 */
xfs_buf_t *				/* buffer for fsbno */
xfs_btree_get_bufl(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_fsblock_t	fsbno,		/* file system block number */
	uint		lock)		/* lock flags for get_buf */
{
	xfs_daddr_t	d;		/* real disk block address */

	ASSERT(fsbno != NULLFSBLOCK);
	/* Translate the fs block number to a disk address before handoff. */
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}
706
/*
 * Get a buffer for the block, return it with no data read.
 * Short-form addressing.
 */
xfs_buf_t *				/* buffer for agno/agbno */
xfs_btree_get_bufs(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_agblock_t	agbno,		/* allocation group block number */
	uint		lock)		/* lock flags for get_buf */
{
	xfs_daddr_t	d;		/* real disk block address */

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agbno != NULLAGBLOCK);
	/* Translate the AG-relative block number to a disk address. */
	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}
726
/*
 * Check for the cursor referring to the last block at the given level.
 * The last block at a level has a null right sibling pointer.
 */
int					/* 1=is last block, 0=not last block */
xfs_btree_islastblock(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			level)	/* level to check */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	xfs_buf_t		*bp;	/* buffer containing block */

	block = xfs_btree_get_block(cur, level, &bp);
	/* NOTE(review): the debug check's return value is ignored here. */
	xfs_btree_check_block(cur, block, level, bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
	else
		return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
}
745
746/*
Christoph Hellwigcdcf43332008-08-13 16:23:50 +1000747 * Change the cursor to point to the first record at the given level.
748 * Other levels are unaffected.
749 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +1100750STATIC int /* success=1, failure=0 */
Christoph Hellwigcdcf43332008-08-13 16:23:50 +1000751xfs_btree_firstrec(
752 xfs_btree_cur_t *cur, /* btree cursor */
753 int level) /* level to change */
754{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100755 struct xfs_btree_block *block; /* generic btree block pointer */
Christoph Hellwigcdcf43332008-08-13 16:23:50 +1000756 xfs_buf_t *bp; /* buffer containing block */
757
758 /*
759 * Get the block pointer for this level.
760 */
761 block = xfs_btree_get_block(cur, level, &bp);
Darrick J. Wong1e86eab2017-07-17 14:30:45 -0700762 if (xfs_btree_check_block(cur, block, level, bp))
763 return 0;
Christoph Hellwigcdcf43332008-08-13 16:23:50 +1000764 /*
765 * It's empty, there is no such record.
766 */
Christoph Hellwigf2277f02008-10-30 16:53:47 +1100767 if (!block->bb_numrecs)
Christoph Hellwigcdcf43332008-08-13 16:23:50 +1000768 return 0;
769 /*
770 * Set the ptr value to 1, that's the first record/key.
771 */
772 cur->bc_ptrs[level] = 1;
773 return 1;
774}
775
776/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777 * Change the cursor to point to the last record in the current block
778 * at the given level. Other levels are unaffected.
779 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +1100780STATIC int /* success=1, failure=0 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781xfs_btree_lastrec(
782 xfs_btree_cur_t *cur, /* btree cursor */
783 int level) /* level to change */
784{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100785 struct xfs_btree_block *block; /* generic btree block pointer */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 xfs_buf_t *bp; /* buffer containing block */
787
788 /*
789 * Get the block pointer for this level.
790 */
791 block = xfs_btree_get_block(cur, level, &bp);
Darrick J. Wong1e86eab2017-07-17 14:30:45 -0700792 if (xfs_btree_check_block(cur, block, level, bp))
793 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794 /*
795 * It's empty, there is no such record.
796 */
Christoph Hellwigf2277f02008-10-30 16:53:47 +1100797 if (!block->bb_numrecs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 return 0;
799 /*
800 * Set the ptr value to numrecs, that's the last record/key.
801 */
Christoph Hellwigf2277f02008-10-30 16:53:47 +1100802 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700803 return 1;
804}
805
806/*
807 * Compute first and last byte offsets for the fields given.
808 * Interprets the offsets table, which contains struct field offsets.
809 */
810void
811xfs_btree_offsets(
Darrick J. Wongc8ce5402017-06-16 11:00:05 -0700812 int64_t fields, /* bitmask of fields */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 const short *offsets, /* table of field offsets */
814 int nbits, /* number of bits to inspect */
815 int *first, /* output: first byte offset */
816 int *last) /* output: last byte offset */
817{
818 int i; /* current bit number */
Darrick J. Wongc8ce5402017-06-16 11:00:05 -0700819 int64_t imask; /* mask for current bit number */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820
821 ASSERT(fields != 0);
822 /*
823 * Find the lowest bit, so the first byte offset.
824 */
825 for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
826 if (imask & fields) {
827 *first = offsets[i];
828 break;
829 }
830 }
831 /*
832 * Find the highest bit, so the last byte offset.
833 */
834 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
835 if (imask & fields) {
836 *last = offsets[i + 1] - 1;
837 break;
838 }
839 }
840}
841
842/*
843 * Get a buffer for the block, return it read in.
844 * Long-form addressing.
845 */
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100846int
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847xfs_btree_read_bufl(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100848 struct xfs_mount *mp, /* file system mount point */
849 struct xfs_trans *tp, /* transaction pointer */
850 xfs_fsblock_t fsbno, /* file system block number */
851 uint lock, /* lock flags for read_buf */
852 struct xfs_buf **bpp, /* buffer for fsbno */
853 int refval, /* ref count value for buffer */
Dave Chinner1813dd62012-11-14 17:54:40 +1100854 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855{
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100856 struct xfs_buf *bp; /* return value */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857 xfs_daddr_t d; /* real disk block address */
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100858 int error;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700859
Darrick J. Wong59f6fec2018-01-08 10:51:00 -0800860 if (!xfs_verify_fsbno(mp, fsbno))
Darrick J. Wongd5a91ba2017-02-02 15:13:58 -0800861 return -EFSCORRUPTED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700862 d = XFS_FSB_TO_DADDR(mp, fsbno);
Dave Chinnerc3f8fc72012-11-12 22:54:01 +1100863 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
Dave Chinner1813dd62012-11-14 17:54:40 +1100864 mp->m_bsize, lock, &bp, ops);
Dave Chinnerc3f8fc72012-11-12 22:54:01 +1100865 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866 return error;
Dave Chinner821eb212010-12-02 16:31:13 +1100867 if (bp)
Christoph Hellwig38f23232011-10-10 16:52:45 +0000868 xfs_buf_set_ref(bp, refval);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 *bpp = bp;
870 return 0;
871}
872
873/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700874 * Read-ahead the block, don't wait for it, don't return a buffer.
875 * Long-form addressing.
876 */
877/* ARGSUSED */
878void
879xfs_btree_reada_bufl(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100880 struct xfs_mount *mp, /* file system mount point */
881 xfs_fsblock_t fsbno, /* file system block number */
882 xfs_extlen_t count, /* count of filesystem blocks */
Dave Chinner1813dd62012-11-14 17:54:40 +1100883 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700884{
885 xfs_daddr_t d;
886
887 ASSERT(fsbno != NULLFSBLOCK);
888 d = XFS_FSB_TO_DADDR(mp, fsbno);
Dave Chinner1813dd62012-11-14 17:54:40 +1100889 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700890}
891
892/*
893 * Read-ahead the block, don't wait for it, don't return a buffer.
894 * Short-form addressing.
895 */
896/* ARGSUSED */
897void
898xfs_btree_reada_bufs(
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100899 struct xfs_mount *mp, /* file system mount point */
900 xfs_agnumber_t agno, /* allocation group number */
901 xfs_agblock_t agbno, /* allocation group block number */
902 xfs_extlen_t count, /* count of filesystem blocks */
Dave Chinner1813dd62012-11-14 17:54:40 +1100903 const struct xfs_buf_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700904{
905 xfs_daddr_t d;
906
907 ASSERT(agno != NULLAGNUMBER);
908 ASSERT(agbno != NULLAGBLOCK);
909 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
Dave Chinner1813dd62012-11-14 17:54:40 +1100910 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700911}
912
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100913STATIC int
914xfs_btree_readahead_lblock(
915 struct xfs_btree_cur *cur,
916 int lr,
917 struct xfs_btree_block *block)
918{
919 int rval = 0;
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000920 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib);
921 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100922
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000923 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100924 xfs_btree_reada_bufl(cur->bc_mp, left, 1,
Dave Chinner1813dd62012-11-14 17:54:40 +1100925 cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100926 rval++;
927 }
928
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +1000929 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
Dave Chinner3d3e6f62012-11-12 22:54:08 +1100930 xfs_btree_reada_bufl(cur->bc_mp, right, 1,
Dave Chinner1813dd62012-11-14 17:54:40 +1100931 cur->bc_ops->buf_ops);
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100932 rval++;
933 }
934
935 return rval;
936}
937
/*
 * Issue readahead for the left and/or right siblings of a short-form
 * btree block, as selected by the XFS_BTCUR_{LEFT,RIGHT}RA bits in @lr.
 * Returns the number of readaheads actually issued.
 */
STATIC int
xfs_btree_readahead_sblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block	*block)
{
	int			rval = 0;
	xfs_agblock_t		left = be32_to_cpu(block->bb_u.s.bb_leftsib);
	xfs_agblock_t		right = be32_to_cpu(block->bb_u.s.bb_rightsib);


	/* Only read ahead in a direction that has a real sibling. */
	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
				     left, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
				     right, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}
963
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964/*
965 * Read-ahead btree blocks, at the given level.
966 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
967 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +1100968STATIC int
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100969xfs_btree_readahead(
970 struct xfs_btree_cur *cur, /* btree cursor */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 int lev, /* level in btree */
972 int lr) /* left/right bits */
973{
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100974 struct xfs_btree_block *block;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100976 /*
977 * No readahead needed if we are at the root level and the
978 * btree root is stored in the inode.
979 */
980 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
981 (lev == cur->bc_nlevels - 1))
982 return 0;
983
984 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
985 return 0;
986
Linus Torvalds1da177e2005-04-16 15:20:36 -0700987 cur->bc_ra[lev] |= lr;
Christoph Hellwigb524bfe2008-10-30 16:54:43 +1100988 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
989
990 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
991 return xfs_btree_readahead_lblock(cur, lr, block);
992 return xfs_btree_readahead_sblock(cur, lr, block);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993}
994
Dave Chinner21b5c972013-08-30 10:23:44 +1000995STATIC xfs_daddr_t
996xfs_btree_ptr_to_daddr(
997 struct xfs_btree_cur *cur,
998 union xfs_btree_ptr *ptr)
999{
1000 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001001 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));
Dave Chinner21b5c972013-08-30 10:23:44 +10001002
1003 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
1004 } else {
1005 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
1006 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
1007
1008 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
1009 be32_to_cpu(ptr->s));
1010 }
1011}
1012
1013/*
1014 * Readahead @count btree blocks at the given @ptr location.
1015 *
1016 * We don't need to care about long or short form btrees here as we have a
1017 * method of converting the ptr directly to a daddr available to us.
1018 */
1019STATIC void
1020xfs_btree_readahead_ptr(
1021 struct xfs_btree_cur *cur,
1022 union xfs_btree_ptr *ptr,
1023 xfs_extlen_t count)
1024{
1025 xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
1026 xfs_btree_ptr_to_daddr(cur, ptr),
1027 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
1028}
1029
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030/*
1031 * Set the buffer for level "lev" in the cursor to bp, releasing
1032 * any previous buffer.
1033 */
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00001034STATIC void
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035xfs_btree_setbuf(
1036 xfs_btree_cur_t *cur, /* btree cursor */
1037 int lev, /* level in btree */
1038 xfs_buf_t *bp) /* new buffer to set */
1039{
Christoph Hellwig7cc95a82008-10-30 17:14:34 +11001040 struct xfs_btree_block *b; /* btree block */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001041
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00001042 if (cur->bc_bufs[lev])
1043 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001044 cur->bc_bufs[lev] = bp;
1045 cur->bc_ra[lev] = 0;
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00001046
Linus Torvalds1da177e2005-04-16 15:20:36 -07001047 b = XFS_BUF_TO_BLOCK(bp);
Christoph Hellwige99ab902008-10-30 16:54:33 +11001048 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001049 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001051 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
1053 } else {
Christoph Hellwig69ef9212011-07-08 14:36:05 +02001054 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001055 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
Christoph Hellwig69ef9212011-07-08 14:36:05 +02001056 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001057 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
1058 }
1059}
Christoph Hellwig637aa502008-10-30 16:55:45 +11001060
Darrick J. Wongcc3e0942017-10-17 21:37:37 -07001061bool
Christoph Hellwig637aa502008-10-30 16:55:45 +11001062xfs_btree_ptr_is_null(
1063 struct xfs_btree_cur *cur,
1064 union xfs_btree_ptr *ptr)
1065{
1066 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001067 return ptr->l == cpu_to_be64(NULLFSBLOCK);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001068 else
Christoph Hellwig69ef9212011-07-08 14:36:05 +02001069 return ptr->s == cpu_to_be32(NULLAGBLOCK);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001070}
1071
Christoph Hellwig4b22a572008-10-30 16:57:40 +11001072STATIC void
1073xfs_btree_set_ptr_null(
1074 struct xfs_btree_cur *cur,
1075 union xfs_btree_ptr *ptr)
1076{
1077 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001078 ptr->l = cpu_to_be64(NULLFSBLOCK);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11001079 else
1080 ptr->s = cpu_to_be32(NULLAGBLOCK);
1081}
1082
Christoph Hellwig637aa502008-10-30 16:55:45 +11001083/*
1084 * Get/set/init sibling pointers
1085 */
Darrick J. Wongcc3e0942017-10-17 21:37:37 -07001086void
Christoph Hellwig637aa502008-10-30 16:55:45 +11001087xfs_btree_get_sibling(
1088 struct xfs_btree_cur *cur,
1089 struct xfs_btree_block *block,
1090 union xfs_btree_ptr *ptr,
1091 int lr)
1092{
1093 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1094
1095 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1096 if (lr == XFS_BB_RIGHTSIB)
1097 ptr->l = block->bb_u.l.bb_rightsib;
1098 else
1099 ptr->l = block->bb_u.l.bb_leftsib;
1100 } else {
1101 if (lr == XFS_BB_RIGHTSIB)
1102 ptr->s = block->bb_u.s.bb_rightsib;
1103 else
1104 ptr->s = block->bb_u.s.bb_leftsib;
1105 }
1106}
1107
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001108STATIC void
1109xfs_btree_set_sibling(
1110 struct xfs_btree_cur *cur,
1111 struct xfs_btree_block *block,
1112 union xfs_btree_ptr *ptr,
1113 int lr)
1114{
1115 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
1116
1117 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1118 if (lr == XFS_BB_RIGHTSIB)
1119 block->bb_u.l.bb_rightsib = ptr->l;
1120 else
1121 block->bb_u.l.bb_leftsib = ptr->l;
1122 } else {
1123 if (lr == XFS_BB_RIGHTSIB)
1124 block->bb_u.s.bb_rightsib = ptr->s;
1125 else
1126 block->bb_u.s.bb_leftsib = ptr->s;
1127 }
1128}
1129
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001130void
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001131xfs_btree_init_block_int(
1132 struct xfs_mount *mp,
1133 struct xfs_btree_block *buf,
1134 xfs_daddr_t blkno,
Eric Sandeenb6f41e42017-01-27 23:16:39 -08001135 xfs_btnum_t btnum,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001136 __u16 level,
1137 __u16 numrecs,
1138 __u64 owner,
1139 unsigned int flags)
1140{
Eric Sandeenf88ae462017-01-27 23:16:37 -08001141 int crc = xfs_sb_version_hascrc(&mp->m_sb);
Eric Sandeenb6f41e42017-01-27 23:16:39 -08001142 __u32 magic = xfs_btree_magic(crc, btnum);
Eric Sandeenf88ae462017-01-27 23:16:37 -08001143
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001144 buf->bb_magic = cpu_to_be32(magic);
1145 buf->bb_level = cpu_to_be16(level);
1146 buf->bb_numrecs = cpu_to_be16(numrecs);
1147
1148 if (flags & XFS_BTREE_LONG_PTRS) {
Christoph Hellwigd5cf09b2014-07-30 09:12:05 +10001149 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1150 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
Eric Sandeenf88ae462017-01-27 23:16:37 -08001151 if (crc) {
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001152 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
1153 buf->bb_u.l.bb_owner = cpu_to_be64(owner);
Eric Sandeence748ea2015-07-29 11:53:31 +10001154 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001155 buf->bb_u.l.bb_pad = 0;
Dave Chinnerb58fa552013-08-28 21:22:46 +10001156 buf->bb_u.l.bb_lsn = 0;
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001157 }
1158 } else {
1159 /* owner is a 32 bit value on short blocks */
1160 __u32 __owner = (__u32)owner;
1161
1162 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1163 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
Eric Sandeenf88ae462017-01-27 23:16:37 -08001164 if (crc) {
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001165 buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
1166 buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
Eric Sandeence748ea2015-07-29 11:53:31 +10001167 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
Dave Chinnerb58fa552013-08-28 21:22:46 +10001168 buf->bb_u.s.bb_lsn = 0;
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001169 }
1170 }
1171}
1172
/*
 * Initialise a new btree block held in buffer @bp.
 * Convenience wrapper around xfs_btree_init_block_int() that takes the
 * disk address from the buffer itself.
 */
void
xfs_btree_init_block(
	struct xfs_mount *mp,
	struct xfs_buf	*bp,
	xfs_btnum_t	btnum,
	__u16		level,
	__u16		numrecs,
	__u64		owner,
	unsigned int	flags)
{
	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 btnum, level, numrecs, owner, flags);
}
1186
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001187STATIC void
1188xfs_btree_init_block_cur(
1189 struct xfs_btree_cur *cur,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001190 struct xfs_buf *bp,
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001191 int level,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001192 int numrecs)
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001193{
Eric Sandeenaf7d20f2017-01-27 23:16:38 -08001194 __u64 owner;
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001195
1196 /*
1197 * we can pull the owner from the cursor right now as the different
1198 * owners align directly with the pointer size of the btree. This may
1199 * change in future, but is safe for current users of the generic btree
1200 * code.
1201 */
1202 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1203 owner = cur->bc_private.b.ip->i_ino;
1204 else
1205 owner = cur->bc_private.a.agno;
1206
1207 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
Eric Sandeenb6f41e42017-01-27 23:16:39 -08001208 cur->bc_btnum, level, numrecs,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001209 owner, cur->bc_flags);
Dave Chinnerb64f3a32012-11-13 16:40:27 -06001210}
1211
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001212/*
1213 * Return true if ptr is the last record in the btree and
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001214 * we need to track updates to this record. The decision
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001215 * will be further refined in the update_lastrec method.
1216 */
1217STATIC int
1218xfs_btree_is_lastrec(
1219 struct xfs_btree_cur *cur,
1220 struct xfs_btree_block *block,
1221 int level)
1222{
1223 union xfs_btree_ptr ptr;
1224
1225 if (level > 0)
1226 return 0;
1227 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1228 return 0;
1229
1230 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1231 if (!xfs_btree_ptr_is_null(cur, &ptr))
1232 return 0;
1233 return 1;
1234}
1235
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001236STATIC void
1237xfs_btree_buf_to_ptr(
1238 struct xfs_btree_cur *cur,
1239 struct xfs_buf *bp,
1240 union xfs_btree_ptr *ptr)
1241{
1242 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1243 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1244 XFS_BUF_ADDR(bp)));
1245 else {
Eric Sandeen9d87c312009-01-14 23:22:07 -06001246 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001247 XFS_BUF_ADDR(bp)));
1248 }
1249}
1250
Christoph Hellwig637aa502008-10-30 16:55:45 +11001251STATIC void
1252xfs_btree_set_refs(
1253 struct xfs_btree_cur *cur,
1254 struct xfs_buf *bp)
1255{
1256 switch (cur->bc_btnum) {
1257 case XFS_BTNUM_BNO:
1258 case XFS_BTNUM_CNT:
Christoph Hellwig38f23232011-10-10 16:52:45 +00001259 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001260 break;
1261 case XFS_BTNUM_INO:
Brian Fosteraafc3c22014-04-24 16:00:52 +10001262 case XFS_BTNUM_FINO:
Christoph Hellwig38f23232011-10-10 16:52:45 +00001263 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001264 break;
1265 case XFS_BTNUM_BMAP:
Christoph Hellwig38f23232011-10-10 16:52:45 +00001266 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001267 break;
Darrick J. Wong035e00a2016-08-03 11:36:07 +10001268 case XFS_BTNUM_RMAP:
1269 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
1270 break;
Darrick J. Wong1946b912016-10-03 09:11:18 -07001271 case XFS_BTNUM_REFC:
1272 xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
1273 break;
Christoph Hellwig637aa502008-10-30 16:55:45 +11001274 default:
1275 ASSERT(0);
1276 }
1277}
1278
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001279STATIC int
1280xfs_btree_get_buf_block(
1281 struct xfs_btree_cur *cur,
1282 union xfs_btree_ptr *ptr,
1283 int flags,
1284 struct xfs_btree_block **block,
1285 struct xfs_buf **bpp)
1286{
1287 struct xfs_mount *mp = cur->bc_mp;
1288 xfs_daddr_t d;
1289
1290 /* need to sort out how callers deal with failures first */
Christoph Hellwig0cadda12010-01-19 09:56:44 +00001291 ASSERT(!(flags & XBF_TRYLOCK));
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001292
1293 d = xfs_btree_ptr_to_daddr(cur, ptr);
1294 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1295 mp->m_bsize, flags);
1296
Chandra Seetharaman2a30f36d2011-09-20 13:56:55 +00001297 if (!*bpp)
Dave Chinner24513372014-06-25 14:58:08 +10001298 return -ENOMEM;
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001299
Dave Chinner1813dd62012-11-14 17:54:40 +11001300 (*bpp)->b_ops = cur->bc_ops->buf_ops;
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11001301 *block = XFS_BUF_TO_BLOCK(*bpp);
1302 return 0;
1303}
1304
Christoph Hellwig637aa502008-10-30 16:55:45 +11001305/*
1306 * Read in the buffer at the given ptr and return the buffer and
1307 * the block pointer within the buffer.
1308 */
1309STATIC int
1310xfs_btree_read_buf_block(
1311 struct xfs_btree_cur *cur,
1312 union xfs_btree_ptr *ptr,
Christoph Hellwig637aa502008-10-30 16:55:45 +11001313 int flags,
1314 struct xfs_btree_block **block,
1315 struct xfs_buf **bpp)
1316{
1317 struct xfs_mount *mp = cur->bc_mp;
1318 xfs_daddr_t d;
1319 int error;
1320
1321 /* need to sort out how callers deal with failures first */
Christoph Hellwig0cadda12010-01-19 09:56:44 +00001322 ASSERT(!(flags & XBF_TRYLOCK));
Christoph Hellwig637aa502008-10-30 16:55:45 +11001323
1324 d = xfs_btree_ptr_to_daddr(cur, ptr);
1325 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
Dave Chinner3d3e6f62012-11-12 22:54:08 +11001326 mp->m_bsize, flags, bpp,
Dave Chinner1813dd62012-11-14 17:54:40 +11001327 cur->bc_ops->buf_ops);
Christoph Hellwig637aa502008-10-30 16:55:45 +11001328 if (error)
1329 return error;
1330
Christoph Hellwig637aa502008-10-30 16:55:45 +11001331 xfs_btree_set_refs(cur, *bpp);
1332 *block = XFS_BUF_TO_BLOCK(*bpp);
Dave Chinner3d3e6f62012-11-12 22:54:08 +11001333 return 0;
Christoph Hellwig637aa502008-10-30 16:55:45 +11001334}
1335
1336/*
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001337 * Copy keys from one btree block to another.
1338 */
1339STATIC void
1340xfs_btree_copy_keys(
1341 struct xfs_btree_cur *cur,
1342 union xfs_btree_key *dst_key,
1343 union xfs_btree_key *src_key,
1344 int numkeys)
1345{
1346 ASSERT(numkeys >= 0);
1347 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1348}
1349
1350/*
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001351 * Copy records from one btree block to another.
1352 */
1353STATIC void
1354xfs_btree_copy_recs(
1355 struct xfs_btree_cur *cur,
1356 union xfs_btree_rec *dst_rec,
1357 union xfs_btree_rec *src_rec,
1358 int numrecs)
1359{
1360 ASSERT(numrecs >= 0);
1361 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1362}
1363
1364/*
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001365 * Copy block pointers from one btree block to another.
1366 */
1367STATIC void
1368xfs_btree_copy_ptrs(
1369 struct xfs_btree_cur *cur,
1370 union xfs_btree_ptr *dst_ptr,
1371 union xfs_btree_ptr *src_ptr,
1372 int numptrs)
1373{
1374 ASSERT(numptrs >= 0);
1375 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1376}
1377
1378/*
1379 * Shift keys one index left/right inside a single btree block.
1380 */
1381STATIC void
1382xfs_btree_shift_keys(
1383 struct xfs_btree_cur *cur,
1384 union xfs_btree_key *key,
1385 int dir,
1386 int numkeys)
1387{
1388 char *dst_key;
1389
1390 ASSERT(numkeys >= 0);
1391 ASSERT(dir == 1 || dir == -1);
1392
1393 dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1394 memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1395}
1396
1397/*
1398 * Shift records one index left/right inside a single btree block.
1399 */
1400STATIC void
1401xfs_btree_shift_recs(
1402 struct xfs_btree_cur *cur,
1403 union xfs_btree_rec *rec,
1404 int dir,
1405 int numrecs)
1406{
1407 char *dst_rec;
1408
1409 ASSERT(numrecs >= 0);
1410 ASSERT(dir == 1 || dir == -1);
1411
1412 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1413 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1414}
1415
1416/*
1417 * Shift block pointers one index left/right inside a single btree block.
1418 */
1419STATIC void
1420xfs_btree_shift_ptrs(
1421 struct xfs_btree_cur *cur,
1422 union xfs_btree_ptr *ptr,
1423 int dir,
1424 int numptrs)
1425{
1426 char *dst_ptr;
1427
1428 ASSERT(numptrs >= 0);
1429 ASSERT(dir == 1 || dir == -1);
1430
1431 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1432 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1433}
1434
1435/*
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001436 * Log key values from the btree block.
1437 */
1438STATIC void
1439xfs_btree_log_keys(
1440 struct xfs_btree_cur *cur,
1441 struct xfs_buf *bp,
1442 int first,
1443 int last)
1444{
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001445
1446 if (bp) {
Dave Chinner61fe1352013-04-03 16:11:30 +11001447 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001448 xfs_trans_log_buf(cur->bc_tp, bp,
1449 xfs_btree_key_offset(cur, first),
1450 xfs_btree_key_offset(cur, last + 1) - 1);
1451 } else {
1452 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1453 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1454 }
Christoph Hellwig38bb7422008-10-30 16:56:22 +11001455}
1456
1457/*
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001458 * Log record values from the btree block.
1459 */
Christoph Hellwigfd6bcc5b2008-10-30 16:58:21 +11001460void
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001461xfs_btree_log_recs(
1462 struct xfs_btree_cur *cur,
1463 struct xfs_buf *bp,
1464 int first,
1465 int last)
1466{
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001467
Dave Chinner61fe1352013-04-03 16:11:30 +11001468 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001469 xfs_trans_log_buf(cur->bc_tp, bp,
1470 xfs_btree_rec_offset(cur, first),
1471 xfs_btree_rec_offset(cur, last + 1) - 1);
1472
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11001473}
1474
1475/*
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001476 * Log block pointer fields from a btree block (nonleaf).
1477 */
1478STATIC void
1479xfs_btree_log_ptrs(
1480 struct xfs_btree_cur *cur, /* btree cursor */
1481 struct xfs_buf *bp, /* buffer containing btree block */
1482 int first, /* index of first pointer to log */
1483 int last) /* index of last pointer to log */
1484{
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001485
1486 if (bp) {
1487 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
1488 int level = xfs_btree_get_level(block);
1489
Dave Chinner61fe1352013-04-03 16:11:30 +11001490 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001491 xfs_trans_log_buf(cur->bc_tp, bp,
1492 xfs_btree_ptr_offset(cur, first, level),
1493 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1494 } else {
1495 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1496 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1497 }
1498
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001499}
1500
1501/*
1502 * Log fields from a btree block header.
1503 */
Christoph Hellwigfd6bcc5b2008-10-30 16:58:21 +11001504void
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001505xfs_btree_log_block(
1506 struct xfs_btree_cur *cur, /* btree cursor */
1507 struct xfs_buf *bp, /* buffer containing btree block */
1508 int fields) /* mask of fields: XFS_BB_... */
1509{
1510 int first; /* first byte offset logged */
1511 int last; /* last byte offset logged */
1512 static const short soffsets[] = { /* table of offsets (short) */
Christoph Hellwig7cc95a82008-10-30 17:14:34 +11001513 offsetof(struct xfs_btree_block, bb_magic),
1514 offsetof(struct xfs_btree_block, bb_level),
1515 offsetof(struct xfs_btree_block, bb_numrecs),
1516 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
1517 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001518 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
1519 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
1520 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
1521 offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
1522 offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
1523 XFS_BTREE_SBLOCK_CRC_LEN
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001524 };
1525 static const short loffsets[] = { /* table of offsets (long) */
Christoph Hellwig7cc95a82008-10-30 17:14:34 +11001526 offsetof(struct xfs_btree_block, bb_magic),
1527 offsetof(struct xfs_btree_block, bb_level),
1528 offsetof(struct xfs_btree_block, bb_numrecs),
1529 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
1530 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001531 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
1532 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
1533 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
1534 offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
1535 offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
1536 offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
1537 XFS_BTREE_LBLOCK_CRC_LEN
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001538 };
1539
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001540 if (bp) {
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001541 int nbits;
1542
1543 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
1544 /*
1545 * We don't log the CRC when updating a btree
1546 * block but instead recreate it during log
1547 * recovery. As the log buffers have checksums
1548 * of their own this is safe and avoids logging a crc
1549 * update in a lot of places.
1550 */
1551 if (fields == XFS_BB_ALL_BITS)
1552 fields = XFS_BB_ALL_BITS_CRC;
1553 nbits = XFS_BB_NUM_BITS_CRC;
1554 } else {
1555 nbits = XFS_BB_NUM_BITS;
1556 }
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001557 xfs_btree_offsets(fields,
1558 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1559 loffsets : soffsets,
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05001560 nbits, &first, &last);
Dave Chinner61fe1352013-04-03 16:11:30 +11001561 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001562 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1563 } else {
1564 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1565 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1566 }
Christoph Hellwig9eaead52008-10-30 16:56:43 +11001567}
1568
1569/*
Christoph Hellwig637aa502008-10-30 16:55:45 +11001570 * Increment cursor by one record at the level.
1571 * For nonzero levels the leaf-ward information is untouched.
1572 */
int						/* error */
xfs_btree_increment(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	union xfs_btree_ptr	ptr;
	struct xfs_buf		*bp;
	int			error;		/* error return value */
	int			lev;
	/*
	 * Returns 0 with *stat = 1 on success, 0 with *stat = 0 if there is
	 * no next record (right edge of the tree), or a negative errno.
	 */

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the right at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* We're done if we remain in the block after the increment. */
	if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
		goto out1;

	/* Fail if we just went off the right edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, increment);

	/*
	 * March up the tree incrementing pointers.
	 * Stop when we don't go off the right edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		block = xfs_btree_get_block(cur, lev, &bp);

#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, lev, bp);
		if (error)
			goto error0;
#endif

		if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
			break;

		/* Read-ahead the right block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 * Note: the pointer slot is read at level 'lev' BEFORE lev is
	 * decremented, then the child block it names is read in and the
	 * cursor at the child level is reset to entry 1 (leftmost).
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;

		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = 1;
	}
out1:
	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;
}
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001670
1671/*
1672 * Decrement cursor by one record at the level.
1673 * For nonzero levels the leaf-ward information is untouched.
1674 */
int						/* error */
xfs_btree_decrement(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	xfs_buf_t		*bp;
	int			error;		/* error return value */
	int			lev;
	union xfs_btree_ptr	ptr;
	/*
	 * Mirror of xfs_btree_increment(): returns 0 with *stat = 1 on
	 * success, 0 with *stat = 0 when there is no previous record (left
	 * edge of the tree), or a negative errno.
	 */

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the left at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);

	/* We're done if we remain in the block after the decrement. */
	if (--cur->bc_ptrs[level] > 0)
		goto out1;

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* Fail if we just went off the left edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, decrement);

	/*
	 * March up the tree decrementing pointers.
	 * Stop when we don't go off the left edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_ptrs[lev] > 0)
			break;
		/* Read-ahead the left block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}

	/*
	 * If we went off the root then we are seriously confused.
	 * or the root of the tree is in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 * Unlike increment, each descended level's cursor is set to the
	 * block's record count (rightmost entry), since we moved left.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;
		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
	}
out1:
	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;
}
1762
Darrick J. Wong26788092017-06-16 11:00:07 -07001763int
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001764xfs_btree_lookup_get_block(
1765 struct xfs_btree_cur *cur, /* btree cursor */
1766 int level, /* level in the btree */
1767 union xfs_btree_ptr *pp, /* ptr to btree block */
1768 struct xfs_btree_block **blkp) /* return btree block */
1769{
1770 struct xfs_buf *bp; /* buffer pointer for btree block */
1771 int error = 0;
1772
1773 /* special case the root block if in an inode */
1774 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1775 (level == cur->bc_nlevels - 1)) {
1776 *blkp = xfs_btree_get_iroot(cur);
1777 return 0;
1778 }
1779
1780 /*
1781 * If the old buffer at this level for the disk address we are
1782 * looking for re-use it.
1783 *
1784 * Otherwise throw it away and get a new one.
1785 */
1786 bp = cur->bc_bufs[level];
1787 if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1788 *blkp = XFS_BUF_TO_BLOCK(bp);
1789 return 0;
1790 }
1791
Eric Sandeen0d7409b2014-04-14 18:59:56 +10001792 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001793 if (error)
1794 return error;
1795
Darrick J. Wongbb3be7e2016-12-05 12:33:54 +11001796 /* Check the inode owner since the verifiers don't. */
1797 if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
Brian Foster99c794c2017-08-29 10:08:39 -07001798 !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) &&
Darrick J. Wongbb3be7e2016-12-05 12:33:54 +11001799 (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
1800 be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
1801 cur->bc_private.b.ip->i_ino)
1802 goto out_bad;
1803
1804 /* Did we get the level we were looking for? */
1805 if (be16_to_cpu((*blkp)->bb_level) != level)
1806 goto out_bad;
1807
1808 /* Check that internal nodes have at least one record. */
1809 if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
1810 goto out_bad;
1811
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001812 xfs_btree_setbuf(cur, level, bp);
1813 return 0;
Darrick J. Wongbb3be7e2016-12-05 12:33:54 +11001814
1815out_bad:
1816 *blkp = NULL;
1817 xfs_trans_brelse(cur->bc_tp, bp);
1818 return -EFSCORRUPTED;
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001819}
1820
1821/*
1822 * Get current search key. For level 0 we don't actually have a key
1823 * structure so we make one up from the record. For all other levels
1824 * we just return the right key.
1825 */
1826STATIC union xfs_btree_key *
1827xfs_lookup_get_search_key(
1828 struct xfs_btree_cur *cur,
1829 int level,
1830 int keyno,
1831 struct xfs_btree_block *block,
1832 union xfs_btree_key *kp)
1833{
1834 if (level == 0) {
1835 cur->bc_ops->init_key_from_rec(kp,
1836 xfs_btree_rec_addr(cur, keyno, block));
1837 return kp;
1838 }
1839
1840 return xfs_btree_key_addr(cur, keyno, block);
1841}
1842
1843/*
1844 * Lookup the record. The cursor is made to point to it, based on dir.
Zhi Yong Wu49d3da12013-08-07 10:11:00 +00001845 * stat is set to 0 if can't find any such record, 1 for success.
Christoph Hellwigfe033cc2008-10-30 16:56:09 +11001846 */
int					/* error */
xfs_btree_lookup(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_lookup_t		dir,	/* <=, ==, or >= */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* current btree block */
	int64_t			diff;	/* difference for the current key */
	int			error;	/* error return value */
	int			keyno;	/* current key number */
	int			level;	/* level in the btree */
	union xfs_btree_ptr	*pp;	/* ptr to btree block */
	union xfs_btree_ptr	ptr;	/* ptr to btree block */

	XFS_BTREE_STATS_INC(cur, lookup);

	/* No such thing as a zero-level tree. */
	if (cur->bc_nlevels == 0)
		return -EFSCORRUPTED;

	block = NULL;
	keyno = 0;

	/* initialise start pointer from cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	pp = &ptr;

	/*
	 * Iterate over each level in the btree, starting at the root.
	 * For each level above the leaves, find the key we need, based
	 * on the lookup record, then follow the corresponding block
	 * pointer down to the next level.
	 *
	 * diff carries the comparison result down the levels: it starts
	 * nonzero (forcing a search of the root) and, once an exact match
	 * is found at some level, stays 0 so lower levels take the first
	 * entry instead of re-searching.
	 */
	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
		/* Get the block we need to do the lookup on. */
		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
		if (error)
			goto error0;

		if (diff == 0) {
			/*
			 * If we already had a key match at a higher level, we
			 * know we need to use the first entry in this block.
			 */
			keyno = 1;
		} else {
			/* Otherwise search this block. Do a binary search. */

			int	high;	/* high entry number */
			int	low;	/* low entry number */

			/* Set low and high entry numbers, 1-based. */
			low = 1;
			high = xfs_btree_get_numrecs(block);
			if (!high) {
				/* Block is empty, must be an empty leaf. */
				if (level != 0 || cur->bc_nlevels != 1) {
					XFS_CORRUPTION_ERROR(__func__,
							XFS_ERRLEVEL_LOW,
							cur->bc_mp, block);
					return -EFSCORRUPTED;
				}

				/* Empty tree: position depends on dir. */
				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
				*stat = 0;
				return 0;
			}

			/* Binary search the block. */
			while (low <= high) {
				union xfs_btree_key	key;
				union xfs_btree_key	*kp;

				XFS_BTREE_STATS_INC(cur, compare);

				/* keyno is average of low and high. */
				keyno = (low + high) >> 1;

				/* Get current search key */
				kp = xfs_lookup_get_search_key(cur, level,
						keyno, block, &key);

				/*
				 * Compute difference to get next direction:
				 *  - less than, move right
				 *  - greater than, move left
				 *  - equal, we're done
				 */
				diff = cur->bc_ops->key_diff(cur, kp);
				if (diff < 0)
					low = keyno + 1;
				else if (diff > 0)
					high = keyno - 1;
				else
					break;
			}
		}

		/*
		 * If there are more levels, set up for the next level
		 * by getting the block number and filling in the cursor.
		 */
		if (level > 0) {
			/*
			 * If we moved left, need the previous key number,
			 * unless there isn't one.
			 */
			if (diff > 0 && --keyno < 1)
				keyno = 1;
			pp = xfs_btree_ptr_addr(cur, keyno, block);

			error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
			if (error)
				goto error0;

			cur->bc_ptrs[level] = keyno;
		}
	}

	/* Done with the search. See if we need to adjust the results. */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.
		 */
		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
		if (dir == XFS_LOOKUP_GE &&
		    keyno > xfs_btree_get_numrecs(block) &&
		    !xfs_btree_ptr_is_null(cur, &ptr)) {
			int	i;

			/* Step into the right sibling's first record. */
			cur->bc_ptrs[0] = keyno;
			error = xfs_btree_increment(cur, 0, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
			*stat = 1;
			return 0;
		}
	} else if (dir == XFS_LOOKUP_LE && diff > 0)
		keyno--;
	cur->bc_ptrs[0] = keyno;

	/* Return if we succeeded or not. */
	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
		*stat = 0;
	else if (dir != XFS_LOOKUP_EQ || diff == 0)
		*stat = 1;
	else
		*stat = 0;
	return 0;

error0:
	return error;
}
Christoph Hellwig38bb7422008-10-30 16:56:22 +11002003
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002004/* Find the high key storage area from a regular key. */
Darrick J. Wong2fdbec52017-10-25 15:03:46 -07002005union xfs_btree_key *
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002006xfs_btree_high_key_from_key(
2007 struct xfs_btree_cur *cur,
2008 union xfs_btree_key *key)
2009{
2010 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
2011 return (union xfs_btree_key *)((char *)key +
2012 (cur->bc_ops->key_len / 2));
2013}
2014
Darrick J. Wong973b8312016-08-03 12:22:12 +10002015/* Determine the low (and high if overlapped) keys of a leaf block */
2016STATIC void
2017xfs_btree_get_leaf_keys(
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002018 struct xfs_btree_cur *cur,
2019 struct xfs_btree_block *block,
2020 union xfs_btree_key *key)
2021{
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002022 union xfs_btree_key max_hkey;
2023 union xfs_btree_key hkey;
Darrick J. Wong973b8312016-08-03 12:22:12 +10002024 union xfs_btree_rec *rec;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002025 union xfs_btree_key *high;
Darrick J. Wong973b8312016-08-03 12:22:12 +10002026 int n;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002027
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002028 rec = xfs_btree_rec_addr(cur, 1, block);
2029 cur->bc_ops->init_key_from_rec(key, rec);
2030
Darrick J. Wong973b8312016-08-03 12:22:12 +10002031 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002032
Darrick J. Wong973b8312016-08-03 12:22:12 +10002033 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
2034 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2035 rec = xfs_btree_rec_addr(cur, n, block);
2036 cur->bc_ops->init_high_key_from_rec(&hkey, rec);
2037 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
2038 > 0)
2039 max_hkey = hkey;
2040 }
2041
2042 high = xfs_btree_high_key_from_key(cur, key);
2043 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
2044 }
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002045}
2046
Darrick J. Wong973b8312016-08-03 12:22:12 +10002047/* Determine the low (and high if overlapped) keys of a node block */
2048STATIC void
2049xfs_btree_get_node_keys(
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002050 struct xfs_btree_cur *cur,
2051 struct xfs_btree_block *block,
2052 union xfs_btree_key *key)
2053{
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002054 union xfs_btree_key *hkey;
2055 union xfs_btree_key *max_hkey;
2056 union xfs_btree_key *high;
Darrick J. Wong973b8312016-08-03 12:22:12 +10002057 int n;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002058
Darrick J. Wong973b8312016-08-03 12:22:12 +10002059 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2060 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2061 cur->bc_ops->key_len / 2);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002062
Darrick J. Wong973b8312016-08-03 12:22:12 +10002063 max_hkey = xfs_btree_high_key_addr(cur, 1, block);
2064 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
2065 hkey = xfs_btree_high_key_addr(cur, n, block);
2066 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
2067 max_hkey = hkey;
2068 }
2069
2070 high = xfs_btree_high_key_from_key(cur, key);
2071 memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
2072 } else {
2073 memcpy(key, xfs_btree_key_addr(cur, 1, block),
2074 cur->bc_ops->key_len);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002075 }
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002076}
2077
Darrick J. Wong70b22652016-08-03 11:03:38 +10002078/* Derive the keys for any btree block. */
Darrick J. Wong2fdbec52017-10-25 15:03:46 -07002079void
Darrick J. Wong70b22652016-08-03 11:03:38 +10002080xfs_btree_get_keys(
2081 struct xfs_btree_cur *cur,
2082 struct xfs_btree_block *block,
2083 union xfs_btree_key *key)
2084{
2085 if (be16_to_cpu(block->bb_level) == 0)
Darrick J. Wong973b8312016-08-03 12:22:12 +10002086 xfs_btree_get_leaf_keys(cur, block, key);
Darrick J. Wong70b22652016-08-03 11:03:38 +10002087 else
Darrick J. Wong973b8312016-08-03 12:22:12 +10002088 xfs_btree_get_node_keys(cur, block, key);
Darrick J. Wong70b22652016-08-03 11:03:38 +10002089}
2090
2091/*
2092 * Decide if we need to update the parent keys of a btree block. For
2093 * a standard btree this is only necessary if we're updating the first
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002094 * record/key. For an overlapping btree, we must always update the
2095 * keys because the highest key can be in any of the records or keys
2096 * in the block.
Darrick J. Wong70b22652016-08-03 11:03:38 +10002097 */
2098static inline bool
2099xfs_btree_needs_key_update(
2100 struct xfs_btree_cur *cur,
2101 int ptr)
2102{
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002103 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
2104}
2105
2106/*
2107 * Update the low and high parent keys of the given level, progressing
2108 * towards the root. If force_all is false, stop if the keys for a given
2109 * level do not need updating.
2110 */
STATIC int
__xfs_btree_updkeys(
	struct xfs_btree_cur	*cur,
	int			level,
	struct xfs_btree_block	*block,
	struct xfs_buf		*bp0,
	bool			force_all)
{
	union xfs_btree_key	key;	/* keys from current level */
	union xfs_btree_key	*lkey;	/* keys from the next level up */
	union xfs_btree_key	*hkey;
	union xfs_btree_key	*nlkey;	/* keys from the next level up */
	union xfs_btree_key	*nhkey;
	struct xfs_buf		*bp;
	int			ptr;

	/* Only overlapped btrees store a low/high key pair per entry. */
	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);

	/* Exit if there aren't any parent levels to update. */
	if (level + 1 >= cur->bc_nlevels)
		return 0;

	trace_xfs_btree_updkeys(cur, level, bp0);

	/*
	 * 'key' is one scratch union holding both halves: lkey aliases the
	 * low-key half, hkey the high-key half (second half of the union).
	 */
	lkey = &key;
	hkey = xfs_btree_high_key_from_key(cur, lkey);
	xfs_btree_get_keys(cur, block, lkey);
	for (level++; level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
		trace_xfs_btree_updkeys(cur, level, bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error)
			return error;
#endif
		/* Parent entry the cursor points at for this level. */
		ptr = cur->bc_ptrs[level];
		nlkey = xfs_btree_key_addr(cur, ptr, block);
		nhkey = xfs_btree_high_key_addr(cur, ptr, block);
		/*
		 * Unless forced, stop as soon as the parent's low and high
		 * keys already match what we computed from the child.
		 */
		if (!force_all &&
		    !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
		      cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
			break;
		xfs_btree_copy_keys(cur, nlkey, lkey, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
		if (level + 1 >= cur->bc_nlevels)
			break;
		/* Recompute the key pair from this node for the next level. */
		xfs_btree_get_node_keys(cur, block, lkey);
	}

	return 0;
}
2165
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002166/* Update all the keys from some level in cursor back to the root. */
2167STATIC int
2168xfs_btree_updkeys_force(
2169 struct xfs_btree_cur *cur,
2170 int level)
2171{
2172 struct xfs_buf *bp;
2173 struct xfs_btree_block *block;
2174
2175 block = xfs_btree_get_block(cur, level, &bp);
2176 return __xfs_btree_updkeys(cur, level, block, bp, true);
Darrick J. Wong70b22652016-08-03 11:03:38 +10002177}
2178
2179/*
2180 * Update the parent keys of the given level, progressing towards the root.
2181 */
Darrick J. Wong973b8312016-08-03 12:22:12 +10002182STATIC int
Darrick J. Wong70b22652016-08-03 11:03:38 +10002183xfs_btree_update_keys(
2184 struct xfs_btree_cur *cur,
Christoph Hellwig38bb7422008-10-30 16:56:22 +11002185 int level)
2186{
2187 struct xfs_btree_block *block;
2188 struct xfs_buf *bp;
2189 union xfs_btree_key *kp;
Darrick J. Wong70b22652016-08-03 11:03:38 +10002190 union xfs_btree_key key;
Christoph Hellwig38bb7422008-10-30 16:56:22 +11002191 int ptr;
2192
Darrick J. Wong973b8312016-08-03 12:22:12 +10002193 ASSERT(level >= 0);
2194
2195 block = xfs_btree_get_block(cur, level, &bp);
2196 if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
2197 return __xfs_btree_updkeys(cur, level, block, bp, false);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002198
Christoph Hellwig38bb7422008-10-30 16:56:22 +11002199 /*
2200 * Go up the tree from this level toward the root.
2201 * At each level, update the key value to the value input.
2202 * Stop when we reach a level where the cursor isn't pointing
2203 * at the first entry in the block.
2204 */
Darrick J. Wong70b22652016-08-03 11:03:38 +10002205 xfs_btree_get_keys(cur, block, &key);
2206 for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
Christoph Hellwig38bb7422008-10-30 16:56:22 +11002207#ifdef DEBUG
2208 int error;
2209#endif
2210 block = xfs_btree_get_block(cur, level, &bp);
2211#ifdef DEBUG
2212 error = xfs_btree_check_block(cur, block, level, bp);
Carlos Maiolinoe157ebd2018-03-06 17:03:30 -08002213 if (error)
Christoph Hellwig38bb7422008-10-30 16:56:22 +11002214 return error;
Christoph Hellwig38bb7422008-10-30 16:56:22 +11002215#endif
2216 ptr = cur->bc_ptrs[level];
2217 kp = xfs_btree_key_addr(cur, ptr, block);
Darrick J. Wong70b22652016-08-03 11:03:38 +10002218 xfs_btree_copy_keys(cur, kp, &key, 1);
Christoph Hellwig38bb7422008-10-30 16:56:22 +11002219 xfs_btree_log_keys(cur, bp, ptr, ptr);
2220 }
2221
Christoph Hellwig38bb7422008-10-30 16:56:22 +11002222 return 0;
2223}
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11002224
2225/*
2226 * Update the record referred to by cur to the value in the
2227 * given record. This either works (return 0) or gets an
2228 * EFSCORRUPTED error.
2229 */
2230int
2231xfs_btree_update(
2232 struct xfs_btree_cur *cur,
2233 union xfs_btree_rec *rec)
2234{
2235 struct xfs_btree_block *block;
2236 struct xfs_buf *bp;
2237 int error;
2238 int ptr;
2239 union xfs_btree_rec *rp;
2240
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11002241 /* Pick up the current block. */
2242 block = xfs_btree_get_block(cur, 0, &bp);
2243
2244#ifdef DEBUG
2245 error = xfs_btree_check_block(cur, block, 0, bp);
2246 if (error)
2247 goto error0;
2248#endif
2249 /* Get the address of the rec to be updated. */
2250 ptr = cur->bc_ptrs[0];
2251 rp = xfs_btree_rec_addr(cur, ptr, block);
2252
2253 /* Fill in the new contents and log them. */
2254 xfs_btree_copy_recs(cur, rp, rec, 1);
2255 xfs_btree_log_recs(cur, bp, ptr, ptr);
2256
2257 /*
2258 * If we are tracking the last record in the tree and
2259 * we are at the far right edge of the tree, update it.
2260 */
2261 if (xfs_btree_is_lastrec(cur, block, 0)) {
2262 cur->bc_ops->update_lastrec(cur, block, rec,
2263 ptr, LASTREC_UPDATE);
2264 }
2265
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002266 /* Pass new key value up to our parent. */
Darrick J. Wong70b22652016-08-03 11:03:38 +10002267 if (xfs_btree_needs_key_update(cur, ptr)) {
Darrick J. Wong973b8312016-08-03 12:22:12 +10002268 error = xfs_btree_update_keys(cur, 0);
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11002269 if (error)
2270 goto error0;
2271 }
2272
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11002273 return 0;
2274
2275error0:
Christoph Hellwig278d0ca2008-10-30 16:56:32 +11002276 return error;
2277}
2278
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002279/*
Christoph Hellwig687b8902008-10-30 16:56:53 +11002280 * Move 1 record left from cur/level if possible.
2281 * Update cur to reflect the new path.
2282 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11002283STATIC int /* error */
Christoph Hellwig687b8902008-10-30 16:56:53 +11002284xfs_btree_lshift(
2285 struct xfs_btree_cur *cur,
2286 int level,
2287 int *stat) /* success/failure */
2288{
Christoph Hellwig687b8902008-10-30 16:56:53 +11002289 struct xfs_buf *lbp; /* left buffer pointer */
2290 struct xfs_btree_block *left; /* left btree block */
2291 int lrecs; /* left record count */
2292 struct xfs_buf *rbp; /* right buffer pointer */
2293 struct xfs_btree_block *right; /* right btree block */
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002294 struct xfs_btree_cur *tcur; /* temporary btree cursor */
Christoph Hellwig687b8902008-10-30 16:56:53 +11002295 int rrecs; /* right record count */
2296 union xfs_btree_ptr lptr; /* left btree pointer */
2297 union xfs_btree_key *rkp = NULL; /* right btree key */
2298 union xfs_btree_ptr *rpp = NULL; /* right address pointer */
2299 union xfs_btree_rec *rrp = NULL; /* right record pointer */
2300 int error; /* error return value */
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002301 int i;
Christoph Hellwig687b8902008-10-30 16:56:53 +11002302
Christoph Hellwig687b8902008-10-30 16:56:53 +11002303 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2304 level == cur->bc_nlevels - 1)
2305 goto out0;
2306
2307 /* Set up variables for this block as "right". */
2308 right = xfs_btree_get_block(cur, level, &rbp);
2309
2310#ifdef DEBUG
2311 error = xfs_btree_check_block(cur, right, level, rbp);
2312 if (error)
2313 goto error0;
2314#endif
2315
2316 /* If we've got no left sibling then we can't shift an entry left. */
2317 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2318 if (xfs_btree_ptr_is_null(cur, &lptr))
2319 goto out0;
2320
2321 /*
2322 * If the cursor entry is the one that would be moved, don't
2323 * do it... it's too complicated.
2324 */
2325 if (cur->bc_ptrs[level] <= 1)
2326 goto out0;
2327
2328 /* Set up the left neighbor as "left". */
Eric Sandeen0d7409b2014-04-14 18:59:56 +10002329 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
Christoph Hellwig687b8902008-10-30 16:56:53 +11002330 if (error)
2331 goto error0;
2332
2333 /* If it's full, it can't take another entry. */
2334 lrecs = xfs_btree_get_numrecs(left);
2335 if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
2336 goto out0;
2337
2338 rrecs = xfs_btree_get_numrecs(right);
2339
2340 /*
2341 * We add one entry to the left side and remove one for the right side.
Malcolm Parsons9da096f2009-03-29 09:55:42 +02002342 * Account for it here, the changes will be updated on disk and logged
Christoph Hellwig687b8902008-10-30 16:56:53 +11002343 * later.
2344 */
2345 lrecs++;
2346 rrecs--;
2347
2348 XFS_BTREE_STATS_INC(cur, lshift);
2349 XFS_BTREE_STATS_ADD(cur, moves, 1);
2350
2351 /*
2352 * If non-leaf, copy a key and a ptr to the left block.
2353 * Log the changes to the left block.
2354 */
2355 if (level > 0) {
2356 /* It's a non-leaf. Move keys and pointers. */
2357 union xfs_btree_key *lkp; /* left btree key */
2358 union xfs_btree_ptr *lpp; /* left address pointer */
2359
2360 lkp = xfs_btree_key_addr(cur, lrecs, left);
2361 rkp = xfs_btree_key_addr(cur, 1, right);
2362
2363 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2364 rpp = xfs_btree_ptr_addr(cur, 1, right);
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002365
2366 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
Christoph Hellwig687b8902008-10-30 16:56:53 +11002367 if (error)
2368 goto error0;
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002369
Christoph Hellwig687b8902008-10-30 16:56:53 +11002370 xfs_btree_copy_keys(cur, lkp, rkp, 1);
2371 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2372
2373 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2374 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2375
Christoph Hellwig4a26e662008-10-30 16:58:32 +11002376 ASSERT(cur->bc_ops->keys_inorder(cur,
2377 xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
Christoph Hellwig687b8902008-10-30 16:56:53 +11002378 } else {
2379 /* It's a leaf. Move records. */
2380 union xfs_btree_rec *lrp; /* left record pointer */
2381
2382 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2383 rrp = xfs_btree_rec_addr(cur, 1, right);
2384
2385 xfs_btree_copy_recs(cur, lrp, rrp, 1);
2386 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2387
Christoph Hellwig4a26e662008-10-30 16:58:32 +11002388 ASSERT(cur->bc_ops->recs_inorder(cur,
2389 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
Christoph Hellwig687b8902008-10-30 16:56:53 +11002390 }
2391
2392 xfs_btree_set_numrecs(left, lrecs);
2393 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2394
2395 xfs_btree_set_numrecs(right, rrecs);
2396 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2397
2398 /*
2399 * Slide the contents of right down one entry.
2400 */
2401 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2402 if (level > 0) {
2403 /* It's a nonleaf. operate on keys and ptrs */
Christoph Hellwig687b8902008-10-30 16:56:53 +11002404 int i; /* loop index */
2405
2406 for (i = 0; i < rrecs; i++) {
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002407 error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
Christoph Hellwig687b8902008-10-30 16:56:53 +11002408 if (error)
2409 goto error0;
2410 }
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002411
Christoph Hellwig687b8902008-10-30 16:56:53 +11002412 xfs_btree_shift_keys(cur,
2413 xfs_btree_key_addr(cur, 2, right),
2414 -1, rrecs);
2415 xfs_btree_shift_ptrs(cur,
2416 xfs_btree_ptr_addr(cur, 2, right),
2417 -1, rrecs);
2418
2419 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2420 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2421 } else {
2422 /* It's a leaf. operate on records */
2423 xfs_btree_shift_recs(cur,
2424 xfs_btree_rec_addr(cur, 2, right),
2425 -1, rrecs);
2426 xfs_btree_log_recs(cur, rbp, 1, rrecs);
Christoph Hellwig687b8902008-10-30 16:56:53 +11002427 }
2428
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002429 /*
2430 * Using a temporary cursor, update the parent key values of the
2431 * block on the left.
2432 */
Darrick J. Wongc1d22ae2016-08-03 12:26:22 +10002433 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
2434 error = xfs_btree_dup_cursor(cur, &tcur);
2435 if (error)
2436 goto error0;
2437 i = xfs_btree_firstrec(tcur, level);
2438 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002439
Darrick J. Wongc1d22ae2016-08-03 12:26:22 +10002440 error = xfs_btree_decrement(tcur, level, &i);
2441 if (error)
2442 goto error1;
2443
2444 /* Update the parent high keys of the left block, if needed. */
2445 error = xfs_btree_update_keys(tcur, level);
2446 if (error)
2447 goto error1;
2448
2449 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2450 }
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002451
Darrick J. Wong70b22652016-08-03 11:03:38 +10002452 /* Update the parent keys of the right block. */
Darrick J. Wong973b8312016-08-03 12:22:12 +10002453 error = xfs_btree_update_keys(cur, level);
Christoph Hellwig687b8902008-10-30 16:56:53 +11002454 if (error)
Darrick J. Wongc1d22ae2016-08-03 12:26:22 +10002455 goto error0;
Christoph Hellwig687b8902008-10-30 16:56:53 +11002456
2457 /* Slide the cursor value left one. */
2458 cur->bc_ptrs[level]--;
2459
Christoph Hellwig687b8902008-10-30 16:56:53 +11002460 *stat = 1;
2461 return 0;
2462
2463out0:
Christoph Hellwig687b8902008-10-30 16:56:53 +11002464 *stat = 0;
2465 return 0;
2466
2467error0:
Christoph Hellwig687b8902008-10-30 16:56:53 +11002468 return error;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002469
2470error1:
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002471 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2472 return error;
Christoph Hellwig687b8902008-10-30 16:56:53 +11002473}
2474
2475/*
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002476 * Move 1 record right from cur/level if possible.
2477 * Update cur to reflect the new path.
2478 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11002479STATIC int /* error */
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002480xfs_btree_rshift(
2481 struct xfs_btree_cur *cur,
2482 int level,
2483 int *stat) /* success/failure */
2484{
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002485 struct xfs_buf *lbp; /* left buffer pointer */
2486 struct xfs_btree_block *left; /* left btree block */
2487 struct xfs_buf *rbp; /* right buffer pointer */
2488 struct xfs_btree_block *right; /* right btree block */
2489 struct xfs_btree_cur *tcur; /* temporary btree cursor */
2490 union xfs_btree_ptr rptr; /* right block pointer */
2491 union xfs_btree_key *rkp; /* right btree key */
2492 int rrecs; /* right record count */
2493 int lrecs; /* left record count */
2494 int error; /* error return value */
2495 int i; /* loop counter */
2496
 /* An inode-rooted tree's root block has no right sibling to shift into. */
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002497 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2498 (level == cur->bc_nlevels - 1))
2499 goto out0;
2500
2501 /* Set up variables for this block as "left". */
2502 left = xfs_btree_get_block(cur, level, &lbp);
2503
2504#ifdef DEBUG
2505 error = xfs_btree_check_block(cur, left, level, lbp);
2506 if (error)
2507 goto error0;
2508#endif
2509
2510 /* If we've got no right sibling then we can't shift an entry right. */
2511 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2512 if (xfs_btree_ptr_is_null(cur, &rptr))
2513 goto out0;
2514
2515 /*
2516 * If the cursor entry is the one that would be moved, don't
2517 * do it... it's too complicated.
2518 */
2519 lrecs = xfs_btree_get_numrecs(left);
2520 if (cur->bc_ptrs[level] >= lrecs)
2521 goto out0;
2522
2523 /* Set up the right neighbor as "right". */
Eric Sandeen0d7409b2014-04-14 18:59:56 +10002524 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002525 if (error)
2526 goto error0;
2527
2528 /* If it's full, it can't take another entry. */
2529 rrecs = xfs_btree_get_numrecs(right);
2530 if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2531 goto out0;
2532
2533 XFS_BTREE_STATS_INC(cur, rshift);
2534 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2535
2536 /*
2537 * Make a hole at the start of the right neighbor block, then
2538 * copy the last left block entry to the hole.
2539 */
2540 if (level > 0) {
2541 /* It's a nonleaf. make a hole in the keys and ptrs */
2542 union xfs_btree_key *lkp;
2543 union xfs_btree_ptr *lpp;
2544 union xfs_btree_ptr *rpp;
2545
2546 lkp = xfs_btree_key_addr(cur, lrecs, left);
2547 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2548 rkp = xfs_btree_key_addr(cur, 1, right);
2549 rpp = xfs_btree_ptr_addr(cur, 1, right);
2550
 /* Sanity-check every child pointer that is about to be shifted. */
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002551 for (i = rrecs - 1; i >= 0; i--) {
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002552 error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002553 if (error)
2554 goto error0;
2555 }
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002556
2557 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2558 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2559
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002560 error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002561 if (error)
2562 goto error0;
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002563
2564 /* Now put the new data in, and log it. */
2565 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2566 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2567
2568 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2569 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2570
 /* The moved key must still sort before the old first key of right. */
Christoph Hellwig4a26e662008-10-30 16:58:32 +11002571 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2572 xfs_btree_key_addr(cur, 2, right)));
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002573 } else {
2574 /* It's a leaf. make a hole in the records */
2575 union xfs_btree_rec *lrp;
2576 union xfs_btree_rec *rrp;
2577
2578 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2579 rrp = xfs_btree_rec_addr(cur, 1, right);
2580
2581 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2582
2583 /* Now put the new data in, and log it. */
2584 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2585 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002586 }
2587
2588 /*
2589 * Decrement and log left's numrecs, bump and log right's numrecs.
2590 */
2591 xfs_btree_set_numrecs(left, --lrecs);
2592 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2593
2594 xfs_btree_set_numrecs(right, ++rrecs);
2595 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2596
2597 /*
2598 * Using a temporary cursor, update the parent key values of the
2599 * block on the right.
2600 */
2601 error = xfs_btree_dup_cursor(cur, &tcur);
2602 if (error)
2603 goto error0;
 /* Park tcur on the last entry here, then step it right into the moved entry. */
2604 i = xfs_btree_lastrec(tcur, level);
Darrick J. Wongc1d22ae2016-08-03 12:26:22 +10002605 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002606
2607 error = xfs_btree_increment(tcur, level, &i);
2608 if (error)
2609 goto error1;
2610
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002611 /* Update the parent high keys of the left block, if needed. */
2612 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
Darrick J. Wong973b8312016-08-03 12:22:12 +10002613 error = xfs_btree_update_keys(cur, level);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002614 if (error)
2615 goto error1;
2616 }
2617
Darrick J. Wong70b22652016-08-03 11:03:38 +10002618 /* Update the parent keys of the right block. */
Darrick J. Wong973b8312016-08-03 12:22:12 +10002619 error = xfs_btree_update_keys(tcur, level);
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002620 if (error)
2621 goto error1;
2622
2623 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2624
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002625 *stat = 1;
2626 return 0;
2627
2628out0:
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002629 *stat = 0;
2630 return 0;
2631
2632error0:
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002633 return error;
2634
 /* error1: the duplicated cursor exists and must be torn down too. */
2635error1:
Christoph Hellwig9eaead52008-10-30 16:56:43 +11002636 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2637 return error;
2638}
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002639
2640/*
2641 * Split cur/level block in half.
2642 * Return new block number and the key to its first
2643 * record (to be inserted into parent).
2644 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11002645STATIC int /* error */
Dave Chinnercf11da92014-07-15 07:08:24 +10002646__xfs_btree_split(
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002647 struct xfs_btree_cur *cur,
2648 int level,
2649 union xfs_btree_ptr *ptrp,
2650 union xfs_btree_key *key,
2651 struct xfs_btree_cur **curp,
2652 int *stat) /* success/failure */
2653{
2654 union xfs_btree_ptr lptr; /* left sibling block ptr */
2655 struct xfs_buf *lbp; /* left buffer pointer */
2656 struct xfs_btree_block *left; /* left btree block */
2657 union xfs_btree_ptr rptr; /* right sibling block ptr */
2658 struct xfs_buf *rbp; /* right buffer pointer */
2659 struct xfs_btree_block *right; /* right btree block */
2660 union xfs_btree_ptr rrptr; /* right-right sibling ptr */
2661 struct xfs_buf *rrbp; /* right-right buffer pointer */
2662 struct xfs_btree_block *rrblock; /* right-right btree block */
2663 int lrecs;
2664 int rrecs;
2665 int src_index;
2666 int error; /* error return value */
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002667 int i;
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002668
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002669 XFS_BTREE_STATS_INC(cur, split);
2670
2671 /* Set up left block (current one). */
2672 left = xfs_btree_get_block(cur, level, &lbp);
2673
2674#ifdef DEBUG
2675 error = xfs_btree_check_block(cur, left, level, lbp);
2676 if (error)
2677 goto error0;
2678#endif
2679
2680 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2681
2682 /* Allocate the new block. If we can't do it, we're toast. Give up. */
Eric Sandeen6f8950c2014-04-14 19:03:53 +10002683 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002684 if (error)
2685 goto error0;
2686 if (*stat == 0)
2687 goto out0;
2688 XFS_BTREE_STATS_INC(cur, alloc);
2689
2690 /* Set up the new block as "right". */
2691 error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2692 if (error)
2693 goto error0;
2694
2695 /* Fill in the btree header for the new right block. */
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05002696 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002697
2698 /*
2699 * Split the entries between the old and the new block evenly.
2700 * Make sure that if there's an odd number of entries now, that
2701 * each new block will have the same number of entries.
2702 */
2703 lrecs = xfs_btree_get_numrecs(left);
2704 rrecs = lrecs / 2;
 /* NOTE(review): with an odd count the extra entry appears to be steered by the insertion point (bc_ptrs) — confirm against upstream history before relying on the exact bias. */
2705 if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2706 rrecs++;
2707 src_index = (lrecs - rrecs + 1);
2708
2709 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2710
Darrick J. Wong70b22652016-08-03 11:03:38 +10002711 /* Adjust numrecs for the later get_*_keys() calls. */
2712 lrecs -= rrecs;
2713 xfs_btree_set_numrecs(left, lrecs);
2714 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2715
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002716 /*
2717 * Copy btree block entries from the left block over to the
2718 * new block, the right. Update the right block and log the
2719 * changes.
2720 */
2721 if (level > 0) {
2722 /* It's a non-leaf. Move keys and pointers. */
2723 union xfs_btree_key *lkp; /* left btree key */
2724 union xfs_btree_ptr *lpp; /* left address pointer */
2725 union xfs_btree_key *rkp; /* right btree key */
2726 union xfs_btree_ptr *rpp; /* right address pointer */
2727
2728 lkp = xfs_btree_key_addr(cur, src_index, left);
2729 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2730 rkp = xfs_btree_key_addr(cur, 1, right);
2731 rpp = xfs_btree_ptr_addr(cur, 1, right);
2732
 /* Sanity-check the child pointers that are about to move. */
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002733 for (i = src_index; i < rrecs; i++) {
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002734 error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002735 if (error)
2736 goto error0;
2737 }
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002738
Darrick J. Wong70b22652016-08-03 11:03:38 +10002739 /* Copy the keys & pointers to the new block. */
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002740 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2741 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2742
2743 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2744 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2745
Darrick J. Wong70b22652016-08-03 11:03:38 +10002746 /* Stash the keys of the new block for later insertion. */
Darrick J. Wong973b8312016-08-03 12:22:12 +10002747 xfs_btree_get_node_keys(cur, right, key);
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002748 } else {
2749 /* It's a leaf. Move records. */
2750 union xfs_btree_rec *lrp; /* left record pointer */
2751 union xfs_btree_rec *rrp; /* right record pointer */
2752
2753 lrp = xfs_btree_rec_addr(cur, src_index, left);
2754 rrp = xfs_btree_rec_addr(cur, 1, right);
2755
Darrick J. Wong70b22652016-08-03 11:03:38 +10002756 /* Copy records to the new block. */
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002757 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2758 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2759
Darrick J. Wong70b22652016-08-03 11:03:38 +10002760 /* Stash the keys of the new block for later insertion. */
Darrick J. Wong973b8312016-08-03 12:22:12 +10002761 xfs_btree_get_leaf_keys(cur, right, key);
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002762 }
2763
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002764 /*
2765 * Find the left block number by looking in the buffer.
Darrick J. Wong70b22652016-08-03 11:03:38 +10002766 * Adjust sibling pointers.
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002767 */
2768 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2769 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2770 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2771 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2772
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002773 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2774 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2775
2776 /*
2777 * If there's a block to the new block's right, make that block
2778 * point back to right instead of to left.
2779 */
2780 if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
Eric Sandeen0d7409b2014-04-14 18:59:56 +10002781 error = xfs_btree_read_buf_block(cur, &rrptr,
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002782 0, &rrblock, &rrbp);
2783 if (error)
2784 goto error0;
2785 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2786 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2787 }
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002788
2789 /* Update the parent high keys of the left block, if needed. */
2790 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
Darrick J. Wong973b8312016-08-03 12:22:12 +10002791 error = xfs_btree_update_keys(cur, level);
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10002792 if (error)
2793 goto error0;
2794 }
2795
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002796 /*
2797 * If the cursor is really in the right block, move it there.
2798 * If it's just pointing past the last entry in left, then we'll
2799 * insert there, so don't change anything in that case.
2800 */
2801 if (cur->bc_ptrs[level] > lrecs + 1) {
2802 xfs_btree_setbuf(cur, level, rbp);
2803 cur->bc_ptrs[level] -= lrecs;
2804 }
2805 /*
2806 * If there are more levels, we'll need another cursor which refers
2807 * the right block, no matter where this cursor was.
2808 */
2809 if (level + 1 < cur->bc_nlevels) {
2810 error = xfs_btree_dup_cursor(cur, curp);
2811 if (error)
2812 goto error0;
2813 (*curp)->bc_ptrs[level + 1]++;
2814 }
 /* Hand the new block's pointer back; its key was stashed in *key above. */
2815 *ptrp = rptr;
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002816 *stat = 1;
2817 return 0;
2818out0:
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002819 *stat = 0;
2820 return 0;
2821
2822error0:
Christoph Hellwigf5eb8e72008-10-30 16:57:03 +11002823 return error;
2824}
Christoph Hellwig344207c2008-10-30 16:57:16 +11002825
/*
 * Argument bundle marshalled from xfs_btree_split() to
 * xfs_btree_split_worker() so the actual split can run on a workqueue
 * thread with a deep stack.  The structure lives on the caller's stack;
 * the worker signals *done when it has stored its result.
 */
Dave Chinnercf11da92014-07-15 07:08:24 +10002826struct xfs_btree_split_args {
2827 struct xfs_btree_cur *cur;
2828 int level;
2829 union xfs_btree_ptr *ptrp;
2830 union xfs_btree_key *key;
2831 struct xfs_btree_cur **curp;
2832 int *stat; /* success/failure */
2833 int result; /* return value of __xfs_btree_split() */
2834 bool kswapd; /* allocation in kswapd context */
2835 struct completion *done; /* signalled when the worker is finished */
2836 struct work_struct work; /* workqueue linkage */
2837};
2838
2839/*
2840 * Stack switching interfaces for allocation
2841 */
/*
 * Workqueue callback: unpack the marshalled arguments, run the real
 * split with this thread's (deep) stack, and signal the waiter.
 */
2842static void
2843xfs_btree_split_worker(
2844 struct work_struct *work)
2845{
2846 struct xfs_btree_split_args *args = container_of(work,
2847 struct xfs_btree_split_args, work);
2848 unsigned long pflags;
Michal Hocko90707332017-05-03 14:53:12 -07002849 unsigned long new_pflags = PF_MEMALLOC_NOFS;
Dave Chinnercf11da92014-07-15 07:08:24 +10002850
2851 /*
2852 * we are in a transaction context here, but may also be doing work
2853 * in kswapd context, and hence we may need to inherit that state
2854 * temporarily to ensure that we don't block waiting for memory reclaim
2855 * in any way.
2856 */
2857 if (args->kswapd)
2858 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
2859
2860 current_set_flags_nested(&pflags, new_pflags);
2861
2862 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
2863 args->key, args->curp, args->stat);
 /*
  * NOTE(review): once complete() fires, the waiter in xfs_btree_split()
  * may return and release the on-stack args — do not touch *args after
  * this point.
  */
2864 complete(args->done);
2865
2866 current_restore_flags_nested(&pflags, new_pflags);
2867}
2868
2869/*
2870 * BMBT split requests often come in with little stack to work on. Push
2871 * them off to a worker thread so there is lots of stack to use. For the other
2872 * btree types, just call directly to avoid the context switch overhead here.
2873 */
2874STATIC int /* error */
2875xfs_btree_split(
2876 struct xfs_btree_cur *cur,
2877 int level,
2878 union xfs_btree_ptr *ptrp,
2879 union xfs_btree_key *key,
2880 struct xfs_btree_cur **curp,
2881 int *stat) /* success/failure */
2882{
2883 struct xfs_btree_split_args args;
2884 DECLARE_COMPLETION_ONSTACK(done);
2885
2886 if (cur->bc_btnum != XFS_BTNUM_BMAP)
2887 return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
2888
2889 args.cur = cur;
2890 args.level = level;
2891 args.ptrp = ptrp;
2892 args.key = key;
2893 args.curp = curp;
2894 args.stat = stat;
2895 args.done = &done;
2896 args.kswapd = current_is_kswapd();
2897 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
2898 queue_work(xfs_alloc_wq, &args.work);
2899 wait_for_completion(&done);
2900 destroy_work_on_stack(&args.work);
2901 return args.result;
2902}
2903
2904
Christoph Hellwig344207c2008-10-30 16:57:16 +11002905/*
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002906 * Copy the old inode root contents into a real block and make the
2907 * broot point to it.
2908 */
2909int /* error */
2910xfs_btree_new_iroot(
2911 struct xfs_btree_cur *cur, /* btree cursor */
2912 int *logflags, /* logging flags for inode */
2913 int *stat) /* return status - 0 fail */
2914{
2915 struct xfs_buf *cbp; /* buffer for cblock */
2916 struct xfs_btree_block *block; /* btree block */
2917 struct xfs_btree_block *cblock; /* child btree block */
2918 union xfs_btree_key *ckp; /* child key pointer */
2919 union xfs_btree_ptr *cpp; /* child ptr pointer */
2920 union xfs_btree_key *kp; /* pointer to btree key */
2921 union xfs_btree_ptr *pp; /* pointer to block addr */
2922 union xfs_btree_ptr nptr; /* new block addr */
2923 int level; /* btree level */
2924 int error; /* error return code */
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002925 int i; /* loop counter */
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002926
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002927 XFS_BTREE_STATS_INC(cur, newroot);
2928
 /* Only inode-rooted btrees (bmbt) grow their root this way. */
2929 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2930
2931 level = cur->bc_nlevels - 1;
2932
2933 block = xfs_btree_get_iroot(cur);
2934 pp = xfs_btree_ptr_addr(cur, 1, block);
2935
2936 /* Allocate the new block. If we can't do it, we're toast. Give up. */
Eric Sandeen6f8950c2014-04-14 19:03:53 +10002937 error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002938 if (error)
2939 goto error0;
 /* *stat == 0 means out of space: report the no-op to the caller. */
Carlos Maiolinoe157ebd2018-03-06 17:03:30 -08002940 if (*stat == 0)
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002941 return 0;
Carlos Maiolinoe157ebd2018-03-06 17:03:30 -08002942
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002943 XFS_BTREE_STATS_INC(cur, alloc);
2944
2945 /* Copy the root into a real block. */
2946 error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2947 if (error)
2948 goto error0;
2949
Dave Chinner088c9f62013-06-12 12:19:08 +10002950 /*
2951 * we can't just memcpy() the root in for CRC enabled btree blocks.
2952 * In that case have to also ensure the blkno remains correct
2953 */
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002954 memcpy(cblock, block, xfs_btree_block_len(cur));
Dave Chinner088c9f62013-06-12 12:19:08 +10002955 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
2956 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
2957 cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
2958 else
2959 cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
2960 }
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002961
 /* The tree grows one level; the inode root keeps a single entry. */
2962 be16_add_cpu(&block->bb_level, 1);
2963 xfs_btree_set_numrecs(block, 1);
2964 cur->bc_nlevels++;
2965 cur->bc_ptrs[level + 1] = 1;
2966
2967 kp = xfs_btree_key_addr(cur, 1, block);
2968 ckp = xfs_btree_key_addr(cur, 1, cblock);
2969 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2970
2971 cpp = xfs_btree_ptr_addr(cur, 1, cblock);
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002972 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002973 error = xfs_btree_debug_check_ptr(cur, pp, i, level);
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002974 if (error)
2975 goto error0;
2976 }
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002977
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002978 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2979
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002980 error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002981 if (error)
2982 goto error0;
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07002983
 /* Point the inode root's single pointer at the new child block. */
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11002984 xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2985
 /* Shrink the in-inode root down to the one remaining record. */
2986 xfs_iroot_realloc(cur->bc_private.b.ip,
2987 1 - xfs_btree_get_numrecs(cblock),
2988 cur->bc_private.b.whichfork);
2989
2990 xfs_btree_setbuf(cur, level, cbp);
2991
2992 /*
2993 * Do all this logging at the end so that
2994 * the root is at the right level.
2995 */
2996 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2997 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2998 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2999
 /* Tell the caller which inode fields to log. */
3000 *logflags |=
Eric Sandeen9d87c312009-01-14 23:22:07 -06003001 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11003002 *stat = 1;
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11003003 return 0;
3004error0:
Christoph Hellwigea77b0a2008-10-30 16:57:28 +11003005 return error;
3006}
3007
3008/*
Christoph Hellwig344207c2008-10-30 16:57:16 +11003009 * Allocate a new root block, fill it in.
3010 */
Christoph Hellwig3cc75242008-10-30 16:58:41 +11003011STATIC int /* error */
Christoph Hellwig344207c2008-10-30 16:57:16 +11003012xfs_btree_new_root(
3013 struct xfs_btree_cur *cur, /* btree cursor */
3014 int *stat) /* success/failure */
3015{
3016 struct xfs_btree_block *block; /* one half of the old root block */
3017 struct xfs_buf *bp; /* buffer containing block */
3018 int error; /* error return value */
3019 struct xfs_buf *lbp; /* left buffer pointer */
3020 struct xfs_btree_block *left; /* left btree block */
3021 struct xfs_buf *nbp; /* new (root) buffer */
3022 struct xfs_btree_block *new; /* new (root) btree block */
3023 int nptr; /* new value for key index, 1 or 2 */
3024 struct xfs_buf *rbp; /* right buffer pointer */
3025 struct xfs_btree_block *right; /* right btree block */
3026 union xfs_btree_ptr rptr;
3027 union xfs_btree_ptr lptr;
3028
Christoph Hellwig344207c2008-10-30 16:57:16 +11003029 XFS_BTREE_STATS_INC(cur, newroot);
3030
3031 /* initialise our start point from the cursor */
3032 cur->bc_ops->init_ptr_from_cur(cur, &rptr);
3033
3034 /* Allocate the new block. If we can't do it, we're toast. Give up. */
Eric Sandeen6f8950c2014-04-14 19:03:53 +10003035 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
Christoph Hellwig344207c2008-10-30 16:57:16 +11003036 if (error)
3037 goto error0;
3038 if (*stat == 0)
3039 goto out0;
3040 XFS_BTREE_STATS_INC(cur, alloc);
3041
3042 /* Set up the new block. */
3043 error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
3044 if (error)
3045 goto error0;
3046
3047 /* Set the root in the holding structure increasing the level by 1. */
3048 cur->bc_ops->set_root(cur, &lptr, 1);
3049
3050 /*
3051 * At the previous root level there are now two blocks: the old root,
3052 * and the new block generated when it was split. We don't know which
3053 * one the cursor is pointing at, so we set up variables "left" and
3054 * "right" for each case.
3055 */
3056 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
3057
3058#ifdef DEBUG
3059 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
3060 if (error)
3061 goto error0;
3062#endif
3063
 /* A non-null right sibling means the cursor's block is the left half. */
3064 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3065 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3066 /* Our block is left, pick up the right block. */
3067 lbp = bp;
3068 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
3069 left = block;
Eric Sandeen0d7409b2014-04-14 18:59:56 +10003070 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
Christoph Hellwig344207c2008-10-30 16:57:16 +11003071 if (error)
3072 goto error0;
3073 bp = rbp;
3074 nptr = 1;
3075 } else {
3076 /* Our block is right, pick up the left block. */
3077 rbp = bp;
3078 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
3079 right = block;
3080 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
Eric Sandeen0d7409b2014-04-14 18:59:56 +10003081 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
Christoph Hellwig344207c2008-10-30 16:57:16 +11003082 if (error)
3083 goto error0;
3084 bp = lbp;
3085 nptr = 2;
3086 }
Darrick J. Wong70b22652016-08-03 11:03:38 +10003087
Christoph Hellwig344207c2008-10-30 16:57:16 +11003088 /* Fill in the new block's btree header and log it. */
Christoph Hellwigee1a47a2013-04-21 14:53:46 -05003089 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
Christoph Hellwig344207c2008-10-30 16:57:16 +11003090 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
3091 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
3092 !xfs_btree_ptr_is_null(cur, &rptr));
3093
3094 /* Fill in the key data in the new root. */
3095 if (xfs_btree_get_level(left) > 0) {
Darrick J. Wong70b22652016-08-03 11:03:38 +10003096 /*
3097 * Get the keys for the left block's keys and put them directly
3098 * in the parent block. Do the same for the right block.
3099 */
Darrick J. Wong973b8312016-08-03 12:22:12 +10003100 xfs_btree_get_node_keys(cur, left,
Darrick J. Wong70b22652016-08-03 11:03:38 +10003101 xfs_btree_key_addr(cur, 1, new));
Darrick J. Wong973b8312016-08-03 12:22:12 +10003102 xfs_btree_get_node_keys(cur, right,
Darrick J. Wong70b22652016-08-03 11:03:38 +10003103 xfs_btree_key_addr(cur, 2, new));
Christoph Hellwig344207c2008-10-30 16:57:16 +11003104 } else {
Darrick J. Wong70b22652016-08-03 11:03:38 +10003105 /*
3106 * Get the keys for the left block's records and put them
3107 * directly in the parent block. Do the same for the right
3108 * block.
3109 */
Darrick J. Wong973b8312016-08-03 12:22:12 +10003110 xfs_btree_get_leaf_keys(cur, left,
Darrick J. Wong70b22652016-08-03 11:03:38 +10003111 xfs_btree_key_addr(cur, 1, new));
Darrick J. Wong973b8312016-08-03 12:22:12 +10003112 xfs_btree_get_leaf_keys(cur, right,
Darrick J. Wong70b22652016-08-03 11:03:38 +10003113 xfs_btree_key_addr(cur, 2, new));
Christoph Hellwig344207c2008-10-30 16:57:16 +11003114 }
3115 xfs_btree_log_keys(cur, nbp, 1, 2);
3116
3117 /* Fill in the pointer data in the new root. */
3118 xfs_btree_copy_ptrs(cur,
3119 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
3120 xfs_btree_copy_ptrs(cur,
3121 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
3122 xfs_btree_log_ptrs(cur, nbp, 1, 2);
3123
 /* Fix up the cursor: it now has one more level, pointing at the new root. */
3124 /* Fix up the cursor. */
3125 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
3126 cur->bc_ptrs[cur->bc_nlevels] = nptr;
3127 cur->bc_nlevels++;
Christoph Hellwig344207c2008-10-30 16:57:16 +11003128 *stat = 1;
3129 return 0;
3130error0:
Christoph Hellwig344207c2008-10-30 16:57:16 +11003131 return error;
 /* out0: block allocation failed; report a clean no-op. */
3132out0:
Christoph Hellwig344207c2008-10-30 16:57:16 +11003133 *stat = 0;
3134 return 0;
3135}
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003136
/*
 * Make room in the block cur/level points to so one more entry fits.
 * Strategies, in order: grow (or replace) an in-inode root, shift an
 * entry to the right sibling, shift to the left sibling, and finally
 * split the block.  On return *stat is 1 if room was made, 0 if not;
 * a split also hands back the new block via *nptr/*key/*ncur so the
 * caller can insert the new entry into the parent.
 */
3137STATIC int
3138xfs_btree_make_block_unfull(
3139 struct xfs_btree_cur *cur, /* btree cursor */
3140 int level, /* btree level */
3141 int numrecs,/* # of recs in block */
3142 int *oindex,/* old tree index */
3143 int *index, /* new tree index */
3144 union xfs_btree_ptr *nptr, /* new btree ptr */
3145 struct xfs_btree_cur **ncur, /* new btree cursor */
Darrick J. Wong70b22652016-08-03 11:03:38 +10003146 union xfs_btree_key *key, /* key of new block */
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003147 int *stat)
3148{
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003149 int error = 0;
3150
 /* The in-inode root has no siblings; grow or replace it instead. */
3151 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
3152 level == cur->bc_nlevels - 1) {
Carlos Maiolinoe157ebd2018-03-06 17:03:30 -08003153 struct xfs_inode *ip = cur->bc_private.b.ip;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003154
3155 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
3156 /* A root block that can be made bigger. */
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003157 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
Darrick J. Wong0d309792016-08-03 11:01:25 +10003158 *stat = 1;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003159 } else {
3160 /* A root block that needs replacing */
3161 int logflags = 0;
3162
3163 error = xfs_btree_new_iroot(cur, &logflags, stat);
3164 if (error || *stat == 0)
3165 return error;
3166
3167 xfs_trans_log_inode(cur->bc_tp, ip, logflags);
3168 }
3169
3170 return 0;
3171 }
3172
3173 /* First, try shifting an entry to the right neighbor. */
3174 error = xfs_btree_rshift(cur, level, stat);
3175 if (error || *stat)
3176 return error;
3177
3178 /* Next, try shifting an entry to the left neighbor. */
3179 error = xfs_btree_lshift(cur, level, stat);
3180 if (error)
3181 return error;
3182
 /* A left shift moved our slot; refresh both indices from the cursor. */
3183 if (*stat) {
3184 *oindex = *index = cur->bc_ptrs[level];
3185 return 0;
3186 }
3187
3188 /*
3189 * Next, try splitting the current block in half.
3190 *
3191 * If this works we have to re-set our variables because we
3192 * could be in a different block now.
3193 */
Darrick J. Wonge5821e52016-08-03 11:02:39 +10003194 error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003195 if (error || *stat == 0)
3196 return error;
3197
3198
3199 *index = cur->bc_ptrs[level];
Christoph Hellwig4b22a572008-10-30 16:57:40 +11003200 return 0;
3201}
3202
/*
 * Insert one record/level.  Return information to the caller
 * allowing the next level up to proceed if necessary.
 *
 * On exit, *stat is 1 if the entry was inserted and 0 if there was no
 * room (cursor off the left edge).  If the insert forced a block split,
 * *ptrp and *key are updated to describe the new sibling block and
 * *curp carries a new cursor so the caller can insert the corresponding
 * key/ptr at the next level up; otherwise *ptrp is set to the null
 * pointer and *curp is untouched.
 */
STATIC int
xfs_btree_insrec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level to insert record at */
	union xfs_btree_ptr	*ptrp,	/* i/o: block number inserted */
	union xfs_btree_rec	*rec,	/* record to insert */
	union xfs_btree_key	*key,	/* i/o: block key for ptrp */
	struct xfs_btree_cur	**curp,	/* output: new cursor replacing cur */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* btree block */
	struct xfs_buf		*bp;	/* buffer for block */
	union xfs_btree_ptr	nptr;	/* new block ptr */
	struct xfs_btree_cur	*ncur;	/* new btree cursor */
	union xfs_btree_key	nkey;	/* new block key */
	union xfs_btree_key	*lkey;	/* points at nkey; filled on split */
	int			optr;	/* old key/record index */
	int			ptr;	/* key/record index */
	int			numrecs;/* number of records */
	int			error;	/* error return value */
	int			i;
	xfs_daddr_t		old_bn;	/* daddr of block before unfull/split */

	ncur = NULL;
	lkey = &nkey;

	/*
	 * If we have an external root pointer, and we've made it to the
	 * root level, allocate a new root block and we're done.
	 */
	if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level >= cur->bc_nlevels)) {
		error = xfs_btree_new_root(cur, stat);
		xfs_btree_set_ptr_null(cur, ptrp);

		return error;
	}

	/* If we're off the left edge, return failure. */
	ptr = cur->bc_ptrs[level];
	if (ptr == 0) {
		*stat = 0;
		return 0;
	}

	optr = ptr;

	XFS_BTREE_STATS_INC(cur, insrec);

	/* Get pointers to the btree buffer and block. */
	block = xfs_btree_get_block(cur, level, &bp);
	/* Remember where we started so we can detect landing in a new block. */
	old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;

	/* Check that the new entry is being inserted in the right place. */
	if (ptr <= numrecs) {
		if (level == 0) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rec,
				xfs_btree_rec_addr(cur, ptr, block)));
		} else {
			ASSERT(cur->bc_ops->keys_inorder(cur, key,
				xfs_btree_key_addr(cur, ptr, block)));
		}
	}
#endif

	/*
	 * If the block is full, we can't insert the new entry until we
	 * make the block un-full.
	 */
	xfs_btree_set_ptr_null(cur, &nptr);
	if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
		error = xfs_btree_make_block_unfull(cur, level, numrecs,
					&optr, &ptr, &nptr, &ncur, lkey, stat);
		if (error || *stat == 0)
			goto error0;
	}

	/*
	 * The current block may have changed if the block was
	 * previously full and we have just made space in it.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		return error;
#endif

	/*
	 * At this point we know there's room for our new entry in the block
	 * we're pointing at.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);

	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*kp;
		union xfs_btree_ptr	*pp;

		kp = xfs_btree_key_addr(cur, ptr, block);
		pp = xfs_btree_ptr_addr(cur, ptr, block);

		/* Validate every child pointer that is about to be moved. */
		for (i = numrecs - ptr; i >= 0; i--) {
			error = xfs_btree_debug_check_ptr(cur, pp, i, level);
			if (error)
				return error;
		}

		xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
		xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);

		error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
		if (error)
			goto error0;

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_keys(cur, kp, key, 1);
		xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
		numrecs++;
		xfs_btree_set_numrecs(block, numrecs);
		xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
		xfs_btree_log_keys(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->keys_inorder(cur, kp,
				xfs_btree_key_addr(cur, ptr + 1, block)));
		}
#endif
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec	*rp;

		rp = xfs_btree_rec_addr(cur, ptr, block);

		xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_recs(cur, rp, rec, 1);
		xfs_btree_set_numrecs(block, ++numrecs);
		xfs_btree_log_recs(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rp,
				xfs_btree_rec_addr(cur, ptr + 1, block)));
		}
#endif
	}

	/* Log the new number of records in the btree header. */
	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);

	/*
	 * If we just inserted into a new tree block, we have to
	 * recalculate nkey here because nkey is out of date.
	 *
	 * Otherwise we're just updating an existing block (having shoved
	 * some records into the new tree block), so use the regular key
	 * update mechanism.
	 */
	if (bp && bp->b_bn != old_bn) {
		xfs_btree_get_keys(cur, block, lkey);
	} else if (xfs_btree_needs_key_update(cur, optr)) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, level)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_INSREC);
	}

	/*
	 * Return the new block number, if any.
	 * If there is one, give back a record value and a cursor too.
	 */
	*ptrp = nptr;
	if (!xfs_btree_ptr_is_null(cur, &nptr)) {
		xfs_btree_copy_keys(cur, key, lkey, 1);
		*curp = ncur;
	}

	*stat = 1;
	return 0;

error0:
	return error;
}
3407
/*
 * Insert the record at the point referenced by cur.
 *
 * A multi-level split of the tree on insert will invalidate the original
 * cursor.  All callers of this function should assume that the cursor is
 * no longer valid and revalidate it.
 *
 * *stat is 1 on success, 0 if the record could not be inserted
 * (xfs_btree_insrec reported no room at some level).
 */
int
xfs_btree_insert(
	struct xfs_btree_cur	*cur,
	int			*stat)
{
	int			error;	/* error return value */
	int			i;	/* result value, 0 for failure */
	int			level;	/* current level number in btree */
	union xfs_btree_ptr	nptr;	/* new block number (split result) */
	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
	union xfs_btree_key	bkey;	/* key of block to insert */
	union xfs_btree_key	*key;	/* points at bkey for insrec's i/o */
	union xfs_btree_rec	rec;	/* record to insert */

	level = 0;
	ncur = NULL;
	pcur = cur;
	key = &bkey;

	xfs_btree_set_ptr_null(cur, &nptr);

	/* Make a key out of the record data to be inserted, and save it. */
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(key, &rec);

	/*
	 * Loop going up the tree, starting at the leaf level.
	 * Stop when we don't get a split block, that must mean that
	 * the insert is finished with this level.
	 */
	do {
		/*
		 * Insert nrec/nptr into this level of the tree.
		 * Note if we fail, nptr will be null.
		 */
		error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
				&ncur, &i);
		if (error) {
			/* Never free the caller's cursor on error. */
			if (pcur != cur)
				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
			goto error0;
		}

		XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
		level++;

		/*
		 * See if the cursor we just used is trash.
		 * Can't trash the caller's cursor, but otherwise we should
		 * if ncur is a new cursor or we're about to be done.
		 */
		if (pcur != cur &&
		    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
			/* Save the state from the cursor before we trash it */
			if (cur->bc_ops->update_cursor)
				cur->bc_ops->update_cursor(pcur, cur);
			cur->bc_nlevels = pcur->bc_nlevels;
			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
		}
		/* If we got a new cursor, switch to it. */
		if (ncur) {
			pcur = ncur;
			ncur = NULL;
		}
	} while (!xfs_btree_ptr_is_null(cur, &nptr));

	*stat = i;
	return 0;
error0:
	return error;
}
Christoph Hellwigd4b3a4b2008-10-30 16:57:51 +11003487
/*
 * Try to merge a non-leaf block back into the inode root.
 *
 * Note: the killroot names comes from the fact that we're effectively
 * killing the old root block.  But because we can't just delete the
 * inode we have to copy the single block it was pointing to into the
 * inode.
 *
 * Bails out (returning 0 without changing anything) when the root's only
 * child would not fit in the incore root, when the root has more than one
 * child, or when the root's child level is the leaf level.
 */
STATIC int
xfs_btree_kill_iroot(
	struct xfs_btree_cur	*cur)
{
	int			whichfork = cur->bc_private.b.whichfork;
	struct xfs_inode	*ip = cur->bc_private.b.ip;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_block	*block;
	struct xfs_btree_block	*cblock;
	union xfs_btree_key	*kp;
	union xfs_btree_key	*ckp;
	union xfs_btree_ptr	*pp;
	union xfs_btree_ptr	*cpp;
	struct xfs_buf		*cbp;
	int			level;
	int			index;
	int			numrecs;
	int			error;
#ifdef DEBUG
	union xfs_btree_ptr	ptr;
#endif
	int			i;

	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
	ASSERT(cur->bc_nlevels > 1);

	/*
	 * Don't deal with the root block needs to be a leaf case.
	 * We're just going to turn the thing back into extents anyway.
	 */
	level = cur->bc_nlevels - 1;
	if (level == 1)
		goto out0;

	/*
	 * Give up if the root has multiple children.
	 */
	block = xfs_btree_get_iroot(cur);
	if (xfs_btree_get_numrecs(block) != 1)
		goto out0;

	cblock = xfs_btree_get_block(cur, level - 1, &cbp);
	numrecs = xfs_btree_get_numrecs(cblock);

	/*
	 * Only do this if the next level will fit.
	 * Then the data must be copied up to the inode,
	 * instead of freeing the root you free the next level.
	 */
	if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, killroot);

#ifdef DEBUG
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
#endif

	/*
	 * Resize the incore root so it holds exactly the child's entries;
	 * index is the delta vs. the root's current capacity.  Realloc can
	 * move the broot, so re-read it from the fork afterwards.
	 */
	index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
	if (index) {
		xfs_iroot_realloc(cur->bc_private.b.ip, index,
				  cur->bc_private.b.whichfork);
		block = ifp->if_broot;
	}

	be16_add_cpu(&block->bb_numrecs, index);
	ASSERT(block->bb_numrecs == cblock->bb_numrecs);

	/* Copy the child's keys and pointers up into the incore root. */
	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, kp, ckp, numrecs);

	pp = xfs_btree_ptr_addr(cur, 1, block);
	cpp = xfs_btree_ptr_addr(cur, 1, cblock);

	for (i = 0; i < numrecs; i++) {
		error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
		if (error)
			return error;
	}

	xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);

	/* The child's contents now live in the inode; free its block. */
	error = xfs_btree_free_block(cur, cbp);
	if (error)
		return error;

	cur->bc_bufs[level - 1] = NULL;
	be16_add_cpu(&block->bb_level, -1);
	xfs_trans_log_inode(cur->bc_tp, ip,
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	cur->bc_nlevels--;
out0:
	return 0;
}
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003594
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003595/*
3596 * Kill the current root node, and replace it with it's only child node.
3597 */
3598STATIC int
3599xfs_btree_kill_root(
3600 struct xfs_btree_cur *cur,
3601 struct xfs_buf *bp,
3602 int level,
3603 union xfs_btree_ptr *newroot)
3604{
3605 int error;
3606
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003607 XFS_BTREE_STATS_INC(cur, killroot);
3608
3609 /*
3610 * Update the root pointer, decreasing the level by 1 and then
3611 * free the old root.
3612 */
3613 cur->bc_ops->set_root(cur, newroot, -1);
3614
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +11003615 error = xfs_btree_free_block(cur, bp);
Carlos Maiolinoe157ebd2018-03-06 17:03:30 -08003616 if (error)
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003617 return error;
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003618
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003619 cur->bc_bufs[level] = NULL;
3620 cur->bc_ra[level] = 0;
3621 cur->bc_nlevels--;
3622
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003623 return 0;
3624}
3625
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003626STATIC int
3627xfs_btree_dec_cursor(
3628 struct xfs_btree_cur *cur,
3629 int level,
3630 int *stat)
3631{
3632 int error;
3633 int i;
3634
3635 if (level > 0) {
3636 error = xfs_btree_decrement(cur, level, &i);
3637 if (error)
3638 return error;
3639 }
3640
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003641 *stat = 1;
3642 return 0;
3643}
3644
3645/*
3646 * Single level of the btree record deletion routine.
3647 * Delete record pointed to by cur/level.
3648 * Remove the record from its block then rebalance the tree.
3649 * Return 0 for error, 1 for done, 2 to go on to the next level.
3650 */
3651STATIC int /* error */
3652xfs_btree_delrec(
3653 struct xfs_btree_cur *cur, /* btree cursor */
3654 int level, /* level removing record from */
3655 int *stat) /* fail/done/go-on */
3656{
3657 struct xfs_btree_block *block; /* btree block */
3658 union xfs_btree_ptr cptr; /* current block ptr */
3659 struct xfs_buf *bp; /* buffer for block */
3660 int error; /* error return value */
3661 int i; /* loop counter */
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003662 union xfs_btree_ptr lptr; /* left sibling block ptr */
3663 struct xfs_buf *lbp; /* left buffer pointer */
3664 struct xfs_btree_block *left; /* left btree block */
3665 int lrecs = 0; /* left record count */
3666 int ptr; /* key/record index */
3667 union xfs_btree_ptr rptr; /* right sibling block ptr */
3668 struct xfs_buf *rbp; /* right buffer pointer */
3669 struct xfs_btree_block *right; /* right btree block */
3670 struct xfs_btree_block *rrblock; /* right-right btree block */
3671 struct xfs_buf *rrbp; /* right-right buffer pointer */
3672 int rrecs = 0; /* right record count */
3673 struct xfs_btree_cur *tcur; /* temporary btree cursor */
3674 int numrecs; /* temporary numrec count */
3675
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003676 tcur = NULL;
3677
3678 /* Get the index of the entry being deleted, check for nothing there. */
3679 ptr = cur->bc_ptrs[level];
3680 if (ptr == 0) {
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003681 *stat = 0;
3682 return 0;
3683 }
3684
3685 /* Get the buffer & block containing the record or key/ptr. */
3686 block = xfs_btree_get_block(cur, level, &bp);
3687 numrecs = xfs_btree_get_numrecs(block);
3688
3689#ifdef DEBUG
3690 error = xfs_btree_check_block(cur, block, level, bp);
3691 if (error)
3692 goto error0;
3693#endif
3694
3695 /* Fail if we're off the end of the block. */
3696 if (ptr > numrecs) {
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003697 *stat = 0;
3698 return 0;
3699 }
3700
3701 XFS_BTREE_STATS_INC(cur, delrec);
3702 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3703
3704 /* Excise the entries being deleted. */
3705 if (level > 0) {
3706 /* It's a nonleaf. operate on keys and ptrs */
3707 union xfs_btree_key *lkp;
3708 union xfs_btree_ptr *lpp;
3709
3710 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3711 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3712
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003713 for (i = 0; i < numrecs - ptr; i++) {
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07003714 error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003715 if (error)
3716 goto error0;
3717 }
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003718
3719 if (ptr < numrecs) {
3720 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3721 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3722 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3723 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3724 }
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003725 } else {
3726 /* It's a leaf. operate on records */
3727 if (ptr < numrecs) {
3728 xfs_btree_shift_recs(cur,
3729 xfs_btree_rec_addr(cur, ptr + 1, block),
3730 -1, numrecs - ptr);
3731 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3732 }
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003733 }
3734
3735 /*
3736 * Decrement and log the number of entries in the block.
3737 */
3738 xfs_btree_set_numrecs(block, --numrecs);
3739 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3740
3741 /*
3742 * If we are tracking the last record in the tree and
3743 * we are at the far right edge of the tree, update it.
3744 */
3745 if (xfs_btree_is_lastrec(cur, block, level)) {
3746 cur->bc_ops->update_lastrec(cur, block, NULL,
3747 ptr, LASTREC_DELREC);
3748 }
3749
3750 /*
3751 * We're at the root level. First, shrink the root block in-memory.
3752 * Try to get rid of the next level down. If we can't then there's
3753 * nothing left to do.
3754 */
3755 if (level == cur->bc_nlevels - 1) {
3756 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3757 xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3758 cur->bc_private.b.whichfork);
3759
3760 error = xfs_btree_kill_iroot(cur);
3761 if (error)
3762 goto error0;
3763
3764 error = xfs_btree_dec_cursor(cur, level, stat);
3765 if (error)
3766 goto error0;
3767 *stat = 1;
3768 return 0;
3769 }
3770
3771 /*
3772 * If this is the root level, and there's only one entry left,
3773 * and it's NOT the leaf level, then we can get rid of this
3774 * level.
3775 */
3776 if (numrecs == 1 && level > 0) {
3777 union xfs_btree_ptr *pp;
3778 /*
3779 * pp is still set to the first pointer in the block.
3780 * Make it the new root of the btree.
3781 */
3782 pp = xfs_btree_ptr_addr(cur, 1, block);
Christoph Hellwigc0e59e12010-09-07 23:34:07 +00003783 error = xfs_btree_kill_root(cur, bp, level, pp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003784 if (error)
3785 goto error0;
3786 } else if (level > 0) {
3787 error = xfs_btree_dec_cursor(cur, level, stat);
3788 if (error)
3789 goto error0;
3790 }
3791 *stat = 1;
3792 return 0;
3793 }
3794
3795 /*
3796 * If we deleted the leftmost entry in the block, update the
3797 * key values above us in the tree.
3798 */
Darrick J. Wong70b22652016-08-03 11:03:38 +10003799 if (xfs_btree_needs_key_update(cur, ptr)) {
Darrick J. Wong973b8312016-08-03 12:22:12 +10003800 error = xfs_btree_update_keys(cur, level);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003801 if (error)
3802 goto error0;
3803 }
3804
3805 /*
3806 * If the number of records remaining in the block is at least
3807 * the minimum, we're done.
3808 */
3809 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3810 error = xfs_btree_dec_cursor(cur, level, stat);
3811 if (error)
3812 goto error0;
3813 return 0;
3814 }
3815
3816 /*
3817 * Otherwise, we have to move some records around to keep the
3818 * tree balanced. Look at the left and right sibling blocks to
3819 * see if we can re-balance by moving only one record.
3820 */
3821 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3822 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3823
3824 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3825 /*
3826 * One child of root, need to get a chance to copy its contents
3827 * into the root and delete it. Can't go up to next level,
3828 * there's nothing to delete there.
3829 */
3830 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3831 xfs_btree_ptr_is_null(cur, &lptr) &&
3832 level == cur->bc_nlevels - 2) {
3833 error = xfs_btree_kill_iroot(cur);
3834 if (!error)
3835 error = xfs_btree_dec_cursor(cur, level, stat);
3836 if (error)
3837 goto error0;
3838 return 0;
3839 }
3840 }
3841
3842 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3843 !xfs_btree_ptr_is_null(cur, &lptr));
3844
3845 /*
3846 * Duplicate the cursor so our btree manipulations here won't
3847 * disrupt the next level up.
3848 */
3849 error = xfs_btree_dup_cursor(cur, &tcur);
3850 if (error)
3851 goto error0;
3852
3853 /*
3854 * If there's a right sibling, see if it's ok to shift an entry
3855 * out of it.
3856 */
3857 if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3858 /*
3859 * Move the temp cursor to the last entry in the next block.
3860 * Actually any entry but the first would suffice.
3861 */
3862 i = xfs_btree_lastrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003863 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003864
3865 error = xfs_btree_increment(tcur, level, &i);
3866 if (error)
3867 goto error0;
Eric Sandeenc29aad42015-02-23 22:39:08 +11003868 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003869
3870 i = xfs_btree_lastrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003871 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003872
3873 /* Grab a pointer to the block. */
3874 right = xfs_btree_get_block(tcur, level, &rbp);
3875#ifdef DEBUG
3876 error = xfs_btree_check_block(tcur, right, level, rbp);
3877 if (error)
3878 goto error0;
3879#endif
3880 /* Grab the current block number, for future use. */
3881 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3882
3883 /*
3884 * If right block is full enough so that removing one entry
3885 * won't make it too empty, and left-shifting an entry out
3886 * of right to us works, we're done.
3887 */
3888 if (xfs_btree_get_numrecs(right) - 1 >=
3889 cur->bc_ops->get_minrecs(tcur, level)) {
3890 error = xfs_btree_lshift(tcur, level, &i);
3891 if (error)
3892 goto error0;
3893 if (i) {
3894 ASSERT(xfs_btree_get_numrecs(block) >=
3895 cur->bc_ops->get_minrecs(tcur, level));
3896
3897 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3898 tcur = NULL;
3899
3900 error = xfs_btree_dec_cursor(cur, level, stat);
3901 if (error)
3902 goto error0;
3903 return 0;
3904 }
3905 }
3906
3907 /*
3908 * Otherwise, grab the number of records in right for
3909 * future reference, and fix up the temp cursor to point
3910 * to our block again (last record).
3911 */
3912 rrecs = xfs_btree_get_numrecs(right);
3913 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3914 i = xfs_btree_firstrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003915 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003916
3917 error = xfs_btree_decrement(tcur, level, &i);
3918 if (error)
3919 goto error0;
Eric Sandeenc29aad42015-02-23 22:39:08 +11003920 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003921 }
3922 }
3923
3924 /*
3925 * If there's a left sibling, see if it's ok to shift an entry
3926 * out of it.
3927 */
3928 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3929 /*
3930 * Move the temp cursor to the first entry in the
3931 * previous block.
3932 */
3933 i = xfs_btree_firstrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003934 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003935
3936 error = xfs_btree_decrement(tcur, level, &i);
3937 if (error)
3938 goto error0;
3939 i = xfs_btree_firstrec(tcur, level);
Eric Sandeenc29aad42015-02-23 22:39:08 +11003940 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003941
3942 /* Grab a pointer to the block. */
3943 left = xfs_btree_get_block(tcur, level, &lbp);
3944#ifdef DEBUG
3945 error = xfs_btree_check_block(cur, left, level, lbp);
3946 if (error)
3947 goto error0;
3948#endif
3949 /* Grab the current block number, for future use. */
3950 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3951
3952 /*
3953 * If left block is full enough so that removing one entry
3954 * won't make it too empty, and right-shifting an entry out
3955 * of left to us works, we're done.
3956 */
3957 if (xfs_btree_get_numrecs(left) - 1 >=
3958 cur->bc_ops->get_minrecs(tcur, level)) {
3959 error = xfs_btree_rshift(tcur, level, &i);
3960 if (error)
3961 goto error0;
3962 if (i) {
3963 ASSERT(xfs_btree_get_numrecs(block) >=
3964 cur->bc_ops->get_minrecs(tcur, level));
3965 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3966 tcur = NULL;
3967 if (level == 0)
3968 cur->bc_ptrs[0]++;
Carlos Maiolinoe157ebd2018-03-06 17:03:30 -08003969
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11003970 *stat = 1;
3971 return 0;
3972 }
3973 }
3974
3975 /*
3976 * Otherwise, grab the number of records in right for
3977 * future reference.
3978 */
3979 lrecs = xfs_btree_get_numrecs(left);
3980 }
3981
3982 /* Delete the temp cursor, we're done with it. */
3983 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3984 tcur = NULL;
3985
3986 /* If here, we need to do a join to keep the tree balanced. */
3987 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3988
3989 if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3990 lrecs + xfs_btree_get_numrecs(block) <=
3991 cur->bc_ops->get_maxrecs(cur, level)) {
3992 /*
3993 * Set "right" to be the starting block,
3994 * "left" to be the left neighbor.
3995 */
3996 rptr = cptr;
3997 right = block;
3998 rbp = bp;
Eric Sandeen0d7409b2014-04-14 18:59:56 +10003999 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004000 if (error)
4001 goto error0;
4002
4003 /*
4004 * If that won't work, see if we can join with the right neighbor block.
4005 */
4006 } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
4007 rrecs + xfs_btree_get_numrecs(block) <=
4008 cur->bc_ops->get_maxrecs(cur, level)) {
4009 /*
4010 * Set "left" to be the starting block,
4011 * "right" to be the right neighbor.
4012 */
4013 lptr = cptr;
4014 left = block;
4015 lbp = bp;
Eric Sandeen0d7409b2014-04-14 18:59:56 +10004016 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004017 if (error)
4018 goto error0;
4019
4020 /*
4021 * Otherwise, we can't fix the imbalance.
4022 * Just return. This is probably a logic error, but it's not fatal.
4023 */
4024 } else {
4025 error = xfs_btree_dec_cursor(cur, level, stat);
4026 if (error)
4027 goto error0;
4028 return 0;
4029 }
4030
4031 rrecs = xfs_btree_get_numrecs(right);
4032 lrecs = xfs_btree_get_numrecs(left);
4033
4034 /*
4035 * We're now going to join "left" and "right" by moving all the stuff
4036 * in "right" to "left" and deleting "right".
4037 */
4038 XFS_BTREE_STATS_ADD(cur, moves, rrecs);
4039 if (level > 0) {
4040 /* It's a non-leaf. Move keys and pointers. */
4041 union xfs_btree_key *lkp; /* left btree key */
4042 union xfs_btree_ptr *lpp; /* left address pointer */
4043 union xfs_btree_key *rkp; /* right btree key */
4044 union xfs_btree_ptr *rpp; /* right address pointer */
4045
4046 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
4047 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
4048 rkp = xfs_btree_key_addr(cur, 1, right);
4049 rpp = xfs_btree_ptr_addr(cur, 1, right);
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07004050
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004051 for (i = 1; i < rrecs; i++) {
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07004052 error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004053 if (error)
4054 goto error0;
4055 }
Darrick J. Wong4cbae4b2018-06-03 21:10:48 -07004056
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004057 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
4058 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
4059
4060 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
4061 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
4062 } else {
4063 /* It's a leaf. Move records. */
4064 union xfs_btree_rec *lrp; /* left record pointer */
4065 union xfs_btree_rec *rrp; /* right record pointer */
4066
4067 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
4068 rrp = xfs_btree_rec_addr(cur, 1, right);
4069
4070 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
4071 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
4072 }
4073
4074 XFS_BTREE_STATS_INC(cur, join);
4075
4076 /*
Malcolm Parsons9da096f2009-03-29 09:55:42 +02004077 * Fix up the number of records and right block pointer in the
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004078 * surviving block, and log it.
4079 */
4080 xfs_btree_set_numrecs(left, lrecs + rrecs);
4081 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
4082 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4083 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
4084
4085 /* If there is a right sibling, point it to the remaining block. */
4086 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
4087 if (!xfs_btree_ptr_is_null(cur, &cptr)) {
Eric Sandeen0d7409b2014-04-14 18:59:56 +10004088 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004089 if (error)
4090 goto error0;
4091 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
4092 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
4093 }
4094
4095 /* Free the deleted block. */
Christoph Hellwigc46ee8a2016-02-08 14:58:07 +11004096 error = xfs_btree_free_block(cur, rbp);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004097 if (error)
4098 goto error0;
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004099
4100 /*
4101 * If we joined with the left neighbor, set the buffer in the
4102 * cursor to the left block, and fix up the index.
4103 */
4104 if (bp != lbp) {
4105 cur->bc_bufs[level] = lbp;
4106 cur->bc_ptrs[level] += lrecs;
4107 cur->bc_ra[level] = 0;
4108 }
4109 /*
4110 * If we joined with the right neighbor and there's a level above
4111 * us, increment the cursor at that level.
4112 */
4113 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
4114 (level + 1 < cur->bc_nlevels)) {
4115 error = xfs_btree_increment(cur, level + 1, &i);
4116 if (error)
4117 goto error0;
4118 }
4119
4120 /*
4121 * Readjust the ptr at this level if it's not a leaf, since it's
4122 * still pointing at the deletion point, which makes the cursor
4123 * inconsistent. If this makes the ptr 0, the caller fixes it up.
4124 * We can't use decrement because it would change the next level up.
4125 */
4126 if (level > 0)
4127 cur->bc_ptrs[level]--;
4128
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10004129 /*
4130 * We combined blocks, so we have to update the parent keys if the
4131 * btree supports overlapped intervals. However, bc_ptrs[level + 1]
4132 * points to the old block so that the caller knows which record to
4133 * delete. Therefore, the caller must be savvy enough to call updkeys
4134 * for us if we return stat == 2. The other exit points from this
4135 * function don't require deletions further up the tree, so they can
4136 * call updkeys directly.
4137 */
4138
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004139 /* Return value means the next level up has something to do. */
4140 *stat = 2;
4141 return 0;
4142
4143error0:
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004144 if (tcur)
4145 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
4146 return error;
4147}
4148
4149/*
4150 * Delete the record pointed to by cur.
4151 * The cursor refers to the place where the record was (could be inserted)
4152 * when the operation returns.
4153 */
4154int /* error */
4155xfs_btree_delete(
4156 struct xfs_btree_cur *cur,
4157 int *stat) /* success/failure */
4158{
4159 int error; /* error return value */
4160 int level;
4161 int i;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10004162 bool joined = false;
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004163
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004164 /*
4165 * Go up the tree, starting at leaf level.
4166 *
4167 * If 2 is returned then a join was done; go to the next level.
4168 * Otherwise we are done.
4169 */
4170 for (level = 0, i = 2; i == 2; level++) {
4171 error = xfs_btree_delrec(cur, level, &i);
4172 if (error)
4173 goto error0;
Darrick J. Wong2c813ad2016-08-03 11:08:36 +10004174 if (i == 2)
4175 joined = true;
4176 }
4177
4178 /*
4179 * If we combined blocks as part of deleting the record, delrec won't
4180 * have updated the parent high keys so we have to do that here.
4181 */
4182 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
4183 error = xfs_btree_updkeys_force(cur, 0);
4184 if (error)
4185 goto error0;
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004186 }
4187
4188 if (i == 0) {
4189 for (level = 1; level < cur->bc_nlevels; level++) {
4190 if (cur->bc_ptrs[level] == 0) {
4191 error = xfs_btree_decrement(cur, level, &i);
4192 if (error)
4193 goto error0;
4194 break;
4195 }
4196 }
4197 }
4198
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004199 *stat = i;
4200 return 0;
4201error0:
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11004202 return error;
4203}
Christoph Hellwig8cc938f2008-10-30 16:58:11 +11004204
4205/*
4206 * Get the data from the pointed-to record.
4207 */
4208int /* error */
4209xfs_btree_get_rec(
4210 struct xfs_btree_cur *cur, /* btree cursor */
4211 union xfs_btree_rec **recp, /* output: btree record */
4212 int *stat) /* output: success/failure */
4213{
4214 struct xfs_btree_block *block; /* btree block */
4215 struct xfs_buf *bp; /* buffer pointer */
4216 int ptr; /* record number */
4217#ifdef DEBUG
4218 int error; /* error return value */
4219#endif
4220
4221 ptr = cur->bc_ptrs[0];
4222 block = xfs_btree_get_block(cur, 0, &bp);
4223
4224#ifdef DEBUG
4225 error = xfs_btree_check_block(cur, block, 0, bp);
4226 if (error)
4227 return error;
4228#endif
4229
4230 /*
4231 * Off the right end or left end, return failure.
4232 */
4233 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
4234 *stat = 0;
4235 return 0;
4236 }
4237
4238 /*
4239 * Point to the record and extract its data.
4240 */
4241 *recp = xfs_btree_rec_addr(cur, ptr, block);
4242 *stat = 1;
4243 return 0;
4244}
Dave Chinner21b5c972013-08-30 10:23:44 +10004245
Darrick J. Wong28a89562016-08-03 11:10:55 +10004246/* Visit a block in a btree. */
4247STATIC int
4248xfs_btree_visit_block(
4249 struct xfs_btree_cur *cur,
4250 int level,
4251 xfs_btree_visit_blocks_fn fn,
4252 void *data)
4253{
4254 struct xfs_btree_block *block;
4255 struct xfs_buf *bp;
4256 union xfs_btree_ptr rptr;
4257 int error;
4258
4259 /* do right sibling readahead */
4260 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
4261 block = xfs_btree_get_block(cur, level, &bp);
4262
4263 /* process the block */
4264 error = fn(cur, level, data);
4265 if (error)
4266 return error;
4267
4268 /* now read rh sibling block for next iteration */
4269 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
4270 if (xfs_btree_ptr_is_null(cur, &rptr))
4271 return -ENOENT;
4272
4273 return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
4274}
4275
4276
4277/* Visit every block in a btree. */
4278int
4279xfs_btree_visit_blocks(
4280 struct xfs_btree_cur *cur,
4281 xfs_btree_visit_blocks_fn fn,
4282 void *data)
4283{
4284 union xfs_btree_ptr lptr;
4285 int level;
4286 struct xfs_btree_block *block = NULL;
4287 int error = 0;
4288
4289 cur->bc_ops->init_ptr_from_cur(cur, &lptr);
4290
4291 /* for each level */
4292 for (level = cur->bc_nlevels - 1; level >= 0; level--) {
4293 /* grab the left hand block */
4294 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
4295 if (error)
4296 return error;
4297
4298 /* readahead the left most block for the next level down */
4299 if (level > 0) {
4300 union xfs_btree_ptr *ptr;
4301
4302 ptr = xfs_btree_ptr_addr(cur, 1, block);
4303 xfs_btree_readahead_ptr(cur, ptr, 1);
4304
4305 /* save for the next iteration of the loop */
Eric Sandeena4d768e2017-05-22 19:54:10 -07004306 xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
Darrick J. Wong28a89562016-08-03 11:10:55 +10004307 }
4308
4309 /* for each buffer in the level */
4310 do {
4311 error = xfs_btree_visit_block(cur, level, fn, data);
4312 } while (!error);
4313
4314 if (error != -ENOENT)
4315 return error;
4316 }
4317
4318 return 0;
4319}
4320
Dave Chinner21b5c972013-08-30 10:23:44 +10004321/*
4322 * Change the owner of a btree.
4323 *
4324 * The mechanism we use here is ordered buffer logging. Because we don't know
 * how many buffers we are going to need to modify, we don't really want to
 * have to make transaction reservations for the worst case of every buffer in a
 * full size btree as that may be more space than we can fit in the log....
4328 *
4329 * We do the btree walk in the most optimal manner possible - we have sibling
4330 * pointers so we can just walk all the blocks on each level from left to right
4331 * in a single pass, and then move to the next level and do the same. We can
4332 * also do readahead on the sibling pointers to get IO moving more quickly,
4333 * though for slow disks this is unlikely to make much difference to performance
4334 * as the amount of CPU work we have to do before moving to the next block is
4335 * relatively small.
4336 *
4337 * For each btree block that we load, modify the owner appropriately, set the
4338 * buffer as an ordered buffer and log it appropriately. We need to ensure that
4339 * we mark the region we change dirty so that if the buffer is relogged in
4340 * a subsequent transaction the changes we make here as an ordered buffer are
Dave Chinner638f44162013-08-30 10:23:45 +10004341 * correctly relogged in that transaction. If we are in recovery context, then
4342 * just queue the modified buffer as delayed write buffer so the transaction
4343 * recovery completion writes the changes to disk.
Dave Chinner21b5c972013-08-30 10:23:44 +10004344 */
/* Context passed to xfs_btree_block_change_owner() by the block walker. */
struct xfs_btree_block_change_owner_info {
	uint64_t		new_owner;	/* owner value to stamp into each block */
	struct list_head	*buffer_list;	/* delwri list used when there is no transaction */
};
4349
Dave Chinner21b5c972013-08-30 10:23:44 +10004350static int
4351xfs_btree_block_change_owner(
4352 struct xfs_btree_cur *cur,
4353 int level,
Darrick J. Wong28a89562016-08-03 11:10:55 +10004354 void *data)
Dave Chinner21b5c972013-08-30 10:23:44 +10004355{
Darrick J. Wong28a89562016-08-03 11:10:55 +10004356 struct xfs_btree_block_change_owner_info *bbcoi = data;
Dave Chinner21b5c972013-08-30 10:23:44 +10004357 struct xfs_btree_block *block;
4358 struct xfs_buf *bp;
Dave Chinner21b5c972013-08-30 10:23:44 +10004359
4360 /* modify the owner */
4361 block = xfs_btree_get_block(cur, level, &bp);
Brian Foster2dd3d702017-08-29 10:08:40 -07004362 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
4363 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
4364 return 0;
Darrick J. Wong28a89562016-08-03 11:10:55 +10004365 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
Brian Foster2dd3d702017-08-29 10:08:40 -07004366 } else {
4367 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
4368 return 0;
Darrick J. Wong28a89562016-08-03 11:10:55 +10004369 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
Brian Foster2dd3d702017-08-29 10:08:40 -07004370 }
Dave Chinner21b5c972013-08-30 10:23:44 +10004371
4372 /*
Dave Chinner638f44162013-08-30 10:23:45 +10004373 * If the block is a root block hosted in an inode, we might not have a
4374 * buffer pointer here and we shouldn't attempt to log the change as the
4375 * information is already held in the inode and discarded when the root
4376 * block is formatted into the on-disk inode fork. We still change it,
4377 * though, so everything is consistent in memory.
Dave Chinner21b5c972013-08-30 10:23:44 +10004378 */
Brian Foster2dd3d702017-08-29 10:08:40 -07004379 if (!bp) {
Dave Chinner21b5c972013-08-30 10:23:44 +10004380 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
4381 ASSERT(level == cur->bc_nlevels - 1);
Brian Foster2dd3d702017-08-29 10:08:40 -07004382 return 0;
4383 }
4384
4385 if (cur->bc_tp) {
4386 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
4387 xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
4388 return -EAGAIN;
4389 }
4390 } else {
4391 xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
Dave Chinner21b5c972013-08-30 10:23:44 +10004392 }
4393
Darrick J. Wong28a89562016-08-03 11:10:55 +10004394 return 0;
Dave Chinner21b5c972013-08-30 10:23:44 +10004395}
4396
4397int
4398xfs_btree_change_owner(
4399 struct xfs_btree_cur *cur,
Darrick J. Wongc8ce5402017-06-16 11:00:05 -07004400 uint64_t new_owner,
Dave Chinner638f44162013-08-30 10:23:45 +10004401 struct list_head *buffer_list)
Dave Chinner21b5c972013-08-30 10:23:44 +10004402{
Darrick J. Wong28a89562016-08-03 11:10:55 +10004403 struct xfs_btree_block_change_owner_info bbcoi;
Dave Chinner21b5c972013-08-30 10:23:44 +10004404
Darrick J. Wong28a89562016-08-03 11:10:55 +10004405 bbcoi.new_owner = new_owner;
4406 bbcoi.buffer_list = buffer_list;
Dave Chinner21b5c972013-08-30 10:23:44 +10004407
Darrick J. Wong28a89562016-08-03 11:10:55 +10004408 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
4409 &bbcoi);
Dave Chinner21b5c972013-08-30 10:23:44 +10004410}
Darrick J. Wongc5ab1312016-01-04 16:13:21 +11004411
/*
 * Verify the v5 fields of a long-format btree block.
 * Returns NULL if the header is consistent, otherwise the address of the
 * failing check (each check has its own __this_address so the diagnostic
 * identifies which field was bad).
 */
xfs_failaddr_t
xfs_btree_lblock_v5hdr_verify(
	struct xfs_buf		*bp,
	uint64_t		owner)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	/* v5 headers only exist on CRC-enabled filesystems. */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return __this_address;
	/* The block must carry this filesystem's metadata UUID. */
	if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/* Self-describing block address must match where we read it from. */
	if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn))
		return __this_address;
	/* The owner check is skipped when the caller passes OWN_UNKNOWN. */
	if (owner != XFS_RMAP_OWN_UNKNOWN &&
	    be64_to_cpu(block->bb_u.l.bb_owner) != owner)
		return __this_address;
	return NULL;
}
4432
/*
 * Verify a long-format btree block.
 * Returns NULL if the block passes, otherwise the address of the failing
 * check for failure reporting.
 */
xfs_failaddr_t
xfs_btree_lblock_verify(
	struct xfs_buf		*bp,
	unsigned int		max_recs)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;

	/*
	 * sibling pointer verification: each sibling is either the
	 * NULLFSBLOCK sentinel or a valid filesystem block number.
	 */
	if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))
		return __this_address;
	if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))
		return __this_address;

	return NULL;
}
4456
/**
 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
 *				      btree block
 *
 * @bp: buffer containing the btree block
 *
 * Returns NULL if the v5 header fields are consistent, otherwise the
 * address of the failing check for failure reporting.
 */
xfs_failaddr_t
xfs_btree_sblock_v5hdr_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;

	/* v5 headers only exist on CRC-enabled filesystems. */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return __this_address;
	/* The block must carry this filesystem's metadata UUID. */
	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/* Self-describing block address must match where we read it from. */
	if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
		return __this_address;
	/* Short-format blocks are owned by the AG they live in. */
	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
		return __this_address;
	return NULL;
}
4483
4484/**
4485 * xfs_btree_sblock_verify() -- verify a short-format btree block
4486 *
4487 * @bp: buffer containing the btree block
4488 * @max_recs: maximum records allowed in this btree node
4489 */
Darrick J. Wonga6a781a2018-01-08 10:51:03 -08004490xfs_failaddr_t
Darrick J. Wongc5ab1312016-01-04 16:13:21 +11004491xfs_btree_sblock_verify(
4492 struct xfs_buf *bp,
4493 unsigned int max_recs)
4494{
4495 struct xfs_mount *mp = bp->b_target->bt_mount;
4496 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
Darrick J. Wonge1e55aa2018-01-08 10:51:01 -08004497 xfs_agblock_t agno;
Darrick J. Wongc5ab1312016-01-04 16:13:21 +11004498
4499 /* numrecs verification */
4500 if (be16_to_cpu(block->bb_numrecs) > max_recs)
Darrick J. Wonga6a781a2018-01-08 10:51:03 -08004501 return __this_address;
Darrick J. Wongc5ab1312016-01-04 16:13:21 +11004502
4503 /* sibling pointer verification */
Darrick J. Wonge1e55aa2018-01-08 10:51:01 -08004504 agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
4505 if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
4506 !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
Darrick J. Wonga6a781a2018-01-08 10:51:03 -08004507 return __this_address;
Darrick J. Wonge1e55aa2018-01-08 10:51:01 -08004508 if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
4509 !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
Darrick J. Wonga6a781a2018-01-08 10:51:03 -08004510 return __this_address;
Darrick J. Wongc5ab1312016-01-04 16:13:21 +11004511
Darrick J. Wonga6a781a2018-01-08 10:51:03 -08004512 return NULL;
Darrick J. Wongc5ab1312016-01-04 16:13:21 +11004513}
Darrick J. Wong19b54ee2016-06-21 11:53:28 +10004514
/*
 * Calculate the number of btree levels needed to store a given number of
 * records, given the per-block record limits (limits[0] for leaf blocks,
 * limits[1] for node blocks).
 */
uint
xfs_btree_compute_maxlevels(
	uint		*limits,
	unsigned long	len)
{
	uint		height;
	unsigned long	nblocks;

	/* Number of leaf blocks needed to hold all the records. */
	nblocks = (len + limits[0] - 1) / limits[0];

	/* Add node levels until a single block covers everything below. */
	for (height = 1; nblocks > 1; height++)
		nblocks = (nblocks + limits[1] - 1) / limits[1];
	return height;
}
Darrick J. Wong105f7d82016-08-03 11:10:21 +10004532
/*
 * Query a regular btree for all records overlapping a given interval.
 * Start with a LE lookup of the key of low_rec and return all records
 * until we find a record with a key greater than the key of high_rec.
 */
STATIC int
xfs_btree_simple_query_range(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*low_key,
	union xfs_btree_key	*high_key,
	xfs_btree_query_range_fn fn,
	void			*priv)
{
	union xfs_btree_rec	*recp;
	union xfs_btree_key	rec_key;
	int64_t			diff;
	int			stat;
	bool			firstrec = true;
	int			error;

	ASSERT(cur->bc_ops->init_high_key_from_rec);
	ASSERT(cur->bc_ops->diff_two_keys);

	/*
	 * Find the leftmost record.  The btree cursor must be set
	 * to the low record used to generate low_key (the caller,
	 * xfs_btree_query_range, leaves cur->bc_rec holding it).
	 */
	stat = 0;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
	if (error)
		goto out;

	/* Nothing?  See if there's anything to the right. */
	if (!stat) {
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			goto out;
	}

	while (stat) {
		/* Find the record. */
		error = xfs_btree_get_rec(cur, &recp, &stat);
		if (error || !stat)
			break;

		/*
		 * Skip if high_key(rec) < low_key.  Because of the LE
		 * lookup only the first record examined can lie entirely
		 * to the left of the query range.
		 */
		if (firstrec) {
			cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
			firstrec = false;
			diff = cur->bc_ops->diff_two_keys(cur, low_key,
					&rec_key);
			if (diff > 0)
				goto advloop;
		}

		/* Stop if high_key < low_key(rec). */
		cur->bc_ops->init_key_from_rec(&rec_key, recp);
		diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
		if (diff > 0)
			break;

		/* Callback; a negative error or ABORT ends the walk. */
		error = fn(cur, recp, priv);
		if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
			break;

advloop:
		/* Move on to the next record. */
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			break;
	}

out:
	return error;
}
4609
/*
 * Query an overlapped interval btree for all records overlapping a given
 * interval. This function roughly follows the algorithm given in
 * "Interval Trees" of _Introduction to Algorithms_, which is section
 * 14.3 in the 2nd and 3rd editions.
 *
 * First, generate keys for the low and high records passed in.
 *
 * For any leaf node, generate the high and low keys for the record.
 * If the record keys overlap with the query low/high keys, pass the
 * record to the function iterator.
 *
 * For any internal node, compare the low and high keys of each
 * pointer against the query low/high keys. If there's an overlap,
 * follow the pointer.
 *
 * As an optimization, we stop scanning a block when we find a low key
 * that is greater than the query's high key.
 */
STATIC int
xfs_btree_overlapped_query_range(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*low_key,
	union xfs_btree_key	*high_key,
	xfs_btree_query_range_fn fn,
	void			*priv)
{
	union xfs_btree_ptr	ptr;
	union xfs_btree_ptr	*pp;
	union xfs_btree_key	rec_key;
	union xfs_btree_key	rec_hkey;
	union xfs_btree_key	*lkp;
	union xfs_btree_key	*hkp;
	union xfs_btree_rec	*recp;
	struct xfs_btree_block	*block;
	int64_t			ldiff;
	int64_t			hdiff;
	int			level;
	struct xfs_buf		*bp;
	int			i;
	int			error;

	/* Load the root of the btree. */
	level = cur->bc_nlevels - 1;
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
	if (error)
		return error;
	xfs_btree_get_block(cur, level, &bp);
	trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto out;
#endif
	cur->bc_ptrs[level] = 1;

	/*
	 * Iterative depth-first walk: bc_ptrs[level] tracks the next entry
	 * to examine at each level of the current path.
	 */
	while (level < cur->bc_nlevels) {
		block = xfs_btree_get_block(cur, level, &bp);

		/* End of node, pop back towards the root. */
		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
pop_up:
			if (level < cur->bc_nlevels - 1)
				cur->bc_ptrs[level + 1]++;
			level++;
			continue;
		}

		if (level == 0) {
			/* Handle a leaf node. */
			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);

			cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
			ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
					low_key);

			cur->bc_ops->init_key_from_rec(&rec_key, recp);
			hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
					&rec_key);

			/*
			 * If (record's high key >= query's low key) and
			 *    (query's high key >= record's low key), then
			 * this record overlaps the query range; callback.
			 */
			if (ldiff >= 0 && hdiff >= 0) {
				error = fn(cur, recp, priv);
				if (error < 0 ||
				    error == XFS_BTREE_QUERY_RANGE_ABORT)
					break;
			} else if (hdiff < 0) {
				/* Record is larger than high key; pop. */
				goto pop_up;
			}
			cur->bc_ptrs[level]++;
			continue;
		}

		/* Handle an internal node. */
		lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
		hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);

		ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
		hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);

		/*
		 * If (pointer's high key >= query's low key) and
		 *    (query's high key >= pointer's low key), then
		 * this record overlaps the query range; follow pointer.
		 */
		if (ldiff >= 0 && hdiff >= 0) {
			/* Descend into the child and start at its first entry. */
			level--;
			error = xfs_btree_lookup_get_block(cur, level, pp,
					&block);
			if (error)
				goto out;
			xfs_btree_get_block(cur, level, &bp);
			trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
			error = xfs_btree_check_block(cur, block, level, bp);
			if (error)
				goto out;
#endif
			cur->bc_ptrs[level] = 1;
			continue;
		} else if (hdiff < 0) {
			/* The low key is larger than the upper range; pop. */
			goto pop_up;
		}
		cur->bc_ptrs[level]++;
	}

out:
	/*
	 * If we don't end this function with the cursor pointing at a record
	 * block, a subsequent non-error cursor deletion will not release
	 * node-level buffers, causing a buffer leak.  This is quite possible
	 * with a zero-results range query, so release the buffers if we
	 * failed to return any results.
	 */
	if (cur->bc_bufs[0] == NULL) {
		for (i = 0; i < cur->bc_nlevels; i++) {
			if (cur->bc_bufs[i]) {
				xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
				cur->bc_bufs[i] = NULL;
				cur->bc_ptrs[i] = 0;
				cur->bc_ra[i] = 0;
			}
		}
	}

	return error;
}
4765
/*
 * Query a btree for all records overlapping a given interval of keys.  The
 * supplied function will be called with each record found; return one of the
 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
 * code.  This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
 * negative error code.
 */
int
xfs_btree_query_range(
	struct xfs_btree_cur	*cur,
	union xfs_btree_irec	*low_rec,
	union xfs_btree_irec	*high_rec,
	xfs_btree_query_range_fn fn,
	void			*priv)
{
	union xfs_btree_rec	rec;
	union xfs_btree_key	low_key;
	union xfs_btree_key	high_key;

	/* Find the keys of both ends of the interval. */
	cur->bc_rec = *high_rec;
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(&high_key, &rec);

	/*
	 * Convert the low record last: this intentionally leaves
	 * cur->bc_rec holding *low_rec, which the simple query below
	 * requires for its initial LE lookup.  Do not reorder.
	 */
	cur->bc_rec = *low_rec;
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(&low_key, &rec);

	/* Enforce low key < high key. */
	if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
		return -EINVAL;

	/* Overlapping btrees need the interval-tree walk. */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return xfs_btree_simple_query_range(cur, &low_key,
				&high_key, fn, priv);
	return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
			fn, priv);
}
Darrick J. Wong4ed3f682016-09-19 10:25:03 +10004804
Darrick J. Wonge9a25992017-03-28 14:56:35 -07004805/* Query a btree for all records. */
4806int
4807xfs_btree_query_all(
4808 struct xfs_btree_cur *cur,
4809 xfs_btree_query_range_fn fn,
4810 void *priv)
4811{
Darrick J. Wong5a4c7332017-06-16 11:00:04 -07004812 union xfs_btree_key low_key;
4813 union xfs_btree_key high_key;
Darrick J. Wonge9a25992017-03-28 14:56:35 -07004814
Darrick J. Wong5a4c7332017-06-16 11:00:04 -07004815 memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
4816 memset(&low_key, 0, sizeof(low_key));
4817 memset(&high_key, 0xFF, sizeof(high_key));
4818
4819 return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
Darrick J. Wonge9a25992017-03-28 14:56:35 -07004820}
4821
Darrick J. Wong4ed3f682016-09-19 10:25:03 +10004822/*
4823 * Calculate the number of blocks needed to store a given number of records
4824 * in a short-format (per-AG metadata) btree.
4825 */
Darrick J. Wong14861c42018-05-09 10:02:01 -07004826unsigned long long
Darrick J. Wong4ed3f682016-09-19 10:25:03 +10004827xfs_btree_calc_size(
Darrick J. Wong4ed3f682016-09-19 10:25:03 +10004828 uint *limits,
4829 unsigned long long len)
4830{
4831 int level;
4832 int maxrecs;
Darrick J. Wong14861c42018-05-09 10:02:01 -07004833 unsigned long long rval;
Darrick J. Wong4ed3f682016-09-19 10:25:03 +10004834
4835 maxrecs = limits[0];
4836 for (level = 0, rval = 0; len > 1; level++) {
4837 len += maxrecs - 1;
4838 do_div(len, maxrecs);
4839 maxrecs = limits[1];
4840 rval += len;
4841 }
4842 return rval;
4843}
Darrick J. Wongc611cc02016-09-19 10:25:20 +10004844
Eric Biggersf1b82432016-10-20 15:42:30 +11004845static int
Darrick J. Wongc611cc02016-09-19 10:25:20 +10004846xfs_btree_count_blocks_helper(
4847 struct xfs_btree_cur *cur,
4848 int level,
4849 void *data)
4850{
4851 xfs_extlen_t *blocks = data;
4852 (*blocks)++;
4853
4854 return 0;
4855}
4856
4857/* Count the blocks in a btree and return the result in *blocks. */
4858int
4859xfs_btree_count_blocks(
4860 struct xfs_btree_cur *cur,
4861 xfs_extlen_t *blocks)
4862{
4863 *blocks = 0;
4864 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
4865 blocks);
4866}
Darrick J. Wongcc3e0942017-10-17 21:37:37 -07004867
4868/* Compare two btree pointers. */
4869int64_t
4870xfs_btree_diff_two_ptrs(
4871 struct xfs_btree_cur *cur,
4872 const union xfs_btree_ptr *a,
4873 const union xfs_btree_ptr *b)
4874{
4875 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
4876 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
4877 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
4878}
Darrick J. Wongce1d8022018-01-16 18:52:12 -08004879
4880/* If there's an extent, we're done. */
4881STATIC int
4882xfs_btree_has_record_helper(
4883 struct xfs_btree_cur *cur,
4884 union xfs_btree_rec *rec,
4885 void *priv)
4886{
4887 return XFS_BTREE_QUERY_RANGE_ABORT;
4888}
4889
4890/* Is there a record covering a given range of keys? */
4891int
4892xfs_btree_has_record(
4893 struct xfs_btree_cur *cur,
4894 union xfs_btree_irec *low,
4895 union xfs_btree_irec *high,
4896 bool *exists)
4897{
4898 int error;
4899
4900 error = xfs_btree_query_range(cur, low, high,
4901 &xfs_btree_has_record_helper, NULL);
4902 if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
4903 *exists = true;
4904 return 0;
4905 }
4906 *exists = false;
4907 return error;
4908}
Darrick J. Wong08daa3c2018-05-09 10:02:03 -07004909
4910/* Are there more records in this btree? */
4911bool
4912xfs_btree_has_more_records(
4913 struct xfs_btree_cur *cur)
4914{
4915 struct xfs_btree_block *block;
4916 struct xfs_buf *bp;
4917
4918 block = xfs_btree_get_block(cur, 0, &bp);
4919
4920 /* There are still records in this block. */
4921 if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(block))
4922 return true;
4923
4924 /* There are more record blocks. */
4925 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
4926 return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
4927 else
4928 return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
4929}