blob: 1077ce7e189fe776fbda6c2fdb882f98f6fed7d8 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * Architecture independence:
 * Copyright (c) 2005, Bull S.A.
 * Written by Pierre Peiffer <pierre.peiffer@bull.net>
 */

/*
 * Extents support for EXT4
 *
 * TODO:
 * - ext4*_error() should be used in some situations
 * - analyze all BUG()/BUG_ON(), use -EIO where appropriate
 * - smart tree reduction
 */
19
Alex Tomasa86c6182006-10-11 01:21:03 -070020#include <linux/fs.h>
21#include <linux/time.h>
Mingming Caocd02ff02007-10-16 18:38:25 -040022#include <linux/jbd2.h>
Alex Tomasa86c6182006-10-11 01:21:03 -070023#include <linux/highuid.h>
24#include <linux/pagemap.h>
25#include <linux/quotaops.h>
26#include <linux/string.h>
27#include <linux/slab.h>
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080028#include <linux/uaccess.h>
Eric Sandeen6873fa02008-10-07 00:46:36 -040029#include <linux/fiemap.h>
Tejun Heo66114ca2015-05-22 17:13:32 -040030#include <linux/backing-dev.h>
Ritesh Harjanid3b6f232020-02-28 14:56:58 +053031#include <linux/iomap.h>
Christoph Hellwig3dcf5452008-04-29 18:13:32 -040032#include "ext4_jbd2.h"
Theodore Ts'o4a092d72012-11-28 13:03:30 -050033#include "ext4_extents.h"
Tao Maf19d5872012-12-10 14:05:51 -050034#include "xattr.h"
Alex Tomasa86c6182006-10-11 01:21:03 -070035
Jiaying Zhang0562e0b2011-03-21 21:38:05 -040036#include <trace/events/ext4.h>
37
/*
 * Flags used by extent splitting.
 */

/* safe to zeroout if split fails due to ENOSPC */
#define EXT4_EXT_MAY_ZEROOUT	0x1
/* mark first half unwritten */
#define EXT4_EXT_MARK_UNWRIT1	0x2
/* mark second half unwritten */
#define EXT4_EXT_MARK_UNWRIT2	0x4

/* first half contains valid data */
#define EXT4_EXT_DATA_VALID1	0x8
/* second half contains valid data */
#define EXT4_EXT_DATA_VALID2	0x10
48
Darrick J. Wong7ac59902012-04-29 18:37:10 -040049static __le32 ext4_extent_block_csum(struct inode *inode,
50 struct ext4_extent_header *eh)
51{
52 struct ext4_inode_info *ei = EXT4_I(inode);
53 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
54 __u32 csum;
55
56 csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)eh,
57 EXT4_EXTENT_TAIL_OFFSET(eh));
58 return cpu_to_le32(csum);
59}
60
61static int ext4_extent_block_csum_verify(struct inode *inode,
62 struct ext4_extent_header *eh)
63{
64 struct ext4_extent_tail *et;
65
Dmitry Monakhov9aa5d32b2014-10-13 03:36:16 -040066 if (!ext4_has_metadata_csum(inode->i_sb))
Darrick J. Wong7ac59902012-04-29 18:37:10 -040067 return 1;
68
69 et = find_ext4_extent_tail(eh);
70 if (et->et_checksum != ext4_extent_block_csum(inode, eh))
71 return 0;
72 return 1;
73}
74
75static void ext4_extent_block_csum_set(struct inode *inode,
76 struct ext4_extent_header *eh)
77{
78 struct ext4_extent_tail *et;
79
Dmitry Monakhov9aa5d32b2014-10-13 03:36:16 -040080 if (!ext4_has_metadata_csum(inode->i_sb))
Darrick J. Wong7ac59902012-04-29 18:37:10 -040081 return;
82
83 et = find_ext4_extent_tail(eh);
84 et->et_checksum = ext4_extent_block_csum(inode, eh);
85}
86
Lukas Czerner5f95d212012-03-19 23:03:19 -040087static int ext4_split_extent_at(handle_t *handle,
88 struct inode *inode,
Theodore Ts'odfe50802014-09-01 14:37:09 -040089 struct ext4_ext_path **ppath,
Lukas Czerner5f95d212012-03-19 23:03:19 -040090 ext4_lblk_t split,
91 int split_flag,
92 int flags);
93
Jan Karaa4130362019-11-05 17:44:16 +010094static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
Alex Tomasa86c6182006-10-11 01:21:03 -070095{
Theodore Ts'o7b808192016-04-25 23:13:17 -040096 /*
Jan Karaa4130362019-11-05 17:44:16 +010097 * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this
98 * moment, get_block can be called only for blocks inside i_size since
99 * page cache has been already dropped and writes are blocked by
100 * i_mutex. So we can safely drop the i_data_sem here.
Theodore Ts'o7b808192016-04-25 23:13:17 -0400101 */
Jan Karaa4130362019-11-05 17:44:16 +0100102 BUG_ON(EXT4_JOURNAL(inode) == NULL);
brookxu27bc4462020-08-17 15:36:15 +0800103 ext4_discard_preallocations(inode, 0);
Jan Karaa4130362019-11-05 17:44:16 +0100104 up_write(&EXT4_I(inode)->i_data_sem);
105 *dropped = 1;
106 return 0;
107}
Jan Kara487caee2009-08-17 22:17:20 -0400108
Jan Karaa4130362019-11-05 17:44:16 +0100109/*
110 * Make sure 'handle' has at least 'check_cred' credits. If not, restart
111 * transaction with 'restart_cred' credits. The function drops i_data_sem
112 * when restarting transaction and gets it after transaction is restarted.
113 *
114 * The function returns 0 on success, 1 if transaction had to be restarted,
115 * and < 0 in case of fatal error.
116 */
117int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode,
Jan Kara83448bd2019-11-05 17:44:29 +0100118 int check_cred, int restart_cred,
119 int revoke_cred)
Jan Karaa4130362019-11-05 17:44:16 +0100120{
121 int ret;
122 int dropped = 0;
123
124 ret = ext4_journal_ensure_credits_fn(handle, check_cred, restart_cred,
Jan Kara83448bd2019-11-05 17:44:29 +0100125 revoke_cred, ext4_ext_trunc_restart_fn(inode, &dropped));
Jan Karaa4130362019-11-05 17:44:16 +0100126 if (dropped)
127 down_write(&EXT4_I(inode)->i_data_sem);
128 return ret;
Alex Tomasa86c6182006-10-11 01:21:03 -0700129}
130
131/*
132 * could return:
133 * - EROFS
134 * - ENOMEM
135 */
136static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
137 struct ext4_ext_path *path)
138{
Zhang Yi0f2f87d2021-09-08 20:08:50 +0800139 int err = 0;
140
Alex Tomasa86c6182006-10-11 01:21:03 -0700141 if (path->p_bh) {
142 /* path points to block */
liang xie5d601252014-05-12 22:06:43 -0400143 BUFFER_TRACE(path->p_bh, "get_write_access");
Zhang Yi0f2f87d2021-09-08 20:08:50 +0800144 err = ext4_journal_get_write_access(handle, inode->i_sb,
145 path->p_bh, EXT4_JTR_NONE);
146 /*
147 * The extent buffer's verified bit will be set again in
148 * __ext4_ext_dirty(). We could leave an inconsistent
149 * buffer if the extents updating procudure break off du
150 * to some error happens, force to check it again.
151 */
152 if (!err)
153 clear_buffer_verified(path->p_bh);
Alex Tomasa86c6182006-10-11 01:21:03 -0700154 }
155 /* path points to leaf/index in inode body */
156 /* we use in-core data, no need to protect them */
Zhang Yi0f2f87d2021-09-08 20:08:50 +0800157 return err;
Alex Tomasa86c6182006-10-11 01:21:03 -0700158}
159
160/*
161 * could return:
162 * - EROFS
163 * - ENOMEM
164 * - EIO
165 */
Eric Biggers43f81672019-12-31 12:04:40 -0600166static int __ext4_ext_dirty(const char *where, unsigned int line,
167 handle_t *handle, struct inode *inode,
168 struct ext4_ext_path *path)
Alex Tomasa86c6182006-10-11 01:21:03 -0700169{
170 int err;
Dmitry Monakhov4b1f1662014-07-27 22:28:15 -0400171
172 WARN_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
Alex Tomasa86c6182006-10-11 01:21:03 -0700173 if (path->p_bh) {
Darrick J. Wong7ac59902012-04-29 18:37:10 -0400174 ext4_extent_block_csum_set(inode, ext_block_hdr(path->p_bh));
Alex Tomasa86c6182006-10-11 01:21:03 -0700175 /* path points to block */
Theodore Ts'o9ea7a0d2011-09-04 10:18:14 -0400176 err = __ext4_handle_dirty_metadata(where, line, handle,
177 inode, path->p_bh);
Zhang Yi0f2f87d2021-09-08 20:08:50 +0800178 /* Extents updating done, re-set verified flag */
179 if (!err)
180 set_buffer_verified(path->p_bh);
Alex Tomasa86c6182006-10-11 01:21:03 -0700181 } else {
182 /* path points to leaf/index in inode body */
183 err = ext4_mark_inode_dirty(handle, inode);
184 }
185 return err;
186}
187
/* Wrapper that records the calling function and line for __ext4_ext_dirty(). */
#define ext4_ext_dirty(handle, inode, path)			\
	__ext4_ext_dirty(__func__, __LINE__,			\
			 (handle), (inode), (path))
190
Alex Tomasf65e6fb2006-10-11 01:21:05 -0700191static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
Alex Tomasa86c6182006-10-11 01:21:03 -0700192 struct ext4_ext_path *path,
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -0500193 ext4_lblk_t block)
Alex Tomasa86c6182006-10-11 01:21:03 -0700194{
Alex Tomasa86c6182006-10-11 01:21:03 -0700195 if (path) {
Yongqiang Yang81fdbb42011-10-29 09:23:38 -0400196 int depth = path->p_depth;
Alex Tomasa86c6182006-10-11 01:21:03 -0700197 struct ext4_extent *ex;
Alex Tomasa86c6182006-10-11 01:21:03 -0700198
Kazuya Mioad4fb9c2011-01-10 12:12:28 -0500199 /*
200 * Try to predict block placement assuming that we are
201 * filling in a file which will eventually be
202 * non-sparse --- i.e., in the case of libbfd writing
203 * an ELF object sections out-of-order but in a way
204 * the eventually results in a contiguous object or
205 * executable file, or some database extending a table
206 * space file. However, this is actually somewhat
207 * non-ideal if we are writing a sparse file such as
208 * qemu or KVM writing a raw image file that is going
209 * to stay fairly sparse, since it will end up
210 * fragmenting the file system's free space. Maybe we
211 * should have some hueristics or some way to allow
212 * userspace to pass a hint to file system,
Tao Mab8d65682011-01-21 23:21:31 +0800213 * especially if the latter case turns out to be
Kazuya Mioad4fb9c2011-01-10 12:12:28 -0500214 * common.
215 */
Avantika Mathur7e028972006-12-06 20:41:33 -0800216 ex = path[depth].p_ext;
Kazuya Mioad4fb9c2011-01-10 12:12:28 -0500217 if (ex) {
218 ext4_fsblk_t ext_pblk = ext4_ext_pblock(ex);
219 ext4_lblk_t ext_block = le32_to_cpu(ex->ee_block);
220
221 if (block > ext_block)
222 return ext_pblk + (block - ext_block);
223 else
224 return ext_pblk - (ext_block - block);
225 }
Alex Tomasa86c6182006-10-11 01:21:03 -0700226
Randy Dunlapd0d856e2006-10-11 01:21:07 -0700227 /* it looks like index is empty;
228 * try to find starting block from index itself */
Alex Tomasa86c6182006-10-11 01:21:03 -0700229 if (path[depth].p_bh)
230 return path[depth].p_bh->b_blocknr;
231 }
232
233 /* OK. use inode's group */
Eric Sandeenf86186b2011-06-28 10:01:31 -0400234 return ext4_inode_to_goal_block(inode);
Alex Tomasa86c6182006-10-11 01:21:03 -0700235}
236
Aneesh Kumar K.V654b4902008-07-11 19:27:31 -0400237/*
238 * Allocation for a meta data block
239 */
Alex Tomasf65e6fb2006-10-11 01:21:05 -0700240static ext4_fsblk_t
Aneesh Kumar K.V654b4902008-07-11 19:27:31 -0400241ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
Alex Tomasa86c6182006-10-11 01:21:03 -0700242 struct ext4_ext_path *path,
Allison Henderson55f020d2011-05-25 07:41:26 -0400243 struct ext4_extent *ex, int *err, unsigned int flags)
Alex Tomasa86c6182006-10-11 01:21:03 -0700244{
Alex Tomasf65e6fb2006-10-11 01:21:05 -0700245 ext4_fsblk_t goal, newblock;
Alex Tomasa86c6182006-10-11 01:21:03 -0700246
247 goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
Allison Henderson55f020d2011-05-25 07:41:26 -0400248 newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
249 NULL, err);
Alex Tomasa86c6182006-10-11 01:21:03 -0700250 return newblock;
251}
252
Theodore Ts'o55ad63b2009-08-28 10:40:33 -0400253static inline int ext4_ext_space_block(struct inode *inode, int check)
Alex Tomasa86c6182006-10-11 01:21:03 -0700254{
255 int size;
256
257 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
258 / sizeof(struct ext4_extent);
Robert P. J. Daybbf2f9f2007-02-17 19:20:16 +0100259#ifdef AGGRESSIVE_TEST
Yongqiang Yang02dc62fb2011-10-29 09:29:11 -0400260 if (!check && size > 6)
261 size = 6;
Alex Tomasa86c6182006-10-11 01:21:03 -0700262#endif
263 return size;
264}
265
Theodore Ts'o55ad63b2009-08-28 10:40:33 -0400266static inline int ext4_ext_space_block_idx(struct inode *inode, int check)
Alex Tomasa86c6182006-10-11 01:21:03 -0700267{
268 int size;
269
270 size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
271 / sizeof(struct ext4_extent_idx);
Robert P. J. Daybbf2f9f2007-02-17 19:20:16 +0100272#ifdef AGGRESSIVE_TEST
Yongqiang Yang02dc62fb2011-10-29 09:29:11 -0400273 if (!check && size > 5)
274 size = 5;
Alex Tomasa86c6182006-10-11 01:21:03 -0700275#endif
276 return size;
277}
278
Theodore Ts'o55ad63b2009-08-28 10:40:33 -0400279static inline int ext4_ext_space_root(struct inode *inode, int check)
Alex Tomasa86c6182006-10-11 01:21:03 -0700280{
281 int size;
282
283 size = sizeof(EXT4_I(inode)->i_data);
284 size -= sizeof(struct ext4_extent_header);
285 size /= sizeof(struct ext4_extent);
Robert P. J. Daybbf2f9f2007-02-17 19:20:16 +0100286#ifdef AGGRESSIVE_TEST
Yongqiang Yang02dc62fb2011-10-29 09:29:11 -0400287 if (!check && size > 3)
288 size = 3;
Alex Tomasa86c6182006-10-11 01:21:03 -0700289#endif
290 return size;
291}
292
Theodore Ts'o55ad63b2009-08-28 10:40:33 -0400293static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
Alex Tomasa86c6182006-10-11 01:21:03 -0700294{
295 int size;
296
297 size = sizeof(EXT4_I(inode)->i_data);
298 size -= sizeof(struct ext4_extent_header);
299 size /= sizeof(struct ext4_extent_idx);
Robert P. J. Daybbf2f9f2007-02-17 19:20:16 +0100300#ifdef AGGRESSIVE_TEST
Yongqiang Yang02dc62fb2011-10-29 09:29:11 -0400301 if (!check && size > 4)
302 size = 4;
Alex Tomasa86c6182006-10-11 01:21:03 -0700303#endif
304 return size;
305}
306
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -0400307static inline int
308ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
Theodore Ts'odfe50802014-09-01 14:37:09 -0400309 struct ext4_ext_path **ppath, ext4_lblk_t lblk,
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -0400310 int nofail)
311{
Theodore Ts'odfe50802014-09-01 14:37:09 -0400312 struct ext4_ext_path *path = *ppath;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -0400313 int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
Theodore Ts'o73c384c02020-05-07 10:50:28 -0700314 int flags = EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO;
315
316 if (nofail)
317 flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL | EXT4_EX_NOFAIL;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -0400318
Theodore Ts'odfe50802014-09-01 14:37:09 -0400319 return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -0400320 EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
Theodore Ts'o73c384c02020-05-07 10:50:28 -0700321 flags);
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -0400322}
323
/*
 * Maximum number of entries a tree node at @depth may hold.
 *
 * A node whose depth equals ext_depth(inode) is sized by the
 * ext4_ext_space_root*() helpers, any other node by the
 * ext4_ext_space_block*() helpers.  Depth 0 selects the extent (leaf)
 * variant, any other depth the index variant.  The helpers are called with
 * check == 1.
 */
static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
	int at_root = (depth == ext_depth(inode));

	if (depth == 0)
		return at_root ? ext4_ext_space_root(inode, 1)
			       : ext4_ext_space_block(inode, 1);

	return at_root ? ext4_ext_space_root_idx(inode, 1)
		       : ext4_ext_space_block_idx(inode, 1);
}
343
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400344static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
345{
Theodore Ts'obf89d162010-10-27 21:30:14 -0400346 ext4_fsblk_t block = ext4_ext_pblock(ext);
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400347 int len = ext4_ext_get_actual_len(ext);
Eryu Guan5946d082013-12-03 21:22:21 -0500348 ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
Theodore Ts'oe84a26c2009-04-22 20:52:25 -0400349
Vegard Nossumf70749c2016-06-30 11:53:46 -0400350 /*
351 * We allow neither:
352 * - zero length
353 * - overflow/wrap-around
354 */
355 if (lblock + len <= lblock)
Theodore Ts'o31d4f3a2012-03-11 23:30:16 -0400356 return 0;
Jan Karace9f24c2020-07-28 15:04:34 +0200357 return ext4_inode_block_valid(inode, block, len);
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400358}
359
/*
 * Validate a single index entry: the one block it points to must pass
 * ext4_inode_block_valid().
 */
static int ext4_valid_extent_idx(struct inode *inode,
				 struct ext4_extent_idx *ext_idx)
{
	return ext4_inode_block_valid(inode, ext4_idx_pblock(ext_idx), 1);
}
367
368static int ext4_valid_extent_entries(struct inode *inode,
Theodore Ts'o54d3adb2020-03-28 19:33:43 -0400369 struct ext4_extent_header *eh,
Zhang Yi9c6e0712021-09-08 20:08:49 +0800370 ext4_lblk_t lblk, ext4_fsblk_t *pblk,
371 int depth)
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400372{
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400373 unsigned short entries;
Zhang Yi8dd27fe2021-09-08 20:08:48 +0800374 ext4_lblk_t lblock = 0;
375 ext4_lblk_t prev = 0;
376
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400377 if (eh->eh_entries == 0)
378 return 1;
379
380 entries = le16_to_cpu(eh->eh_entries);
381
382 if (depth == 0) {
383 /* leaf entries */
Yongqiang Yang81fdbb42011-10-29 09:23:38 -0400384 struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
Zhang Yi9c6e0712021-09-08 20:08:49 +0800385
386 /*
387 * The logical block in the first entry should equal to
388 * the number in the index block.
389 */
390 if (depth != ext_depth(inode) &&
391 lblk != le32_to_cpu(ext->ee_block))
392 return 0;
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400393 while (entries) {
394 if (!ext4_valid_extent(inode, ext))
395 return 0;
Eryu Guan5946d082013-12-03 21:22:21 -0500396
397 /* Check for overlapping extents */
398 lblock = le32_to_cpu(ext->ee_block);
Eryu Guan5946d082013-12-03 21:22:21 -0500399 if ((lblock <= prev) && prev) {
Theodore Ts'o54d3adb2020-03-28 19:33:43 -0400400 *pblk = ext4_ext_pblock(ext);
Eryu Guan5946d082013-12-03 21:22:21 -0500401 return 0;
402 }
Zhang Yi8dd27fe2021-09-08 20:08:48 +0800403 prev = lblock + ext4_ext_get_actual_len(ext) - 1;
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400404 ext++;
405 entries--;
406 }
407 } else {
Yongqiang Yang81fdbb42011-10-29 09:23:38 -0400408 struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
Zhang Yi9c6e0712021-09-08 20:08:49 +0800409
410 /*
411 * The logical block in the first entry should equal to
412 * the number in the parent index block.
413 */
414 if (depth != ext_depth(inode) &&
415 lblk != le32_to_cpu(ext_idx->ei_block))
416 return 0;
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400417 while (entries) {
418 if (!ext4_valid_extent_idx(inode, ext_idx))
419 return 0;
Zhang Yi8dd27fe2021-09-08 20:08:48 +0800420
421 /* Check for overlapping index extents */
422 lblock = le32_to_cpu(ext_idx->ei_block);
423 if ((lblock <= prev) && prev) {
424 *pblk = ext4_idx_pblock(ext_idx);
425 return 0;
426 }
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400427 ext_idx++;
428 entries--;
Zhang Yi8dd27fe2021-09-08 20:08:48 +0800429 prev = lblock;
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400430 }
431 }
432 return 1;
433}
434
Theodore Ts'oc398eda2010-07-27 11:56:40 -0400435static int __ext4_ext_check(const char *function, unsigned int line,
436 struct inode *inode, struct ext4_extent_header *eh,
Zhang Yi9c6e0712021-09-08 20:08:49 +0800437 int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk)
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400438{
439 const char *error_msg;
Darrick J. Wong6a797d22015-10-17 16:16:04 -0400440 int max = 0, err = -EFSCORRUPTED;
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400441
442 if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
443 error_msg = "invalid magic";
444 goto corrupted;
445 }
446 if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
447 error_msg = "unexpected eh_depth";
448 goto corrupted;
449 }
450 if (unlikely(eh->eh_max == 0)) {
451 error_msg = "invalid eh_max";
452 goto corrupted;
453 }
454 max = ext4_ext_max_entries(inode, depth);
455 if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
456 error_msg = "too large eh_max";
457 goto corrupted;
458 }
459 if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
460 error_msg = "invalid eh_entries";
461 goto corrupted;
462 }
Zhang Yi9c6e0712021-09-08 20:08:49 +0800463 if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) {
Aneesh Kumar K.V56b19862009-03-12 09:51:20 -0400464 error_msg = "invalid extent entries";
465 goto corrupted;
466 }
Vegard Nossum7bc94912016-07-15 00:22:07 -0400467 if (unlikely(depth > 32)) {
468 error_msg = "too large eh_depth";
469 goto corrupted;
470 }
Darrick J. Wong7ac59902012-04-29 18:37:10 -0400471 /* Verify checksum on non-root extent tree nodes */
472 if (ext_depth(inode) != depth &&
473 !ext4_extent_block_csum_verify(inode, eh)) {
474 error_msg = "extent tree corrupted";
Darrick J. Wong6a797d22015-10-17 16:16:04 -0400475 err = -EFSBADCRC;
Darrick J. Wong7ac59902012-04-29 18:37:10 -0400476 goto corrupted;
477 }
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400478 return 0;
479
480corrupted:
Theodore Ts'o54d3adb2020-03-28 19:33:43 -0400481 ext4_error_inode_err(inode, function, line, 0, -err,
482 "pblk %llu bad header/extent: %s - magic %x, "
483 "entries %u, max %u(%u), depth %u(%u)",
484 (unsigned long long) pblk, error_msg,
485 le16_to_cpu(eh->eh_magic),
486 le16_to_cpu(eh->eh_entries),
487 le16_to_cpu(eh->eh_max),
488 max, le16_to_cpu(eh->eh_depth), depth);
Darrick J. Wong6a797d22015-10-17 16:16:04 -0400489 return err;
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400490}
491
/*
 * Wrapper that records the calling function and line for
 * __ext4_ext_check() and passes 0 as the expected logical block.
 */
#define ext4_ext_check(inode, eh, depth, pblk)				\
	__ext4_ext_check(__func__, __LINE__,				\
			 (inode), (eh), (depth), (pblk), 0)
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400494
/*
 * Check the extent tree root stored in the inode.  Returns the result of
 * ext4_ext_check() on the in-inode header at the inode's tree depth.
 */
int ext4_ext_check_inode(struct inode *inode)
{
	return ext4_ext_check(inode, ext_inode_hdr(inode),
			      ext_depth(inode), 0);
}
499
Dmitry Monakhov40686642019-11-06 12:25:02 +0000500static void ext4_cache_extents(struct inode *inode,
501 struct ext4_extent_header *eh)
502{
503 struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
504 ext4_lblk_t prev = 0;
505 int i;
506
507 for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
508 unsigned int status = EXTENT_STATUS_WRITTEN;
509 ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
510 int len = ext4_ext_get_actual_len(ex);
511
512 if (prev && (prev != lblk))
513 ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
514 EXTENT_STATUS_HOLE);
515
516 if (ext4_ext_is_unwritten(ex))
517 status = EXTENT_STATUS_UNWRITTEN;
518 ext4_es_cache_extent(inode, lblk, len,
519 ext4_ext_pblock(ex), status);
520 prev = lblk + len;
521 }
522}
523
Theodore Ts'o7d7ea892013-08-16 21:20:41 -0400524static struct buffer_head *
525__read_extent_tree_block(const char *function, unsigned int line,
Zhang Yi9c6e0712021-09-08 20:08:49 +0800526 struct inode *inode, struct ext4_extent_idx *idx,
527 int depth, int flags)
Darrick J. Wongf8489122012-04-29 18:21:10 -0400528{
Theodore Ts'o7d7ea892013-08-16 21:20:41 -0400529 struct buffer_head *bh;
530 int err;
Theodore Ts'o73c384c02020-05-07 10:50:28 -0700531 gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS;
Zhang Yi9c6e0712021-09-08 20:08:49 +0800532 ext4_fsblk_t pblk;
Darrick J. Wongf8489122012-04-29 18:21:10 -0400533
Theodore Ts'o73c384c02020-05-07 10:50:28 -0700534 if (flags & EXT4_EX_NOFAIL)
535 gfp_flags |= __GFP_NOFAIL;
536
Zhang Yi9c6e0712021-09-08 20:08:49 +0800537 pblk = ext4_idx_pblock(idx);
Theodore Ts'o73c384c02020-05-07 10:50:28 -0700538 bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags);
Theodore Ts'o7d7ea892013-08-16 21:20:41 -0400539 if (unlikely(!bh))
540 return ERR_PTR(-ENOMEM);
541
542 if (!bh_uptodate_or_lock(bh)) {
543 trace_ext4_ext_load_extent(inode, pblk, _RET_IP_);
zhangyi (F)2d069c02020-09-24 15:33:33 +0800544 err = ext4_read_bh(bh, 0, NULL);
Theodore Ts'o7d7ea892013-08-16 21:20:41 -0400545 if (err < 0)
546 goto errout;
547 }
Theodore Ts'o7869a4a2013-08-16 22:05:14 -0400548 if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE))
Theodore Ts'o7d7ea892013-08-16 21:20:41 -0400549 return bh;
Zhang Yi9c6e0712021-09-08 20:08:49 +0800550 err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh),
551 depth, pblk, le32_to_cpu(idx->ei_block));
Jan Karace9f24c2020-07-28 15:04:34 +0200552 if (err)
553 goto errout;
Darrick J. Wongf8489122012-04-29 18:21:10 -0400554 set_buffer_verified(bh);
Theodore Ts'o107a7bd2013-08-16 21:23:41 -0400555 /*
556 * If this is a leaf block, cache all of its entries
557 */
558 if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
559 struct ext4_extent_header *eh = ext_block_hdr(bh);
Dmitry Monakhov40686642019-11-06 12:25:02 +0000560 ext4_cache_extents(inode, eh);
Theodore Ts'o107a7bd2013-08-16 21:23:41 -0400561 }
Theodore Ts'o7d7ea892013-08-16 21:20:41 -0400562 return bh;
563errout:
564 put_bh(bh);
565 return ERR_PTR(err);
566
Darrick J. Wongf8489122012-04-29 18:21:10 -0400567}
568
/*
 * Wrapper that records the calling function and line for
 * __read_extent_tree_block().
 */
#define read_extent_tree_block(inode, idx, depth, flags)		\
	__read_extent_tree_block(__func__, __LINE__,			\
				 (inode), (idx), (depth), (flags))
Darrick J. Wongf8489122012-04-29 18:21:10 -0400572
Theodore Ts'o7869a4a2013-08-16 22:05:14 -0400573/*
574 * This function is called to cache a file's extent information in the
575 * extent status tree
576 */
577int ext4_ext_precache(struct inode *inode)
578{
579 struct ext4_inode_info *ei = EXT4_I(inode);
580 struct ext4_ext_path *path = NULL;
581 struct buffer_head *bh;
582 int i = 0, depth, ret = 0;
583
584 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
585 return 0; /* not an extent-mapped inode */
586
587 down_read(&ei->i_data_sem);
588 depth = ext_depth(inode);
589
Ritesh Harjani2f424a52020-02-28 14:56:55 +0530590 /* Don't cache anything if there are no external extent blocks */
591 if (!depth) {
592 up_read(&ei->i_data_sem);
593 return ret;
594 }
595
Kees Cook6396bb22018-06-12 14:03:40 -0700596 path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
Theodore Ts'o7869a4a2013-08-16 22:05:14 -0400597 GFP_NOFS);
598 if (path == NULL) {
599 up_read(&ei->i_data_sem);
600 return -ENOMEM;
601 }
602
Theodore Ts'o7869a4a2013-08-16 22:05:14 -0400603 path[0].p_hdr = ext_inode_hdr(inode);
604 ret = ext4_ext_check(inode, path[0].p_hdr, depth, 0);
605 if (ret)
606 goto out;
607 path[0].p_idx = EXT_FIRST_INDEX(path[0].p_hdr);
608 while (i >= 0) {
609 /*
610 * If this is a leaf block or we've reached the end of
611 * the index block, go up
612 */
613 if ((i == depth) ||
614 path[i].p_idx > EXT_LAST_INDEX(path[i].p_hdr)) {
615 brelse(path[i].p_bh);
616 path[i].p_bh = NULL;
617 i--;
618 continue;
619 }
Zhang Yi9c6e0712021-09-08 20:08:49 +0800620 bh = read_extent_tree_block(inode, path[i].p_idx++,
Theodore Ts'o7869a4a2013-08-16 22:05:14 -0400621 depth - i - 1,
622 EXT4_EX_FORCE_CACHE);
623 if (IS_ERR(bh)) {
624 ret = PTR_ERR(bh);
625 break;
626 }
627 i++;
628 path[i].p_bh = bh;
629 path[i].p_hdr = ext_block_hdr(bh);
630 path[i].p_idx = EXT_FIRST_INDEX(path[i].p_hdr);
631 }
632 ext4_set_inode_state(inode, EXT4_STATE_EXT_PRECACHED);
633out:
634 up_read(&ei->i_data_sem);
635 ext4_ext_drop_refs(path);
636 kfree(path);
637 return ret;
638}
639
#ifdef EXT_DEBUG
/*
 * Debug helper: print one item per level of @path - the index entry if
 * the level has one, else the extent, else "[]".
 */
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
	int depth = path->p_depth;
	int lvl;

	ext_debug(inode, "path:");
	for (lvl = 0; lvl <= depth; lvl++) {
		struct ext4_ext_path *p = &path[lvl];

		if (p->p_idx) {
			ext_debug(inode, "  %d->%llu",
				  le32_to_cpu(p->p_idx->ei_block),
				  ext4_idx_pblock(p->p_idx));
		} else if (p->p_ext) {
			ext_debug(inode, "  %d:[%d]%d:%llu ",
				  le32_to_cpu(p->p_ext->ee_block),
				  ext4_ext_is_unwritten(p->p_ext),
				  ext4_ext_get_actual_len(p->p_ext),
				  ext4_ext_pblock(p->p_ext));
		} else {
			ext_debug(inode, "  []");
		}
	}
	ext_debug(inode, "\n");
}

/*
 * Debug helper: print every extent of the leaf at the bottom of @path.
 * Does nothing when @path is NULL.
 */
static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
	int depth = ext_depth(inode);
	struct ext4_extent_header *eh;
	struct ext4_extent *ex;
	int nr, i;

	if (!path)
		return;

	eh = path[depth].p_hdr;
	ex = EXT_FIRST_EXTENT(eh);

	ext_debug(inode, "Displaying leaf extents\n");

	nr = le16_to_cpu(eh->eh_entries);
	for (i = 0; i < nr; i++) {
		ext_debug(inode, "%d:[%d]%d:%llu ", le32_to_cpu(ex[i].ee_block),
			  ext4_ext_is_unwritten(&ex[i]),
			  ext4_ext_get_actual_len(&ex[i]),
			  ext4_ext_pblock(&ex[i]));
	}
	ext_debug(inode, "\n");
}

/*
 * Debug helper: print the entries of @path at @level, from the current
 * one up to the node's maximum slot, as being moved to block @newblock.
 * Index entries are printed when @level is not the tree depth, extents
 * otherwise.
 */
static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
			ext4_fsblk_t newblock, int level)
{
	int depth = ext_depth(inode);
	struct ext4_extent *ex;

	if (depth != level) {
		struct ext4_extent_idx *idx;

		for (idx = path[level].p_idx;
		     idx <= EXT_MAX_INDEX(path[level].p_hdr); idx++) {
			ext_debug(inode, "%d: move %d:%llu in new index %llu\n",
				  level, le32_to_cpu(idx->ei_block),
				  ext4_idx_pblock(idx), newblock);
		}

		return;
	}

	for (ex = path[depth].p_ext;
	     ex <= EXT_MAX_EXTENT(path[depth].p_hdr); ex++) {
		ext_debug(inode, "move %d:%llu:[%d]%d in new leaf %llu\n",
				le32_to_cpu(ex->ee_block),
				ext4_ext_pblock(ex),
				ext4_ext_is_unwritten(ex),
				ext4_ext_get_actual_len(ex),
				newblock);
	}
}

#else
/* Without EXT_DEBUG the debug helpers expand to nothing. */
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#define ext4_ext_show_move(inode, path, newblock, level)
#endif
722
Aneesh Kumar K.Vb35905c2008-02-25 16:54:37 -0500723void ext4_ext_drop_refs(struct ext4_ext_path *path)
Alex Tomasa86c6182006-10-11 01:21:03 -0700724{
Theodore Ts'ob7ea89a2014-09-01 14:39:09 -0400725 int depth, i;
Alex Tomasa86c6182006-10-11 01:21:03 -0700726
Theodore Ts'ob7ea89a2014-09-01 14:39:09 -0400727 if (!path)
728 return;
729 depth = path->p_depth;
Eric Biggersde745482019-12-31 12:04:44 -0600730 for (i = 0; i <= depth; i++, path++) {
Markus Elfringe0f49d22020-06-13 19:12:24 +0200731 brelse(path->p_bh);
732 path->p_bh = NULL;
Eric Biggersde745482019-12-31 12:04:44 -0600733 }
Alex Tomasa86c6182006-10-11 01:21:03 -0700734}
735
736/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -0700737 * ext4_ext_binsearch_idx:
738 * binary search for the closest index of the given block
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400739 * the header must be checked before calling this
Alex Tomasa86c6182006-10-11 01:21:03 -0700740 */
741static void
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -0500742ext4_ext_binsearch_idx(struct inode *inode,
743 struct ext4_ext_path *path, ext4_lblk_t block)
Alex Tomasa86c6182006-10-11 01:21:03 -0700744{
745 struct ext4_extent_header *eh = path->p_hdr;
746 struct ext4_extent_idx *r, *l, *m;
747
Alex Tomasa86c6182006-10-11 01:21:03 -0700748
Ritesh Harjani70aa1552020-05-10 11:54:55 +0530749 ext_debug(inode, "binsearch for %u(idx): ", block);
Alex Tomasa86c6182006-10-11 01:21:03 -0700750
751 l = EXT_FIRST_INDEX(eh) + 1;
Dmitry Monakhove9f410b2007-07-18 09:09:15 -0400752 r = EXT_LAST_INDEX(eh);
Alex Tomasa86c6182006-10-11 01:21:03 -0700753 while (l <= r) {
754 m = l + (r - l) / 2;
yangerkun83c56882021-09-03 14:27:46 +0800755 ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
756 le32_to_cpu(l->ei_block), m, le32_to_cpu(m->ei_block),
757 r, le32_to_cpu(r->ei_block));
758
Alex Tomasa86c6182006-10-11 01:21:03 -0700759 if (block < le32_to_cpu(m->ei_block))
760 r = m - 1;
761 else
762 l = m + 1;
Alex Tomasa86c6182006-10-11 01:21:03 -0700763 }
764
765 path->p_idx = l - 1;
Ritesh Harjani70aa1552020-05-10 11:54:55 +0530766 ext_debug(inode, " -> %u->%lld ", le32_to_cpu(path->p_idx->ei_block),
Theodore Ts'obf89d162010-10-27 21:30:14 -0400767 ext4_idx_pblock(path->p_idx));
Alex Tomasa86c6182006-10-11 01:21:03 -0700768
769#ifdef CHECK_BINSEARCH
770 {
771 struct ext4_extent_idx *chix, *ix;
772 int k;
773
774 chix = ix = EXT_FIRST_INDEX(eh);
775 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
Eric Biggers6e89bbb2019-12-31 12:04:43 -0600776 if (k != 0 && le32_to_cpu(ix->ei_block) <=
777 le32_to_cpu(ix[-1].ei_block)) {
Theodore Ts'o4776004f2008-09-08 23:00:52 -0400778 printk(KERN_DEBUG "k=%d, ix=0x%p, "
779 "first=0x%p\n", k,
780 ix, EXT_FIRST_INDEX(eh));
781 printk(KERN_DEBUG "%u <= %u\n",
Alex Tomasa86c6182006-10-11 01:21:03 -0700782 le32_to_cpu(ix->ei_block),
783 le32_to_cpu(ix[-1].ei_block));
784 }
785 BUG_ON(k && le32_to_cpu(ix->ei_block)
Dave Kleikamp8c55e202007-05-24 13:04:54 -0400786 <= le32_to_cpu(ix[-1].ei_block));
Alex Tomasa86c6182006-10-11 01:21:03 -0700787 if (block < le32_to_cpu(ix->ei_block))
788 break;
789 chix = ix;
790 }
791 BUG_ON(chix != path->p_idx);
792 }
793#endif
794
795}
796
797/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -0700798 * ext4_ext_binsearch:
799 * binary search for closest extent of the given block
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400800 * the header must be checked before calling this
Alex Tomasa86c6182006-10-11 01:21:03 -0700801 */
802static void
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -0500803ext4_ext_binsearch(struct inode *inode,
804 struct ext4_ext_path *path, ext4_lblk_t block)
Alex Tomasa86c6182006-10-11 01:21:03 -0700805{
806 struct ext4_extent_header *eh = path->p_hdr;
807 struct ext4_extent *r, *l, *m;
808
Alex Tomasa86c6182006-10-11 01:21:03 -0700809 if (eh->eh_entries == 0) {
810 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -0700811 * this leaf is empty:
812 * we get such a leaf in split/add case
Alex Tomasa86c6182006-10-11 01:21:03 -0700813 */
814 return;
815 }
816
Ritesh Harjani70aa1552020-05-10 11:54:55 +0530817 ext_debug(inode, "binsearch for %u: ", block);
Alex Tomasa86c6182006-10-11 01:21:03 -0700818
819 l = EXT_FIRST_EXTENT(eh) + 1;
Dmitry Monakhove9f410b2007-07-18 09:09:15 -0400820 r = EXT_LAST_EXTENT(eh);
Alex Tomasa86c6182006-10-11 01:21:03 -0700821
822 while (l <= r) {
823 m = l + (r - l) / 2;
yangerkun83c56882021-09-03 14:27:46 +0800824 ext_debug(inode, "%p(%u):%p(%u):%p(%u) ", l,
825 le32_to_cpu(l->ee_block), m, le32_to_cpu(m->ee_block),
826 r, le32_to_cpu(r->ee_block));
827
Alex Tomasa86c6182006-10-11 01:21:03 -0700828 if (block < le32_to_cpu(m->ee_block))
829 r = m - 1;
830 else
831 l = m + 1;
Alex Tomasa86c6182006-10-11 01:21:03 -0700832 }
833
834 path->p_ext = l - 1;
Ritesh Harjani70aa1552020-05-10 11:54:55 +0530835 ext_debug(inode, " -> %d:%llu:[%d]%d ",
Dave Kleikamp8c55e202007-05-24 13:04:54 -0400836 le32_to_cpu(path->p_ext->ee_block),
Theodore Ts'obf89d162010-10-27 21:30:14 -0400837 ext4_ext_pblock(path->p_ext),
Lukas Czerner556615d2014-04-20 23:45:47 -0400838 ext4_ext_is_unwritten(path->p_ext),
Amit Aroraa2df2a62007-07-17 21:42:41 -0400839 ext4_ext_get_actual_len(path->p_ext));
Alex Tomasa86c6182006-10-11 01:21:03 -0700840
841#ifdef CHECK_BINSEARCH
842 {
843 struct ext4_extent *chex, *ex;
844 int k;
845
846 chex = ex = EXT_FIRST_EXTENT(eh);
847 for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
848 BUG_ON(k && le32_to_cpu(ex->ee_block)
Dave Kleikamp8c55e202007-05-24 13:04:54 -0400849 <= le32_to_cpu(ex[-1].ee_block));
Alex Tomasa86c6182006-10-11 01:21:03 -0700850 if (block < le32_to_cpu(ex->ee_block))
851 break;
852 chex = ex;
853 }
854 BUG_ON(chex != path->p_ext);
855 }
856#endif
857
858}
859
Harshad Shirwadkar4209ae12020-04-26 18:34:37 -0700860void ext4_ext_tree_init(handle_t *handle, struct inode *inode)
Alex Tomasa86c6182006-10-11 01:21:03 -0700861{
862 struct ext4_extent_header *eh;
863
864 eh = ext_inode_hdr(inode);
865 eh->eh_depth = 0;
866 eh->eh_entries = 0;
867 eh->eh_magic = EXT4_EXT_MAGIC;
Theodore Ts'o55ad63b2009-08-28 10:40:33 -0400868 eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode, 0));
Anirudh Rayabharamce3aba42021-05-07 00:26:54 +0530869 eh->eh_generation = 0;
Alex Tomasa86c6182006-10-11 01:21:03 -0700870 ext4_mark_inode_dirty(handle, inode);
Alex Tomasa86c6182006-10-11 01:21:03 -0700871}
872
873struct ext4_ext_path *
Theodore Ts'oed8a1a72014-09-01 14:43:09 -0400874ext4_find_extent(struct inode *inode, ext4_lblk_t block,
875 struct ext4_ext_path **orig_path, int flags)
Alex Tomasa86c6182006-10-11 01:21:03 -0700876{
877 struct ext4_extent_header *eh;
878 struct buffer_head *bh;
Theodore Ts'o705912c2014-09-01 14:34:09 -0400879 struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
880 short int depth, i, ppos = 0;
Theodore Ts'o860d21e2013-01-12 16:19:36 -0500881 int ret;
Theodore Ts'o73c384c02020-05-07 10:50:28 -0700882 gfp_t gfp_flags = GFP_NOFS;
883
884 if (flags & EXT4_EX_NOFAIL)
885 gfp_flags |= __GFP_NOFAIL;
Alex Tomasa86c6182006-10-11 01:21:03 -0700886
887 eh = ext_inode_hdr(inode);
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400888 depth = ext_depth(inode);
Theodore Ts'obc890a62018-06-14 12:55:10 -0400889 if (depth < 0 || depth > EXT4_MAX_EXTENT_DEPTH) {
890 EXT4_ERROR_INODE(inode, "inode has invalid extent depth: %d",
891 depth);
892 ret = -EFSCORRUPTED;
893 goto err;
894 }
Alex Tomasa86c6182006-10-11 01:21:03 -0700895
Theodore Ts'o10809df82014-09-01 14:40:09 -0400896 if (path) {
Theodore Ts'o523f4312014-09-01 14:38:09 -0400897 ext4_ext_drop_refs(path);
Theodore Ts'o10809df82014-09-01 14:40:09 -0400898 if (depth > path[0].p_maxdepth) {
899 kfree(path);
900 *orig_path = path = NULL;
901 }
902 }
903 if (!path) {
Theodore Ts'o523f4312014-09-01 14:38:09 -0400904 /* account possible depth increase */
Kees Cook6396bb22018-06-12 14:03:40 -0700905 path = kcalloc(depth + 2, sizeof(struct ext4_ext_path),
Theodore Ts'o73c384c02020-05-07 10:50:28 -0700906 gfp_flags);
Theodore Ts'o19008f62014-08-31 15:03:14 -0400907 if (unlikely(!path))
Alex Tomasa86c6182006-10-11 01:21:03 -0700908 return ERR_PTR(-ENOMEM);
Theodore Ts'o10809df82014-09-01 14:40:09 -0400909 path[0].p_maxdepth = depth + 1;
Alex Tomasa86c6182006-10-11 01:21:03 -0700910 }
Alex Tomasa86c6182006-10-11 01:21:03 -0700911 path[0].p_hdr = eh;
Shen Feng1973adc2008-07-11 19:27:31 -0400912 path[0].p_bh = NULL;
Alex Tomasa86c6182006-10-11 01:21:03 -0700913
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400914 i = depth;
Dmitry Monakhov40686642019-11-06 12:25:02 +0000915 if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
916 ext4_cache_extents(inode, eh);
Alex Tomasa86c6182006-10-11 01:21:03 -0700917 /* walk through the tree */
918 while (i) {
Ritesh Harjani70aa1552020-05-10 11:54:55 +0530919 ext_debug(inode, "depth %d: num %d, max %d\n",
Alex Tomasa86c6182006-10-11 01:21:03 -0700920 ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
Alex Tomasc29c0ae2007-07-18 09:19:09 -0400921
Alex Tomasa86c6182006-10-11 01:21:03 -0700922 ext4_ext_binsearch_idx(inode, path + ppos, block);
Theodore Ts'obf89d162010-10-27 21:30:14 -0400923 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
Alex Tomasa86c6182006-10-11 01:21:03 -0700924 path[ppos].p_depth = i;
925 path[ppos].p_ext = NULL;
926
Zhang Yi9c6e0712021-09-08 20:08:49 +0800927 bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags);
Viresh Kumara1c83682015-08-12 15:59:44 +0530928 if (IS_ERR(bh)) {
Theodore Ts'o7d7ea892013-08-16 21:20:41 -0400929 ret = PTR_ERR(bh);
Alex Tomasa86c6182006-10-11 01:21:03 -0700930 goto err;
Theodore Ts'o860d21e2013-01-12 16:19:36 -0500931 }
Theodore Ts'o7d7ea892013-08-16 21:20:41 -0400932
Alex Tomasa86c6182006-10-11 01:21:03 -0700933 eh = ext_block_hdr(bh);
934 ppos++;
Alex Tomasa86c6182006-10-11 01:21:03 -0700935 path[ppos].p_bh = bh;
936 path[ppos].p_hdr = eh;
Alex Tomasa86c6182006-10-11 01:21:03 -0700937 }
938
939 path[ppos].p_depth = i;
Alex Tomasa86c6182006-10-11 01:21:03 -0700940 path[ppos].p_ext = NULL;
941 path[ppos].p_idx = NULL;
942
Alex Tomasa86c6182006-10-11 01:21:03 -0700943 /* find extent */
944 ext4_ext_binsearch(inode, path + ppos, block);
Shen Feng1973adc2008-07-11 19:27:31 -0400945 /* if not an empty leaf */
946 if (path[ppos].p_ext)
Theodore Ts'obf89d162010-10-27 21:30:14 -0400947 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
Alex Tomasa86c6182006-10-11 01:21:03 -0700948
949 ext4_ext_show_path(inode, path);
950
951 return path;
952
953err:
954 ext4_ext_drop_refs(path);
Theodore Ts'odfe50802014-09-01 14:37:09 -0400955 kfree(path);
956 if (orig_path)
957 *orig_path = NULL;
Theodore Ts'o860d21e2013-01-12 16:19:36 -0500958 return ERR_PTR(ret);
Alex Tomasa86c6182006-10-11 01:21:03 -0700959}
960
961/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -0700962 * ext4_ext_insert_index:
963 * insert new index [@logical;@ptr] into the block at @curp;
964 * check where to insert: before @curp or after @curp
Alex Tomasa86c6182006-10-11 01:21:03 -0700965 */
Theodore Ts'o1f109d52010-10-27 21:30:14 -0400966static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
967 struct ext4_ext_path *curp,
968 int logical, ext4_fsblk_t ptr)
Alex Tomasa86c6182006-10-11 01:21:03 -0700969{
970 struct ext4_extent_idx *ix;
971 int len, err;
972
Avantika Mathur7e028972006-12-06 20:41:33 -0800973 err = ext4_ext_get_access(handle, inode, curp);
974 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -0700975 return err;
976
Frank Mayhar273df552010-03-02 11:46:09 -0500977 if (unlikely(logical == le32_to_cpu(curp->p_idx->ei_block))) {
978 EXT4_ERROR_INODE(inode,
979 "logical %d == ei_block %d!",
980 logical, le32_to_cpu(curp->p_idx->ei_block));
Darrick J. Wong6a797d22015-10-17 16:16:04 -0400981 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -0500982 }
Robin Dongd4620312011-07-17 23:43:42 -0400983
984 if (unlikely(le16_to_cpu(curp->p_hdr->eh_entries)
985 >= le16_to_cpu(curp->p_hdr->eh_max))) {
986 EXT4_ERROR_INODE(inode,
987 "eh_entries %d >= eh_max %d!",
988 le16_to_cpu(curp->p_hdr->eh_entries),
989 le16_to_cpu(curp->p_hdr->eh_max));
Darrick J. Wong6a797d22015-10-17 16:16:04 -0400990 return -EFSCORRUPTED;
Robin Dongd4620312011-07-17 23:43:42 -0400991 }
992
Alex Tomasa86c6182006-10-11 01:21:03 -0700993 if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
994 /* insert after */
Ritesh Harjani70aa1552020-05-10 11:54:55 +0530995 ext_debug(inode, "insert new index %d after: %llu\n",
996 logical, ptr);
Alex Tomasa86c6182006-10-11 01:21:03 -0700997 ix = curp->p_idx + 1;
998 } else {
999 /* insert before */
Ritesh Harjani70aa1552020-05-10 11:54:55 +05301000 ext_debug(inode, "insert new index %d before: %llu\n",
1001 logical, ptr);
Alex Tomasa86c6182006-10-11 01:21:03 -07001002 ix = curp->p_idx;
1003 }
1004
Eric Gouriou80e675f2011-10-27 11:52:18 -04001005 len = EXT_LAST_INDEX(curp->p_hdr) - ix + 1;
1006 BUG_ON(len < 0);
1007 if (len > 0) {
Ritesh Harjani70aa1552020-05-10 11:54:55 +05301008 ext_debug(inode, "insert new index %d: "
Eric Gouriou80e675f2011-10-27 11:52:18 -04001009 "move %d indices from 0x%p to 0x%p\n",
1010 logical, len, ix, ix + 1);
1011 memmove(ix + 1, ix, len * sizeof(struct ext4_extent_idx));
1012 }
1013
Tao Maf472e022011-10-17 10:13:46 -04001014 if (unlikely(ix > EXT_MAX_INDEX(curp->p_hdr))) {
1015 EXT4_ERROR_INODE(inode, "ix > EXT_MAX_INDEX!");
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001016 return -EFSCORRUPTED;
Tao Maf472e022011-10-17 10:13:46 -04001017 }
1018
Alex Tomasa86c6182006-10-11 01:21:03 -07001019 ix->ei_block = cpu_to_le32(logical);
Alex Tomasf65e6fb2006-10-11 01:21:05 -07001020 ext4_idx_store_pblock(ix, ptr);
Marcin Slusarze8546d02008-04-17 10:38:59 -04001021 le16_add_cpu(&curp->p_hdr->eh_entries, 1);
Alex Tomasa86c6182006-10-11 01:21:03 -07001022
Frank Mayhar273df552010-03-02 11:46:09 -05001023 if (unlikely(ix > EXT_LAST_INDEX(curp->p_hdr))) {
1024 EXT4_ERROR_INODE(inode, "ix > EXT_LAST_INDEX!");
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001025 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001026 }
Alex Tomasa86c6182006-10-11 01:21:03 -07001027
1028 err = ext4_ext_dirty(handle, inode, curp);
1029 ext4_std_error(inode->i_sb, err);
1030
1031 return err;
1032}
1033
1034/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001035 * ext4_ext_split:
1036 * inserts new subtree into the path, using free index entry
1037 * at depth @at:
1038 * - allocates all needed blocks (new leaf and all intermediate index blocks)
1039 * - makes decision where to split
1040 * - moves remaining extents and index entries (right to the split point)
1041 * into the newly allocated blocks
1042 * - initializes subtree
Alex Tomasa86c6182006-10-11 01:21:03 -07001043 */
1044static int ext4_ext_split(handle_t *handle, struct inode *inode,
Allison Henderson55f020d2011-05-25 07:41:26 -04001045 unsigned int flags,
1046 struct ext4_ext_path *path,
1047 struct ext4_extent *newext, int at)
Alex Tomasa86c6182006-10-11 01:21:03 -07001048{
1049 struct buffer_head *bh = NULL;
1050 int depth = ext_depth(inode);
1051 struct ext4_extent_header *neh;
1052 struct ext4_extent_idx *fidx;
Alex Tomasa86c6182006-10-11 01:21:03 -07001053 int i = at, k, m, a;
Alex Tomasf65e6fb2006-10-11 01:21:05 -07001054 ext4_fsblk_t newblock, oldblock;
Alex Tomasa86c6182006-10-11 01:21:03 -07001055 __le32 border;
Alex Tomasf65e6fb2006-10-11 01:21:05 -07001056 ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
Theodore Ts'o73c384c02020-05-07 10:50:28 -07001057 gfp_t gfp_flags = GFP_NOFS;
Alex Tomasa86c6182006-10-11 01:21:03 -07001058 int err = 0;
Sriram Rajagopalan592acbf2019-05-10 19:28:06 -04001059 size_t ext_size = 0;
Alex Tomasa86c6182006-10-11 01:21:03 -07001060
Theodore Ts'o73c384c02020-05-07 10:50:28 -07001061 if (flags & EXT4_EX_NOFAIL)
1062 gfp_flags |= __GFP_NOFAIL;
1063
Alex Tomasa86c6182006-10-11 01:21:03 -07001064 /* make decision: where to split? */
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001065 /* FIXME: now decision is simplest: at current extent */
Alex Tomasa86c6182006-10-11 01:21:03 -07001066
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001067 /* if current leaf will be split, then we should use
Alex Tomasa86c6182006-10-11 01:21:03 -07001068 * border from split point */
Frank Mayhar273df552010-03-02 11:46:09 -05001069 if (unlikely(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr))) {
1070 EXT4_ERROR_INODE(inode, "p_ext > EXT_MAX_EXTENT!");
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001071 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001072 }
Alex Tomasa86c6182006-10-11 01:21:03 -07001073 if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
1074 border = path[depth].p_ext[1].ee_block;
Ritesh Harjani70aa1552020-05-10 11:54:55 +05301075 ext_debug(inode, "leaf will be split."
Alex Tomasa86c6182006-10-11 01:21:03 -07001076 " next leaf starts at %d\n",
Dave Kleikamp8c55e202007-05-24 13:04:54 -04001077 le32_to_cpu(border));
Alex Tomasa86c6182006-10-11 01:21:03 -07001078 } else {
1079 border = newext->ee_block;
Ritesh Harjani70aa1552020-05-10 11:54:55 +05301080 ext_debug(inode, "leaf will be added."
Alex Tomasa86c6182006-10-11 01:21:03 -07001081 " next leaf starts at %d\n",
Dave Kleikamp8c55e202007-05-24 13:04:54 -04001082 le32_to_cpu(border));
Alex Tomasa86c6182006-10-11 01:21:03 -07001083 }
1084
1085 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001086 * If error occurs, then we break processing
1087 * and mark filesystem read-only. index won't
Alex Tomasa86c6182006-10-11 01:21:03 -07001088 * be inserted and tree will be in consistent
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001089 * state. Next mount will repair buffers too.
Alex Tomasa86c6182006-10-11 01:21:03 -07001090 */
1091
1092 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001093 * Get array to track all allocated blocks.
1094 * We need this to handle errors and free blocks
1095 * upon them.
Alex Tomasa86c6182006-10-11 01:21:03 -07001096 */
Theodore Ts'o73c384c02020-05-07 10:50:28 -07001097 ablocks = kcalloc(depth, sizeof(ext4_fsblk_t), gfp_flags);
Alex Tomasa86c6182006-10-11 01:21:03 -07001098 if (!ablocks)
1099 return -ENOMEM;
Alex Tomasa86c6182006-10-11 01:21:03 -07001100
1101 /* allocate all needed blocks */
Ritesh Harjani70aa1552020-05-10 11:54:55 +05301102 ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
Alex Tomasa86c6182006-10-11 01:21:03 -07001103 for (a = 0; a < depth - at; a++) {
Aneesh Kumar K.V654b4902008-07-11 19:27:31 -04001104 newblock = ext4_ext_new_meta_block(handle, inode, path,
Allison Henderson55f020d2011-05-25 07:41:26 -04001105 newext, &err, flags);
Alex Tomasa86c6182006-10-11 01:21:03 -07001106 if (newblock == 0)
1107 goto cleanup;
1108 ablocks[a] = newblock;
1109 }
1110
1111 /* initialize new leaf */
1112 newblock = ablocks[--a];
Frank Mayhar273df552010-03-02 11:46:09 -05001113 if (unlikely(newblock == 0)) {
1114 EXT4_ERROR_INODE(inode, "newblock == 0!");
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001115 err = -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001116 goto cleanup;
1117 }
Nikolay Borisovc45653c2015-07-02 01:34:07 -04001118 bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
Wang Shilongaebf0242013-01-12 16:28:47 -05001119 if (unlikely(!bh)) {
Theodore Ts'o860d21e2013-01-12 16:19:36 -05001120 err = -ENOMEM;
Alex Tomasa86c6182006-10-11 01:21:03 -07001121 goto cleanup;
1122 }
1123 lock_buffer(bh);
1124
Jan Kara188c2992021-08-16 11:57:04 +02001125 err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
1126 EXT4_JTR_NONE);
Avantika Mathur7e028972006-12-06 20:41:33 -08001127 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001128 goto cleanup;
1129
1130 neh = ext_block_hdr(bh);
1131 neh->eh_entries = 0;
Theodore Ts'o55ad63b2009-08-28 10:40:33 -04001132 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
Alex Tomasa86c6182006-10-11 01:21:03 -07001133 neh->eh_magic = EXT4_EXT_MAGIC;
1134 neh->eh_depth = 0;
Anirudh Rayabharamce3aba42021-05-07 00:26:54 +05301135 neh->eh_generation = 0;
Alex Tomasa86c6182006-10-11 01:21:03 -07001136
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001137 /* move remainder of path[depth] to the new leaf */
Frank Mayhar273df552010-03-02 11:46:09 -05001138 if (unlikely(path[depth].p_hdr->eh_entries !=
1139 path[depth].p_hdr->eh_max)) {
1140 EXT4_ERROR_INODE(inode, "eh_entries %d != eh_max %d!",
1141 path[depth].p_hdr->eh_entries,
1142 path[depth].p_hdr->eh_max);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001143 err = -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001144 goto cleanup;
1145 }
Alex Tomasa86c6182006-10-11 01:21:03 -07001146 /* start copy from next extent */
Yongqiang Yang1b16da72011-05-25 17:41:48 -04001147 m = EXT_MAX_EXTENT(path[depth].p_hdr) - path[depth].p_ext++;
1148 ext4_ext_show_move(inode, path, newblock, depth);
Alex Tomasa86c6182006-10-11 01:21:03 -07001149 if (m) {
Yongqiang Yang1b16da72011-05-25 17:41:48 -04001150 struct ext4_extent *ex;
1151 ex = EXT_FIRST_EXTENT(neh);
1152 memmove(ex, path[depth].p_ext, sizeof(struct ext4_extent) * m);
Marcin Slusarze8546d02008-04-17 10:38:59 -04001153 le16_add_cpu(&neh->eh_entries, m);
Alex Tomasa86c6182006-10-11 01:21:03 -07001154 }
1155
Sriram Rajagopalan592acbf2019-05-10 19:28:06 -04001156 /* zero out unused area in the extent block */
1157 ext_size = sizeof(struct ext4_extent_header) +
1158 sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries);
1159 memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
Darrick J. Wong7ac59902012-04-29 18:37:10 -04001160 ext4_extent_block_csum_set(inode, neh);
Alex Tomasa86c6182006-10-11 01:21:03 -07001161 set_buffer_uptodate(bh);
1162 unlock_buffer(bh);
1163
Frank Mayhar03901312009-01-07 00:06:22 -05001164 err = ext4_handle_dirty_metadata(handle, inode, bh);
Avantika Mathur7e028972006-12-06 20:41:33 -08001165 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001166 goto cleanup;
1167 brelse(bh);
1168 bh = NULL;
1169
1170 /* correct old leaf */
1171 if (m) {
Avantika Mathur7e028972006-12-06 20:41:33 -08001172 err = ext4_ext_get_access(handle, inode, path + depth);
1173 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001174 goto cleanup;
Marcin Slusarze8546d02008-04-17 10:38:59 -04001175 le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
Avantika Mathur7e028972006-12-06 20:41:33 -08001176 err = ext4_ext_dirty(handle, inode, path + depth);
1177 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001178 goto cleanup;
1179
1180 }
1181
1182 /* create intermediate indexes */
1183 k = depth - at - 1;
Frank Mayhar273df552010-03-02 11:46:09 -05001184 if (unlikely(k < 0)) {
1185 EXT4_ERROR_INODE(inode, "k %d < 0!", k);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001186 err = -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001187 goto cleanup;
1188 }
Alex Tomasa86c6182006-10-11 01:21:03 -07001189 if (k)
Ritesh Harjani70aa1552020-05-10 11:54:55 +05301190 ext_debug(inode, "create %d intermediate indices\n", k);
Alex Tomasa86c6182006-10-11 01:21:03 -07001191 /* insert new index into current index block */
1192 /* current depth stored in i var */
1193 i = depth - 1;
1194 while (k--) {
1195 oldblock = newblock;
1196 newblock = ablocks[--a];
Eric Sandeenbba90742008-01-28 23:58:27 -05001197 bh = sb_getblk(inode->i_sb, newblock);
Wang Shilongaebf0242013-01-12 16:28:47 -05001198 if (unlikely(!bh)) {
Theodore Ts'o860d21e2013-01-12 16:19:36 -05001199 err = -ENOMEM;
Alex Tomasa86c6182006-10-11 01:21:03 -07001200 goto cleanup;
1201 }
1202 lock_buffer(bh);
1203
Jan Kara188c2992021-08-16 11:57:04 +02001204 err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
1205 EXT4_JTR_NONE);
Avantika Mathur7e028972006-12-06 20:41:33 -08001206 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001207 goto cleanup;
1208
1209 neh = ext_block_hdr(bh);
1210 neh->eh_entries = cpu_to_le16(1);
1211 neh->eh_magic = EXT4_EXT_MAGIC;
Theodore Ts'o55ad63b2009-08-28 10:40:33 -04001212 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
Alex Tomasa86c6182006-10-11 01:21:03 -07001213 neh->eh_depth = cpu_to_le16(depth - i);
Anirudh Rayabharamce3aba42021-05-07 00:26:54 +05301214 neh->eh_generation = 0;
Alex Tomasa86c6182006-10-11 01:21:03 -07001215 fidx = EXT_FIRST_INDEX(neh);
1216 fidx->ei_block = border;
Alex Tomasf65e6fb2006-10-11 01:21:05 -07001217 ext4_idx_store_pblock(fidx, oldblock);
Alex Tomasa86c6182006-10-11 01:21:03 -07001218
Ritesh Harjani70aa1552020-05-10 11:54:55 +05301219 ext_debug(inode, "int.index at %d (block %llu): %u -> %llu\n",
Eric Sandeenbba90742008-01-28 23:58:27 -05001220 i, newblock, le32_to_cpu(border), oldblock);
Alex Tomasa86c6182006-10-11 01:21:03 -07001221
Yongqiang Yang1b16da72011-05-25 17:41:48 -04001222 /* move remainder of path[i] to the new index block */
Frank Mayhar273df552010-03-02 11:46:09 -05001223 if (unlikely(EXT_MAX_INDEX(path[i].p_hdr) !=
1224 EXT_LAST_INDEX(path[i].p_hdr))) {
1225 EXT4_ERROR_INODE(inode,
1226 "EXT_MAX_INDEX != EXT_LAST_INDEX ee_block %d!",
1227 le32_to_cpu(path[i].p_ext->ee_block));
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001228 err = -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001229 goto cleanup;
1230 }
Yongqiang Yang1b16da72011-05-25 17:41:48 -04001231 /* start copy indexes */
1232 m = EXT_MAX_INDEX(path[i].p_hdr) - path[i].p_idx++;
Ritesh Harjani70aa1552020-05-10 11:54:55 +05301233 ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
Yongqiang Yang1b16da72011-05-25 17:41:48 -04001234 EXT_MAX_INDEX(path[i].p_hdr));
1235 ext4_ext_show_move(inode, path, newblock, i);
Alex Tomasa86c6182006-10-11 01:21:03 -07001236 if (m) {
Yongqiang Yang1b16da72011-05-25 17:41:48 -04001237 memmove(++fidx, path[i].p_idx,
Alex Tomasa86c6182006-10-11 01:21:03 -07001238 sizeof(struct ext4_extent_idx) * m);
Marcin Slusarze8546d02008-04-17 10:38:59 -04001239 le16_add_cpu(&neh->eh_entries, m);
Alex Tomasa86c6182006-10-11 01:21:03 -07001240 }
Sriram Rajagopalan592acbf2019-05-10 19:28:06 -04001241 /* zero out unused area in the extent block */
1242 ext_size = sizeof(struct ext4_extent_header) +
1243 (sizeof(struct ext4_extent) * le16_to_cpu(neh->eh_entries));
1244 memset(bh->b_data + ext_size, 0,
1245 inode->i_sb->s_blocksize - ext_size);
Darrick J. Wong7ac59902012-04-29 18:37:10 -04001246 ext4_extent_block_csum_set(inode, neh);
Alex Tomasa86c6182006-10-11 01:21:03 -07001247 set_buffer_uptodate(bh);
1248 unlock_buffer(bh);
1249
Frank Mayhar03901312009-01-07 00:06:22 -05001250 err = ext4_handle_dirty_metadata(handle, inode, bh);
Avantika Mathur7e028972006-12-06 20:41:33 -08001251 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001252 goto cleanup;
1253 brelse(bh);
1254 bh = NULL;
1255
1256 /* correct old index */
1257 if (m) {
1258 err = ext4_ext_get_access(handle, inode, path + i);
1259 if (err)
1260 goto cleanup;
Marcin Slusarze8546d02008-04-17 10:38:59 -04001261 le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
Alex Tomasa86c6182006-10-11 01:21:03 -07001262 err = ext4_ext_dirty(handle, inode, path + i);
1263 if (err)
1264 goto cleanup;
1265 }
1266
1267 i--;
1268 }
1269
1270 /* insert new index */
Alex Tomasa86c6182006-10-11 01:21:03 -07001271 err = ext4_ext_insert_index(handle, inode, path + at,
1272 le32_to_cpu(border), newblock);
1273
1274cleanup:
1275 if (bh) {
1276 if (buffer_locked(bh))
1277 unlock_buffer(bh);
1278 brelse(bh);
1279 }
1280
1281 if (err) {
1282 /* free all allocated blocks in error case */
1283 for (i = 0; i < depth; i++) {
1284 if (!ablocks[i])
1285 continue;
Peter Huewe7dc57612011-02-21 21:01:42 -05001286 ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
Theodore Ts'oe6362602009-11-23 07:17:05 -05001287 EXT4_FREE_BLOCKS_METADATA);
Alex Tomasa86c6182006-10-11 01:21:03 -07001288 }
1289 }
1290 kfree(ablocks);
1291
1292 return err;
1293}
1294
1295/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001296 * ext4_ext_grow_indepth:
1297 * implements tree growing procedure:
1298 * - allocates new block
1299 * - moves top-level data (index block or leaf) into the new block
1300 * - initializes new top-level, creating index that points to the
1301 * just created block
Alex Tomasa86c6182006-10-11 01:21:03 -07001302 */
1303static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
Dmitry Monakhovbe5cd902014-10-01 22:57:09 -04001304 unsigned int flags)
Alex Tomasa86c6182006-10-11 01:21:03 -07001305{
Alex Tomasa86c6182006-10-11 01:21:03 -07001306 struct ext4_extent_header *neh;
Alex Tomasa86c6182006-10-11 01:21:03 -07001307 struct buffer_head *bh;
Dmitry Monakhovbe5cd902014-10-01 22:57:09 -04001308 ext4_fsblk_t newblock, goal = 0;
1309 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
Alex Tomasa86c6182006-10-11 01:21:03 -07001310 int err = 0;
Sriram Rajagopalan592acbf2019-05-10 19:28:06 -04001311 size_t ext_size = 0;
Alex Tomasa86c6182006-10-11 01:21:03 -07001312
Dmitry Monakhovbe5cd902014-10-01 22:57:09 -04001313 /* Try to prepend new index to old one */
1314 if (ext_depth(inode))
1315 goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
1316 if (goal > le32_to_cpu(es->s_first_data_block)) {
1317 flags |= EXT4_MB_HINT_TRY_GOAL;
1318 goal--;
1319 } else
1320 goal = ext4_inode_to_goal_block(inode);
1321 newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
1322 NULL, &err);
Alex Tomasa86c6182006-10-11 01:21:03 -07001323 if (newblock == 0)
1324 return err;
1325
Nikolay Borisovc45653c2015-07-02 01:34:07 -04001326 bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS);
Wang Shilongaebf0242013-01-12 16:28:47 -05001327 if (unlikely(!bh))
Theodore Ts'o860d21e2013-01-12 16:19:36 -05001328 return -ENOMEM;
Alex Tomasa86c6182006-10-11 01:21:03 -07001329 lock_buffer(bh);
1330
Jan Kara188c2992021-08-16 11:57:04 +02001331 err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
1332 EXT4_JTR_NONE);
Avantika Mathur7e028972006-12-06 20:41:33 -08001333 if (err) {
Alex Tomasa86c6182006-10-11 01:21:03 -07001334 unlock_buffer(bh);
1335 goto out;
1336 }
1337
Sriram Rajagopalan592acbf2019-05-10 19:28:06 -04001338 ext_size = sizeof(EXT4_I(inode)->i_data);
Alex Tomasa86c6182006-10-11 01:21:03 -07001339 /* move top-level index/leaf into new block */
Sriram Rajagopalan592acbf2019-05-10 19:28:06 -04001340 memmove(bh->b_data, EXT4_I(inode)->i_data, ext_size);
1341 /* zero out unused area in the extent block */
1342 memset(bh->b_data + ext_size, 0, inode->i_sb->s_blocksize - ext_size);
Alex Tomasa86c6182006-10-11 01:21:03 -07001343
1344 /* set size of new block */
1345 neh = ext_block_hdr(bh);
1346 /* old root could have indexes or leaves
1347 * so calculate e_max right way */
1348 if (ext_depth(inode))
Theodore Ts'o55ad63b2009-08-28 10:40:33 -04001349 neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode, 0));
Alex Tomasa86c6182006-10-11 01:21:03 -07001350 else
Theodore Ts'o55ad63b2009-08-28 10:40:33 -04001351 neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode, 0));
Alex Tomasa86c6182006-10-11 01:21:03 -07001352 neh->eh_magic = EXT4_EXT_MAGIC;
Darrick J. Wong7ac59902012-04-29 18:37:10 -04001353 ext4_extent_block_csum_set(inode, neh);
Alex Tomasa86c6182006-10-11 01:21:03 -07001354 set_buffer_uptodate(bh);
yangerkun0caaefb2021-06-09 15:55:45 +08001355 set_buffer_verified(bh);
Alex Tomasa86c6182006-10-11 01:21:03 -07001356 unlock_buffer(bh);
1357
Frank Mayhar03901312009-01-07 00:06:22 -05001358 err = ext4_handle_dirty_metadata(handle, inode, bh);
Avantika Mathur7e028972006-12-06 20:41:33 -08001359 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001360 goto out;
1361
Dmitry Monakhov1939dd82011-10-22 01:26:05 -04001362 /* Update top-level index: num,max,pointer */
Alex Tomasa86c6182006-10-11 01:21:03 -07001363 neh = ext_inode_hdr(inode);
Dmitry Monakhov1939dd82011-10-22 01:26:05 -04001364 neh->eh_entries = cpu_to_le16(1);
1365 ext4_idx_store_pblock(EXT_FIRST_INDEX(neh), newblock);
1366 if (neh->eh_depth == 0) {
1367 /* Root extent block becomes index block */
1368 neh->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode, 0));
1369 EXT_FIRST_INDEX(neh)->ei_block =
1370 EXT_FIRST_EXTENT(neh)->ee_block;
1371 }
Ritesh Harjani70aa1552020-05-10 11:54:55 +05301372 ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %llu\n",
Alex Tomasa86c6182006-10-11 01:21:03 -07001373 le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
Andi Kleen5a0790c2010-06-14 13:28:03 -04001374 le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
Theodore Ts'obf89d162010-10-27 21:30:14 -04001375 ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
Alex Tomasa86c6182006-10-11 01:21:03 -07001376
Wei Yongjunba39ebb2012-09-27 09:37:53 -04001377 le16_add_cpu(&neh->eh_depth, 1);
Harshad Shirwadkar4209ae12020-04-26 18:34:37 -07001378 err = ext4_mark_inode_dirty(handle, inode);
Alex Tomasa86c6182006-10-11 01:21:03 -07001379out:
1380 brelse(bh);
1381
1382 return err;
1383}
1384
1385/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001386 * ext4_ext_create_new_leaf:
1387 * finds empty index and adds new leaf.
1388 * if no free index is found, then it requests in-depth growing.
Alex Tomasa86c6182006-10-11 01:21:03 -07001389 */
1390static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
Theodore Ts'o107a7bd2013-08-16 21:23:41 -04001391 unsigned int mb_flags,
1392 unsigned int gb_flags,
Theodore Ts'odfe50802014-09-01 14:37:09 -04001393 struct ext4_ext_path **ppath,
Allison Henderson55f020d2011-05-25 07:41:26 -04001394 struct ext4_extent *newext)
Alex Tomasa86c6182006-10-11 01:21:03 -07001395{
Theodore Ts'odfe50802014-09-01 14:37:09 -04001396 struct ext4_ext_path *path = *ppath;
Alex Tomasa86c6182006-10-11 01:21:03 -07001397 struct ext4_ext_path *curp;
1398 int depth, i, err = 0;
1399
1400repeat:
1401 i = depth = ext_depth(inode);
1402
1403 /* walk up to the tree and look for free index entry */
1404 curp = path + depth;
1405 while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
1406 i--;
1407 curp--;
1408 }
1409
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001410 /* we use already allocated block for index block,
1411 * so subsequent data blocks should be contiguous */
Alex Tomasa86c6182006-10-11 01:21:03 -07001412 if (EXT_HAS_FREE_INDEX(curp)) {
1413 /* if we found index with free entry, then use that
1414 * entry: create all needed subtree and add new leaf */
Theodore Ts'o107a7bd2013-08-16 21:23:41 -04001415 err = ext4_ext_split(handle, inode, mb_flags, path, newext, i);
Shen Feng787e0982008-07-11 19:27:31 -04001416 if (err)
1417 goto out;
Alex Tomasa86c6182006-10-11 01:21:03 -07001418
1419 /* refill path */
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04001420 path = ext4_find_extent(inode,
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05001421 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
Theodore Ts'odfe50802014-09-01 14:37:09 -04001422 ppath, gb_flags);
Alex Tomasa86c6182006-10-11 01:21:03 -07001423 if (IS_ERR(path))
1424 err = PTR_ERR(path);
1425 } else {
1426 /* tree is full, time to grow in depth */
Dmitry Monakhovbe5cd902014-10-01 22:57:09 -04001427 err = ext4_ext_grow_indepth(handle, inode, mb_flags);
Alex Tomasa86c6182006-10-11 01:21:03 -07001428 if (err)
1429 goto out;
1430
1431 /* refill path */
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04001432 path = ext4_find_extent(inode,
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05001433 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
Theodore Ts'odfe50802014-09-01 14:37:09 -04001434 ppath, gb_flags);
Alex Tomasa86c6182006-10-11 01:21:03 -07001435 if (IS_ERR(path)) {
1436 err = PTR_ERR(path);
1437 goto out;
1438 }
1439
1440 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001441 * only first (depth 0 -> 1) produces free space;
1442 * in all other cases we have to split the grown tree
Alex Tomasa86c6182006-10-11 01:21:03 -07001443 */
1444 depth = ext_depth(inode);
1445 if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001446 /* now we need to split */
Alex Tomasa86c6182006-10-11 01:21:03 -07001447 goto repeat;
1448 }
1449 }
1450
1451out:
1452 return err;
1453}
1454
1455/*
Alex Tomas1988b512008-01-28 23:58:27 -05001456 * search the closest allocated block to the left for *logical
1457 * and returns it at @logical + it's physical address at @phys
1458 * if *logical is the smallest allocated block, the function
1459 * returns 0 at @phys
1460 * return value contains 0 (success) or error code
1461 */
Theodore Ts'o1f109d52010-10-27 21:30:14 -04001462static int ext4_ext_search_left(struct inode *inode,
1463 struct ext4_ext_path *path,
1464 ext4_lblk_t *logical, ext4_fsblk_t *phys)
Alex Tomas1988b512008-01-28 23:58:27 -05001465{
1466 struct ext4_extent_idx *ix;
1467 struct ext4_extent *ex;
Aneesh Kumar K.Vb939e372008-01-28 23:58:27 -05001468 int depth, ee_len;
Alex Tomas1988b512008-01-28 23:58:27 -05001469
Frank Mayhar273df552010-03-02 11:46:09 -05001470 if (unlikely(path == NULL)) {
1471 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001472 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001473 }
Alex Tomas1988b512008-01-28 23:58:27 -05001474 depth = path->p_depth;
1475 *phys = 0;
1476
1477 if (depth == 0 && path->p_ext == NULL)
1478 return 0;
1479
1480 /* usually extent in the path covers blocks smaller
1481 * then *logical, but it can be that extent is the
1482 * first one in the file */
1483
1484 ex = path[depth].p_ext;
Aneesh Kumar K.Vb939e372008-01-28 23:58:27 -05001485 ee_len = ext4_ext_get_actual_len(ex);
Alex Tomas1988b512008-01-28 23:58:27 -05001486 if (*logical < le32_to_cpu(ex->ee_block)) {
Frank Mayhar273df552010-03-02 11:46:09 -05001487 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1488 EXT4_ERROR_INODE(inode,
1489 "EXT_FIRST_EXTENT != ex *logical %d ee_block %d!",
1490 *logical, le32_to_cpu(ex->ee_block));
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001491 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001492 }
Alex Tomas1988b512008-01-28 23:58:27 -05001493 while (--depth >= 0) {
1494 ix = path[depth].p_idx;
Frank Mayhar273df552010-03-02 11:46:09 -05001495 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1496 EXT4_ERROR_INODE(inode,
1497 "ix (%d) != EXT_FIRST_INDEX (%d) (depth %d)!",
Tao Ma6ee3b212011-10-08 16:08:34 -04001498 ix != NULL ? le32_to_cpu(ix->ei_block) : 0,
Adam Borowski037e7c52021-11-15 18:20:20 +01001499 le32_to_cpu(EXT_FIRST_INDEX(path[depth].p_hdr)->ei_block),
Frank Mayhar273df552010-03-02 11:46:09 -05001500 depth);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001501 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001502 }
Alex Tomas1988b512008-01-28 23:58:27 -05001503 }
1504 return 0;
1505 }
1506
Frank Mayhar273df552010-03-02 11:46:09 -05001507 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1508 EXT4_ERROR_INODE(inode,
1509 "logical %d < ee_block %d + ee_len %d!",
1510 *logical, le32_to_cpu(ex->ee_block), ee_len);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001511 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001512 }
Alex Tomas1988b512008-01-28 23:58:27 -05001513
Aneesh Kumar K.Vb939e372008-01-28 23:58:27 -05001514 *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
Theodore Ts'obf89d162010-10-27 21:30:14 -04001515 *phys = ext4_ext_pblock(ex) + ee_len - 1;
Alex Tomas1988b512008-01-28 23:58:27 -05001516 return 0;
1517}
1518
1519/*
yangerkund7dce9e2020-10-28 13:56:17 +08001520 * Search the closest allocated block to the right for *logical
1521 * and returns it at @logical + it's physical address at @phys.
1522 * If not exists, return 0 and @phys is set to 0. We will return
1523 * 1 which means we found an allocated block and ret_ex is valid.
1524 * Or return a (< 0) error code.
Alex Tomas1988b512008-01-28 23:58:27 -05001525 */
Theodore Ts'o1f109d52010-10-27 21:30:14 -04001526static int ext4_ext_search_right(struct inode *inode,
1527 struct ext4_ext_path *path,
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04001528 ext4_lblk_t *logical, ext4_fsblk_t *phys,
yangerkund7dce9e2020-10-28 13:56:17 +08001529 struct ext4_extent *ret_ex)
Alex Tomas1988b512008-01-28 23:58:27 -05001530{
1531 struct buffer_head *bh = NULL;
1532 struct ext4_extent_header *eh;
1533 struct ext4_extent_idx *ix;
1534 struct ext4_extent *ex;
Eric Sandeen395a87b2009-03-10 18:18:47 -04001535 int depth; /* Note, NOT eh_depth; depth from top of tree */
1536 int ee_len;
Alex Tomas1988b512008-01-28 23:58:27 -05001537
Frank Mayhar273df552010-03-02 11:46:09 -05001538 if (unlikely(path == NULL)) {
1539 EXT4_ERROR_INODE(inode, "path == NULL *logical %d!", *logical);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001540 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001541 }
Alex Tomas1988b512008-01-28 23:58:27 -05001542 depth = path->p_depth;
1543 *phys = 0;
1544
1545 if (depth == 0 && path->p_ext == NULL)
1546 return 0;
1547
1548 /* usually extent in the path covers blocks smaller
1549 * then *logical, but it can be that extent is the
1550 * first one in the file */
1551
1552 ex = path[depth].p_ext;
Aneesh Kumar K.Vb939e372008-01-28 23:58:27 -05001553 ee_len = ext4_ext_get_actual_len(ex);
Alex Tomas1988b512008-01-28 23:58:27 -05001554 if (*logical < le32_to_cpu(ex->ee_block)) {
Frank Mayhar273df552010-03-02 11:46:09 -05001555 if (unlikely(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex)) {
1556 EXT4_ERROR_INODE(inode,
1557 "first_extent(path[%d].p_hdr) != ex",
1558 depth);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001559 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001560 }
Alex Tomas1988b512008-01-28 23:58:27 -05001561 while (--depth >= 0) {
1562 ix = path[depth].p_idx;
Frank Mayhar273df552010-03-02 11:46:09 -05001563 if (unlikely(ix != EXT_FIRST_INDEX(path[depth].p_hdr))) {
1564 EXT4_ERROR_INODE(inode,
1565 "ix != EXT_FIRST_INDEX *logical %d!",
1566 *logical);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001567 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001568 }
Alex Tomas1988b512008-01-28 23:58:27 -05001569 }
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04001570 goto found_extent;
Alex Tomas1988b512008-01-28 23:58:27 -05001571 }
1572
Frank Mayhar273df552010-03-02 11:46:09 -05001573 if (unlikely(*logical < (le32_to_cpu(ex->ee_block) + ee_len))) {
1574 EXT4_ERROR_INODE(inode,
1575 "logical %d < ee_block %d + ee_len %d!",
1576 *logical, le32_to_cpu(ex->ee_block), ee_len);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001577 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001578 }
Alex Tomas1988b512008-01-28 23:58:27 -05001579
1580 if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
1581 /* next allocated block in this leaf */
1582 ex++;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04001583 goto found_extent;
Alex Tomas1988b512008-01-28 23:58:27 -05001584 }
1585
1586 /* go up and search for index to the right */
1587 while (--depth >= 0) {
1588 ix = path[depth].p_idx;
1589 if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
Wu Fengguang25f1ee32008-11-25 17:24:23 -05001590 goto got_index;
Alex Tomas1988b512008-01-28 23:58:27 -05001591 }
1592
Wu Fengguang25f1ee32008-11-25 17:24:23 -05001593 /* we've gone up to the root and found no index to the right */
1594 return 0;
Alex Tomas1988b512008-01-28 23:58:27 -05001595
Wu Fengguang25f1ee32008-11-25 17:24:23 -05001596got_index:
Alex Tomas1988b512008-01-28 23:58:27 -05001597 /* we've found index to the right, let's
1598 * follow it and find the closest allocated
1599 * block to the right */
1600 ix++;
Alex Tomas1988b512008-01-28 23:58:27 -05001601 while (++depth < path->p_depth) {
Eric Sandeen395a87b2009-03-10 18:18:47 -04001602 /* subtract from p_depth to get proper eh_depth */
Zhang Yi9c6e0712021-09-08 20:08:49 +08001603 bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
Theodore Ts'o7d7ea892013-08-16 21:20:41 -04001604 if (IS_ERR(bh))
1605 return PTR_ERR(bh);
1606 eh = ext_block_hdr(bh);
Alex Tomas1988b512008-01-28 23:58:27 -05001607 ix = EXT_FIRST_INDEX(eh);
Alex Tomas1988b512008-01-28 23:58:27 -05001608 put_bh(bh);
1609 }
1610
Zhang Yi9c6e0712021-09-08 20:08:49 +08001611 bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0);
Theodore Ts'o7d7ea892013-08-16 21:20:41 -04001612 if (IS_ERR(bh))
1613 return PTR_ERR(bh);
Alex Tomas1988b512008-01-28 23:58:27 -05001614 eh = ext_block_hdr(bh);
Alex Tomas1988b512008-01-28 23:58:27 -05001615 ex = EXT_FIRST_EXTENT(eh);
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04001616found_extent:
Alex Tomas1988b512008-01-28 23:58:27 -05001617 *logical = le32_to_cpu(ex->ee_block);
Theodore Ts'obf89d162010-10-27 21:30:14 -04001618 *phys = ext4_ext_pblock(ex);
yangerkund7dce9e2020-10-28 13:56:17 +08001619 if (ret_ex)
1620 *ret_ex = *ex;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04001621 if (bh)
1622 put_bh(bh);
yangerkund7dce9e2020-10-28 13:56:17 +08001623 return 1;
Alex Tomas1988b512008-01-28 23:58:27 -05001624}
1625
1626/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001627 * ext4_ext_next_allocated_block:
Lukas Czernerf17722f2011-06-06 00:05:17 -04001628 * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001629 * NOTE: it considers block number from index entry as
1630 * allocated block. Thus, index entries have to be consistent
1631 * with leaves.
Alex Tomasa86c6182006-10-11 01:21:03 -07001632 */
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04001633ext4_lblk_t
Alex Tomasa86c6182006-10-11 01:21:03 -07001634ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1635{
1636 int depth;
1637
1638 BUG_ON(path == NULL);
1639 depth = path->p_depth;
1640
1641 if (depth == 0 && path->p_ext == NULL)
Lukas Czernerf17722f2011-06-06 00:05:17 -04001642 return EXT_MAX_BLOCKS;
Alex Tomasa86c6182006-10-11 01:21:03 -07001643
1644 while (depth >= 0) {
Eric Biggers6e89bbb2019-12-31 12:04:43 -06001645 struct ext4_ext_path *p = &path[depth];
1646
Alex Tomasa86c6182006-10-11 01:21:03 -07001647 if (depth == path->p_depth) {
1648 /* leaf */
Eric Biggers6e89bbb2019-12-31 12:04:43 -06001649 if (p->p_ext && p->p_ext != EXT_LAST_EXTENT(p->p_hdr))
1650 return le32_to_cpu(p->p_ext[1].ee_block);
Alex Tomasa86c6182006-10-11 01:21:03 -07001651 } else {
1652 /* index */
Eric Biggers6e89bbb2019-12-31 12:04:43 -06001653 if (p->p_idx != EXT_LAST_INDEX(p->p_hdr))
1654 return le32_to_cpu(p->p_idx[1].ei_block);
Alex Tomasa86c6182006-10-11 01:21:03 -07001655 }
1656 depth--;
1657 }
1658
Lukas Czernerf17722f2011-06-06 00:05:17 -04001659 return EXT_MAX_BLOCKS;
Alex Tomasa86c6182006-10-11 01:21:03 -07001660}
1661
1662/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001663 * ext4_ext_next_leaf_block:
Lukas Czernerf17722f2011-06-06 00:05:17 -04001664 * returns first allocated block from next leaf or EXT_MAX_BLOCKS
Alex Tomasa86c6182006-10-11 01:21:03 -07001665 */
Robin Dong57187892011-07-23 21:49:07 -04001666static ext4_lblk_t ext4_ext_next_leaf_block(struct ext4_ext_path *path)
Alex Tomasa86c6182006-10-11 01:21:03 -07001667{
1668 int depth;
1669
1670 BUG_ON(path == NULL);
1671 depth = path->p_depth;
1672
1673 /* zero-tree has no leaf blocks at all */
1674 if (depth == 0)
Lukas Czernerf17722f2011-06-06 00:05:17 -04001675 return EXT_MAX_BLOCKS;
Alex Tomasa86c6182006-10-11 01:21:03 -07001676
1677 /* go to index block */
1678 depth--;
1679
1680 while (depth >= 0) {
1681 if (path[depth].p_idx !=
1682 EXT_LAST_INDEX(path[depth].p_hdr))
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05001683 return (ext4_lblk_t)
1684 le32_to_cpu(path[depth].p_idx[1].ei_block);
Alex Tomasa86c6182006-10-11 01:21:03 -07001685 depth--;
1686 }
1687
Lukas Czernerf17722f2011-06-06 00:05:17 -04001688 return EXT_MAX_BLOCKS;
Alex Tomasa86c6182006-10-11 01:21:03 -07001689}
1690
1691/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001692 * ext4_ext_correct_indexes:
1693 * if leaf gets modified and modified extent is first in the leaf,
1694 * then we have to correct all indexes above.
Alex Tomasa86c6182006-10-11 01:21:03 -07001695 * TODO: do we need to correct tree in all cases?
1696 */
Aneesh Kumar K.V1d03ec92008-01-28 23:58:27 -05001697static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
Alex Tomasa86c6182006-10-11 01:21:03 -07001698 struct ext4_ext_path *path)
1699{
1700 struct ext4_extent_header *eh;
1701 int depth = ext_depth(inode);
1702 struct ext4_extent *ex;
1703 __le32 border;
1704 int k, err = 0;
1705
1706 eh = path[depth].p_hdr;
1707 ex = path[depth].p_ext;
Frank Mayhar273df552010-03-02 11:46:09 -05001708
1709 if (unlikely(ex == NULL || eh == NULL)) {
1710 EXT4_ERROR_INODE(inode,
1711 "ex %p == NULL or eh %p == NULL", ex, eh);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001712 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001713 }
Alex Tomasa86c6182006-10-11 01:21:03 -07001714
1715 if (depth == 0) {
1716 /* there is no tree at all */
1717 return 0;
1718 }
1719
1720 if (ex != EXT_FIRST_EXTENT(eh)) {
1721 /* we correct tree if first leaf got modified only */
1722 return 0;
1723 }
1724
1725 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001726 * TODO: we need correction if border is smaller than current one
Alex Tomasa86c6182006-10-11 01:21:03 -07001727 */
1728 k = depth - 1;
1729 border = path[depth].p_ext->ee_block;
Avantika Mathur7e028972006-12-06 20:41:33 -08001730 err = ext4_ext_get_access(handle, inode, path + k);
1731 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001732 return err;
1733 path[k].p_idx->ei_block = border;
Avantika Mathur7e028972006-12-06 20:41:33 -08001734 err = ext4_ext_dirty(handle, inode, path + k);
1735 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001736 return err;
1737
1738 while (k--) {
1739 /* change all left-side indexes */
1740 if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
1741 break;
Avantika Mathur7e028972006-12-06 20:41:33 -08001742 err = ext4_ext_get_access(handle, inode, path + k);
1743 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001744 break;
1745 path[k].p_idx->ei_block = border;
Avantika Mathur7e028972006-12-06 20:41:33 -08001746 err = ext4_ext_dirty(handle, inode, path + k);
1747 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07001748 break;
1749 }
1750
1751 return err;
1752}
1753
Eric Biggers43f81672019-12-31 12:04:40 -06001754static int ext4_can_extents_be_merged(struct inode *inode,
1755 struct ext4_extent *ex1,
1756 struct ext4_extent *ex2)
Alex Tomasa86c6182006-10-11 01:21:03 -07001757{
Eric Sandeenda0169b2013-11-04 09:58:26 -05001758 unsigned short ext1_ee_len, ext2_ee_len;
Amit Aroraa2df2a62007-07-17 21:42:41 -04001759
Lukas Czerner556615d2014-04-20 23:45:47 -04001760 if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
Amit Aroraa2df2a62007-07-17 21:42:41 -04001761 return 0;
1762
1763 ext1_ee_len = ext4_ext_get_actual_len(ex1);
1764 ext2_ee_len = ext4_ext_get_actual_len(ex2);
1765
1766 if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
Andrew Morton63f57932006-10-11 01:21:24 -07001767 le32_to_cpu(ex2->ee_block))
Alex Tomasa86c6182006-10-11 01:21:03 -07001768 return 0;
1769
Eric Sandeenda0169b2013-11-04 09:58:26 -05001770 if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
Suparna Bhattacharya471d4012006-10-11 01:21:06 -07001771 return 0;
Matthew Bobrowski378f32b2019-11-05 23:02:39 +11001772
Lukas Czerner556615d2014-04-20 23:45:47 -04001773 if (ext4_ext_is_unwritten(ex1) &&
Matthew Bobrowski378f32b2019-11-05 23:02:39 +11001774 ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)
Darrick J. Wonga9b82412014-02-20 21:17:35 -05001775 return 0;
Robert P. J. Daybbf2f9f2007-02-17 19:20:16 +01001776#ifdef AGGRESSIVE_TEST
Aneesh Kumar K.Vb939e372008-01-28 23:58:27 -05001777 if (ext1_ee_len >= 4)
Alex Tomasa86c6182006-10-11 01:21:03 -07001778 return 0;
1779#endif
1780
Theodore Ts'obf89d162010-10-27 21:30:14 -04001781 if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
Alex Tomasa86c6182006-10-11 01:21:03 -07001782 return 1;
1783 return 0;
1784}
1785
1786/*
Amit Arora56055d32007-07-17 21:42:38 -04001787 * This function tries to merge the "ex" extent to the next extent in the tree.
1788 * It always tries to merge towards right. If you want to merge towards
1789 * left, pass "ex - 1" as argument instead of "ex".
1790 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
1791 * 1 if they got merged.
1792 */
Yongqiang Yang197217a2011-05-03 11:45:29 -04001793static int ext4_ext_try_to_merge_right(struct inode *inode,
Theodore Ts'o1f109d52010-10-27 21:30:14 -04001794 struct ext4_ext_path *path,
1795 struct ext4_extent *ex)
Amit Arora56055d32007-07-17 21:42:38 -04001796{
1797 struct ext4_extent_header *eh;
1798 unsigned int depth, len;
Lukas Czerner556615d2014-04-20 23:45:47 -04001799 int merge_done = 0, unwritten;
Amit Arora56055d32007-07-17 21:42:38 -04001800
1801 depth = ext_depth(inode);
1802 BUG_ON(path[depth].p_hdr == NULL);
1803 eh = path[depth].p_hdr;
1804
1805 while (ex < EXT_LAST_EXTENT(eh)) {
1806 if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
1807 break;
1808 /* merge with next extent! */
Lukas Czerner556615d2014-04-20 23:45:47 -04001809 unwritten = ext4_ext_is_unwritten(ex);
Amit Arora56055d32007-07-17 21:42:38 -04001810 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1811 + ext4_ext_get_actual_len(ex + 1));
Lukas Czerner556615d2014-04-20 23:45:47 -04001812 if (unwritten)
1813 ext4_ext_mark_unwritten(ex);
Amit Arora56055d32007-07-17 21:42:38 -04001814
1815 if (ex + 1 < EXT_LAST_EXTENT(eh)) {
1816 len = (EXT_LAST_EXTENT(eh) - ex - 1)
1817 * sizeof(struct ext4_extent);
1818 memmove(ex + 1, ex + 2, len);
1819 }
Marcin Slusarze8546d02008-04-17 10:38:59 -04001820 le16_add_cpu(&eh->eh_entries, -1);
Amit Arora56055d32007-07-17 21:42:38 -04001821 merge_done = 1;
1822 WARN_ON(eh->eh_entries == 0);
1823 if (!eh->eh_entries)
Theodore Ts'o24676da2010-05-16 21:00:00 -04001824 EXT4_ERROR_INODE(inode, "eh->eh_entries = 0!");
Amit Arora56055d32007-07-17 21:42:38 -04001825 }
1826
1827 return merge_done;
1828}
1829
1830/*
Theodore Ts'oecb94f52012-08-17 09:44:17 -04001831 * This function does a very simple check to see if we can collapse
1832 * an extent tree with a single extent tree leaf block into the inode.
1833 */
1834static void ext4_ext_try_to_merge_up(handle_t *handle,
1835 struct inode *inode,
1836 struct ext4_ext_path *path)
1837{
1838 size_t s;
1839 unsigned max_root = ext4_ext_space_root(inode, 0);
1840 ext4_fsblk_t blk;
1841
1842 if ((path[0].p_depth != 1) ||
1843 (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
1844 (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
1845 return;
1846
1847 /*
1848 * We need to modify the block allocation bitmap and the block
1849 * group descriptor to release the extent tree block. If we
1850 * can't get the journal credits, give up.
1851 */
Jan Kara83448bd2019-11-05 17:44:29 +01001852 if (ext4_journal_extend(handle, 2,
1853 ext4_free_metadata_revoke_credits(inode->i_sb, 1)))
Theodore Ts'oecb94f52012-08-17 09:44:17 -04001854 return;
1855
1856 /*
1857 * Copy the extent data up to the inode
1858 */
1859 blk = ext4_idx_pblock(path[0].p_idx);
1860 s = le16_to_cpu(path[1].p_hdr->eh_entries) *
1861 sizeof(struct ext4_extent_idx);
1862 s += sizeof(struct ext4_extent_header);
1863
Theodore Ts'o10809df82014-09-01 14:40:09 -04001864 path[1].p_maxdepth = path[0].p_maxdepth;
Theodore Ts'oecb94f52012-08-17 09:44:17 -04001865 memcpy(path[0].p_hdr, path[1].p_hdr, s);
1866 path[0].p_depth = 0;
1867 path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
1868 (path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
1869 path[0].p_hdr->eh_max = cpu_to_le16(max_root);
1870
1871 brelse(path[1].p_bh);
1872 ext4_free_blocks(handle, inode, NULL, blk, 1,
Theodore Ts'o71d4f7d2014-07-15 06:02:38 -04001873 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
Theodore Ts'oecb94f52012-08-17 09:44:17 -04001874}
1875
1876/*
Eric Biggersadde81c2019-12-31 12:04:41 -06001877 * This function tries to merge the @ex extent to neighbours in the tree, then
1878 * tries to collapse the extent tree into the inode.
Yongqiang Yang197217a2011-05-03 11:45:29 -04001879 */
Theodore Ts'oecb94f52012-08-17 09:44:17 -04001880static void ext4_ext_try_to_merge(handle_t *handle,
1881 struct inode *inode,
Yongqiang Yang197217a2011-05-03 11:45:29 -04001882 struct ext4_ext_path *path,
Eric Biggersadde81c2019-12-31 12:04:41 -06001883 struct ext4_extent *ex)
1884{
Yongqiang Yang197217a2011-05-03 11:45:29 -04001885 struct ext4_extent_header *eh;
1886 unsigned int depth;
1887 int merge_done = 0;
Yongqiang Yang197217a2011-05-03 11:45:29 -04001888
1889 depth = ext_depth(inode);
1890 BUG_ON(path[depth].p_hdr == NULL);
1891 eh = path[depth].p_hdr;
1892
1893 if (ex > EXT_FIRST_EXTENT(eh))
1894 merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
1895
1896 if (!merge_done)
Theodore Ts'oecb94f52012-08-17 09:44:17 -04001897 (void) ext4_ext_try_to_merge_right(inode, path, ex);
Yongqiang Yang197217a2011-05-03 11:45:29 -04001898
Theodore Ts'oecb94f52012-08-17 09:44:17 -04001899 ext4_ext_try_to_merge_up(handle, inode, path);
Yongqiang Yang197217a2011-05-03 11:45:29 -04001900}
1901
1902/*
Amit Arora25d14f92007-05-24 13:04:13 -04001903 * check if a portion of the "newext" extent overlaps with an
1904 * existing extent.
1905 *
1906 * If there is an overlap discovered, it updates the length of the newext
1907 * such that there will be no overlap, and then returns 1.
1908 * If there is no overlap found, it returns 0.
1909 */
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04001910static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
1911 struct inode *inode,
Theodore Ts'o1f109d52010-10-27 21:30:14 -04001912 struct ext4_extent *newext,
1913 struct ext4_ext_path *path)
Amit Arora25d14f92007-05-24 13:04:13 -04001914{
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05001915 ext4_lblk_t b1, b2;
Amit Arora25d14f92007-05-24 13:04:13 -04001916 unsigned int depth, len1;
1917 unsigned int ret = 0;
1918
1919 b1 = le32_to_cpu(newext->ee_block);
Amit Aroraa2df2a62007-07-17 21:42:41 -04001920 len1 = ext4_ext_get_actual_len(newext);
Amit Arora25d14f92007-05-24 13:04:13 -04001921 depth = ext_depth(inode);
1922 if (!path[depth].p_ext)
1923 goto out;
Theodore Ts'of5a44db2013-12-20 09:29:35 -05001924 b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
Amit Arora25d14f92007-05-24 13:04:13 -04001925
1926 /*
1927 * get the next allocated block if the extent in the path
Theodore Ts'o2b2d6d02008-07-26 16:15:44 -04001928 * is before the requested block(s)
Amit Arora25d14f92007-05-24 13:04:13 -04001929 */
1930 if (b2 < b1) {
1931 b2 = ext4_ext_next_allocated_block(path);
Lukas Czernerf17722f2011-06-06 00:05:17 -04001932 if (b2 == EXT_MAX_BLOCKS)
Amit Arora25d14f92007-05-24 13:04:13 -04001933 goto out;
Theodore Ts'of5a44db2013-12-20 09:29:35 -05001934 b2 = EXT4_LBLK_CMASK(sbi, b2);
Amit Arora25d14f92007-05-24 13:04:13 -04001935 }
1936
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05001937 /* check for wrap through zero on extent logical start block*/
Amit Arora25d14f92007-05-24 13:04:13 -04001938 if (b1 + len1 < b1) {
Lukas Czernerf17722f2011-06-06 00:05:17 -04001939 len1 = EXT_MAX_BLOCKS - b1;
Amit Arora25d14f92007-05-24 13:04:13 -04001940 newext->ee_len = cpu_to_le16(len1);
1941 ret = 1;
1942 }
1943
1944 /* check for overlap */
1945 if (b1 + len1 > b2) {
1946 newext->ee_len = cpu_to_le16(b2 - b1);
1947 ret = 1;
1948 }
1949out:
1950 return ret;
1951}
1952
1953/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001954 * ext4_ext_insert_extent:
Keyur Patele4d7f2d2020-06-10 23:19:46 -04001955 * tries to merge requested extent into the existing extent or
Randy Dunlapd0d856e2006-10-11 01:21:07 -07001956 * inserts requested extent as new one into the tree,
1957 * creating new leaf in the no-space case.
Alex Tomasa86c6182006-10-11 01:21:03 -07001958 */
1959int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
Theodore Ts'odfe50802014-09-01 14:37:09 -04001960 struct ext4_ext_path **ppath,
Theodore Ts'o107a7bd2013-08-16 21:23:41 -04001961 struct ext4_extent *newext, int gb_flags)
Alex Tomasa86c6182006-10-11 01:21:03 -07001962{
Theodore Ts'odfe50802014-09-01 14:37:09 -04001963 struct ext4_ext_path *path = *ppath;
Theodore Ts'oaf5bc922008-09-08 22:25:24 -04001964 struct ext4_extent_header *eh;
Alex Tomasa86c6182006-10-11 01:21:03 -07001965 struct ext4_extent *ex, *fex;
1966 struct ext4_extent *nearex; /* nearest extent */
1967 struct ext4_ext_path *npath = NULL;
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05001968 int depth, len, err;
1969 ext4_lblk_t next;
Lukas Czerner556615d2014-04-20 23:45:47 -04001970 int mb_flags = 0, unwritten;
Alex Tomasa86c6182006-10-11 01:21:03 -07001971
Theodore Ts'oe3cf5d52014-09-04 18:07:25 -04001972 if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1973 mb_flags |= EXT4_MB_DELALLOC_RESERVED;
Frank Mayhar273df552010-03-02 11:46:09 -05001974 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1975 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001976 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001977 }
Alex Tomasa86c6182006-10-11 01:21:03 -07001978 depth = ext_depth(inode);
1979 ex = path[depth].p_ext;
Lukas Czernerbe8981b2013-04-03 23:33:28 -04001980 eh = path[depth].p_hdr;
Frank Mayhar273df552010-03-02 11:46:09 -05001981 if (unlikely(path[depth].p_hdr == NULL)) {
1982 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04001983 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05001984 }
Alex Tomasa86c6182006-10-11 01:21:03 -07001985
1986 /* try to insert block into found extent and return */
Theodore Ts'o107a7bd2013-08-16 21:23:41 -04001987 if (ex && !(gb_flags & EXT4_GET_BLOCKS_PRE_IO)) {
Amit Aroraa2df2a62007-07-17 21:42:41 -04001988
1989 /*
Lukas Czernerbe8981b2013-04-03 23:33:28 -04001990 * Try to see whether we should rather test the extent on
1991 * right from ex, or from the left of ex. This is because
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04001992 * ext4_find_extent() can return either extent on the
Lukas Czernerbe8981b2013-04-03 23:33:28 -04001993 * left, or on the right from the searched position. This
1994 * will make merging more effective.
Amit Aroraa2df2a62007-07-17 21:42:41 -04001995 */
Lukas Czernerbe8981b2013-04-03 23:33:28 -04001996 if (ex < EXT_LAST_EXTENT(eh) &&
1997 (le32_to_cpu(ex->ee_block) +
1998 ext4_ext_get_actual_len(ex) <
1999 le32_to_cpu(newext->ee_block))) {
2000 ex += 1;
2001 goto prepend;
2002 } else if ((ex > EXT_FIRST_EXTENT(eh)) &&
2003 (le32_to_cpu(newext->ee_block) +
2004 ext4_ext_get_actual_len(newext) <
2005 le32_to_cpu(ex->ee_block)))
2006 ex -= 1;
2007
2008 /* Try to append newex to the ex */
2009 if (ext4_can_extents_be_merged(inode, ex, newext)) {
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302010 ext_debug(inode, "append [%d]%d block to %u:[%d]%d"
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002011 "(from %llu)\n",
Lukas Czerner556615d2014-04-20 23:45:47 -04002012 ext4_ext_is_unwritten(newext),
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002013 ext4_ext_get_actual_len(newext),
2014 le32_to_cpu(ex->ee_block),
Lukas Czerner556615d2014-04-20 23:45:47 -04002015 ext4_ext_is_unwritten(ex),
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002016 ext4_ext_get_actual_len(ex),
2017 ext4_ext_pblock(ex));
2018 err = ext4_ext_get_access(handle, inode,
2019 path + depth);
2020 if (err)
2021 return err;
Lukas Czerner556615d2014-04-20 23:45:47 -04002022 unwritten = ext4_ext_is_unwritten(ex);
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002023 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
Amit Aroraa2df2a62007-07-17 21:42:41 -04002024 + ext4_ext_get_actual_len(newext));
Lukas Czerner556615d2014-04-20 23:45:47 -04002025 if (unwritten)
2026 ext4_ext_mark_unwritten(ex);
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002027 nearex = ex;
2028 goto merge;
2029 }
2030
2031prepend:
2032 /* Try to prepend newex to the ex */
2033 if (ext4_can_extents_be_merged(inode, newext, ex)) {
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302034 ext_debug(inode, "prepend %u[%d]%d block to %u:[%d]%d"
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002035 "(from %llu)\n",
2036 le32_to_cpu(newext->ee_block),
Lukas Czerner556615d2014-04-20 23:45:47 -04002037 ext4_ext_is_unwritten(newext),
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002038 ext4_ext_get_actual_len(newext),
2039 le32_to_cpu(ex->ee_block),
Lukas Czerner556615d2014-04-20 23:45:47 -04002040 ext4_ext_is_unwritten(ex),
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002041 ext4_ext_get_actual_len(ex),
2042 ext4_ext_pblock(ex));
2043 err = ext4_ext_get_access(handle, inode,
2044 path + depth);
2045 if (err)
2046 return err;
2047
Lukas Czerner556615d2014-04-20 23:45:47 -04002048 unwritten = ext4_ext_is_unwritten(ex);
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002049 ex->ee_block = newext->ee_block;
2050 ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
2051 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
2052 + ext4_ext_get_actual_len(newext));
Lukas Czerner556615d2014-04-20 23:45:47 -04002053 if (unwritten)
2054 ext4_ext_mark_unwritten(ex);
Lukas Czernerbe8981b2013-04-03 23:33:28 -04002055 nearex = ex;
2056 goto merge;
2057 }
Alex Tomasa86c6182006-10-11 01:21:03 -07002058 }
2059
Alex Tomasa86c6182006-10-11 01:21:03 -07002060 depth = ext_depth(inode);
2061 eh = path[depth].p_hdr;
2062 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
2063 goto has_space;
2064
2065 /* probably next leaf has space for us? */
2066 fex = EXT_LAST_EXTENT(eh);
Robin Dong598dbdf2011-07-11 18:24:01 -04002067 next = EXT_MAX_BLOCKS;
2068 if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block))
Robin Dong57187892011-07-23 21:49:07 -04002069 next = ext4_ext_next_leaf_block(path);
Robin Dong598dbdf2011-07-11 18:24:01 -04002070 if (next != EXT_MAX_BLOCKS) {
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302071 ext_debug(inode, "next leaf block - %u\n", next);
Alex Tomasa86c6182006-10-11 01:21:03 -07002072 BUG_ON(npath != NULL);
Theodore Ts'o73c384c02020-05-07 10:50:28 -07002073 npath = ext4_find_extent(inode, next, NULL, gb_flags);
Alex Tomasa86c6182006-10-11 01:21:03 -07002074 if (IS_ERR(npath))
2075 return PTR_ERR(npath);
2076 BUG_ON(npath->p_depth != path->p_depth);
2077 eh = npath[depth].p_hdr;
2078 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302079 ext_debug(inode, "next leaf isn't full(%d)\n",
Alex Tomasa86c6182006-10-11 01:21:03 -07002080 le16_to_cpu(eh->eh_entries));
2081 path = npath;
Robin Dongffb505f2011-07-11 11:43:59 -04002082 goto has_space;
Alex Tomasa86c6182006-10-11 01:21:03 -07002083 }
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302084 ext_debug(inode, "next leaf has no free space(%d,%d)\n",
Alex Tomasa86c6182006-10-11 01:21:03 -07002085 le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
2086 }
2087
2088 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002089 * There is no free space in the found leaf.
2090 * We're gonna add a new leaf in the tree.
Alex Tomasa86c6182006-10-11 01:21:03 -07002091 */
Theodore Ts'o107a7bd2013-08-16 21:23:41 -04002092 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
Theodore Ts'oe3cf5d52014-09-04 18:07:25 -04002093 mb_flags |= EXT4_MB_USE_RESERVED;
Theodore Ts'o107a7bd2013-08-16 21:23:41 -04002094 err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
Theodore Ts'odfe50802014-09-01 14:37:09 -04002095 ppath, newext);
Alex Tomasa86c6182006-10-11 01:21:03 -07002096 if (err)
2097 goto cleanup;
2098 depth = ext_depth(inode);
2099 eh = path[depth].p_hdr;
2100
2101has_space:
2102 nearex = path[depth].p_ext;
2103
Avantika Mathur7e028972006-12-06 20:41:33 -08002104 err = ext4_ext_get_access(handle, inode, path + depth);
2105 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07002106 goto cleanup;
2107
2108 if (!nearex) {
2109 /* there is no extent in this leaf, create first one */
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302110 ext_debug(inode, "first extent in the leaf: %u:%llu:[%d]%d\n",
Dave Kleikamp8c55e202007-05-24 13:04:54 -04002111 le32_to_cpu(newext->ee_block),
Theodore Ts'obf89d162010-10-27 21:30:14 -04002112 ext4_ext_pblock(newext),
Lukas Czerner556615d2014-04-20 23:45:47 -04002113 ext4_ext_is_unwritten(newext),
Amit Aroraa2df2a62007-07-17 21:42:41 -04002114 ext4_ext_get_actual_len(newext));
Eric Gouriou80e675f2011-10-27 11:52:18 -04002115 nearex = EXT_FIRST_EXTENT(eh);
2116 } else {
2117 if (le32_to_cpu(newext->ee_block)
Dave Kleikamp8c55e202007-05-24 13:04:54 -04002118 > le32_to_cpu(nearex->ee_block)) {
Eric Gouriou80e675f2011-10-27 11:52:18 -04002119 /* Insert after */
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302120 ext_debug(inode, "insert %u:%llu:[%d]%d before: "
Yongqiang Yang32de6752011-11-01 18:56:41 -04002121 "nearest %p\n",
Dave Kleikamp8c55e202007-05-24 13:04:54 -04002122 le32_to_cpu(newext->ee_block),
Theodore Ts'obf89d162010-10-27 21:30:14 -04002123 ext4_ext_pblock(newext),
Lukas Czerner556615d2014-04-20 23:45:47 -04002124 ext4_ext_is_unwritten(newext),
Amit Aroraa2df2a62007-07-17 21:42:41 -04002125 ext4_ext_get_actual_len(newext),
Eric Gouriou80e675f2011-10-27 11:52:18 -04002126 nearex);
2127 nearex++;
2128 } else {
2129 /* Insert before */
2130 BUG_ON(newext->ee_block == nearex->ee_block);
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302131 ext_debug(inode, "insert %u:%llu:[%d]%d after: "
Yongqiang Yang32de6752011-11-01 18:56:41 -04002132 "nearest %p\n",
Eric Gouriou80e675f2011-10-27 11:52:18 -04002133 le32_to_cpu(newext->ee_block),
2134 ext4_ext_pblock(newext),
Lukas Czerner556615d2014-04-20 23:45:47 -04002135 ext4_ext_is_unwritten(newext),
Eric Gouriou80e675f2011-10-27 11:52:18 -04002136 ext4_ext_get_actual_len(newext),
2137 nearex);
Alex Tomasa86c6182006-10-11 01:21:03 -07002138 }
Eric Gouriou80e675f2011-10-27 11:52:18 -04002139 len = EXT_LAST_EXTENT(eh) - nearex + 1;
2140 if (len > 0) {
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302141 ext_debug(inode, "insert %u:%llu:[%d]%d: "
Eric Gouriou80e675f2011-10-27 11:52:18 -04002142 "move %d extents from 0x%p to 0x%p\n",
2143 le32_to_cpu(newext->ee_block),
2144 ext4_ext_pblock(newext),
Lukas Czerner556615d2014-04-20 23:45:47 -04002145 ext4_ext_is_unwritten(newext),
Eric Gouriou80e675f2011-10-27 11:52:18 -04002146 ext4_ext_get_actual_len(newext),
2147 len, nearex, nearex + 1);
2148 memmove(nearex + 1, nearex,
2149 len * sizeof(struct ext4_extent));
2150 }
Alex Tomasa86c6182006-10-11 01:21:03 -07002151 }
2152
Marcin Slusarze8546d02008-04-17 10:38:59 -04002153 le16_add_cpu(&eh->eh_entries, 1);
Eric Gouriou80e675f2011-10-27 11:52:18 -04002154 path[depth].p_ext = nearex;
Alex Tomasa86c6182006-10-11 01:21:03 -07002155 nearex->ee_block = newext->ee_block;
Theodore Ts'obf89d162010-10-27 21:30:14 -04002156 ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
Alex Tomasa86c6182006-10-11 01:21:03 -07002157 nearex->ee_len = newext->ee_len;
Alex Tomasa86c6182006-10-11 01:21:03 -07002158
2159merge:
HaiboLiue7bcf822012-07-09 16:29:28 -04002160 /* try to merge extents */
Theodore Ts'o107a7bd2013-08-16 21:23:41 -04002161 if (!(gb_flags & EXT4_GET_BLOCKS_PRE_IO))
Theodore Ts'oecb94f52012-08-17 09:44:17 -04002162 ext4_ext_try_to_merge(handle, inode, path, nearex);
Alex Tomasa86c6182006-10-11 01:21:03 -07002163
Alex Tomasa86c6182006-10-11 01:21:03 -07002164
2165 /* time to correct all indexes above */
2166 err = ext4_ext_correct_indexes(handle, inode, path);
2167 if (err)
2168 goto cleanup;
2169
Theodore Ts'oecb94f52012-08-17 09:44:17 -04002170 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
Alex Tomasa86c6182006-10-11 01:21:03 -07002171
2172cleanup:
Theodore Ts'ob7ea89a2014-09-01 14:39:09 -04002173 ext4_ext_drop_refs(npath);
2174 kfree(npath);
Alex Tomasa86c6182006-10-11 01:21:03 -07002175 return err;
2176}
2177
Theodore Ts'obb5835e2019-08-11 16:32:41 -04002178static int ext4_fill_es_cache_info(struct inode *inode,
2179 ext4_lblk_t block, ext4_lblk_t num,
2180 struct fiemap_extent_info *fieinfo)
2181{
2182 ext4_lblk_t next, end = block + num - 1;
2183 struct extent_status es;
2184 unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
2185 unsigned int flags;
2186 int err;
2187
2188 while (block <= end) {
2189 next = 0;
2190 flags = 0;
2191 if (!ext4_es_lookup_extent(inode, block, &next, &es))
2192 break;
2193 if (ext4_es_is_unwritten(&es))
2194 flags |= FIEMAP_EXTENT_UNWRITTEN;
2195 if (ext4_es_is_delayed(&es))
2196 flags |= (FIEMAP_EXTENT_DELALLOC |
2197 FIEMAP_EXTENT_UNKNOWN);
2198 if (ext4_es_is_hole(&es))
2199 flags |= EXT4_FIEMAP_EXTENT_HOLE;
2200 if (next == 0)
2201 flags |= FIEMAP_EXTENT_LAST;
2202 if (flags & (FIEMAP_EXTENT_DELALLOC|
2203 EXT4_FIEMAP_EXTENT_HOLE))
2204 es.es_pblk = 0;
2205 else
2206 es.es_pblk = ext4_es_pblock(&es);
2207 err = fiemap_fill_next_extent(fieinfo,
2208 (__u64)es.es_lblk << blksize_bits,
2209 (__u64)es.es_pblk << blksize_bits,
2210 (__u64)es.es_len << blksize_bits,
2211 flags);
2212 if (next == 0)
2213 break;
2214 block = next;
2215 if (err < 0)
2216 return err;
2217 if (err == 1)
2218 return 0;
2219 }
2220 return 0;
2221}
2222
2223
Alex Tomasa86c6182006-10-11 01:21:03 -07002224/*
Jan Kara140a5252016-03-09 22:46:57 -05002225 * ext4_ext_determine_hole - determine hole around given block
2226 * @inode: inode we lookup in
2227 * @path: path in extent tree to @lblk
2228 * @lblk: pointer to logical block around which we want to determine hole
2229 *
2230 * Determine hole length (and start if easily possible) around given logical
2231 * block. We don't try too hard to find the beginning of the hole but @path
2232 * actually points to extent before @lblk, we provide it.
2233 *
2234 * The function returns the length of a hole starting at @lblk. We update @lblk
2235 * to the beginning of the hole if we managed to find it.
2236 */
2237static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
2238 struct ext4_ext_path *path,
2239 ext4_lblk_t *lblk)
2240{
2241 int depth = ext_depth(inode);
2242 struct ext4_extent *ex;
2243 ext4_lblk_t len;
2244
2245 ex = path[depth].p_ext;
2246 if (ex == NULL) {
2247 /* there is no extent yet, so gap is [0;-] */
2248 *lblk = 0;
2249 len = EXT_MAX_BLOCKS;
2250 } else if (*lblk < le32_to_cpu(ex->ee_block)) {
2251 len = le32_to_cpu(ex->ee_block) - *lblk;
2252 } else if (*lblk >= le32_to_cpu(ex->ee_block)
2253 + ext4_ext_get_actual_len(ex)) {
2254 ext4_lblk_t next;
2255
2256 *lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
2257 next = ext4_ext_next_allocated_block(path);
2258 BUG_ON(next == *lblk);
2259 len = next - *lblk;
2260 } else {
2261 BUG();
2262 }
2263 return len;
2264}
2265
2266/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002267 * ext4_ext_put_gap_in_cache:
2268 * calculate boundaries of the gap that the requested block fits into
Alex Tomasa86c6182006-10-11 01:21:03 -07002269 * and cache this gap
2270 */
Avantika Mathur09b88252006-12-06 20:41:36 -08002271static void
Jan Kara140a5252016-03-09 22:46:57 -05002272ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
2273 ext4_lblk_t hole_len)
Alex Tomasa86c6182006-10-11 01:21:03 -07002274{
Zheng Liu2f8e0a72014-11-25 11:44:37 -05002275 struct extent_status es;
Alex Tomasa86c6182006-10-11 01:21:03 -07002276
Eric Whitneyad431022018-10-01 14:10:39 -04002277 ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
2278 hole_start + hole_len - 1, &es);
Zheng Liu2f8e0a72014-11-25 11:44:37 -05002279 if (es.es_len) {
2280 /* There's delayed extent containing lblock? */
Jan Kara140a5252016-03-09 22:46:57 -05002281 if (es.es_lblk <= hole_start)
Zheng Liu2f8e0a72014-11-25 11:44:37 -05002282 return;
Jan Kara140a5252016-03-09 22:46:57 -05002283 hole_len = min(es.es_lblk - hole_start, hole_len);
Zheng Liu2f8e0a72014-11-25 11:44:37 -05002284 }
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302285 ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
Jan Kara140a5252016-03-09 22:46:57 -05002286 ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
2287 EXTENT_STATUS_HOLE);
Alex Tomasa86c6182006-10-11 01:21:03 -07002288}
2289
2290/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002291 * ext4_ext_rm_idx:
2292 * removes index from the index block.
Alex Tomasa86c6182006-10-11 01:21:03 -07002293 */
Aneesh Kumar K.V1d03ec92008-01-28 23:58:27 -05002294static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
Forrest Liuc36575e2012-12-17 09:55:39 -05002295 struct ext4_ext_path *path, int depth)
Alex Tomasa86c6182006-10-11 01:21:03 -07002296{
Alex Tomasa86c6182006-10-11 01:21:03 -07002297 int err;
Alex Tomasf65e6fb2006-10-11 01:21:05 -07002298 ext4_fsblk_t leaf;
Alex Tomasa86c6182006-10-11 01:21:03 -07002299
2300 /* free index block */
Forrest Liuc36575e2012-12-17 09:55:39 -05002301 depth--;
2302 path = path + depth;
Theodore Ts'obf89d162010-10-27 21:30:14 -04002303 leaf = ext4_idx_pblock(path->p_idx);
Frank Mayhar273df552010-03-02 11:46:09 -05002304 if (unlikely(path->p_hdr->eh_entries == 0)) {
2305 EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
Darrick J. Wong6a797d22015-10-17 16:16:04 -04002306 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05002307 }
Avantika Mathur7e028972006-12-06 20:41:33 -08002308 err = ext4_ext_get_access(handle, inode, path);
2309 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07002310 return err;
Robin Dong0e1147b2011-07-27 21:29:33 -04002311
2312 if (path->p_idx != EXT_LAST_INDEX(path->p_hdr)) {
2313 int len = EXT_LAST_INDEX(path->p_hdr) - path->p_idx;
2314 len *= sizeof(struct ext4_extent_idx);
2315 memmove(path->p_idx, path->p_idx + 1, len);
2316 }
2317
Marcin Slusarze8546d02008-04-17 10:38:59 -04002318 le16_add_cpu(&path->p_hdr->eh_entries, -1);
Avantika Mathur7e028972006-12-06 20:41:33 -08002319 err = ext4_ext_dirty(handle, inode, path);
2320 if (err)
Alex Tomasa86c6182006-10-11 01:21:03 -07002321 return err;
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302322 ext_debug(inode, "index is empty, remove it, free block %llu\n", leaf);
Aditya Kalid8990242011-09-09 19:18:51 -04002323 trace_ext4_ext_rm_idx(inode, leaf);
2324
Peter Huewe7dc57612011-02-21 21:01:42 -05002325 ext4_free_blocks(handle, inode, NULL, leaf, 1,
Theodore Ts'oe6362602009-11-23 07:17:05 -05002326 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
Forrest Liuc36575e2012-12-17 09:55:39 -05002327
2328 while (--depth >= 0) {
2329 if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
2330 break;
2331 path--;
2332 err = ext4_ext_get_access(handle, inode, path);
2333 if (err)
2334 break;
2335 path->p_idx->ei_block = (path+1)->p_idx->ei_block;
2336 err = ext4_ext_dirty(handle, inode, path);
2337 if (err)
2338 break;
2339 }
Alex Tomasa86c6182006-10-11 01:21:03 -07002340 return err;
2341}
2342
2343/*
Mingming Caoee12b632008-08-19 22:16:05 -04002344 * ext4_ext_calc_credits_for_single_extent:
2345 * This routine returns max. credits that needed to insert an extent
2346 * to the extent tree.
2347 * When pass the actual path, the caller should calculate credits
2348 * under i_data_sem.
Alex Tomasa86c6182006-10-11 01:21:03 -07002349 */
Mingming Cao525f4ed2008-08-19 22:15:58 -04002350int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
Alex Tomasa86c6182006-10-11 01:21:03 -07002351 struct ext4_ext_path *path)
2352{
Alex Tomasa86c6182006-10-11 01:21:03 -07002353 if (path) {
Mingming Caoee12b632008-08-19 22:16:05 -04002354 int depth = ext_depth(inode);
Mingming Caof3bd1f32008-08-19 22:16:03 -04002355 int ret = 0;
Mingming Caoee12b632008-08-19 22:16:05 -04002356
Alex Tomasa86c6182006-10-11 01:21:03 -07002357 /* probably there is space in leaf? */
Alex Tomasa86c6182006-10-11 01:21:03 -07002358 if (le16_to_cpu(path[depth].p_hdr->eh_entries)
Mingming Caoee12b632008-08-19 22:16:05 -04002359 < le16_to_cpu(path[depth].p_hdr->eh_max)) {
2360
2361 /*
2362 * There are some space in the leaf tree, no
2363 * need to account for leaf block credit
2364 *
2365 * bitmaps and block group descriptor blocks
Tao Madf3ab172011-10-08 15:53:49 -04002366 * and other metadata blocks still need to be
Mingming Caoee12b632008-08-19 22:16:05 -04002367 * accounted.
2368 */
Mingming Cao525f4ed2008-08-19 22:15:58 -04002369 /* 1 bitmap, 1 block group descriptor */
Mingming Caoee12b632008-08-19 22:16:05 -04002370 ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
Aneesh Kumar K.V5887e982009-07-05 23:12:04 -04002371 return ret;
Mingming Caoee12b632008-08-19 22:16:05 -04002372 }
Alex Tomasa86c6182006-10-11 01:21:03 -07002373 }
2374
Mingming Cao525f4ed2008-08-19 22:15:58 -04002375 return ext4_chunk_trans_blocks(inode, nrblocks);
Mingming Caoee12b632008-08-19 22:16:05 -04002376}
Alex Tomasa86c6182006-10-11 01:21:03 -07002377
/*
 * How many index/leaf blocks need to be changed/allocated to add @extents
 * extents?
 *
 * When a single extent is added, in the worst case every tree level has
 * its index/leaf block changed because of a tree split, so two blocks
 * per level are counted.
 *
 * Adding more extents could split the whole tree more than once, but
 * that is really rare; three blocks per level are counted for it.
 */
int ext4_ext_index_trans_blocks(struct inode *inode, int extents)
{
	int blocks_per_level;

	/* If we are converting the inline data, only one is needed here. */
	if (ext4_has_inline_data(inode))
		return 1;

	blocks_per_level = (extents <= 1) ? 2 : 3;

	return ext_depth(inode) * blocks_per_level;
}
2405
Theodore Ts'o981250c2013-06-12 11:48:29 -04002406static inline int get_default_free_blocks_flags(struct inode *inode)
2407{
Tahsin Erdoganddfa17e2017-06-21 21:36:51 -04002408 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
2409 ext4_test_inode_flag(inode, EXT4_INODE_EA_INODE))
Theodore Ts'o981250c2013-06-12 11:48:29 -04002410 return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
2411 else if (ext4_should_journal_data(inode))
2412 return EXT4_FREE_BLOCKS_FORGET;
2413 return 0;
2414}
2415
Eric Whitney9fe67142018-10-01 14:25:08 -04002416/*
2417 * ext4_rereserve_cluster - increment the reserved cluster count when
2418 * freeing a cluster with a pending reservation
2419 *
2420 * @inode - file containing the cluster
2421 * @lblk - logical block in cluster to be reserved
2422 *
2423 * Increments the reserved cluster count and adjusts quota in a bigalloc
2424 * file system when freeing a partial cluster containing at least one
2425 * delayed and unwritten block. A partial cluster meeting that
2426 * requirement will have a pending reservation. If so, the
2427 * RERESERVE_CLUSTER flag is used when calling ext4_free_blocks() to
2428 * defer reserved and allocated space accounting to a subsequent call
2429 * to this function.
2430 */
2431static void ext4_rereserve_cluster(struct inode *inode, ext4_lblk_t lblk)
2432{
2433 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2434 struct ext4_inode_info *ei = EXT4_I(inode);
2435
2436 dquot_reclaim_block(inode, EXT4_C2B(sbi, 1));
2437
2438 spin_lock(&ei->i_block_reservation_lock);
2439 ei->i_reserved_data_blocks++;
2440 percpu_counter_add(&sbi->s_dirtyclusters_counter, 1);
2441 spin_unlock(&ei->i_block_reservation_lock);
2442
2443 percpu_counter_add(&sbi->s_freeclusters_counter, 1);
2444 ext4_remove_pending(inode, lblk);
2445}
2446
Alex Tomasa86c6182006-10-11 01:21:03 -07002447static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002448 struct ext4_extent *ex,
Eric Whitney9fe67142018-10-01 14:25:08 -04002449 struct partial_cluster *partial,
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002450 ext4_lblk_t from, ext4_lblk_t to)
Alex Tomasa86c6182006-10-11 01:21:03 -07002451{
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002452 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
Eric Whitney345ee942014-11-23 00:59:39 -05002453 unsigned short ee_len = ext4_ext_get_actual_len(ex);
Eric Whitney9fe67142018-10-01 14:25:08 -04002454 ext4_fsblk_t last_pblk, pblk;
2455 ext4_lblk_t num;
2456 int flags;
2457
2458 /* only extent tail removal is allowed */
2459 if (from < le32_to_cpu(ex->ee_block) ||
2460 to != le32_to_cpu(ex->ee_block) + ee_len - 1) {
2461 ext4_error(sbi->s_sb,
2462 "strange request: removal(2) %u-%u from %u:%u",
2463 from, to, le32_to_cpu(ex->ee_block), ee_len);
2464 return 0;
2465 }
2466
2467#ifdef EXTENTS_STATS
2468 spin_lock(&sbi->s_ext_stats_lock);
2469 sbi->s_ext_blocks += ee_len;
2470 sbi->s_ext_extents++;
2471 if (ee_len < sbi->s_ext_min)
2472 sbi->s_ext_min = ee_len;
2473 if (ee_len > sbi->s_ext_max)
2474 sbi->s_ext_max = ee_len;
2475 if (ext_depth(inode) > sbi->s_depth_max)
2476 sbi->s_depth_max = ext_depth(inode);
2477 spin_unlock(&sbi->s_ext_stats_lock);
2478#endif
2479
2480 trace_ext4_remove_blocks(inode, ex, from, to, partial);
2481
2482 /*
2483 * if we have a partial cluster, and it's different from the
2484 * cluster of the last block in the extent, we free it
2485 */
2486 last_pblk = ext4_ext_pblock(ex) + ee_len - 1;
2487
2488 if (partial->state != initial &&
2489 partial->pclu != EXT4_B2C(sbi, last_pblk)) {
2490 if (partial->state == tofree) {
2491 flags = get_default_free_blocks_flags(inode);
2492 if (ext4_is_pending(inode, partial->lblk))
2493 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2494 ext4_free_blocks(handle, inode, NULL,
2495 EXT4_C2B(sbi, partial->pclu),
2496 sbi->s_cluster_ratio, flags);
2497 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2498 ext4_rereserve_cluster(inode, partial->lblk);
2499 }
2500 partial->state = initial;
2501 }
2502
2503 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2504 pblk = ext4_ext_pblock(ex) + ee_len - num;
2505
2506 /*
2507 * We free the partial cluster at the end of the extent (if any),
2508 * unless the cluster is used by another extent (partial_cluster
2509 * state is nofree). If a partial cluster exists here, it must be
2510 * shared with the last block in the extent.
2511 */
2512 flags = get_default_free_blocks_flags(inode);
2513
2514 /* partial, left end cluster aligned, right end unaligned */
2515 if ((EXT4_LBLK_COFF(sbi, to) != sbi->s_cluster_ratio - 1) &&
2516 (EXT4_LBLK_CMASK(sbi, to) >= from) &&
2517 (partial->state != nofree)) {
2518 if (ext4_is_pending(inode, to))
2519 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
2520 ext4_free_blocks(handle, inode, NULL,
2521 EXT4_PBLK_CMASK(sbi, last_pblk),
2522 sbi->s_cluster_ratio, flags);
2523 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2524 ext4_rereserve_cluster(inode, to);
2525 partial->state = initial;
2526 flags = get_default_free_blocks_flags(inode);
2527 }
2528
2529 flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER;
Andrey Sidorov18888cf2012-09-19 14:14:53 -04002530
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002531 /*
2532 * For bigalloc file systems, we never free a partial cluster
Eric Whitney9fe67142018-10-01 14:25:08 -04002533 * at the beginning of the extent. Instead, we check to see if we
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002534 * need to free it on a subsequent call to ext4_remove_blocks,
Eric Whitney345ee942014-11-23 00:59:39 -05002535 * or at the end of ext4_ext_rm_leaf or ext4_ext_remove_space.
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002536 */
2537 flags |= EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER;
Eric Whitney9fe67142018-10-01 14:25:08 -04002538 ext4_free_blocks(handle, inode, NULL, pblk, num, flags);
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002539
Eric Whitney9fe67142018-10-01 14:25:08 -04002540 /* reset the partial cluster if we've freed past it */
2541 if (partial->state != initial && partial->pclu != EXT4_B2C(sbi, pblk))
2542 partial->state = initial;
2543
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002544 /*
Eric Whitney9fe67142018-10-01 14:25:08 -04002545 * If we've freed the entire extent but the beginning is not left
2546 * cluster aligned and is not marked as ineligible for freeing we
2547 * record the partial cluster at the beginning of the extent. It
2548 * wasn't freed by the preceding ext4_free_blocks() call, and we
2549 * need to look farther to the left to determine if it's to be freed
2550 * (not shared with another extent). Else, reset the partial
2551 * cluster - we're either done freeing or the beginning of the
2552 * extent is left cluster aligned.
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002553 */
Eric Whitney9fe67142018-10-01 14:25:08 -04002554 if (EXT4_LBLK_COFF(sbi, from) && num == ee_len) {
2555 if (partial->state == initial) {
2556 partial->pclu = EXT4_B2C(sbi, pblk);
2557 partial->lblk = from;
2558 partial->state = tofree;
Eric Whitney345ee942014-11-23 00:59:39 -05002559 }
Eric Whitney9fe67142018-10-01 14:25:08 -04002560 } else {
2561 partial->state = initial;
2562 }
2563
Alex Tomasa86c6182006-10-11 01:21:03 -07002564 return 0;
2565}
2566
/*
 * ext4_ext_rm_leaf() Removes the extents associated with the
 * blocks appearing between "start" and "end".  Both "start"
 * and "end" must appear in the same extent or -EFSCORRUPTED is returned.
 *
 * @handle: The journal handle
 * @inode:  The file's inode
 * @path:   The path to the leaf
 * @partial: The partial cluster which we'll have to free if all extents
 *          have been released from it.  However, if its state is
 *          nofree, it's a cluster just to the right of the
 *          punched region and it must not be freed.
 * @start:  The first block to remove
 * @end:    The last block to remove
 */
Alex Tomasa86c6182006-10-11 01:21:03 -07002582static int
2583ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
Lukas Czernerd23142c2013-05-27 23:33:35 -04002584 struct ext4_ext_path *path,
Eric Whitney9fe67142018-10-01 14:25:08 -04002585 struct partial_cluster *partial,
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002586 ext4_lblk_t start, ext4_lblk_t end)
Alex Tomasa86c6182006-10-11 01:21:03 -07002587{
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002588 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
Alex Tomasa86c6182006-10-11 01:21:03 -07002589 int err = 0, correct_index = 0;
Jan Kara83448bd2019-11-05 17:44:29 +01002590 int depth = ext_depth(inode), credits, revoke_credits;
Alex Tomasa86c6182006-10-11 01:21:03 -07002591 struct ext4_extent_header *eh;
Dmitry Monakhov750c9c42011-10-25 05:35:05 -04002592 ext4_lblk_t a, b;
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05002593 unsigned num;
2594 ext4_lblk_t ex_ee_block;
Alex Tomasa86c6182006-10-11 01:21:03 -07002595 unsigned short ex_ee_len;
Lukas Czerner556615d2014-04-20 23:45:47 -04002596 unsigned unwritten = 0;
Alex Tomasa86c6182006-10-11 01:21:03 -07002597 struct ext4_extent *ex;
Lukas Czernerd23142c2013-05-27 23:33:35 -04002598 ext4_fsblk_t pblk;
Alex Tomasa86c6182006-10-11 01:21:03 -07002599
Alex Tomasc29c0ae2007-07-18 09:19:09 -04002600 /* the header must be checked already in ext4_ext_remove_space() */
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302601 ext_debug(inode, "truncate since %u in leaf to %u\n", start, end);
Alex Tomasa86c6182006-10-11 01:21:03 -07002602 if (!path[depth].p_hdr)
2603 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
2604 eh = path[depth].p_hdr;
Frank Mayhar273df552010-03-02 11:46:09 -05002605 if (unlikely(path[depth].p_hdr == NULL)) {
2606 EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04002607 return -EFSCORRUPTED;
Frank Mayhar273df552010-03-02 11:46:09 -05002608 }
Alex Tomasa86c6182006-10-11 01:21:03 -07002609 /* find where to start removing */
Ashish Sangwan6ae06ff2013-07-01 08:12:41 -04002610 ex = path[depth].p_ext;
2611 if (!ex)
2612 ex = EXT_LAST_EXTENT(eh);
Alex Tomasa86c6182006-10-11 01:21:03 -07002613
2614 ex_ee_block = le32_to_cpu(ex->ee_block);
Amit Aroraa2df2a62007-07-17 21:42:41 -04002615 ex_ee_len = ext4_ext_get_actual_len(ex);
Alex Tomasa86c6182006-10-11 01:21:03 -07002616
Eric Whitney9fe67142018-10-01 14:25:08 -04002617 trace_ext4_ext_rm_leaf(inode, start, ex, partial);
Aditya Kalid8990242011-09-09 19:18:51 -04002618
Alex Tomasa86c6182006-10-11 01:21:03 -07002619 while (ex >= EXT_FIRST_EXTENT(eh) &&
2620 ex_ee_block + ex_ee_len > start) {
Aneesh Kumar K.Va41f2072009-06-10 14:22:55 -04002621
Lukas Czerner556615d2014-04-20 23:45:47 -04002622 if (ext4_ext_is_unwritten(ex))
2623 unwritten = 1;
Aneesh Kumar K.Va41f2072009-06-10 14:22:55 -04002624 else
Lukas Czerner556615d2014-04-20 23:45:47 -04002625 unwritten = 0;
Aneesh Kumar K.Va41f2072009-06-10 14:22:55 -04002626
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302627 ext_debug(inode, "remove ext %u:[%d]%d\n", ex_ee_block,
Lukas Czerner556615d2014-04-20 23:45:47 -04002628 unwritten, ex_ee_len);
Alex Tomasa86c6182006-10-11 01:21:03 -07002629 path[depth].p_ext = ex;
2630
2631 a = ex_ee_block > start ? ex_ee_block : start;
Allison Hendersond583fb82011-05-25 07:41:43 -04002632 b = ex_ee_block+ex_ee_len - 1 < end ?
2633 ex_ee_block+ex_ee_len - 1 : end;
Alex Tomasa86c6182006-10-11 01:21:03 -07002634
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302635 ext_debug(inode, " border %u:%u\n", a, b);
Alex Tomasa86c6182006-10-11 01:21:03 -07002636
Allison Hendersond583fb82011-05-25 07:41:43 -04002637 /* If this extent is beyond the end of the hole, skip it */
Lukas Czerner5f95d212012-03-19 23:03:19 -04002638 if (end < ex_ee_block) {
Lukas Czernerd23142c2013-05-27 23:33:35 -04002639 /*
2640 * We're going to skip this extent and move to another,
Eric Whitneyf4226d92014-11-23 00:55:42 -05002641 * so note that its first cluster is in use to avoid
2642 * freeing it when removing blocks. Eventually, the
2643 * right edge of the truncated/punched region will
2644 * be just to the left.
Lukas Czernerd23142c2013-05-27 23:33:35 -04002645 */
Eric Whitneyf4226d92014-11-23 00:55:42 -05002646 if (sbi->s_cluster_ratio > 1) {
2647 pblk = ext4_ext_pblock(ex);
Eric Whitney9fe67142018-10-01 14:25:08 -04002648 partial->pclu = EXT4_B2C(sbi, pblk);
2649 partial->state = nofree;
Eric Whitneyf4226d92014-11-23 00:55:42 -05002650 }
Allison Hendersond583fb82011-05-25 07:41:43 -04002651 ex--;
2652 ex_ee_block = le32_to_cpu(ex->ee_block);
2653 ex_ee_len = ext4_ext_get_actual_len(ex);
2654 continue;
Dmitry Monakhov750c9c42011-10-25 05:35:05 -04002655 } else if (b != ex_ee_block + ex_ee_len - 1) {
Lukas Czernerdc1841d2012-03-19 23:07:43 -04002656 EXT4_ERROR_INODE(inode,
2657 "can not handle truncate %u:%u "
2658 "on extent %u:%u",
2659 start, end, ex_ee_block,
2660 ex_ee_block + ex_ee_len - 1);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04002661 err = -EFSCORRUPTED;
Dmitry Monakhov750c9c42011-10-25 05:35:05 -04002662 goto out;
Alex Tomasa86c6182006-10-11 01:21:03 -07002663 } else if (a != ex_ee_block) {
2664 /* remove tail of the extent */
Dmitry Monakhov750c9c42011-10-25 05:35:05 -04002665 num = a - ex_ee_block;
Alex Tomasa86c6182006-10-11 01:21:03 -07002666 } else {
2667 /* remove whole extent: excellent! */
Alex Tomasa86c6182006-10-11 01:21:03 -07002668 num = 0;
Alex Tomasa86c6182006-10-11 01:21:03 -07002669 }
Theodore Ts'o34071da2008-08-01 21:59:19 -04002670 /*
2671 * 3 for leaf, sb, and inode plus 2 (bmap and group
2672 * descriptor) for each block group; assume two block
2673 * groups plus ex_ee_len/blocks_per_block_group for
2674 * the worst case
2675 */
2676 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
Alex Tomasa86c6182006-10-11 01:21:03 -07002677 if (ex == EXT_FIRST_EXTENT(eh)) {
2678 correct_index = 1;
2679 credits += (ext_depth(inode)) + 1;
2680 }
Dmitry Monakhov5aca07e2009-12-08 22:42:15 -05002681 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
Jan Kara83448bd2019-11-05 17:44:29 +01002682 /*
2683 * We may end up freeing some index blocks and data from the
2684 * punched range. Note that partial clusters are accounted for
2685 * by ext4_free_data_revoke_credits().
2686 */
2687 revoke_credits =
2688 ext4_free_metadata_revoke_credits(inode->i_sb,
2689 ext_depth(inode)) +
2690 ext4_free_data_revoke_credits(inode, b - a + 1);
Alex Tomasa86c6182006-10-11 01:21:03 -07002691
Jan Karaa4130362019-11-05 17:44:16 +01002692 err = ext4_datasem_ensure_credits(handle, inode, credits,
Jan Kara83448bd2019-11-05 17:44:29 +01002693 credits, revoke_credits);
Jan Karaa4130362019-11-05 17:44:16 +01002694 if (err) {
2695 if (err > 0)
2696 err = -EAGAIN;
Alex Tomasa86c6182006-10-11 01:21:03 -07002697 goto out;
Jan Karaa4130362019-11-05 17:44:16 +01002698 }
Alex Tomasa86c6182006-10-11 01:21:03 -07002699
2700 err = ext4_ext_get_access(handle, inode, path + depth);
2701 if (err)
2702 goto out;
2703
Eric Whitney9fe67142018-10-01 14:25:08 -04002704 err = ext4_remove_blocks(handle, inode, ex, partial, a, b);
Alex Tomasa86c6182006-10-11 01:21:03 -07002705 if (err)
2706 goto out;
2707
Dmitry Monakhov750c9c42011-10-25 05:35:05 -04002708 if (num == 0)
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002709 /* this extent is removed; mark slot entirely unused */
Alex Tomasf65e6fb2006-10-11 01:21:05 -07002710 ext4_ext_store_pblock(ex, 0);
Alex Tomasa86c6182006-10-11 01:21:03 -07002711
Alex Tomasa86c6182006-10-11 01:21:03 -07002712 ex->ee_len = cpu_to_le16(num);
Amit Arora749269f2007-07-18 09:02:56 -04002713 /*
Lukas Czerner556615d2014-04-20 23:45:47 -04002714 * Do not mark unwritten if all the blocks in the
Amit Arora749269f2007-07-18 09:02:56 -04002715 * extent have been removed.
2716 */
Lukas Czerner556615d2014-04-20 23:45:47 -04002717 if (unwritten && num)
2718 ext4_ext_mark_unwritten(ex);
Allison Hendersond583fb82011-05-25 07:41:43 -04002719 /*
2720 * If the extent was completely released,
2721 * we need to remove it from the leaf
2722 */
2723 if (num == 0) {
Lukas Czernerf17722f2011-06-06 00:05:17 -04002724 if (end != EXT_MAX_BLOCKS - 1) {
Allison Hendersond583fb82011-05-25 07:41:43 -04002725 /*
2726 * For hole punching, we need to scoot all the
2727 * extents up when an extent is removed so that
2728 * we dont have blank extents in the middle
2729 */
2730 memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
2731 sizeof(struct ext4_extent));
2732
2733 /* Now get rid of the one at the end */
2734 memset(EXT_LAST_EXTENT(eh), 0,
2735 sizeof(struct ext4_extent));
2736 }
2737 le16_add_cpu(&eh->eh_entries, -1);
Eric Whitney5bf43762014-11-23 00:58:11 -05002738 }
Allison Hendersond583fb82011-05-25 07:41:43 -04002739
Dmitry Monakhov750c9c42011-10-25 05:35:05 -04002740 err = ext4_ext_dirty(handle, inode, path + depth);
2741 if (err)
2742 goto out;
2743
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302744 ext_debug(inode, "new extent: %u:%u:%llu\n", ex_ee_block, num,
Theodore Ts'obf89d162010-10-27 21:30:14 -04002745 ext4_ext_pblock(ex));
Alex Tomasa86c6182006-10-11 01:21:03 -07002746 ex--;
2747 ex_ee_block = le32_to_cpu(ex->ee_block);
Amit Aroraa2df2a62007-07-17 21:42:41 -04002748 ex_ee_len = ext4_ext_get_actual_len(ex);
Alex Tomasa86c6182006-10-11 01:21:03 -07002749 }
2750
2751 if (correct_index && eh->eh_entries)
2752 err = ext4_ext_correct_indexes(handle, inode, path);
2753
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002754 /*
Eric Whitneyad6599a2014-04-01 19:49:30 -04002755 * If there's a partial cluster and at least one extent remains in
2756 * the leaf, free the partial cluster if it isn't shared with the
Eric Whitney5bf43762014-11-23 00:58:11 -05002757 * current extent. If it is shared with the current extent
Eric Whitney9fe67142018-10-01 14:25:08 -04002758 * we reset the partial cluster because we've reached the start of the
Eric Whitney5bf43762014-11-23 00:58:11 -05002759 * truncated/punched region and we're done removing blocks.
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002760 */
Eric Whitney9fe67142018-10-01 14:25:08 -04002761 if (partial->state == tofree && ex >= EXT_FIRST_EXTENT(eh)) {
Eric Whitney5bf43762014-11-23 00:58:11 -05002762 pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
Eric Whitney9fe67142018-10-01 14:25:08 -04002763 if (partial->pclu != EXT4_B2C(sbi, pblk)) {
2764 int flags = get_default_free_blocks_flags(inode);
2765
2766 if (ext4_is_pending(inode, partial->lblk))
2767 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
Eric Whitney5bf43762014-11-23 00:58:11 -05002768 ext4_free_blocks(handle, inode, NULL,
Eric Whitney9fe67142018-10-01 14:25:08 -04002769 EXT4_C2B(sbi, partial->pclu),
2770 sbi->s_cluster_ratio, flags);
2771 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
2772 ext4_rereserve_cluster(inode, partial->lblk);
Eric Whitney5bf43762014-11-23 00:58:11 -05002773 }
Eric Whitney9fe67142018-10-01 14:25:08 -04002774 partial->state = initial;
Theodore Ts'o0aa06002011-09-09 18:54:51 -04002775 }
2776
Alex Tomasa86c6182006-10-11 01:21:03 -07002777 /* if this leaf is free, then we should
2778 * remove it from index block above */
2779 if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
Forrest Liuc36575e2012-12-17 09:55:39 -05002780 err = ext4_ext_rm_idx(handle, inode, path, depth);
Alex Tomasa86c6182006-10-11 01:21:03 -07002781
2782out:
2783 return err;
2784}
2785
2786/*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002787 * ext4_ext_more_to_rm:
2788 * returns 1 if current index has to be freed (even partial)
Alex Tomasa86c6182006-10-11 01:21:03 -07002789 */
Avantika Mathur09b88252006-12-06 20:41:36 -08002790static int
Alex Tomasa86c6182006-10-11 01:21:03 -07002791ext4_ext_more_to_rm(struct ext4_ext_path *path)
2792{
2793 BUG_ON(path->p_idx == NULL);
2794
2795 if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
2796 return 0;
2797
2798 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002799 * if truncate on deeper level happened, it wasn't partial,
Alex Tomasa86c6182006-10-11 01:21:03 -07002800 * so we have to consider current index for truncation
2801 */
2802 if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
2803 return 0;
2804 return 1;
2805}
2806
Theodore Ts'o26a4c0c2013-04-03 12:45:17 -04002807int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
2808 ext4_lblk_t end)
Alex Tomasa86c6182006-10-11 01:21:03 -07002809{
Eric Whitneyf4226d92014-11-23 00:55:42 -05002810 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
Alex Tomasa86c6182006-10-11 01:21:03 -07002811 int depth = ext_depth(inode);
Ashish Sangwan968dee72012-07-22 22:49:08 -04002812 struct ext4_ext_path *path = NULL;
Eric Whitney9fe67142018-10-01 14:25:08 -04002813 struct partial_cluster partial;
Alex Tomasa86c6182006-10-11 01:21:03 -07002814 handle_t *handle;
Dmitry Monakhov6f2080e2012-09-30 23:03:50 -04002815 int i = 0, err = 0;
Alex Tomasa86c6182006-10-11 01:21:03 -07002816
Eric Whitney9fe67142018-10-01 14:25:08 -04002817 partial.pclu = 0;
2818 partial.lblk = 0;
2819 partial.state = initial;
2820
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302821 ext_debug(inode, "truncate since %u to %u\n", start, end);
Alex Tomasa86c6182006-10-11 01:21:03 -07002822
2823 /* probably first extent we're gonna free will be last in block */
Jan Kara83448bd2019-11-05 17:44:29 +01002824 handle = ext4_journal_start_with_revoke(inode, EXT4_HT_TRUNCATE,
2825 depth + 1,
2826 ext4_free_metadata_revoke_credits(inode->i_sb, depth));
Alex Tomasa86c6182006-10-11 01:21:03 -07002827 if (IS_ERR(handle))
2828 return PTR_ERR(handle);
2829
Dmitry Monakhov0617b832010-05-17 01:00:00 -04002830again:
Lukas Czerner61801322013-05-27 23:32:35 -04002831 trace_ext4_ext_remove_space(inode, start, end, depth);
Aditya Kalid8990242011-09-09 19:18:51 -04002832
Alex Tomasa86c6182006-10-11 01:21:03 -07002833 /*
Lukas Czerner5f95d212012-03-19 23:03:19 -04002834 * Check if we are removing extents inside the extent tree. If that
2835 * is the case, we are going to punch a hole inside the extent tree
2836 * so we have to check whether we need to split the extent covering
2837 * the last block to remove so we can easily remove the part of it
2838 * in ext4_ext_rm_leaf().
2839 */
2840 if (end < EXT_MAX_BLOCKS - 1) {
2841 struct ext4_extent *ex;
Eric Whitneyf4226d92014-11-23 00:55:42 -05002842 ext4_lblk_t ee_block, ex_end, lblk;
2843 ext4_fsblk_t pblk;
Lukas Czerner5f95d212012-03-19 23:03:19 -04002844
Eric Whitneyf4226d92014-11-23 00:55:42 -05002845 /* find extent for or closest extent to this block */
Theodore Ts'o73c384c02020-05-07 10:50:28 -07002846 path = ext4_find_extent(inode, end, NULL,
2847 EXT4_EX_NOCACHE | EXT4_EX_NOFAIL);
Lukas Czerner5f95d212012-03-19 23:03:19 -04002848 if (IS_ERR(path)) {
2849 ext4_journal_stop(handle);
2850 return PTR_ERR(path);
2851 }
2852 depth = ext_depth(inode);
Dmitry Monakhov6f2080e2012-09-30 23:03:50 -04002853 /* Leaf not may not exist only if inode has no blocks at all */
Lukas Czerner5f95d212012-03-19 23:03:19 -04002854 ex = path[depth].p_ext;
Ashish Sangwan968dee72012-07-22 22:49:08 -04002855 if (!ex) {
Dmitry Monakhov6f2080e2012-09-30 23:03:50 -04002856 if (depth) {
2857 EXT4_ERROR_INODE(inode,
2858 "path[%d].p_hdr == NULL",
2859 depth);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04002860 err = -EFSCORRUPTED;
Dmitry Monakhov6f2080e2012-09-30 23:03:50 -04002861 }
2862 goto out;
Ashish Sangwan968dee72012-07-22 22:49:08 -04002863 }
Lukas Czerner5f95d212012-03-19 23:03:19 -04002864
2865 ee_block = le32_to_cpu(ex->ee_block);
Eric Whitneyf4226d92014-11-23 00:55:42 -05002866 ex_end = ee_block + ext4_ext_get_actual_len(ex) - 1;
Lukas Czerner5f95d212012-03-19 23:03:19 -04002867
2868 /*
2869 * See if the last block is inside the extent, if so split
2870 * the extent at 'end' block so we can easily remove the
2871 * tail of the first part of the split extent in
2872 * ext4_ext_rm_leaf().
2873 */
Eric Whitneyf4226d92014-11-23 00:55:42 -05002874 if (end >= ee_block && end < ex_end) {
2875
2876 /*
2877 * If we're going to split the extent, note that
2878 * the cluster containing the block after 'end' is
2879 * in use to avoid freeing it when removing blocks.
2880 */
2881 if (sbi->s_cluster_ratio > 1) {
Jeffle Xucfb3c852020-05-22 12:18:44 +08002882 pblk = ext4_ext_pblock(ex) + end - ee_block + 1;
Eric Whitney9fe67142018-10-01 14:25:08 -04002883 partial.pclu = EXT4_B2C(sbi, pblk);
2884 partial.state = nofree;
Eric Whitneyf4226d92014-11-23 00:55:42 -05002885 }
2886
Lukas Czerner5f95d212012-03-19 23:03:19 -04002887 /*
2888 * Split the extent in two so that 'end' is the last
Lukas Czerner27dd4382013-04-09 22:11:22 -04002889 * block in the first new extent. Also we should not
2890 * fail removing space due to ENOSPC so try to use
2891 * reserved block if that happens.
Lukas Czerner5f95d212012-03-19 23:03:19 -04002892 */
Theodore Ts'odfe50802014-09-01 14:37:09 -04002893 err = ext4_force_split_extent_at(handle, inode, &path,
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04002894 end + 1, 1);
Lukas Czerner5f95d212012-03-19 23:03:19 -04002895 if (err < 0)
2896 goto out;
Eric Whitneyf4226d92014-11-23 00:55:42 -05002897
Eric Whitney7bd75232019-02-28 23:34:11 -05002898 } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
2899 partial.state == initial) {
Eric Whitneyf4226d92014-11-23 00:55:42 -05002900 /*
Eric Whitney7bd75232019-02-28 23:34:11 -05002901 * If we're punching, there's an extent to the right.
2902 * If the partial cluster hasn't been set, set it to
2903 * that extent's first cluster and its state to nofree
2904 * so it won't be freed should it contain blocks to be
2905 * removed. If it's already set (tofree/nofree), we're
2906 * retrying and keep the original partial cluster info
2907 * so a cluster marked tofree as a result of earlier
2908 * extent removal is not lost.
Eric Whitneyf4226d92014-11-23 00:55:42 -05002909 */
2910 lblk = ex_end + 1;
2911 err = ext4_ext_search_right(inode, path, &lblk, &pblk,
yangerkund7dce9e2020-10-28 13:56:17 +08002912 NULL);
2913 if (err < 0)
Eric Whitneyf4226d92014-11-23 00:55:42 -05002914 goto out;
Eric Whitney9fe67142018-10-01 14:25:08 -04002915 if (pblk) {
2916 partial.pclu = EXT4_B2C(sbi, pblk);
2917 partial.state = nofree;
2918 }
Lukas Czerner5f95d212012-03-19 23:03:19 -04002919 }
Lukas Czerner5f95d212012-03-19 23:03:19 -04002920 }
Lukas Czerner5f95d212012-03-19 23:03:19 -04002921 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002922 * We start scanning from right side, freeing all the blocks
2923 * after i_size and walking into the tree depth-wise.
Alex Tomasa86c6182006-10-11 01:21:03 -07002924 */
Dmitry Monakhov0617b832010-05-17 01:00:00 -04002925 depth = ext_depth(inode);
Ashish Sangwan968dee72012-07-22 22:49:08 -04002926 if (path) {
2927 int k = i = depth;
2928 while (--k > 0)
2929 path[k].p_block =
2930 le16_to_cpu(path[k].p_hdr->eh_entries)+1;
2931 } else {
Kees Cook6396bb22018-06-12 14:03:40 -07002932 path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
Theodore Ts'o73c384c02020-05-07 10:50:28 -07002933 GFP_NOFS | __GFP_NOFAIL);
Ashish Sangwan968dee72012-07-22 22:49:08 -04002934 if (path == NULL) {
2935 ext4_journal_stop(handle);
2936 return -ENOMEM;
2937 }
Theodore Ts'o10809df82014-09-01 14:40:09 -04002938 path[0].p_maxdepth = path[0].p_depth = depth;
Ashish Sangwan968dee72012-07-22 22:49:08 -04002939 path[0].p_hdr = ext_inode_hdr(inode);
Theodore Ts'o89a4e482012-08-17 08:54:52 -04002940 i = 0;
Lukas Czerner5f95d212012-03-19 23:03:19 -04002941
Theodore Ts'oc3491792013-08-16 21:21:41 -04002942 if (ext4_ext_check(inode, path[0].p_hdr, depth, 0)) {
Darrick J. Wong6a797d22015-10-17 16:16:04 -04002943 err = -EFSCORRUPTED;
Ashish Sangwan968dee72012-07-22 22:49:08 -04002944 goto out;
2945 }
Alex Tomasa86c6182006-10-11 01:21:03 -07002946 }
Ashish Sangwan968dee72012-07-22 22:49:08 -04002947 err = 0;
Alex Tomasa86c6182006-10-11 01:21:03 -07002948
2949 while (i >= 0 && err == 0) {
2950 if (i == depth) {
2951 /* this is leaf block */
Allison Hendersond583fb82011-05-25 07:41:43 -04002952 err = ext4_ext_rm_leaf(handle, inode, path,
Eric Whitney9fe67142018-10-01 14:25:08 -04002953 &partial, start, end);
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002954 /* root level has p_bh == NULL, brelse() eats this */
Alex Tomasa86c6182006-10-11 01:21:03 -07002955 brelse(path[i].p_bh);
2956 path[i].p_bh = NULL;
2957 i--;
2958 continue;
2959 }
2960
2961 /* this is index block */
2962 if (!path[i].p_hdr) {
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302963 ext_debug(inode, "initialize header\n");
Alex Tomasa86c6182006-10-11 01:21:03 -07002964 path[i].p_hdr = ext_block_hdr(path[i].p_bh);
Alex Tomasa86c6182006-10-11 01:21:03 -07002965 }
2966
Alex Tomasa86c6182006-10-11 01:21:03 -07002967 if (!path[i].p_idx) {
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002968 /* this level hasn't been touched yet */
Alex Tomasa86c6182006-10-11 01:21:03 -07002969 path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
2970 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302971 ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
Alex Tomasa86c6182006-10-11 01:21:03 -07002972 path[i].p_hdr,
2973 le16_to_cpu(path[i].p_hdr->eh_entries));
2974 } else {
Randy Dunlapd0d856e2006-10-11 01:21:07 -07002975 /* we were already here, see at next index */
Alex Tomasa86c6182006-10-11 01:21:03 -07002976 path[i].p_idx--;
2977 }
2978
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302979 ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
Alex Tomasa86c6182006-10-11 01:21:03 -07002980 i, EXT_FIRST_INDEX(path[i].p_hdr),
2981 path[i].p_idx);
2982 if (ext4_ext_more_to_rm(path + i)) {
Alex Tomasc29c0ae2007-07-18 09:19:09 -04002983 struct buffer_head *bh;
Alex Tomasa86c6182006-10-11 01:21:03 -07002984 /* go to the next level */
Ritesh Harjani70aa1552020-05-10 11:54:55 +05302985 ext_debug(inode, "move to level %d (block %llu)\n",
Theodore Ts'obf89d162010-10-27 21:30:14 -04002986 i + 1, ext4_idx_pblock(path[i].p_idx));
Alex Tomasa86c6182006-10-11 01:21:03 -07002987 memset(path + i + 1, 0, sizeof(*path));
Zhang Yi9c6e0712021-09-08 20:08:49 +08002988 bh = read_extent_tree_block(inode, path[i].p_idx,
2989 depth - i - 1,
2990 EXT4_EX_NOCACHE);
Theodore Ts'o7d7ea892013-08-16 21:20:41 -04002991 if (IS_ERR(bh)) {
Alex Tomasa86c6182006-10-11 01:21:03 -07002992 /* should we reset i_size? */
Theodore Ts'o7d7ea892013-08-16 21:20:41 -04002993 err = PTR_ERR(bh);
Alex Tomasa86c6182006-10-11 01:21:03 -07002994 break;
2995 }
Theodore Ts'o76828c882013-07-15 12:27:47 -04002996 /* Yield here to deal with large extent trees.
2997 * Should be a no-op if we did IO above. */
2998 cond_resched();
Alex Tomasc29c0ae2007-07-18 09:19:09 -04002999 if (WARN_ON(i + 1 > depth)) {
Darrick J. Wong6a797d22015-10-17 16:16:04 -04003000 err = -EFSCORRUPTED;
Alex Tomasc29c0ae2007-07-18 09:19:09 -04003001 break;
3002 }
Alex Tomasc29c0ae2007-07-18 09:19:09 -04003003 path[i + 1].p_bh = bh;
Alex Tomasa86c6182006-10-11 01:21:03 -07003004
Randy Dunlapd0d856e2006-10-11 01:21:07 -07003005 /* save actual number of indexes since this
3006 * number is changed at the next iteration */
Alex Tomasa86c6182006-10-11 01:21:03 -07003007 path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
3008 i++;
3009 } else {
Randy Dunlapd0d856e2006-10-11 01:21:07 -07003010 /* we finished processing this index, go up */
Alex Tomasa86c6182006-10-11 01:21:03 -07003011 if (path[i].p_hdr->eh_entries == 0 && i > 0) {
Randy Dunlapd0d856e2006-10-11 01:21:07 -07003012 /* index is empty, remove it;
Alex Tomasa86c6182006-10-11 01:21:03 -07003013 * handle must be already prepared by the
3014 * truncatei_leaf() */
Forrest Liuc36575e2012-12-17 09:55:39 -05003015 err = ext4_ext_rm_idx(handle, inode, path, i);
Alex Tomasa86c6182006-10-11 01:21:03 -07003016 }
Randy Dunlapd0d856e2006-10-11 01:21:07 -07003017 /* root level has p_bh == NULL, brelse() eats this */
Alex Tomasa86c6182006-10-11 01:21:03 -07003018 brelse(path[i].p_bh);
3019 path[i].p_bh = NULL;
3020 i--;
Ritesh Harjani70aa1552020-05-10 11:54:55 +05303021 ext_debug(inode, "return to level %d\n", i);
Alex Tomasa86c6182006-10-11 01:21:03 -07003022 }
3023 }
3024
Eric Whitney9fe67142018-10-01 14:25:08 -04003025 trace_ext4_ext_remove_space_done(inode, start, end, depth, &partial,
3026 path->p_hdr->eh_entries);
Aditya Kalid8990242011-09-09 19:18:51 -04003027
Eric Whitney0756b902014-11-23 00:59:39 -05003028 /*
Eric Whitney9fe67142018-10-01 14:25:08 -04003029 * if there's a partial cluster and we have removed the first extent
3030 * in the file, then we also free the partial cluster, if any
Eric Whitney0756b902014-11-23 00:59:39 -05003031 */
Eric Whitney9fe67142018-10-01 14:25:08 -04003032 if (partial.state == tofree && err == 0) {
3033 int flags = get_default_free_blocks_flags(inode);
3034
3035 if (ext4_is_pending(inode, partial.lblk))
3036 flags |= EXT4_FREE_BLOCKS_RERESERVE_CLUSTER;
Aditya Kali7b415bf2011-09-09 19:04:51 -04003037 ext4_free_blocks(handle, inode, NULL,
Eric Whitney9fe67142018-10-01 14:25:08 -04003038 EXT4_C2B(sbi, partial.pclu),
3039 sbi->s_cluster_ratio, flags);
3040 if (flags & EXT4_FREE_BLOCKS_RERESERVE_CLUSTER)
3041 ext4_rereserve_cluster(inode, partial.lblk);
3042 partial.state = initial;
Aditya Kali7b415bf2011-09-09 19:04:51 -04003043 }
3044
Alex Tomasa86c6182006-10-11 01:21:03 -07003045 /* TODO: flexible tree reduction should be here */
3046 if (path->p_hdr->eh_entries == 0) {
3047 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07003048 * truncate to zero freed all the tree,
3049 * so we need to correct eh_depth
Alex Tomasa86c6182006-10-11 01:21:03 -07003050 */
3051 err = ext4_ext_get_access(handle, inode, path);
3052 if (err == 0) {
3053 ext_inode_hdr(inode)->eh_depth = 0;
3054 ext_inode_hdr(inode)->eh_max =
Theodore Ts'o55ad63b2009-08-28 10:40:33 -04003055 cpu_to_le16(ext4_ext_space_root(inode, 0));
Alex Tomasa86c6182006-10-11 01:21:03 -07003056 err = ext4_ext_dirty(handle, inode, path);
3057 }
3058 }
3059out:
Theodore Ts'ob7ea89a2014-09-01 14:39:09 -04003060 ext4_ext_drop_refs(path);
3061 kfree(path);
3062 path = NULL;
Theodore Ts'odfe50802014-09-01 14:37:09 -04003063 if (err == -EAGAIN)
3064 goto again;
Alex Tomasa86c6182006-10-11 01:21:03 -07003065 ext4_journal_stop(handle);
3066
3067 return err;
3068}
3069
/*
 * called at mount time
 *
 * Only does anything when the extents feature is enabled on @sb: it
 * announces the compiled-in debugging options (if any) and, with
 * EXTENTS_STATS, resets the per-superblock extent statistics.
 */
void ext4_ext_init(struct super_block *sb)
{
	/* further initialization, should it ever be needed, belongs below */
	if (!ext4_has_feature_extents(sb))
		return;

#if defined(AGGRESSIVE_TEST) || defined(CHECK_BINSEARCH) || defined(EXTENTS_STATS)
	/* one message; each enabled option appends its own fragment */
	printk(KERN_INFO "EXT4-fs: file extents enabled"
#ifdef AGGRESSIVE_TEST
	       ", aggressive tests"
#endif
#ifdef CHECK_BINSEARCH
	       ", check binsearch"
#endif
#ifdef EXTENTS_STATS
	       ", stats"
#endif
	       "\n");
#endif

#ifdef EXTENTS_STATS
	spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
	/* start min high and max low so the first sample sets both */
	EXT4_SB(sb)->s_ext_min = 1 << 30;
	EXT4_SB(sb)->s_ext_max = 0;
#endif
}
3100
/*
 * called at umount time
 *
 * With EXTENTS_STATS compiled in, prints the extent statistics gathered
 * for @sb, provided both the block and the extent counters are non-zero.
 * Otherwise there is nothing to do.
 */
void ext4_ext_release(struct super_block *sb)
{
#ifdef EXTENTS_STATS
	struct ext4_sb_info *sbi;
#endif

	if (!ext4_has_feature_extents(sb))
		return;

#ifdef EXTENTS_STATS
	sbi = EXT4_SB(sb);

	/* the average below divides by s_ext_extents, so skip empty stats */
	if (!sbi->s_ext_blocks || !sbi->s_ext_extents)
		return;

	printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
	       sbi->s_ext_blocks, sbi->s_ext_extents,
	       sbi->s_ext_blocks / sbi->s_ext_extents);
	printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
	       sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
#endif
}
3120
Zheng Liud7b2a002013-08-28 14:47:06 -04003121static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
3122{
3123 ext4_lblk_t ee_block;
3124 ext4_fsblk_t ee_pblock;
3125 unsigned int ee_len;
3126
3127 ee_block = le32_to_cpu(ex->ee_block);
3128 ee_len = ext4_ext_get_actual_len(ex);
3129 ee_pblock = ext4_ext_pblock(ex);
3130
3131 if (ee_len == 0)
3132 return 0;
3133
3134 return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
3135 EXTENT_STATUS_WRITTEN);
3136}
3137
Aneesh Kumar K.V093a0882008-04-29 08:11:12 -04003138/* FIXME!! we need to try to merge to left or right after zero-out */
3139static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
3140{
Lukas Czerner24075182010-10-27 21:30:06 -04003141 ext4_fsblk_t ee_pblock;
3142 unsigned int ee_len;
Aneesh Kumar K.V093a0882008-04-29 08:11:12 -04003143
Aneesh Kumar K.V093a0882008-04-29 08:11:12 -04003144 ee_len = ext4_ext_get_actual_len(ex);
Theodore Ts'obf89d162010-10-27 21:30:14 -04003145 ee_pblock = ext4_ext_pblock(ex);
Jan Kara53085fa2015-12-07 15:09:35 -05003146 return ext4_issue_zeroout(inode, le32_to_cpu(ex->ee_block), ee_pblock,
3147 ee_len);
Aneesh Kumar K.V093a0882008-04-29 08:11:12 -04003148}
3149
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003150/*
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003151 * ext4_split_extent_at() splits an extent at given block.
3152 *
3153 * @handle: the journal handle
3154 * @inode: the file inode
3155 * @path: the path to the extent
3156 * @split: the logical block where the extent is splitted.
3157 * @split_flags: indicates if the extent could be zeroout if split fails, and
Lukas Czerner556615d2014-04-20 23:45:47 -04003158 * the states(init or unwritten) of new extents.
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003159 * @flags: flags used to insert new extent to extent tree.
3160 *
3161 *
3162 * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
Keyur Patele4d7f2d2020-06-10 23:19:46 -04003163 * of which are determined by split_flag.
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003164 *
3165 * There are two cases:
3166 * a> the extent are splitted into two extent.
3167 * b> split is not needed, and just mark the extent.
3168 *
3169 * return 0 on success.
3170 */
3171static int ext4_split_extent_at(handle_t *handle,
3172 struct inode *inode,
Theodore Ts'odfe50802014-09-01 14:37:09 -04003173 struct ext4_ext_path **ppath,
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003174 ext4_lblk_t split,
3175 int split_flag,
3176 int flags)
3177{
Theodore Ts'odfe50802014-09-01 14:37:09 -04003178 struct ext4_ext_path *path = *ppath;
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003179 ext4_fsblk_t newblock;
3180 ext4_lblk_t ee_block;
Zheng Liuadb23552013-03-10 21:13:05 -04003181 struct ext4_extent *ex, newex, orig_ex, zero_ex;
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003182 struct ext4_extent *ex2 = NULL;
3183 unsigned int ee_len, depth;
3184 int err = 0;
3185
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003186 BUG_ON((split_flag & (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2)) ==
3187 (EXT4_EXT_DATA_VALID1 | EXT4_EXT_DATA_VALID2));
3188
Ritesh Harjani70aa1552020-05-10 11:54:55 +05303189 ext_debug(inode, "logical block %llu\n", (unsigned long long)split);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003190
3191 ext4_ext_show_leaf(inode, path);
3192
3193 depth = ext_depth(inode);
3194 ex = path[depth].p_ext;
3195 ee_block = le32_to_cpu(ex->ee_block);
3196 ee_len = ext4_ext_get_actual_len(ex);
3197 newblock = split - ee_block + ext4_ext_pblock(ex);
3198
3199 BUG_ON(split < ee_block || split >= (ee_block + ee_len));
Lukas Czerner556615d2014-04-20 23:45:47 -04003200 BUG_ON(!ext4_ext_is_unwritten(ex) &&
Dmitry Monakhov357b66f2013-03-04 00:34:34 -05003201 split_flag & (EXT4_EXT_MAY_ZEROOUT |
Lukas Czerner556615d2014-04-20 23:45:47 -04003202 EXT4_EXT_MARK_UNWRIT1 |
3203 EXT4_EXT_MARK_UNWRIT2));
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003204
3205 err = ext4_ext_get_access(handle, inode, path + depth);
3206 if (err)
3207 goto out;
3208
3209 if (split == ee_block) {
3210 /*
3211 * case b: block @split is the block that the extent begins with
3212 * then we just change the state of the extent, and splitting
3213 * is not needed.
3214 */
Lukas Czerner556615d2014-04-20 23:45:47 -04003215 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3216 ext4_ext_mark_unwritten(ex);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003217 else
3218 ext4_ext_mark_initialized(ex);
3219
3220 if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
Theodore Ts'oecb94f52012-08-17 09:44:17 -04003221 ext4_ext_try_to_merge(handle, inode, path, ex);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003222
Theodore Ts'oecb94f52012-08-17 09:44:17 -04003223 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003224 goto out;
3225 }
3226
3227 /* case a */
3228 memcpy(&orig_ex, ex, sizeof(orig_ex));
3229 ex->ee_len = cpu_to_le16(split - ee_block);
Lukas Czerner556615d2014-04-20 23:45:47 -04003230 if (split_flag & EXT4_EXT_MARK_UNWRIT1)
3231 ext4_ext_mark_unwritten(ex);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003232
3233 /*
3234 * path may lead to new leaf, not to original leaf any more
3235 * after ext4_ext_insert_extent() returns,
3236 */
3237 err = ext4_ext_dirty(handle, inode, path + depth);
3238 if (err)
3239 goto fix_extent_len;
3240
3241 ex2 = &newex;
3242 ex2->ee_block = cpu_to_le32(split);
3243 ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
3244 ext4_ext_store_pblock(ex2, newblock);
Lukas Czerner556615d2014-04-20 23:45:47 -04003245 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3246 ext4_ext_mark_unwritten(ex2);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003247
Theodore Ts'odfe50802014-09-01 14:37:09 -04003248 err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
Ye Bin082cd4e2021-05-06 22:10:42 +08003249 if (err != -ENOSPC && err != -EDQUOT)
3250 goto out;
3251
3252 if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003253 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
Zheng Liuadb23552013-03-10 21:13:05 -04003254 if (split_flag & EXT4_EXT_DATA_VALID1) {
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003255 err = ext4_ext_zeroout(inode, ex2);
Zheng Liuadb23552013-03-10 21:13:05 -04003256 zero_ex.ee_block = ex2->ee_block;
Zheng Liu8cde7ad2013-04-03 12:27:18 -04003257 zero_ex.ee_len = cpu_to_le16(
3258 ext4_ext_get_actual_len(ex2));
Zheng Liuadb23552013-03-10 21:13:05 -04003259 ext4_ext_store_pblock(&zero_ex,
3260 ext4_ext_pblock(ex2));
3261 } else {
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003262 err = ext4_ext_zeroout(inode, ex);
Zheng Liuadb23552013-03-10 21:13:05 -04003263 zero_ex.ee_block = ex->ee_block;
Zheng Liu8cde7ad2013-04-03 12:27:18 -04003264 zero_ex.ee_len = cpu_to_le16(
3265 ext4_ext_get_actual_len(ex));
Zheng Liuadb23552013-03-10 21:13:05 -04003266 ext4_ext_store_pblock(&zero_ex,
3267 ext4_ext_pblock(ex));
3268 }
3269 } else {
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003270 err = ext4_ext_zeroout(inode, &orig_ex);
Zheng Liuadb23552013-03-10 21:13:05 -04003271 zero_ex.ee_block = orig_ex.ee_block;
Zheng Liu8cde7ad2013-04-03 12:27:18 -04003272 zero_ex.ee_len = cpu_to_le16(
3273 ext4_ext_get_actual_len(&orig_ex));
Zheng Liuadb23552013-03-10 21:13:05 -04003274 ext4_ext_store_pblock(&zero_ex,
3275 ext4_ext_pblock(&orig_ex));
3276 }
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003277
Ye Bin082cd4e2021-05-06 22:10:42 +08003278 if (!err) {
3279 /* update the extent length and mark as initialized */
3280 ex->ee_len = cpu_to_le16(ee_len);
3281 ext4_ext_try_to_merge(handle, inode, path, ex);
3282 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3283 if (!err)
3284 /* update extent status tree */
3285 err = ext4_zeroout_es(inode, &zero_ex);
3286 /* If we failed at this point, we don't know in which
3287 * state the extent tree exactly is so don't try to fix
3288 * length of the original extent as it may do even more
3289 * damage.
3290 */
3291 goto out;
3292 }
3293 }
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003294
3295fix_extent_len:
3296 ex->ee_len = orig_ex.ee_len;
Harshad Shirwadkarb60ca332020-04-26 18:34:38 -07003297 /*
3298 * Ignore ext4_ext_dirty return value since we are already in error path
3299 * and err is a non-zero error code.
3300 */
Dmitry Monakhov29faed12014-07-27 22:30:29 -04003301 ext4_ext_dirty(handle, inode, path + path->p_depth);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003302 return err;
Ye Bin082cd4e2021-05-06 22:10:42 +08003303out:
3304 ext4_ext_show_leaf(inode, path);
3305 return err;
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003306}
3307
/*
 * ext4_split_extent() splits an extent and marks the extent which is
 * covered by @map as split_flag indicates
 *
 * It may result in splitting the extent into multiple extents (up to three)
 * There are three possibilities:
 *   a> There is no split required
 *   b> Splits in two extents: Split is happening at either end of the extent
 *   c> Splits in three extents: Someone is splitting in the middle of the
 *      extent
 *
 */
3318 */
3319static int ext4_split_extent(handle_t *handle,
3320 struct inode *inode,
Theodore Ts'odfe50802014-09-01 14:37:09 -04003321 struct ext4_ext_path **ppath,
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003322 struct ext4_map_blocks *map,
3323 int split_flag,
3324 int flags)
3325{
Theodore Ts'odfe50802014-09-01 14:37:09 -04003326 struct ext4_ext_path *path = *ppath;
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003327 ext4_lblk_t ee_block;
3328 struct ext4_extent *ex;
3329 unsigned int ee_len, depth;
3330 int err = 0;
Lukas Czerner556615d2014-04-20 23:45:47 -04003331 int unwritten;
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003332 int split_flag1, flags1;
Zheng Liu3a225672013-03-10 21:20:23 -04003333 int allocated = map->m_len;
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003334
3335 depth = ext_depth(inode);
3336 ex = path[depth].p_ext;
3337 ee_block = le32_to_cpu(ex->ee_block);
3338 ee_len = ext4_ext_get_actual_len(ex);
Lukas Czerner556615d2014-04-20 23:45:47 -04003339 unwritten = ext4_ext_is_unwritten(ex);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003340
3341 if (map->m_lblk + map->m_len < ee_block + ee_len) {
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003342 split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003343 flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
Lukas Czerner556615d2014-04-20 23:45:47 -04003344 if (unwritten)
3345 split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
3346 EXT4_EXT_MARK_UNWRIT2;
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003347 if (split_flag & EXT4_EXT_DATA_VALID2)
3348 split_flag1 |= EXT4_EXT_DATA_VALID1;
Theodore Ts'odfe50802014-09-01 14:37:09 -04003349 err = ext4_split_extent_at(handle, inode, ppath,
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003350 map->m_lblk + map->m_len, split_flag1, flags1);
Yongqiang Yang93917412011-05-22 20:49:12 -04003351 if (err)
3352 goto out;
Zheng Liu3a225672013-03-10 21:20:23 -04003353 } else {
3354 allocated = ee_len - (map->m_lblk - ee_block);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003355 }
Dmitry Monakhov357b66f2013-03-04 00:34:34 -05003356 /*
3357 * Update path is required because previous ext4_split_extent_at() may
3358 * result in split of original leaf or extent zeroout.
3359 */
Theodore Ts'o73c384c02020-05-07 10:50:28 -07003360 path = ext4_find_extent(inode, map->m_lblk, ppath, flags);
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003361 if (IS_ERR(path))
3362 return PTR_ERR(path);
Dmitry Monakhov357b66f2013-03-04 00:34:34 -05003363 depth = ext_depth(inode);
3364 ex = path[depth].p_ext;
Dmitry Monakhova18ed352014-04-13 15:41:13 -04003365 if (!ex) {
3366 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3367 (unsigned long) map->m_lblk);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04003368 return -EFSCORRUPTED;
Dmitry Monakhova18ed352014-04-13 15:41:13 -04003369 }
Lukas Czerner556615d2014-04-20 23:45:47 -04003370 unwritten = ext4_ext_is_unwritten(ex);
Dmitry Monakhov357b66f2013-03-04 00:34:34 -05003371 split_flag1 = 0;
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003372
3373 if (map->m_lblk >= ee_block) {
Dmitry Monakhov357b66f2013-03-04 00:34:34 -05003374 split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
Lukas Czerner556615d2014-04-20 23:45:47 -04003375 if (unwritten) {
3376 split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
Dmitry Monakhov357b66f2013-03-04 00:34:34 -05003377 split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
Lukas Czerner556615d2014-04-20 23:45:47 -04003378 EXT4_EXT_MARK_UNWRIT2);
Dmitry Monakhov357b66f2013-03-04 00:34:34 -05003379 }
Theodore Ts'odfe50802014-09-01 14:37:09 -04003380 err = ext4_split_extent_at(handle, inode, ppath,
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003381 map->m_lblk, split_flag1, flags);
3382 if (err)
3383 goto out;
3384 }
3385
3386 ext4_ext_show_leaf(inode, path);
3387out:
Zheng Liu3a225672013-03-10 21:20:23 -04003388 return err ? err : allocated;
Yongqiang Yang47ea3bb2011-05-03 12:23:07 -04003389}
3390
Amit Arora56055d32007-07-17 21:42:38 -04003391/*
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003392 * This function is called by ext4_ext_map_blocks() if someone tries to write
Lukas Czerner556615d2014-04-20 23:45:47 -04003393 * to an unwritten extent. It may result in splitting the unwritten
Lucas De Marchi25985ed2011-03-30 22:57:33 -03003394 * extent into multiple extents (up to three - one initialized and two
Lukas Czerner556615d2014-04-20 23:45:47 -04003395 * unwritten).
Amit Arora56055d32007-07-17 21:42:38 -04003396 * There are three possibilities:
3397 * a> There is no split required: Entire extent should be initialized
3398 * b> Splits in two extents: Write is happening at either end of the extent
3399 * c> Splits in three extents: Somone is writing in middle of the extent
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003400 *
3401 * Pre-conditions:
Lukas Czerner556615d2014-04-20 23:45:47 -04003402 * - The extent pointed to by 'path' is unwritten.
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003403 * - The extent pointed to by 'path' contains a superset
3404 * of the logical span [map->m_lblk, map->m_lblk + map->m_len).
3405 *
3406 * Post-conditions on success:
3407 * - the returned value is the number of blocks beyond map->l_lblk
3408 * that are allocated and initialized.
3409 * It is guaranteed to be >= map->m_len.
Amit Arora56055d32007-07-17 21:42:38 -04003410 */
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05003411static int ext4_ext_convert_to_initialized(handle_t *handle,
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003412 struct inode *inode,
3413 struct ext4_map_blocks *map,
Theodore Ts'odfe50802014-09-01 14:37:09 -04003414 struct ext4_ext_path **ppath,
Lukas Czerner27dd4382013-04-09 22:11:22 -04003415 int flags)
Amit Arora56055d32007-07-17 21:42:38 -04003416{
Theodore Ts'odfe50802014-09-01 14:37:09 -04003417 struct ext4_ext_path *path = *ppath;
Zheng Liu67a5da52012-08-17 09:54:17 -04003418 struct ext4_sb_info *sbi;
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003419 struct ext4_extent_header *eh;
Yongqiang Yang667eff32011-05-03 12:25:07 -04003420 struct ext4_map_blocks split_map;
Jan Kara4f8caa62017-05-26 17:40:52 -04003421 struct ext4_extent zero_ex1, zero_ex2;
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003422 struct ext4_extent *ex, *abut_ex;
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003423 ext4_lblk_t ee_block, eof_block;
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003424 unsigned int ee_len, depth, map_len = map->m_len;
3425 int allocated = 0, max_zeroout = 0;
Amit Arora56055d32007-07-17 21:42:38 -04003426 int err = 0;
Jan Kara4f8caa62017-05-26 17:40:52 -04003427 int split_flag = EXT4_EXT_DATA_VALID2;
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003428
Ritesh Harjani70aa1552020-05-10 11:54:55 +05303429 ext_debug(inode, "logical block %llu, max_blocks %u\n",
3430 (unsigned long long)map->m_lblk, map_len);
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003431
Zheng Liu67a5da52012-08-17 09:54:17 -04003432 sbi = EXT4_SB(inode->i_sb);
Jan Kara801674f2020-03-31 12:50:16 +02003433 eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3434 >> inode->i_sb->s_blocksize_bits;
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003435 if (eof_block < map->m_lblk + map_len)
3436 eof_block = map->m_lblk + map_len;
Amit Arora56055d32007-07-17 21:42:38 -04003437
3438 depth = ext_depth(inode);
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003439 eh = path[depth].p_hdr;
Amit Arora56055d32007-07-17 21:42:38 -04003440 ex = path[depth].p_ext;
3441 ee_block = le32_to_cpu(ex->ee_block);
3442 ee_len = ext4_ext_get_actual_len(ex);
Jan Kara4f8caa62017-05-26 17:40:52 -04003443 zero_ex1.ee_len = 0;
3444 zero_ex2.ee_len = 0;
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003445
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003446 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3447
3448 /* Pre-conditions */
Lukas Czerner556615d2014-04-20 23:45:47 -04003449 BUG_ON(!ext4_ext_is_unwritten(ex));
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003450 BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003451
3452 /*
3453 * Attempt to transfer newly initialized blocks from the currently
Lukas Czerner556615d2014-04-20 23:45:47 -04003454 * unwritten extent to its neighbor. This is much cheaper
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003455 * than an insertion followed by a merge as those involve costly
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003456 * memmove() calls. Transferring to the left is the common case in
3457 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
3458 * followed by append writes.
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003459 *
3460 * Limitations of the current logic:
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003461 * - L1: we do not deal with writes covering the whole extent.
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003462 * This would require removing the extent if the transfer
3463 * is possible.
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003464 * - L2: we only attempt to merge with an extent stored in the
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003465 * same extent tree node.
3466 */
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003467 if ((map->m_lblk == ee_block) &&
3468 /* See if we can merge left */
3469 (map_len < ee_len) && /*L1*/
3470 (ex > EXT_FIRST_EXTENT(eh))) { /*L2*/
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003471 ext4_lblk_t prev_lblk;
3472 ext4_fsblk_t prev_pblk, ee_pblk;
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003473 unsigned int prev_len;
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003474
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003475 abut_ex = ex - 1;
3476 prev_lblk = le32_to_cpu(abut_ex->ee_block);
3477 prev_len = ext4_ext_get_actual_len(abut_ex);
3478 prev_pblk = ext4_ext_pblock(abut_ex);
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003479 ee_pblk = ext4_ext_pblock(ex);
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003480
3481 /*
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003482 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003483 * upon those conditions:
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003484 * - C1: abut_ex is initialized,
3485 * - C2: abut_ex is logically abutting ex,
3486 * - C3: abut_ex is physically abutting ex,
3487 * - C4: abut_ex can receive the additional blocks without
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003488 * overflowing the (initialized) length limit.
3489 */
Lukas Czerner556615d2014-04-20 23:45:47 -04003490 if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003491 ((prev_lblk + prev_len) == ee_block) && /*C2*/
3492 ((prev_pblk + prev_len) == ee_pblk) && /*C3*/
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003493 (prev_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003494 err = ext4_ext_get_access(handle, inode, path + depth);
3495 if (err)
3496 goto out;
3497
3498 trace_ext4_ext_convert_to_initialized_fastpath(inode,
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003499 map, ex, abut_ex);
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003500
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003501 /* Shift the start of ex by 'map_len' blocks */
3502 ex->ee_block = cpu_to_le32(ee_block + map_len);
3503 ext4_ext_store_pblock(ex, ee_pblk + map_len);
3504 ex->ee_len = cpu_to_le16(ee_len - map_len);
Lukas Czerner556615d2014-04-20 23:45:47 -04003505 ext4_ext_mark_unwritten(ex); /* Restore the flag */
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003506
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003507 /* Extend abut_ex by 'map_len' blocks */
3508 abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003509
3510 /* Result: number of initialized blocks past m_lblk */
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003511 allocated = map_len;
3512 }
3513 } else if (((map->m_lblk + map_len) == (ee_block + ee_len)) &&
3514 (map_len < ee_len) && /*L1*/
3515 ex < EXT_LAST_EXTENT(eh)) { /*L2*/
3516 /* See if we can merge right */
3517 ext4_lblk_t next_lblk;
3518 ext4_fsblk_t next_pblk, ee_pblk;
3519 unsigned int next_len;
3520
3521 abut_ex = ex + 1;
3522 next_lblk = le32_to_cpu(abut_ex->ee_block);
3523 next_len = ext4_ext_get_actual_len(abut_ex);
3524 next_pblk = ext4_ext_pblock(abut_ex);
3525 ee_pblk = ext4_ext_pblock(ex);
3526
3527 /*
3528 * A transfer of blocks from 'ex' to 'abut_ex' is allowed
3529 * upon those conditions:
3530 * - C1: abut_ex is initialized,
3531 * - C2: abut_ex is logically abutting ex,
3532 * - C3: abut_ex is physically abutting ex,
3533 * - C4: abut_ex can receive the additional blocks without
3534 * overflowing the (initialized) length limit.
3535 */
Lukas Czerner556615d2014-04-20 23:45:47 -04003536 if ((!ext4_ext_is_unwritten(abut_ex)) && /*C1*/
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003537 ((map->m_lblk + map_len) == next_lblk) && /*C2*/
3538 ((ee_pblk + ee_len) == next_pblk) && /*C3*/
3539 (next_len < (EXT_INIT_MAX_LEN - map_len))) { /*C4*/
3540 err = ext4_ext_get_access(handle, inode, path + depth);
3541 if (err)
3542 goto out;
3543
3544 trace_ext4_ext_convert_to_initialized_fastpath(inode,
3545 map, ex, abut_ex);
3546
3547 /* Shift the start of abut_ex by 'map_len' blocks */
3548 abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
3549 ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
3550 ex->ee_len = cpu_to_le16(ee_len - map_len);
Lukas Czerner556615d2014-04-20 23:45:47 -04003551 ext4_ext_mark_unwritten(ex); /* Restore the flag */
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003552
3553 /* Extend abut_ex by 'map_len' blocks */
3554 abut_ex->ee_len = cpu_to_le16(next_len + map_len);
3555
3556 /* Result: number of initialized blocks past m_lblk */
3557 allocated = map_len;
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003558 }
3559 }
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003560 if (allocated) {
3561 /* Mark the block containing both extents as dirty */
Harshad Shirwadkarb60ca332020-04-26 18:34:38 -07003562 err = ext4_ext_dirty(handle, inode, path + depth);
Lukas Czernerbc2d9db2013-04-03 23:33:27 -04003563
3564 /* Update path to point to the right extent */
3565 path[depth].p_ext = abut_ex;
3566 goto out;
3567 } else
3568 allocated = ee_len - (map->m_lblk - ee_block);
Eric Gouriou6f91bc52011-10-27 11:43:23 -04003569
Yongqiang Yang667eff32011-05-03 12:25:07 -04003570 WARN_ON(map->m_lblk < ee_block);
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003571 /*
3572 * It is safe to convert extent to initialized via explicit
Yongqiang Yang9e740562014-01-06 14:05:23 -05003573 * zeroout only if extent is fully inside i_size or new_size.
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003574 */
Yongqiang Yang667eff32011-05-03 12:25:07 -04003575 split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003576
Zheng Liu67a5da52012-08-17 09:54:17 -04003577 if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3578 max_zeroout = sbi->s_extent_max_zeroout_kb >>
Lukas Czerner4f42f802013-03-12 12:40:04 -04003579 (inode->i_sb->s_blocksize_bits - 10);
Zheng Liu67a5da52012-08-17 09:54:17 -04003580
Amit Arora56055d32007-07-17 21:42:38 -04003581 /*
Jan Kara4f8caa62017-05-26 17:40:52 -04003582 * five cases:
Yongqiang Yang667eff32011-05-03 12:25:07 -04003583 * 1. split the extent into three extents.
Jan Kara4f8caa62017-05-26 17:40:52 -04003584 * 2. split the extent into two extents, zeroout the head of the first
3585 * extent.
3586 * 3. split the extent into two extents, zeroout the tail of the second
3587 * extent.
Yongqiang Yang667eff32011-05-03 12:25:07 -04003588 * 4. split the extent into two extents with out zeroout.
Jan Kara4f8caa62017-05-26 17:40:52 -04003589 * 5. no splitting needed, just possibly zeroout the head and / or the
3590 * tail of the extent.
Amit Arora56055d32007-07-17 21:42:38 -04003591 */
Yongqiang Yang667eff32011-05-03 12:25:07 -04003592 split_map.m_lblk = map->m_lblk;
3593 split_map.m_len = map->m_len;
3594
Jan Kara4f8caa62017-05-26 17:40:52 -04003595 if (max_zeroout && (allocated > split_map.m_len)) {
Zheng Liu67a5da52012-08-17 09:54:17 -04003596 if (allocated <= max_zeroout) {
Jan Kara4f8caa62017-05-26 17:40:52 -04003597 /* case 3 or 5 */
3598 zero_ex1.ee_block =
3599 cpu_to_le32(split_map.m_lblk +
3600 split_map.m_len);
3601 zero_ex1.ee_len =
3602 cpu_to_le16(allocated - split_map.m_len);
3603 ext4_ext_store_pblock(&zero_ex1,
3604 ext4_ext_pblock(ex) + split_map.m_lblk +
3605 split_map.m_len - ee_block);
3606 err = ext4_ext_zeroout(inode, &zero_ex1);
Amit Arora56055d32007-07-17 21:42:38 -04003607 if (err)
Theodore Ts'o308c57c2021-08-13 11:20:48 -04003608 goto fallback;
Yongqiang Yang667eff32011-05-03 12:25:07 -04003609 split_map.m_len = allocated;
Jan Kara4f8caa62017-05-26 17:40:52 -04003610 }
3611 if (split_map.m_lblk - ee_block + split_map.m_len <
3612 max_zeroout) {
3613 /* case 2 or 5 */
3614 if (split_map.m_lblk != ee_block) {
3615 zero_ex2.ee_block = ex->ee_block;
3616 zero_ex2.ee_len = cpu_to_le16(split_map.m_lblk -
Yongqiang Yang667eff32011-05-03 12:25:07 -04003617 ee_block);
Jan Kara4f8caa62017-05-26 17:40:52 -04003618 ext4_ext_store_pblock(&zero_ex2,
Yongqiang Yang667eff32011-05-03 12:25:07 -04003619 ext4_ext_pblock(ex));
Jan Kara4f8caa62017-05-26 17:40:52 -04003620 err = ext4_ext_zeroout(inode, &zero_ex2);
Yongqiang Yang667eff32011-05-03 12:25:07 -04003621 if (err)
Theodore Ts'o308c57c2021-08-13 11:20:48 -04003622 goto fallback;
Yongqiang Yang667eff32011-05-03 12:25:07 -04003623 }
3624
Jan Kara4f8caa62017-05-26 17:40:52 -04003625 split_map.m_len += split_map.m_lblk - ee_block;
Yongqiang Yang667eff32011-05-03 12:25:07 -04003626 split_map.m_lblk = ee_block;
Allison Henderson9b940f82011-05-16 10:11:09 -04003627 allocated = map->m_len;
Amit Arora56055d32007-07-17 21:42:38 -04003628 }
3629 }
Yongqiang Yang667eff32011-05-03 12:25:07 -04003630
Theodore Ts'o308c57c2021-08-13 11:20:48 -04003631fallback:
Jan Karaae9e9c6a2014-10-30 10:53:17 -04003632 err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
3633 flags);
3634 if (err > 0)
3635 err = 0;
Amit Arora56055d32007-07-17 21:42:38 -04003636out:
Zheng Liuadb23552013-03-10 21:13:05 -04003637 /* If we have gotten a failure, don't zero out status tree */
Jan Kara4f8caa62017-05-26 17:40:52 -04003638 if (!err) {
3639 err = ext4_zeroout_es(inode, &zero_ex1);
3640 if (!err)
3641 err = ext4_zeroout_es(inode, &zero_ex2);
3642 }
Amit Arora56055d32007-07-17 21:42:38 -04003643 return err ? err : allocated;
3644}
3645
Aneesh Kumar K.Vc278bfe2008-01-28 23:58:27 -05003646/*
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003647 * This function is called by ext4_ext_map_blocks() from
Mingming Cao00314622009-09-28 15:49:08 -04003648 * ext4_get_blocks_dio_write() when DIO to write
Lukas Czerner556615d2014-04-20 23:45:47 -04003649 * to an unwritten extent.
Mingming Cao00314622009-09-28 15:49:08 -04003650 *
Lukas Czerner556615d2014-04-20 23:45:47 -04003651 * Writing to an unwritten extent may result in splitting the unwritten
3652 * extent into multiple initialized/unwritten extents (up to three)
Mingming Cao00314622009-09-28 15:49:08 -04003653 * There are three possibilities:
Lukas Czerner556615d2014-04-20 23:45:47 -04003654 * a> There is no split required: Entire extent should be unwritten
Mingming Cao00314622009-09-28 15:49:08 -04003655 * b> Splits in two extents: Write is happening at either end of the extent
3656 * c> Splits in three extents: Somone is writing in middle of the extent
3657 *
Lukas Czernerb8a86842014-03-18 18:05:35 -04003658 * This works the same way in the case of initialized -> unwritten conversion.
3659 *
Mingming Cao00314622009-09-28 15:49:08 -04003660 * One of more index blocks maybe needed if the extent tree grow after
Lukas Czerner556615d2014-04-20 23:45:47 -04003661 * the unwritten extent split. To prevent ENOSPC occur at the IO
3662 * complete, we need to split the unwritten extent before DIO submit
3663 * the IO. The unwritten extent called at this time will be split
3664 * into three unwritten extent(at most). After IO complete, the part
Mingming Cao00314622009-09-28 15:49:08 -04003665 * being filled will be convert to initialized by the end_io callback function
3666 * via ext4_convert_unwritten_extents().
Mingmingba230c32009-11-06 04:01:23 -05003667 *
Lukas Czerner556615d2014-04-20 23:45:47 -04003668 * Returns the size of unwritten extent to be written on success.
Mingming Cao00314622009-09-28 15:49:08 -04003669 */
Lukas Czernerb8a86842014-03-18 18:05:35 -04003670static int ext4_split_convert_extents(handle_t *handle,
Mingming Cao00314622009-09-28 15:49:08 -04003671 struct inode *inode,
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003672 struct ext4_map_blocks *map,
Theodore Ts'odfe50802014-09-01 14:37:09 -04003673 struct ext4_ext_path **ppath,
Mingming Cao00314622009-09-28 15:49:08 -04003674 int flags)
3675{
Theodore Ts'odfe50802014-09-01 14:37:09 -04003676 struct ext4_ext_path *path = *ppath;
Yongqiang Yang667eff32011-05-03 12:25:07 -04003677 ext4_lblk_t eof_block;
3678 ext4_lblk_t ee_block;
3679 struct ext4_extent *ex;
3680 unsigned int ee_len;
3681 int split_flag = 0, depth;
Mingming Cao00314622009-09-28 15:49:08 -04003682
Ritesh Harjani70aa1552020-05-10 11:54:55 +05303683 ext_debug(inode, "logical block %llu, max_blocks %u\n",
Lukas Czernerb8a86842014-03-18 18:05:35 -04003684 (unsigned long long)map->m_lblk, map->m_len);
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003685
Jan Kara801674f2020-03-31 12:50:16 +02003686 eof_block = (EXT4_I(inode)->i_disksize + inode->i_sb->s_blocksize - 1)
3687 >> inode->i_sb->s_blocksize_bits;
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003688 if (eof_block < map->m_lblk + map->m_len)
3689 eof_block = map->m_lblk + map->m_len;
Mingming Cao00314622009-09-28 15:49:08 -04003690 /*
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003691 * It is safe to convert extent to initialized via explicit
Keyur Patele4d7f2d2020-06-10 23:19:46 -04003692 * zeroout only if extent is fully inside i_size or new_size.
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003693 */
Yongqiang Yang667eff32011-05-03 12:25:07 -04003694 depth = ext_depth(inode);
3695 ex = path[depth].p_ext;
3696 ee_block = le32_to_cpu(ex->ee_block);
3697 ee_len = ext4_ext_get_actual_len(ex);
Dmitry Monakhov21ca0872010-05-16 06:00:00 -04003698
Lukas Czernerb8a86842014-03-18 18:05:35 -04003699 /* Convert to unwritten */
3700 if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) {
3701 split_flag |= EXT4_EXT_DATA_VALID1;
3702 /* Convert to initialized */
3703 } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
3704 split_flag |= ee_block + ee_len <= eof_block ?
3705 EXT4_EXT_MAY_ZEROOUT : 0;
Lukas Czerner556615d2014-04-20 23:45:47 -04003706 split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
Lukas Czernerb8a86842014-03-18 18:05:35 -04003707 }
Yongqiang Yang667eff32011-05-03 12:25:07 -04003708 flags |= EXT4_GET_BLOCKS_PRE_IO;
Theodore Ts'odfe50802014-09-01 14:37:09 -04003709 return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
Mingming Cao00314622009-09-28 15:49:08 -04003710}
Yongqiang Yang197217a2011-05-03 11:45:29 -04003711
Jiaying Zhangc7064ef2010-03-02 13:28:44 -05003712static int ext4_convert_unwritten_extents_endio(handle_t *handle,
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003713 struct inode *inode,
3714 struct ext4_map_blocks *map,
Theodore Ts'odfe50802014-09-01 14:37:09 -04003715 struct ext4_ext_path **ppath)
Mingming Cao00314622009-09-28 15:49:08 -04003716{
Theodore Ts'odfe50802014-09-01 14:37:09 -04003717 struct ext4_ext_path *path = *ppath;
Mingming Cao00314622009-09-28 15:49:08 -04003718 struct ext4_extent *ex;
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003719 ext4_lblk_t ee_block;
3720 unsigned int ee_len;
Mingming Cao00314622009-09-28 15:49:08 -04003721 int depth;
3722 int err = 0;
Mingming Cao00314622009-09-28 15:49:08 -04003723
3724 depth = ext_depth(inode);
Mingming Cao00314622009-09-28 15:49:08 -04003725 ex = path[depth].p_ext;
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003726 ee_block = le32_to_cpu(ex->ee_block);
3727 ee_len = ext4_ext_get_actual_len(ex);
Mingming Cao00314622009-09-28 15:49:08 -04003728
Ritesh Harjani70aa1552020-05-10 11:54:55 +05303729 ext_debug(inode, "logical block %llu, max_blocks %u\n",
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003730 (unsigned long long)ee_block, ee_len);
3731
Dmitry Monakhovff95ec22013-03-04 00:41:05 -05003732 /* If extent is larger than requested it is a clear sign that we still
3733 * have some extent state machine issues left. So extent_split is still
3734 * required.
3735 * TODO: Once all related issues will be fixed this situation should be
3736 * illegal.
3737 */
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003738 if (ee_block != map->m_lblk || ee_len > map->m_len) {
Rakesh Pandite3d550c2019-08-22 22:53:46 -04003739#ifdef CONFIG_EXT4_DEBUG
3740 ext4_warning(inode->i_sb, "Inode (%ld) finished: extent logical block %llu,"
Jakub Wilk8d2ae1c2016-04-27 01:11:21 -04003741 " len %u; IO logical block %llu, len %u",
Dmitry Monakhovff95ec22013-03-04 00:41:05 -05003742 inode->i_ino, (unsigned long long)ee_block, ee_len,
3743 (unsigned long long)map->m_lblk, map->m_len);
3744#endif
Theodore Ts'odfe50802014-09-01 14:37:09 -04003745 err = ext4_split_convert_extents(handle, inode, map, ppath,
Lukas Czernerb8a86842014-03-18 18:05:35 -04003746 EXT4_GET_BLOCKS_CONVERT);
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003747 if (err < 0)
Theodore Ts'odfe50802014-09-01 14:37:09 -04003748 return err;
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04003749 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
Theodore Ts'odfe50802014-09-01 14:37:09 -04003750 if (IS_ERR(path))
3751 return PTR_ERR(path);
Dmitry Monakhovdee1f972012-10-10 01:04:58 -04003752 depth = ext_depth(inode);
3753 ex = path[depth].p_ext;
3754 }
Yongqiang Yang197217a2011-05-03 11:45:29 -04003755
Mingming Cao00314622009-09-28 15:49:08 -04003756 err = ext4_ext_get_access(handle, inode, path + depth);
3757 if (err)
3758 goto out;
3759 /* first mark the extent as initialized */
3760 ext4_ext_mark_initialized(ex);
3761
Yongqiang Yang197217a2011-05-03 11:45:29 -04003762 /* note: ext4_ext_correct_indexes() isn't needed here because
3763 * borders are not changed
Mingming Cao00314622009-09-28 15:49:08 -04003764 */
Theodore Ts'oecb94f52012-08-17 09:44:17 -04003765 ext4_ext_try_to_merge(handle, inode, path, ex);
Yongqiang Yang197217a2011-05-03 11:45:29 -04003766
Mingming Cao00314622009-09-28 15:49:08 -04003767 /* Mark modified extent as dirty */
Theodore Ts'oecb94f52012-08-17 09:44:17 -04003768 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
Mingming Cao00314622009-09-28 15:49:08 -04003769out:
3770 ext4_ext_show_leaf(inode, path);
3771 return err;
3772}
3773
3774static int
Theodore Ts'oe8b83d932014-09-01 14:35:09 -04003775convert_initialized_extent(handle_t *handle, struct inode *inode,
3776 struct ext4_map_blocks *map,
Eric Whitney29c6eaf2016-02-22 22:58:55 -05003777 struct ext4_ext_path **ppath,
Eric Whitneyf064a9d2020-02-18 15:26:56 -05003778 unsigned int *allocated)
Lukas Czernerb8a86842014-03-18 18:05:35 -04003779{
Theodore Ts'o4f224b82014-09-01 14:36:09 -04003780 struct ext4_ext_path *path = *ppath;
Theodore Ts'oe8b83d932014-09-01 14:35:09 -04003781 struct ext4_extent *ex;
3782 ext4_lblk_t ee_block;
3783 unsigned int ee_len;
3784 int depth;
Lukas Czernerb8a86842014-03-18 18:05:35 -04003785 int err = 0;
3786
3787 /*
3788 * Make sure that the extent is no bigger than we support with
Lukas Czerner556615d2014-04-20 23:45:47 -04003789 * unwritten extent
Lukas Czernerb8a86842014-03-18 18:05:35 -04003790 */
Lukas Czerner556615d2014-04-20 23:45:47 -04003791 if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
3792 map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
Lukas Czernerb8a86842014-03-18 18:05:35 -04003793
Theodore Ts'oe8b83d932014-09-01 14:35:09 -04003794 depth = ext_depth(inode);
3795 ex = path[depth].p_ext;
3796 ee_block = le32_to_cpu(ex->ee_block);
3797 ee_len = ext4_ext_get_actual_len(ex);
3798
Ritesh Harjani70aa1552020-05-10 11:54:55 +05303799 ext_debug(inode, "logical block %llu, max_blocks %u\n",
Theodore Ts'oe8b83d932014-09-01 14:35:09 -04003800 (unsigned long long)ee_block, ee_len);
3801
3802 if (ee_block != map->m_lblk || ee_len > map->m_len) {
Theodore Ts'odfe50802014-09-01 14:37:09 -04003803 err = ext4_split_convert_extents(handle, inode, map, ppath,
Theodore Ts'oe8b83d932014-09-01 14:35:09 -04003804 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3805 if (err < 0)
3806 return err;
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04003807 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
Theodore Ts'oe8b83d932014-09-01 14:35:09 -04003808 if (IS_ERR(path))
3809 return PTR_ERR(path);
3810 depth = ext_depth(inode);
3811 ex = path[depth].p_ext;
3812 if (!ex) {
3813 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3814 (unsigned long) map->m_lblk);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04003815 return -EFSCORRUPTED;
Theodore Ts'oe8b83d932014-09-01 14:35:09 -04003816 }
3817 }
3818
3819 err = ext4_ext_get_access(handle, inode, path + depth);
3820 if (err)
3821 return err;
3822 /* first mark the extent as unwritten */
3823 ext4_ext_mark_unwritten(ex);
3824
3825 /* note: ext4_ext_correct_indexes() isn't needed here because
3826 * borders are not changed
3827 */
3828 ext4_ext_try_to_merge(handle, inode, path, ex);
3829
3830 /* Mark modified extent as dirty */
3831 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3832 if (err)
3833 return err;
3834 ext4_ext_show_leaf(inode, path);
3835
3836 ext4_update_inode_fsync_trans(handle, inode, 1);
Eric Whitney4337ecd2020-02-11 16:02:16 -05003837
Lukas Czernerb8a86842014-03-18 18:05:35 -04003838 map->m_flags |= EXT4_MAP_UNWRITTEN;
Eric Whitneyf064a9d2020-02-18 15:26:56 -05003839 if (*allocated > map->m_len)
3840 *allocated = map->m_len;
3841 map->m_len = *allocated;
3842 return 0;
Lukas Czernerb8a86842014-03-18 18:05:35 -04003843}
3844
3845static int
Lukas Czerner556615d2014-04-20 23:45:47 -04003846ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003847 struct ext4_map_blocks *map,
Theodore Ts'odfe50802014-09-01 14:37:09 -04003848 struct ext4_ext_path **ppath, int flags,
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003849 unsigned int allocated, ext4_fsblk_t newblock)
Mingming Cao00314622009-09-28 15:49:08 -04003850{
Ritesh Harjani8ec2d312020-05-10 11:54:53 +05303851 struct ext4_ext_path __maybe_unused *path = *ppath;
Mingming Cao00314622009-09-28 15:49:08 -04003852 int ret = 0;
3853 int err = 0;
3854
Ritesh Harjani70aa1552020-05-10 11:54:55 +05303855 ext_debug(inode, "logical block %llu, max_blocks %u, flags 0x%x, allocated %u\n",
3856 (unsigned long long)map->m_lblk, map->m_len, flags,
3857 allocated);
Mingming Cao00314622009-09-28 15:49:08 -04003858 ext4_ext_show_leaf(inode, path);
3859
Lukas Czerner27dd4382013-04-09 22:11:22 -04003860 /*
Lukas Czerner556615d2014-04-20 23:45:47 -04003861 * When writing into unwritten space, we should not fail to
Lukas Czerner27dd4382013-04-09 22:11:22 -04003862 * allocate metadata blocks for the new extent block if needed.
3863 */
3864 flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
3865
Lukas Czerner556615d2014-04-20 23:45:47 -04003866 trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
Zheng Liub5645532012-11-08 14:33:43 -05003867 allocated, newblock);
Aditya Kalid8990242011-09-09 19:18:51 -04003868
Eric Whitney779e2652020-04-30 14:53:19 -04003869 /* get_block() before submitting IO, split the extent */
Lukas Czernerc8b459f2014-05-12 12:55:07 -04003870 if (flags & EXT4_GET_BLOCKS_PRE_IO) {
Theodore Ts'odfe50802014-09-01 14:37:09 -04003871 ret = ext4_split_convert_extents(handle, inode, map, ppath,
3872 flags | EXT4_GET_BLOCKS_CONVERT);
Eric Whitney779e2652020-04-30 14:53:19 -04003873 if (ret < 0) {
3874 err = ret;
3875 goto out2;
3876 }
3877 /*
3878 * shouldn't get a 0 return when splitting an extent unless
3879 * m_len is 0 (bug) or extent has been corrupted
3880 */
3881 if (unlikely(ret == 0)) {
3882 EXT4_ERROR_INODE(inode,
3883 "unexpected ret == 0, m_len = %u",
3884 map->m_len);
3885 err = -EFSCORRUPTED;
3886 goto out2;
3887 }
Zheng Liua25a4e12013-02-18 00:28:04 -05003888 map->m_flags |= EXT4_MAP_UNWRITTEN;
Mingming Cao00314622009-09-28 15:49:08 -04003889 goto out;
3890 }
Jiaying Zhangc7064ef2010-03-02 13:28:44 -05003891 /* IO end_io complete, convert the filled extent to written */
Lukas Czernerc8b459f2014-05-12 12:55:07 -04003892 if (flags & EXT4_GET_BLOCKS_CONVERT) {
Eric Whitneybee6cf02020-04-30 14:53:18 -04003893 err = ext4_convert_unwritten_extents_endio(handle, inode, map,
Theodore Ts'odfe50802014-09-01 14:37:09 -04003894 ppath);
Eric Whitneybee6cf02020-04-30 14:53:18 -04003895 if (err < 0)
3896 goto out2;
3897 ext4_update_inode_fsync_trans(handle, inode, 1);
3898 goto map_out;
Mingming Cao00314622009-09-28 15:49:08 -04003899 }
Eric Whitneybee6cf02020-04-30 14:53:18 -04003900 /* buffered IO cases */
Mingming Cao00314622009-09-28 15:49:08 -04003901 /*
3902 * repeat fallocate creation request
3903 * we already have an unwritten extent
3904 */
Lukas Czerner556615d2014-04-20 23:45:47 -04003905 if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
Zheng Liua25a4e12013-02-18 00:28:04 -05003906 map->m_flags |= EXT4_MAP_UNWRITTEN;
Mingming Cao00314622009-09-28 15:49:08 -04003907 goto map_out;
Zheng Liua25a4e12013-02-18 00:28:04 -05003908 }
Mingming Cao00314622009-09-28 15:49:08 -04003909
3910 /* buffered READ or buffered write_begin() lookup */
3911 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
3912 /*
3913 * We have blocks reserved already. We
3914 * return allocated blocks so that delalloc
3915 * won't do block reservation for us. But
3916 * the buffer head will be unmapped so that
3917 * a read from the block returns 0s.
3918 */
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003919 map->m_flags |= EXT4_MAP_UNWRITTEN;
Mingming Cao00314622009-09-28 15:49:08 -04003920 goto out1;
3921 }
3922
Eric Whitneybe809e12020-04-30 14:53:20 -04003923 /*
3924 * Default case when (flags & EXT4_GET_BLOCKS_CREATE) == 1.
3925 * For buffered writes, at writepage time, etc. Convert a
3926 * discovered unwritten extent to written.
3927 */
Theodore Ts'odfe50802014-09-01 14:37:09 -04003928 ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
Eric Whitneybe809e12020-04-30 14:53:20 -04003929 if (ret < 0) {
Mingming Cao00314622009-09-28 15:49:08 -04003930 err = ret;
3931 goto out2;
Eric Whitney779e2652020-04-30 14:53:19 -04003932 }
Eric Whitneybe809e12020-04-30 14:53:20 -04003933 ext4_update_inode_fsync_trans(handle, inode, 1);
3934 /*
3935 * shouldn't get a 0 return when converting an unwritten extent
3936 * unless m_len is 0 (bug) or extent has been corrupted
3937 */
3938 if (unlikely(ret == 0)) {
3939 EXT4_ERROR_INODE(inode, "unexpected ret == 0, m_len = %u",
3940 map->m_len);
3941 err = -EFSCORRUPTED;
3942 goto out2;
3943 }
3944
Eric Whitney779e2652020-04-30 14:53:19 -04003945out:
3946 allocated = ret;
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003947 map->m_flags |= EXT4_MAP_NEW;
Mingming Cao00314622009-09-28 15:49:08 -04003948map_out:
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003949 map->m_flags |= EXT4_MAP_MAPPED;
Mingming Cao00314622009-09-28 15:49:08 -04003950out1:
Eric Whitneybee6cf02020-04-30 14:53:18 -04003951 map->m_pblk = newblock;
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003952 if (allocated > map->m_len)
3953 allocated = map->m_len;
Theodore Ts'oe35fd662010-05-16 19:00:00 -04003954 map->m_len = allocated;
Eric Whitneybee6cf02020-04-30 14:53:18 -04003955 ext4_ext_show_leaf(inode, path);
Mingming Cao00314622009-09-28 15:49:08 -04003956out2:
Mingming Cao00314622009-09-28 15:49:08 -04003957 return err ? err : allocated;
3958}
Theodore Ts'o58590b02010-10-27 21:23:12 -04003959
Mingming Cao00314622009-09-28 15:49:08 -04003960/*
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04003961 * get_implied_cluster_alloc - check to see if the requested
3962 * allocation (in the map structure) overlaps with a cluster already
3963 * allocated in an extent.
Aditya Kalid8990242011-09-09 19:18:51 -04003964 * @sb The filesystem superblock structure
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04003965 * @map The requested lblk->pblk mapping
3966 * @ex The extent structure which might contain an implied
3967 * cluster allocation
3968 *
3969 * This function is called by ext4_ext_map_blocks() after we failed to
3970 * find blocks that were already in the inode's extent tree. Hence,
3971 * we know that the beginning of the requested region cannot overlap
3972 * the extent from the inode's extent tree. There are three cases we
3973 * want to catch. The first is this case:
3974 *
3975 * |--- cluster # N--|
3976 * |--- extent ---| |---- requested region ---|
3977 * |==========|
3978 *
3979 * The second case that we need to test for is this one:
3980 *
3981 * |--------- cluster # N ----------------|
3982 * |--- requested region --| |------- extent ----|
3983 * |=======================|
3984 *
3985 * The third case is when the requested region lies between two extents
3986 * within the same cluster:
3987 * |------------- cluster # N-------------|
3988 * |----- ex -----| |---- ex_right ----|
3989 * |------ requested region ------|
3990 * |================|
3991 *
3992 * In each of the above cases, we need to set the map->m_pblk and
3993 * map->m_len so it corresponds to the return the extent labelled as
3994 * "|====|" from cluster #N, since it is already in use for data in
3995 * cluster EXT4_B2C(sbi, map->m_lblk). We will then return 1 to
3996 * signal to ext4_ext_map_blocks() that map->m_pblk should be treated
3997 * as a new "allocated" block region. Otherwise, we will return 0 and
3998 * ext4_ext_map_blocks() will then allocate one or more new clusters
3999 * by calling ext4_mb_new_blocks().
4000 */
Aditya Kalid8990242011-09-09 19:18:51 -04004001static int get_implied_cluster_alloc(struct super_block *sb,
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004002 struct ext4_map_blocks *map,
4003 struct ext4_extent *ex,
4004 struct ext4_ext_path *path)
4005{
Aditya Kalid8990242011-09-09 19:18:51 -04004006 struct ext4_sb_info *sbi = EXT4_SB(sb);
Theodore Ts'of5a44db2013-12-20 09:29:35 -05004007 ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004008 ext4_lblk_t ex_cluster_start, ex_cluster_end;
Curt Wohlgemuth14d7f3e2011-12-18 17:39:02 -05004009 ext4_lblk_t rr_cluster_start;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004010 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
4011 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
4012 unsigned short ee_len = ext4_ext_get_actual_len(ex);
4013
4014 /* The extent passed in that we are trying to match */
4015 ex_cluster_start = EXT4_B2C(sbi, ee_block);
4016 ex_cluster_end = EXT4_B2C(sbi, ee_block + ee_len - 1);
4017
4018 /* The requested region passed into ext4_map_blocks() */
4019 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004020
4021 if ((rr_cluster_start == ex_cluster_end) ||
4022 (rr_cluster_start == ex_cluster_start)) {
4023 if (rr_cluster_start == ex_cluster_end)
4024 ee_start += ee_len - 1;
Theodore Ts'of5a44db2013-12-20 09:29:35 -05004025 map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004026 map->m_len = min(map->m_len,
4027 (unsigned) sbi->s_cluster_ratio - c_offset);
4028 /*
4029 * Check for and handle this case:
4030 *
4031 * |--------- cluster # N-------------|
4032 * |------- extent ----|
4033 * |--- requested region ---|
4034 * |===========|
4035 */
4036
4037 if (map->m_lblk < ee_block)
4038 map->m_len = min(map->m_len, ee_block - map->m_lblk);
4039
4040 /*
4041 * Check for the case where there is already another allocated
4042 * block to the right of 'ex' but before the end of the cluster.
4043 *
4044 * |------------- cluster # N-------------|
4045 * |----- ex -----| |---- ex_right ----|
4046 * |------ requested region ------|
4047 * |================|
4048 */
4049 if (map->m_lblk > ee_block) {
4050 ext4_lblk_t next = ext4_ext_next_allocated_block(path);
4051 map->m_len = min(map->m_len, next - map->m_lblk);
4052 }
Aditya Kalid8990242011-09-09 19:18:51 -04004053
4054 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 1);
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004055 return 1;
4056 }
Aditya Kalid8990242011-09-09 19:18:51 -04004057
4058 trace_ext4_get_implied_cluster_alloc_exit(sb, map, 0);
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004059 return 0;
4060}
4061
4062
4063/*
Mingming Caof5ab0d12008-02-25 15:29:55 -05004064 * Block allocation/map/preallocation routine for extents based files
4065 *
4066 *
Aneesh Kumar K.Vc278bfe2008-01-28 23:58:27 -05004067 * Need to be called with
Aneesh Kumar K.V0e855ac2008-01-28 23:58:26 -05004068 * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
4069 * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
Mingming Caof5ab0d12008-02-25 15:29:55 -05004070 *
Randy Dunlapb483bb72020-08-04 19:48:50 -07004071 * return > 0, number of blocks already mapped/allocated
Mingming Caof5ab0d12008-02-25 15:29:55 -05004072 * if create == 0 and these are pre-allocated blocks
4073 * buffer head is unmapped
4074 * otherwise blocks are mapped
4075 *
4076 * return = 0, if plain look up failed (blocks have not been allocated)
4077 * buffer head is unmapped
4078 *
4079 * return < 0, error case.
Aneesh Kumar K.Vc278bfe2008-01-28 23:58:27 -05004080 */
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004081int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4082 struct ext4_map_blocks *map, int flags)
Alex Tomasa86c6182006-10-11 01:21:03 -07004083{
4084 struct ext4_ext_path *path = NULL;
yangerkund7dce9e2020-10-28 13:56:17 +08004085 struct ext4_extent newex, *ex, ex2;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004086 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
Eric Whitney8ad8d712020-05-10 11:58:05 -04004087 ext4_fsblk_t newblock = 0, pblk;
Eric Whitney34990462020-03-11 16:50:33 -04004088 int err = 0, depth, ret;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004089 unsigned int allocated = 0, offset = 0;
Yongqiang Yang81fdbb42011-10-29 09:23:38 -04004090 unsigned int allocated_clusters = 0;
Alex Tomasc9de5602008-01-29 00:19:52 -05004091 struct ext4_allocation_request ar;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004092 ext4_lblk_t cluster_offset;
Alex Tomasa86c6182006-10-11 01:21:03 -07004093
Ritesh Harjani70aa1552020-05-10 11:54:55 +05304094 ext_debug(inode, "blocks %u/%u requested\n", map->m_lblk, map->m_len);
Jiaying Zhang0562e0b2011-03-21 21:38:05 -04004095 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
Alex Tomasa86c6182006-10-11 01:21:03 -07004096
Alex Tomasa86c6182006-10-11 01:21:03 -07004097 /* find extent for this block */
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04004098 path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
Alex Tomasa86c6182006-10-11 01:21:03 -07004099 if (IS_ERR(path)) {
4100 err = PTR_ERR(path);
4101 path = NULL;
Eric Whitney8ad8d712020-05-10 11:58:05 -04004102 goto out;
Alex Tomasa86c6182006-10-11 01:21:03 -07004103 }
4104
4105 depth = ext_depth(inode);
4106
4107 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07004108 * consistent leaf must not be empty;
4109 * this situation is possible, though, _during_ tree modification;
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04004110 * this is why assert can't be put in ext4_find_extent()
Alex Tomasa86c6182006-10-11 01:21:03 -07004111 */
Frank Mayhar273df552010-03-02 11:46:09 -05004112 if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
4113 EXT4_ERROR_INODE(inode, "bad extent address "
Theodore Ts'of70f3622010-05-16 23:00:00 -04004114 "lblock: %lu, depth: %d pblock %lld",
4115 (unsigned long) map->m_lblk, depth,
4116 path[depth].p_block);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04004117 err = -EFSCORRUPTED;
Eric Whitney8ad8d712020-05-10 11:58:05 -04004118 goto out;
Surbhi Palande034fb4c2009-12-14 09:53:52 -05004119 }
Alex Tomasa86c6182006-10-11 01:21:03 -07004120
Avantika Mathur7e028972006-12-06 20:41:33 -08004121 ex = path[depth].p_ext;
4122 if (ex) {
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05004123 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
Theodore Ts'obf89d162010-10-27 21:30:14 -04004124 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
Amit Aroraa2df2a62007-07-17 21:42:41 -04004125 unsigned short ee_len;
Suparna Bhattacharya471d4012006-10-11 01:21:06 -07004126
Lukas Czernerb8a86842014-03-18 18:05:35 -04004127
Suparna Bhattacharya471d4012006-10-11 01:21:06 -07004128 /*
Lukas Czerner556615d2014-04-20 23:45:47 -04004129 * unwritten extents are treated as holes, except that
Amit Arora56055d32007-07-17 21:42:38 -04004130 * we split out initialized portions during a write.
Suparna Bhattacharya471d4012006-10-11 01:21:06 -07004131 */
Amit Aroraa2df2a62007-07-17 21:42:41 -04004132 ee_len = ext4_ext_get_actual_len(ex);
Aditya Kalid8990242011-09-09 19:18:51 -04004133
4134 trace_ext4_ext_show_extent(inode, ee_block, ee_start, ee_len);
4135
Randy Dunlapd0d856e2006-10-11 01:21:07 -07004136 /* if found extent covers block, simply return it */
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004137 if (in_range(map->m_lblk, ee_block, ee_len)) {
4138 newblock = map->m_lblk - ee_block + ee_start;
Randy Dunlapd0d856e2006-10-11 01:21:07 -07004139 /* number of remaining blocks in the extent */
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004140 allocated = ee_len - (map->m_lblk - ee_block);
Ritesh Harjani70aa1552020-05-10 11:54:55 +05304141 ext_debug(inode, "%u fit into %u:%d -> %llu\n",
4142 map->m_lblk, ee_block, ee_len, newblock);
Amit Arora56055d32007-07-17 21:42:38 -04004143
Lukas Czernerb8a86842014-03-18 18:05:35 -04004144 /*
4145 * If the extent is initialized check whether the
4146 * caller wants to convert it to unwritten.
4147 */
Lukas Czerner556615d2014-04-20 23:45:47 -04004148 if ((!ext4_ext_is_unwritten(ex)) &&
Lukas Czernerb8a86842014-03-18 18:05:35 -04004149 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
Eric Whitneyf064a9d2020-02-18 15:26:56 -05004150 err = convert_initialized_extent(handle,
4151 inode, map, &path, &allocated);
Eric Whitney8ad8d712020-05-10 11:58:05 -04004152 goto out;
Eric Whitneyf064a9d2020-02-18 15:26:56 -05004153 } else if (!ext4_ext_is_unwritten(ex)) {
Eric Whitney8ad8d712020-05-10 11:58:05 -04004154 map->m_flags |= EXT4_MAP_MAPPED;
4155 map->m_pblk = newblock;
4156 if (allocated > map->m_len)
4157 allocated = map->m_len;
4158 map->m_len = allocated;
4159 ext4_ext_show_leaf(inode, path);
Lukas Czerner78771912012-03-19 23:05:43 -04004160 goto out;
Eric Whitneyf064a9d2020-02-18 15:26:56 -05004161 }
Zheng Liu69eb33d2013-02-18 00:31:07 -05004162
Lukas Czerner556615d2014-04-20 23:45:47 -04004163 ret = ext4_ext_handle_unwritten_extents(
Theodore Ts'odfe50802014-09-01 14:37:09 -04004164 handle, inode, map, &path, flags,
Lukas Czerner78771912012-03-19 23:05:43 -04004165 allocated, newblock);
Eric Whitneyce37c422014-02-19 18:52:39 -05004166 if (ret < 0)
4167 err = ret;
4168 else
4169 allocated = ret;
Eric Whitney8ad8d712020-05-10 11:58:05 -04004170 goto out;
Alex Tomasa86c6182006-10-11 01:21:03 -07004171 }
4172 }
4173
4174 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07004175 * requested block isn't allocated yet;
Alex Tomasa86c6182006-10-11 01:21:03 -07004176 * we couldn't try to create block if create flag is zero
4177 */
Theodore Ts'oc2177052009-05-14 00:58:52 -04004178 if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
Jan Kara140a5252016-03-09 22:46:57 -05004179 ext4_lblk_t hole_start, hole_len;
4180
Jan Karafacab4d2016-03-09 22:54:00 -05004181 hole_start = map->m_lblk;
4182 hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
Amit Arora56055d32007-07-17 21:42:38 -04004183 /*
4184 * put just found gap into cache to speed up
4185 * subsequent requests
4186 */
Jan Kara140a5252016-03-09 22:46:57 -05004187 ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
Jan Karafacab4d2016-03-09 22:54:00 -05004188
4189 /* Update hole_len to reflect hole size after map->m_lblk */
4190 if (hole_start != map->m_lblk)
4191 hole_len -= map->m_lblk - hole_start;
4192 map->m_pblk = 0;
4193 map->m_len = min_t(unsigned int, map->m_len, hole_len);
4194
Eric Whitney8ad8d712020-05-10 11:58:05 -04004195 goto out;
Alex Tomasa86c6182006-10-11 01:21:03 -07004196 }
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004197
Alex Tomasa86c6182006-10-11 01:21:03 -07004198 /*
Theodore Ts'oc2ea3fd2008-10-10 09:40:52 -04004199 * Okay, we need to do block allocation.
Andrew Morton63f57932006-10-11 01:21:24 -07004200 */
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004201 newex.ee_block = cpu_to_le32(map->m_lblk);
Eric Whitneyd0abafa2014-01-06 14:00:23 -05004202 cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004203
4204 /*
4205 * If we are doing bigalloc, check to see if the extent returned
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04004206 * by ext4_find_extent() implies a cluster we can use.
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004207 */
4208 if (cluster_offset && ex &&
Aditya Kalid8990242011-09-09 19:18:51 -04004209 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004210 ar.len = allocated = map->m_len;
4211 newblock = map->m_pblk;
4212 goto got_allocated_blocks;
4213 }
Alex Tomasa86c6182006-10-11 01:21:03 -07004214
Alex Tomasc9de5602008-01-29 00:19:52 -05004215 /* find neighbour allocated blocks */
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004216 ar.lleft = map->m_lblk;
Alex Tomasc9de5602008-01-29 00:19:52 -05004217 err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
4218 if (err)
Eric Whitney8ad8d712020-05-10 11:58:05 -04004219 goto out;
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004220 ar.lright = map->m_lblk;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004221 err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2);
yangerkund7dce9e2020-10-28 13:56:17 +08004222 if (err < 0)
Eric Whitney8ad8d712020-05-10 11:58:05 -04004223 goto out;
Amit Arora25d14f92007-05-24 13:04:13 -04004224
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004225 /* Check if the extent after searching to the right implies a
4226 * cluster we can use. */
yangerkund7dce9e2020-10-28 13:56:17 +08004227 if ((sbi->s_cluster_ratio > 1) && err &&
4228 get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) {
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004229 ar.len = allocated = map->m_len;
4230 newblock = map->m_pblk;
4231 goto got_allocated_blocks;
4232 }
4233
Amit Arora749269f2007-07-18 09:02:56 -04004234 /*
4235 * See if request is beyond maximum number of blocks we can have in
4236 * a single extent. For an initialized extent this limit is
Lukas Czerner556615d2014-04-20 23:45:47 -04004237 * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
4238 * EXT_UNWRITTEN_MAX_LEN.
Amit Arora749269f2007-07-18 09:02:56 -04004239 */
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004240 if (map->m_len > EXT_INIT_MAX_LEN &&
Lukas Czerner556615d2014-04-20 23:45:47 -04004241 !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004242 map->m_len = EXT_INIT_MAX_LEN;
Lukas Czerner556615d2014-04-20 23:45:47 -04004243 else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
4244 (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4245 map->m_len = EXT_UNWRITTEN_MAX_LEN;
Amit Arora749269f2007-07-18 09:02:56 -04004246
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004247 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004248 newex.ee_len = cpu_to_le16(map->m_len);
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004249 err = ext4_ext_check_overlap(sbi, inode, &newex, path);
Amit Arora25d14f92007-05-24 13:04:13 -04004250 if (err)
Aneesh Kumar K.Vb939e372008-01-28 23:58:27 -05004251 allocated = ext4_ext_get_actual_len(&newex);
Amit Arora25d14f92007-05-24 13:04:13 -04004252 else
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004253 allocated = map->m_len;
Alex Tomasc9de5602008-01-29 00:19:52 -05004254
4255 /* allocate new block */
4256 ar.inode = inode;
Theodore Ts'oe35fd662010-05-16 19:00:00 -04004257 ar.goal = ext4_ext_find_goal(inode, path, map->m_lblk);
4258 ar.logical = map->m_lblk;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004259 /*
4260 * We calculate the offset from the beginning of the cluster
4261 * for the logical block number, since when we allocate a
4262 * physical cluster, the physical block should start at the
4263 * same offset from the beginning of the cluster. This is
4264 * needed so that future calls to get_implied_cluster_alloc()
4265 * work correctly.
4266 */
Theodore Ts'of5a44db2013-12-20 09:29:35 -05004267 offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004268 ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
4269 ar.goal -= offset;
4270 ar.logical -= offset;
Alex Tomasc9de5602008-01-29 00:19:52 -05004271 if (S_ISREG(inode->i_mode))
4272 ar.flags = EXT4_MB_HINT_DATA;
4273 else
4274 /* disable in-core preallocation for non-regular files */
4275 ar.flags = 0;
Vivek Haldar556b27a2011-05-25 07:41:54 -04004276 if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
4277 ar.flags |= EXT4_MB_HINT_NOPREALLOC;
Theodore Ts'oe3cf5d52014-09-04 18:07:25 -04004278 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4279 ar.flags |= EXT4_MB_DELALLOC_RESERVED;
Theodore Ts'oc5e298a2015-06-21 01:25:29 -04004280 if (flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
4281 ar.flags |= EXT4_MB_USE_RESERVED;
Alex Tomasc9de5602008-01-29 00:19:52 -05004282 newblock = ext4_mb_new_blocks(handle, &ar, &err);
Alex Tomasa86c6182006-10-11 01:21:03 -07004283 if (!newblock)
Eric Whitney8ad8d712020-05-10 11:58:05 -04004284 goto out;
Aditya Kali7b415bf2011-09-09 19:04:51 -04004285 allocated_clusters = ar.len;
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004286 ar.len = EXT4_C2B(sbi, ar.len) - offset;
Ritesh Harjani70aa1552020-05-10 11:54:55 +05304287 ext_debug(inode, "allocate new block: goal %llu, found %llu/%u, requested %u\n",
Ritesh Harjaniec8c60b2020-05-10 11:54:52 +05304288 ar.goal, newblock, ar.len, allocated);
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004289 if (ar.len > allocated)
4290 ar.len = allocated;
Alex Tomasa86c6182006-10-11 01:21:03 -07004291
Theodore Ts'o4d33b1e2011-09-09 18:52:51 -04004292got_allocated_blocks:
Alex Tomasa86c6182006-10-11 01:21:03 -07004293 /* try to insert new extent into found leaf and return */
Eric Whitney8ad8d712020-05-10 11:58:05 -04004294 pblk = newblock + offset;
4295 ext4_ext_store_pblock(&newex, pblk);
Alex Tomasc9de5602008-01-29 00:19:52 -05004296 newex.ee_len = cpu_to_le16(ar.len);
Lukas Czerner556615d2014-04-20 23:45:47 -04004297 /* Mark unwritten */
Eric Whitney34990462020-03-11 16:50:33 -04004298 if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
Lukas Czerner556615d2014-04-20 23:45:47 -04004299 ext4_ext_mark_unwritten(&newex);
Zheng Liua25a4e12013-02-18 00:28:04 -05004300 map->m_flags |= EXT4_MAP_UNWRITTEN;
Mingming Cao8d5d02e2009-09-28 15:48:29 -04004301 }
Jiaying Zhangc8d46e42010-02-24 09:52:53 -05004302
Eric Whitney4337ecd2020-02-11 16:02:16 -05004303 err = ext4_ext_insert_extent(handle, inode, &path, &newex, flags);
Eric Whitney34990462020-03-11 16:50:33 -04004304 if (err) {
4305 if (allocated_clusters) {
4306 int fb_flags = 0;
Dmitry Monakhov82e54222012-09-28 23:36:25 -04004307
Eric Whitney34990462020-03-11 16:50:33 -04004308 /*
4309 * free data blocks we just allocated.
4310 * not a good idea to call discard here directly,
4311 * but otherwise we'd need to call it every free().
4312 */
brookxu27bc4462020-08-17 15:36:15 +08004313 ext4_discard_preallocations(inode, 0);
Eric Whitney34990462020-03-11 16:50:33 -04004314 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4315 fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
4316 ext4_free_blocks(handle, inode, NULL, newblock,
4317 EXT4_C2B(sbi, allocated_clusters),
4318 fb_flags);
4319 }
Eric Whitney8ad8d712020-05-10 11:58:05 -04004320 goto out;
Alex Tomas315054f2007-05-24 13:04:25 -04004321 }
Alex Tomasa86c6182006-10-11 01:21:03 -07004322
Jan Karab436b9b2009-12-08 23:51:10 -05004323 /*
Eric Whitneyb6bf9172018-10-01 14:24:08 -04004324 * Reduce the reserved cluster count to reflect successful deferred
4325 * allocation of delayed allocated clusters or direct allocation of
4326 * clusters discovered to be delayed allocated. Once allocated, a
4327 * cluster is not included in the reserved count.
Aneesh Kumar K.V5f634d02010-01-25 04:00:31 -05004328 */
Eric Whitney29711482020-03-11 16:51:25 -04004329 if (test_opt(inode->i_sb, DELALLOC) && allocated_clusters) {
Eric Whitneyb6bf9172018-10-01 14:24:08 -04004330 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
Lukas Czerner232ec872013-03-10 22:46:30 -04004331 /*
Eric Whitneyb6bf9172018-10-01 14:24:08 -04004332 * When allocating delayed allocated clusters, simply
4333 * reduce the reserved cluster count and claim quota
Lukas Czerner232ec872013-03-10 22:46:30 -04004334 */
4335 ext4_da_update_reserve_space(inode, allocated_clusters,
4336 1);
Eric Whitneyb6bf9172018-10-01 14:24:08 -04004337 } else {
4338 ext4_lblk_t lblk, len;
4339 unsigned int n;
4340
4341 /*
4342 * When allocating non-delayed allocated clusters
4343 * (from fallocate, filemap, DIO, or clusters
4344 * allocated when delalloc has been disabled by
4345 * ext4_nonda_switch), reduce the reserved cluster
4346 * count by the number of allocated clusters that
4347 * have previously been delayed allocated. Quota
4348 * has been claimed by ext4_mb_new_blocks() above,
4349 * so release the quota reservations made for any
4350 * previously delayed allocated clusters.
4351 */
4352 lblk = EXT4_LBLK_CMASK(sbi, map->m_lblk);
4353 len = allocated_clusters << sbi->s_cluster_bits;
4354 n = ext4_es_delayed_clu(inode, lblk, len);
4355 if (n > 0)
4356 ext4_da_update_reserve_space(inode, (int) n, 0);
Aditya Kali7b415bf2011-09-09 19:04:51 -04004357 }
4358 }
Aneesh Kumar K.V5f634d02010-01-25 04:00:31 -05004359
4360 /*
Jan Karab436b9b2009-12-08 23:51:10 -05004361 * Cache the extent and update transaction to commit on fdatasync only
Lukas Czerner556615d2014-04-20 23:45:47 -04004362 * when it is _not_ an unwritten extent.
Jan Karab436b9b2009-12-08 23:51:10 -05004363 */
Lukas Czerner556615d2014-04-20 23:45:47 -04004364 if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
Jan Karab436b9b2009-12-08 23:51:10 -05004365 ext4_update_inode_fsync_trans(handle, inode, 1);
Zheng Liu69eb33d2013-02-18 00:31:07 -05004366 else
Jan Karab436b9b2009-12-08 23:51:10 -05004367 ext4_update_inode_fsync_trans(handle, inode, 0);
Eric Whitney8ad8d712020-05-10 11:58:05 -04004368
4369 map->m_flags |= (EXT4_MAP_NEW | EXT4_MAP_MAPPED);
4370 map->m_pblk = pblk;
4371 map->m_len = ar.len;
4372 allocated = map->m_len;
Alex Tomasa86c6182006-10-11 01:21:03 -07004373 ext4_ext_show_leaf(inode, path);
Eric Whitney8ad8d712020-05-10 11:58:05 -04004374out:
Theodore Ts'ob7ea89a2014-09-01 14:39:09 -04004375 ext4_ext_drop_refs(path);
4376 kfree(path);
Allison Hendersone8613042011-05-25 07:41:46 -04004377
Theodore Ts'o63b99962013-07-16 10:28:47 -04004378 trace_ext4_ext_map_blocks_exit(inode, flags, map,
4379 err ? err : allocated);
Lukas Czerner78771912012-03-19 23:05:43 -04004380 return err ? err : allocated;
Alex Tomasa86c6182006-10-11 01:21:03 -07004381}
4382
Theodore Ts'od0abb362016-11-13 22:02:28 -05004383int ext4_ext_truncate(handle_t *handle, struct inode *inode)
Alex Tomasa86c6182006-10-11 01:21:03 -07004384{
Alex Tomasa86c6182006-10-11 01:21:03 -07004385 struct super_block *sb = inode->i_sb;
Aneesh Kumar K.V725d26d2008-01-28 23:58:27 -05004386 ext4_lblk_t last_block;
Alex Tomasa86c6182006-10-11 01:21:03 -07004387 int err = 0;
4388
4389 /*
Randy Dunlapd0d856e2006-10-11 01:21:07 -07004390 * TODO: optimization is possible here.
4391 * Probably we need not scan at all,
4392 * because page truncation is enough.
Alex Tomasa86c6182006-10-11 01:21:03 -07004393 */
Alex Tomasa86c6182006-10-11 01:21:03 -07004394
4395 /* we have to know where to truncate from in crash case */
4396 EXT4_I(inode)->i_disksize = inode->i_size;
Theodore Ts'od0abb362016-11-13 22:02:28 -05004397 err = ext4_mark_inode_dirty(handle, inode);
4398 if (err)
4399 return err;
Alex Tomasa86c6182006-10-11 01:21:03 -07004400
4401 last_block = (inode->i_size + sb->s_blocksize - 1)
4402 >> EXT4_BLOCK_SIZE_BITS(sb);
Theodore Ts'o8acd5e92013-07-15 00:09:19 -04004403retry:
Zheng Liu51865fd2012-11-08 21:57:32 -05004404 err = ext4_es_remove_extent(inode, last_block,
4405 EXT_MAX_BLOCKS - last_block);
Theodore Ts'o94eec0f2013-07-29 12:12:56 -04004406 if (err == -ENOMEM) {
Theodore Ts'o8acd5e92013-07-15 00:09:19 -04004407 cond_resched();
4408 congestion_wait(BLK_RW_ASYNC, HZ/50);
4409 goto retry;
4410 }
Theodore Ts'od0abb362016-11-13 22:02:28 -05004411 if (err)
4412 return err;
Theodore Ts'o73c384c02020-05-07 10:50:28 -07004413retry_remove_space:
4414 err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
4415 if (err == -ENOMEM) {
4416 cond_resched();
4417 congestion_wait(BLK_RW_ASYNC, HZ/50);
4418 goto retry_remove_space;
4419 }
4420 return err;
Alex Tomasa86c6182006-10-11 01:21:03 -07004421}
4422
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004423static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004424 ext4_lblk_t len, loff_t new_size,
Tahsin Erdogan77a2e842017-08-05 22:15:45 -04004425 int flags)
Amit Aroraa2df2a62007-07-17 21:42:41 -04004426{
Al Viro496ad9a2013-01-23 17:07:38 -05004427 struct inode *inode = file_inode(file);
Amit Aroraa2df2a62007-07-17 21:42:41 -04004428 handle_t *handle;
Theodore Ts'o64395d92021-03-21 00:45:37 -04004429 int ret = 0, ret2 = 0, ret3 = 0;
Amit Aroraa2df2a62007-07-17 21:42:41 -04004430 int retries = 0;
Lukas Czerner4134f5c2015-06-15 00:20:46 -04004431 int depth = 0;
Theodore Ts'o2ed88682010-05-16 20:00:00 -04004432 struct ext4_map_blocks map;
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004433 unsigned int credits;
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004434 loff_t epos;
Amit Aroraa2df2a62007-07-17 21:42:41 -04004435
Fabian Frederickc3fe4932016-09-15 11:52:07 -04004436 BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004437 map.m_lblk = offset;
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004438 map.m_len = len;
Greg Harm3c6fe772011-10-31 18:41:47 -04004439 /*
4440 * Don't normalize the request if it can fit in one extent so
4441 * that it doesn't get unnecessarily split into multiple
4442 * extents.
4443 */
Lukas Czerner556615d2014-04-20 23:45:47 -04004444 if (len <= EXT_UNWRITTEN_MAX_LEN)
Greg Harm3c6fe772011-10-31 18:41:47 -04004445 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
Dmitry Monakhov60d46162012-10-05 11:32:02 -04004446
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004447 /*
4448 * credits to insert 1 extent into extent tree
4449 */
4450 credits = ext4_chunk_trans_blocks(inode, len);
Fabian Frederickc3fe4932016-09-15 11:52:07 -04004451 depth = ext_depth(inode);
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004452
Amit Aroraa2df2a62007-07-17 21:42:41 -04004453retry:
Eric Whitney32583862021-01-13 17:14:03 -05004454 while (len) {
Lukas Czerner4134f5c2015-06-15 00:20:46 -04004455 /*
4456 * Recalculate credits when extent tree depth changes.
4457 */
Dan Carpenter011c88e2016-12-03 16:46:58 -05004458 if (depth != ext_depth(inode)) {
Lukas Czerner4134f5c2015-06-15 00:20:46 -04004459 credits = ext4_chunk_trans_blocks(inode, len);
4460 depth = ext_depth(inode);
4461 }
4462
Theodore Ts'o9924a922013-02-08 21:59:22 -05004463 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4464 credits);
Amit Aroraa2df2a62007-07-17 21:42:41 -04004465 if (IS_ERR(handle)) {
4466 ret = PTR_ERR(handle);
4467 break;
4468 }
Dmitry Monakhova4e5d882011-10-25 08:15:12 -04004469 ret = ext4_map_blocks(handle, inode, &map, flags);
Aneesh Kumar K.V221879c2008-01-28 23:58:27 -05004470 if (ret <= 0) {
Lukas Czernerf282ac12014-03-18 17:44:35 -04004471 ext4_debug("inode #%lu: block %u: len %u: "
4472 "ext4_ext_map_blocks returned %d",
4473 inode->i_ino, map.m_lblk,
4474 map.m_len, ret);
Amit Aroraa2df2a62007-07-17 21:42:41 -04004475 ext4_mark_inode_dirty(handle, inode);
Eric Whitney32583862021-01-13 17:14:03 -05004476 ext4_journal_stop(handle);
Amit Aroraa2df2a62007-07-17 21:42:41 -04004477 break;
4478 }
Eric Whitney32583862021-01-13 17:14:03 -05004479 /*
4480 * allow a full retry cycle for any remaining allocations
4481 */
4482 retries = 0;
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004483 map.m_lblk += ret;
4484 map.m_len = len = len - ret;
4485 epos = (loff_t)map.m_lblk << inode->i_blkbits;
Deepa Dinamanieeca7ea2016-11-14 21:40:10 -05004486 inode->i_ctime = current_time(inode);
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004487 if (new_size) {
4488 if (epos > new_size)
4489 epos = new_size;
4490 if (ext4_update_inode_size(inode, epos) & 0x1)
4491 inode->i_mtime = inode->i_ctime;
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004492 }
Harshad Shirwadkar4209ae12020-04-26 18:34:37 -07004493 ret2 = ext4_mark_inode_dirty(handle, inode);
Eryu Guanc894aa92017-12-03 22:52:51 -05004494 ext4_update_inode_fsync_trans(handle, inode, 1);
Harshad Shirwadkar4209ae12020-04-26 18:34:37 -07004495 ret3 = ext4_journal_stop(handle);
4496 ret2 = ret3 ? ret3 : ret2;
4497 if (unlikely(ret2))
Amit Aroraa2df2a62007-07-17 21:42:41 -04004498 break;
4499 }
Eric Whitney32583862021-01-13 17:14:03 -05004500 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
Amit Aroraa2df2a62007-07-17 21:42:41 -04004501 goto retry;
Lukas Czernerf282ac12014-03-18 17:44:35 -04004502
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004503 return ret > 0 ? ret2 : ret;
4504}
4505
Eric Biggers43f81672019-12-31 12:04:40 -06004506static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
4507
4508static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len);
4509
Lukas Czernerb8a86842014-03-18 18:05:35 -04004510static long ext4_zero_range(struct file *file, loff_t offset,
4511 loff_t len, int mode)
4512{
4513 struct inode *inode = file_inode(file);
Jan Karad4f52582021-02-04 18:05:42 +01004514 struct address_space *mapping = file->f_mapping;
Lukas Czernerb8a86842014-03-18 18:05:35 -04004515 handle_t *handle = NULL;
4516 unsigned int max_blocks;
4517 loff_t new_size = 0;
4518 int ret = 0;
4519 int flags;
Dmitry Monakhov69dc9532014-08-27 18:33:49 -04004520 int credits;
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004521 int partial_begin, partial_end;
Lukas Czernerb8a86842014-03-18 18:05:35 -04004522 loff_t start, end;
4523 ext4_lblk_t lblk;
Lukas Czernerb8a86842014-03-18 18:05:35 -04004524 unsigned int blkbits = inode->i_blkbits;
4525
4526 trace_ext4_zero_range(inode, offset, len, mode);
4527
Namjae Jeone1ee60f2014-05-27 12:48:55 -04004528 /* Call ext4_force_commit to flush all data in case of data=journal. */
4529 if (ext4_should_journal_data(inode)) {
4530 ret = ext4_force_commit(inode->i_sb);
4531 if (ret)
4532 return ret;
4533 }
4534
Lukas Czernerb8a86842014-03-18 18:05:35 -04004535 /*
Keyur Patele4d7f2d2020-06-10 23:19:46 -04004536 * Round up offset. This is not fallocate, we need to zero out
Lukas Czernerb8a86842014-03-18 18:05:35 -04004537 * blocks, so convert interior block aligned part of the range to
4538 * unwritten and possibly manually zero out unaligned parts of the
4539 * range.
4540 */
4541 start = round_up(offset, 1 << blkbits);
4542 end = round_down((offset + len), 1 << blkbits);
4543
4544 if (start < offset || end > offset + len)
4545 return -EINVAL;
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004546 partial_begin = offset & ((1 << blkbits) - 1);
4547 partial_end = (offset + len) & ((1 << blkbits) - 1);
Lukas Czernerb8a86842014-03-18 18:05:35 -04004548
4549 lblk = start >> blkbits;
4550 max_blocks = (end >> blkbits);
4551 if (max_blocks < lblk)
4552 max_blocks = 0;
4553 else
4554 max_blocks -= lblk;
4555
Al Viro59551022016-01-22 15:40:57 -05004556 inode_lock(inode);
Lukas Czernerb8a86842014-03-18 18:05:35 -04004557
4558 /*
Christophe JAILLET80dd4972020-05-03 22:06:47 +02004559 * Indirect files do not support unwritten extents
Lukas Czernerb8a86842014-03-18 18:05:35 -04004560 */
4561 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4562 ret = -EOPNOTSUPP;
4563 goto out_mutex;
4564 }
4565
4566 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
Eric Biggers9b02e492019-12-31 12:04:38 -06004567 (offset + len > inode->i_size ||
Theodore Ts'o51e3ae82017-10-06 23:09:55 -04004568 offset + len > EXT4_I(inode)->i_disksize)) {
Lukas Czernerb8a86842014-03-18 18:05:35 -04004569 new_size = offset + len;
4570 ret = inode_newsize_ok(inode, new_size);
4571 if (ret)
4572 goto out_mutex;
Lukas Czernerb8a86842014-03-18 18:05:35 -04004573 }
4574
Lukas Czerner0f2af212015-04-03 00:09:13 -04004575 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
Lukas Czerner0f2af212015-04-03 00:09:13 -04004576
Jan Kara17048e82015-12-07 14:29:17 -05004577 /* Wait all existing dio workers, newcomers will block on i_mutex */
Jan Kara17048e82015-12-07 14:29:17 -05004578 inode_dio_wait(inode);
4579
Lukas Czerner0f2af212015-04-03 00:09:13 -04004580 /* Preallocate the range including the unaligned edges */
4581 if (partial_begin || partial_end) {
4582 ret = ext4_alloc_file_blocks(file,
4583 round_down(offset, 1 << blkbits) >> blkbits,
4584 (round_up((offset + len), 1 << blkbits) -
4585 round_down(offset, 1 << blkbits)) >> blkbits,
Tahsin Erdogan77a2e842017-08-05 22:15:45 -04004586 new_size, flags);
Lukas Czerner0f2af212015-04-03 00:09:13 -04004587 if (ret)
Nikolay Borisov1d398342018-03-22 11:52:10 -04004588 goto out_mutex;
Lukas Czerner0f2af212015-04-03 00:09:13 -04004589
4590 }
4591
4592 /* Zero range excluding the unaligned edges */
Lukas Czernerb8a86842014-03-18 18:05:35 -04004593 if (max_blocks > 0) {
Lukas Czerner0f2af212015-04-03 00:09:13 -04004594 flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4595 EXT4_EX_NOCACHE);
Lukas Czernerb8a86842014-03-18 18:05:35 -04004596
Jan Karaea3d7202015-12-07 14:28:03 -05004597 /*
4598 * Prevent page faults from reinstantiating pages we have
4599 * released from page cache.
4600 */
Jan Karad4f52582021-02-04 18:05:42 +01004601 filemap_invalidate_lock(mapping);
Ross Zwisler430657b2018-07-29 17:00:22 -04004602
4603 ret = ext4_break_layouts(inode);
4604 if (ret) {
Jan Karad4f52582021-02-04 18:05:42 +01004605 filemap_invalidate_unlock(mapping);
Ross Zwisler430657b2018-07-29 17:00:22 -04004606 goto out_mutex;
4607 }
4608
Jan Kara01127842015-12-07 14:34:49 -05004609 ret = ext4_update_disksize_before_punch(inode, offset, len);
4610 if (ret) {
Jan Karad4f52582021-02-04 18:05:42 +01004611 filemap_invalidate_unlock(mapping);
Nikolay Borisov1d398342018-03-22 11:52:10 -04004612 goto out_mutex;
Jan Kara01127842015-12-07 14:34:49 -05004613 }
Jan Karaea3d7202015-12-07 14:28:03 -05004614 /* Now release the pages and zero block aligned part of pages */
4615 truncate_pagecache_range(inode, start, end - 1);
Deepa Dinamanieeca7ea2016-11-14 21:40:10 -05004616 inode->i_mtime = inode->i_ctime = current_time(inode);
Jan Karaea3d7202015-12-07 14:28:03 -05004617
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004618 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
Tahsin Erdogan77a2e842017-08-05 22:15:45 -04004619 flags);
Jan Karad4f52582021-02-04 18:05:42 +01004620 filemap_invalidate_unlock(mapping);
Lukas Czernerb8a86842014-03-18 18:05:35 -04004621 if (ret)
Nikolay Borisov1d398342018-03-22 11:52:10 -04004622 goto out_mutex;
Lukas Czernerb8a86842014-03-18 18:05:35 -04004623 }
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004624 if (!partial_begin && !partial_end)
Nikolay Borisov1d398342018-03-22 11:52:10 -04004625 goto out_mutex;
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004626
Dmitry Monakhov69dc9532014-08-27 18:33:49 -04004627 /*
4628 * In worst case we have to writeout two nonadjacent unwritten
4629 * blocks and update the inode
4630 */
4631 credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
4632 if (ext4_should_journal_data(inode))
4633 credits += 2;
4634 handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
Lukas Czernerb8a86842014-03-18 18:05:35 -04004635 if (IS_ERR(handle)) {
4636 ret = PTR_ERR(handle);
4637 ext4_std_error(inode->i_sb, ret);
Nikolay Borisov1d398342018-03-22 11:52:10 -04004638 goto out_mutex;
Lukas Czernerb8a86842014-03-18 18:05:35 -04004639 }
4640
Deepa Dinamanieeca7ea2016-11-14 21:40:10 -05004641 inode->i_mtime = inode->i_ctime = current_time(inode);
Eric Whitney4337ecd2020-02-11 16:02:16 -05004642 if (new_size)
Dmitry Monakhov4631dbf2014-08-23 17:48:28 -04004643 ext4_update_inode_size(inode, new_size);
Harshad Shirwadkar4209ae12020-04-26 18:34:37 -07004644 ret = ext4_mark_inode_dirty(handle, inode);
4645 if (unlikely(ret))
4646 goto out_handle;
Lukas Czernerb8a86842014-03-18 18:05:35 -04004647 /* Zero out partial block at the edges of the range */
4648 ret = ext4_zero_partial_blocks(handle, inode, offset, len);
Jan Kara67a7d5f2017-05-29 13:24:55 -04004649 if (ret >= 0)
4650 ext4_update_inode_fsync_trans(handle, inode, 1);
Lukas Czernerb8a86842014-03-18 18:05:35 -04004651
4652 if (file->f_flags & O_SYNC)
4653 ext4_handle_sync(handle);
4654
Harshad Shirwadkar4209ae12020-04-26 18:34:37 -07004655out_handle:
Lukas Czernerb8a86842014-03-18 18:05:35 -04004656 ext4_journal_stop(handle);
Lukas Czernerb8a86842014-03-18 18:05:35 -04004657out_mutex:
Al Viro59551022016-01-22 15:40:57 -05004658 inode_unlock(inode);
Lukas Czernerb8a86842014-03-18 18:05:35 -04004659 return ret;
4660}
4661
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004662/*
4663 * preallocate space for a file. This implements ext4's fallocate file
4664 * operation, which gets called from sys_fallocate system call.
4665 * For block-mapped files, posix_fallocate should fall back to the method
4666 * of writing zeroes to the required new blocks (the same behavior which is
4667 * expected for file systems which do not support fallocate() system call).
4668 */
4669long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4670{
4671 struct inode *inode = file_inode(file);
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004672 loff_t new_size = 0;
4673 unsigned int max_blocks;
4674 int ret = 0;
4675 int flags;
4676 ext4_lblk_t lblk;
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004677 unsigned int blkbits = inode->i_blkbits;
4678
Michael Halcrow2058f832015-04-12 00:55:10 -04004679 /*
4680 * Encrypted inodes can't handle collapse range or insert
4681 * range since we would need to re-encrypt blocks with a
4682 * different IV or XTS tweak (which are based on the logical
4683 * block number).
Michael Halcrow2058f832015-04-12 00:55:10 -04004684 */
Chandan Rajendra592ddec2018-12-12 15:20:10 +05304685 if (IS_ENCRYPTED(inode) &&
Eric Biggers457b1e32019-12-26 09:42:16 -06004686 (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
Michael Halcrow2058f832015-04-12 00:55:10 -04004687 return -EOPNOTSUPP;
4688
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004689 /* Return error if mode is not supported */
4690 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
Namjae Jeon331573f2015-06-09 01:55:03 -04004691 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
4692 FALLOC_FL_INSERT_RANGE))
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004693 return -EOPNOTSUPP;
4694
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07004695 if (mode & FALLOC_FL_PUNCH_HOLE) {
4696 ret = ext4_punch_hole(inode, offset, len);
4697 goto exit;
4698 }
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004699
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004700 ret = ext4_convert_inline_data(inode);
4701 if (ret)
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07004702 goto exit;
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004703
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07004704 if (mode & FALLOC_FL_COLLAPSE_RANGE) {
4705 ret = ext4_collapse_range(inode, offset, len);
4706 goto exit;
4707 }
Theodore Ts'o40c406c2014-04-12 22:53:53 -04004708
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07004709 if (mode & FALLOC_FL_INSERT_RANGE) {
4710 ret = ext4_insert_range(inode, offset, len);
4711 goto exit;
4712 }
Namjae Jeon331573f2015-06-09 01:55:03 -04004713
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07004714 if (mode & FALLOC_FL_ZERO_RANGE) {
4715 ret = ext4_zero_range(file, offset, len, mode);
4716 goto exit;
4717 }
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004718 trace_ext4_fallocate_enter(inode, offset, len, mode);
4719 lblk = offset >> blkbits;
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004720
Fabian Frederick518eaa62016-09-15 11:55:01 -04004721 max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
Lukas Czerner556615d2014-04-20 23:45:47 -04004722 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004723
Al Viro59551022016-01-22 15:40:57 -05004724 inode_lock(inode);
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004725
Davide Italiano280227a2015-05-02 23:21:15 -04004726 /*
4727 * We only support preallocation for extent-based files only
4728 */
4729 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4730 ret = -EOPNOTSUPP;
4731 goto out;
4732 }
4733
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004734 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
Eric Biggers9b02e492019-12-31 12:04:38 -06004735 (offset + len > inode->i_size ||
Theodore Ts'o51e3ae82017-10-06 23:09:55 -04004736 offset + len > EXT4_I(inode)->i_disksize)) {
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004737 new_size = offset + len;
4738 ret = inode_newsize_ok(inode, new_size);
4739 if (ret)
4740 goto out;
4741 }
4742
Jan Kara17048e82015-12-07 14:29:17 -05004743 /* Wait all existing dio workers, newcomers will block on i_mutex */
Jan Kara17048e82015-12-07 14:29:17 -05004744 inode_dio_wait(inode);
4745
Tahsin Erdogan77a2e842017-08-05 22:15:45 -04004746 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags);
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004747 if (ret)
4748 goto out;
4749
Dmitry Monakhovc174e6d2014-08-27 18:40:00 -04004750 if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07004751 ret = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
4752 EXT4_I(inode)->i_sync_tid);
Lukas Czernerf282ac12014-03-18 17:44:35 -04004753 }
Lukas Czernerf282ac12014-03-18 17:44:35 -04004754out:
Al Viro59551022016-01-22 15:40:57 -05004755 inode_unlock(inode);
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004756 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07004757exit:
Lukas Czerner0e8b6872014-03-18 18:03:51 -04004758 return ret;
Amit Aroraa2df2a62007-07-17 21:42:41 -04004759}
Eric Sandeen6873fa02008-10-07 00:46:36 -04004760
4761/*
Mingming Cao00314622009-09-28 15:49:08 -04004762 * This function convert a range of blocks to written extents
4763 * The caller of this function will pass the start offset and the size.
4764 * all unwritten extents within this range will be converted to
4765 * written extents.
4766 *
4767 * This function is called from the direct IO end io call back
4768 * function, to convert the fallocated extents after IO is completed.
Mingming109f5562009-11-10 10:48:08 -05004769 * Returns 0 on success.
Mingming Cao00314622009-09-28 15:49:08 -04004770 */
Jan Kara6b523df2013-06-04 13:21:11 -04004771int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
4772 loff_t offset, ssize_t len)
Mingming Cao00314622009-09-28 15:49:08 -04004773{
Mingming Cao00314622009-09-28 15:49:08 -04004774 unsigned int max_blocks;
Harshad Shirwadkar4209ae12020-04-26 18:34:37 -07004775 int ret = 0, ret2 = 0, ret3 = 0;
Theodore Ts'o2ed88682010-05-16 20:00:00 -04004776 struct ext4_map_blocks map;
Ritesh Harjania00713e2019-10-16 13:07:08 +05304777 unsigned int blkbits = inode->i_blkbits;
4778 unsigned int credits = 0;
Mingming Cao00314622009-09-28 15:49:08 -04004779
Theodore Ts'o2ed88682010-05-16 20:00:00 -04004780 map.m_lblk = offset >> blkbits;
Fabian Frederick518eaa62016-09-15 11:55:01 -04004781 max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits);
4782
Ritesh Harjania00713e2019-10-16 13:07:08 +05304783 if (!handle) {
Jan Kara6b523df2013-06-04 13:21:11 -04004784 /*
4785 * credits to insert 1 extent into extent tree
4786 */
4787 credits = ext4_chunk_trans_blocks(inode, max_blocks);
4788 }
Mingming Cao00314622009-09-28 15:49:08 -04004789 while (ret >= 0 && ret < max_blocks) {
Theodore Ts'o2ed88682010-05-16 20:00:00 -04004790 map.m_lblk += ret;
4791 map.m_len = (max_blocks -= ret);
Jan Kara6b523df2013-06-04 13:21:11 -04004792 if (credits) {
4793 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4794 credits);
4795 if (IS_ERR(handle)) {
4796 ret = PTR_ERR(handle);
4797 break;
4798 }
Mingming Cao00314622009-09-28 15:49:08 -04004799 }
Theodore Ts'o2ed88682010-05-16 20:00:00 -04004800 ret = ext4_map_blocks(handle, inode, &map,
Jiaying Zhangc7064ef2010-03-02 13:28:44 -05004801 EXT4_GET_BLOCKS_IO_CONVERT_EXT);
Lukas Czernerb06acd32013-01-28 21:21:12 -05004802 if (ret <= 0)
4803 ext4_warning(inode->i_sb,
4804 "inode #%lu: block %u: len %u: "
4805 "ext4_ext_map_blocks returned %d",
4806 inode->i_ino, map.m_lblk,
4807 map.m_len, ret);
Harshad Shirwadkar4209ae12020-04-26 18:34:37 -07004808 ret2 = ext4_mark_inode_dirty(handle, inode);
4809 if (credits) {
4810 ret3 = ext4_journal_stop(handle);
4811 if (unlikely(ret3))
4812 ret2 = ret3;
4813 }
4814
Jan Kara6b523df2013-06-04 13:21:11 -04004815 if (ret <= 0 || ret2)
Mingming Cao00314622009-09-28 15:49:08 -04004816 break;
4817 }
4818 return ret > 0 ? ret2 : ret;
4819}
Yongqiang Yang6d9c85e2011-02-27 17:25:47 -05004820
Ritesh Harjania00713e2019-10-16 13:07:08 +05304821int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
4822{
Ritesh Harjanid1e18b82020-10-08 20:32:48 +05304823 int ret = 0, err = 0;
Ritesh Harjanic8cc8812019-10-16 13:07:10 +05304824 struct ext4_io_end_vec *io_end_vec;
Ritesh Harjania00713e2019-10-16 13:07:08 +05304825
4826 /*
4827 * This is somewhat ugly but the idea is clear: When transaction is
4828 * reserved, everything goes into it. Otherwise we rather start several
4829 * smaller transactions for conversion of each extent separately.
4830 */
4831 if (handle) {
4832 handle = ext4_journal_start_reserved(handle,
4833 EXT4_HT_EXT_CONVERT);
4834 if (IS_ERR(handle))
4835 return PTR_ERR(handle);
4836 }
4837
Ritesh Harjanic8cc8812019-10-16 13:07:10 +05304838 list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
4839 ret = ext4_convert_unwritten_extents(handle, io_end->inode,
4840 io_end_vec->offset,
4841 io_end_vec->size);
4842 if (ret)
4843 break;
4844 }
4845
Ritesh Harjania00713e2019-10-16 13:07:08 +05304846 if (handle)
4847 err = ext4_journal_stop(handle);
4848
4849 return ret < 0 ? ret : err;
4850}
4851
Ritesh Harjanid3b6f232020-02-28 14:56:58 +05304852static int ext4_iomap_xattr_fiemap(struct inode *inode, struct iomap *iomap)
Eric Sandeen6873fa02008-10-07 00:46:36 -04004853{
4854 __u64 physical = 0;
Ritesh Harjanid3b6f232020-02-28 14:56:58 +05304855 __u64 length = 0;
Eric Sandeen6873fa02008-10-07 00:46:36 -04004856 int blockbits = inode->i_sb->s_blocksize_bits;
4857 int error = 0;
Ritesh Harjanid3b6f232020-02-28 14:56:58 +05304858 u16 iomap_type;
Eric Sandeen6873fa02008-10-07 00:46:36 -04004859
4860 /* in-inode? */
Theodore Ts'o19f5fb72010-01-24 14:34:07 -05004861 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
Eric Sandeen6873fa02008-10-07 00:46:36 -04004862 struct ext4_iloc iloc;
4863 int offset; /* offset of xattr in inode */
4864
4865 error = ext4_get_inode_loc(inode, &iloc);
4866 if (error)
4867 return error;
Jan Karaa60697f2013-05-31 19:38:56 -04004868 physical = (__u64)iloc.bh->b_blocknr << blockbits;
Eric Sandeen6873fa02008-10-07 00:46:36 -04004869 offset = EXT4_GOOD_OLD_INODE_SIZE +
4870 EXT4_I(inode)->i_extra_isize;
4871 physical += offset;
4872 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
Curt Wohlgemuthfd2dd9f2010-04-03 17:44:16 -04004873 brelse(iloc.bh);
Ritesh Harjanid3b6f232020-02-28 14:56:58 +05304874 iomap_type = IOMAP_INLINE;
4875 } else if (EXT4_I(inode)->i_file_acl) { /* external block */
Jan Karaa60697f2013-05-31 19:38:56 -04004876 physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
Eric Sandeen6873fa02008-10-07 00:46:36 -04004877 length = inode->i_sb->s_blocksize;
Ritesh Harjanid3b6f232020-02-28 14:56:58 +05304878 iomap_type = IOMAP_MAPPED;
4879 } else {
4880 /* no in-inode or external block for xattr, so return -ENOENT */
4881 error = -ENOENT;
4882 goto out;
Eric Sandeen6873fa02008-10-07 00:46:36 -04004883 }
4884
Ritesh Harjanid3b6f232020-02-28 14:56:58 +05304885 iomap->addr = physical;
4886 iomap->offset = 0;
4887 iomap->length = length;
4888 iomap->type = iomap_type;
4889 iomap->flags = 0;
4890out:
4891 return error;
Eric Sandeen6873fa02008-10-07 00:46:36 -04004892}
4893
Ritesh Harjanid3b6f232020-02-28 14:56:58 +05304894static int ext4_iomap_xattr_begin(struct inode *inode, loff_t offset,
4895 loff_t length, unsigned flags,
4896 struct iomap *iomap, struct iomap *srcmap)
4897{
4898 int error;
4899
4900 error = ext4_iomap_xattr_fiemap(inode, iomap);
4901 if (error == 0 && (offset >= iomap->length))
4902 error = -ENOENT;
4903 return error;
4904}
4905
4906static const struct iomap_ops ext4_iomap_xattr_ops = {
4907 .iomap_begin = ext4_iomap_xattr_begin,
4908};
4909
Christoph Hellwig328e24a2020-05-05 17:43:15 +02004910static int ext4_fiemap_check_ranges(struct inode *inode, u64 start, u64 *len)
4911{
4912 u64 maxbytes;
4913
4914 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
4915 maxbytes = inode->i_sb->s_maxbytes;
4916 else
4917 maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
4918
4919 if (*len == 0)
4920 return -EINVAL;
4921 if (start > maxbytes)
4922 return -EFBIG;
4923
4924 /*
4925 * Shrink request scope to what the fs can actually handle.
4926 */
4927 if (*len > maxbytes || (maxbytes - *len) < start)
4928 *len = maxbytes - start;
4929 return 0;
4930}
4931
Christoph Hellwig03a5ed22020-05-23 09:30:08 +02004932int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
4933 u64 start, u64 len)
Eric Sandeen6873fa02008-10-07 00:46:36 -04004934{
Eric Sandeen6873fa02008-10-07 00:46:36 -04004935 int error = 0;
4936
Theodore Ts'o7869a4a2013-08-16 22:05:14 -04004937 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4938 error = ext4_ext_precache(inode);
4939 if (error)
4940 return error;
Theodore Ts'obb5835e2019-08-11 16:32:41 -04004941 fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
Theodore Ts'o7869a4a2013-08-16 22:05:14 -04004942 }
4943
Christoph Hellwig328e24a2020-05-05 17:43:15 +02004944 /*
4945 * For bitmap files the maximum size limit could be smaller than
4946 * s_maxbytes, so check len here manually instead of just relying on the
4947 * generic check.
4948 */
4949 error = ext4_fiemap_check_ranges(inode, start, &len);
4950 if (error)
4951 return error;
4952
Eric Sandeen6873fa02008-10-07 00:46:36 -04004953 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
Ritesh Harjanid3b6f232020-02-28 14:56:58 +05304954 fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR;
Christoph Hellwig03a5ed22020-05-23 09:30:08 +02004955 return iomap_fiemap(inode, fieinfo, start, len,
4956 &ext4_iomap_xattr_ops);
Eric Sandeen6873fa02008-10-07 00:46:36 -04004957 }
Namjae Jeon9eb79482014-02-23 15:18:59 -05004958
Christoph Hellwig03a5ed22020-05-23 09:30:08 +02004959 return iomap_fiemap(inode, fieinfo, start, len, &ext4_iomap_report_ops);
Theodore Ts'obb5835e2019-08-11 16:32:41 -04004960}
4961
4962int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo,
4963 __u64 start, __u64 len)
4964{
Christoph Hellwig03a5ed22020-05-23 09:30:08 +02004965 ext4_lblk_t start_blk, len_blks;
4966 __u64 last_blk;
4967 int error = 0;
4968
Theodore Ts'obb5835e2019-08-11 16:32:41 -04004969 if (ext4_has_inline_data(inode)) {
4970 int has_inline;
4971
4972 down_read(&EXT4_I(inode)->xattr_sem);
4973 has_inline = ext4_has_inline_data(inode);
4974 up_read(&EXT4_I(inode)->xattr_sem);
4975 if (has_inline)
4976 return 0;
4977 }
4978
Christoph Hellwig03a5ed22020-05-23 09:30:08 +02004979 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
4980 error = ext4_ext_precache(inode);
4981 if (error)
4982 return error;
4983 fieinfo->fi_flags &= ~FIEMAP_FLAG_CACHE;
4984 }
Theodore Ts'obb5835e2019-08-11 16:32:41 -04004985
Christoph Hellwig45dd0522020-05-23 09:30:14 +02004986 error = fiemap_prep(inode, fieinfo, start, &len, 0);
Christoph Hellwigcddf8a22020-05-23 09:30:13 +02004987 if (error)
4988 return error;
Christoph Hellwig03a5ed22020-05-23 09:30:08 +02004989
4990 error = ext4_fiemap_check_ranges(inode, start, &len);
4991 if (error)
4992 return error;
4993
4994 start_blk = start >> inode->i_sb->s_blocksize_bits;
4995 last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
4996 if (last_blk >= EXT_MAX_BLOCKS)
4997 last_blk = EXT_MAX_BLOCKS-1;
4998 len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
4999
5000 /*
5001 * Walk the extent tree gathering extent information
5002 * and pushing extents back to the user.
5003 */
5004 return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo);
5005}
Theodore Ts'obb5835e2019-08-11 16:32:41 -04005006
Namjae Jeon9eb79482014-02-23 15:18:59 -05005007/*
Namjae Jeon9eb79482014-02-23 15:18:59 -05005008 * ext4_ext_shift_path_extents:
5009 * Shift the extents of a path structure lying between path[depth].p_ext
Namjae Jeon331573f2015-06-09 01:55:03 -04005010 * and EXT_LAST_EXTENT(path[depth].p_hdr), by @shift blocks. @SHIFT tells
5011 * if it is right shift or left shift operation.
Namjae Jeon9eb79482014-02-23 15:18:59 -05005012 */
5013static int
5014ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5015 struct inode *inode, handle_t *handle,
Namjae Jeon331573f2015-06-09 01:55:03 -04005016 enum SHIFT_DIRECTION SHIFT)
Namjae Jeon9eb79482014-02-23 15:18:59 -05005017{
5018 int depth, err = 0;
5019 struct ext4_extent *ex_start, *ex_last;
zhengbin4756ee12019-12-25 10:45:59 +08005020 bool update = false;
yangerkun42684962021-09-03 14:27:47 +08005021 int credits, restart_credits;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005022 depth = path->p_depth;
5023
5024 while (depth >= 0) {
5025 if (depth == path->p_depth) {
5026 ex_start = path[depth].p_ext;
5027 if (!ex_start)
Darrick J. Wong6a797d22015-10-17 16:16:04 -04005028 return -EFSCORRUPTED;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005029
5030 ex_last = EXT_LAST_EXTENT(path[depth].p_hdr);
yangerkun42684962021-09-03 14:27:47 +08005031 /* leaf + sb + inode */
5032 credits = 3;
5033 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) {
5034 update = true;
5035 /* extent tree + sb + inode */
5036 credits = depth + 2;
5037 }
Namjae Jeon9eb79482014-02-23 15:18:59 -05005038
yangerkun42684962021-09-03 14:27:47 +08005039 restart_credits = ext4_writepage_trans_blocks(inode);
5040 err = ext4_datasem_ensure_credits(handle, inode, credits,
5041 restart_credits, 0);
yangerkun1811bc42021-09-03 14:27:48 +08005042 if (err) {
5043 if (err > 0)
5044 err = -EAGAIN;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005045 goto out;
yangerkun1811bc42021-09-03 14:27:48 +08005046 }
Namjae Jeon9eb79482014-02-23 15:18:59 -05005047
yangerkun42684962021-09-03 14:27:47 +08005048 err = ext4_ext_get_access(handle, inode, path + depth);
5049 if (err)
5050 goto out;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005051
Namjae Jeon9eb79482014-02-23 15:18:59 -05005052 while (ex_start <= ex_last) {
Namjae Jeon331573f2015-06-09 01:55:03 -04005053 if (SHIFT == SHIFT_LEFT) {
5054 le32_add_cpu(&ex_start->ee_block,
5055 -shift);
5056 /* Try to merge to the left. */
5057 if ((ex_start >
5058 EXT_FIRST_EXTENT(path[depth].p_hdr))
5059 &&
5060 ext4_ext_try_to_merge_right(inode,
5061 path, ex_start - 1))
5062 ex_last--;
5063 else
5064 ex_start++;
5065 } else {
5066 le32_add_cpu(&ex_last->ee_block, shift);
5067 ext4_ext_try_to_merge_right(inode, path,
5068 ex_last);
Lukas Czerner6dd834e2014-04-18 10:55:24 -04005069 ex_last--;
Namjae Jeon331573f2015-06-09 01:55:03 -04005070 }
Namjae Jeon9eb79482014-02-23 15:18:59 -05005071 }
5072 err = ext4_ext_dirty(handle, inode, path + depth);
5073 if (err)
5074 goto out;
5075
5076 if (--depth < 0 || !update)
5077 break;
5078 }
5079
5080 /* Update index too */
yangerkun42684962021-09-03 14:27:47 +08005081 err = ext4_ext_get_access(handle, inode, path + depth);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005082 if (err)
5083 goto out;
5084
Namjae Jeon331573f2015-06-09 01:55:03 -04005085 if (SHIFT == SHIFT_LEFT)
5086 le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
5087 else
5088 le32_add_cpu(&path[depth].p_idx->ei_block, shift);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005089 err = ext4_ext_dirty(handle, inode, path + depth);
5090 if (err)
5091 goto out;
5092
5093 /* we are done if current index is not a starting index */
5094 if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr))
5095 break;
5096
5097 depth--;
5098 }
5099
5100out:
5101 return err;
5102}
5103
5104/*
5105 * ext4_ext_shift_extents:
Namjae Jeon331573f2015-06-09 01:55:03 -04005106 * All the extents which lies in the range from @start to the last allocated
5107 * block for the @inode are shifted either towards left or right (depending
5108 * upon @SHIFT) by @shift blocks.
Namjae Jeon9eb79482014-02-23 15:18:59 -05005109 * On success, 0 is returned, error otherwise.
5110 */
5111static int
5112ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
Namjae Jeon331573f2015-06-09 01:55:03 -04005113 ext4_lblk_t start, ext4_lblk_t shift,
5114 enum SHIFT_DIRECTION SHIFT)
Namjae Jeon9eb79482014-02-23 15:18:59 -05005115{
5116 struct ext4_ext_path *path;
5117 int ret = 0, depth;
5118 struct ext4_extent *extent;
Namjae Jeon331573f2015-06-09 01:55:03 -04005119 ext4_lblk_t stop, *iterator, ex_start, ex_end;
yangerkun1811bc42021-09-03 14:27:48 +08005120 ext4_lblk_t tmp = EXT_MAX_BLOCKS;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005121
5122 /* Let path point to the last extent */
Roman Pen03e916f2017-01-08 21:00:35 -05005123 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5124 EXT4_EX_NOCACHE);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005125 if (IS_ERR(path))
5126 return PTR_ERR(path);
5127
5128 depth = path->p_depth;
5129 extent = path[depth].p_ext;
Theodore Ts'oee4bd0d92014-09-01 14:41:09 -04005130 if (!extent)
5131 goto out;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005132
Roman Pen2a9b8cb2017-01-08 20:59:35 -05005133 stop = le32_to_cpu(extent->ee_block);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005134
Namjae Jeon331573f2015-06-09 01:55:03 -04005135 /*
Eric Biggers349fa7d2018-04-12 11:48:09 -04005136 * For left shifts, make sure the hole on the left is big enough to
5137 * accommodate the shift. For right shifts, make sure the last extent
5138 * won't be shifted beyond EXT_MAX_BLOCKS.
Namjae Jeon331573f2015-06-09 01:55:03 -04005139 */
5140 if (SHIFT == SHIFT_LEFT) {
Roman Pen03e916f2017-01-08 21:00:35 -05005141 path = ext4_find_extent(inode, start - 1, &path,
5142 EXT4_EX_NOCACHE);
Namjae Jeon331573f2015-06-09 01:55:03 -04005143 if (IS_ERR(path))
5144 return PTR_ERR(path);
5145 depth = path->p_depth;
5146 extent = path[depth].p_ext;
5147 if (extent) {
5148 ex_start = le32_to_cpu(extent->ee_block);
5149 ex_end = le32_to_cpu(extent->ee_block) +
5150 ext4_ext_get_actual_len(extent);
5151 } else {
5152 ex_start = 0;
5153 ex_end = 0;
5154 }
Namjae Jeon9eb79482014-02-23 15:18:59 -05005155
Namjae Jeon331573f2015-06-09 01:55:03 -04005156 if ((start == ex_start && shift > ex_start) ||
5157 (shift > start - ex_end)) {
Eric Biggers349fa7d2018-04-12 11:48:09 -04005158 ret = -EINVAL;
5159 goto out;
5160 }
5161 } else {
5162 if (shift > EXT_MAX_BLOCKS -
5163 (stop + ext4_ext_get_actual_len(extent))) {
5164 ret = -EINVAL;
5165 goto out;
Namjae Jeon331573f2015-06-09 01:55:03 -04005166 }
Dmitry Monakhov8dc79ec2014-04-13 15:05:42 -04005167 }
Namjae Jeon9eb79482014-02-23 15:18:59 -05005168
Namjae Jeon331573f2015-06-09 01:55:03 -04005169 /*
5170 * In case of left shift, iterator points to start and it is increased
5171 * till we reach stop. In case of right shift, iterator points to stop
5172 * and it is decreased till we reach start.
5173 */
yangerkun1811bc42021-09-03 14:27:48 +08005174again:
Namjae Jeon331573f2015-06-09 01:55:03 -04005175 if (SHIFT == SHIFT_LEFT)
5176 iterator = &start;
5177 else
5178 iterator = &stop;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005179
yangerkun1811bc42021-09-03 14:27:48 +08005180 if (tmp != EXT_MAX_BLOCKS)
5181 *iterator = tmp;
5182
Roman Pen2a9b8cb2017-01-08 20:59:35 -05005183 /*
5184 * Its safe to start updating extents. Start and stop are unsigned, so
5185 * in case of right shift if extent with 0 block is reached, iterator
5186 * becomes NULL to indicate the end of the loop.
5187 */
5188 while (iterator && start <= stop) {
Roman Pen03e916f2017-01-08 21:00:35 -05005189 path = ext4_find_extent(inode, *iterator, &path,
5190 EXT4_EX_NOCACHE);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005191 if (IS_ERR(path))
5192 return PTR_ERR(path);
5193 depth = path->p_depth;
5194 extent = path[depth].p_ext;
Dmitry Monakhova18ed352014-04-13 15:41:13 -04005195 if (!extent) {
5196 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
Namjae Jeon331573f2015-06-09 01:55:03 -04005197 (unsigned long) *iterator);
Darrick J. Wong6a797d22015-10-17 16:16:04 -04005198 return -EFSCORRUPTED;
Dmitry Monakhova18ed352014-04-13 15:41:13 -04005199 }
Namjae Jeon331573f2015-06-09 01:55:03 -04005200 if (SHIFT == SHIFT_LEFT && *iterator >
5201 le32_to_cpu(extent->ee_block)) {
Namjae Jeon9eb79482014-02-23 15:18:59 -05005202 /* Hole, move to the next extent */
Dmitry Monakhovf8fb4f42014-08-30 23:50:56 -04005203 if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
5204 path[depth].p_ext++;
5205 } else {
Namjae Jeon331573f2015-06-09 01:55:03 -04005206 *iterator = ext4_ext_next_allocated_block(path);
Dmitry Monakhovf8fb4f42014-08-30 23:50:56 -04005207 continue;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005208 }
5209 }
Namjae Jeon331573f2015-06-09 01:55:03 -04005210
yangerkun1811bc42021-09-03 14:27:48 +08005211 tmp = *iterator;
Namjae Jeon331573f2015-06-09 01:55:03 -04005212 if (SHIFT == SHIFT_LEFT) {
5213 extent = EXT_LAST_EXTENT(path[depth].p_hdr);
5214 *iterator = le32_to_cpu(extent->ee_block) +
5215 ext4_ext_get_actual_len(extent);
5216 } else {
5217 extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
Roman Pen2a9b8cb2017-01-08 20:59:35 -05005218 if (le32_to_cpu(extent->ee_block) > 0)
5219 *iterator = le32_to_cpu(extent->ee_block) - 1;
5220 else
5221 /* Beginning is reached, end of the loop */
5222 iterator = NULL;
Namjae Jeon331573f2015-06-09 01:55:03 -04005223 /* Update path extent in case we need to stop */
5224 while (le32_to_cpu(extent->ee_block) < start)
5225 extent++;
5226 path[depth].p_ext = extent;
5227 }
Namjae Jeon9eb79482014-02-23 15:18:59 -05005228 ret = ext4_ext_shift_path_extents(path, shift, inode,
Namjae Jeon331573f2015-06-09 01:55:03 -04005229 handle, SHIFT);
yangerkun1811bc42021-09-03 14:27:48 +08005230 /* iterator can be NULL which means we should break */
5231 if (ret == -EAGAIN)
5232 goto again;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005233 if (ret)
5234 break;
5235 }
Theodore Ts'oee4bd0d92014-09-01 14:41:09 -04005236out:
5237 ext4_ext_drop_refs(path);
5238 kfree(path);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005239 return ret;
5240}
5241
5242/*
5243 * ext4_collapse_range:
5244 * This implements the fallocate's collapse range functionality for ext4
5245 * Returns: 0 and non-zero on error.
5246 */
Eric Biggers43f81672019-12-31 12:04:40 -06005247static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
Namjae Jeon9eb79482014-02-23 15:18:59 -05005248{
5249 struct super_block *sb = inode->i_sb;
Jan Karad4f52582021-02-04 18:05:42 +01005250 struct address_space *mapping = inode->i_mapping;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005251 ext4_lblk_t punch_start, punch_stop;
5252 handle_t *handle;
5253 unsigned int credits;
Namjae Jeona8680e02014-04-19 16:37:31 -04005254 loff_t new_size, ioffset;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005255 int ret;
5256
Theodore Ts'ob9576fc2015-05-15 00:24:10 -04005257 /*
5258 * We need to test this early because xfstests assumes that a
5259 * collapse range of (0, 1) will return EOPNOTSUPP if the file
5260 * system does not support collapse range.
5261 */
5262 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5263 return -EOPNOTSUPP;
5264
Eric Biggers9b02e492019-12-31 12:04:38 -06005265 /* Collapse range works only on fs cluster size aligned regions. */
5266 if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
Namjae Jeon9eb79482014-02-23 15:18:59 -05005267 return -EINVAL;
5268
Namjae Jeon9eb79482014-02-23 15:18:59 -05005269 trace_ext4_collapse_range(inode, offset, len);
5270
5271 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5272 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5273
Namjae Jeon1ce01c42014-04-10 22:58:20 -04005274 /* Call ext4_force_commit to flush all data in case of data=journal. */
5275 if (ext4_should_journal_data(inode)) {
5276 ret = ext4_force_commit(inode->i_sb);
5277 if (ret)
5278 return ret;
5279 }
5280
Al Viro59551022016-01-22 15:40:57 -05005281 inode_lock(inode);
Lukas Czerner23fffa92014-04-12 09:56:41 -04005282 /*
5283 * There is no need to overlap collapse range with EOF, in which case
5284 * it is effectively a truncate operation
5285 */
Eric Biggers9b02e492019-12-31 12:04:38 -06005286 if (offset + len >= inode->i_size) {
Lukas Czerner23fffa92014-04-12 09:56:41 -04005287 ret = -EINVAL;
5288 goto out_mutex;
5289 }
5290
Namjae Jeon9eb79482014-02-23 15:18:59 -05005291 /* Currently just for extent based files */
5292 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5293 ret = -EOPNOTSUPP;
5294 goto out_mutex;
5295 }
5296
Namjae Jeon9eb79482014-02-23 15:18:59 -05005297 /* Wait for existing dio to complete */
Namjae Jeon9eb79482014-02-23 15:18:59 -05005298 inode_dio_wait(inode);
5299
Jan Karaea3d7202015-12-07 14:28:03 -05005300 /*
5301 * Prevent page faults from reinstantiating pages we have released from
5302 * page cache.
5303 */
Jan Karad4f52582021-02-04 18:05:42 +01005304 filemap_invalidate_lock(mapping);
Ross Zwisler430657b2018-07-29 17:00:22 -04005305
5306 ret = ext4_break_layouts(inode);
5307 if (ret)
5308 goto out_mmap;
5309
Jan Kara32ebffd2015-12-07 14:31:11 -05005310 /*
5311 * Need to round down offset to be aligned with page size boundary
5312 * for page size > block size.
5313 */
5314 ioffset = round_down(offset, PAGE_SIZE);
5315 /*
5316 * Write tail of the last page before removed range since it will get
5317 * removed from the page cache below.
5318 */
Jan Karad4f52582021-02-04 18:05:42 +01005319 ret = filemap_write_and_wait_range(mapping, ioffset, offset);
Jan Kara32ebffd2015-12-07 14:31:11 -05005320 if (ret)
5321 goto out_mmap;
5322 /*
5323 * Write data that will be shifted to preserve them when discarding
5324 * page cache below. We are also protected from pages becoming dirty
Jan Karad4f52582021-02-04 18:05:42 +01005325 * by i_rwsem and invalidate_lock.
Jan Kara32ebffd2015-12-07 14:31:11 -05005326 */
Jan Karad4f52582021-02-04 18:05:42 +01005327 ret = filemap_write_and_wait_range(mapping, offset + len,
Jan Kara32ebffd2015-12-07 14:31:11 -05005328 LLONG_MAX);
5329 if (ret)
5330 goto out_mmap;
Jan Karaea3d7202015-12-07 14:28:03 -05005331 truncate_pagecache(inode, ioffset);
5332
Namjae Jeon9eb79482014-02-23 15:18:59 -05005333 credits = ext4_writepage_trans_blocks(inode);
5334 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5335 if (IS_ERR(handle)) {
5336 ret = PTR_ERR(handle);
Jan Karaea3d7202015-12-07 14:28:03 -05005337 goto out_mmap;
Namjae Jeon9eb79482014-02-23 15:18:59 -05005338 }
Harshad Shirwadkar7bbbe242021-12-23 12:21:38 -08005339 ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005340
5341 down_write(&EXT4_I(inode)->i_data_sem);
brookxu27bc4462020-08-17 15:36:15 +08005342 ext4_discard_preallocations(inode, 0);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005343
5344 ret = ext4_es_remove_extent(inode, punch_start,
Lukas Czerner2c1d2322014-04-18 10:43:21 -04005345 EXT_MAX_BLOCKS - punch_start);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005346 if (ret) {
5347 up_write(&EXT4_I(inode)->i_data_sem);
5348 goto out_stop;
5349 }
5350
5351 ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
5352 if (ret) {
5353 up_write(&EXT4_I(inode)->i_data_sem);
5354 goto out_stop;
5355 }
brookxu27bc4462020-08-17 15:36:15 +08005356 ext4_discard_preallocations(inode, 0);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005357
5358 ret = ext4_ext_shift_extents(inode, handle, punch_stop,
Namjae Jeon331573f2015-06-09 01:55:03 -04005359 punch_stop - punch_start, SHIFT_LEFT);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005360 if (ret) {
5361 up_write(&EXT4_I(inode)->i_data_sem);
5362 goto out_stop;
5363 }
5364
Eric Biggers9b02e492019-12-31 12:04:38 -06005365 new_size = inode->i_size - len;
Lukas Czerner9337d5d2014-04-18 10:48:25 -04005366 i_size_write(inode, new_size);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005367 EXT4_I(inode)->i_disksize = new_size;
5368
Namjae Jeon9eb79482014-02-23 15:18:59 -05005369 up_write(&EXT4_I(inode)->i_data_sem);
5370 if (IS_SYNC(inode))
5371 ext4_handle_sync(handle);
Deepa Dinamanieeca7ea2016-11-14 21:40:10 -05005372 inode->i_mtime = inode->i_ctime = current_time(inode);
Harshad Shirwadkar4209ae12020-04-26 18:34:37 -07005373 ret = ext4_mark_inode_dirty(handle, inode);
Jan Kara67a7d5f2017-05-29 13:24:55 -04005374 ext4_update_inode_fsync_trans(handle, inode, 1);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005375
5376out_stop:
5377 ext4_journal_stop(handle);
Jan Karaea3d7202015-12-07 14:28:03 -05005378out_mmap:
Jan Karad4f52582021-02-04 18:05:42 +01005379 filemap_invalidate_unlock(mapping);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005380out_mutex:
Al Viro59551022016-01-22 15:40:57 -05005381 inode_unlock(inode);
Namjae Jeon9eb79482014-02-23 15:18:59 -05005382 return ret;
5383}
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005384
Namjae Jeon331573f2015-06-09 01:55:03 -04005385/*
5386 * ext4_insert_range:
5387 * This function implements the FALLOC_FL_INSERT_RANGE flag of fallocate.
5388 * The data blocks starting from @offset to the EOF are shifted by @len
5389 * towards right to create a hole in the @inode. Inode size is increased
5390 * by len bytes.
5391 * Returns 0 on success, error otherwise.
5392 */
Eric Biggers43f81672019-12-31 12:04:40 -06005393static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
Namjae Jeon331573f2015-06-09 01:55:03 -04005394{
5395 struct super_block *sb = inode->i_sb;
Jan Karad4f52582021-02-04 18:05:42 +01005396 struct address_space *mapping = inode->i_mapping;
Namjae Jeon331573f2015-06-09 01:55:03 -04005397 handle_t *handle;
5398 struct ext4_ext_path *path;
5399 struct ext4_extent *extent;
5400 ext4_lblk_t offset_lblk, len_lblk, ee_start_lblk = 0;
5401 unsigned int credits, ee_len;
5402 int ret = 0, depth, split_flag = 0;
5403 loff_t ioffset;
5404
5405 /*
5406 * We need to test this early because xfstests assumes that an
5407 * insert range of (0, 1) will return EOPNOTSUPP if the file
5408 * system does not support insert range.
5409 */
5410 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
5411 return -EOPNOTSUPP;
5412
Eric Biggers9b02e492019-12-31 12:04:38 -06005413 /* Insert range works only on fs cluster size aligned regions. */
5414 if (!IS_ALIGNED(offset | len, EXT4_CLUSTER_SIZE(sb)))
Namjae Jeon331573f2015-06-09 01:55:03 -04005415 return -EINVAL;
5416
Namjae Jeon331573f2015-06-09 01:55:03 -04005417 trace_ext4_insert_range(inode, offset, len);
5418
5419 offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5420 len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb);
5421
5422 /* Call ext4_force_commit to flush all data in case of data=journal */
5423 if (ext4_should_journal_data(inode)) {
5424 ret = ext4_force_commit(inode->i_sb);
5425 if (ret)
5426 return ret;
5427 }
5428
Al Viro59551022016-01-22 15:40:57 -05005429 inode_lock(inode);
Namjae Jeon331573f2015-06-09 01:55:03 -04005430 /* Currently just for extent based files */
5431 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
5432 ret = -EOPNOTSUPP;
5433 goto out_mutex;
5434 }
5435
Eric Biggers9b02e492019-12-31 12:04:38 -06005436 /* Check whether the maximum file size would be exceeded */
5437 if (len > inode->i_sb->s_maxbytes - inode->i_size) {
Namjae Jeon331573f2015-06-09 01:55:03 -04005438 ret = -EFBIG;
5439 goto out_mutex;
5440 }
5441
Eric Biggers9b02e492019-12-31 12:04:38 -06005442 /* Offset must be less than i_size */
5443 if (offset >= inode->i_size) {
Namjae Jeon331573f2015-06-09 01:55:03 -04005444 ret = -EINVAL;
5445 goto out_mutex;
5446 }
5447
Namjae Jeon331573f2015-06-09 01:55:03 -04005448 /* Wait for existing dio to complete */
Namjae Jeon331573f2015-06-09 01:55:03 -04005449 inode_dio_wait(inode);
5450
Jan Karaea3d7202015-12-07 14:28:03 -05005451 /*
5452 * Prevent page faults from reinstantiating pages we have released from
5453 * page cache.
5454 */
Jan Karad4f52582021-02-04 18:05:42 +01005455 filemap_invalidate_lock(mapping);
Ross Zwisler430657b2018-07-29 17:00:22 -04005456
5457 ret = ext4_break_layouts(inode);
5458 if (ret)
5459 goto out_mmap;
5460
Jan Kara32ebffd2015-12-07 14:31:11 -05005461 /*
5462 * Need to round down to align start offset to page size boundary
5463 * for page size > block size.
5464 */
5465 ioffset = round_down(offset, PAGE_SIZE);
5466 /* Write out all dirty pages */
5467 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5468 LLONG_MAX);
5469 if (ret)
5470 goto out_mmap;
Jan Karaea3d7202015-12-07 14:28:03 -05005471 truncate_pagecache(inode, ioffset);
5472
Namjae Jeon331573f2015-06-09 01:55:03 -04005473 credits = ext4_writepage_trans_blocks(inode);
5474 handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
5475 if (IS_ERR(handle)) {
5476 ret = PTR_ERR(handle);
Jan Karaea3d7202015-12-07 14:28:03 -05005477 goto out_mmap;
Namjae Jeon331573f2015-06-09 01:55:03 -04005478 }
Harshad Shirwadkar7bbbe242021-12-23 12:21:38 -08005479 ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
Namjae Jeon331573f2015-06-09 01:55:03 -04005480
5481 /* Expand file to avoid data loss if there is error while shifting */
5482 inode->i_size += len;
5483 EXT4_I(inode)->i_disksize += len;
Deepa Dinamanieeca7ea2016-11-14 21:40:10 -05005484 inode->i_mtime = inode->i_ctime = current_time(inode);
Namjae Jeon331573f2015-06-09 01:55:03 -04005485 ret = ext4_mark_inode_dirty(handle, inode);
5486 if (ret)
5487 goto out_stop;
5488
5489 down_write(&EXT4_I(inode)->i_data_sem);
brookxu27bc4462020-08-17 15:36:15 +08005490 ext4_discard_preallocations(inode, 0);
Namjae Jeon331573f2015-06-09 01:55:03 -04005491
5492 path = ext4_find_extent(inode, offset_lblk, NULL, 0);
5493 if (IS_ERR(path)) {
5494 up_write(&EXT4_I(inode)->i_data_sem);
5495 goto out_stop;
5496 }
5497
5498 depth = ext_depth(inode);
5499 extent = path[depth].p_ext;
5500 if (extent) {
5501 ee_start_lblk = le32_to_cpu(extent->ee_block);
5502 ee_len = ext4_ext_get_actual_len(extent);
5503
5504 /*
5505 * If offset_lblk is not the starting block of extent, split
5506 * the extent @offset_lblk
5507 */
5508 if ((offset_lblk > ee_start_lblk) &&
5509 (offset_lblk < (ee_start_lblk + ee_len))) {
5510 if (ext4_ext_is_unwritten(extent))
5511 split_flag = EXT4_EXT_MARK_UNWRIT1 |
5512 EXT4_EXT_MARK_UNWRIT2;
5513 ret = ext4_split_extent_at(handle, inode, &path,
5514 offset_lblk, split_flag,
5515 EXT4_EX_NOCACHE |
5516 EXT4_GET_BLOCKS_PRE_IO |
5517 EXT4_GET_BLOCKS_METADATA_NOFAIL);
5518 }
5519
5520 ext4_ext_drop_refs(path);
5521 kfree(path);
5522 if (ret < 0) {
5523 up_write(&EXT4_I(inode)->i_data_sem);
5524 goto out_stop;
5525 }
Fabian Frederickedf15aa12016-09-15 11:39:52 -04005526 } else {
5527 ext4_ext_drop_refs(path);
5528 kfree(path);
Namjae Jeon331573f2015-06-09 01:55:03 -04005529 }
5530
5531 ret = ext4_es_remove_extent(inode, offset_lblk,
5532 EXT_MAX_BLOCKS - offset_lblk);
5533 if (ret) {
5534 up_write(&EXT4_I(inode)->i_data_sem);
5535 goto out_stop;
5536 }
5537
5538 /*
5539 * if offset_lblk lies in a hole which is at start of file, use
5540 * ee_start_lblk to shift extents
5541 */
5542 ret = ext4_ext_shift_extents(inode, handle,
5543 ee_start_lblk > offset_lblk ? ee_start_lblk : offset_lblk,
5544 len_lblk, SHIFT_RIGHT);
5545
5546 up_write(&EXT4_I(inode)->i_data_sem);
5547 if (IS_SYNC(inode))
5548 ext4_handle_sync(handle);
Jan Kara67a7d5f2017-05-29 13:24:55 -04005549 if (ret >= 0)
5550 ext4_update_inode_fsync_trans(handle, inode, 1);
Namjae Jeon331573f2015-06-09 01:55:03 -04005551
5552out_stop:
5553 ext4_journal_stop(handle);
Jan Karaea3d7202015-12-07 14:28:03 -05005554out_mmap:
Jan Karad4f52582021-02-04 18:05:42 +01005555 filemap_invalidate_unlock(mapping);
Namjae Jeon331573f2015-06-09 01:55:03 -04005556out_mutex:
Al Viro59551022016-01-22 15:40:57 -05005557 inode_unlock(inode);
Namjae Jeon331573f2015-06-09 01:55:03 -04005558 return ret;
5559}
5560
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005561/**
Theodore Ts'oc60990b2019-06-19 16:30:03 -04005562 * ext4_swap_extents() - Swap extents between two inodes
5563 * @handle: handle for this transaction
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005564 * @inode1: First inode
5565 * @inode2: Second inode
5566 * @lblk1: Start block for first inode
5567 * @lblk2: Start block for second inode
5568 * @count: Number of blocks to swap
zhenwei.pidcae0582018-03-26 01:44:03 -04005569 * @unwritten: Mark second inode's extents as unwritten after swap
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005570 * @erp: Pointer to save error value
5571 *
5572 * This helper routine does exactly what is promise "swap extents". All other
5573 * stuff such as page-cache locking consistency, bh mapping consistency or
5574 * extent's data copying must be performed by caller.
5575 * Locking:
5576 * i_mutex is held for both inodes
5577 * i_data_sem is locked for write for both inodes
5578 * Assumptions:
5579 * All pages from requested range are locked for both inodes
5580 */
5581int
5582ext4_swap_extents(handle_t *handle, struct inode *inode1,
zhenwei.pidcae0582018-03-26 01:44:03 -04005583 struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005584 ext4_lblk_t count, int unwritten, int *erp)
5585{
5586 struct ext4_ext_path *path1 = NULL;
5587 struct ext4_ext_path *path2 = NULL;
5588 int replaced_count = 0;
5589
5590 BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
5591 BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
Al Viro59551022016-01-22 15:40:57 -05005592 BUG_ON(!inode_is_locked(inode1));
5593 BUG_ON(!inode_is_locked(inode2));
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005594
5595 *erp = ext4_es_remove_extent(inode1, lblk1, count);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005596 if (unlikely(*erp))
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005597 return 0;
5598 *erp = ext4_es_remove_extent(inode2, lblk2, count);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005599 if (unlikely(*erp))
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005600 return 0;
5601
5602 while (count) {
5603 struct ext4_extent *ex1, *ex2, tmp_ex;
5604 ext4_lblk_t e1_blk, e2_blk;
5605 int e1_len, e2_len, len;
5606 int split = 0;
5607
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04005608 path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
Viresh Kumara1c83682015-08-12 15:59:44 +05305609 if (IS_ERR(path1)) {
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005610 *erp = PTR_ERR(path1);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005611 path1 = NULL;
5612 finish:
5613 count = 0;
5614 goto repeat;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005615 }
Theodore Ts'oed8a1a72014-09-01 14:43:09 -04005616 path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
Viresh Kumara1c83682015-08-12 15:59:44 +05305617 if (IS_ERR(path2)) {
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005618 *erp = PTR_ERR(path2);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005619 path2 = NULL;
5620 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005621 }
5622 ex1 = path1[path1->p_depth].p_ext;
5623 ex2 = path2[path2->p_depth].p_ext;
Keyur Patele4d7f2d2020-06-10 23:19:46 -04005624 /* Do we have something to swap ? */
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005625 if (unlikely(!ex2 || !ex1))
Theodore Ts'o19008f62014-08-31 15:03:14 -04005626 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005627
5628 e1_blk = le32_to_cpu(ex1->ee_block);
5629 e2_blk = le32_to_cpu(ex2->ee_block);
5630 e1_len = ext4_ext_get_actual_len(ex1);
5631 e2_len = ext4_ext_get_actual_len(ex2);
5632
5633 /* Hole handling */
5634 if (!in_range(lblk1, e1_blk, e1_len) ||
5635 !in_range(lblk2, e2_blk, e2_len)) {
5636 ext4_lblk_t next1, next2;
5637
5638 /* if hole after extent, then go to next extent */
5639 next1 = ext4_ext_next_allocated_block(path1);
5640 next2 = ext4_ext_next_allocated_block(path2);
5641 /* If hole before extent, then shift to that extent */
5642 if (e1_blk > lblk1)
5643 next1 = e1_blk;
5644 if (e2_blk > lblk2)
Maninder Singh4e562012017-08-06 01:33:07 -04005645 next2 = e2_blk;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005646 /* Do we have something to swap */
5647 if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
Theodore Ts'o19008f62014-08-31 15:03:14 -04005648 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005649 /* Move to the rightest boundary */
5650 len = next1 - lblk1;
5651 if (len < next2 - lblk2)
5652 len = next2 - lblk2;
5653 if (len > count)
5654 len = count;
5655 lblk1 += len;
5656 lblk2 += len;
5657 count -= len;
5658 goto repeat;
5659 }
5660
5661 /* Prepare left boundary */
5662 if (e1_blk < lblk1) {
5663 split = 1;
5664 *erp = ext4_force_split_extent_at(handle, inode1,
Theodore Ts'odfe50802014-09-01 14:37:09 -04005665 &path1, lblk1, 0);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005666 if (unlikely(*erp))
5667 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005668 }
5669 if (e2_blk < lblk2) {
5670 split = 1;
5671 *erp = ext4_force_split_extent_at(handle, inode2,
Theodore Ts'odfe50802014-09-01 14:37:09 -04005672 &path2, lblk2, 0);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005673 if (unlikely(*erp))
5674 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005675 }
Theodore Ts'odfe50802014-09-01 14:37:09 -04005676 /* ext4_split_extent_at() may result in leaf extent split,
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005677 * path must to be revalidated. */
5678 if (split)
5679 goto repeat;
5680
5681 /* Prepare right boundary */
5682 len = count;
5683 if (len > e1_blk + e1_len - lblk1)
5684 len = e1_blk + e1_len - lblk1;
5685 if (len > e2_blk + e2_len - lblk2)
5686 len = e2_blk + e2_len - lblk2;
5687
5688 if (len != e1_len) {
5689 split = 1;
5690 *erp = ext4_force_split_extent_at(handle, inode1,
Theodore Ts'odfe50802014-09-01 14:37:09 -04005691 &path1, lblk1 + len, 0);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005692 if (unlikely(*erp))
5693 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005694 }
5695 if (len != e2_len) {
5696 split = 1;
5697 *erp = ext4_force_split_extent_at(handle, inode2,
Theodore Ts'odfe50802014-09-01 14:37:09 -04005698 &path2, lblk2 + len, 0);
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005699 if (*erp)
Theodore Ts'o19008f62014-08-31 15:03:14 -04005700 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005701 }
Theodore Ts'odfe50802014-09-01 14:37:09 -04005702 /* ext4_split_extent_at() may result in leaf extent split,
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005703 * path must to be revalidated. */
5704 if (split)
5705 goto repeat;
5706
5707 BUG_ON(e2_len != e1_len);
5708 *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005709 if (unlikely(*erp))
5710 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005711 *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005712 if (unlikely(*erp))
5713 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005714
5715 /* Both extents are fully inside boundaries. Swap it now */
5716 tmp_ex = *ex1;
5717 ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
5718 ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
5719 ex1->ee_len = cpu_to_le16(e2_len);
5720 ex2->ee_len = cpu_to_le16(e1_len);
5721 if (unwritten)
5722 ext4_ext_mark_unwritten(ex2);
5723 if (ext4_ext_is_unwritten(&tmp_ex))
5724 ext4_ext_mark_unwritten(ex1);
5725
5726 ext4_ext_try_to_merge(handle, inode2, path2, ex2);
5727 ext4_ext_try_to_merge(handle, inode1, path1, ex1);
5728 *erp = ext4_ext_dirty(handle, inode2, path2 +
5729 path2->p_depth);
Theodore Ts'o19008f62014-08-31 15:03:14 -04005730 if (unlikely(*erp))
5731 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005732 *erp = ext4_ext_dirty(handle, inode1, path1 +
5733 path1->p_depth);
5734 /*
5735 * Looks scarry ah..? second inode already points to new blocks,
5736 * and it was successfully dirtied. But luckily error may happen
5737 * only due to journal error, so full transaction will be
5738 * aborted anyway.
5739 */
Theodore Ts'o19008f62014-08-31 15:03:14 -04005740 if (unlikely(*erp))
5741 goto finish;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005742 lblk1 += len;
5743 lblk2 += len;
5744 replaced_count += len;
5745 count -= len;
5746
5747 repeat:
Theodore Ts'ob7ea89a2014-09-01 14:39:09 -04005748 ext4_ext_drop_refs(path1);
5749 kfree(path1);
5750 ext4_ext_drop_refs(path2);
5751 kfree(path2);
5752 path1 = path2 = NULL;
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005753 }
Dmitry Monakhovfcf6b1b72014-08-30 23:52:19 -04005754 return replaced_count;
5755}
Eric Whitney0b02f4c2018-10-01 14:19:37 -04005756
5757/*
5758 * ext4_clu_mapped - determine whether any block in a logical cluster has
5759 * been mapped to a physical cluster
5760 *
5761 * @inode - file containing the logical cluster
5762 * @lclu - logical cluster of interest
5763 *
5764 * Returns 1 if any block in the logical cluster is mapped, signifying
5765 * that a physical cluster has been allocated for it. Otherwise,
5766 * returns 0. Can also return negative error codes. Derived from
5767 * ext4_ext_map_blocks().
5768 */
5769int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu)
5770{
5771 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5772 struct ext4_ext_path *path;
5773 int depth, mapped = 0, err = 0;
5774 struct ext4_extent *extent;
5775 ext4_lblk_t first_lblk, first_lclu, last_lclu;
5776
5777 /* search for the extent closest to the first block in the cluster */
5778 path = ext4_find_extent(inode, EXT4_C2B(sbi, lclu), NULL, 0);
5779 if (IS_ERR(path)) {
5780 err = PTR_ERR(path);
5781 path = NULL;
5782 goto out;
5783 }
5784
5785 depth = ext_depth(inode);
5786
5787 /*
5788 * A consistent leaf must not be empty. This situation is possible,
5789 * though, _during_ tree modification, and it's why an assert can't
5790 * be put in ext4_find_extent().
5791 */
5792 if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
5793 EXT4_ERROR_INODE(inode,
5794 "bad extent address - lblock: %lu, depth: %d, pblock: %lld",
5795 (unsigned long) EXT4_C2B(sbi, lclu),
5796 depth, path[depth].p_block);
5797 err = -EFSCORRUPTED;
5798 goto out;
5799 }
5800
5801 extent = path[depth].p_ext;
5802
5803 /* can't be mapped if the extent tree is empty */
5804 if (extent == NULL)
5805 goto out;
5806
5807 first_lblk = le32_to_cpu(extent->ee_block);
5808 first_lclu = EXT4_B2C(sbi, first_lblk);
5809
5810 /*
5811 * Three possible outcomes at this point - found extent spanning
5812 * the target cluster, to the left of the target cluster, or to the
5813 * right of the target cluster. The first two cases are handled here.
5814 * The last case indicates the target cluster is not mapped.
5815 */
5816 if (lclu >= first_lclu) {
5817 last_lclu = EXT4_B2C(sbi, first_lblk +
5818 ext4_ext_get_actual_len(extent) - 1);
5819 if (lclu <= last_lclu) {
5820 mapped = 1;
5821 } else {
5822 first_lblk = ext4_ext_next_allocated_block(path);
5823 first_lclu = EXT4_B2C(sbi, first_lblk);
5824 if (lclu == first_lclu)
5825 mapped = 1;
5826 }
5827 }
5828
5829out:
5830 ext4_ext_drop_refs(path);
5831 kfree(path);
5832
5833 return err ? err : mapped;
5834}
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07005835
5836/*
5837 * Updates physical block address and unwritten status of extent
5838 * starting at lblk start and of len. If such an extent doesn't exist,
5839 * this function splits the extent tree appropriately to create an
5840 * extent like this. This function is called in the fast commit
5841 * replay path. Returns 0 on success and error on failure.
5842 */
5843int ext4_ext_replay_update_ex(struct inode *inode, ext4_lblk_t start,
5844 int len, int unwritten, ext4_fsblk_t pblk)
5845{
5846 struct ext4_ext_path *path = NULL, *ppath;
5847 struct ext4_extent *ex;
5848 int ret;
5849
5850 path = ext4_find_extent(inode, start, NULL, 0);
Dan Carpenterbc185462020-10-23 14:22:32 +03005851 if (IS_ERR(path))
5852 return PTR_ERR(path);
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07005853 ex = path[path->p_depth].p_ext;
5854 if (!ex) {
5855 ret = -EFSCORRUPTED;
5856 goto out;
5857 }
5858
5859 if (le32_to_cpu(ex->ee_block) != start ||
5860 ext4_ext_get_actual_len(ex) != len) {
5861 /* We need to split this extent to match our extent first */
5862 ppath = path;
5863 down_write(&EXT4_I(inode)->i_data_sem);
5864 ret = ext4_force_split_extent_at(NULL, inode, &ppath, start, 1);
5865 up_write(&EXT4_I(inode)->i_data_sem);
5866 if (ret)
5867 goto out;
5868 kfree(path);
5869 path = ext4_find_extent(inode, start, NULL, 0);
5870 if (IS_ERR(path))
5871 return -1;
5872 ppath = path;
5873 ex = path[path->p_depth].p_ext;
5874 WARN_ON(le32_to_cpu(ex->ee_block) != start);
5875 if (ext4_ext_get_actual_len(ex) != len) {
5876 down_write(&EXT4_I(inode)->i_data_sem);
5877 ret = ext4_force_split_extent_at(NULL, inode, &ppath,
5878 start + len, 1);
5879 up_write(&EXT4_I(inode)->i_data_sem);
5880 if (ret)
5881 goto out;
5882 kfree(path);
5883 path = ext4_find_extent(inode, start, NULL, 0);
5884 if (IS_ERR(path))
5885 return -EINVAL;
5886 ex = path[path->p_depth].p_ext;
5887 }
5888 }
5889 if (unwritten)
5890 ext4_ext_mark_unwritten(ex);
5891 else
5892 ext4_ext_mark_initialized(ex);
5893 ext4_ext_store_pblock(ex, pblk);
5894 down_write(&EXT4_I(inode)->i_data_sem);
5895 ret = ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5896 up_write(&EXT4_I(inode)->i_data_sem);
5897out:
5898 ext4_ext_drop_refs(path);
5899 kfree(path);
5900 ext4_mark_inode_dirty(NULL, inode);
5901 return ret;
5902}
5903
5904/* Try to shrink the extent tree */
5905void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end)
5906{
5907 struct ext4_ext_path *path = NULL;
5908 struct ext4_extent *ex;
5909 ext4_lblk_t old_cur, cur = 0;
5910
5911 while (cur < end) {
5912 path = ext4_find_extent(inode, cur, NULL, 0);
5913 if (IS_ERR(path))
5914 return;
5915 ex = path[path->p_depth].p_ext;
5916 if (!ex) {
5917 ext4_ext_drop_refs(path);
5918 kfree(path);
5919 ext4_mark_inode_dirty(NULL, inode);
5920 return;
5921 }
5922 old_cur = cur;
5923 cur = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5924 if (cur <= old_cur)
5925 cur = old_cur + 1;
5926 ext4_ext_try_to_merge(NULL, inode, path, ex);
5927 down_write(&EXT4_I(inode)->i_data_sem);
5928 ext4_ext_dirty(NULL, inode, &path[path->p_depth]);
5929 up_write(&EXT4_I(inode)->i_data_sem);
5930 ext4_mark_inode_dirty(NULL, inode);
5931 ext4_ext_drop_refs(path);
5932 kfree(path);
5933 }
5934}
5935
5936/* Check if *cur is a hole and if it is, skip it */
Theodore Ts'o1fd95c02021-09-02 11:36:01 -04005937static int skip_hole(struct inode *inode, ext4_lblk_t *cur)
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07005938{
5939 int ret;
5940 struct ext4_map_blocks map;
5941
5942 map.m_lblk = *cur;
5943 map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur;
5944
5945 ret = ext4_map_blocks(NULL, inode, &map, 0);
Theodore Ts'o1fd95c02021-09-02 11:36:01 -04005946 if (ret < 0)
5947 return ret;
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07005948 if (ret != 0)
Theodore Ts'o1fd95c02021-09-02 11:36:01 -04005949 return 0;
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07005950 *cur = *cur + map.m_len;
Theodore Ts'o1fd95c02021-09-02 11:36:01 -04005951 return 0;
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07005952}
5953
5954/* Count number of blocks used by this inode and update i_blocks */
5955int ext4_ext_replay_set_iblocks(struct inode *inode)
5956{
5957 struct ext4_ext_path *path = NULL, *path2 = NULL;
5958 struct ext4_extent *ex;
5959 ext4_lblk_t cur = 0, end;
5960 int numblks = 0, i, ret = 0;
5961 ext4_fsblk_t cmp1, cmp2;
5962 struct ext4_map_blocks map;
5963
5964 /* Determin the size of the file first */
5965 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
5966 EXT4_EX_NOCACHE);
5967 if (IS_ERR(path))
5968 return PTR_ERR(path);
5969 ex = path[path->p_depth].p_ext;
5970 if (!ex) {
5971 ext4_ext_drop_refs(path);
5972 kfree(path);
5973 goto out;
5974 }
5975 end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
5976 ext4_ext_drop_refs(path);
5977 kfree(path);
5978
5979 /* Count the number of data blocks */
5980 cur = 0;
5981 while (cur < end) {
5982 map.m_lblk = cur;
5983 map.m_len = end - cur;
5984 ret = ext4_map_blocks(NULL, inode, &map, 0);
5985 if (ret < 0)
5986 break;
5987 if (ret > 0)
5988 numblks += ret;
5989 cur = cur + map.m_len;
5990 }
5991
5992 /*
5993 * Count the number of extent tree blocks. We do it by looking up
5994 * two successive extents and determining the difference between
5995 * their paths. When path is different for 2 successive extents
5996 * we compare the blocks in the path at each level and increment
5997 * iblocks by total number of differences found.
5998 */
5999 cur = 0;
Theodore Ts'o1fd95c02021-09-02 11:36:01 -04006000 ret = skip_hole(inode, &cur);
6001 if (ret < 0)
6002 goto out;
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07006003 path = ext4_find_extent(inode, cur, NULL, 0);
6004 if (IS_ERR(path))
6005 goto out;
6006 numblks += path->p_depth;
6007 ext4_ext_drop_refs(path);
6008 kfree(path);
6009 while (cur < end) {
6010 path = ext4_find_extent(inode, cur, NULL, 0);
6011 if (IS_ERR(path))
6012 break;
6013 ex = path[path->p_depth].p_ext;
6014 if (!ex) {
6015 ext4_ext_drop_refs(path);
6016 kfree(path);
6017 return 0;
6018 }
6019 cur = max(cur + 1, le32_to_cpu(ex->ee_block) +
6020 ext4_ext_get_actual_len(ex));
Theodore Ts'o1fd95c02021-09-02 11:36:01 -04006021 ret = skip_hole(inode, &cur);
6022 if (ret < 0) {
6023 ext4_ext_drop_refs(path);
6024 kfree(path);
6025 break;
6026 }
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07006027 path2 = ext4_find_extent(inode, cur, NULL, 0);
6028 if (IS_ERR(path2)) {
6029 ext4_ext_drop_refs(path);
6030 kfree(path);
6031 break;
6032 }
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07006033 for (i = 0; i <= max(path->p_depth, path2->p_depth); i++) {
6034 cmp1 = cmp2 = 0;
6035 if (i <= path->p_depth)
6036 cmp1 = path[i].p_bh ?
6037 path[i].p_bh->b_blocknr : 0;
6038 if (i <= path2->p_depth)
6039 cmp2 = path2[i].p_bh ?
6040 path2[i].p_bh->b_blocknr : 0;
6041 if (cmp1 != cmp2 && cmp2 != 0)
6042 numblks++;
6043 }
6044 ext4_ext_drop_refs(path);
6045 ext4_ext_drop_refs(path2);
6046 kfree(path);
6047 kfree(path2);
6048 }
6049
6050out:
6051 inode->i_blocks = numblks << (inode->i_sb->s_blocksize_bits - 9);
6052 ext4_mark_inode_dirty(NULL, inode);
6053 return 0;
6054}
6055
6056int ext4_ext_clear_bb(struct inode *inode)
6057{
6058 struct ext4_ext_path *path = NULL;
6059 struct ext4_extent *ex;
6060 ext4_lblk_t cur = 0, end;
6061 int j, ret = 0;
6062 struct ext4_map_blocks map;
6063
Harshad Shirwadkar1ebf2172021-10-15 11:25:13 -07006064 if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA))
6065 return 0;
6066
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07006067 /* Determin the size of the file first */
6068 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
6069 EXT4_EX_NOCACHE);
6070 if (IS_ERR(path))
6071 return PTR_ERR(path);
6072 ex = path[path->p_depth].p_ext;
6073 if (!ex) {
6074 ext4_ext_drop_refs(path);
6075 kfree(path);
6076 return 0;
6077 }
6078 end = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
6079 ext4_ext_drop_refs(path);
6080 kfree(path);
6081
6082 cur = 0;
6083 while (cur < end) {
6084 map.m_lblk = cur;
6085 map.m_len = end - cur;
6086 ret = ext4_map_blocks(NULL, inode, &map, 0);
6087 if (ret < 0)
6088 break;
6089 if (ret > 0) {
6090 path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
6091 if (!IS_ERR_OR_NULL(path)) {
6092 for (j = 0; j < path->p_depth; j++) {
6093
6094 ext4_mb_mark_bb(inode->i_sb,
6095 path[j].p_block, 1, 0);
6096 }
6097 ext4_ext_drop_refs(path);
6098 kfree(path);
6099 }
6100 ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
6101 }
6102 cur = cur + map.m_len;
6103 }
6104
6105 return 0;
6106}