Merge branch 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"The changes range through all types: cleanups, core chagnes, sanity
checks, fixes, other user visible changes, detailed list below:
- deprecated: user transaction ioctl
- mount option ssd does not change allocation alignments
- degraded read-write mount is allowed if all the raid profile
constraints are met, now based on more accurate check
- defrag: do not reset compression afterwards; the NOCOMPRESS flag
can be now overriden by defrag
- prep work for better extent reference tracking (related to the
qgroup slowness with balance)
- prep work for compression heuristics
- memory allocation reductions (may help latencies on a loaded
system)
- better accounting for io waiting states
- error handling improvements (removed BUGs)
- added more sanity checks for shared refs
- fix readdir vs pagefault deadlock under some circumstances
- fix for 'no-hole' mode, certain combination of compressed and
inline extents
- send: fix emission of invalid clone operations
- fixup file mode if setting acls fail
- more fixes from fuzzing
- oher cleanups"
* 'for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (104 commits)
btrfs: submit superblock io with REQ_META and REQ_PRIO
btrfs: remove unnecessary memory barrier in btrfs_direct_IO
btrfs: remove superfluous chunk_tree argument from btrfs_alloc_dev_extent
btrfs: Remove chunk_objectid parameter of btrfs_alloc_dev_extent
btrfs: pass fs_info to btrfs_del_root instead of tree_root
Btrfs: add one more sanity check for shared ref type
Btrfs: remove BUG_ON in __add_tree_block
Btrfs: remove BUG() in add_data_reference
Btrfs: remove BUG() in print_extent_item
Btrfs: remove BUG() in btrfs_extent_inline_ref_size
Btrfs: convert to use btrfs_get_extent_inline_ref_type
Btrfs: add a helper to retrive extent inline ref type
btrfs: scrub: simplify scrub worker initialization
btrfs: scrub: clean up division in scrub_find_csum
btrfs: scrub: clean up division in __scrub_mark_bitmap
btrfs: scrub: use bool for flush_all_writes
btrfs: preserve i_mode if __btrfs_set_acl() fails
btrfs: Remove extraneous chunk_objectid variable
btrfs: Remove chunk_objectid argument from btrfs_make_block_group
btrfs: Remove extra parentheses from condition in copy_items()
...
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 24bcd5c..17ad018 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -392,20 +392,23 @@ static noinline int add_async_extent(struct async_cow *cow,
return 0;
}
-static inline int inode_need_compress(struct inode *inode)
+static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
/* force compress */
if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
return 1;
+ /* defrag ioctl */
+ if (BTRFS_I(inode)->defrag_compress)
+ return 1;
/* bad compression ratios */
if (BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS)
return 0;
if (btrfs_test_opt(fs_info, COMPRESS) ||
BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS ||
- BTRFS_I(inode)->force_compress)
- return 1;
+ BTRFS_I(inode)->prop_compress)
+ return btrfs_compress_heuristic(inode, start, end);
return 0;
}
@@ -503,7 +506,7 @@ static noinline void compress_file_range(struct inode *inode,
* inode has not been flagged as nocompress. This flag can
* change at any time if we discover bad compression ratios.
*/
- if (inode_need_compress(inode)) {
+ if (inode_need_compress(inode, start, end)) {
WARN_ON(pages);
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!pages) {
@@ -511,8 +514,10 @@ static noinline void compress_file_range(struct inode *inode,
goto cont;
}
- if (BTRFS_I(inode)->force_compress)
- compress_type = BTRFS_I(inode)->force_compress;
+ if (BTRFS_I(inode)->defrag_compress)
+ compress_type = BTRFS_I(inode)->defrag_compress;
+ else if (BTRFS_I(inode)->prop_compress)
+ compress_type = BTRFS_I(inode)->prop_compress;
/*
* we need to call clear_page_dirty_for_io on each
@@ -645,7 +650,7 @@ static noinline void compress_file_range(struct inode *inode,
/* flag the file so we don't compress in the future */
if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
- !(BTRFS_I(inode)->force_compress)) {
+ !(BTRFS_I(inode)->prop_compress)) {
BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
}
}
@@ -1381,7 +1386,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
* we fall into common COW way.
*/
if (!nolock) {
- err = btrfs_start_write_no_snapshoting(root);
+ err = btrfs_start_write_no_snapshotting(root);
if (!err)
goto out_check;
}
@@ -1393,12 +1398,12 @@ static noinline int run_delalloc_nocow(struct inode *inode,
if (csum_exist_in_range(fs_info, disk_bytenr,
num_bytes)) {
if (!nolock)
- btrfs_end_write_no_snapshoting(root);
+ btrfs_end_write_no_snapshotting(root);
goto out_check;
}
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
if (!nolock)
- btrfs_end_write_no_snapshoting(root);
+ btrfs_end_write_no_snapshotting(root);
goto out_check;
}
nocow = 1;
@@ -1415,7 +1420,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
if (extent_end <= start) {
path->slots[0]++;
if (!nolock && nocow)
- btrfs_end_write_no_snapshoting(root);
+ btrfs_end_write_no_snapshotting(root);
if (nocow)
btrfs_dec_nocow_writers(fs_info, disk_bytenr);
goto next_slot;
@@ -1438,7 +1443,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
NULL);
if (ret) {
if (!nolock && nocow)
- btrfs_end_write_no_snapshoting(root);
+ btrfs_end_write_no_snapshotting(root);
if (nocow)
btrfs_dec_nocow_writers(fs_info,
disk_bytenr);
@@ -1459,7 +1464,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
if (!nolock && nocow)
- btrfs_end_write_no_snapshoting(root);
+ btrfs_end_write_no_snapshotting(root);
if (nocow)
btrfs_dec_nocow_writers(fs_info,
disk_bytenr);
@@ -1499,7 +1504,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
PAGE_UNLOCK | PAGE_SET_PRIVATE2);
if (!nolock && nocow)
- btrfs_end_write_no_snapshoting(root);
+ btrfs_end_write_no_snapshotting(root);
cur_offset = extent_end;
/*
@@ -1576,7 +1581,7 @@ static int run_delalloc_range(void *private_data, struct page *locked_page,
} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
- } else if (!inode_need_compress(inode)) {
+ } else if (!inode_need_compress(inode, start, end)) {
ret = cow_file_range(inode, locked_page, start, end, end,
page_started, nr_written, 1, NULL);
} else {
@@ -1796,10 +1801,11 @@ static void btrfs_clear_bit_hook(void *private_data,
u64 len = state->end + 1 - state->start;
u32 num_extents = count_max_extents(len);
- spin_lock(&inode->lock);
- if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG))
+ if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
+ spin_lock(&inode->lock);
inode->defrag_bytes -= len;
- spin_unlock(&inode->lock);
+ spin_unlock(&inode->lock);
+ }
/*
* set_bit and clear bit hooks normally require _irqsave/restore
@@ -3159,8 +3165,6 @@ static int __readpage_endio_check(struct inode *inode,
memset(kaddr + pgoff, 1, len);
flush_dcache_page(page);
kunmap_atomic(kaddr);
- if (csum_expected == 0)
- return 0;
return -EIO;
}
@@ -5055,7 +5059,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
if (newsize > oldsize) {
/*
- * Don't do an expanding truncate while snapshoting is ongoing.
+ * Don't do an expanding truncate while snapshotting is ongoing.
* This is to ensure the snapshot captures a fully consistent
* state of this file - if the snapshot captures this expanding
* truncation, it must capture all writes that happened before
@@ -5064,13 +5068,13 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
btrfs_wait_for_snapshot_creation(root);
ret = btrfs_cont_expand(inode, oldsize, newsize);
if (ret) {
- btrfs_end_write_no_snapshoting(root);
+ btrfs_end_write_no_snapshotting(root);
return ret;
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
- btrfs_end_write_no_snapshoting(root);
+ btrfs_end_write_no_snapshotting(root);
return PTR_ERR(trans);
}
@@ -5078,7 +5082,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
pagecache_isize_extended(inode, oldsize, newsize);
ret = btrfs_update_inode(trans, root, inode);
- btrfs_end_write_no_snapshoting(root);
+ btrfs_end_write_no_snapshotting(root);
btrfs_end_transaction(trans);
} else {
@@ -5873,25 +5877,74 @@ unsigned char btrfs_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
+/*
+ * All this infrastructure exists because dir_emit can fault, and we are holding
+ * the tree lock when doing readdir. For now just allocate a buffer and copy
+ * our information into that, and then dir_emit from the buffer. This is
+ * similar to what NFS does, only we don't keep the buffer around in pagecache
+ * because I'm afraid I'll mess that up. Long term we need to make filldir do
+ * copy_to_user_inatomic so we don't have to worry about page faulting under the
+ * tree lock.
+ */
+static int btrfs_opendir(struct inode *inode, struct file *file)
+{
+ struct btrfs_file_private *private;
+
+ private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
+ if (!private)
+ return -ENOMEM;
+ private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!private->filldir_buf) {
+ kfree(private);
+ return -ENOMEM;
+ }
+ file->private_data = private;
+ return 0;
+}
+
+struct dir_entry {
+ u64 ino;
+ u64 offset;
+ unsigned type;
+ int name_len;
+};
+
+static int btrfs_filldir(void *addr, int entries, struct dir_context *ctx)
+{
+ while (entries--) {
+ struct dir_entry *entry = addr;
+ char *name = (char *)(entry + 1);
+
+ ctx->pos = entry->offset;
+ if (!dir_emit(ctx, name, entry->name_len, entry->ino,
+ entry->type))
+ return 1;
+ addr += sizeof(struct dir_entry) + entry->name_len;
+ ctx->pos++;
+ }
+ return 0;
+}
+
static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_file_private *private = file->private_data;
struct btrfs_dir_item *di;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_path *path;
+ void *addr;
struct list_head ins_list;
struct list_head del_list;
int ret;
struct extent_buffer *leaf;
int slot;
- unsigned char d_type;
- int over = 0;
- char tmp_name[32];
char *name_ptr;
int name_len;
+ int entries = 0;
+ int total_len = 0;
bool put = false;
struct btrfs_key location;
@@ -5902,12 +5955,14 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (!path)
return -ENOMEM;
+ addr = private->filldir_buf;
path->reada = READA_FORWARD;
INIT_LIST_HEAD(&ins_list);
INIT_LIST_HEAD(&del_list);
put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+again:
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = ctx->pos;
key.objectid = btrfs_ino(BTRFS_I(inode));
@@ -5917,6 +5972,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
goto err;
while (1) {
+ struct dir_entry *entry;
+
leaf = path->nodes[0];
slot = path->slots[0];
if (slot >= btrfs_header_nritems(leaf)) {
@@ -5938,41 +5995,43 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
goto next;
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
goto next;
-
- ctx->pos = found_key.offset;
-
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
if (verify_dir_item(fs_info, leaf, slot, di))
goto next;
name_len = btrfs_dir_name_len(leaf, di);
- if (name_len <= sizeof(tmp_name)) {
- name_ptr = tmp_name;
- } else {
- name_ptr = kmalloc(name_len, GFP_KERNEL);
- if (!name_ptr) {
- ret = -ENOMEM;
- goto err;
- }
+ if ((total_len + sizeof(struct dir_entry) + name_len) >=
+ PAGE_SIZE) {
+ btrfs_release_path(path);
+ ret = btrfs_filldir(private->filldir_buf, entries, ctx);
+ if (ret)
+ goto nopos;
+ addr = private->filldir_buf;
+ entries = 0;
+ total_len = 0;
+ goto again;
}
+
+ entry = addr;
+ entry->name_len = name_len;
+ name_ptr = (char *)(entry + 1);
read_extent_buffer(leaf, name_ptr, (unsigned long)(di + 1),
name_len);
-
- d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
+ entry->type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
btrfs_dir_item_key_to_cpu(leaf, di, &location);
-
- over = !dir_emit(ctx, name_ptr, name_len, location.objectid,
- d_type);
-
- if (name_ptr != tmp_name)
- kfree(name_ptr);
-
- if (over)
- goto nopos;
- ctx->pos++;
+ entry->ino = location.objectid;
+ entry->offset = found_key.offset;
+ entries++;
+ addr += sizeof(struct dir_entry) + name_len;
+ total_len += sizeof(struct dir_entry) + name_len;
next:
path->slots[0]++;
}
+ btrfs_release_path(path);
+
+ ret = btrfs_filldir(private->filldir_buf, entries, ctx);
+ if (ret)
+ goto nopos;
ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
if (ret)
@@ -6185,6 +6244,37 @@ static int btrfs_insert_inode_locked(struct inode *inode)
btrfs_find_actor, &args);
}
+/*
+ * Inherit flags from the parent inode.
+ *
+ * Currently only the compression flags and the cow flags are inherited.
+ */
+static void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
+{
+ unsigned int flags;
+
+ if (!dir)
+ return;
+
+ flags = BTRFS_I(dir)->flags;
+
+ if (flags & BTRFS_INODE_NOCOMPRESS) {
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+ } else if (flags & BTRFS_INODE_COMPRESS) {
+ BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+ BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
+ }
+
+ if (flags & BTRFS_INODE_NODATACOW) {
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
+ if (S_ISREG(inode->i_mode))
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
+ }
+
+ btrfs_update_iflags(inode);
+}
+
static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir,
@@ -7991,7 +8081,7 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct bio *bio;
int isector;
- int read_mode = 0;
+ unsigned int read_mode = 0;
int segs;
int ret;
blk_status_t status;
@@ -8021,7 +8111,7 @@ static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
bio_set_op_attrs(bio, REQ_OP_READ, read_mode);
btrfs_debug(BTRFS_I(inode)->root->fs_info,
- "Repair DIO Read Error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d\n",
+ "repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
read_mode, failrec->this_mirror, failrec->in_validation);
status = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
@@ -8106,7 +8196,7 @@ static blk_status_t __btrfs_correct_data_nocsum(struct inode *inode,
goto next;
}
- wait_for_completion(&done.done);
+ wait_for_completion_io(&done.done);
if (!done.uptodate) {
/* We might have another mirror, so try again */
@@ -8221,7 +8311,7 @@ static blk_status_t __btrfs_subio_endio_read(struct inode *inode,
goto next;
}
- wait_for_completion(&done.done);
+ wait_for_completion_io(&done.done);
if (!done.uptodate) {
/* We might have another mirror, so try again */
@@ -8428,7 +8518,7 @@ static inline blk_status_t btrfs_lookup_and_bind_dio_csum(struct inode *inode,
static inline blk_status_t
__btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
- int skip_sum, int async_submit)
+ int async_submit)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_dio_private *dip = bio->bi_private;
@@ -8446,7 +8536,7 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
goto err;
}
- if (skip_sum)
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
goto map;
if (write && async_submit) {
@@ -8476,8 +8566,7 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset,
return ret;
}
-static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
- int skip_sum)
+static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip)
{
struct inode *inode = dip->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -8541,7 +8630,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
*/
atomic_inc(&dip->pending_bios);
- status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
+ status = __btrfs_submit_dio_bio(bio, inode, file_offset,
async_submit);
if (status) {
bio_put(bio);
@@ -8561,8 +8650,7 @@ static int btrfs_submit_direct_hook(struct btrfs_dio_private *dip,
} while (submit_len > 0);
submit:
- status = __btrfs_submit_dio_bio(bio, inode, file_offset, skip_sum,
- async_submit);
+ status = __btrfs_submit_dio_bio(bio, inode, file_offset, async_submit);
if (!status)
return 0;
@@ -8587,12 +8675,9 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
struct btrfs_dio_private *dip = NULL;
struct bio *bio = NULL;
struct btrfs_io_bio *io_bio;
- int skip_sum;
bool write = (bio_op(dio_bio) == REQ_OP_WRITE);
int ret = 0;
- skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
-
bio = btrfs_bio_clone(dio_bio);
dip = kzalloc(sizeof(*dip), GFP_NOFS);
@@ -8635,7 +8720,7 @@ static void btrfs_submit_direct(struct bio *dio_bio, struct inode *inode,
dio_data->unsubmitted_oe_range_end;
}
- ret = btrfs_submit_direct_hook(dip, skip_sum);
+ ret = btrfs_submit_direct_hook(dip);
if (!ret)
return;
@@ -8735,7 +8820,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
return 0;
inode_dio_begin(inode);
- smp_mb__after_atomic();
/*
* The generic stuff only does filemap_write_and_wait_range, which
@@ -9408,7 +9492,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->reserved_extents = 0;
ei->runtime_flags = 0;
- ei->force_compress = BTRFS_COMPRESS_NONE;
+ ei->prop_compress = BTRFS_COMPRESS_NONE;
+ ei->defrag_compress = BTRFS_COMPRESS_NONE;
ei->delayed_node = NULL;
@@ -10748,6 +10833,7 @@ static const struct file_operations btrfs_dir_file_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.iterate_shared = btrfs_real_readdir,
+ .open = btrfs_opendir,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = btrfs_compat_ioctl,