erofs: add multiple device support
In order to support multi-layer container images, add multiple
device feature to EROFS. Two ways are available to use for now:
- Devices can be mapped into 32-bit global block address space;
- Device ID can be specified with the chunk indexes format.
Note that it assumes no extent would cross device boundary and mkfs
should take care of it seriously.
In the future, a dedicated device manager could be introduced then
thus extra devices can be automatically scanned by UUID as well.
Link: https://lore.kernel.org/r/20211014081010.43485-1-hsiangkao@linux.alibaba.com
Reviewed-by: Chao Yu <chao@kernel.org>
Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 25f6b8b..2cfe1ce 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -252,6 +252,79 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
}
#endif
+static int erofs_init_devices(struct super_block *sb,
+ struct erofs_super_block *dsb)
+{
+ struct erofs_sb_info *sbi = EROFS_SB(sb);
+ unsigned int ondisk_extradevs;
+ erofs_off_t pos;
+ struct page *page = NULL;
+ struct erofs_device_info *dif;
+ struct erofs_deviceslot *dis;
+ void *ptr;
+ int id, err = 0;
+
+ sbi->total_blocks = sbi->primarydevice_blocks;
+ if (!erofs_sb_has_device_table(sbi))
+ ondisk_extradevs = 0;
+ else
+ ondisk_extradevs = le16_to_cpu(dsb->extra_devices);
+
+ if (ondisk_extradevs != sbi->devs->extra_devices) {
+ erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
+ ondisk_extradevs, sbi->devs->extra_devices);
+ return -EINVAL;
+ }
+ if (!ondisk_extradevs)
+ return 0;
+
+ sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
+ pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
+ down_read(&sbi->devs->rwsem);
+ idr_for_each_entry(&sbi->devs->tree, dif, id) {
+ erofs_blk_t blk = erofs_blknr(pos);
+ struct block_device *bdev;
+
+ if (!page || page->index != blk) {
+ if (page) {
+ kunmap(page);
+ unlock_page(page);
+ put_page(page);
+ }
+
+ page = erofs_get_meta_page(sb, blk);
+ if (IS_ERR(page)) {
+ up_read(&sbi->devs->rwsem);
+ return PTR_ERR(page);
+ }
+ ptr = kmap(page);
+ }
+ dis = ptr + erofs_blkoff(pos);
+
+ bdev = blkdev_get_by_path(dif->path,
+ FMODE_READ | FMODE_EXCL,
+ sb->s_type);
+ if (IS_ERR(bdev)) {
+ err = PTR_ERR(bdev);
+ goto err_out;
+ }
+ dif->bdev = bdev;
+ dif->dax_dev = fs_dax_get_by_bdev(bdev);
+ dif->blocks = le32_to_cpu(dis->blocks);
+ dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
+ sbi->total_blocks += dif->blocks;
+ pos += EROFS_DEVT_SLOT_SIZE;
+ }
+err_out:
+ up_read(&sbi->devs->rwsem);
+ if (page) {
+ kunmap(page);
+ unlock_page(page);
+ put_page(page);
+ }
+ return err;
+}
+
static int erofs_read_superblock(struct super_block *sb)
{
struct erofs_sb_info *sbi;
@@ -303,7 +376,7 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->sb_size);
goto out;
}
- sbi->blocks = le32_to_cpu(dsb->blocks);
+ sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
@@ -330,6 +403,11 @@ static int erofs_read_superblock(struct super_block *sb)
ret = erofs_load_compr_cfgs(sb, dsb);
else
ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0);
+ if (ret < 0)
+ goto out;
+
+ /* handle multiple devices */
+ ret = erofs_init_devices(sb, dsb);
out:
kunmap(page);
put_page(page);
@@ -358,6 +436,7 @@ enum {
Opt_cache_strategy,
Opt_dax,
Opt_dax_enum,
+ Opt_device,
Opt_err
};
@@ -381,6 +460,7 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = {
erofs_param_cache_strategy),
fsparam_flag("dax", Opt_dax),
fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums),
+ fsparam_string("device", Opt_device),
{}
};
@@ -412,9 +492,10 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
static int erofs_fc_parse_param(struct fs_context *fc,
struct fs_parameter *param)
{
- struct erofs_fs_context *ctx __maybe_unused = fc->fs_private;
+ struct erofs_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
- int opt;
+ struct erofs_device_info *dif;
+ int opt, ret;
opt = fs_parse(fc, erofs_fs_parameters, param, &result);
if (opt < 0)
@@ -456,6 +537,25 @@ static int erofs_fc_parse_param(struct fs_context *fc,
if (!erofs_fc_set_dax_mode(fc, result.uint_32))
return -EINVAL;
break;
+ case Opt_device:
+ dif = kzalloc(sizeof(*dif), GFP_KERNEL);
+ if (!dif)
+ return -ENOMEM;
+ dif->path = kstrdup(param->string, GFP_KERNEL);
+ if (!dif->path) {
+ kfree(dif);
+ return -ENOMEM;
+ }
+ down_write(&ctx->devs->rwsem);
+ ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
+ up_write(&ctx->devs->rwsem);
+ if (ret < 0) {
+ kfree(dif->path);
+ kfree(dif);
+ return ret;
+ }
+ ++ctx->devs->extra_devices;
+ break;
default:
return -ENOPARAM;
}
@@ -542,6 +642,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_fs_info = sbi;
sbi->opt = ctx->opt;
sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
+ sbi->devs = ctx->devs;
+ ctx->devs = NULL;
+
err = erofs_read_superblock(sb);
if (err)
return err;
@@ -617,9 +720,33 @@ static int erofs_fc_reconfigure(struct fs_context *fc)
return 0;
}
+static int erofs_release_device_info(int id, void *ptr, void *data)
+{
+ struct erofs_device_info *dif = ptr;
+
+ fs_put_dax(dif->dax_dev);
+ if (dif->bdev)
+ blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL);
+ kfree(dif->path);
+ kfree(dif);
+ return 0;
+}
+
+static void erofs_free_dev_context(struct erofs_dev_context *devs)
+{
+ if (!devs)
+ return;
+ idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
+ idr_destroy(&devs->tree);
+ kfree(devs);
+}
+
static void erofs_fc_free(struct fs_context *fc)
{
- kfree(fc->fs_private);
+ struct erofs_fs_context *ctx = fc->fs_private;
+
+ erofs_free_dev_context(ctx->devs);
+ kfree(ctx);
}
static const struct fs_context_operations erofs_context_ops = {
@@ -631,13 +758,20 @@ static const struct fs_context_operations erofs_context_ops = {
static int erofs_init_fs_context(struct fs_context *fc)
{
- fc->fs_private = kzalloc(sizeof(struct erofs_fs_context), GFP_KERNEL);
- if (!fc->fs_private)
+ struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+
+ if (!ctx)
return -ENOMEM;
+ ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
+ if (!ctx->devs) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ fc->fs_private = ctx;
- /* set default mount options */
- erofs_default_options(fc->fs_private);
-
+ idr_init(&ctx->devs->tree);
+ init_rwsem(&ctx->devs->rwsem);
+ erofs_default_options(ctx);
fc->ops = &erofs_context_ops;
return 0;
}
@@ -657,6 +791,8 @@ static void erofs_kill_sb(struct super_block *sb)
sbi = EROFS_SB(sb);
if (!sbi)
return;
+
+ erofs_free_dev_context(sbi->devs);
fs_put_dax(sbi->dax_dev);
kfree(sbi);
sb->s_fs_info = NULL;
@@ -746,7 +882,7 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_type = sb->s_magic;
buf->f_bsize = EROFS_BLKSIZ;
- buf->f_blocks = sbi->blocks;
+ buf->f_blocks = sbi->total_blocks;
buf->f_bfree = buf->f_bavail = 0;
buf->f_files = ULLONG_MAX;