// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 *
 * This file is released under the GPL.
 */

#include "dm-zoned.h"

#include <linux/module.h>
#include <linux/crc32.h>
#include <linux/sched/mm.h>

#define DM_MSG_PREFIX		"zoned metadata"

/*
 * Metadata version.
 */
#define DMZ_META_VER	2

/*
 * On-disk super block magic.
 */
#define DMZ_MAGIC	((((unsigned int)('D')) << 24) | \
			 (((unsigned int)('Z')) << 16) | \
			 (((unsigned int)('B')) << 8) | \
			 ((unsigned int)('D')))
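
/*
 * For reference, with the ASCII codes used above ('D' = 0x44, 'Z' = 0x5A,
 * 'B' = 0x42), DMZ_MAGIC evaluates to 0x445a4244, i.e. the bytes "DZBD"
 * when the value is stored big-endian. A build-time sanity check could
 * look like this (illustration only, not part of the metadata handling):
 *
 *	BUILD_BUG_ON(DMZ_MAGIC != 0x445a4244);
 */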

/*
 * On disk super block.
 * This uses only 512 B but occupies a full 4KB block on disk. This block is
 * followed on disk by the mapping table of chunks to zones and the bitmap
 * blocks indicating zone block validity.
 * The overall resulting metadata format is:
 *    (1) Super block (1 block)
 *    (2) Chunk mapping table (nr_map_blocks)
 *    (3) Bitmap blocks (nr_bitmap_blocks)
 * All metadata blocks are stored in conventional zones, starting from
 * the first conventional zone found on disk.
 */
struct dmz_super {
	/* Magic number */
	__le32		magic;			/*   4 */

	/* Metadata version number */
	__le32		version;		/*   8 */

	/* Generation number */
	__le64		gen;			/*  16 */

	/* This block number */
	__le64		sb_block;		/*  24 */

	/* The number of metadata blocks, including this super block */
	__le32		nr_meta_blocks;		/*  28 */

	/* The number of sequential zones reserved for reclaim */
	__le32		nr_reserved_seq;	/*  32 */

	/* The number of entries in the mapping table */
	__le32		nr_chunks;		/*  36 */

	/* The number of blocks used for the chunk mapping table */
	__le32		nr_map_blocks;		/*  40 */

	/* The number of blocks used for the block bitmaps */
	__le32		nr_bitmap_blocks;	/*  44 */

	/* Checksum */
	__le32		crc;			/*  48 */

	/* DM-Zoned label */
	u8		dmz_label[32];		/*  80 */

	/* DM-Zoned UUID */
	u8		dmz_uuid[16];		/*  96 */

	/* Device UUID */
	u8		dev_uuid[16];		/* 112 */

	/* Padding to full 512B sector */
	u8		reserved[400];		/* 512 */
};

/*
 * Chunk mapping entry: entries are indexed by chunk number
 * and give the zone ID (dzone_id) mapping the chunk on disk.
 * This zone may be sequential or random. If it is a sequential
 * zone, a second zone (bzone_id) used as a write buffer may
 * also be specified. This second zone will always be a randomly
 * writeable zone.
 */
struct dmz_map {
	__le32			dzone_id;
	__le32			bzone_id;
};

/*
 * Chunk mapping table metadata: 512 8-byte entries per 4KB block.
 */
#define DMZ_MAP_ENTRIES		(DMZ_BLOCK_SIZE / sizeof(struct dmz_map))
#define DMZ_MAP_ENTRIES_SHIFT	(ilog2(DMZ_MAP_ENTRIES))
#define DMZ_MAP_ENTRIES_MASK	(DMZ_MAP_ENTRIES - 1)
#define DMZ_MAP_UNMAPPED	UINT_MAX
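
/*
 * Illustration of the lookup arithmetic implied by the macros above
 * (assuming the 4KB block and 8-byte entry sizes stated in the comment,
 * i.e. DMZ_MAP_ENTRIES == 512): the mapping entry of a chunk lives in
 * map block (chunk >> DMZ_MAP_ENTRIES_SHIFT) at index
 * (chunk & DMZ_MAP_ENTRIES_MASK). For example, chunk 1000 maps to
 * entry 488 of the second map block.
 */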

/*
 * Meta data block descriptor (for cached metadata blocks).
 */
struct dmz_mblock {
	struct rb_node		node;
	struct list_head	link;
	sector_t		no;
	unsigned int		ref;
	unsigned long		state;
	struct page		*page;
	void			*data;
};

/*
 * Metadata block state flags.
 */
enum {
	DMZ_META_DIRTY,
	DMZ_META_READING,
	DMZ_META_WRITING,
	DMZ_META_ERROR,
};
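
/*
 * These flags are bit numbers within dmz_mblock->state. Readers and the
 * flush path wait for I/O completion with wait_on_bit_io() on the
 * DMZ_META_READING / DMZ_META_WRITING bits, which are cleared (and their
 * waiters woken) by dmz_mblock_bio_end_io() below.
 */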

/*
 * Super block information (one per metadata set).
 */
struct dmz_sb {
	sector_t		block;
	struct dmz_dev		*dev;
	struct dmz_mblock	*mblk;
	struct dmz_super	*sb;
	struct dm_zone		*zone;
};

/*
 * In-memory metadata.
 */
struct dmz_metadata {
	struct dmz_dev		*dev;
	unsigned int		nr_devs;

	char			devname[BDEVNAME_SIZE];
	char			label[BDEVNAME_SIZE];
	uuid_t			uuid;

	sector_t		zone_bitmap_size;
	unsigned int		zone_nr_bitmap_blocks;
	unsigned int		zone_bits_per_mblk;

	sector_t		zone_nr_blocks;
	sector_t		zone_nr_blocks_shift;

	sector_t		zone_nr_sectors;
	sector_t		zone_nr_sectors_shift;

	unsigned int		nr_bitmap_blocks;
	unsigned int		nr_map_blocks;

	unsigned int		nr_zones;
	unsigned int		nr_useable_zones;
	unsigned int		nr_meta_blocks;
	unsigned int		nr_meta_zones;
	unsigned int		nr_data_zones;
	unsigned int		nr_rnd_zones;
	unsigned int		nr_reserved_seq;
	unsigned int		nr_chunks;

	/* Zone information array */
	struct dm_zone		*zones;

	struct dmz_sb		sb[3];
	unsigned int		mblk_primary;
	unsigned int		sb_version;
	u64			sb_gen;
	unsigned int		min_nr_mblks;
	unsigned int		max_nr_mblks;
	atomic_t		nr_mblks;
	struct rw_semaphore	mblk_sem;
	struct mutex		mblk_flush_lock;
	spinlock_t		mblk_lock;
	struct rb_root		mblk_rbtree;
	struct list_head	mblk_lru_list;
	struct list_head	mblk_dirty_list;
	struct shrinker		mblk_shrinker;

	/* Zone allocation management */
	struct mutex		map_lock;
	struct dmz_mblock	**map_mblk;
	unsigned int		nr_rnd;
	atomic_t		unmap_nr_rnd;
	struct list_head	unmap_rnd_list;
	struct list_head	map_rnd_list;

	unsigned int		nr_seq;
	atomic_t		unmap_nr_seq;
	struct list_head	unmap_seq_list;
	struct list_head	map_seq_list;

	atomic_t		nr_reserved_seq_zones;
	struct list_head	reserved_seq_zones_list;

	wait_queue_head_t	free_wq;
};

#define dmz_zmd_info(zmd, format, args...)	\
	DMINFO("(%s): " format, (zmd)->label, ## args)

#define dmz_zmd_err(zmd, format, args...)	\
	DMERR("(%s): " format, (zmd)->label, ## args)

#define dmz_zmd_warn(zmd, format, args...)	\
	DMWARN("(%s): " format, (zmd)->label, ## args)

#define dmz_zmd_debug(zmd, format, args...)	\
	DMDEBUG("(%s): " format, (zmd)->label, ## args)
/*
 * Various accessors
 */
static unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone)
{
	unsigned int zone_id;

	if (WARN_ON(!zone))
		return 0;

	zone_id = zone->id;
	if (zmd->nr_devs > 1 &&
	    (zone_id >= zmd->dev[1].zone_offset))
		zone_id -= zmd->dev[1].zone_offset;
	return zone_id;
}

sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone)
{
	unsigned int zone_id = dmz_dev_zone_id(zmd, zone);

	return (sector_t)zone_id << zmd->zone_nr_sectors_shift;
}

sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone)
{
	unsigned int zone_id = dmz_dev_zone_id(zmd, zone);

	return (sector_t)zone_id << zmd->zone_nr_blocks_shift;
}

struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone)
{
	if (WARN_ON(!zone))
		return &zmd->dev[0];

	if (zmd->nr_devs > 1 &&
	    zone->id >= zmd->dev[1].zone_offset)
		return &zmd->dev[1];

	return &zmd->dev[0];
}

unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd)
{
	return zmd->zone_nr_blocks;
}

unsigned int dmz_zone_nr_blocks_shift(struct dmz_metadata *zmd)
{
	return zmd->zone_nr_blocks_shift;
}

unsigned int dmz_zone_nr_sectors(struct dmz_metadata *zmd)
{
	return zmd->zone_nr_sectors;
}

unsigned int dmz_zone_nr_sectors_shift(struct dmz_metadata *zmd)
{
	return zmd->zone_nr_sectors_shift;
}

unsigned int dmz_nr_zones(struct dmz_metadata *zmd)
{
	return zmd->nr_zones;
}

unsigned int dmz_nr_chunks(struct dmz_metadata *zmd)
{
	return zmd->nr_chunks;
}

unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd)
{
	return zmd->nr_rnd;
}

unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd)
{
	return atomic_read(&zmd->unmap_nr_rnd);
}

unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd)
{
	return zmd->nr_seq;
}

unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd)
{
	return atomic_read(&zmd->unmap_nr_seq);
}

const char *dmz_metadata_label(struct dmz_metadata *zmd)
{
	return (const char *)zmd->label;
}

bool dmz_check_dev(struct dmz_metadata *zmd)
{
	unsigned int i;

	for (i = 0; i < zmd->nr_devs; i++) {
		if (!dmz_check_bdev(&zmd->dev[i]))
			return false;
	}
	return true;
}

bool dmz_dev_is_dying(struct dmz_metadata *zmd)
{
	unsigned int i;

	for (i = 0; i < zmd->nr_devs; i++) {
		if (dmz_bdev_is_dying(&zmd->dev[i]))
			return true;
	}
	return false;
}

/*
 * Lock/unlock mapping table.
 * The map lock also protects all the zone lists.
 */
void dmz_lock_map(struct dmz_metadata *zmd)
{
	mutex_lock(&zmd->map_lock);
}

void dmz_unlock_map(struct dmz_metadata *zmd)
{
	mutex_unlock(&zmd->map_lock);
}

/*
 * Lock/unlock metadata access. This is a "read" lock on a semaphore
 * that prevents metadata flush from running while metadata are being
 * modified. The actual metadata write mutual exclusion is achieved with
 * the map lock and zone state management (active and reclaim state are
 * mutually exclusive).
 */
void dmz_lock_metadata(struct dmz_metadata *zmd)
{
	down_read(&zmd->mblk_sem);
}

void dmz_unlock_metadata(struct dmz_metadata *zmd)
{
	up_read(&zmd->mblk_sem);
}
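
/*
 * Typical usage (illustrative sketch): the target I/O path brackets its
 * metadata lookups and updates with this read lock, while
 * dmz_flush_metadata() below takes the same rw-semaphore for writing to
 * get exclusive access before logging and committing dirty blocks:
 *
 *	dmz_lock_metadata(zmd);
 *	... look up / modify the chunk mapping and bitmaps ...
 *	dmz_unlock_metadata(zmd);
 */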

/*
 * Lock/unlock flush: prevent concurrent executions
 * of dmz_flush_metadata as well as metadata modification in reclaim
 * while flush is being executed.
 */
void dmz_lock_flush(struct dmz_metadata *zmd)
{
	mutex_lock(&zmd->mblk_flush_lock);
}

void dmz_unlock_flush(struct dmz_metadata *zmd)
{
	mutex_unlock(&zmd->mblk_flush_lock);
}

/*
 * Allocate a metadata block.
 */
static struct dmz_mblock *dmz_alloc_mblock(struct dmz_metadata *zmd,
					   sector_t mblk_no)
{
	struct dmz_mblock *mblk = NULL;

	/* See if we can reuse cached blocks */
	if (zmd->max_nr_mblks && atomic_read(&zmd->nr_mblks) > zmd->max_nr_mblks) {
		spin_lock(&zmd->mblk_lock);
		mblk = list_first_entry_or_null(&zmd->mblk_lru_list,
						struct dmz_mblock, link);
		if (mblk) {
			list_del_init(&mblk->link);
			rb_erase(&mblk->node, &zmd->mblk_rbtree);
			mblk->no = mblk_no;
		}
		spin_unlock(&zmd->mblk_lock);
		if (mblk)
			return mblk;
	}

	/* Allocate a new block */
	mblk = kmalloc(sizeof(struct dmz_mblock), GFP_NOIO);
	if (!mblk)
		return NULL;

	mblk->page = alloc_page(GFP_NOIO);
	if (!mblk->page) {
		kfree(mblk);
		return NULL;
	}

	RB_CLEAR_NODE(&mblk->node);
	INIT_LIST_HEAD(&mblk->link);
	mblk->ref = 0;
	mblk->state = 0;
	mblk->no = mblk_no;
	mblk->data = page_address(mblk->page);

	atomic_inc(&zmd->nr_mblks);

	return mblk;
}

/*
 * Free a metadata block.
 */
static void dmz_free_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
{
	__free_pages(mblk->page, 0);
	kfree(mblk);

	atomic_dec(&zmd->nr_mblks);
}

/*
 * Insert a metadata block in the rbtree.
 */
static void dmz_insert_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
{
	struct rb_root *root = &zmd->mblk_rbtree;
	struct rb_node **new = &(root->rb_node), *parent = NULL;
	struct dmz_mblock *b;

	/* Figure out where to put the new node */
	while (*new) {
		b = container_of(*new, struct dmz_mblock, node);
		parent = *new;
		new = (b->no < mblk->no) ? &((*new)->rb_left) : &((*new)->rb_right);
	}

	/* Add new node and rebalance tree */
	rb_link_node(&mblk->node, parent, new);
	rb_insert_color(&mblk->node, root);
}

/*
 * Lookup a metadata block in the rbtree. If the block is found, increment
 * its reference count.
 */
static struct dmz_mblock *dmz_get_mblock_fast(struct dmz_metadata *zmd,
					      sector_t mblk_no)
{
	struct rb_root *root = &zmd->mblk_rbtree;
	struct rb_node *node = root->rb_node;
	struct dmz_mblock *mblk;

	while (node) {
		mblk = container_of(node, struct dmz_mblock, node);
		if (mblk->no == mblk_no) {
			/*
			 * If this is the first reference to the block,
			 * remove it from the LRU list.
			 */
			mblk->ref++;
			if (mblk->ref == 1 &&
			    !test_bit(DMZ_META_DIRTY, &mblk->state))
				list_del_init(&mblk->link);
			return mblk;
		}
		node = (mblk->no < mblk_no) ? node->rb_left : node->rb_right;
	}

	return NULL;
}

/*
 * Metadata block BIO end callback.
 */
static void dmz_mblock_bio_end_io(struct bio *bio)
{
	struct dmz_mblock *mblk = bio->bi_private;
	int flag;

	if (bio->bi_status)
		set_bit(DMZ_META_ERROR, &mblk->state);

	if (bio_op(bio) == REQ_OP_WRITE)
		flag = DMZ_META_WRITING;
	else
		flag = DMZ_META_READING;

	clear_bit_unlock(flag, &mblk->state);
	smp_mb__after_atomic();
	wake_up_bit(&mblk->state, flag);

	bio_put(bio);
}

/*
 * Read an uncached metadata block from disk and add it to the cache.
 */
static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
					      sector_t mblk_no)
{
	struct dmz_mblock *mblk, *m;
	sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no;
	struct dmz_dev *dev = zmd->sb[zmd->mblk_primary].dev;
	struct bio *bio;

	if (dmz_bdev_is_dying(dev))
		return ERR_PTR(-EIO);

	/* Get a new block and a BIO to read it */
	mblk = dmz_alloc_mblock(zmd, mblk_no);
	if (!mblk)
		return ERR_PTR(-ENOMEM);

	bio = bio_alloc(GFP_NOIO, 1);
	if (!bio) {
		dmz_free_mblock(zmd, mblk);
		return ERR_PTR(-ENOMEM);
	}

	spin_lock(&zmd->mblk_lock);

	/*
	 * Make sure that another context did not start reading
	 * the block already.
	 */
	m = dmz_get_mblock_fast(zmd, mblk_no);
	if (m) {
		spin_unlock(&zmd->mblk_lock);
		dmz_free_mblock(zmd, mblk);
		bio_put(bio);
		return m;
	}

	mblk->ref++;
	set_bit(DMZ_META_READING, &mblk->state);
	dmz_insert_mblock(zmd, mblk);

	spin_unlock(&zmd->mblk_lock);

	/* Submit read BIO */
	bio->bi_iter.bi_sector = dmz_blk2sect(block);
	bio_set_dev(bio, dev->bdev);
	bio->bi_private = mblk;
	bio->bi_end_io = dmz_mblock_bio_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, REQ_META | REQ_PRIO);
	bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
	submit_bio(bio);

	return mblk;
}

/*
 * Free metadata blocks.
 */
static unsigned long dmz_shrink_mblock_cache(struct dmz_metadata *zmd,
					     unsigned long limit)
{
	struct dmz_mblock *mblk;
	unsigned long count = 0;

	if (!zmd->max_nr_mblks)
		return 0;

	while (!list_empty(&zmd->mblk_lru_list) &&
	       atomic_read(&zmd->nr_mblks) > zmd->min_nr_mblks &&
	       count < limit) {
		mblk = list_first_entry(&zmd->mblk_lru_list,
					struct dmz_mblock, link);
		list_del_init(&mblk->link);
		rb_erase(&mblk->node, &zmd->mblk_rbtree);
		dmz_free_mblock(zmd, mblk);
		count++;
	}

	return count;
}

/*
 * For mblock shrinker: get the number of unused metadata blocks in the cache.
 */
static unsigned long dmz_mblock_shrinker_count(struct shrinker *shrink,
					       struct shrink_control *sc)
{
	struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker);

	return atomic_read(&zmd->nr_mblks);
}

/*
 * For mblock shrinker: scan unused metadata blocks and shrink the cache.
 */
static unsigned long dmz_mblock_shrinker_scan(struct shrinker *shrink,
					       struct shrink_control *sc)
{
	struct dmz_metadata *zmd = container_of(shrink, struct dmz_metadata, mblk_shrinker);
	unsigned long count;

	spin_lock(&zmd->mblk_lock);
	count = dmz_shrink_mblock_cache(zmd, sc->nr_to_scan);
	spin_unlock(&zmd->mblk_lock);

	return count ? count : SHRINK_STOP;
}

/*
 * Release a metadata block.
 */
static void dmz_release_mblock(struct dmz_metadata *zmd,
			       struct dmz_mblock *mblk)
{

	if (!mblk)
		return;

	spin_lock(&zmd->mblk_lock);

	mblk->ref--;
	if (mblk->ref == 0) {
		if (test_bit(DMZ_META_ERROR, &mblk->state)) {
			rb_erase(&mblk->node, &zmd->mblk_rbtree);
			dmz_free_mblock(zmd, mblk);
		} else if (!test_bit(DMZ_META_DIRTY, &mblk->state)) {
			list_add_tail(&mblk->link, &zmd->mblk_lru_list);
			dmz_shrink_mblock_cache(zmd, 1);
		}
	}

	spin_unlock(&zmd->mblk_lock);
}

/*
 * Get a metadata block from the rbtree. If the block
 * is not present, read it from disk.
 */
static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd,
					 sector_t mblk_no)
{
	struct dmz_mblock *mblk;
	struct dmz_dev *dev = zmd->sb[zmd->mblk_primary].dev;

	/* Check rbtree */
	spin_lock(&zmd->mblk_lock);
	mblk = dmz_get_mblock_fast(zmd, mblk_no);
	spin_unlock(&zmd->mblk_lock);

	if (!mblk) {
		/* Cache miss: read the block from disk */
		mblk = dmz_get_mblock_slow(zmd, mblk_no);
		if (IS_ERR(mblk))
			return mblk;
	}

	/* Wait for on-going read I/O and check for error */
	wait_on_bit_io(&mblk->state, DMZ_META_READING,
		       TASK_UNINTERRUPTIBLE);
	if (test_bit(DMZ_META_ERROR, &mblk->state)) {
		dmz_release_mblock(zmd, mblk);
		dmz_check_bdev(dev);
		return ERR_PTR(-EIO);
	}

	return mblk;
}

/*
 * Mark a metadata block dirty.
 */
static void dmz_dirty_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk)
{
	spin_lock(&zmd->mblk_lock);
	if (!test_and_set_bit(DMZ_META_DIRTY, &mblk->state))
		list_add_tail(&mblk->link, &zmd->mblk_dirty_list);
	spin_unlock(&zmd->mblk_lock);
}
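
/*
 * Typical access pattern for cached metadata blocks (illustrative
 * sketch, mirroring how the mapping and bitmap code later in this file
 * uses the helpers above):
 *
 *	mblk = dmz_get_mblock(zmd, mblk_no);
 *	if (IS_ERR(mblk))
 *		return PTR_ERR(mblk);
 *	... read or update mblk->data ...
 *	dmz_dirty_mblock(zmd, mblk);	(only if the block was modified)
 *	dmz_release_mblock(zmd, mblk);
 */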

/*
 * Issue a metadata block write BIO.
 */
static int dmz_write_mblock(struct dmz_metadata *zmd, struct dmz_mblock *mblk,
			    unsigned int set)
{
	struct dmz_dev *dev = zmd->sb[set].dev;
	sector_t block = zmd->sb[set].block + mblk->no;
	struct bio *bio;

	if (dmz_bdev_is_dying(dev))
		return -EIO;

	bio = bio_alloc(GFP_NOIO, 1);
	if (!bio) {
		set_bit(DMZ_META_ERROR, &mblk->state);
		return -ENOMEM;
	}

	set_bit(DMZ_META_WRITING, &mblk->state);

	bio->bi_iter.bi_sector = dmz_blk2sect(block);
	bio_set_dev(bio, dev->bdev);
	bio->bi_private = mblk;
	bio->bi_end_io = dmz_mblock_bio_end_io;
	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_META | REQ_PRIO);
	bio_add_page(bio, mblk->page, DMZ_BLOCK_SIZE, 0);
	submit_bio(bio);

	return 0;
}

/*
 * Read/write a metadata block.
 */
static int dmz_rdwr_block(struct dmz_dev *dev, int op,
			  sector_t block, struct page *page)
{
	struct bio *bio;
	int ret;

	if (WARN_ON(!dev))
		return -EIO;

	if (dmz_bdev_is_dying(dev))
		return -EIO;

	bio = bio_alloc(GFP_NOIO, 1);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_sector = dmz_blk2sect(block);
	bio_set_dev(bio, dev->bdev);
	bio_set_op_attrs(bio, op, REQ_SYNC | REQ_META | REQ_PRIO);
	bio_add_page(bio, page, DMZ_BLOCK_SIZE, 0);
	ret = submit_bio_wait(bio);
	bio_put(bio);

	if (ret)
		dmz_check_bdev(dev);
	return ret;
}

/*
 * Write super block of the specified metadata set.
 */
static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set)
{
	struct dmz_mblock *mblk = zmd->sb[set].mblk;
	struct dmz_super *sb = zmd->sb[set].sb;
	struct dmz_dev *dev = zmd->sb[set].dev;
	sector_t sb_block;
	u64 sb_gen = zmd->sb_gen + 1;
	int ret;

	sb->magic = cpu_to_le32(DMZ_MAGIC);

	sb->version = cpu_to_le32(zmd->sb_version);
	if (zmd->sb_version > 1) {
		BUILD_BUG_ON(UUID_SIZE != 16);
		export_uuid(sb->dmz_uuid, &zmd->uuid);
		memcpy(sb->dmz_label, zmd->label, BDEVNAME_SIZE);
		export_uuid(sb->dev_uuid, &dev->uuid);
	}

	sb->gen = cpu_to_le64(sb_gen);

	/*
	 * The metadata always references the absolute block address,
	 * ie relative to the entire block range, not the per-device
	 * block address.
	 */
	sb_block = zmd->sb[set].zone->id << zmd->zone_nr_blocks_shift;
	sb->sb_block = cpu_to_le64(sb_block);
	sb->nr_meta_blocks = cpu_to_le32(zmd->nr_meta_blocks);
	sb->nr_reserved_seq = cpu_to_le32(zmd->nr_reserved_seq);
	sb->nr_chunks = cpu_to_le32(zmd->nr_chunks);

	sb->nr_map_blocks = cpu_to_le32(zmd->nr_map_blocks);
	sb->nr_bitmap_blocks = cpu_to_le32(zmd->nr_bitmap_blocks);

	sb->crc = 0;
	sb->crc = cpu_to_le32(crc32_le(sb_gen, (unsigned char *)sb, DMZ_BLOCK_SIZE));

	ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block,
			     mblk->page);
	if (ret == 0)
		ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL);

	return ret;
}
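
/*
 * Note on the checksum convention used above and in dmz_check_sb(): the
 * crc field is zeroed before computing crc32_le() over the whole 4KB
 * block, seeded with the super block generation. A verifier therefore
 * has to save the stored value, clear the field and recompute, e.g.
 * (sketch of what dmz_check_sb() does):
 *
 *	stored_crc = le32_to_cpu(sb->crc);
 *	sb->crc = 0;
 *	crc = crc32_le(le64_to_cpu(sb->gen), (unsigned char *)sb,
 *		       DMZ_BLOCK_SIZE);
 *	valid = (crc == stored_crc);
 */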

/*
 * Write dirty metadata blocks to the specified set.
 */
static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd,
				   struct list_head *write_list,
				   unsigned int set)
{
	struct dmz_mblock *mblk;
	struct dmz_dev *dev = zmd->sb[set].dev;
	struct blk_plug plug;
	int ret = 0, nr_mblks_submitted = 0;

	/* Issue writes */
	blk_start_plug(&plug);
	list_for_each_entry(mblk, write_list, link) {
		ret = dmz_write_mblock(zmd, mblk, set);
		if (ret)
			break;
		nr_mblks_submitted++;
	}
	blk_finish_plug(&plug);

	/* Wait for completion */
	list_for_each_entry(mblk, write_list, link) {
		if (!nr_mblks_submitted)
			break;
		wait_on_bit_io(&mblk->state, DMZ_META_WRITING,
			       TASK_UNINTERRUPTIBLE);
		if (test_bit(DMZ_META_ERROR, &mblk->state)) {
			clear_bit(DMZ_META_ERROR, &mblk->state);
			dmz_check_bdev(dev);
			ret = -EIO;
		}
		nr_mblks_submitted--;
	}

	/* Flush drive cache (this will also sync data) */
	if (ret == 0)
		ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL);

	return ret;
}

/*
 * Log dirty metadata blocks.
 */
static int dmz_log_dirty_mblocks(struct dmz_metadata *zmd,
				 struct list_head *write_list)
{
	unsigned int log_set = zmd->mblk_primary ^ 0x1;
	int ret;

	/* Write dirty blocks to the log */
	ret = dmz_write_dirty_mblocks(zmd, write_list, log_set);
	if (ret)
		return ret;

	/*
	 * No error so far: now validate the log by updating the
	 * log index super block generation.
	 */
	ret = dmz_write_sb(zmd, log_set);
	if (ret)
		return ret;

	return 0;
}

/*
 * Flush dirty metadata blocks.
 */
int dmz_flush_metadata(struct dmz_metadata *zmd)
{
	struct dmz_mblock *mblk;
	struct list_head write_list;
	struct dmz_dev *dev;
	int ret;

	if (WARN_ON(!zmd))
		return 0;

	INIT_LIST_HEAD(&write_list);

	/*
	 * Make sure that metadata blocks are stable before logging: take
	 * the write lock on the metadata semaphore to prevent target BIOs
	 * from modifying metadata.
	 */
	down_write(&zmd->mblk_sem);
	dev = zmd->sb[zmd->mblk_primary].dev;

	/*
	 * This is called from the target flush work and reclaim work.
	 * Concurrent execution is not allowed.
	 */
	dmz_lock_flush(zmd);

	if (dmz_bdev_is_dying(dev)) {
		ret = -EIO;
		goto out;
	}

	/* Get dirty blocks */
	spin_lock(&zmd->mblk_lock);
	list_splice_init(&zmd->mblk_dirty_list, &write_list);
	spin_unlock(&zmd->mblk_lock);

	/* If there are no dirty metadata blocks, just flush the device cache */
	if (list_empty(&write_list)) {
		ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL);
		goto err;
	}

	/*
	 * The primary metadata set is still clean. Keep it this way until
	 * all updates are successful in the secondary set. That is, use
	 * the secondary set as a log.
	 */
	ret = dmz_log_dirty_mblocks(zmd, &write_list);
	if (ret)
		goto err;

	/*
	 * The log is on disk. It is now safe to update in place
	 * in the primary metadata set.
	 */
	ret = dmz_write_dirty_mblocks(zmd, &write_list, zmd->mblk_primary);
	if (ret)
		goto err;

	ret = dmz_write_sb(zmd, zmd->mblk_primary);
	if (ret)
		goto err;

	while (!list_empty(&write_list)) {
		mblk = list_first_entry(&write_list, struct dmz_mblock, link);
		list_del_init(&mblk->link);

		spin_lock(&zmd->mblk_lock);
		clear_bit(DMZ_META_DIRTY, &mblk->state);
		if (mblk->ref == 0)
			list_add_tail(&mblk->link, &zmd->mblk_lru_list);
		spin_unlock(&zmd->mblk_lock);
	}

	zmd->sb_gen++;
out:
	dmz_unlock_flush(zmd);
	up_write(&zmd->mblk_sem);

	return ret;

err:
	if (!list_empty(&write_list)) {
		spin_lock(&zmd->mblk_lock);
		list_splice(&write_list, &zmd->mblk_dirty_list);
		spin_unlock(&zmd->mblk_lock);
	}
	if (!dmz_check_bdev(dev))
		ret = -EIO;
	goto out;
}

/*
 * Check super block.
 */
static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
{
	struct dmz_super *sb = zmd->sb[set].sb;
	struct dmz_dev *dev = zmd->sb[set].dev;
	unsigned int nr_meta_zones, nr_data_zones;
	u32 crc, stored_crc;
	u64 gen;

	if (le32_to_cpu(sb->magic) != DMZ_MAGIC) {
		dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)",
			    DMZ_MAGIC, le32_to_cpu(sb->magic));
		return -ENXIO;
	}

	zmd->sb_version = le32_to_cpu(sb->version);
	if (zmd->sb_version > DMZ_META_VER) {
		dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)",
			    DMZ_META_VER, zmd->sb_version);
		return -EINVAL;
	}
	if ((zmd->sb_version < 1) && (set == 2)) {
		dmz_dev_err(dev, "Tertiary superblocks are not supported");
		return -EINVAL;
	}

	gen = le64_to_cpu(sb->gen);
	stored_crc = le32_to_cpu(sb->crc);
	sb->crc = 0;
	crc = crc32_le(gen, (unsigned char *)sb, DMZ_BLOCK_SIZE);
	if (crc != stored_crc) {
		dmz_dev_err(dev, "Invalid checksum (needed 0x%08x, got 0x%08x)",
			    crc, stored_crc);
		return -ENXIO;
	}

	if (zmd->sb_version > 1) {
		uuid_t sb_uuid;

		import_uuid(&sb_uuid, sb->dmz_uuid);
		if (uuid_is_null(&sb_uuid)) {
			dmz_dev_err(dev, "NULL DM-Zoned uuid");
			return -ENXIO;
		} else if (uuid_is_null(&zmd->uuid)) {
			uuid_copy(&zmd->uuid, &sb_uuid);
		} else if (!uuid_equal(&zmd->uuid, &sb_uuid)) {
			dmz_dev_err(dev, "mismatching DM-Zoned uuid, "
				    "is %pUl expected %pUl",
				    &sb_uuid, &zmd->uuid);
			return -ENXIO;
		}
		if (!strlen(zmd->label))
			memcpy(zmd->label, sb->dmz_label, BDEVNAME_SIZE);
		else if (memcmp(zmd->label, sb->dmz_label, BDEVNAME_SIZE)) {
			dmz_dev_err(dev, "mismatching DM-Zoned label, "
				    "is %s expected %s",
				    sb->dmz_label, zmd->label);
			return -ENXIO;
		}
		import_uuid(&dev->uuid, sb->dev_uuid);
		if (uuid_is_null(&dev->uuid)) {
			dmz_dev_err(dev, "NULL device uuid");
			return -ENXIO;
		}

		if (set == 2) {
			/*
			 * Generation number should be 0, but it doesn't
			 * really matter if it isn't.
			 */
			if (gen != 0)
				dmz_dev_warn(dev, "Invalid generation %llu",
					     gen);
			return 0;
		}
	}

	nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1)
		>> zmd->zone_nr_blocks_shift;
	if (!nr_meta_zones ||
	    nr_meta_zones >= zmd->nr_rnd_zones) {
		dmz_dev_err(dev, "Invalid number of metadata blocks");
		return -ENXIO;
	}

	if (!le32_to_cpu(sb->nr_reserved_seq) ||
	    le32_to_cpu(sb->nr_reserved_seq) >= (zmd->nr_useable_zones - nr_meta_zones)) {
		dmz_dev_err(dev, "Invalid number of reserved sequential zones");
		return -ENXIO;
	}

	nr_data_zones = zmd->nr_useable_zones -
		(nr_meta_zones * 2 + le32_to_cpu(sb->nr_reserved_seq));
	if (le32_to_cpu(sb->nr_chunks) > nr_data_zones) {
		dmz_dev_err(dev, "Invalid number of chunks %u / %u",
			    le32_to_cpu(sb->nr_chunks), nr_data_zones);
		return -ENXIO;
	}

	/* OK */
	zmd->nr_meta_blocks = le32_to_cpu(sb->nr_meta_blocks);
	zmd->nr_reserved_seq = le32_to_cpu(sb->nr_reserved_seq);
	zmd->nr_chunks = le32_to_cpu(sb->nr_chunks);
	zmd->nr_map_blocks = le32_to_cpu(sb->nr_map_blocks);
	zmd->nr_bitmap_blocks = le32_to_cpu(sb->nr_bitmap_blocks);
	zmd->nr_meta_zones = nr_meta_zones;
	zmd->nr_data_zones = nr_data_zones;

	return 0;
}

/*
 * Read the first or second super block from disk.
 */
static int dmz_read_sb(struct dmz_metadata *zmd, unsigned int set)
{
	return dmz_rdwr_block(zmd->sb[set].dev, REQ_OP_READ,
			      zmd->sb[set].block, zmd->sb[set].mblk->page);
}

/*
 * Determine the position of the secondary super blocks on disk.
 * This is used only if a corruption of the primary super block
 * is detected.
 */
static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd)
{
	unsigned int zone_nr_blocks = zmd->zone_nr_blocks;
	struct dmz_mblock *mblk;
	int i;

	/* Allocate a block */
	mblk = dmz_alloc_mblock(zmd, 0);
	if (!mblk)
		return -ENOMEM;

	zmd->sb[1].mblk = mblk;
	zmd->sb[1].sb = mblk->data;

	/* Bad first super block: search for the second one */
	zmd->sb[1].block = zmd->sb[0].block + zone_nr_blocks;
	zmd->sb[1].zone = zmd->sb[0].zone + 1;
	zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone);
	for (i = 0; i < zmd->nr_rnd_zones - 1; i++) {
		if (dmz_read_sb(zmd, 1) != 0)
			break;
		if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC) {
			zmd->sb[1].zone += i;
			return 0;
		}
		zmd->sb[1].block += zone_nr_blocks;
		zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone + i);
	}

	dmz_free_mblock(zmd, mblk);
	zmd->sb[1].mblk = NULL;
	zmd->sb[1].zone = NULL;
	zmd->sb[1].dev = NULL;

	return -EIO;
}

/*
 * Read the first or second super block from disk.
 */
static int dmz_get_sb(struct dmz_metadata *zmd, unsigned int set)
{
	struct dmz_mblock *mblk;
	int ret;

	/* Allocate a block */
	mblk = dmz_alloc_mblock(zmd, 0);
	if (!mblk)
		return -ENOMEM;

	zmd->sb[set].mblk = mblk;
	zmd->sb[set].sb = mblk->data;

	/* Read super block */
	ret = dmz_read_sb(zmd, set);
	if (ret) {
		dmz_free_mblock(zmd, mblk);
		zmd->sb[set].mblk = NULL;
		return ret;
	}

	return 0;
}

/*
 * Recover a metadata set.
 */
static int dmz_recover_mblocks(struct dmz_metadata *zmd, unsigned int dst_set)
{
	unsigned int src_set = dst_set ^ 0x1;
	struct page *page;
	int i, ret;

	dmz_dev_warn(zmd->sb[dst_set].dev,
		     "Metadata set %u invalid: recovering", dst_set);

	if (dst_set == 0)
		zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone);
	else
		zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone);

	page = alloc_page(GFP_NOIO);
	if (!page)
		return -ENOMEM;

	/* Copy metadata blocks */
	for (i = 1; i < zmd->nr_meta_blocks; i++) {
		ret = dmz_rdwr_block(zmd->sb[src_set].dev, REQ_OP_READ,
				     zmd->sb[src_set].block + i, page);
		if (ret)
			goto out;
		ret = dmz_rdwr_block(zmd->sb[dst_set].dev, REQ_OP_WRITE,
				     zmd->sb[dst_set].block + i, page);
		if (ret)
			goto out;
	}

	/* Finalize with the super block */
	if (!zmd->sb[dst_set].mblk) {
		zmd->sb[dst_set].mblk = dmz_alloc_mblock(zmd, 0);
		if (!zmd->sb[dst_set].mblk) {
			ret = -ENOMEM;
			goto out;
		}
		zmd->sb[dst_set].sb = zmd->sb[dst_set].mblk->data;
	}

	ret = dmz_write_sb(zmd, dst_set);
out:
	__free_pages(page, 0);

	return ret;
}

/*
 * Get super block from disk.
 */
static int dmz_load_sb(struct dmz_metadata *zmd)
{
	bool sb_good[2] = {false, false};
	u64 sb_gen[2] = {0, 0};
	int ret;

	if (!zmd->sb[0].zone) {
		dmz_zmd_err(zmd, "Primary super block zone not set");
		return -ENXIO;
	}

	/* Read and check the primary super block */
	zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone);
	zmd->sb[0].dev = dmz_zone_to_dev(zmd, zmd->sb[0].zone);
	ret = dmz_get_sb(zmd, 0);
	if (ret) {
		dmz_dev_err(zmd->sb[0].dev, "Read primary super block failed");
		return ret;
	}

	ret = dmz_check_sb(zmd, 0);

	/* Read and check secondary super block */
	if (ret == 0) {
		sb_good[0] = true;
		if (!zmd->sb[1].zone)
			zmd->sb[1].zone = zmd->sb[0].zone + zmd->nr_meta_zones;
		zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone);
		zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone);
		ret = dmz_get_sb(zmd, 1);
	} else
		ret = dmz_lookup_secondary_sb(zmd);

	if (ret) {
		dmz_dev_err(zmd->sb[1].dev, "Read secondary super block failed");
		return ret;
	}

	ret = dmz_check_sb(zmd, 1);
	if (ret == 0)
		sb_good[1] = true;

	/* Use highest generation sb first */
	if (!sb_good[0] && !sb_good[1]) {
		dmz_zmd_err(zmd, "No valid super block found");
		return -EIO;
	}

	if (sb_good[0])
		sb_gen[0] = le64_to_cpu(zmd->sb[0].sb->gen);
	else {
		ret = dmz_recover_mblocks(zmd, 0);
		if (ret) {
			dmz_dev_err(zmd->sb[0].dev,
				    "Recovery of superblock 0 failed");
			return -EIO;
		}
	}

	if (sb_good[1])
		sb_gen[1] = le64_to_cpu(zmd->sb[1].sb->gen);
	else {
		ret = dmz_recover_mblocks(zmd, 1);

		if (ret) {
			dmz_dev_err(zmd->sb[1].dev,
				    "Recovery of superblock 1 failed");
			return -EIO;
		}
	}

	if (sb_gen[0] >= sb_gen[1]) {
		zmd->sb_gen = sb_gen[0];
		zmd->mblk_primary = 0;
	} else {
		zmd->sb_gen = sb_gen[1];
		zmd->mblk_primary = 1;
	}

	dmz_dev_debug(zmd->sb[zmd->mblk_primary].dev,
		      "Using super block %u (gen %llu)",
		      zmd->mblk_primary, zmd->sb_gen);

	if ((zmd->sb_version > 1) && zmd->sb[2].zone) {
		zmd->sb[2].block = dmz_start_block(zmd, zmd->sb[2].zone);
		zmd->sb[2].dev = dmz_zone_to_dev(zmd, zmd->sb[2].zone);
		ret = dmz_get_sb(zmd, 2);
		if (ret) {
			dmz_dev_err(zmd->sb[2].dev,
				    "Read tertiary super block failed");
			return ret;
		}
		ret = dmz_check_sb(zmd, 2);
		if (ret == -EINVAL)
			return ret;
	}
	return 0;
}

/*
 * Initialize a zone descriptor.
 */
static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
{
	struct dmz_metadata *zmd = data;
	struct dmz_dev *dev = zmd->nr_devs > 1 ? &zmd->dev[1] : &zmd->dev[0];
	int idx = num + dev->zone_offset;
	struct dm_zone *zone = &zmd->zones[idx];

	if (blkz->len != zmd->zone_nr_sectors) {
		if (zmd->sb_version > 1) {
			/* Ignore the eventual runt (smaller) zone */
			set_bit(DMZ_OFFLINE, &zone->flags);
			return 0;
		} else if (blkz->start + blkz->len == dev->capacity)
			return 0;
		return -ENXIO;
	}

	INIT_LIST_HEAD(&zone->link);
	atomic_set(&zone->refcount, 0);
	zone->id = idx;
	zone->chunk = DMZ_MAP_UNMAPPED;

	switch (blkz->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		set_bit(DMZ_RND, &zone->flags);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		set_bit(DMZ_SEQ, &zone->flags);
		break;
	default:
		return -ENXIO;
	}

	if (dmz_is_rnd(zone))
		zone->wp_block = 0;
	else
		zone->wp_block = dmz_sect2blk(blkz->wp - blkz->start);

	if (blkz->cond == BLK_ZONE_COND_OFFLINE)
		set_bit(DMZ_OFFLINE, &zone->flags);
	else if (blkz->cond == BLK_ZONE_COND_READONLY)
		set_bit(DMZ_READ_ONLY, &zone->flags);
	else {
		zmd->nr_useable_zones++;
		if (dmz_is_rnd(zone)) {
			zmd->nr_rnd_zones++;
			if (zmd->nr_devs == 1 && !zmd->sb[0].zone) {
				/* Primary super block zone */
				zmd->sb[0].zone = zone;
			}
		}
		if (zmd->nr_devs > 1 && !zmd->sb[2].zone) {
			/* Tertiary superblock zone */
			zmd->sb[2].zone = zone;
		}
	}

	return 0;
}

static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
{
	int idx;
	sector_t zone_offset = 0;

	for (idx = 0; idx < dev->nr_zones; idx++) {
		struct dm_zone *zone = &zmd->zones[idx];

		INIT_LIST_HEAD(&zone->link);
		atomic_set(&zone->refcount, 0);
		zone->id = idx;
		zone->chunk = DMZ_MAP_UNMAPPED;
		set_bit(DMZ_RND, &zone->flags);
		zone->wp_block = 0;
		zmd->nr_rnd_zones++;
		zmd->nr_useable_zones++;
		if (dev->capacity - zone_offset < zmd->zone_nr_sectors) {
			/* Disable runt zone */
			set_bit(DMZ_OFFLINE, &zone->flags);
			break;
		}
		zone_offset += zmd->zone_nr_sectors;
	}
}

/*
 * Free zones descriptors.
 */
static void dmz_drop_zones(struct dmz_metadata *zmd)
{
	kfree(zmd->zones);
	zmd->zones = NULL;
}

/*
 * Allocate and initialize zone descriptors using the zone
 * information from disk.
 */
static int dmz_init_zones(struct dmz_metadata *zmd)
{
	int i, ret;
	struct dmz_dev *zoned_dev = &zmd->dev[0];

	/* Init */
	zmd->zone_nr_sectors = zmd->dev[0].zone_nr_sectors;
	zmd->zone_nr_sectors_shift = ilog2(zmd->zone_nr_sectors);
	zmd->zone_nr_blocks = dmz_sect2blk(zmd->zone_nr_sectors);
	zmd->zone_nr_blocks_shift = ilog2(zmd->zone_nr_blocks);
	zmd->zone_bitmap_size = zmd->zone_nr_blocks >> 3;
	zmd->zone_nr_bitmap_blocks =
		max_t(sector_t, 1, zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT);
	zmd->zone_bits_per_mblk = min_t(sector_t, zmd->zone_nr_blocks,
					DMZ_BLOCK_SIZE_BITS);
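
	/*
	 * Illustrative numbers (assuming a typical 256 MB zone and the 4KB
	 * metadata block size): a zone then spans 65536 blocks, so its
	 * validity bitmap is 65536 / 8 = 8192 bytes, i.e. two bitmap
	 * blocks, and a single metadata block holds the bits of at most
	 * DMZ_BLOCK_SIZE_BITS = 32768 zone blocks.
	 */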

	/* Allocate zone array */
	zmd->nr_zones = 0;
	for (i = 0; i < zmd->nr_devs; i++)
		zmd->nr_zones += zmd->dev[i].nr_zones;

	if (!zmd->nr_zones) {
		DMERR("(%s): No zones found", zmd->devname);
		return -ENXIO;
	}
	zmd->zones = kcalloc(zmd->nr_zones, sizeof(struct dm_zone), GFP_KERNEL);
	if (!zmd->zones)
		return -ENOMEM;

	DMDEBUG("(%s): Using %zu B for zone information",
		zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones);

	if (zmd->nr_devs > 1) {
		dmz_emulate_zones(zmd, &zmd->dev[0]);
		/*
		 * Primary superblock zone is always at zone 0 when multiple
		 * drives are present.
		 */
		zmd->sb[0].zone = &zmd->zones[0];

		zoned_dev = &zmd->dev[1];
	}

	/*
	 * Get zone information and initialize zone descriptors. At the same
	 * time, determine where the super block should be: first block of the
	 * first randomly writable zone.
	 */
	ret = blkdev_report_zones(zoned_dev->bdev, 0, BLK_ALL_ZONES,
				  dmz_init_zone, zmd);
	if (ret < 0) {
		DMDEBUG("(%s): Failed to report zones, error %d",
			zmd->devname, ret);
		dmz_drop_zones(zmd);
		return ret;
	}

	return 0;
}

static int dmz_update_zone_cb(struct blk_zone *blkz, unsigned int idx,
			      void *data)
{
	struct dm_zone *zone = data;

	clear_bit(DMZ_OFFLINE, &zone->flags);
	clear_bit(DMZ_READ_ONLY, &zone->flags);
	if (blkz->cond == BLK_ZONE_COND_OFFLINE)
		set_bit(DMZ_OFFLINE, &zone->flags);
	else if (blkz->cond == BLK_ZONE_COND_READONLY)
		set_bit(DMZ_READ_ONLY, &zone->flags);

	if (dmz_is_seq(zone))
		zone->wp_block = dmz_sect2blk(blkz->wp - blkz->start);
	else
		zone->wp_block = 0;
	return 0;
}

/*
 * Update a zone's information.
 */
static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
{
	struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone);
	unsigned int noio_flag;
	int ret;

	if (dev->flags & DMZ_BDEV_REGULAR)
		return 0;

	/*
	 * Get zone information from disk. Since blkdev_report_zones() uses
	 * GFP_KERNEL by default for memory allocations, set the per-task
	 * PF_MEMALLOC_NOIO flag so that all allocations are done as if
	 * GFP_NOIO was specified.
	 */
	noio_flag = memalloc_noio_save();
	ret = blkdev_report_zones(dev->bdev, dmz_start_sect(zmd, zone), 1,
				  dmz_update_zone_cb, zone);
	memalloc_noio_restore(noio_flag);

	if (ret == 0)
		ret = -EIO;
	if (ret < 0) {
		dmz_dev_err(dev, "Get zone %u report failed",
			    zone->id);
		dmz_check_bdev(dev);
		return ret;
	}

	return 0;
}

/*
 * Check a zone write pointer position when the zone is marked
 * with the sequential write error flag.
 */
static int dmz_handle_seq_write_err(struct dmz_metadata *zmd,
				    struct dm_zone *zone)
{
	struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone);
	unsigned int wp = 0;
	int ret;

	wp = zone->wp_block;
	ret = dmz_update_zone(zmd, zone);
	if (ret)
		return ret;

	dmz_dev_warn(dev, "Processing zone %u write error (zone wp %u/%u)",
		     zone->id, zone->wp_block, wp);

	if (zone->wp_block < wp) {
		dmz_invalidate_blocks(zmd, zone, zone->wp_block,
				      wp - zone->wp_block);
	}

	return 0;
}

static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id)
{
	return &zmd->zones[zone_id];
}

/*
 * Reset a zone write pointer.
 */
static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
{
	int ret;

	/*
	 * Ignore offline zones, read only zones,
	 * and conventional zones.
	 */
	if (dmz_is_offline(zone) ||
	    dmz_is_readonly(zone) ||
	    dmz_is_rnd(zone))
		return 0;

	if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
		struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone);

		ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET,
				       dmz_start_sect(zmd, zone),
				       zmd->zone_nr_sectors, GFP_NOIO);
		if (ret) {
			dmz_dev_err(dev, "Reset zone %u failed %d",
				    zone->id, ret);
			return ret;
		}
	}

	/* Clear write error bit and rewind write pointer position */
	clear_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
	zone->wp_block = 0;

	return 0;
}

static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone);

/*
 * Initialize chunk mapping.
 */
static int dmz_load_mapping(struct dmz_metadata *zmd)
{
	struct dm_zone *dzone, *bzone;
	struct dmz_mblock *dmap_mblk = NULL;
	struct dmz_map *dmap;
	unsigned int i = 0, e = 0, chunk = 0;
	unsigned int dzone_id;
	unsigned int bzone_id;

	/* Metadata block array for the chunk mapping table */
1616 zmd->map_mblk = kcalloc(zmd->nr_map_blocks,
1617 sizeof(struct dmz_mblock *), GFP_KERNEL);
1618 if (!zmd->map_mblk)
1619 return -ENOMEM;
1620
1621 /* Get chunk mapping table blocks and initialize zone mapping */
1622 while (chunk < zmd->nr_chunks) {
1623 if (!dmap_mblk) {
1624 /* Get mapping block */
1625 dmap_mblk = dmz_get_mblock(zmd, i + 1);
1626 if (IS_ERR(dmap_mblk))
1627 return PTR_ERR(dmap_mblk);
1628 zmd->map_mblk[i] = dmap_mblk;
1629 dmap = (struct dmz_map *) dmap_mblk->data;
1630 i++;
1631 e = 0;
1632 }
1633
1634 /* Check data zone */
1635 dzone_id = le32_to_cpu(dmap[e].dzone_id);
1636 if (dzone_id == DMZ_MAP_UNMAPPED)
1637 goto next;
1638
Hannes Reinecke36820562020-05-11 10:24:21 +02001639 if (dzone_id >= zmd->nr_zones) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02001640 dmz_zmd_err(zmd, "Chunk %u mapping: invalid data zone ID %u",
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001641 chunk, dzone_id);
1642 return -EIO;
1643 }
1644
1645 dzone = dmz_get(zmd, dzone_id);
1646 set_bit(DMZ_DATA, &dzone->flags);
1647 dzone->chunk = chunk;
1648 dmz_get_zone_weight(zmd, dzone);
1649
1650 if (dmz_is_rnd(dzone))
1651 list_add_tail(&dzone->link, &zmd->map_rnd_list);
1652 else
1653 list_add_tail(&dzone->link, &zmd->map_seq_list);
1654
1655 /* Check buffer zone */
1656 bzone_id = le32_to_cpu(dmap[e].bzone_id);
1657 if (bzone_id == DMZ_MAP_UNMAPPED)
1658 goto next;
1659
Hannes Reinecke36820562020-05-11 10:24:21 +02001660 if (bzone_id >= zmd->nr_zones) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02001661 dmz_zmd_err(zmd, "Chunk %u mapping: invalid buffer zone ID %u",
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001662 chunk, bzone_id);
1663 return -EIO;
1664 }
1665
1666 bzone = dmz_get(zmd, bzone_id);
1667 if (!dmz_is_rnd(bzone)) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02001668 dmz_zmd_err(zmd, "Chunk %u mapping: invalid buffer zone %u",
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001669 chunk, bzone_id);
1670 return -EIO;
1671 }
1672
1673 set_bit(DMZ_DATA, &bzone->flags);
1674 set_bit(DMZ_BUF, &bzone->flags);
1675 bzone->chunk = chunk;
1676 bzone->bzone = dzone;
1677 dzone->bzone = bzone;
1678 dmz_get_zone_weight(zmd, bzone);
1679 list_add_tail(&bzone->link, &zmd->map_rnd_list);
1680next:
1681 chunk++;
1682 e++;
1683 if (e >= DMZ_MAP_ENTRIES)
1684 dmap_mblk = NULL;
1685 }
1686
1687 /*
1688 * At this point, only meta zones and mapped data zones were
1689 * fully initialized. All remaining zones are unmapped data
1690 * zones. Finish initializing those here.
1691 */
Hannes Reinecke36820562020-05-11 10:24:21 +02001692 for (i = 0; i < zmd->nr_zones; i++) {
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001693 dzone = dmz_get(zmd, i);
1694 if (dmz_is_meta(dzone))
1695 continue;
1696
1697 if (dmz_is_rnd(dzone))
1698 zmd->nr_rnd++;
1699 else
1700 zmd->nr_seq++;
1701
1702 if (dmz_is_data(dzone)) {
1703 /* Already initialized */
1704 continue;
1705 }
1706
1707 /* Unmapped data zone */
1708 set_bit(DMZ_DATA, &dzone->flags);
1709 dzone->chunk = DMZ_MAP_UNMAPPED;
1710 if (dmz_is_rnd(dzone)) {
1711 list_add_tail(&dzone->link, &zmd->unmap_rnd_list);
1712 atomic_inc(&zmd->unmap_nr_rnd);
1713 } else if (atomic_read(&zmd->nr_reserved_seq_zones) < zmd->nr_reserved_seq) {
1714 list_add_tail(&dzone->link, &zmd->reserved_seq_zones_list);
1715 atomic_inc(&zmd->nr_reserved_seq_zones);
1716 zmd->nr_seq--;
1717 } else {
1718 list_add_tail(&dzone->link, &zmd->unmap_seq_list);
1719 atomic_inc(&zmd->unmap_nr_seq);
1720 }
1721 }
1722
1723 return 0;
1724}
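/*
 * Worked example (illustrative): with 2000 chunks and 512 entries per 4KB
 * block, the mapping table spans DIV_ROUND_UP(2000, 512) = 4 metadata
 * blocks, loaded above as blocks 1 to 4 (block 0 being the super block).
 */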
1725
1726/*
1727 * Set a data chunk mapping.
1728 */
1729static void dmz_set_chunk_mapping(struct dmz_metadata *zmd, unsigned int chunk,
1730 unsigned int dzone_id, unsigned int bzone_id)
1731{
1732 struct dmz_mblock *dmap_mblk = zmd->map_mblk[chunk >> DMZ_MAP_ENTRIES_SHIFT];
1733 struct dmz_map *dmap = (struct dmz_map *) dmap_mblk->data;
1734 int map_idx = chunk & DMZ_MAP_ENTRIES_MASK;
1735
1736 dmap[map_idx].dzone_id = cpu_to_le32(dzone_id);
1737 dmap[map_idx].bzone_id = cpu_to_le32(bzone_id);
1738 dmz_dirty_mblock(zmd, dmap_mblk);
1739}
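/*
 * Worked example (illustrative): with 4KB metadata blocks and 8-byte
 * entries, DMZ_MAP_ENTRIES is 512, so chunk 1000 is described by entry
 * 1000 & 511 = 488 of chunk mapping block 1000 >> 9 = 1, i.e. the second
 * block of the mapping table.
 */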
1740
1741/*
1742 * The list of mapped zones is maintained in LRU order.
1743 * This rotates a zone to the end of its map list.
1744 */
1745static void __dmz_lru_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
1746{
1747 if (list_empty(&zone->link))
1748 return;
1749
1750 list_del_init(&zone->link);
1751 if (dmz_is_seq(zone)) {
1752 /* LRU rotate sequential zone */
1753 list_add_tail(&zone->link, &zmd->map_seq_list);
1754 } else {
1755 /* LRU rotate random zone */
1756 list_add_tail(&zone->link, &zmd->map_rnd_list);
1757 }
1758}
1759
1760/*
1761 * The lists of mapped zones are maintained in LRU order. This rotates
1762 * a data zone, and its buffer zone if any, to the end of their lists.
1763 */
1764static void dmz_lru_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
1765{
1766 __dmz_lru_zone(zmd, zone);
1767 if (zone->bzone)
1768 __dmz_lru_zone(zmd, zone->bzone);
1769}
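/*
 * Illustrative usage (hypothetical caller): after a chunk access the target
 * typically calls
 *
 *	dmz_lru_zone(zmd, dzone);
 *
 * which moves the data zone and its buffer zone, if any, to the tail of
 * their lists; reclaim walks these lists from the head and so considers the
 * least recently used zones first.
 */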
1770
1771/*
1772 * Wait for any zone to be freed.
1773 */
1774static void dmz_wait_for_free_zones(struct dmz_metadata *zmd)
1775{
1776 DEFINE_WAIT(wait);
1777
1778 prepare_to_wait(&zmd->free_wq, &wait, TASK_UNINTERRUPTIBLE);
1779 dmz_unlock_map(zmd);
1780 dmz_unlock_metadata(zmd);
1781
1782 io_schedule_timeout(HZ);
1783
1784 dmz_lock_metadata(zmd);
1785 dmz_lock_map(zmd);
1786 finish_wait(&zmd->free_wq, &wait);
1787}
1788
1789/*
1790 * Lock a zone for reclaim (set the zone RECLAIM bit).
1791 * Returns 0 if the zone cannot be locked or if it is already locked,
1792 * and 1 otherwise.
1793 */
1794int dmz_lock_zone_reclaim(struct dm_zone *zone)
1795{
1796 /* Active zones cannot be reclaimed */
1797 if (dmz_is_active(zone))
1798 return 0;
1799
1800 return !test_and_set_bit(DMZ_RECLAIM, &zone->flags);
1801}
1802
1803/*
1804 * Clear a zone reclaim flag.
1805 */
1806void dmz_unlock_zone_reclaim(struct dm_zone *zone)
1807{
1808 WARN_ON(dmz_is_active(zone));
1809 WARN_ON(!dmz_in_reclaim(zone));
1810
1811 clear_bit_unlock(DMZ_RECLAIM, &zone->flags);
1812 smp_mb__after_atomic();
1813 wake_up_bit(&zone->flags, DMZ_RECLAIM);
1814}
1815
1816/*
1817 * Wait for a zone reclaim to complete.
1818 */
1819static void dmz_wait_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *zone)
1820{
1821 dmz_unlock_map(zmd);
1822 dmz_unlock_metadata(zmd);
1823 wait_on_bit_timeout(&zone->flags, DMZ_RECLAIM, TASK_UNINTERRUPTIBLE, HZ);
1824 dmz_lock_metadata(zmd);
1825 dmz_lock_map(zmd);
1826}
1827
1828/*
1829 * Select a random write zone for reclaim.
1830 */
1831static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
1832{
1833 struct dm_zone *dzone = NULL;
1834 struct dm_zone *zone;
1835
1836 if (list_empty(&zmd->map_rnd_list))
Dmitry Fomichevb234c6d2019-08-10 14:43:09 -07001837 return ERR_PTR(-EBUSY);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001838
1839 list_for_each_entry(zone, &zmd->map_rnd_list, link) {
1840 if (dmz_is_buf(zone))
1841 dzone = zone->bzone;
1842 else
1843 dzone = zone;
1844 if (dmz_lock_zone_reclaim(dzone))
1845 return dzone;
1846 }
1847
Hannes Reinecke489dc0f2020-05-19 10:14:19 +02001848 return NULL;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001849}
1850
1851/*
1852 * Select a buffered sequential zone for reclaim.
1853 */
1854static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
1855{
1856 struct dm_zone *zone;
1857
1858 if (list_empty(&zmd->map_seq_list))
Dan Carpentere0702d92019-08-19 12:58:14 +03001859 return ERR_PTR(-EBUSY);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001860
1861 list_for_each_entry(zone, &zmd->map_seq_list, link) {
1862 if (!zone->bzone)
1863 continue;
1864 if (dmz_lock_zone_reclaim(zone))
1865 return zone;
1866 }
1867
Hannes Reinecke489dc0f2020-05-19 10:14:19 +02001868 return NULL;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001869}
1870
1871/*
1872 * Select a zone for reclaim.
1873 */
1874struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd)
1875{
1876 struct dm_zone *zone;
1877
1878 /*
1879 * Search for a zone candidate to reclaim: 2 cases are possible.
1880 * (1) There are no free sequential zones. Then a random data zone
1881 * cannot be reclaimed. So choose a sequential zone to reclaim so
1882 * that afterward a random zone can be reclaimed.
1883 * (2) At least one free sequential zone is available. In that case, choose
1884 * the oldest random zone (data or buffer) that can be locked.
1885 */
1886 dmz_lock_map(zmd);
1887 if (list_empty(&zmd->reserved_seq_zones_list))
1888 zone = dmz_get_seq_zone_for_reclaim(zmd);
1889 else
1890 zone = dmz_get_rnd_zone_for_reclaim(zmd);
1891 dmz_unlock_map(zmd);
1892
1893 return zone;
1894}
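/*
 * Rough usage sketch (hypothetical reclaim caller, error handling elided):
 *
 *	zone = dmz_get_zone_for_reclaim(zmd);
 *	if (!IS_ERR_OR_NULL(zone)) {
 *		... move or merge the valid blocks of the zone ...
 *		dmz_unlock_zone_reclaim(zone);
 *	}
 */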
1895
1896/*
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001897 * Get the zone mapping a chunk, if the chunk is mapped already.
1898 * If no mapping exists and the operation is WRITE, a zone is
1899 * allocated and used to map the chunk.
1900 * The zone returned will be set to the active state.
1901 */
1902struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chunk, int op)
1903{
1904 struct dmz_mblock *dmap_mblk = zmd->map_mblk[chunk >> DMZ_MAP_ENTRIES_SHIFT];
1905 struct dmz_map *dmap = (struct dmz_map *) dmap_mblk->data;
1906 int dmap_idx = chunk & DMZ_MAP_ENTRIES_MASK;
1907 unsigned int dzone_id;
1908 struct dm_zone *dzone = NULL;
1909 int ret = 0;
1910
1911 dmz_lock_map(zmd);
1912again:
1913 /* Get the chunk mapping */
1914 dzone_id = le32_to_cpu(dmap[dmap_idx].dzone_id);
1915 if (dzone_id == DMZ_MAP_UNMAPPED) {
1916 /*
1917 * Read or discard in unmapped chunks are fine. But for
1918 * writes, we need a mapping, so get one.
1919 */
1920 if (op != REQ_OP_WRITE)
1921 goto out;
1922
Dmitry Fomichevad1bd572019-08-02 15:02:51 -07001923 /* Allocate a random zone */
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001924 dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
1925 if (!dzone) {
Hannes Reinecked0e21ce2020-05-11 10:24:23 +02001926 if (dmz_dev_is_dying(zmd)) {
Dmitry Fomichev75d66ff2019-08-10 14:43:11 -07001927 dzone = ERR_PTR(-EIO);
1928 goto out;
1929 }
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09001930 dmz_wait_for_free_zones(zmd);
1931 goto again;
1932 }
1933
1934 dmz_map_zone(zmd, dzone, chunk);
1935
1936 } else {
1937 /* The chunk is already mapped: get the mapping zone */
1938 dzone = dmz_get(zmd, dzone_id);
1939 if (dzone->chunk != chunk) {
1940 dzone = ERR_PTR(-EIO);
1941 goto out;
1942 }
1943
1944 /* Repair write pointer if the sequential dzone has error */
1945 if (dmz_seq_write_err(dzone)) {
1946 ret = dmz_handle_seq_write_err(zmd, dzone);
1947 if (ret) {
1948 dzone = ERR_PTR(-EIO);
1949 goto out;
1950 }
1951 clear_bit(DMZ_SEQ_WRITE_ERR, &dzone->flags);
1952 }
1953 }
1954
1955 /*
1956 * If the zone is being reclaimed, the chunk mapping may change
1957 * to a different zone. So wait for reclaim and retry. Otherwise,
1958 * activate the zone (this will prevent reclaim from touching it).
1959 */
1960 if (dmz_in_reclaim(dzone)) {
1961 dmz_wait_for_reclaim(zmd, dzone);
1962 goto again;
1963 }
1964 dmz_activate_zone(dzone);
1965 dmz_lru_zone(zmd, dzone);
1966out:
1967 dmz_unlock_map(zmd);
1968
1969 return dzone;
1970}
1971
1972/*
1973 * Writes and discards change the block validity of data zones and their buffer
1974 * zones. Check here that valid blocks are still present. If all blocks are
1975 * invalid, the zones can be unmapped on the fly without waiting for reclaim
1976 * to do it.
1977 */
1978void dmz_put_chunk_mapping(struct dmz_metadata *zmd, struct dm_zone *dzone)
1979{
1980 struct dm_zone *bzone;
1981
1982 dmz_lock_map(zmd);
1983
1984 bzone = dzone->bzone;
1985 if (bzone) {
1986 if (dmz_weight(bzone))
1987 dmz_lru_zone(zmd, bzone);
1988 else {
1989 /* Empty buffer zone: reclaim it */
1990 dmz_unmap_zone(zmd, bzone);
1991 dmz_free_zone(zmd, bzone);
1992 bzone = NULL;
1993 }
1994 }
1995
1996 /* Deactivate the data zone */
1997 dmz_deactivate_zone(dzone);
1998 if (dmz_is_active(dzone) || bzone || dmz_weight(dzone))
1999 dmz_lru_zone(zmd, dzone);
2000 else {
2001 /* Unbuffered inactive empty data zone: reclaim it */
2002 dmz_unmap_zone(zmd, dzone);
2003 dmz_free_zone(zmd, dzone);
2004 }
2005
2006 dmz_unlock_map(zmd);
2007}
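/*
 * Usage sketch (hypothetical, simplified): the target maps the chunk of a
 * BIO, performs the I/O, then releases the mapping so that reclaim may
 * consider the zone again.
 *
 *	dzone = dmz_get_chunk_mapping(zmd, chunk, bio_op(bio));
 *	if (IS_ERR(dzone))
 *		return PTR_ERR(dzone);
 *	if (dzone) {
 *		... issue the I/O to dzone or to its buffer zone ...
 *		dmz_put_chunk_mapping(zmd, dzone);
 *	}
 */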
2008
2009/*
2010 * Allocate and map a random zone to buffer a chunk
2011 * already mapped to a sequential zone.
2012 */
2013struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
2014 struct dm_zone *dzone)
2015{
2016 struct dm_zone *bzone;
2017
2018 dmz_lock_map(zmd);
2019again:
2020 bzone = dzone->bzone;
2021 if (bzone)
2022 goto out;
2023
Dmitry Fomichevad1bd572019-08-02 15:02:51 -07002024 /* Allocate a random zone */
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002025 bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
2026 if (!bzone) {
Hannes Reinecked0e21ce2020-05-11 10:24:23 +02002027 if (dmz_dev_is_dying(zmd)) {
Dmitry Fomichev75d66ff2019-08-10 14:43:11 -07002028 bzone = ERR_PTR(-EIO);
2029 goto out;
2030 }
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002031 dmz_wait_for_free_zones(zmd);
2032 goto again;
2033 }
2034
2035 /* Update the chunk mapping */
Hannes Reineckeb7122872020-05-11 10:24:18 +02002036 dmz_set_chunk_mapping(zmd, dzone->chunk, dzone->id, bzone->id);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002037
2038 set_bit(DMZ_BUF, &bzone->flags);
2039 bzone->chunk = dzone->chunk;
2040 bzone->bzone = dzone;
2041 dzone->bzone = bzone;
2042 list_add_tail(&bzone->link, &zmd->map_rnd_list);
2043out:
2044 dmz_unlock_map(zmd);
2045
2046 return bzone;
2047}
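/*
 * Example (illustrative): a write to block 10 of a chunk mapped to a
 * sequential zone whose write pointer is already at block 100 cannot go to
 * that zone directly; the target calls dmz_get_chunk_buffer() and writes to
 * the returned random zone instead, leaving reclaim to merge the buffer
 * back into the data zone later.
 */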
2048
2049/*
2050 * Get an unmapped (free) zone.
2051 * This must be called with the mapping lock held.
2052 */
2053struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
2054{
2055 struct list_head *list;
2056 struct dm_zone *zone;
2057
2058 if (flags & DMZ_ALLOC_RND)
2059 list = &zmd->unmap_rnd_list;
2060 else
2061 list = &zmd->unmap_seq_list;
2062again:
2063 if (list_empty(list)) {
2064 /*
2065 * No free zone: if this is for reclaim, allow using the
2066 * reserved sequential zones.
2067 */
2068 if (!(flags & DMZ_ALLOC_RECLAIM) ||
2069 list_empty(&zmd->reserved_seq_zones_list))
2070 return NULL;
2071
2072 zone = list_first_entry(&zmd->reserved_seq_zones_list,
2073 struct dm_zone, link);
2074 list_del_init(&zone->link);
2075 atomic_dec(&zmd->nr_reserved_seq_zones);
2076 return zone;
2077 }
2078
2079 zone = list_first_entry(list, struct dm_zone, link);
2080 list_del_init(&zone->link);
2081
2082 if (dmz_is_rnd(zone))
2083 atomic_dec(&zmd->unmap_nr_rnd);
2084 else
2085 atomic_dec(&zmd->unmap_nr_seq);
2086
2087 if (dmz_is_offline(zone)) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002088 dmz_zmd_warn(zmd, "Zone %u is offline", zone->id);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002089 zone = NULL;
2090 goto again;
2091 }
Hannes Reineckedc076c82020-05-11 10:24:29 +02002092 if (dmz_is_meta(zone)) {
2093 struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002094
Hannes Reineckedc076c82020-05-11 10:24:29 +02002095 dmz_dev_warn(dev, "Zone %u has metadata", zone->id);
2096 zone = NULL;
2097 goto again;
2098 }
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002099 return zone;
2100}
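/*
 * Usage sketch (hypothetical caller, chunk is a placeholder, mapping lock
 * held as required):
 *
 *	dmz_lock_map(zmd);
 *	zone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
 *	if (zone)
 *		dmz_map_zone(zmd, zone, chunk);
 *	dmz_unlock_map(zmd);
 */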
2101
2102/*
2103 * Free a zone.
2104 * This must be called with the mapping lock held.
2105 */
2106void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
2107{
2108 /* If this is a sequential zone, reset it */
2109 if (dmz_is_seq(zone))
2110 dmz_reset_zone(zmd, zone);
2111
2112 /* Return the zone to its type unmap list */
2113 if (dmz_is_rnd(zone)) {
2114 list_add_tail(&zone->link, &zmd->unmap_rnd_list);
2115 atomic_inc(&zmd->unmap_nr_rnd);
2116 } else if (atomic_read(&zmd->nr_reserved_seq_zones) <
2117 zmd->nr_reserved_seq) {
2118 list_add_tail(&zone->link, &zmd->reserved_seq_zones_list);
2119 atomic_inc(&zmd->nr_reserved_seq_zones);
2120 } else {
2121 list_add_tail(&zone->link, &zmd->unmap_seq_list);
2122 atomic_inc(&zmd->unmap_nr_seq);
2123 }
2124
2125 wake_up_all(&zmd->free_wq);
2126}
2127
2128/*
2129 * Map a chunk to a zone.
2130 * This must be called with the mapping lock held.
2131 */
2132void dmz_map_zone(struct dmz_metadata *zmd, struct dm_zone *dzone,
2133 unsigned int chunk)
2134{
2135 /* Set the chunk mapping */
Hannes Reineckeb7122872020-05-11 10:24:18 +02002136 dmz_set_chunk_mapping(zmd, chunk, dzone->id,
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002137 DMZ_MAP_UNMAPPED);
2138 dzone->chunk = chunk;
2139 if (dmz_is_rnd(dzone))
2140 list_add_tail(&dzone->link, &zmd->map_rnd_list);
2141 else
2142 list_add_tail(&dzone->link, &zmd->map_seq_list);
2143}
2144
2145/*
2146 * Unmap a zone.
2147 * This must be called with the mapping lock held.
2148 */
2149void dmz_unmap_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
2150{
2151 unsigned int chunk = zone->chunk;
2152 unsigned int dzone_id;
2153
2154 if (chunk == DMZ_MAP_UNMAPPED) {
2155 /* Already unmapped */
2156 return;
2157 }
2158
2159 if (test_and_clear_bit(DMZ_BUF, &zone->flags)) {
2160 /*
2161 * Unmapping the chunk buffer zone: clear only
2162 * the chunk buffer mapping
2163 */
Hannes Reineckeb7122872020-05-11 10:24:18 +02002164 dzone_id = zone->bzone->id;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002165 zone->bzone->bzone = NULL;
2166 zone->bzone = NULL;
2167
2168 } else {
2169 /*
2170 * Unmapping the chunk data zone: the zone must
2171 * not be buffered.
2172 */
2173 if (WARN_ON(zone->bzone)) {
2174 zone->bzone->bzone = NULL;
2175 zone->bzone = NULL;
2176 }
2177 dzone_id = DMZ_MAP_UNMAPPED;
2178 }
2179
2180 dmz_set_chunk_mapping(zmd, chunk, dzone_id, DMZ_MAP_UNMAPPED);
2181
2182 zone->chunk = DMZ_MAP_UNMAPPED;
2183 list_del_init(&zone->link);
2184}
2185
2186/*
2187 * Set @nr_bits bits in @bitmap starting from @bit.
2188 * Return the number of bits changed from 0 to 1.
2189 */
2190static unsigned int dmz_set_bits(unsigned long *bitmap,
2191 unsigned int bit, unsigned int nr_bits)
2192{
2193 unsigned long *addr;
2194 unsigned int end = bit + nr_bits;
2195 unsigned int n = 0;
2196
2197 while (bit < end) {
2198 if (((bit & (BITS_PER_LONG - 1)) == 0) &&
2199 ((end - bit) >= BITS_PER_LONG)) {
2200 /* Try to set the whole word at once */
2201 addr = bitmap + BIT_WORD(bit);
2202 if (*addr == 0) {
2203 *addr = ULONG_MAX;
2204 n += BITS_PER_LONG;
2205 bit += BITS_PER_LONG;
2206 continue;
2207 }
2208 }
2209
2210 if (!test_and_set_bit(bit, bitmap))
2211 n++;
2212 bit++;
2213 }
2214
2215 return n;
2216}
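/*
 * Worked example (illustrative, 64-bit longs): setting 200 bits starting
 * at bit 0 handles bits 0..63, 64..127 and 128..191 as whole words (a
 * single ULONG_MAX store each when the word was previously zero) and then
 * falls back to test_and_set_bit() for bits 192..199.
 */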
2217
2218/*
2219 * Get the bitmap block storing the bit for chunk_block in zone.
2220 */
2221static struct dmz_mblock *dmz_get_bitmap(struct dmz_metadata *zmd,
2222 struct dm_zone *zone,
2223 sector_t chunk_block)
2224{
2225 sector_t bitmap_block = 1 + zmd->nr_map_blocks +
Hannes Reineckeb7122872020-05-11 10:24:18 +02002226 (sector_t)(zone->id * zmd->zone_nr_bitmap_blocks) +
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002227 (chunk_block >> DMZ_BLOCK_SHIFT_BITS);
2228
2229 return dmz_get_mblock(zmd, bitmap_block);
2230}
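/*
 * Worked example (all numbers illustrative): with 256MB zones a zone holds
 * 65536 4KB blocks and one bitmap block holds 32768 bits, so
 * zone_nr_bitmap_blocks == 2. Assuming nr_map_blocks == 8, the bit for
 * block 40000 of zone 10 then lives in metadata block
 * 1 + 8 + 10 * 2 + (40000 >> 15) = 30.
 */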
2231
2232/*
2233 * Copy the valid blocks bitmap of from_zone to the bitmap of to_zone.
2234 */
2235int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone,
2236 struct dm_zone *to_zone)
2237{
2238 struct dmz_mblock *from_mblk, *to_mblk;
2239 sector_t chunk_block = 0;
2240
2241 /* Get the zones bitmap blocks */
Hannes Reinecke36820562020-05-11 10:24:21 +02002242 while (chunk_block < zmd->zone_nr_blocks) {
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002243 from_mblk = dmz_get_bitmap(zmd, from_zone, chunk_block);
2244 if (IS_ERR(from_mblk))
2245 return PTR_ERR(from_mblk);
2246 to_mblk = dmz_get_bitmap(zmd, to_zone, chunk_block);
2247 if (IS_ERR(to_mblk)) {
2248 dmz_release_mblock(zmd, from_mblk);
2249 return PTR_ERR(to_mblk);
2250 }
2251
2252 memcpy(to_mblk->data, from_mblk->data, DMZ_BLOCK_SIZE);
2253 dmz_dirty_mblock(zmd, to_mblk);
2254
2255 dmz_release_mblock(zmd, to_mblk);
2256 dmz_release_mblock(zmd, from_mblk);
2257
Dmitry Fomichevb3996292019-12-23 17:05:46 -08002258 chunk_block += zmd->zone_bits_per_mblk;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002259 }
2260
2261 to_zone->weight = from_zone->weight;
2262
2263 return 0;
2264}
2265
2266/*
2267 * Merge the valid blocks bitmap of from_zone into the bitmap of to_zone,
2268 * starting from chunk_block.
2269 */
2270int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone,
2271 struct dm_zone *to_zone, sector_t chunk_block)
2272{
2273 unsigned int nr_blocks;
2274 int ret;
2275
2276 /* Get the zones bitmap blocks */
Hannes Reinecke36820562020-05-11 10:24:21 +02002277 while (chunk_block < zmd->zone_nr_blocks) {
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002278 /* Get a valid region from the source zone */
2279 ret = dmz_first_valid_block(zmd, from_zone, &chunk_block);
2280 if (ret <= 0)
2281 return ret;
2282
2283 nr_blocks = ret;
2284 ret = dmz_validate_blocks(zmd, to_zone, chunk_block, nr_blocks);
2285 if (ret)
2286 return ret;
2287
2288 chunk_block += nr_blocks;
2289 }
2290
2291 return 0;
2292}
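/*
 * Illustrative use (simplified reclaim step, hypothetical call site): after
 * copying the valid blocks of a buffer zone into its data zone, the
 * validity bitmaps are brought in line with something like
 *
 *	ret = dmz_merge_valid_blocks(zmd, bzone, dzone, 0);
 *
 * after which the buffer zone can be unmapped and freed.
 */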
2293
2294/*
2295 * Validate all the blocks in the range [block..block+nr_blocks-1].
2296 */
2297int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone,
2298 sector_t chunk_block, unsigned int nr_blocks)
2299{
2300 unsigned int count, bit, nr_bits;
Hannes Reinecke36820562020-05-11 10:24:21 +02002301 unsigned int zone_nr_blocks = zmd->zone_nr_blocks;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002302 struct dmz_mblock *mblk;
2303 unsigned int n = 0;
2304
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002305 dmz_zmd_debug(zmd, "=> VALIDATE zone %u, block %llu, %u blocks",
Hannes Reineckeb7122872020-05-11 10:24:18 +02002306 zone->id, (unsigned long long)chunk_block,
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002307 nr_blocks);
2308
2309 WARN_ON(chunk_block + nr_blocks > zone_nr_blocks);
2310
2311 while (nr_blocks) {
2312 /* Get bitmap block */
2313 mblk = dmz_get_bitmap(zmd, zone, chunk_block);
2314 if (IS_ERR(mblk))
2315 return PTR_ERR(mblk);
2316
2317 /* Set bits */
2318 bit = chunk_block & DMZ_BLOCK_MASK_BITS;
Dmitry Fomichevb3996292019-12-23 17:05:46 -08002319 nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002320
2321 count = dmz_set_bits((unsigned long *)mblk->data, bit, nr_bits);
2322 if (count) {
2323 dmz_dirty_mblock(zmd, mblk);
2324 n += count;
2325 }
2326 dmz_release_mblock(zmd, mblk);
2327
2328 nr_blocks -= nr_bits;
2329 chunk_block += nr_bits;
2330 }
2331
2332 if (likely(zone->weight + n <= zone_nr_blocks))
2333 zone->weight += n;
2334 else {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002335 dmz_zmd_warn(zmd, "Zone %u: weight %u should be <= %u",
Hannes Reineckeb7122872020-05-11 10:24:18 +02002336 zone->id, zone->weight,
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002337 zone_nr_blocks - n);
2338 zone->weight = zone_nr_blocks;
2339 }
2340
2341 return 0;
2342}
2343
2344/*
2345 * Clear nr_bits bits in bitmap starting from bit.
2346 * Return the number of bits cleared.
2347 */
2348static int dmz_clear_bits(unsigned long *bitmap, int bit, int nr_bits)
2349{
2350 unsigned long *addr;
2351 int end = bit + nr_bits;
2352 int n = 0;
2353
2354 while (bit < end) {
2355 if (((bit & (BITS_PER_LONG - 1)) == 0) &&
2356 ((end - bit) >= BITS_PER_LONG)) {
2357 /* Try to clear whole word at once */
2358 addr = bitmap + BIT_WORD(bit);
2359 if (*addr == ULONG_MAX) {
2360 *addr = 0;
2361 n += BITS_PER_LONG;
2362 bit += BITS_PER_LONG;
2363 continue;
2364 }
2365 }
2366
2367 if (test_and_clear_bit(bit, bitmap))
2368 n++;
2369 bit++;
2370 }
2371
2372 return n;
2373}
2374
2375/*
2376 * Invalidate all the blocks in the range [chunk_block..chunk_block+nr_blocks-1].
2377 */
2378int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone,
2379 sector_t chunk_block, unsigned int nr_blocks)
2380{
2381 unsigned int count, bit, nr_bits;
2382 struct dmz_mblock *mblk;
2383 unsigned int n = 0;
2384
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002385 dmz_zmd_debug(zmd, "=> INVALIDATE zone %u, block %llu, %u blocks",
Hannes Reineckeb7122872020-05-11 10:24:18 +02002386 zone->id, (u64)chunk_block, nr_blocks);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002387
Hannes Reinecke36820562020-05-11 10:24:21 +02002388 WARN_ON(chunk_block + nr_blocks > zmd->zone_nr_blocks);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002389
2390 while (nr_blocks) {
2391 /* Get bitmap block */
2392 mblk = dmz_get_bitmap(zmd, zone, chunk_block);
2393 if (IS_ERR(mblk))
2394 return PTR_ERR(mblk);
2395
2396 /* Clear bits */
2397 bit = chunk_block & DMZ_BLOCK_MASK_BITS;
Dmitry Fomichevb3996292019-12-23 17:05:46 -08002398 nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002399
2400 count = dmz_clear_bits((unsigned long *)mblk->data,
2401 bit, nr_bits);
2402 if (count) {
2403 dmz_dirty_mblock(zmd, mblk);
2404 n += count;
2405 }
2406 dmz_release_mblock(zmd, mblk);
2407
2408 nr_blocks -= nr_bits;
2409 chunk_block += nr_bits;
2410 }
2411
2412 if (zone->weight >= n)
2413 zone->weight -= n;
2414 else {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002415 dmz_zmd_warn(zmd, "Zone %u: weight %u should be >= %u",
Hannes Reineckeb7122872020-05-11 10:24:18 +02002416 zone->id, zone->weight, n);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002417 zone->weight = 0;
2418 }
2419
2420 return 0;
2421}
2422
2423/*
2424 * Get a block bit value.
2425 */
2426static int dmz_test_block(struct dmz_metadata *zmd, struct dm_zone *zone,
2427 sector_t chunk_block)
2428{
2429 struct dmz_mblock *mblk;
2430 int ret;
2431
Hannes Reinecke36820562020-05-11 10:24:21 +02002432 WARN_ON(chunk_block >= zmd->zone_nr_blocks);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002433
2434 /* Get bitmap block */
2435 mblk = dmz_get_bitmap(zmd, zone, chunk_block);
2436 if (IS_ERR(mblk))
2437 return PTR_ERR(mblk);
2438
2439 /* Get offset */
2440 ret = test_bit(chunk_block & DMZ_BLOCK_MASK_BITS,
2441 (unsigned long *) mblk->data) != 0;
2442
2443 dmz_release_mblock(zmd, mblk);
2444
2445 return ret;
2446}
2447
2448/*
2449 * Return the number of blocks from chunk_block to the first block with a bit
2450 * value specified by set. Search at most nr_blocks blocks from chunk_block.
2451 */
2452static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone,
2453 sector_t chunk_block, unsigned int nr_blocks,
2454 int set)
2455{
2456 struct dmz_mblock *mblk;
2457 unsigned int bit, set_bit, nr_bits;
Dmitry Fomichevb3996292019-12-23 17:05:46 -08002458 unsigned int zone_bits = zmd->zone_bits_per_mblk;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002459 unsigned long *bitmap;
2460 int n = 0;
2461
Hannes Reinecke36820562020-05-11 10:24:21 +02002462 WARN_ON(chunk_block + nr_blocks > zmd->zone_nr_blocks);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002463
2464 while (nr_blocks) {
2465 /* Get bitmap block */
2466 mblk = dmz_get_bitmap(zmd, zone, chunk_block);
2467 if (IS_ERR(mblk))
2468 return PTR_ERR(mblk);
2469
2470 /* Get offset */
2471 bitmap = (unsigned long *) mblk->data;
2472 bit = chunk_block & DMZ_BLOCK_MASK_BITS;
Dmitry Fomichevb3996292019-12-23 17:05:46 -08002473 nr_bits = min(nr_blocks, zone_bits - bit);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002474 if (set)
Dmitry Fomichevb3996292019-12-23 17:05:46 -08002475 set_bit = find_next_bit(bitmap, zone_bits, bit);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002476 else
Dmitry Fomichevb3996292019-12-23 17:05:46 -08002477 set_bit = find_next_zero_bit(bitmap, zone_bits, bit);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002478 dmz_release_mblock(zmd, mblk);
2479
2480 n += set_bit - bit;
Dmitry Fomichevb3996292019-12-23 17:05:46 -08002481 if (set_bit < zone_bits)
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002482 break;
2483
2484 nr_blocks -= nr_bits;
2485 chunk_block += nr_bits;
2486 }
2487
2488 return n;
2489}
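/*
 * Worked example (illustrative): if blocks 0..9 of a zone are valid and
 * block 10 is not, a search from block 0 with set == 0 returns 10 (the
 * distance to the first invalid block), while a search from block 10 with
 * set == 1 returns the distance to the next valid block, if any.
 */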
2490
2491/*
2492 * Test if chunk_block is valid. If it is, the number of consecutive
2493 * valid blocks from chunk_block will be returned.
2494 */
2495int dmz_block_valid(struct dmz_metadata *zmd, struct dm_zone *zone,
2496 sector_t chunk_block)
2497{
2498 int valid;
2499
2500 valid = dmz_test_block(zmd, zone, chunk_block);
2501 if (valid <= 0)
2502 return valid;
2503
2504 /* The block is valid: get the number of valid blocks from block */
2505 return dmz_to_next_set_block(zmd, zone, chunk_block,
Hannes Reinecke36820562020-05-11 10:24:21 +02002506 zmd->zone_nr_blocks - chunk_block, 0);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002507}
2508
2509/*
2510 * Find the first valid block from @chunk_block in @zone.
2511 * If such a block is found, its number is returned using
2512 * @chunk_block and the total number of valid blocks from @chunk_block
2513 * is returned.
2514 */
2515int dmz_first_valid_block(struct dmz_metadata *zmd, struct dm_zone *zone,
2516 sector_t *chunk_block)
2517{
2518 sector_t start_block = *chunk_block;
2519 int ret;
2520
2521 ret = dmz_to_next_set_block(zmd, zone, start_block,
Hannes Reinecke36820562020-05-11 10:24:21 +02002522 zmd->zone_nr_blocks - start_block, 1);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002523 if (ret < 0)
2524 return ret;
2525
2526 start_block += ret;
2527 *chunk_block = start_block;
2528
2529 return dmz_to_next_set_block(zmd, zone, start_block,
Hannes Reinecke36820562020-05-11 10:24:21 +02002530 zmd->zone_nr_blocks - start_block, 0);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002531}
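/*
 * Example (illustrative): if only blocks 100..149 of the zone are valid,
 * calling this with *chunk_block == 0 sets *chunk_block to 100 and returns
 * 50, the length of that valid extent.
 */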
2532
2533/*
2534 * Count the number of bits set starting from bit up to bit + nr_bits - 1.
2535 */
2536static int dmz_count_bits(void *bitmap, int bit, int nr_bits)
2537{
2538 unsigned long *addr;
2539 int end = bit + nr_bits;
2540 int n = 0;
2541
2542 while (bit < end) {
2543 if (((bit & (BITS_PER_LONG - 1)) == 0) &&
2544 ((end - bit) >= BITS_PER_LONG)) {
2545 addr = (unsigned long *)bitmap + BIT_WORD(bit);
2546 if (*addr == ULONG_MAX) {
2547 n += BITS_PER_LONG;
2548 bit += BITS_PER_LONG;
2549 continue;
2550 }
2551 }
2552
2553 if (test_bit(bit, bitmap))
2554 n++;
2555 bit++;
2556 }
2557
2558 return n;
2559}
2560
2561/*
2562 * Get a zone's weight, i.e. its number of valid blocks.
2563 */
2564static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone)
2565{
2566 struct dmz_mblock *mblk;
2567 sector_t chunk_block = 0;
2568 unsigned int bit, nr_bits;
Hannes Reinecke36820562020-05-11 10:24:21 +02002569 unsigned int nr_blocks = zmd->zone_nr_blocks;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002570 void *bitmap;
2571 int n = 0;
2572
2573 while (nr_blocks) {
2574 /* Get bitmap block */
2575 mblk = dmz_get_bitmap(zmd, zone, chunk_block);
2576 if (IS_ERR(mblk)) {
2577 n = 0;
2578 break;
2579 }
2580
2581 /* Count bits in this block */
2582 bitmap = mblk->data;
2583 bit = chunk_block & DMZ_BLOCK_MASK_BITS;
Dmitry Fomichevb3996292019-12-23 17:05:46 -08002584 nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002585 n += dmz_count_bits(bitmap, bit, nr_bits);
2586
2587 dmz_release_mblock(zmd, mblk);
2588
2589 nr_blocks -= nr_bits;
2590 chunk_block += nr_bits;
2591 }
2592
2593 zone->weight = n;
2594}
2595
2596/*
2597 * Cleanup the zoned metadata resources.
2598 */
2599static void dmz_cleanup_metadata(struct dmz_metadata *zmd)
2600{
2601 struct rb_root *root;
2602 struct dmz_mblock *mblk, *next;
2603 int i;
2604
2605 /* Release zone mapping resources */
2606 if (zmd->map_mblk) {
2607 for (i = 0; i < zmd->nr_map_blocks; i++)
2608 dmz_release_mblock(zmd, zmd->map_mblk[i]);
2609 kfree(zmd->map_mblk);
2610 zmd->map_mblk = NULL;
2611 }
2612
2613 /* Release super blocks */
2614 for (i = 0; i < 2; i++) {
2615 if (zmd->sb[i].mblk) {
2616 dmz_free_mblock(zmd, zmd->sb[i].mblk);
2617 zmd->sb[i].mblk = NULL;
2618 }
2619 }
2620
2621 /* Free cached blocks */
2622 while (!list_empty(&zmd->mblk_dirty_list)) {
2623 mblk = list_first_entry(&zmd->mblk_dirty_list,
2624 struct dmz_mblock, link);
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002625 dmz_zmd_warn(zmd, "mblock %llu still in dirty list (ref %u)",
Damien Le Moal33c28652018-10-17 18:05:07 +09002626 (u64)mblk->no, mblk->ref);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002627 list_del_init(&mblk->link);
2628 rb_erase(&mblk->node, &zmd->mblk_rbtree);
2629 dmz_free_mblock(zmd, mblk);
2630 }
2631
2632 while (!list_empty(&zmd->mblk_lru_list)) {
2633 mblk = list_first_entry(&zmd->mblk_lru_list,
2634 struct dmz_mblock, link);
2635 list_del_init(&mblk->link);
2636 rb_erase(&mblk->node, &zmd->mblk_rbtree);
2637 dmz_free_mblock(zmd, mblk);
2638 }
2639
2640 /* Sanity checks: the mblock rbtree should now be empty */
2641 root = &zmd->mblk_rbtree;
2642 rbtree_postorder_for_each_entry_safe(mblk, next, root, node) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002643 dmz_zmd_warn(zmd, "mblock %llu ref %u still in rbtree",
Damien Le Moal33c28652018-10-17 18:05:07 +09002644 (u64)mblk->no, mblk->ref);
2645 mblk->ref = 0;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002646 dmz_free_mblock(zmd, mblk);
2647 }
2648
2649 /* Free the zone descriptors */
2650 dmz_drop_zones(zmd);
Mike Snitzerd5ffebd2018-01-05 21:17:20 -05002651
2652 mutex_destroy(&zmd->mblk_flush_lock);
2653 mutex_destroy(&zmd->map_lock);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002654}
2655
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002656static void dmz_print_dev(struct dmz_metadata *zmd, int num)
2657{
2658 struct dmz_dev *dev = &zmd->dev[num];
2659
Hannes Reineckebd5c4032020-05-11 10:24:30 +02002660 if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE)
2661 dmz_dev_info(dev, "Regular block device");
2662 else
2663 dmz_dev_info(dev, "Host-%s zoned block device",
2664 bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ?
2665 "aware" : "managed");
2666 if (zmd->sb_version > 1) {
2667 sector_t sector_offset =
2668 dev->zone_offset << zmd->zone_nr_sectors_shift;
2669
2670 dmz_dev_info(dev, " %llu 512-byte logical sectors (offset %llu)",
2671 (u64)dev->capacity, (u64)sector_offset);
2672 dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors (offset %llu)",
2673 dev->nr_zones, (u64)zmd->zone_nr_sectors,
2674 (u64)dev->zone_offset);
2675 } else {
2676 dmz_dev_info(dev, " %llu 512-byte logical sectors",
2677 (u64)dev->capacity);
2678 dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors",
2679 dev->nr_zones, (u64)zmd->zone_nr_sectors);
2680 }
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002681}
2682
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002683/*
2684 * Initialize the zoned metadata.
2685 */
Hannes Reineckebd5c4032020-05-11 10:24:30 +02002686int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
2687 struct dmz_metadata **metadata,
Hannes Reinecke2234e732020-05-11 10:24:22 +02002688 const char *devname)
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002689{
2690 struct dmz_metadata *zmd;
Hannes Reineckeb7122872020-05-11 10:24:18 +02002691 unsigned int i;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002692 struct dm_zone *zone;
2693 int ret;
2694
2695 zmd = kzalloc(sizeof(struct dmz_metadata), GFP_KERNEL);
2696 if (!zmd)
2697 return -ENOMEM;
2698
Hannes Reinecke2234e732020-05-11 10:24:22 +02002699 strcpy(zmd->devname, devname);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002700 zmd->dev = dev;
Hannes Reineckebd5c4032020-05-11 10:24:30 +02002701 zmd->nr_devs = num_dev;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002702 zmd->mblk_rbtree = RB_ROOT;
2703 init_rwsem(&zmd->mblk_sem);
2704 mutex_init(&zmd->mblk_flush_lock);
2705 spin_lock_init(&zmd->mblk_lock);
2706 INIT_LIST_HEAD(&zmd->mblk_lru_list);
2707 INIT_LIST_HEAD(&zmd->mblk_dirty_list);
2708
2709 mutex_init(&zmd->map_lock);
2710 atomic_set(&zmd->unmap_nr_rnd, 0);
2711 INIT_LIST_HEAD(&zmd->unmap_rnd_list);
2712 INIT_LIST_HEAD(&zmd->map_rnd_list);
2713
2714 atomic_set(&zmd->unmap_nr_seq, 0);
2715 INIT_LIST_HEAD(&zmd->unmap_seq_list);
2716 INIT_LIST_HEAD(&zmd->map_seq_list);
2717
2718 atomic_set(&zmd->nr_reserved_seq_zones, 0);
2719 INIT_LIST_HEAD(&zmd->reserved_seq_zones_list);
2720
2721 init_waitqueue_head(&zmd->free_wq);
2722
2723 /* Initialize zone descriptors */
2724 ret = dmz_init_zones(zmd);
2725 if (ret)
2726 goto err;
2727
2728 /* Get super block */
2729 ret = dmz_load_sb(zmd);
2730 if (ret)
2731 goto err;
2732
2733 /* Set metadata zones starting from the primary super block zone */
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002734 for (i = 0; i < zmd->nr_meta_zones << 1; i++) {
Hannes Reinecke735bd7e42020-05-11 10:24:19 +02002735 zone = dmz_get(zmd, zmd->sb[0].zone->id + i);
Hannes Reineckebd5c4032020-05-11 10:24:30 +02002736 if (!dmz_is_rnd(zone)) {
2737 dmz_zmd_err(zmd,
2738 "metadata zone %d is not random", i);
2739 ret = -ENXIO;
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002740 goto err;
Hannes Reineckebd5c4032020-05-11 10:24:30 +02002741 }
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002742 set_bit(DMZ_META, &zone->flags);
2743 }
Hannes Reineckebd5c4032020-05-11 10:24:30 +02002744 if (zmd->sb[2].zone) {
2745 zone = dmz_get(zmd, zmd->sb[2].zone->id);
2746 if (!zone) {
2747 dmz_zmd_err(zmd,
2748 "Tertiary metadata zone not present");
2749 ret = -ENXIO;
2750 goto err;
2751 }
2752 set_bit(DMZ_META, &zone->flags);
2753 }
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002754 /* Load mapping table */
2755 ret = dmz_load_mapping(zmd);
2756 if (ret)
2757 goto err;
2758
2759 /*
2760 * Cache size boundaries: allow at least 2 super blocks, the chunk map
2761 * blocks and enough blocks to be able to cache the bitmap blocks of
2762 * up to 16 zones when idle (min_nr_mblks). Otherwise, if busy, allow
2763 * the cache to add 512 more metadata blocks.
2764 */
2765 zmd->min_nr_mblks = 2 + zmd->nr_map_blocks + zmd->zone_nr_bitmap_blocks * 16;
2766 zmd->max_nr_mblks = zmd->min_nr_mblks + 512;
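	/*
	 * Worked example (illustrative numbers): with nr_map_blocks == 8 and
	 * zone_nr_bitmap_blocks == 2, this gives min_nr_mblks = 2 + 8 + 2 * 16
	 * = 42 and max_nr_mblks = 42 + 512 = 554 cached metadata blocks.
	 */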
2767 zmd->mblk_shrinker.count_objects = dmz_mblock_shrinker_count;
2768 zmd->mblk_shrinker.scan_objects = dmz_mblock_shrinker_scan;
2769 zmd->mblk_shrinker.seeks = DEFAULT_SEEKS;
2770
2771 /* Metadata cache shrinker */
2772 ret = register_shrinker(&zmd->mblk_shrinker);
2773 if (ret) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002774 dmz_zmd_err(zmd, "Register metadata cache shrinker failed");
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002775 goto err;
2776 }
2777
Hannes Reineckebd5c4032020-05-11 10:24:30 +02002778 dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version);
2779 for (i = 0; i < zmd->nr_devs; i++)
2780 dmz_print_dev(zmd, i);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002781
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002782 dmz_zmd_info(zmd, " %u zones of %llu 512-byte logical sectors",
2783 zmd->nr_zones, (u64)zmd->zone_nr_sectors);
Hannes Reineckeae3c1f12020-05-11 10:24:28 +02002784 dmz_zmd_debug(zmd, " %u metadata zones",
2785 zmd->nr_meta_zones * 2);
2786 dmz_zmd_debug(zmd, " %u data zones for %u chunks",
2787 zmd->nr_data_zones, zmd->nr_chunks);
2788 dmz_zmd_debug(zmd, " %u random zones (%u unmapped)",
2789 zmd->nr_rnd, atomic_read(&zmd->unmap_nr_rnd));
2790 dmz_zmd_debug(zmd, " %u sequential zones (%u unmapped)",
2791 zmd->nr_seq, atomic_read(&zmd->unmap_nr_seq));
2792 dmz_zmd_debug(zmd, " %u reserved sequential data zones",
2793 zmd->nr_reserved_seq);
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002794 dmz_zmd_debug(zmd, "Format:");
2795 dmz_zmd_debug(zmd, "%u metadata blocks per set (%u max cache)",
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002796 zmd->nr_meta_blocks, zmd->max_nr_mblks);
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002797 dmz_zmd_debug(zmd, " %u data zone mapping blocks",
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002798 zmd->nr_map_blocks);
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002799 dmz_zmd_debug(zmd, " %u bitmap blocks",
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002800 zmd->nr_bitmap_blocks);
2801
2802 *metadata = zmd;
2803
2804 return 0;
2805err:
2806 dmz_cleanup_metadata(zmd);
2807 kfree(zmd);
2808 *metadata = NULL;
2809
2810 return ret;
2811}
2812
2813/*
2814 * Cleanup the zoned metadata resources.
2815 */
2816void dmz_dtr_metadata(struct dmz_metadata *zmd)
2817{
2818 unregister_shrinker(&zmd->mblk_shrinker);
2819 dmz_cleanup_metadata(zmd);
2820 kfree(zmd);
2821}
2822
2823/*
2824 * Check zone information on resume.
2825 */
2826int dmz_resume_metadata(struct dmz_metadata *zmd)
2827{
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002828 struct dm_zone *zone;
2829 sector_t wp_block;
2830 unsigned int i;
2831 int ret;
2832
2833 /* Check zones */
Hannes Reinecke36820562020-05-11 10:24:21 +02002834 for (i = 0; i < zmd->nr_zones; i++) {
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002835 zone = dmz_get(zmd, i);
2836 if (!zone) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002837 dmz_zmd_err(zmd, "Unable to get zone %u", i);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002838 return -EIO;
2839 }
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002840 wp_block = zone->wp_block;
2841
2842 ret = dmz_update_zone(zmd, zone);
2843 if (ret) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002844 dmz_zmd_err(zmd, "Broken zone %u", i);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002845 return ret;
2846 }
2847
2848 if (dmz_is_offline(zone)) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002849 dmz_zmd_warn(zmd, "Zone %u is offline", i);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002850 continue;
2851 }
2852
2853 /* Check write pointer */
2854 if (!dmz_is_seq(zone))
2855 zone->wp_block = 0;
2856 else if (zone->wp_block != wp_block) {
Hannes Reineckeca1a7042020-05-11 10:24:27 +02002857 dmz_zmd_err(zmd, "Zone %u: Invalid wp (%llu / %llu)",
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002858 i, (u64)zone->wp_block, (u64)wp_block);
2859 zone->wp_block = wp_block;
2860 dmz_invalidate_blocks(zmd, zone, zone->wp_block,
Hannes Reinecke36820562020-05-11 10:24:21 +02002861 zmd->zone_nr_blocks - zone->wp_block);
Damien Le Moal3b1a94c2017-06-07 15:55:39 +09002862 }
2863 }
2864
2865 return 0;
2866}