// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 *
 * This file is released under the GPL.
 */

#include "dm-zoned.h"

#include <linux/module.h>

#define	DM_MSG_PREFIX		"zoned"

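/* Minimum pool size for the BIO set used to clone BIOs to zones */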
#define DMZ_MIN_BIOS		8192

/*
 * Zone BIO context.
 */
struct dmz_bioctx {
	struct dmz_target	*target;
	struct dm_zone		*zone;
	struct bio		*bio;
	refcount_t		ref;
};

/*
 * Chunk work descriptor.
 */
struct dm_chunk_work {
	struct work_struct	work;
	refcount_t		refcount;
	struct dmz_target	*target;
	unsigned int		chunk;
	struct bio_list		bio_list;
};

/*
 * Target descriptor.
 */
struct dmz_target {
	struct dm_dev		*ddev;

	unsigned long		flags;

	/* Zoned block device information */
	struct dmz_dev		*dev;

	/* For metadata handling */
	struct dmz_metadata	*metadata;

	/* For reclaim */
	struct dmz_reclaim	*reclaim;

	/* For chunk work */
	struct radix_tree_root	chunk_rxtree;
	struct workqueue_struct *chunk_wq;
	struct mutex		chunk_lock;

	/* For cloned BIOs to zones */
	struct bio_set		bio_set;

	/* For flush */
	spinlock_t		flush_lock;
	struct bio_list		flush_list;
	struct delayed_work	flush_work;
	struct workqueue_struct *flush_wq;
};

/*
 * Periodic metadata flush interval (in jiffies).
 */
#define DMZ_FLUSH_PERIOD	(10 * HZ)

/*
 * Target BIO completion.
 */
static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
{
	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));

	if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK)
		bio->bi_status = status;
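	/* On error, request a backing device check (see dmz_bdev_is_dying()) */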
	if (bio->bi_status != BLK_STS_OK)
		bioctx->target->dev->flags |= DMZ_CHECK_BDEV;

	if (refcount_dec_and_test(&bioctx->ref)) {
		struct dm_zone *zone = bioctx->zone;

		if (zone) {
			if (bio->bi_status != BLK_STS_OK &&
			    bio_op(bio) == REQ_OP_WRITE &&
			    dmz_is_seq(zone))
				set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
			dmz_deactivate_zone(zone);
		}
		bio_endio(bio);
	}
}

/*
 * Completion callback for an internally cloned target BIO. This terminates the
 * target BIO when there are no more references to its context.
 */
static void dmz_clone_endio(struct bio *clone)
{
	struct dmz_bioctx *bioctx = clone->bi_private;
	blk_status_t status = clone->bi_status;

	bio_put(clone);
	dmz_bio_endio(bioctx->bio, status);
}

/*
 * Issue a clone of a target BIO. The clone may only partially process the
 * original target BIO.
 */
static int dmz_submit_bio(struct dmz_target *dmz, struct dm_zone *zone,
			  struct bio *bio, sector_t chunk_block,
			  unsigned int nr_blocks)
{
	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	struct bio *clone;

	clone = bio_clone_fast(bio, GFP_NOIO, &dmz->bio_set);
	if (!clone)
		return -ENOMEM;

	bio_set_dev(clone, dmz->dev->bdev);
	clone->bi_iter.bi_sector =
		dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
	clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
	clone->bi_end_io = dmz_clone_endio;
	clone->bi_private = bioctx;

	bio_advance(bio, clone->bi_iter.bi_size);

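	/* The clone holds a reference on the BIO context until dmz_clone_endio() runs */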
	refcount_inc(&bioctx->ref);
	generic_make_request(clone);

	if (bio_op(bio) == REQ_OP_WRITE && dmz_is_seq(zone))
		zone->wp_block += nr_blocks;

	return 0;
}

/*
 * Zero out pages of discarded blocks accessed by a read BIO.
 */
static void dmz_handle_read_zero(struct dmz_target *dmz, struct bio *bio,
				 sector_t chunk_block, unsigned int nr_blocks)
{
	unsigned int size = nr_blocks << DMZ_BLOCK_SHIFT;

	/* Clear nr_blocks */
	swap(bio->bi_iter.bi_size, size);
	zero_fill_bio(bio);
	swap(bio->bi_iter.bi_size, size);

	bio_advance(bio, size);
}

/*
 * Process a read BIO.
 */
static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
			   struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t chunk_block = dmz_chunk_block(zmd, dmz_bio_block(bio));
	unsigned int nr_blocks = dmz_bio_blocks(bio);
	sector_t end_block = chunk_block + nr_blocks;
	struct dm_zone *rzone, *bzone;
	int ret;

	/* Reads to unmapped chunks only need to zero the BIO buffer */
	if (!zone) {
		zero_fill_bio(bio);
		return 0;
	}

	DMDEBUG("(%s): READ chunk %llu -> %s zone %u, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(dmz_is_rnd(zone) ? "RND" : "SEQ"),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	/* Check block validity to determine the read location */
	bzone = zone->bzone;
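	/*
	 * Blocks may be valid in the data zone, in its buffer zone (written
	 * out of place), or in neither (never written or discarded).
	 */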
	while (chunk_block < end_block) {
		nr_blocks = 0;
		if (dmz_is_rnd(zone) || chunk_block < zone->wp_block) {
			/* Test block validity in the data zone */
			ret = dmz_block_valid(zmd, zone, chunk_block);
			if (ret < 0)
				return ret;
			if (ret > 0) {
				/* Read data zone blocks */
				nr_blocks = ret;
				rzone = zone;
			}
		}

		/*
		 * No valid blocks found in the data zone.
		 * Check the buffer zone, if there is one.
		 */
		if (!nr_blocks && bzone) {
			ret = dmz_block_valid(zmd, bzone, chunk_block);
			if (ret < 0)
				return ret;
			if (ret > 0) {
				/* Read buffer zone blocks */
				nr_blocks = ret;
				rzone = bzone;
			}
		}

		if (nr_blocks) {
			/* Valid blocks found: read them */
			nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block);
			ret = dmz_submit_bio(dmz, rzone, bio, chunk_block, nr_blocks);
			if (ret)
				return ret;
			chunk_block += nr_blocks;
		} else {
			/* No valid block: zero out the current BIO block */
			dmz_handle_read_zero(dmz, bio, chunk_block, 1);
			chunk_block++;
		}
	}

	return 0;
}

/*
 * Write blocks directly in a data zone, at the write pointer.
 * If a buffer zone is assigned, invalidate the blocks written
 * in place.
 */
static int dmz_handle_direct_write(struct dmz_target *dmz,
				   struct dm_zone *zone, struct bio *bio,
				   sector_t chunk_block,
				   unsigned int nr_blocks)
{
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *bzone = zone->bzone;
	int ret;

	if (dmz_is_readonly(zone))
		return -EROFS;

	/* Submit write */
	ret = dmz_submit_bio(dmz, zone, bio, chunk_block, nr_blocks);
	if (ret)
		return ret;

	/*
	 * Validate the blocks in the data zone and invalidate
	 * in the buffer zone, if there is one.
	 */
	ret = dmz_validate_blocks(zmd, zone, chunk_block, nr_blocks);
	if (ret == 0 && bzone)
		ret = dmz_invalidate_blocks(zmd, bzone, chunk_block, nr_blocks);

	return ret;
}

/*
 * Write blocks in the buffer zone of @zone.
 * If no buffer zone is assigned yet, get one.
 * Called with @zone write locked.
 */
static int dmz_handle_buffered_write(struct dmz_target *dmz,
				     struct dm_zone *zone, struct bio *bio,
				     sector_t chunk_block,
				     unsigned int nr_blocks)
{
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *bzone;
	int ret;

	/* Get the buffer zone. One will be allocated if needed */
	bzone = dmz_get_chunk_buffer(zmd, zone);
	if (IS_ERR(bzone))
		return PTR_ERR(bzone);

	if (dmz_is_readonly(bzone))
		return -EROFS;

	/* Submit write */
	ret = dmz_submit_bio(dmz, bzone, bio, chunk_block, nr_blocks);
	if (ret)
		return ret;

	/*
	 * Validate the blocks in the buffer zone
	 * and invalidate in the data zone.
	 */
	ret = dmz_validate_blocks(zmd, bzone, chunk_block, nr_blocks);
	if (ret == 0 && chunk_block < zone->wp_block)
		ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);

	return ret;
}

/*
 * Process a write BIO.
 */
static int dmz_handle_write(struct dmz_target *dmz, struct dm_zone *zone,
			    struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t chunk_block = dmz_chunk_block(zmd, dmz_bio_block(bio));
	unsigned int nr_blocks = dmz_bio_blocks(bio);

	if (!zone)
		return -ENOSPC;

	DMDEBUG("(%s): WRITE chunk %llu -> %s zone %u, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(dmz_is_rnd(zone) ? "RND" : "SEQ"),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	if (dmz_is_rnd(zone) || chunk_block == zone->wp_block) {
		/*
		 * zone is a random zone or it is a sequential zone
		 * and the BIO is aligned to the zone write pointer:
		 * direct write the zone.
		 */
		return dmz_handle_direct_write(dmz, zone, bio, chunk_block, nr_blocks);
	}

	/*
	 * This is an unaligned write in a sequential zone:
	 * use buffered write.
	 */
	return dmz_handle_buffered_write(dmz, zone, bio, chunk_block, nr_blocks);
}

/*
 * Process a discard BIO.
 */
static int dmz_handle_discard(struct dmz_target *dmz, struct dm_zone *zone,
			      struct bio *bio)
{
	struct dmz_metadata *zmd = dmz->metadata;
	sector_t block = dmz_bio_block(bio);
	unsigned int nr_blocks = dmz_bio_blocks(bio);
	sector_t chunk_block = dmz_chunk_block(zmd, block);
	int ret = 0;

	/* For unmapped chunks, there is nothing to do */
	if (!zone)
		return 0;

	if (dmz_is_readonly(zone))
		return -EROFS;

	DMDEBUG("(%s): DISCARD chunk %llu -> zone %u, block %llu, %u blocks",
		dmz_metadata_label(dmz->metadata),
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		zone->id,
		(unsigned long long)chunk_block, nr_blocks);

	/*
	 * Invalidate blocks in the data zone and its
	 * buffer zone if one is mapped.
	 */
	if (dmz_is_rnd(zone) || chunk_block < zone->wp_block)
		ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);
	if (ret == 0 && zone->bzone)
		ret = dmz_invalidate_blocks(zmd, zone->bzone,
					    chunk_block, nr_blocks);
	return ret;
}

/*
 * Process a BIO.
 */
static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
			   struct bio *bio)
{
	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	struct dmz_metadata *zmd = dmz->metadata;
	struct dm_zone *zone;
	int ret;

	/*
	 * Write may trigger a zone allocation. So make sure the
	 * allocation can succeed.
	 */
	if (bio_op(bio) == REQ_OP_WRITE)
		dmz_schedule_reclaim(dmz->reclaim);

	dmz_lock_metadata(zmd);

	if (dmz->dev->flags & DMZ_BDEV_DYING) {
		ret = -EIO;
		goto out;
	}

	/*
	 * Get the data zone mapping the chunk. There may be no
	 * mapping for read and discard. If a mapping is obtained,
	 * the zone returned will be set to active state.
	 */
	zone = dmz_get_chunk_mapping(zmd, dmz_bio_chunk(zmd, bio),
				     bio_op(bio));
	if (IS_ERR(zone)) {
		ret = PTR_ERR(zone);
		goto out;
	}

	/* Process the BIO */
	if (zone) {
		dmz_activate_zone(zone);
		bioctx->zone = zone;
	}

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		ret = dmz_handle_read(dmz, zone, bio);
		break;
	case REQ_OP_WRITE:
		ret = dmz_handle_write(dmz, zone, bio);
		break;
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		ret = dmz_handle_discard(dmz, zone, bio);
		break;
	default:
		DMERR("(%s): Unsupported BIO operation 0x%x",
		      dmz_metadata_label(dmz->metadata), bio_op(bio));
		ret = -EIO;
	}

	/*
	 * Release the chunk mapping. This will check that the mapping
	 * is still valid, that is, that the zone used still has valid blocks.
	 */
	if (zone)
		dmz_put_chunk_mapping(zmd, zone);
out:
	dmz_bio_endio(bio, errno_to_blk_status(ret));

	dmz_unlock_metadata(zmd);
}

/*
 * Increment a chunk reference counter.
 */
static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
{
	refcount_inc(&cw->refcount);
}

/*
 * Decrement a chunk work reference count and
 * free it if it becomes 0.
 */
static void dmz_put_chunk_work(struct dm_chunk_work *cw)
{
	if (refcount_dec_and_test(&cw->refcount)) {
		WARN_ON(!bio_list_empty(&cw->bio_list));
		radix_tree_delete(&cw->target->chunk_rxtree, cw->chunk);
		kfree(cw);
	}
}

/*
 * Chunk BIO work function.
 */
static void dmz_chunk_work(struct work_struct *work)
{
	struct dm_chunk_work *cw = container_of(work, struct dm_chunk_work, work);
	struct dmz_target *dmz = cw->target;
	struct bio *bio;

	mutex_lock(&dmz->chunk_lock);

	/* Process the chunk BIOs */
	while ((bio = bio_list_pop(&cw->bio_list))) {
		mutex_unlock(&dmz->chunk_lock);
		dmz_handle_bio(dmz, cw, bio);
		mutex_lock(&dmz->chunk_lock);
		dmz_put_chunk_work(cw);
	}

	/* Queueing the work incremented the work refcount */
	dmz_put_chunk_work(cw);

	mutex_unlock(&dmz->chunk_lock);
}

/*
 * Flush work.
 */
static void dmz_flush_work(struct work_struct *work)
{
	struct dmz_target *dmz = container_of(work, struct dmz_target, flush_work.work);
	struct bio *bio;
	int ret;

	/* Flush dirty metadata blocks */
	ret = dmz_flush_metadata(dmz->metadata);
	if (ret)
		DMDEBUG("(%s): Metadata flush failed, rc=%d\n",
			dmz_metadata_label(dmz->metadata), ret);

	/* Process queued flush requests */
	while (1) {
		spin_lock(&dmz->flush_lock);
		bio = bio_list_pop(&dmz->flush_list);
		spin_unlock(&dmz->flush_lock);

		if (!bio)
			break;

		dmz_bio_endio(bio, errno_to_blk_status(ret));
	}

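	/* Rearm the periodic metadata flush */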
	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
}

/*
 * Get a chunk work and start it to process a new BIO.
 * If the BIO chunk has no work yet, create one.
 */
static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
{
	unsigned int chunk = dmz_bio_chunk(dmz->metadata, bio);
	struct dm_chunk_work *cw;
	int ret = 0;

	mutex_lock(&dmz->chunk_lock);

	/* Get the BIO chunk work. If one is not active yet, create one */
	cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
	if (cw) {
		dmz_get_chunk_work(cw);
	} else {
		/* Create a new chunk work */
		cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
		if (unlikely(!cw)) {
			ret = -ENOMEM;
			goto out;
		}

		INIT_WORK(&cw->work, dmz_chunk_work);
		refcount_set(&cw->refcount, 1);
		cw->target = dmz;
		cw->chunk = chunk;
		bio_list_init(&cw->bio_list);

		ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw);
		if (unlikely(ret)) {
			kfree(cw);
			goto out;
		}
	}

	bio_list_add(&cw->bio_list, bio);

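	/* Let reclaim know about this I/O activity */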
	dmz_reclaim_bio_acc(dmz->reclaim);
	if (queue_work(dmz->chunk_wq, &cw->work))
		dmz_get_chunk_work(cw);
out:
	mutex_unlock(&dmz->chunk_lock);
	return ret;
}

/*
 * Check if the backing device is being removed. If it's on the way out,
 * start failing I/O. Reclaim and metadata components also call this
 * function to cleanly abort operation in the event of such failure.
 */
bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev)
{
	if (dmz_dev->flags & DMZ_BDEV_DYING)
		return true;

	if (dmz_dev->flags & DMZ_CHECK_BDEV)
		return !dmz_check_bdev(dmz_dev);

	if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) {
		dmz_dev_warn(dmz_dev, "Backing device queue dying");
		dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return dmz_dev->flags & DMZ_BDEV_DYING;
}

/*
 * Check the backing device availability. This detects such events as
 * backing device going offline due to errors, media removals, etc.
 * This check is less efficient than dmz_bdev_is_dying() and should
 * only be performed as a part of error handling.
 */
bool dmz_check_bdev(struct dmz_dev *dmz_dev)
{
	struct gendisk *disk;

	dmz_dev->flags &= ~DMZ_CHECK_BDEV;

	if (dmz_bdev_is_dying(dmz_dev))
		return false;

	disk = dmz_dev->bdev->bd_disk;
	if (disk->fops->check_events &&
	    disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) {
		dmz_dev_warn(dmz_dev, "Backing device offline");
		dmz_dev->flags |= DMZ_BDEV_DYING;
	}

	return !(dmz_dev->flags & DMZ_BDEV_DYING);
}

/*
 * Process a new BIO.
 */
static int dmz_map(struct dm_target *ti, struct bio *bio)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_metadata *zmd = dmz->metadata;
	struct dmz_dev *dev = dmz->dev;
	struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
	sector_t sector = bio->bi_iter.bi_sector;
	unsigned int nr_sectors = bio_sectors(bio);
	sector_t chunk_sector;
	int ret;

	if (dmz_dev_is_dying(zmd))
		return DM_MAPIO_KILL;

	DMDEBUG("(%s): BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks",
		dmz_metadata_label(zmd),
		bio_op(bio), (unsigned long long)sector, nr_sectors,
		(unsigned long long)dmz_bio_chunk(zmd, bio),
		(unsigned long long)dmz_chunk_block(zmd, dmz_bio_block(bio)),
		(unsigned int)dmz_bio_blocks(bio));

	bio_set_dev(bio, dev->bdev);

	if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE)
		return DM_MAPIO_REMAPPED;

	/* The BIO should be block aligned */
	if ((nr_sectors & DMZ_BLOCK_SECTORS_MASK) || (sector & DMZ_BLOCK_SECTORS_MASK))
		return DM_MAPIO_KILL;

	/* Initialize the BIO context */
	bioctx->target = dmz;
	bioctx->zone = NULL;
	bioctx->bio = bio;
	refcount_set(&bioctx->ref, 1);

	/* Set the BIO pending in the flush list */
	if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
		spin_lock(&dmz->flush_lock);
		bio_list_add(&dmz->flush_list, bio);
		spin_unlock(&dmz->flush_lock);
		mod_delayed_work(dmz->flush_wq, &dmz->flush_work, 0);
		return DM_MAPIO_SUBMITTED;
	}

	/* Split zone BIOs to fit entirely into a zone */
	chunk_sector = sector & (dmz_zone_nr_sectors(zmd) - 1);
	if (chunk_sector + nr_sectors > dmz_zone_nr_sectors(zmd))
		dm_accept_partial_bio(bio, dmz_zone_nr_sectors(zmd) - chunk_sector);

	/* Now ready to handle this BIO */
	ret = dmz_queue_chunk_work(dmz, bio);
	if (ret) {
		DMDEBUG("(%s): BIO op %d, can't process chunk %llu, err %i\n",
			dmz_metadata_label(zmd),
			bio_op(bio), (u64)dmz_bio_chunk(zmd, bio),
			ret);
		return DM_MAPIO_REQUEUE;
	}

	return DM_MAPIO_SUBMITTED;
}

/*
 * Get zoned device information.
 */
static int dmz_get_zoned_device(struct dm_target *ti, char *path)
{
	struct dmz_target *dmz = ti->private;
	struct request_queue *q;
	struct dmz_dev *dev;
	sector_t aligned_capacity;
	int ret;

	/* Get the target device */
	ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &dmz->ddev);
	if (ret) {
		ti->error = "Get target device failed";
		dmz->ddev = NULL;
		return ret;
	}

	dev = kzalloc(sizeof(struct dmz_dev), GFP_KERNEL);
	if (!dev) {
		ret = -ENOMEM;
		goto err;
	}

	dev->bdev = dmz->ddev->bdev;
	(void)bdevname(dev->bdev, dev->name);

	if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) {
		ti->error = "Not a zoned block device";
		ret = -EINVAL;
		goto err;
	}

	q = bdev_get_queue(dev->bdev);
	dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
	aligned_capacity = dev->capacity &
			~((sector_t)blk_queue_zone_sectors(q) - 1);
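	/* Only whole-device mappings (or the zone-aligned capacity) are supported */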
	if (ti->begin ||
	    ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
		ti->error = "Partial mapping not supported";
		ret = -EINVAL;
		goto err;
	}

	dev->zone_nr_sectors = blk_queue_zone_sectors(q);

	dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk);

	dmz->dev = dev;

	return 0;
err:
	dm_put_device(ti, dmz->ddev);
	kfree(dev);

	return ret;
}

/*
 * Cleanup zoned device information.
 */
static void dmz_put_zoned_device(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;

	dm_put_device(ti, dmz->ddev);
	kfree(dmz->dev);
	dmz->dev = NULL;
}

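/*
 * Illustrative construction example (device name and size are placeholders):
 *   dmsetup create dmz-sdb --table "0 $(blockdev --getsz /dev/sdb) zoned /dev/sdb"
 * The constructor below takes the zoned block device path as its single argument.
 */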
/*
 * Setup target.
 */
static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct dmz_target *dmz;
	struct dmz_dev *dev;
	int ret;

	/* Check arguments */
	if (argc != 1) {
		ti->error = "Invalid argument count";
		return -EINVAL;
	}

	/* Allocate and initialize the target descriptor */
	dmz = kzalloc(sizeof(struct dmz_target), GFP_KERNEL);
	if (!dmz) {
		ti->error = "Unable to allocate the zoned target descriptor";
		return -ENOMEM;
	}
	ti->private = dmz;

	/* Get the target zoned block device */
	ret = dmz_get_zoned_device(ti, argv[0]);
	if (ret) {
		dmz->ddev = NULL;
		goto err;
	}

	/* Initialize metadata */
	dev = dmz->dev;
	ret = dmz_ctr_metadata(dev, &dmz->metadata,
			       dm_table_device_name(ti->table));
	if (ret) {
		ti->error = "Metadata initialization failed";
		goto err_dev;
	}

	/* Set target (no write same support) */
	ti->max_io_len = dmz_zone_nr_sectors(dmz->metadata) << 9;
	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_write_zeroes_bios = 1;
	ti->per_io_data_size = sizeof(struct dmz_bioctx);
	ti->flush_supported = true;
	ti->discards_supported = true;

	/* The exposed capacity is the number of chunks that can be mapped */
	ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) <<
		dmz_zone_nr_sectors_shift(dmz->metadata);

	/* Zone BIO */
	ret = bioset_init(&dmz->bio_set, DMZ_MIN_BIOS, 0, 0);
	if (ret) {
		ti->error = "Create BIO set failed";
		goto err_meta;
	}

	/* Chunk BIO work */
	mutex_init(&dmz->chunk_lock);
	INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOIO);
	dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s",
					WQ_MEM_RECLAIM | WQ_UNBOUND, 0,
					dmz_metadata_label(dmz->metadata));
	if (!dmz->chunk_wq) {
		ti->error = "Create chunk workqueue failed";
		ret = -ENOMEM;
		goto err_bio;
	}

	/* Flush work */
	spin_lock_init(&dmz->flush_lock);
	bio_list_init(&dmz->flush_list);
	INIT_DELAYED_WORK(&dmz->flush_work, dmz_flush_work);
	dmz->flush_wq = alloc_ordered_workqueue("dmz_fwq_%s", WQ_MEM_RECLAIM,
						dmz_metadata_label(dmz->metadata));
	if (!dmz->flush_wq) {
		ti->error = "Create flush workqueue failed";
		ret = -ENOMEM;
		goto err_cwq;
	}
	mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);

	/* Initialize reclaim */
	ret = dmz_ctr_reclaim(dev, dmz->metadata, &dmz->reclaim);
	if (ret) {
		ti->error = "Zone reclaim initialization failed";
		goto err_fwq;
	}

	DMINFO("(%s): Target device: %llu 512-byte logical sectors (%llu blocks)",
	       dmz_metadata_label(dmz->metadata),
	       (unsigned long long)ti->len,
	       (unsigned long long)dmz_sect2blk(ti->len));

	return 0;
err_fwq:
	destroy_workqueue(dmz->flush_wq);
err_cwq:
	destroy_workqueue(dmz->chunk_wq);
err_bio:
	mutex_destroy(&dmz->chunk_lock);
	bioset_exit(&dmz->bio_set);
err_meta:
	dmz_dtr_metadata(dmz->metadata);
err_dev:
	dmz_put_zoned_device(ti);
err:
	kfree(dmz);

	return ret;
}

/*
 * Cleanup target.
 */
static void dmz_dtr(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;

	flush_workqueue(dmz->chunk_wq);
	destroy_workqueue(dmz->chunk_wq);

	dmz_dtr_reclaim(dmz->reclaim);

	cancel_delayed_work_sync(&dmz->flush_work);
	destroy_workqueue(dmz->flush_wq);

	(void) dmz_flush_metadata(dmz->metadata);

	dmz_dtr_metadata(dmz->metadata);

	bioset_exit(&dmz->bio_set);

	dmz_put_zoned_device(ti);

	mutex_destroy(&dmz->chunk_lock);

	kfree(dmz);
}

/*
 * Setup target request queue limits.
 */
static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct dmz_target *dmz = ti->private;
	unsigned int chunk_sectors = dmz_zone_nr_sectors(dmz->metadata);

	limits->logical_block_size = DMZ_BLOCK_SIZE;
	limits->physical_block_size = DMZ_BLOCK_SIZE;

	blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
	blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);

	limits->discard_alignment = DMZ_BLOCK_SIZE;
	limits->discard_granularity = DMZ_BLOCK_SIZE;
	limits->max_discard_sectors = chunk_sectors;
	limits->max_hw_discard_sectors = chunk_sectors;
	limits->max_write_zeroes_sectors = chunk_sectors;

	/* FS hint to try to align to the device zone size */
	limits->chunk_sectors = chunk_sectors;
	limits->max_sectors = chunk_sectors;

	/* We are exposing a drive-managed zoned block device */
	limits->zoned = BLK_ZONED_NONE;
}

/*
 * Pass on ioctl to the backend device.
 */
static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
{
	struct dmz_target *dmz = ti->private;

	if (!dmz_check_bdev(dmz->dev))
		return -EIO;

	*bdev = dmz->dev->bdev;

	return 0;
}

/*
 * Stop works on suspend.
 */
static void dmz_suspend(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;

	flush_workqueue(dmz->chunk_wq);
	dmz_suspend_reclaim(dmz->reclaim);
	cancel_delayed_work_sync(&dmz->flush_work);
}

/*
 * Restart works on resume or if suspend failed.
 */
static void dmz_resume(struct dm_target *ti)
{
	struct dmz_target *dmz = ti->private;

	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
	dmz_resume_reclaim(dmz->reclaim);
}

static int dmz_iterate_devices(struct dm_target *ti,
			       iterate_devices_callout_fn fn, void *data)
{
	struct dmz_target *dmz = ti->private;
	struct dmz_dev *dev = dmz->dev;
	sector_t capacity = dev->capacity & ~(dmz_zone_nr_sectors(dmz->metadata) - 1);

	return fn(ti, dmz->ddev, 0, capacity, data);
}

static void dmz_status(struct dm_target *ti, status_type_t type,
		       unsigned int status_flags, char *result,
		       unsigned int maxlen)
{
	struct dmz_target *dmz = ti->private;
	ssize_t sz = 0;
	char buf[BDEVNAME_SIZE];

	switch (type) {
	case STATUSTYPE_INFO:
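		/* <nr zones> <unmapped>/<total> random <unmapped>/<total> sequential */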
		DMEMIT("%u zones %u/%u random %u/%u sequential",
		       dmz_nr_zones(dmz->metadata),
		       dmz_nr_unmap_rnd_zones(dmz->metadata),
		       dmz_nr_rnd_zones(dmz->metadata),
		       dmz_nr_unmap_seq_zones(dmz->metadata),
		       dmz_nr_seq_zones(dmz->metadata));
		break;
	case STATUSTYPE_TABLE:
		format_dev_t(buf, dmz->dev->bdev->bd_dev);
		DMEMIT("%s", buf);
		break;
	}
	return;
}

static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
		       char *result, unsigned int maxlen)
{
	struct dmz_target *dmz = ti->private;
	int r = -EINVAL;

	if (!strcasecmp(argv[0], "reclaim")) {
		dmz_schedule_reclaim(dmz->reclaim);
		r = 0;
	} else
		DMERR("unrecognized message %s", argv[0]);
	return r;
}

static struct target_type dmz_type = {
	.name		 = "zoned",
	.version	 = {1, 1, 0},
	.features	 = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM,
	.module		 = THIS_MODULE,
	.ctr		 = dmz_ctr,
	.dtr		 = dmz_dtr,
	.map		 = dmz_map,
	.io_hints	 = dmz_io_hints,
	.prepare_ioctl	 = dmz_prepare_ioctl,
	.postsuspend	 = dmz_suspend,
	.resume		 = dmz_resume,
	.iterate_devices = dmz_iterate_devices,
	.status		 = dmz_status,
	.message	 = dmz_message,
};

static int __init dmz_init(void)
{
	return dm_register_target(&dmz_type);
}

static void __exit dmz_exit(void)
{
	dm_unregister_target(&dmz_type);
}

module_init(dmz_init);
module_exit(dmz_exit);

MODULE_DESCRIPTION(DM_NAME " target for zoned block devices");
MODULE_AUTHOR("Damien Le Moal <damien.lemoal@wdc.com>");
MODULE_LICENSE("GPL");