Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2 | #ifndef _RAID10_H |
| 3 | #define _RAID10_H |
| 4 | |
/* Note: raid10_info.rdev can be set to NULL asynchronously by
 * raid10_remove_disk.
 * There are three safe ways to access raid10_info.rdev.
 * 1/ when holding mddev->reconfig_mutex
 * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
 *    that is called as part of performing resync/recovery/reshape.
 * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
 *    and if it is non-NULL, increment rdev->nr_pending before dropping the
 *    RCU lock.
 * When .rdev is set to NULL, the nr_pending count is checked again and if it
 * has been incremented, the pointer is put back in .rdev.
 */
| 17 | |
/* Per-device state for one member slot of the RAID10 array.
 * See the note above for the locking rules governing .rdev.
 */
struct raid10_info {
	struct md_rdev	*rdev, *replacement;	/* active device and its hot replacement */
	sector_t	head_position;		/* last known head position, for read balancing */
	int		recovery_disabled;	/* matches
						 * mddev->recovery_disabled
						 * when we shouldn't try
						 * recovering this device.
						 */
};
| 27 | |
/* Private configuration for a RAID10 array (mddev->private). */
struct r10conf {
	struct mddev		*mddev;
	struct raid10_info	*mirrors;
	/* During a reshape the disk count can change; mirrors_new/mirrors_old
	 * hold the pending/previous arrays until the switch-over completes.
	 */
	struct raid10_info	*mirrors_new, *mirrors_old;
	spinlock_t		device_lock;

	/* geometry */
	struct geom {
		int		raid_disks;
		int		near_copies;	/* number of copies laid out
						 * raid0 style */
		int		far_copies;	/* number of copies laid out
						 * at large strides across drives
						 */
		int		far_offset;	/* far_copies are offset by 1
						 * stripe instead of many
						 */
		sector_t	stride;		/* distance between far copies.
						 * This is size / far_copies unless
						 * far_offset, in which case it is
						 * 1 stripe.
						 */
		int		far_set_size;	/* The number of devices in a set,
						 * where a 'set' are devices that
						 * contain far/offset copies of
						 * each other.
						 */
		int		chunk_shift;	/* shift from chunks to sectors */
		sector_t	chunk_mask;
	} prev, geo;	/* 'geo' is current geometry; 'prev' is the pre-reshape one */
	int		copies;		/* near_copies * far_copies.
					 * must be <= raid_disks
					 */

	sector_t	dev_sectors;	/* temp copy of
					 * mddev->dev_sectors */
	sector_t	reshape_progress;
	sector_t	reshape_safe;
	unsigned long	reshape_checkpoint;
	sector_t	offset_diff;

	struct list_head	retry_list;
	/* A separate list of r10bio which just need raid_end_bio_io called.
	 * This mustn't happen for writes which had any errors if the superblock
	 * needs to be written.
	 */
	struct list_head	bio_end_io_list;

	/* queue pending writes and submit them on unplug */
	struct bio_list		pending_bio_list;
	int			pending_count;

	/* Barrier/counter state used to serialize regular I/O against
	 * resync/recovery/reshape (see the wait_barrier machinery in raid10.c).
	 */
	spinlock_t		resync_lock;
	atomic_t		nr_pending;
	int			nr_waiting;
	int			nr_queued;
	int			barrier;
	int			array_freeze_pending;
	sector_t		next_resync;
	int			fullsync;	/* set to 1 if a full sync is needed,
						 * (fresh device added).
						 * Cleared when a sync completes.
						 */
	int			have_replacement; /* There is at least one
						   * replacement device.
						   */
	wait_queue_head_t	wait_barrier;

	mempool_t		r10bio_pool;	/* pool of struct r10bio for normal I/O */
	mempool_t		r10buf_pool;	/* pool of r10bio + pages for resync/recovery */
	struct page		*tmppage;
	struct bio_set		bio_split;

	/* When taking over an array from a different personality, we store
	 * the new thread here until we fully activate the array.
	 */
	struct md_thread	*thread;

	/*
	 * Keep track of cluster resync window to send to other nodes.
	 */
	sector_t		cluster_sync_low;
	sector_t		cluster_sync_high;
};
| 112 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 113 | /* |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 114 | * this is our 'private' RAID10 bio. |
| 115 | * |
| 116 | * it contains information about what kind of IO operations were started |
| 117 | * for this RAID10 operation, and about their status: |
| 118 | */ |
| 119 | |
/*
 * this is our 'private' RAID10 bio.
 *
 * it contains information about what kind of IO operations were started
 * for this RAID10 operation, and about their status:
 */
struct r10bio {
	atomic_t	remaining;	/* 'have we finished' count,
					 * used from IRQ handlers
					 */
	sector_t	sector;		/* virtual sector number */
	int		sectors;	/* length of the request in sectors */
	unsigned long	state;		/* bitmask of enum r10bio_state flags */
	struct mddev	*mddev;
	/*
	 * original bio going to /dev/mdx
	 */
	struct bio	*master_bio;
	/*
	 * if the IO is in READ direction, then this is where we read
	 */
	int		read_slot;

	struct list_head retry_list;
	/*
	 * if the IO is in WRITE direction, then multiple bios are used,
	 * one for each copy.
	 * When resyncing we also use one for each copy.
	 * When reconstructing, we use 2 bios, one for read, one for write.
	 * We choose the number when they are allocated.
	 * We sometimes need an extra bio to write to the replacement.
	 */
	struct r10dev {
		struct bio	*bio;
		union {
			struct bio	*repl_bio; /* used for resync and
						    * writes */
			struct md_rdev	*rdev;	   /* used for reads
						    * (read_slot >= 0) */
		};
		sector_t	addr;	/* device address (sector) for this copy */
		int		devnum;	/* index into conf->mirrors */
	} devs[];	/* flexible array: one entry per copy, sized at allocation */
};
| 158 | |
| 159 | /* bits for r10bio.state */ |
/* bits for r10bio.state */
enum r10bio_state {
	R10BIO_Uptodate,	/* the data read/written is known good */
	R10BIO_IsSync,		/* this r10bio is part of a resync */
	R10BIO_IsRecover,	/* this r10bio is part of a recovery */
	R10BIO_IsReshape,	/* this r10bio is part of a reshape */
	R10BIO_Degraded,	/* the write completed with some copies missing */
	/* Set ReadError on bios that experience a read error
	 * so that raid10d knows what to do with them.
	 */
	R10BIO_ReadError,
	/* If a write for this request means we can clear some
	 * known-bad-block records, we set this flag.
	 */
	R10BIO_MadeGood,
	R10BIO_WriteError,
	/* During a reshape we might be performing IO on the
	 * 'previous' part of the array, in which case this
	 * flag is set
	 */
	R10BIO_Previous,
	/* failfast devices did receive failfast requests. */
	R10BIO_FailFast,
};
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | #endif |