Thomas Gleixner | 2025cf9 | 2019-05-29 07:18:02 -0700 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
Dan Williams | 8c2f7e8 | 2015-06-25 04:20:04 -0400 | [diff] [blame] | 2 | /* |
| 3 | * Block Translation Table library |
| 4 | * Copyright (c) 2014-2015, Intel Corporation. |
Dan Williams | 8c2f7e8 | 2015-06-25 04:20:04 -0400 | [diff] [blame] | 5 | */ |
| 6 | |
| 7 | #ifndef _LINUX_BTT_H |
| 8 | #define _LINUX_BTT_H |
| 9 | |
Vishal Verma | d9b83c7 | 2017-08-30 19:36:03 -0600 | [diff] [blame] | 10 | #include <linux/badblocks.h> |
Dan Williams | 8c2f7e8 | 2015-06-25 04:20:04 -0400 | [diff] [blame] | 11 | #include <linux/types.h> |
| 12 | |
/*
 * Arena info block signature. The literal is 14 characters plus the explicit
 * and the implicit NUL terminator, i.e. exactly BTT_SIG_LEN bytes.
 */
#define BTT_SIG_LEN 16
#define BTT_SIG "BTT_ARENA_INFO\0"

/*
 * Map entry layout (MAP_ENT_SIZE bytes per entry): bit 31 is the trim ('z')
 * flag, bit 30 is the error ('e') flag, and the low 30 bits hold the LBA.
 *
 * The masks are built from unsigned constants on purpose: (1 << 31) on a
 * signed int overflows, and the resulting negative value would sign-extend
 * if it were ever combined with a 64-bit operand.
 */
#define MAP_ENT_SIZE 4
#define MAP_TRIM_SHIFT 31
#define MAP_TRIM_MASK (1U << MAP_TRIM_SHIFT)
#define MAP_ERR_SHIFT 30
#define MAP_ERR_MASK (1U << MAP_ERR_SHIFT)
#define MAP_LBA_MASK (~(MAP_TRIM_MASK | MAP_ERR_MASK))
/* Both flag bits set, i.e. (MAP_TRIM_MASK | MAP_ERR_MASK) */
#define MAP_ENT_NORMAL 0xC0000000
#define LOG_GRP_SIZE (sizeof(struct log_group))
#define LOG_ENT_SIZE (sizeof(struct log_entry))
#define ARENA_MIN_SIZE (1UL << 24)	/* 16 MB */
#define ARENA_MAX_SIZE (1ULL << 39)	/* 512 GB */
#define RTT_VALID (1UL << 31)
#define RTT_INVALID 0
#define BTT_PG_SIZE 4096
#define BTT_DEFAULT_NFREE ND_MAX_LANES
#define LOG_SEQ_INIT 1

#define IB_FLAG_ERROR 0x00000001
#define IB_FLAG_ERROR_MASK 0x00000001

/*
 * Map entry accessors. Arguments are parenthesized so that expressions such
 * as ent_lba(a | b) expand as intended. set_e_flag() requires an lvalue.
 */
#define ent_lba(ent) ((ent) & MAP_LBA_MASK)
#define ent_e_flag(ent) (!!((ent) & MAP_ERR_MASK))
#define ent_z_flag(ent) (!!((ent) & MAP_TRIM_MASK))
#define set_e_flag(ent) ((ent) |= MAP_ERR_MASK)
/* 'normal' is both e and z flags set; the argument is evaluated once */
#define ent_normal(ent) (((ent) & MAP_ENT_NORMAL) == MAP_ENT_NORMAL)
Vishal Verma | 0595d53 | 2017-08-30 19:35:59 -0600 | [diff] [blame] | 41 | |
/*
 * Initialization states. The values are spelled out explicitly; they are the
 * same ones the implicit enumeration order produces.
 * NOTE(review): presumably stored in struct btt's 'init_state' member, which
 * is declared as a plain int - confirm against the users.
 */
enum btt_init_state {
	INIT_UNCHECKED = 0,
	INIT_NOTFOUND = 1,
	INIT_READY = 2,
};
| 47 | |
/*
 * A log group represents one log 'lane', and consists of four log entries.
 * Two of the four entries are valid entries, and the remaining two are
 * padding. Due to an old bug in the padding location, we need to perform a
 * test to determine the padding scheme being used, and use that scheme
 * thereafter.
 *
 * In kernels prior to 4.15, a 'log group' would have actual log entries at
 * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
 * format has log entries at indices (0, 1) and padding at indices (2, 3).
 *
 * Old (pre 4.15) format:
 * +-----------------+-----------------+
 * |      ent[0]     |      ent[1]     |
 * |       16B       |       16B       |
 * | lba/old/new/seq |       pad       |
 * +-----------------+-----------------+
 * |      ent[2]     |      ent[3]     |
 * |       16B       |       16B       |
 * | lba/old/new/seq |       pad       |
 * +-----------------+-----------------+
 *
 * New format:
 * +-----------------+-----------------+
 * |      ent[0]     |      ent[1]     |
 * |       16B       |       16B       |
 * | lba/old/new/seq | lba/old/new/seq |
 * +-----------------+-----------------+
 * |      ent[2]     |      ent[3]     |
 * |       16B       |       16B       |
 * |       pad       |       pad       |
 * +-----------------+-----------------+
 *
 * We detect during start-up which format is in use, and set
 * arena->log_index[0] and arena->log_index[1] to the indices of the two
 * valid entries for the detected format.
 */
| 84 | |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 85 | struct log_entry { |
| 86 | __le32 lba; |
| 87 | __le32 old_map; |
| 88 | __le32 new_map; |
| 89 | __le32 seq; |
Vishal Verma | 24e3a7f | 2017-12-18 09:28:39 -0700 | [diff] [blame] | 90 | }; |
| 91 | |
| 92 | struct log_group { |
| 93 | struct log_entry ent[4]; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 94 | }; |
Dan Williams | 8c2f7e8 | 2015-06-25 04:20:04 -0400 | [diff] [blame] | 95 | |
| 96 | struct btt_sb { |
| 97 | u8 signature[BTT_SIG_LEN]; |
| 98 | u8 uuid[16]; |
| 99 | u8 parent_uuid[16]; |
| 100 | __le32 flags; |
| 101 | __le16 version_major; |
| 102 | __le16 version_minor; |
| 103 | __le32 external_lbasize; |
| 104 | __le32 external_nlba; |
| 105 | __le32 internal_lbasize; |
| 106 | __le32 internal_nlba; |
| 107 | __le32 nfree; |
| 108 | __le32 infosize; |
| 109 | __le64 nextoff; |
| 110 | __le64 dataoff; |
| 111 | __le64 mapoff; |
| 112 | __le64 logoff; |
| 113 | __le64 info2off; |
| 114 | u8 padding[3968]; |
| 115 | __le64 checksum; |
| 116 | }; |
| 117 | |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 118 | struct free_entry { |
| 119 | u32 block; |
| 120 | u8 sub; |
| 121 | u8 seq; |
Vishal Verma | d9b83c7 | 2017-08-30 19:36:03 -0600 | [diff] [blame] | 122 | u8 has_err; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 123 | }; |
| 124 | |
| 125 | struct aligned_lock { |
| 126 | union { |
| 127 | spinlock_t lock; |
| 128 | u8 cacheline_padding[L1_CACHE_BYTES]; |
| 129 | }; |
| 130 | }; |
| 131 | |
| 132 | /** |
| 133 | * struct arena_info - handle for an arena |
| 134 | * @size: Size in bytes this arena occupies on the raw device. |
| 135 | * This includes arena metadata. |
| 136 | * @external_lba_start: The first external LBA in this arena. |
| 137 | * @internal_nlba: Number of internal blocks available in the arena |
| 138 | * including nfree reserved blocks |
| 139 | * @internal_lbasize: Internal and external lba sizes may be different as |
| 140 | * we can round up 'odd' external lbasizes such as 520B |
| 141 | * to be aligned. |
| 142 | * @external_nlba: Number of blocks contributed by the arena to the number |
| 143 | * reported to upper layers. (internal_nlba - nfree) |
| 144 | * @external_lbasize: LBA size as exposed to upper layers. |
| 145 | * @nfree: A reserve number of 'free' blocks that is used to |
| 146 | * handle incoming writes. |
| 147 | * @version_major: Metadata layout version major. |
| 148 | * @version_minor: Metadata layout version minor. |
Vishal Verma | 7589200 | 2017-08-30 19:36:01 -0600 | [diff] [blame] | 149 | * @sector_size: The Linux sector size - 512 or 4096 |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 150 | * @nextoff: Offset in bytes to the start of the next arena. |
| 151 | * @infooff: Offset in bytes to the info block of this arena. |
| 152 | * @dataoff: Offset in bytes to the data area of this arena. |
| 153 | * @mapoff: Offset in bytes to the map area of this arena. |
| 154 | * @logoff: Offset in bytes to the log area of this arena. |
| 155 | * @info2off: Offset in bytes to the backup info block of this arena. |
| 156 | * @freelist: Pointer to in-memory list of free blocks |
| 157 | * @rtt: Pointer to in-memory "Read Tracking Table" |
| 158 | * @map_locks: Spinlocks protecting concurrent map writes |
| 159 | * @nd_btt: Pointer to parent nd_btt structure. |
| 160 | * @list: List head for list of arenas |
| 161 | * @debugfs_dir: Debugfs dentry |
| 162 | * @flags: Arena flags - may signify error states. |
Vishal Verma | 13b7954 | 2017-12-14 17:26:13 -0700 | [diff] [blame] | 163 | * @err_lock: Mutex for synchronizing error clearing. |
Vishal Verma | 24e3a7f | 2017-12-18 09:28:39 -0700 | [diff] [blame] | 164 | * @log_index: Indices of the valid log entries in a log_group |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 165 | * |
| 166 | * arena_info is a per-arena handle. Once an arena is narrowed down for an |
| 167 | * IO, this struct is passed around for the duration of the IO. |
| 168 | */ |
| 169 | struct arena_info { |
| 170 | u64 size; /* Total bytes for this arena */ |
| 171 | u64 external_lba_start; |
| 172 | u32 internal_nlba; |
| 173 | u32 internal_lbasize; |
| 174 | u32 external_nlba; |
| 175 | u32 external_lbasize; |
| 176 | u32 nfree; |
| 177 | u16 version_major; |
| 178 | u16 version_minor; |
Vishal Verma | 7589200 | 2017-08-30 19:36:01 -0600 | [diff] [blame] | 179 | u32 sector_size; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 180 | /* Byte offsets to the different on-media structures */ |
| 181 | u64 nextoff; |
| 182 | u64 infooff; |
| 183 | u64 dataoff; |
| 184 | u64 mapoff; |
| 185 | u64 logoff; |
| 186 | u64 info2off; |
| 187 | /* Pointers to other in-memory structures for this arena */ |
| 188 | struct free_entry *freelist; |
| 189 | u32 *rtt; |
| 190 | struct aligned_lock *map_locks; |
| 191 | struct nd_btt *nd_btt; |
| 192 | struct list_head list; |
| 193 | struct dentry *debugfs_dir; |
| 194 | /* Arena flags */ |
| 195 | u32 flags; |
Vishal Verma | d9b83c7 | 2017-08-30 19:36:03 -0600 | [diff] [blame] | 196 | struct mutex err_lock; |
Vishal Verma | 24e3a7f | 2017-12-18 09:28:39 -0700 | [diff] [blame] | 197 | int log_index[2]; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 198 | }; |
| 199 | |
| 200 | /** |
| 201 | * struct btt - handle for a BTT instance |
| 202 | * @btt_disk: Pointer to the gendisk for BTT device |
| 203 | * @btt_queue: Pointer to the request queue for the BTT device |
| 204 | * @arena_list: Head of the list of arenas |
| 205 | * @debugfs_dir: Debugfs dentry |
| 206 | * @nd_btt: Parent nd_btt struct |
| 207 | * @nlba: Number of logical blocks exposed to the upper layers |
| 208 | * after removing the amount of space needed by metadata |
| 209 | * @rawsize: Total size in bytes of the available backing device |
| 210 | * @lbasize: LBA size as requested and presented to upper layers. |
| 211 | * This is sector_size + size of any metadata. |
| 212 | * @sector_size: The Linux sector size - 512 or 4096 |
| 213 | * @lanes: Per-lane spinlocks |
| 214 | * @init_lock: Mutex used for the BTT initialization |
| 215 | * @init_state: Flag describing the initialization state for the BTT |
| 216 | * @num_arenas: Number of arenas in the BTT instance |
Vishal Verma | 13b7954 | 2017-12-14 17:26:13 -0700 | [diff] [blame] | 217 | * @phys_bb: Pointer to the namespace's badblocks structure |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 218 | */ |
| 219 | struct btt { |
| 220 | struct gendisk *btt_disk; |
| 221 | struct request_queue *btt_queue; |
| 222 | struct list_head arena_list; |
| 223 | struct dentry *debugfs_dir; |
| 224 | struct nd_btt *nd_btt; |
| 225 | u64 nlba; |
| 226 | unsigned long long rawsize; |
| 227 | u32 lbasize; |
| 228 | u32 sector_size; |
| 229 | struct nd_region *nd_region; |
| 230 | struct mutex init_lock; |
| 231 | int init_state; |
| 232 | int num_arenas; |
Vishal Verma | d9b83c7 | 2017-08-30 19:36:03 -0600 | [diff] [blame] | 233 | struct badblocks *phys_bb; |
Vishal Verma | 5212e11 | 2015-06-25 04:20:32 -0400 | [diff] [blame] | 234 | }; |
Vishal Verma | ab45e76 | 2015-07-29 14:58:08 -0600 | [diff] [blame] | 235 | |
/*
 * Forward declarations so the prototypes below do not introduce these tags
 * with prototype scope when this header is included before their
 * definitions.
 */
struct nd_btt;
struct nd_namespace_common;

/*
 * NOTE(review): both helpers are defined outside this header, so their exact
 * semantics are not visible here. Judging by the names,
 * nd_btt_arena_is_valid() reports whether the info block 'super' is
 * acceptable for 'nd_btt', and nd_btt_version() determines the BTT version
 * found on namespace 'ndns' - confirm against the implementation.
 */
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
		struct btt_sb *btt_sb);
Vishal Verma | ab45e76 | 2015-07-29 14:58:08 -0600 | [diff] [blame] | 239 | |
Dan Williams | 8c2f7e8 | 2015-06-25 04:20:04 -0400 | [diff] [blame] | 240 | #endif |