blob: 2e258bee7db24e1e0eee6c45f8f7a7cf82f30324 [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Block Translation Table library
 * Copyright (c) 2014-2015, Intel Corporation.
 */
6
7#ifndef _LINUX_BTT_H
8#define _LINUX_BTT_H
9
Vishal Vermad9b83c72017-08-30 19:36:03 -060010#include <linux/badblocks.h>
Dan Williams8c2f7e82015-06-25 04:20:04 -040011#include <linux/types.h>
12
/*
 * Signature constants. The BTT_SIG literal occupies exactly BTT_SIG_LEN
 * bytes: 14 characters, the explicit "\0" and the implicit terminating NUL.
 */
#define BTT_SIG_LEN 16
#define BTT_SIG "BTT_ARENA_INFO\0"

/*
 * Map entry encoding: bit 31 is the trim ('z') flag, bit 30 is the error
 * ('e') flag and the remaining low bits carry the LBA.
 *
 * The masks are built from 1U rather than 1: left-shifting a signed 1 into
 * bit 31 is undefined behaviour in C, and an unsigned mask does not
 * sign-extend when combined with a wider operand.
 */
#define MAP_ENT_SIZE 4
#define MAP_TRIM_SHIFT 31
#define MAP_TRIM_MASK (1U << MAP_TRIM_SHIFT)
#define MAP_ERR_SHIFT 30
#define MAP_ERR_MASK (1U << MAP_ERR_SHIFT)
#define MAP_LBA_MASK (~((1U << MAP_TRIM_SHIFT) | (1U << MAP_ERR_SHIFT)))
#define MAP_ENT_NORMAL 0xC0000000

/* Sizes of the log structures declared later in this header */
#define LOG_GRP_SIZE sizeof(struct log_group)
#define LOG_ENT_SIZE sizeof(struct log_entry)

#define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */
#define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */

/* Values for entries of the rtt ("Read Tracking Table") */
#define RTT_VALID (1UL << 31)
#define RTT_INVALID 0

#define BTT_PG_SIZE 4096
#define BTT_DEFAULT_NFREE ND_MAX_LANES
/* NOTE(review): presumably the first sequence number of a fresh log - confirm */
#define LOG_SEQ_INIT 1

/* NOTE(review): 'IB' presumably stands for info block - confirm against users */
#define IB_FLAG_ERROR 0x00000001
#define IB_FLAG_ERROR_MASK 0x00000001

/*
 * Map entry accessors. Every use of the argument is parenthesized so that
 * an expression such as ent_lba(a | b) masks the whole expression instead
 * of only its last operand.
 */
#define ent_lba(ent) ((ent) & MAP_LBA_MASK)
#define ent_e_flag(ent) (!!((ent) & MAP_ERR_MASK))
#define ent_z_flag(ent) (!!((ent) & MAP_TRIM_MASK))
/* Sets the error flag in place; 'ent' must be a modifiable lvalue */
#define set_e_flag(ent) ((ent) |= MAP_ERR_MASK)
/* 'normal' is both e and z flags set; evaluates 'ent' only once */
#define ent_normal(ent) (((ent) & MAP_ENT_NORMAL) == MAP_ENT_NORMAL)
Vishal Verma0595d532017-08-30 19:35:59 -060041
/*
 * Initialization states of a BTT instance. INIT_UNCHECKED is explicitly 0;
 * the remaining values follow in declaration order.
 * NOTE(review): presumably the values stored in struct btt's init_state
 * member - confirm against the users of that field.
 */
enum btt_init_state {
	INIT_UNCHECKED = 0,
	INIT_NOTFOUND,
	INIT_READY
};
47
/*
 * A log group represents one log 'lane', and consists of four log entries.
 * Two of the four entries are valid entries, and the remaining two are
 * padding. Due to an old bug in the padding location, we need to perform a
 * test to determine the padding scheme being used, and use that scheme
 * thereafter.
 *
 * In kernels prior to 4.15, 'log group' would have actual log entries at
 * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated
 * format has log entries at indices (0, 1) and padding at indices (2, 3).
 *
 * Old (pre 4.15) format:
 * +-----------------+-----------------+
 * |      ent[0]     |      ent[1]     |
 * |       16B       |       16B       |
 * | lba/old/new/seq |       pad       |
 * +-----------------------------------+
 * |      ent[2]     |      ent[3]     |
 * |       16B       |       16B       |
 * | lba/old/new/seq |       pad       |
 * +-----------------+-----------------+
 *
 * New format:
 * +-----------------+-----------------+
 * |      ent[0]     |      ent[1]     |
 * |       16B       |       16B       |
 * | lba/old/new/seq | lba/old/new/seq |
 * +-----------------------------------+
 * |      ent[2]     |      ent[3]     |
 * |       16B       |       16B       |
 * |       pad       |       pad       |
 * +-----------------+-----------------+
 *
 * We detect during start-up which format is in use, and set
 * arena->log_index[(0, 1)] with the detected format.
 */
84
Vishal Verma5212e112015-06-25 04:20:32 -040085struct log_entry {
86 __le32 lba;
87 __le32 old_map;
88 __le32 new_map;
89 __le32 seq;
Vishal Verma24e3a7f2017-12-18 09:28:39 -070090};
91
92struct log_group {
93 struct log_entry ent[4];
Vishal Verma5212e112015-06-25 04:20:32 -040094};
Dan Williams8c2f7e82015-06-25 04:20:04 -040095
96struct btt_sb {
97 u8 signature[BTT_SIG_LEN];
98 u8 uuid[16];
99 u8 parent_uuid[16];
100 __le32 flags;
101 __le16 version_major;
102 __le16 version_minor;
103 __le32 external_lbasize;
104 __le32 external_nlba;
105 __le32 internal_lbasize;
106 __le32 internal_nlba;
107 __le32 nfree;
108 __le32 infosize;
109 __le64 nextoff;
110 __le64 dataoff;
111 __le64 mapoff;
112 __le64 logoff;
113 __le64 info2off;
114 u8 padding[3968];
115 __le64 checksum;
116};
117
Vishal Verma5212e112015-06-25 04:20:32 -0400118struct free_entry {
119 u32 block;
120 u8 sub;
121 u8 seq;
Vishal Vermad9b83c72017-08-30 19:36:03 -0600122 u8 has_err;
Vishal Verma5212e112015-06-25 04:20:32 -0400123};
124
125struct aligned_lock {
126 union {
127 spinlock_t lock;
128 u8 cacheline_padding[L1_CACHE_BYTES];
129 };
130};
131
132/**
133 * struct arena_info - handle for an arena
134 * @size: Size in bytes this arena occupies on the raw device.
135 * This includes arena metadata.
136 * @external_lba_start: The first external LBA in this arena.
137 * @internal_nlba: Number of internal blocks available in the arena
138 * including nfree reserved blocks
139 * @internal_lbasize: Internal and external lba sizes may be different as
140 * we can round up 'odd' external lbasizes such as 520B
141 * to be aligned.
142 * @external_nlba: Number of blocks contributed by the arena to the number
143 * reported to upper layers. (internal_nlba - nfree)
144 * @external_lbasize: LBA size as exposed to upper layers.
145 * @nfree: A reserve number of 'free' blocks that is used to
146 * handle incoming writes.
147 * @version_major: Metadata layout version major.
148 * @version_minor: Metadata layout version minor.
Vishal Verma75892002017-08-30 19:36:01 -0600149 * @sector_size: The Linux sector size - 512 or 4096
Vishal Verma5212e112015-06-25 04:20:32 -0400150 * @nextoff: Offset in bytes to the start of the next arena.
151 * @infooff: Offset in bytes to the info block of this arena.
152 * @dataoff: Offset in bytes to the data area of this arena.
153 * @mapoff: Offset in bytes to the map area of this arena.
154 * @logoff: Offset in bytes to the log area of this arena.
155 * @info2off: Offset in bytes to the backup info block of this arena.
156 * @freelist: Pointer to in-memory list of free blocks
157 * @rtt: Pointer to in-memory "Read Tracking Table"
158 * @map_locks: Spinlocks protecting concurrent map writes
159 * @nd_btt: Pointer to parent nd_btt structure.
160 * @list: List head for list of arenas
161 * @debugfs_dir: Debugfs dentry
162 * @flags: Arena flags - may signify error states.
Vishal Verma13b79542017-12-14 17:26:13 -0700163 * @err_lock: Mutex for synchronizing error clearing.
Vishal Verma24e3a7f2017-12-18 09:28:39 -0700164 * @log_index: Indices of the valid log entries in a log_group
Vishal Verma5212e112015-06-25 04:20:32 -0400165 *
166 * arena_info is a per-arena handle. Once an arena is narrowed down for an
167 * IO, this struct is passed around for the duration of the IO.
168 */
169struct arena_info {
170 u64 size; /* Total bytes for this arena */
171 u64 external_lba_start;
172 u32 internal_nlba;
173 u32 internal_lbasize;
174 u32 external_nlba;
175 u32 external_lbasize;
176 u32 nfree;
177 u16 version_major;
178 u16 version_minor;
Vishal Verma75892002017-08-30 19:36:01 -0600179 u32 sector_size;
Vishal Verma5212e112015-06-25 04:20:32 -0400180 /* Byte offsets to the different on-media structures */
181 u64 nextoff;
182 u64 infooff;
183 u64 dataoff;
184 u64 mapoff;
185 u64 logoff;
186 u64 info2off;
187 /* Pointers to other in-memory structures for this arena */
188 struct free_entry *freelist;
189 u32 *rtt;
190 struct aligned_lock *map_locks;
191 struct nd_btt *nd_btt;
192 struct list_head list;
193 struct dentry *debugfs_dir;
194 /* Arena flags */
195 u32 flags;
Vishal Vermad9b83c72017-08-30 19:36:03 -0600196 struct mutex err_lock;
Vishal Verma24e3a7f2017-12-18 09:28:39 -0700197 int log_index[2];
Vishal Verma5212e112015-06-25 04:20:32 -0400198};
199
200/**
201 * struct btt - handle for a BTT instance
202 * @btt_disk: Pointer to the gendisk for BTT device
203 * @btt_queue: Pointer to the request queue for the BTT device
204 * @arena_list: Head of the list of arenas
205 * @debugfs_dir: Debugfs dentry
206 * @nd_btt: Parent nd_btt struct
207 * @nlba: Number of logical blocks exposed to the upper layers
208 * after removing the amount of space needed by metadata
209 * @rawsize: Total size in bytes of the available backing device
210 * @lbasize: LBA size as requested and presented to upper layers.
211 * This is sector_size + size of any metadata.
212 * @sector_size: The Linux sector size - 512 or 4096
213 * @lanes: Per-lane spinlocks
214 * @init_lock: Mutex used for the BTT initialization
215 * @init_state: Flag describing the initialization state for the BTT
216 * @num_arenas: Number of arenas in the BTT instance
Vishal Verma13b79542017-12-14 17:26:13 -0700217 * @phys_bb: Pointer to the namespace's badblocks structure
Vishal Verma5212e112015-06-25 04:20:32 -0400218 */
219struct btt {
220 struct gendisk *btt_disk;
221 struct request_queue *btt_queue;
222 struct list_head arena_list;
223 struct dentry *debugfs_dir;
224 struct nd_btt *nd_btt;
225 u64 nlba;
226 unsigned long long rawsize;
227 u32 lbasize;
228 u32 sector_size;
229 struct nd_region *nd_region;
230 struct mutex init_lock;
231 int init_state;
232 int num_arenas;
Vishal Vermad9b83c72017-08-30 19:36:03 -0600233 struct badblocks *phys_bb;
Vishal Verma5212e112015-06-25 04:20:32 -0400234};
Vishal Vermaab45e762015-07-29 14:58:08 -0600235
/*
 * Defined outside this header.
 * NOTE(review): going by the name, presumably reports whether the info
 * block @super describes a valid arena for @nd_btt - confirm against the
 * definition.
 */
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
/*
 * Forward declaration so that the tag below has file scope even when the
 * header defining struct nd_namespace_common has not been included first.
 */
struct nd_namespace_common;

/*
 * Defined outside this header.
 * NOTE(review): return value semantics are not visible here; presumably 0
 * or a negative errno - confirm against the definition.
 */
int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
		struct btt_sb *btt_sb);
Vishal Vermaab45e762015-07-29 14:58:08 -0600239
Dan Williams8c2f7e82015-06-25 04:20:04 -0400240#endif