blob: e90ecb1796224a3be18fd2d0e420c011831a61c0 [file] [log] [blame]
/*
 * Block Translation Table
 * Copyright (c) 2014-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */
14#include <linux/highmem.h>
15#include <linux/debugfs.h>
16#include <linux/blkdev.h>
17#include <linux/module.h>
18#include <linux/device.h>
19#include <linux/mutex.h>
20#include <linux/hdreg.h>
21#include <linux/genhd.h>
22#include <linux/sizes.h>
23#include <linux/ndctl.h>
24#include <linux/fs.h>
25#include <linux/nd.h>
26#include "btt.h"
27#include "nd.h"
28
/* Tells btt_log_read() which of a lane's two log entries the caller wants */
enum log_ent_request {
	LOG_NEW_ENT = 0,	/* the newer entry */
	LOG_OLD_ENT = 1,	/* the older entry */
};
33
Vishal Verma5212e112015-06-25 04:20:32 -040034static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
35 void *buf, size_t n)
36{
37 struct nd_btt *nd_btt = arena->nd_btt;
38 struct nd_namespace_common *ndns = nd_btt->ndns;
39
40 /* arena offsets are 4K from the base of the device */
41 offset += SZ_4K;
42 return nvdimm_read_bytes(ndns, offset, buf, n);
43}
44
45static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
46 void *buf, size_t n)
47{
48 struct nd_btt *nd_btt = arena->nd_btt;
49 struct nd_namespace_common *ndns = nd_btt->ndns;
50
51 /* arena offsets are 4K from the base of the device */
52 offset += SZ_4K;
53 return nvdimm_write_bytes(ndns, offset, buf, n);
54}
55
56static int btt_info_write(struct arena_info *arena, struct btt_sb *super)
57{
58 int ret;
59
60 ret = arena_write_bytes(arena, arena->info2off, super,
61 sizeof(struct btt_sb));
62 if (ret)
63 return ret;
64
65 return arena_write_bytes(arena, arena->infooff, super,
66 sizeof(struct btt_sb));
67}
68
69static int btt_info_read(struct arena_info *arena, struct btt_sb *super)
70{
71 WARN_ON(!super);
72 return arena_read_bytes(arena, arena->infooff, super,
73 sizeof(struct btt_sb));
74}
75
76/*
77 * 'raw' version of btt_map write
78 * Assumptions:
79 * mapping is in little-endian
80 * mapping contains 'E' and 'Z' flags as desired
81 */
82static int __btt_map_write(struct arena_info *arena, u32 lba, __le32 mapping)
83{
84 u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
85
86 WARN_ON(lba >= arena->external_nlba);
87 return arena_write_bytes(arena, ns_off, &mapping, MAP_ENT_SIZE);
88}
89
90static int btt_map_write(struct arena_info *arena, u32 lba, u32 mapping,
91 u32 z_flag, u32 e_flag)
92{
93 u32 ze;
94 __le32 mapping_le;
95
96 /*
97 * This 'mapping' is supposed to be just the LBA mapping, without
98 * any flags set, so strip the flag bits.
99 */
100 mapping &= MAP_LBA_MASK;
101
102 ze = (z_flag << 1) + e_flag;
103 switch (ze) {
104 case 0:
105 /*
106 * We want to set neither of the Z or E flags, and
107 * in the actual layout, this means setting the bit
108 * positions of both to '1' to indicate a 'normal'
109 * map entry
110 */
111 mapping |= MAP_ENT_NORMAL;
112 break;
113 case 1:
114 mapping |= (1 << MAP_ERR_SHIFT);
115 break;
116 case 2:
117 mapping |= (1 << MAP_TRIM_SHIFT);
118 break;
119 default:
120 /*
121 * The case where Z and E are both sent in as '1' could be
122 * construed as a valid 'normal' case, but we decide not to,
123 * to avoid confusion
124 */
125 WARN_ONCE(1, "Invalid use of Z and E flags\n");
126 return -EIO;
127 }
128
129 mapping_le = cpu_to_le32(mapping);
130 return __btt_map_write(arena, lba, mapping_le);
131}
132
133static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping,
134 int *trim, int *error)
135{
136 int ret;
137 __le32 in;
138 u32 raw_mapping, postmap, ze, z_flag, e_flag;
139 u64 ns_off = arena->mapoff + (lba * MAP_ENT_SIZE);
140
141 WARN_ON(lba >= arena->external_nlba);
142
143 ret = arena_read_bytes(arena, ns_off, &in, MAP_ENT_SIZE);
144 if (ret)
145 return ret;
146
147 raw_mapping = le32_to_cpu(in);
148
149 z_flag = (raw_mapping & MAP_TRIM_MASK) >> MAP_TRIM_SHIFT;
150 e_flag = (raw_mapping & MAP_ERR_MASK) >> MAP_ERR_SHIFT;
151 ze = (z_flag << 1) + e_flag;
152 postmap = raw_mapping & MAP_LBA_MASK;
153
154 /* Reuse the {z,e}_flag variables for *trim and *error */
155 z_flag = 0;
156 e_flag = 0;
157
158 switch (ze) {
159 case 0:
160 /* Initial state. Return postmap = premap */
161 *mapping = lba;
162 break;
163 case 1:
164 *mapping = postmap;
165 e_flag = 1;
166 break;
167 case 2:
168 *mapping = postmap;
169 z_flag = 1;
170 break;
171 case 3:
172 *mapping = postmap;
173 break;
174 default:
175 return -EIO;
176 }
177
178 if (trim)
179 *trim = z_flag;
180 if (error)
181 *error = e_flag;
182
183 return ret;
184}
185
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700186static int btt_log_group_read(struct arena_info *arena, u32 lane,
187 struct log_group *log)
Vishal Verma5212e112015-06-25 04:20:32 -0400188{
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700189 WARN_ON(!log);
Vishal Verma5212e112015-06-25 04:20:32 -0400190 return arena_read_bytes(arena,
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700191 arena->logoff + (lane * LOG_GRP_SIZE), log,
192 LOG_GRP_SIZE);
Vishal Verma5212e112015-06-25 04:20:32 -0400193}
194
195static struct dentry *debugfs_root;
196
197static void arena_debugfs_init(struct arena_info *a, struct dentry *parent,
198 int idx)
199{
200 char dirname[32];
201 struct dentry *d;
202
203 /* If for some reason, parent bttN was not created, exit */
204 if (!parent)
205 return;
206
207 snprintf(dirname, 32, "arena%d", idx);
208 d = debugfs_create_dir(dirname, parent);
209 if (IS_ERR_OR_NULL(d))
210 return;
211 a->debugfs_dir = d;
212
213 debugfs_create_x64("size", S_IRUGO, d, &a->size);
214 debugfs_create_x64("external_lba_start", S_IRUGO, d,
215 &a->external_lba_start);
216 debugfs_create_x32("internal_nlba", S_IRUGO, d, &a->internal_nlba);
217 debugfs_create_u32("internal_lbasize", S_IRUGO, d,
218 &a->internal_lbasize);
219 debugfs_create_x32("external_nlba", S_IRUGO, d, &a->external_nlba);
220 debugfs_create_u32("external_lbasize", S_IRUGO, d,
221 &a->external_lbasize);
222 debugfs_create_u32("nfree", S_IRUGO, d, &a->nfree);
223 debugfs_create_u16("version_major", S_IRUGO, d, &a->version_major);
224 debugfs_create_u16("version_minor", S_IRUGO, d, &a->version_minor);
225 debugfs_create_x64("nextoff", S_IRUGO, d, &a->nextoff);
226 debugfs_create_x64("infooff", S_IRUGO, d, &a->infooff);
227 debugfs_create_x64("dataoff", S_IRUGO, d, &a->dataoff);
228 debugfs_create_x64("mapoff", S_IRUGO, d, &a->mapoff);
229 debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff);
230 debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off);
231 debugfs_create_x32("flags", S_IRUGO, d, &a->flags);
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700232 debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]);
233 debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]);
Vishal Verma5212e112015-06-25 04:20:32 -0400234}
235
236static void btt_debugfs_init(struct btt *btt)
237{
238 int i = 0;
239 struct arena_info *arena;
240
241 btt->debugfs_dir = debugfs_create_dir(dev_name(&btt->nd_btt->dev),
242 debugfs_root);
243 if (IS_ERR_OR_NULL(btt->debugfs_dir))
244 return;
245
246 list_for_each_entry(arena, &btt->arena_list, list) {
247 arena_debugfs_init(arena, btt->debugfs_dir, i);
248 i++;
249 }
250}
251
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700252static u32 log_seq(struct log_group *log, int log_idx)
253{
254 return le32_to_cpu(log->ent[log_idx].seq);
255}
256
Vishal Verma5212e112015-06-25 04:20:32 -0400257/*
258 * This function accepts two log entries, and uses the
259 * sequence number to find the 'older' entry.
260 * It also updates the sequence number in this old entry to
261 * make it the 'new' one if the mark_flag is set.
262 * Finally, it returns which of the entries was the older one.
263 *
264 * TODO The logic feels a bit kludge-y. make it better..
265 */
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700266static int btt_log_get_old(struct arena_info *a, struct log_group *log)
Vishal Verma5212e112015-06-25 04:20:32 -0400267{
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700268 int idx0 = a->log_index[0];
269 int idx1 = a->log_index[1];
Vishal Verma5212e112015-06-25 04:20:32 -0400270 int old;
271
272 /*
273 * the first ever time this is seen, the entry goes into [0]
274 * the next time, the following logic works out to put this
275 * (next) entry into [1]
276 */
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700277 if (log_seq(log, idx0) == 0) {
278 log->ent[idx0].seq = cpu_to_le32(1);
Vishal Verma5212e112015-06-25 04:20:32 -0400279 return 0;
280 }
281
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700282 if (log_seq(log, idx0) == log_seq(log, idx1))
Vishal Verma5212e112015-06-25 04:20:32 -0400283 return -EINVAL;
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700284 if (log_seq(log, idx0) + log_seq(log, idx1) > 5)
Vishal Verma5212e112015-06-25 04:20:32 -0400285 return -EINVAL;
286
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700287 if (log_seq(log, idx0) < log_seq(log, idx1)) {
288 if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1)
Vishal Verma5212e112015-06-25 04:20:32 -0400289 old = 0;
290 else
291 old = 1;
292 } else {
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700293 if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1)
Vishal Verma5212e112015-06-25 04:20:32 -0400294 old = 1;
295 else
296 old = 0;
297 }
298
299 return old;
300}
301
302static struct device *to_dev(struct arena_info *arena)
303{
304 return &arena->nd_btt->dev;
305}
306
307/*
308 * This function copies the desired (old/new) log entry into ent if
309 * it is not NULL. It returns the sub-slot number (0 or 1)
310 * where the desired log entry was found. Negative return values
311 * indicate errors.
312 */
313static int btt_log_read(struct arena_info *arena, u32 lane,
314 struct log_entry *ent, int old_flag)
315{
316 int ret;
317 int old_ent, ret_ent;
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700318 struct log_group log;
Vishal Verma5212e112015-06-25 04:20:32 -0400319
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700320 ret = btt_log_group_read(arena, lane, &log);
Vishal Verma5212e112015-06-25 04:20:32 -0400321 if (ret)
322 return -EIO;
323
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700324 old_ent = btt_log_get_old(arena, &log);
Vishal Verma5212e112015-06-25 04:20:32 -0400325 if (old_ent < 0 || old_ent > 1) {
326 dev_info(to_dev(arena),
327 "log corruption (%d): lane %d seq [%d, %d]\n",
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700328 old_ent, lane, log.ent[arena->log_index[0]].seq,
329 log.ent[arena->log_index[1]].seq);
Vishal Verma5212e112015-06-25 04:20:32 -0400330 /* TODO set error state? */
331 return -EIO;
332 }
333
334 ret_ent = (old_flag ? old_ent : (1 - old_ent));
335
336 if (ent != NULL)
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700337 memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE);
Vishal Verma5212e112015-06-25 04:20:32 -0400338
339 return ret_ent;
340}
341
342/*
343 * This function commits a log entry to media
344 * It does _not_ prepare the freelist entry for the next write
345 * btt_flog_write is the wrapper for updating the freelist elements
346 */
347static int __btt_log_write(struct arena_info *arena, u32 lane,
348 u32 sub, struct log_entry *ent)
349{
350 int ret;
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700351 u32 group_slot = arena->log_index[sub];
352 unsigned int log_half = LOG_ENT_SIZE / 2;
Vishal Verma5212e112015-06-25 04:20:32 -0400353 void *src = ent;
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700354 u64 ns_off;
Vishal Verma5212e112015-06-25 04:20:32 -0400355
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700356 ns_off = arena->logoff + (lane * LOG_GRP_SIZE) +
357 (group_slot * LOG_ENT_SIZE);
Vishal Verma5212e112015-06-25 04:20:32 -0400358 /* split the 16B write into atomic, durable halves */
359 ret = arena_write_bytes(arena, ns_off, src, log_half);
360 if (ret)
361 return ret;
362
363 ns_off += log_half;
364 src += log_half;
365 return arena_write_bytes(arena, ns_off, src, log_half);
366}
367
368static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
369 struct log_entry *ent)
370{
371 int ret;
372
373 ret = __btt_log_write(arena, lane, sub, ent);
374 if (ret)
375 return ret;
376
377 /* prepare the next free entry */
378 arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
379 if (++(arena->freelist[lane].seq) == 4)
380 arena->freelist[lane].seq = 1;
381 arena->freelist[lane].block = le32_to_cpu(ent->old_map);
382
383 return ret;
384}
385
386/*
387 * This function initializes the BTT map to the initial state, which is
388 * all-zeroes, and indicates an identity mapping
389 */
390static int btt_map_init(struct arena_info *arena)
391{
392 int ret = -EINVAL;
393 void *zerobuf;
394 size_t offset = 0;
395 size_t chunk_size = SZ_2M;
396 size_t mapsize = arena->logoff - arena->mapoff;
397
398 zerobuf = kzalloc(chunk_size, GFP_KERNEL);
399 if (!zerobuf)
400 return -ENOMEM;
401
402 while (mapsize) {
403 size_t size = min(mapsize, chunk_size);
404
405 ret = arena_write_bytes(arena, arena->mapoff + offset, zerobuf,
406 size);
407 if (ret)
408 goto free;
409
410 offset += size;
411 mapsize -= size;
412 cond_resched();
413 }
414
415 free:
416 kfree(zerobuf);
417 return ret;
418}
419
420/*
421 * This function initializes the BTT log with 'fake' entries pointing
422 * to the initial reserved set of blocks as being free
423 */
424static int btt_log_init(struct arena_info *arena)
425{
426 int ret;
427 u32 i;
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700428 struct log_entry ent, zerolog;
Vishal Verma5212e112015-06-25 04:20:32 -0400429
430 memset(&zerolog, 0, sizeof(zerolog));
431
432 for (i = 0; i < arena->nfree; i++) {
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700433 ent.lba = cpu_to_le32(i);
434 ent.old_map = cpu_to_le32(arena->external_nlba + i);
435 ent.new_map = cpu_to_le32(arena->external_nlba + i);
436 ent.seq = cpu_to_le32(LOG_SEQ_INIT);
437 ret = __btt_log_write(arena, i, 0, &ent);
Vishal Verma5212e112015-06-25 04:20:32 -0400438 if (ret)
439 return ret;
440 ret = __btt_log_write(arena, i, 1, &zerolog);
441 if (ret)
442 return ret;
443 }
444
445 return 0;
446}
447
448static int btt_freelist_init(struct arena_info *arena)
449{
Vishal Vermada9de002019-02-27 17:06:26 -0700450 int new, ret;
Vishal Verma5212e112015-06-25 04:20:32 -0400451 u32 i, map_entry;
Vishal Vermada9de002019-02-27 17:06:26 -0700452 struct log_entry log_new;
Vishal Verma5212e112015-06-25 04:20:32 -0400453
454 arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry),
455 GFP_KERNEL);
456 if (!arena->freelist)
457 return -ENOMEM;
458
459 for (i = 0; i < arena->nfree; i++) {
Vishal Verma5212e112015-06-25 04:20:32 -0400460 new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT);
461 if (new < 0)
462 return new;
463
464 /* sub points to the next one to be overwritten */
465 arena->freelist[i].sub = 1 - new;
466 arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
467 arena->freelist[i].block = le32_to_cpu(log_new.old_map);
468
469 /* This implies a newly created or untouched flog entry */
470 if (log_new.old_map == log_new.new_map)
471 continue;
472
473 /* Check if map recovery is needed */
474 ret = btt_map_read(arena, le32_to_cpu(log_new.lba), &map_entry,
475 NULL, NULL);
476 if (ret)
477 return ret;
478 if ((le32_to_cpu(log_new.new_map) != map_entry) &&
479 (le32_to_cpu(log_new.old_map) == map_entry)) {
480 /*
481 * Last transaction wrote the flog, but wasn't able
482 * to complete the map write. So fix up the map.
483 */
484 ret = btt_map_write(arena, le32_to_cpu(log_new.lba),
485 le32_to_cpu(log_new.new_map), 0, 0);
486 if (ret)
487 return ret;
488 }
489
490 }
491
492 return 0;
493}
494
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700495static bool ent_is_padding(struct log_entry *ent)
496{
497 return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0)
498 && (ent->seq == 0);
499}
500
501/*
502 * Detecting valid log indices: We read a log group (see the comments in btt.h
503 * for a description of a 'log_group' and its 'slots'), and iterate over its
504 * four slots. We expect that a padding slot will be all-zeroes, and use this
505 * to detect a padding slot vs. an actual entry.
506 *
507 * If a log_group is in the initial state, i.e. hasn't been used since the
508 * creation of this BTT layout, it will have three of the four slots with
509 * zeroes. We skip over these log_groups for the detection of log_index. If
510 * all log_groups are in the initial state (i.e. the BTT has never been
511 * written to), it is safe to assume the 'new format' of log entries in slots
512 * (0, 1).
513 */
514static int log_set_indices(struct arena_info *arena)
515{
516 bool idx_set = false, initial_state = true;
517 int ret, log_index[2] = {-1, -1};
518 u32 i, j, next_idx = 0;
519 struct log_group log;
520 u32 pad_count = 0;
521
522 for (i = 0; i < arena->nfree; i++) {
523 ret = btt_log_group_read(arena, i, &log);
524 if (ret < 0)
525 return ret;
526
527 for (j = 0; j < 4; j++) {
528 if (!idx_set) {
529 if (ent_is_padding(&log.ent[j])) {
530 pad_count++;
531 continue;
532 } else {
533 /* Skip if index has been recorded */
534 if ((next_idx == 1) &&
535 (j == log_index[0]))
536 continue;
537 /* valid entry, record index */
538 log_index[next_idx] = j;
539 next_idx++;
540 }
541 if (next_idx == 2) {
542 /* two valid entries found */
543 idx_set = true;
544 } else if (next_idx > 2) {
545 /* too many valid indices */
546 return -ENXIO;
547 }
548 } else {
549 /*
550 * once the indices have been set, just verify
551 * that all subsequent log groups are either in
552 * their initial state or follow the same
553 * indices.
554 */
555 if (j == log_index[0]) {
556 /* entry must be 'valid' */
557 if (ent_is_padding(&log.ent[j]))
558 return -ENXIO;
559 } else if (j == log_index[1]) {
560 ;
561 /*
562 * log_index[1] can be padding if the
563 * lane never got used and it is still
564 * in the initial state (three 'padding'
565 * entries)
566 */
567 } else {
568 /* entry must be invalid (padding) */
569 if (!ent_is_padding(&log.ent[j]))
570 return -ENXIO;
571 }
572 }
573 }
574 /*
575 * If any of the log_groups have more than one valid,
576 * non-padding entry, then the we are no longer in the
577 * initial_state
578 */
579 if (pad_count < 3)
580 initial_state = false;
581 pad_count = 0;
582 }
583
584 if (!initial_state && !idx_set)
585 return -ENXIO;
586
587 /*
588 * If all the entries in the log were in the initial state,
589 * assume new padding scheme
590 */
591 if (initial_state)
592 log_index[1] = 1;
593
594 /*
595 * Only allow the known permutations of log/padding indices,
596 * i.e. (0, 1), and (0, 2)
597 */
598 if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2)))
599 ; /* known index possibilities */
600 else {
601 dev_err(to_dev(arena), "Found an unknown padding scheme\n");
602 return -ENXIO;
603 }
604
605 arena->log_index[0] = log_index[0];
606 arena->log_index[1] = log_index[1];
607 dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]);
608 dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]);
609 return 0;
610}
611
Vishal Verma5212e112015-06-25 04:20:32 -0400612static int btt_rtt_init(struct arena_info *arena)
613{
614 arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL);
615 if (arena->rtt == NULL)
616 return -ENOMEM;
617
618 return 0;
619}
620
621static int btt_maplocks_init(struct arena_info *arena)
622{
623 u32 i;
624
625 arena->map_locks = kcalloc(arena->nfree, sizeof(struct aligned_lock),
626 GFP_KERNEL);
627 if (!arena->map_locks)
628 return -ENOMEM;
629
630 for (i = 0; i < arena->nfree; i++)
631 spin_lock_init(&arena->map_locks[i].lock);
632
633 return 0;
634}
635
636static struct arena_info *alloc_arena(struct btt *btt, size_t size,
637 size_t start, size_t arena_off)
638{
639 struct arena_info *arena;
640 u64 logsize, mapsize, datasize;
641 u64 available = size;
642
643 arena = kzalloc(sizeof(struct arena_info), GFP_KERNEL);
644 if (!arena)
645 return NULL;
646 arena->nd_btt = btt->nd_btt;
647
648 if (!size)
649 return arena;
650
651 arena->size = size;
652 arena->external_lba_start = start;
653 arena->external_lbasize = btt->lbasize;
654 arena->internal_lbasize = roundup(arena->external_lbasize,
655 INT_LBASIZE_ALIGNMENT);
656 arena->nfree = BTT_DEFAULT_NFREE;
657 arena->version_major = 1;
658 arena->version_minor = 1;
659
660 if (available % BTT_PG_SIZE)
661 available -= (available % BTT_PG_SIZE);
662
663 /* Two pages are reserved for the super block and its copy */
664 available -= 2 * BTT_PG_SIZE;
665
666 /* The log takes a fixed amount of space based on nfree */
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700667 logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE);
Vishal Verma5212e112015-06-25 04:20:32 -0400668 available -= logsize;
669
670 /* Calculate optimal split between map and data area */
671 arena->internal_nlba = div_u64(available - BTT_PG_SIZE,
672 arena->internal_lbasize + MAP_ENT_SIZE);
673 arena->external_nlba = arena->internal_nlba - arena->nfree;
674
675 mapsize = roundup((arena->external_nlba * MAP_ENT_SIZE), BTT_PG_SIZE);
676 datasize = available - mapsize;
677
678 /* 'Absolute' values, relative to start of storage space */
679 arena->infooff = arena_off;
680 arena->dataoff = arena->infooff + BTT_PG_SIZE;
681 arena->mapoff = arena->dataoff + datasize;
682 arena->logoff = arena->mapoff + mapsize;
683 arena->info2off = arena->logoff + logsize;
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700684
685 /* Default log indices are (0,1) */
686 arena->log_index[0] = 0;
687 arena->log_index[1] = 1;
Vishal Verma5212e112015-06-25 04:20:32 -0400688 return arena;
689}
690
691static void free_arenas(struct btt *btt)
692{
693 struct arena_info *arena, *next;
694
695 list_for_each_entry_safe(arena, next, &btt->arena_list, list) {
696 list_del(&arena->list);
697 kfree(arena->rtt);
698 kfree(arena->map_locks);
699 kfree(arena->freelist);
700 debugfs_remove_recursive(arena->debugfs_dir);
701 kfree(arena);
702 }
703}
704
705/*
Vishal Verma5212e112015-06-25 04:20:32 -0400706 * This function reads an existing valid btt superblock and
707 * populates the corresponding arena_info struct
708 */
709static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super,
710 u64 arena_off)
711{
712 arena->internal_nlba = le32_to_cpu(super->internal_nlba);
713 arena->internal_lbasize = le32_to_cpu(super->internal_lbasize);
714 arena->external_nlba = le32_to_cpu(super->external_nlba);
715 arena->external_lbasize = le32_to_cpu(super->external_lbasize);
716 arena->nfree = le32_to_cpu(super->nfree);
717 arena->version_major = le16_to_cpu(super->version_major);
718 arena->version_minor = le16_to_cpu(super->version_minor);
719
720 arena->nextoff = (super->nextoff == 0) ? 0 : (arena_off +
721 le64_to_cpu(super->nextoff));
722 arena->infooff = arena_off;
723 arena->dataoff = arena_off + le64_to_cpu(super->dataoff);
724 arena->mapoff = arena_off + le64_to_cpu(super->mapoff);
725 arena->logoff = arena_off + le64_to_cpu(super->logoff);
726 arena->info2off = arena_off + le64_to_cpu(super->info2off);
727
Dan Williams5e329402015-07-11 10:02:46 -0400728 arena->size = (le64_to_cpu(super->nextoff) > 0)
729 ? (le64_to_cpu(super->nextoff))
730 : (arena->info2off - arena->infooff + BTT_PG_SIZE);
Vishal Verma5212e112015-06-25 04:20:32 -0400731
732 arena->flags = le32_to_cpu(super->flags);
733}
734
735static int discover_arenas(struct btt *btt)
736{
737 int ret = 0;
738 struct arena_info *arena;
739 struct btt_sb *super;
740 size_t remaining = btt->rawsize;
741 u64 cur_nlba = 0;
742 size_t cur_off = 0;
743 int num_arenas = 0;
744
745 super = kzalloc(sizeof(*super), GFP_KERNEL);
746 if (!super)
747 return -ENOMEM;
748
749 while (remaining) {
750 /* Alloc memory for arena */
751 arena = alloc_arena(btt, 0, 0, 0);
752 if (!arena) {
753 ret = -ENOMEM;
754 goto out_super;
755 }
756
757 arena->infooff = cur_off;
758 ret = btt_info_read(arena, super);
759 if (ret)
760 goto out;
761
Vishal Vermaab45e762015-07-29 14:58:08 -0600762 if (!nd_btt_arena_is_valid(btt->nd_btt, super)) {
Vishal Verma5212e112015-06-25 04:20:32 -0400763 if (remaining == btt->rawsize) {
764 btt->init_state = INIT_NOTFOUND;
765 dev_info(to_dev(arena), "No existing arenas\n");
766 goto out;
767 } else {
768 dev_info(to_dev(arena),
769 "Found corrupted metadata!\n");
770 ret = -ENODEV;
771 goto out;
772 }
773 }
774
775 arena->external_lba_start = cur_nlba;
776 parse_arena_meta(arena, super, cur_off);
777
Vishal Vermac9ca9d92017-12-18 09:28:39 -0700778 ret = log_set_indices(arena);
779 if (ret) {
780 dev_err(to_dev(arena),
781 "Unable to deduce log/padding indices\n");
782 goto out;
783 }
784
Vishal Verma5212e112015-06-25 04:20:32 -0400785 ret = btt_freelist_init(arena);
786 if (ret)
787 goto out;
788
789 ret = btt_rtt_init(arena);
790 if (ret)
791 goto out;
792
793 ret = btt_maplocks_init(arena);
794 if (ret)
795 goto out;
796
797 list_add_tail(&arena->list, &btt->arena_list);
798
799 remaining -= arena->size;
800 cur_off += arena->size;
801 cur_nlba += arena->external_nlba;
802 num_arenas++;
803
804 if (arena->nextoff == 0)
805 break;
806 }
807 btt->num_arenas = num_arenas;
808 btt->nlba = cur_nlba;
809 btt->init_state = INIT_READY;
810
811 kfree(super);
812 return ret;
813
814 out:
815 kfree(arena);
816 free_arenas(btt);
817 out_super:
818 kfree(super);
819 return ret;
820}
821
822static int create_arenas(struct btt *btt)
823{
824 size_t remaining = btt->rawsize;
825 size_t cur_off = 0;
826
827 while (remaining) {
828 struct arena_info *arena;
829 size_t arena_size = min_t(u64, ARENA_MAX_SIZE, remaining);
830
831 remaining -= arena_size;
832 if (arena_size < ARENA_MIN_SIZE)
833 break;
834
835 arena = alloc_arena(btt, arena_size, btt->nlba, cur_off);
836 if (!arena) {
837 free_arenas(btt);
838 return -ENOMEM;
839 }
840 btt->nlba += arena->external_nlba;
841 if (remaining >= ARENA_MIN_SIZE)
842 arena->nextoff = arena->size;
843 else
844 arena->nextoff = 0;
845 cur_off += arena_size;
846 list_add_tail(&arena->list, &btt->arena_list);
847 }
848
849 return 0;
850}
851
852/*
853 * This function completes arena initialization by writing
854 * all the metadata.
855 * It is only called for an uninitialized arena when a write
856 * to that arena occurs for the first time.
857 */
Vishal Vermafbde1412015-07-29 14:58:07 -0600858static int btt_arena_write_layout(struct arena_info *arena)
Vishal Verma5212e112015-06-25 04:20:32 -0400859{
860 int ret;
Dan Williamse1455742015-07-30 17:57:47 -0400861 u64 sum;
Vishal Verma5212e112015-06-25 04:20:32 -0400862 struct btt_sb *super;
Vishal Vermafbde1412015-07-29 14:58:07 -0600863 struct nd_btt *nd_btt = arena->nd_btt;
Vishal Verma6ec68952015-07-29 14:58:09 -0600864 const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
Vishal Verma5212e112015-06-25 04:20:32 -0400865
866 ret = btt_map_init(arena);
867 if (ret)
868 return ret;
869
870 ret = btt_log_init(arena);
871 if (ret)
872 return ret;
873
874 super = kzalloc(sizeof(struct btt_sb), GFP_NOIO);
875 if (!super)
876 return -ENOMEM;
877
878 strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
Vishal Vermafbde1412015-07-29 14:58:07 -0600879 memcpy(super->uuid, nd_btt->uuid, 16);
Vishal Verma6ec68952015-07-29 14:58:09 -0600880 memcpy(super->parent_uuid, parent_uuid, 16);
Vishal Verma5212e112015-06-25 04:20:32 -0400881 super->flags = cpu_to_le32(arena->flags);
882 super->version_major = cpu_to_le16(arena->version_major);
883 super->version_minor = cpu_to_le16(arena->version_minor);
884 super->external_lbasize = cpu_to_le32(arena->external_lbasize);
885 super->external_nlba = cpu_to_le32(arena->external_nlba);
886 super->internal_lbasize = cpu_to_le32(arena->internal_lbasize);
887 super->internal_nlba = cpu_to_le32(arena->internal_nlba);
888 super->nfree = cpu_to_le32(arena->nfree);
889 super->infosize = cpu_to_le32(sizeof(struct btt_sb));
890 super->nextoff = cpu_to_le64(arena->nextoff);
891 /*
892 * Subtract arena->infooff (arena start) so numbers are relative
893 * to 'this' arena
894 */
895 super->dataoff = cpu_to_le64(arena->dataoff - arena->infooff);
896 super->mapoff = cpu_to_le64(arena->mapoff - arena->infooff);
897 super->logoff = cpu_to_le64(arena->logoff - arena->infooff);
898 super->info2off = cpu_to_le64(arena->info2off - arena->infooff);
899
900 super->flags = 0;
Dan Williamse1455742015-07-30 17:57:47 -0400901 sum = nd_sb_checksum((struct nd_gen_sb *) super);
902 super->checksum = cpu_to_le64(sum);
Vishal Verma5212e112015-06-25 04:20:32 -0400903
904 ret = btt_info_write(arena, super);
905
906 kfree(super);
907 return ret;
908}
909
910/*
911 * This function completes the initialization for the BTT namespace
912 * such that it is ready to accept IOs
913 */
914static int btt_meta_init(struct btt *btt)
915{
916 int ret = 0;
917 struct arena_info *arena;
918
919 mutex_lock(&btt->init_lock);
920 list_for_each_entry(arena, &btt->arena_list, list) {
Vishal Vermafbde1412015-07-29 14:58:07 -0600921 ret = btt_arena_write_layout(arena);
Vishal Verma5212e112015-06-25 04:20:32 -0400922 if (ret)
923 goto unlock;
924
925 ret = btt_freelist_init(arena);
926 if (ret)
927 goto unlock;
928
929 ret = btt_rtt_init(arena);
930 if (ret)
931 goto unlock;
932
933 ret = btt_maplocks_init(arena);
934 if (ret)
935 goto unlock;
936 }
937
938 btt->init_state = INIT_READY;
939
940 unlock:
941 mutex_unlock(&btt->init_lock);
942 return ret;
943}
944
Vishal Verma41cd8b72015-06-25 04:21:52 -0400945static u32 btt_meta_size(struct btt *btt)
946{
947 return btt->lbasize - btt->sector_size;
948}
949
Vishal Verma5212e112015-06-25 04:20:32 -0400950/*
951 * This function calculates the arena in which the given LBA lies
952 * by doing a linear walk. This is acceptable since we expect only
953 * a few arenas. If we have backing devices that get much larger,
954 * we can construct a balanced binary tree of arenas at init time
955 * so that this range search becomes faster.
956 */
957static int lba_to_arena(struct btt *btt, sector_t sector, __u32 *premap,
958 struct arena_info **arena)
959{
960 struct arena_info *arena_list;
961 __u64 lba = div_u64(sector << SECTOR_SHIFT, btt->sector_size);
962
963 list_for_each_entry(arena_list, &btt->arena_list, list) {
964 if (lba < arena_list->external_nlba) {
965 *arena = arena_list;
966 *premap = lba;
967 return 0;
968 }
969 lba -= arena_list->external_nlba;
970 }
971
972 return -EIO;
973}
974
975/*
976 * The following (lock_map, unlock_map) are mostly just to improve
977 * readability, since they index into an array of locks
978 */
979static void lock_map(struct arena_info *arena, u32 premap)
980 __acquires(&arena->map_locks[idx].lock)
981{
982 u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;
983
984 spin_lock(&arena->map_locks[idx].lock);
985}
986
987static void unlock_map(struct arena_info *arena, u32 premap)
988 __releases(&arena->map_locks[idx].lock)
989{
990 u32 idx = (premap * MAP_ENT_SIZE / L1_CACHE_BYTES) % arena->nfree;
991
992 spin_unlock(&arena->map_locks[idx].lock);
993}
994
995static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
996{
997 return arena->dataoff + ((u64)lba * arena->internal_lbasize);
998}
999
1000static int btt_data_read(struct arena_info *arena, struct page *page,
1001 unsigned int off, u32 lba, u32 len)
1002{
1003 int ret;
1004 u64 nsoff = to_namespace_offset(arena, lba);
1005 void *mem = kmap_atomic(page);
1006
1007 ret = arena_read_bytes(arena, nsoff, mem + off, len);
1008 kunmap_atomic(mem);
1009
1010 return ret;
1011}
1012
1013static int btt_data_write(struct arena_info *arena, u32 lba,
1014 struct page *page, unsigned int off, u32 len)
1015{
1016 int ret;
1017 u64 nsoff = to_namespace_offset(arena, lba);
1018 void *mem = kmap_atomic(page);
1019
1020 ret = arena_write_bytes(arena, nsoff, mem + off, len);
1021 kunmap_atomic(mem);
1022
1023 return ret;
1024}
1025
1026static void zero_fill_data(struct page *page, unsigned int off, u32 len)
1027{
1028 void *mem = kmap_atomic(page);
1029
1030 memset(mem + off, 0, len);
1031 kunmap_atomic(mem);
1032}
1033
Vishal Verma41cd8b72015-06-25 04:21:52 -04001034#ifdef CONFIG_BLK_DEV_INTEGRITY
1035static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
1036 struct arena_info *arena, u32 postmap, int rw)
1037{
1038 unsigned int len = btt_meta_size(btt);
1039 u64 meta_nsoff;
1040 int ret = 0;
1041
1042 if (bip == NULL)
1043 return 0;
1044
1045 meta_nsoff = to_namespace_offset(arena, postmap) + btt->sector_size;
1046
1047 while (len) {
1048 unsigned int cur_len;
1049 struct bio_vec bv;
1050 void *mem;
1051
1052 bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
1053 /*
1054 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
1055 * .bv_offset already adjusted for iter->bi_bvec_done, and we
1056 * can use those directly
1057 */
1058
1059 cur_len = min(len, bv.bv_len);
1060 mem = kmap_atomic(bv.bv_page);
1061 if (rw)
1062 ret = arena_write_bytes(arena, meta_nsoff,
1063 mem + bv.bv_offset, cur_len);
1064 else
1065 ret = arena_read_bytes(arena, meta_nsoff,
1066 mem + bv.bv_offset, cur_len);
1067
1068 kunmap_atomic(mem);
1069 if (ret)
1070 return ret;
1071
1072 len -= cur_len;
1073 meta_nsoff += cur_len;
1074 bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
1075 }
1076
1077 return ret;
1078}
1079
1080#else /* CONFIG_BLK_DEV_INTEGRITY */
1081static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
1082 struct arena_info *arena, u32 postmap, int rw)
1083{
1084 return 0;
1085}
1086#endif
1087
1088static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
1089 struct page *page, unsigned int off, sector_t sector,
1090 unsigned int len)
Vishal Verma5212e112015-06-25 04:20:32 -04001091{
1092 int ret = 0;
1093 int t_flag, e_flag;
1094 struct arena_info *arena = NULL;
1095 u32 lane = 0, premap, postmap;
1096
1097 while (len) {
1098 u32 cur_len;
1099
1100 lane = nd_region_acquire_lane(btt->nd_region);
1101
1102 ret = lba_to_arena(btt, sector, &premap, &arena);
1103 if (ret)
1104 goto out_lane;
1105
1106 cur_len = min(btt->sector_size, len);
1107
1108 ret = btt_map_read(arena, premap, &postmap, &t_flag, &e_flag);
1109 if (ret)
1110 goto out_lane;
1111
1112 /*
1113 * We loop to make sure that the post map LBA didn't change
1114 * from under us between writing the RTT and doing the actual
1115 * read.
1116 */
1117 while (1) {
1118 u32 new_map;
1119
1120 if (t_flag) {
1121 zero_fill_data(page, off, cur_len);
1122 goto out_lane;
1123 }
1124
1125 if (e_flag) {
1126 ret = -EIO;
1127 goto out_lane;
1128 }
1129
1130 arena->rtt[lane] = RTT_VALID | postmap;
1131 /*
1132 * Barrier to make sure this write is not reordered
1133 * to do the verification map_read before the RTT store
1134 */
1135 barrier();
1136
1137 ret = btt_map_read(arena, premap, &new_map, &t_flag,
1138 &e_flag);
1139 if (ret)
1140 goto out_rtt;
1141
1142 if (postmap == new_map)
1143 break;
1144
1145 postmap = new_map;
1146 }
1147
1148 ret = btt_data_read(arena, page, off, postmap, cur_len);
1149 if (ret)
1150 goto out_rtt;
1151
Vishal Verma41cd8b72015-06-25 04:21:52 -04001152 if (bip) {
1153 ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
1154 if (ret)
1155 goto out_rtt;
1156 }
1157
Vishal Verma5212e112015-06-25 04:20:32 -04001158 arena->rtt[lane] = RTT_INVALID;
1159 nd_region_release_lane(btt->nd_region, lane);
1160
1161 len -= cur_len;
1162 off += cur_len;
1163 sector += btt->sector_size >> SECTOR_SHIFT;
1164 }
1165
1166 return 0;
1167
1168 out_rtt:
1169 arena->rtt[lane] = RTT_INVALID;
1170 out_lane:
1171 nd_region_release_lane(btt->nd_region, lane);
1172 return ret;
1173}
1174
Vishal Verma41cd8b72015-06-25 04:21:52 -04001175static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
1176 sector_t sector, struct page *page, unsigned int off,
1177 unsigned int len)
Vishal Verma5212e112015-06-25 04:20:32 -04001178{
1179 int ret = 0;
1180 struct arena_info *arena = NULL;
1181 u32 premap = 0, old_postmap, new_postmap, lane = 0, i;
1182 struct log_entry log;
1183 int sub;
1184
1185 while (len) {
1186 u32 cur_len;
1187
1188 lane = nd_region_acquire_lane(btt->nd_region);
1189
1190 ret = lba_to_arena(btt, sector, &premap, &arena);
1191 if (ret)
1192 goto out_lane;
1193 cur_len = min(btt->sector_size, len);
1194
1195 if ((arena->flags & IB_FLAG_ERROR_MASK) != 0) {
1196 ret = -EIO;
1197 goto out_lane;
1198 }
1199
1200 new_postmap = arena->freelist[lane].block;
1201
1202 /* Wait if the new block is being read from */
1203 for (i = 0; i < arena->nfree; i++)
1204 while (arena->rtt[i] == (RTT_VALID | new_postmap))
1205 cpu_relax();
1206
1207
1208 if (new_postmap >= arena->internal_nlba) {
1209 ret = -EIO;
1210 goto out_lane;
Vishal Verma41cd8b72015-06-25 04:21:52 -04001211 }
1212
1213 ret = btt_data_write(arena, new_postmap, page, off, cur_len);
Vishal Verma5212e112015-06-25 04:20:32 -04001214 if (ret)
1215 goto out_lane;
1216
Vishal Verma41cd8b72015-06-25 04:21:52 -04001217 if (bip) {
1218 ret = btt_rw_integrity(btt, bip, arena, new_postmap,
1219 WRITE);
1220 if (ret)
1221 goto out_lane;
1222 }
1223
Vishal Verma5212e112015-06-25 04:20:32 -04001224 lock_map(arena, premap);
1225 ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL);
1226 if (ret)
1227 goto out_map;
1228 if (old_postmap >= arena->internal_nlba) {
1229 ret = -EIO;
1230 goto out_map;
1231 }
1232
1233 log.lba = cpu_to_le32(premap);
1234 log.old_map = cpu_to_le32(old_postmap);
1235 log.new_map = cpu_to_le32(new_postmap);
1236 log.seq = cpu_to_le32(arena->freelist[lane].seq);
1237 sub = arena->freelist[lane].sub;
1238 ret = btt_flog_write(arena, lane, sub, &log);
1239 if (ret)
1240 goto out_map;
1241
1242 ret = btt_map_write(arena, premap, new_postmap, 0, 0);
1243 if (ret)
1244 goto out_map;
1245
1246 unlock_map(arena, premap);
1247 nd_region_release_lane(btt->nd_region, lane);
1248
1249 len -= cur_len;
1250 off += cur_len;
1251 sector += btt->sector_size >> SECTOR_SHIFT;
1252 }
1253
1254 return 0;
1255
1256 out_map:
1257 unlock_map(arena, premap);
1258 out_lane:
1259 nd_region_release_lane(btt->nd_region, lane);
1260 return ret;
1261}
1262
Vishal Verma41cd8b72015-06-25 04:21:52 -04001263static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
1264 struct page *page, unsigned int len, unsigned int off,
Jens Axboec11f0c02016-08-05 08:11:04 -06001265 bool is_write, sector_t sector)
Vishal Verma5212e112015-06-25 04:20:32 -04001266{
1267 int ret;
1268
Jens Axboec11f0c02016-08-05 08:11:04 -06001269 if (!is_write) {
Vishal Verma41cd8b72015-06-25 04:21:52 -04001270 ret = btt_read_pg(btt, bip, page, off, sector, len);
Vishal Verma5212e112015-06-25 04:20:32 -04001271 flush_dcache_page(page);
1272 } else {
1273 flush_dcache_page(page);
Vishal Verma41cd8b72015-06-25 04:21:52 -04001274 ret = btt_write_pg(btt, bip, sector, page, off, len);
Vishal Verma5212e112015-06-25 04:20:32 -04001275 }
1276
1277 return ret;
1278}
1279
Jens Axboedece1632015-11-05 10:41:16 -07001280static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
Vishal Verma5212e112015-06-25 04:20:32 -04001281{
Vishal Verma41cd8b72015-06-25 04:21:52 -04001282 struct bio_integrity_payload *bip = bio_integrity(bio);
Vishal Verma5212e112015-06-25 04:20:32 -04001283 struct btt *btt = q->queuedata;
1284 struct bvec_iter iter;
Dan Williamsf0dc0892015-05-16 12:28:53 -04001285 unsigned long start;
Vishal Verma5212e112015-06-25 04:20:32 -04001286 struct bio_vec bvec;
Mike Christieabf54542016-08-04 14:23:34 -06001287 int err = 0;
Dan Williamsf0dc0892015-05-16 12:28:53 -04001288 bool do_acct;
Vishal Verma5212e112015-06-25 04:20:32 -04001289
Vishal Verma41cd8b72015-06-25 04:21:52 -04001290 /*
1291 * bio_integrity_enabled also checks if the bio already has an
1292 * integrity payload attached. If it does, we *don't* do a
1293 * bio_integrity_prep here - the payload has been generated by
1294 * another kernel subsystem, and we just pass it through.
1295 */
1296 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02001297 bio->bi_error = -EIO;
Vishal Verma41cd8b72015-06-25 04:21:52 -04001298 goto out;
1299 }
1300
Dan Williamsf0dc0892015-05-16 12:28:53 -04001301 do_acct = nd_iostat_start(bio, &start);
Vishal Verma5212e112015-06-25 04:20:32 -04001302 bio_for_each_segment(bvec, bio, iter) {
1303 unsigned int len = bvec.bv_len;
1304
1305 BUG_ON(len > PAGE_SIZE);
1306 /* Make sure len is in multiples of sector size. */
1307 /* XXX is this right? */
1308 BUG_ON(len < btt->sector_size);
1309 BUG_ON(len % btt->sector_size);
1310
Vishal Verma41cd8b72015-06-25 04:21:52 -04001311 err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
Jens Axboec11f0c02016-08-05 08:11:04 -06001312 op_is_write(bio_op(bio)), iter.bi_sector);
Vishal Verma5212e112015-06-25 04:20:32 -04001313 if (err) {
1314 dev_info(&btt->nd_btt->dev,
1315 "io error in %s sector %lld, len %d,\n",
Mike Christieabf54542016-08-04 14:23:34 -06001316 (op_is_write(bio_op(bio))) ? "WRITE" :
1317 "READ",
Vishal Verma5212e112015-06-25 04:20:32 -04001318 (unsigned long long) iter.bi_sector, len);
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02001319 bio->bi_error = err;
Dan Williamsf0dc0892015-05-16 12:28:53 -04001320 break;
Vishal Verma5212e112015-06-25 04:20:32 -04001321 }
1322 }
Dan Williamsf0dc0892015-05-16 12:28:53 -04001323 if (do_acct)
1324 nd_iostat_end(bio, start);
Vishal Verma5212e112015-06-25 04:20:32 -04001325
1326out:
Christoph Hellwig4246a0b2015-07-20 15:29:37 +02001327 bio_endio(bio);
Jens Axboedece1632015-11-05 10:41:16 -07001328 return BLK_QC_T_NONE;
Vishal Verma5212e112015-06-25 04:20:32 -04001329}
1330
1331static int btt_rw_page(struct block_device *bdev, sector_t sector,
Jens Axboec11f0c02016-08-05 08:11:04 -06001332 struct page *page, bool is_write)
Vishal Verma5212e112015-06-25 04:20:32 -04001333{
1334 struct btt *btt = bdev->bd_disk->private_data;
Vishal Verma891c31e2017-06-29 16:59:11 -06001335 int rc;
Vishal Verma5212e112015-06-25 04:20:32 -04001336
Vishal Verma891c31e2017-06-29 16:59:11 -06001337 rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
1338 if (rc == 0)
1339 page_endio(page, is_write, 0);
1340
1341 return rc;
Vishal Verma5212e112015-06-25 04:20:32 -04001342}
1343
1344
1345static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo)
1346{
1347 /* some standard values */
1348 geo->heads = 1 << 6;
1349 geo->sectors = 1 << 5;
1350 geo->cylinders = get_capacity(bd->bd_disk) >> 11;
1351 return 0;
1352}
1353
1354static const struct block_device_operations btt_fops = {
1355 .owner = THIS_MODULE,
1356 .rw_page = btt_rw_page,
1357 .getgeo = btt_getgeo,
Dan Williams58138822015-06-23 20:08:34 -04001358 .revalidate_disk = nvdimm_revalidate_disk,
Vishal Verma5212e112015-06-25 04:20:32 -04001359};
1360
1361static int btt_blk_init(struct btt *btt)
1362{
1363 struct nd_btt *nd_btt = btt->nd_btt;
1364 struct nd_namespace_common *ndns = nd_btt->ndns;
1365
1366 /* create a new disk and request queue for btt */
1367 btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
1368 if (!btt->btt_queue)
1369 return -ENOMEM;
1370
1371 btt->btt_disk = alloc_disk(0);
1372 if (!btt->btt_disk) {
1373 blk_cleanup_queue(btt->btt_queue);
1374 return -ENOMEM;
1375 }
1376
1377 nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
Vishal Verma5212e112015-06-25 04:20:32 -04001378 btt->btt_disk->first_minor = 0;
1379 btt->btt_disk->fops = &btt_fops;
1380 btt->btt_disk->private_data = btt;
1381 btt->btt_disk->queue = btt->btt_queue;
1382 btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
1383
1384 blk_queue_make_request(btt->btt_queue, btt_make_request);
1385 blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
1386 blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
1387 blk_queue_bounce_limit(btt->btt_queue, BLK_BOUNCE_ANY);
1388 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, btt->btt_queue);
1389 btt->btt_queue->queuedata = btt;
1390
Vishal Verma41cd8b72015-06-25 04:21:52 -04001391 if (btt_meta_size(btt)) {
1392 int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
1393
1394 if (rc) {
1395 del_gendisk(btt->btt_disk);
1396 put_disk(btt->btt_disk);
1397 blk_cleanup_queue(btt->btt_queue);
1398 return rc;
1399 }
1400 }
1401 set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
Vishal Verma6fa877d2018-03-05 16:56:13 -07001402 device_add_disk(&btt->nd_btt->dev, btt->btt_disk);
Vishal Vermaabe8b4e2016-07-27 16:38:59 -06001403 btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
Dan Williams58138822015-06-23 20:08:34 -04001404 revalidate_disk(btt->btt_disk);
Vishal Verma5212e112015-06-25 04:20:32 -04001405
1406 return 0;
1407}
1408
1409static void btt_blk_cleanup(struct btt *btt)
1410{
Vishal Verma5212e112015-06-25 04:20:32 -04001411 del_gendisk(btt->btt_disk);
1412 put_disk(btt->btt_disk);
1413 blk_cleanup_queue(btt->btt_queue);
1414}
1415
1416/**
1417 * btt_init - initialize a block translation table for the given device
1418 * @nd_btt: device with BTT geometry and backing device info
1419 * @rawsize: raw size in bytes of the backing device
1420 * @lbasize: lba size of the backing device
1421 * @uuid: A uuid for the backing device - this is stored on media
1422 * @maxlane: maximum number of parallel requests the device can handle
1423 *
1424 * Initialize a Block Translation Table on a backing device to provide
1425 * single sector power fail atomicity.
1426 *
1427 * Context:
1428 * Might sleep.
1429 *
1430 * Returns:
1431 * Pointer to a new struct btt on success, NULL on failure.
1432 */
1433static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
1434 u32 lbasize, u8 *uuid, struct nd_region *nd_region)
1435{
1436 int ret;
1437 struct btt *btt;
1438 struct device *dev = &nd_btt->dev;
1439
Dan Williamse32bc722016-03-17 18:23:09 -07001440 btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
Vishal Verma5212e112015-06-25 04:20:32 -04001441 if (!btt)
1442 return NULL;
1443
1444 btt->nd_btt = nd_btt;
1445 btt->rawsize = rawsize;
1446 btt->lbasize = lbasize;
1447 btt->sector_size = ((lbasize >= 4096) ? 4096 : 512);
1448 INIT_LIST_HEAD(&btt->arena_list);
1449 mutex_init(&btt->init_lock);
1450 btt->nd_region = nd_region;
1451
1452 ret = discover_arenas(btt);
1453 if (ret) {
1454 dev_err(dev, "init: error in arena_discover: %d\n", ret);
Dan Williamse32bc722016-03-17 18:23:09 -07001455 return NULL;
Vishal Verma5212e112015-06-25 04:20:32 -04001456 }
1457
Dan Williams58138822015-06-23 20:08:34 -04001458 if (btt->init_state != INIT_READY && nd_region->ro) {
1459 dev_info(dev, "%s is read-only, unable to init btt metadata\n",
1460 dev_name(&nd_region->dev));
Dan Williamse32bc722016-03-17 18:23:09 -07001461 return NULL;
Dan Williams58138822015-06-23 20:08:34 -04001462 } else if (btt->init_state != INIT_READY) {
Vishal Verma5212e112015-06-25 04:20:32 -04001463 btt->num_arenas = (rawsize / ARENA_MAX_SIZE) +
1464 ((rawsize % ARENA_MAX_SIZE) ? 1 : 0);
1465 dev_dbg(dev, "init: %d arenas for %llu rawsize\n",
1466 btt->num_arenas, rawsize);
1467
1468 ret = create_arenas(btt);
1469 if (ret) {
1470 dev_info(dev, "init: create_arenas: %d\n", ret);
Dan Williamse32bc722016-03-17 18:23:09 -07001471 return NULL;
Vishal Verma5212e112015-06-25 04:20:32 -04001472 }
1473
1474 ret = btt_meta_init(btt);
1475 if (ret) {
1476 dev_err(dev, "init: error in meta_init: %d\n", ret);
Dan Williamse32bc722016-03-17 18:23:09 -07001477 return NULL;
Vishal Verma5212e112015-06-25 04:20:32 -04001478 }
1479 }
1480
1481 ret = btt_blk_init(btt);
1482 if (ret) {
1483 dev_err(dev, "init: error in blk_init: %d\n", ret);
Dan Williamse32bc722016-03-17 18:23:09 -07001484 return NULL;
Vishal Verma5212e112015-06-25 04:20:32 -04001485 }
1486
1487 btt_debugfs_init(btt);
1488
1489 return btt;
Vishal Verma5212e112015-06-25 04:20:32 -04001490}
1491
1492/**
1493 * btt_fini - de-initialize a BTT
1494 * @btt: the BTT handle that was generated by btt_init
1495 *
1496 * De-initialize a Block Translation Table on device removal
1497 *
1498 * Context:
1499 * Might sleep.
1500 */
1501static void btt_fini(struct btt *btt)
1502{
1503 if (btt) {
1504 btt_blk_cleanup(btt);
1505 free_arenas(btt);
1506 debugfs_remove_recursive(btt->debugfs_dir);
Vishal Verma5212e112015-06-25 04:20:32 -04001507 }
1508}
1509
1510int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
1511{
1512 struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
1513 struct nd_region *nd_region;
1514 struct btt *btt;
1515 size_t rawsize;
1516
Dan Williams9dec4892016-04-22 12:26:05 -07001517 if (!nd_btt->uuid || !nd_btt->ndns || !nd_btt->lbasize) {
1518 dev_dbg(&nd_btt->dev, "incomplete btt configuration\n");
Vishal Verma5212e112015-06-25 04:20:32 -04001519 return -ENODEV;
Dan Williams9dec4892016-04-22 12:26:05 -07001520 }
Vishal Verma5212e112015-06-25 04:20:32 -04001521
1522 rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K;
1523 if (rawsize < ARENA_MIN_SIZE) {
Dan Williams9dec4892016-04-22 12:26:05 -07001524 dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n",
1525 dev_name(&ndns->dev), ARENA_MIN_SIZE + SZ_4K);
Vishal Verma5212e112015-06-25 04:20:32 -04001526 return -ENXIO;
1527 }
1528 nd_region = to_nd_region(nd_btt->dev.parent);
1529 btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid,
1530 nd_region);
1531 if (!btt)
1532 return -ENOMEM;
1533 nd_btt->btt = btt;
1534
1535 return 0;
1536}
1537EXPORT_SYMBOL(nvdimm_namespace_attach_btt);
1538
Dan Williams298f2bc2016-03-15 16:41:04 -07001539int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt)
Vishal Verma5212e112015-06-25 04:20:32 -04001540{
Vishal Verma5212e112015-06-25 04:20:32 -04001541 struct btt *btt = nd_btt->btt;
1542
1543 btt_fini(btt);
1544 nd_btt->btt = NULL;
1545
1546 return 0;
1547}
1548EXPORT_SYMBOL(nvdimm_namespace_detach_btt);
1549
1550static int __init nd_btt_init(void)
1551{
NeilBrownff8e92d2016-03-10 08:59:28 +11001552 int rc = 0;
Vishal Verma5212e112015-06-25 04:20:32 -04001553
1554 debugfs_root = debugfs_create_dir("btt", NULL);
NeilBrownff8e92d2016-03-10 08:59:28 +11001555 if (IS_ERR_OR_NULL(debugfs_root))
Vishal Verma5212e112015-06-25 04:20:32 -04001556 rc = -ENXIO;
Vishal Verma5212e112015-06-25 04:20:32 -04001557
1558 return rc;
1559}
1560
1561static void __exit nd_btt_exit(void)
1562{
1563 debugfs_remove_recursive(debugfs_root);
Vishal Verma5212e112015-06-25 04:20:32 -04001564}
1565
1566MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT);
1567MODULE_AUTHOR("Vishal Verma <vishal.l.verma@linux.intel.com>");
1568MODULE_LICENSE("GPL v2");
1569module_init(nd_btt_init);
1570module_exit(nd_btt_exit);