blob: b1f4b51783f41763fb96605311b897befff9e039 [file] [log] [blame]
Javier González02a15202018-10-09 13:12:06 +02001// SPDX-License-Identifier: GPL-2.0
Javier Gonzáleza4bd2172017-04-15 20:55:50 +02002/*
3 * Copyright (C) 2016 CNEX Labs
4 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
5 *
6 * Based upon the circular ringbuffer.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * pblk-rb.c - pblk's write buffer
18 */
19
20#include <linux/circ_buf.h>
21
22#include "pblk.h"
23
24static DECLARE_RWSEM(pblk_rb_lock);
25
Javier González9bd1f872018-10-09 13:12:10 +020026static void pblk_rb_data_free(struct pblk_rb *rb)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020027{
28 struct pblk_rb_pages *p, *t;
29
30 down_write(&pblk_rb_lock);
31 list_for_each_entry_safe(p, t, &rb->pages, list) {
32 free_pages((unsigned long)page_address(p->pages), p->order);
33 list_del(&p->list);
34 kfree(p);
35 }
36 up_write(&pblk_rb_lock);
37}
38
Javier González9bd1f872018-10-09 13:12:10 +020039void pblk_rb_free(struct pblk_rb *rb)
40{
41 pblk_rb_data_free(rb);
42 vfree(rb->entries);
43}
44
/*
 * pblk_rb_calculate_size -- calculate the size of the write buffer
 *
 * Rounds @nr_entries up to a power of two, with a floor of 2^7 = 128 entries.
 */
static unsigned int pblk_rb_calculate_size(unsigned int nr_entries)
{
	int order = get_count_order(nr_entries);

	/* Alloc a write buffer that can at least fit 128 entries */
	if (order < 7)
		order = 7;

	return (1 << order);
}
53
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020054/*
55 * Initialize ring buffer. The data and metadata buffers must be previously
56 * allocated and their size must be a power of two
Mauro Carvalho Chehab5fb94e92018-05-08 15:14:57 -030057 * (Documentation/core-api/circular-buffers.rst)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020058 */
Javier González766c8ce2018-10-09 13:12:15 +020059int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
60 unsigned int seg_size)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020061{
62 struct pblk *pblk = container_of(rb, struct pblk, rwb);
Javier González9bd1f872018-10-09 13:12:10 +020063 struct pblk_rb_entry *entries;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020064 unsigned int init_entry = 0;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020065 unsigned int max_order = MAX_ORDER - 1;
Javier González9bd1f872018-10-09 13:12:10 +020066 unsigned int power_size, power_seg_sz;
67 unsigned int alloc_order, order, iter;
68 unsigned int nr_entries;
69
70 nr_entries = pblk_rb_calculate_size(size);
71 entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
72 if (!entries)
73 return -ENOMEM;
74
75 power_size = get_count_order(size);
76 power_seg_sz = get_count_order(seg_size);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020077
78 down_write(&pblk_rb_lock);
Javier González9bd1f872018-10-09 13:12:10 +020079 rb->entries = entries;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020080 rb->seg_size = (1 << power_seg_sz);
81 rb->nr_entries = (1 << power_size);
82 rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
Javier González766c8ce2018-10-09 13:12:15 +020083 rb->back_thres = threshold;
Hans Holmberg8154d292018-01-05 14:16:09 +010084 rb->flush_point = EMPTY_ENTRY;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020085
86 spin_lock_init(&rb->w_lock);
87 spin_lock_init(&rb->s_lock);
88
89 INIT_LIST_HEAD(&rb->pages);
90
Javier González9bd1f872018-10-09 13:12:10 +020091 alloc_order = power_size;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020092 if (alloc_order >= max_order) {
93 order = max_order;
94 iter = (1 << (alloc_order - max_order));
95 } else {
96 order = alloc_order;
97 iter = 1;
98 }
99
100 do {
101 struct pblk_rb_entry *entry;
102 struct pblk_rb_pages *page_set;
103 void *kaddr;
104 unsigned long set_size;
105 int i;
106
107 page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
108 if (!page_set) {
109 up_write(&pblk_rb_lock);
Javier González9bd1f872018-10-09 13:12:10 +0200110 vfree(entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200111 return -ENOMEM;
112 }
113
114 page_set->order = order;
115 page_set->pages = alloc_pages(GFP_KERNEL, order);
116 if (!page_set->pages) {
117 kfree(page_set);
118 pblk_rb_data_free(rb);
119 up_write(&pblk_rb_lock);
Javier González9bd1f872018-10-09 13:12:10 +0200120 vfree(entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200121 return -ENOMEM;
122 }
123 kaddr = page_address(page_set->pages);
124
125 entry = &rb->entries[init_entry];
126 entry->data = kaddr;
127 entry->cacheline = pblk_cacheline_to_addr(init_entry++);
128 entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
129
130 set_size = (1 << order);
131 for (i = 1; i < set_size; i++) {
132 entry = &rb->entries[init_entry];
133 entry->cacheline = pblk_cacheline_to_addr(init_entry++);
134 entry->data = kaddr + (i * rb->seg_size);
135 entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
136 bio_list_init(&entry->w_ctx.bios);
137 }
138
139 list_add_tail(&page_set->list, &rb->pages);
140 iter--;
141 } while (iter > 0);
142 up_write(&pblk_rb_lock);
143
Matias Bjørling880eda52018-07-13 10:48:37 +0200144#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg8154d292018-01-05 14:16:09 +0100145 atomic_set(&rb->inflight_flush_point, 0);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200146#endif
147
148 /*
149 * Initialize rate-limiter, which controls access to the write buffer
150 * but user and GC I/O
151 */
152 pblk_rl_init(&pblk->rl, rb->nr_entries);
153
154 return 0;
155}
156
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200157static void clean_wctx(struct pblk_w_ctx *w_ctx)
158{
159 int flags;
160
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200161 flags = READ_ONCE(w_ctx->flags);
Javier Gonzáleze37d0792018-06-01 15:04:19 +0200162 WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
163 "pblk: overwriting unsubmitted data\n");
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200164
165 /* Release flags on context. Protect from writes and reads */
166 smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
167 pblk_ppa_set_empty(&w_ctx->ppa);
Javier González076984662017-06-30 17:56:42 +0200168 w_ctx->lba = ADDR_EMPTY;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200169}
170
/*
 * Thin aliases for the CIRC_CNT()/CIRC_SPACE() helpers from
 * <linux/circ_buf.h>. The rb argument of pblk_rb_ring_space() is accepted but
 * not used.
 */
#define pblk_rb_ring_count(head, tail, size) CIRC_CNT((head), (tail), (size))
#define pblk_rb_ring_space(rb, head, tail, size) \
					(CIRC_SPACE((head), (tail), (size)))
174
175/*
176 * Buffer space is calculated with respect to the back pointer signaling
177 * synchronized entries to the media.
178 */
179static unsigned int pblk_rb_space(struct pblk_rb *rb)
180{
181 unsigned int mem = READ_ONCE(rb->mem);
182 unsigned int sync = READ_ONCE(rb->sync);
183
184 return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
185}
186
Javier González40b86572018-10-09 13:12:09 +0200187unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
188 unsigned int nr_entries)
189{
190 return (p + nr_entries) & (rb->nr_entries - 1);
191}
192
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200193/*
194 * Buffer count is calculated with respect to the submission entry signaling the
195 * entries that are available to send to the media
196 */
197unsigned int pblk_rb_read_count(struct pblk_rb *rb)
198{
199 unsigned int mem = READ_ONCE(rb->mem);
200 unsigned int subm = READ_ONCE(rb->subm);
201
202 return pblk_rb_ring_count(mem, subm, rb->nr_entries);
203}
204
Javier Gonzálezee8d5c12017-06-30 17:56:40 +0200205unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
206{
207 unsigned int mem = READ_ONCE(rb->mem);
208 unsigned int sync = READ_ONCE(rb->sync);
209
210 return pblk_rb_ring_count(mem, sync, rb->nr_entries);
211}
212
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200213unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
214{
215 unsigned int subm;
216
217 subm = READ_ONCE(rb->subm);
218 /* Commit read means updating submission pointer */
Javier González40b86572018-10-09 13:12:09 +0200219 smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries));
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200220
221 return subm;
222}
223
Rakesh Pandit05ed3442017-10-13 14:46:32 +0200224static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200225{
226 struct pblk *pblk = container_of(rb, struct pblk, rwb);
227 struct pblk_line *line;
228 struct pblk_rb_entry *entry;
229 struct pblk_w_ctx *w_ctx;
Javier Gonzálezb20ba1b2017-06-26 11:57:27 +0200230 unsigned int user_io = 0, gc_io = 0;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200231 unsigned int i;
Javier Gonzálezb20ba1b2017-06-26 11:57:27 +0200232 int flags;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200233
234 for (i = 0; i < to_update; i++) {
Rakesh Pandit05ed3442017-10-13 14:46:32 +0200235 entry = &rb->entries[rb->l2p_update];
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200236 w_ctx = &entry->w_ctx;
237
Javier Gonzálezb20ba1b2017-06-26 11:57:27 +0200238 flags = READ_ONCE(entry->w_ctx.flags);
239 if (flags & PBLK_IOTYPE_USER)
240 user_io++;
241 else if (flags & PBLK_IOTYPE_GC)
242 gc_io++;
243 else
244 WARN(1, "pblk: unknown IO type\n");
245
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200246 pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
247 entry->cacheline);
248
Javier Gonzálezcb216652018-10-09 13:11:42 +0200249 line = pblk_ppa_to_line(pblk, w_ctx->ppa);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200250 kref_put(&line->ref, pblk_line_put);
251 clean_wctx(w_ctx);
Javier González40b86572018-10-09 13:12:09 +0200252 rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200253 }
254
Javier Gonzálezb20ba1b2017-06-26 11:57:27 +0200255 pblk_rl_out(&pblk->rl, user_io, gc_io);
256
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200257 return 0;
258}
259
260/*
261 * When we move the l2p_update pointer, we update the l2p table - lookups will
262 * point to the physical address instead of to the cacheline in the write buffer
263 * from this moment on.
264 */
265static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
266 unsigned int mem, unsigned int sync)
267{
268 unsigned int space, count;
269 int ret = 0;
270
271 lockdep_assert_held(&rb->w_lock);
272
273 /* Update l2p only as buffer entries are being overwritten */
274 space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
275 if (space > nr_entries)
276 goto out;
277
278 count = nr_entries - space;
279 /* l2p_update used exclusively under rb->w_lock */
Rakesh Pandit05ed3442017-10-13 14:46:32 +0200280 ret = __pblk_rb_update_l2p(rb, count);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200281
282out:
283 return ret;
284}
285
286/*
287 * Update the l2p entry for all sectors stored on the write buffer. This means
288 * that all future lookups to the l2p table will point to a device address, not
289 * to the cacheline in the write buffer.
290 */
291void pblk_rb_sync_l2p(struct pblk_rb *rb)
292{
293 unsigned int sync;
294 unsigned int to_update;
295
296 spin_lock(&rb->w_lock);
297
298 /* Protect from reads and writes */
299 sync = smp_load_acquire(&rb->sync);
300
301 to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
Rakesh Pandit05ed3442017-10-13 14:46:32 +0200302 __pblk_rb_update_l2p(rb, to_update);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200303
304 spin_unlock(&rb->w_lock);
305}
306
307/*
308 * Write @nr_entries to ring buffer from @data buffer if there is enough space.
309 * Typically, 4KB data chunks coming from a bio will be copied to the ring
310 * buffer, thus the write will fail if not all incoming data can be copied.
311 *
312 */
313static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
314 struct pblk_w_ctx w_ctx,
315 struct pblk_rb_entry *entry)
316{
317 memcpy(entry->data, data, rb->seg_size);
318
319 entry->w_ctx.lba = w_ctx.lba;
320 entry->w_ctx.ppa = w_ctx.ppa;
321}
322
323void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
324 struct pblk_w_ctx w_ctx, unsigned int ring_pos)
325{
326 struct pblk *pblk = container_of(rb, struct pblk, rwb);
327 struct pblk_rb_entry *entry;
328 int flags;
329
330 entry = &rb->entries[ring_pos];
331 flags = READ_ONCE(entry->w_ctx.flags);
Matias Bjørling880eda52018-07-13 10:48:37 +0200332#ifdef CONFIG_NVM_PBLK_DEBUG
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200333 /* Caller must guarantee that the entry is free */
334 BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
335#endif
336
337 __pblk_rb_write_entry(rb, data, w_ctx, entry);
338
339 pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
340 flags = w_ctx.flags | PBLK_WRITTEN_DATA;
341
342 /* Release flags on write context. Protect from writes */
343 smp_store_release(&entry->w_ctx.flags, flags);
344}
345
346void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
Javier Gonzálezd3401212017-10-13 14:46:14 +0200347 struct pblk_w_ctx w_ctx, struct pblk_line *line,
348 u64 paddr, unsigned int ring_pos)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200349{
350 struct pblk *pblk = container_of(rb, struct pblk, rwb);
351 struct pblk_rb_entry *entry;
352 int flags;
353
354 entry = &rb->entries[ring_pos];
355 flags = READ_ONCE(entry->w_ctx.flags);
Matias Bjørling880eda52018-07-13 10:48:37 +0200356#ifdef CONFIG_NVM_PBLK_DEBUG
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200357 /* Caller must guarantee that the entry is free */
358 BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
359#endif
360
361 __pblk_rb_write_entry(rb, data, w_ctx, entry);
362
Javier Gonzálezd3401212017-10-13 14:46:14 +0200363 if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr))
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200364 entry->w_ctx.lba = ADDR_EMPTY;
365
366 flags = w_ctx.flags | PBLK_WRITTEN_DATA;
367
368 /* Release flags on write context. Protect from writes */
369 smp_store_release(&entry->w_ctx.flags, flags);
370}
371
Hans Holmberg8154d292018-01-05 14:16:09 +0100372static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
Hans Holmbergcc9c9a02018-06-01 16:41:13 +0200373 unsigned int pos)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200374{
375 struct pblk_rb_entry *entry;
Hans Holmberg533657c12018-01-05 14:16:10 +0100376 unsigned int sync, flush_point;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200377
Hans Holmberg3c05ef112018-03-30 00:04:58 +0200378 pblk_rb_sync_init(rb, NULL);
Hans Holmberg533657c12018-01-05 14:16:10 +0100379 sync = READ_ONCE(rb->sync);
380
Hans Holmberg3c05ef112018-03-30 00:04:58 +0200381 if (pos == sync) {
382 pblk_rb_sync_end(rb, NULL);
Hans Holmberg533657c12018-01-05 14:16:10 +0100383 return 0;
Hans Holmberg3c05ef112018-03-30 00:04:58 +0200384 }
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200385
Matias Bjørling880eda52018-07-13 10:48:37 +0200386#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg8154d292018-01-05 14:16:09 +0100387 atomic_inc(&rb->inflight_flush_point);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200388#endif
389
Hans Holmberg8154d292018-01-05 14:16:09 +0100390 flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
391 entry = &rb->entries[flush_point];
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200392
Hans Holmberg8154d292018-01-05 14:16:09 +0100393 /* Protect flush points */
394 smp_store_release(&rb->flush_point, flush_point);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200395
Hans Holmbergb36bbf92018-01-05 14:16:11 +0100396 if (bio)
397 bio_list_add(&entry->w_ctx.bios, bio);
Javier González588726d32017-06-26 11:57:29 +0200398
Hans Holmbergb36bbf92018-01-05 14:16:11 +0100399 pblk_rb_sync_end(rb, NULL);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200400
Hans Holmbergb36bbf92018-01-05 14:16:11 +0100401 return bio ? 1 : 0;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200402}
403
404static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
405 unsigned int *pos)
406{
407 unsigned int mem;
408 unsigned int sync;
Javier González766c8ce2018-10-09 13:12:15 +0200409 unsigned int threshold;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200410
411 sync = READ_ONCE(rb->sync);
412 mem = READ_ONCE(rb->mem);
413
Javier González766c8ce2018-10-09 13:12:15 +0200414 threshold = nr_entries + rb->back_thres;
415
416 if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200417 return 0;
418
419 if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
420 return 0;
421
422 *pos = mem;
423
424 return 1;
425}
426
427static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
428 unsigned int *pos)
429{
430 if (!__pblk_rb_may_write(rb, nr_entries, pos))
431 return 0;
432
433 /* Protect from read count */
Javier González40b86572018-10-09 13:12:09 +0200434 smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries));
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200435 return 1;
436}
437
Javier González588726d32017-06-26 11:57:29 +0200438void pblk_rb_flush(struct pblk_rb *rb)
439{
440 struct pblk *pblk = container_of(rb, struct pblk, rwb);
441 unsigned int mem = READ_ONCE(rb->mem);
442
Hans Holmberg8154d292018-01-05 14:16:09 +0100443 if (pblk_rb_flush_point_set(rb, NULL, mem))
Javier González588726d32017-06-26 11:57:29 +0200444 return;
445
Hans Holmbergcc9c9a02018-06-01 16:41:13 +0200446 pblk_write_kick(pblk);
Javier González588726d32017-06-26 11:57:29 +0200447}
448
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200449static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
450 unsigned int *pos, struct bio *bio,
451 int *io_ret)
452{
453 unsigned int mem;
454
455 if (!__pblk_rb_may_write(rb, nr_entries, pos))
456 return 0;
457
Javier González40b86572018-10-09 13:12:09 +0200458 mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200459 *io_ret = NVM_IO_DONE;
460
461 if (bio->bi_opf & REQ_PREFLUSH) {
462 struct pblk *pblk = container_of(rb, struct pblk, rwb);
463
Hans Holmberg5d149bf2018-03-30 00:04:55 +0200464 atomic64_inc(&pblk->nr_flush);
Hans Holmberg8154d292018-01-05 14:16:09 +0100465 if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200466 *io_ret = NVM_IO_OK;
467 }
468
469 /* Protect from read count */
470 smp_store_release(&rb->mem, mem);
Javier González6ca2f712017-10-13 14:46:17 +0200471
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200472 return 1;
473}
474
475/*
476 * Atomically check that (i) there is space on the write buffer for the
477 * incoming I/O, and (ii) the current I/O type has enough budget in the write
478 * buffer (rate-limiter).
479 */
480int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
481 unsigned int nr_entries, unsigned int *pos)
482{
483 struct pblk *pblk = container_of(rb, struct pblk, rwb);
Javier González588726d32017-06-26 11:57:29 +0200484 int io_ret;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200485
486 spin_lock(&rb->w_lock);
Javier González588726d32017-06-26 11:57:29 +0200487 io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
488 if (io_ret) {
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200489 spin_unlock(&rb->w_lock);
Javier González588726d32017-06-26 11:57:29 +0200490 return io_ret;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200491 }
492
Javier González588726d32017-06-26 11:57:29 +0200493 if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200494 spin_unlock(&rb->w_lock);
495 return NVM_IO_REQUEUE;
496 }
497
498 pblk_rl_user_in(&pblk->rl, nr_entries);
499 spin_unlock(&rb->w_lock);
500
Javier González588726d32017-06-26 11:57:29 +0200501 return io_ret;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200502}
503
504/*
505 * Look at pblk_rb_may_write_user comment
506 */
507int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
508 unsigned int *pos)
509{
510 struct pblk *pblk = container_of(rb, struct pblk, rwb);
511
512 spin_lock(&rb->w_lock);
513 if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
514 spin_unlock(&rb->w_lock);
515 return 0;
516 }
517
518 if (!pblk_rb_may_write(rb, nr_entries, pos)) {
519 spin_unlock(&rb->w_lock);
520 return 0;
521 }
522
523 pblk_rl_gc_in(&pblk->rl, nr_entries);
524 spin_unlock(&rb->w_lock);
525
526 return 1;
527}
528
529/*
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200530 * Read available entries on rb and add them to the given bio. To avoid a memory
531 * copy, a page reference to the write buffer is used to be added to the bio.
532 *
533 * This function is used by the write thread to form the write bio that will
534 * persist data on the write buffer to the media.
535 */
Javier Gonzálezd624f372017-06-26 11:57:15 +0200536unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
Javier González875d94f2017-10-13 14:46:18 +0200537 unsigned int pos, unsigned int nr_entries,
538 unsigned int count)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200539{
540 struct pblk *pblk = container_of(rb, struct pblk, rwb);
Javier Gonzálezd624f372017-06-26 11:57:15 +0200541 struct request_queue *q = pblk->dev->q;
542 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
Javier González875d94f2017-10-13 14:46:18 +0200543 struct bio *bio = rqd->bio;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200544 struct pblk_rb_entry *entry;
545 struct page *page;
Javier Gonzálezd624f372017-06-26 11:57:15 +0200546 unsigned int pad = 0, to_read = nr_entries;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200547 unsigned int i;
548 int flags;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200549
550 if (count < nr_entries) {
551 pad = nr_entries - count;
552 to_read = count;
553 }
554
555 c_ctx->sentry = pos;
556 c_ctx->nr_valid = to_read;
557 c_ctx->nr_padded = pad;
558
559 for (i = 0; i < to_read; i++) {
560 entry = &rb->entries[pos];
561
562 /* A write has been allowed into the buffer, but data is still
563 * being copied to it. It is ok to busy wait.
564 */
565try:
566 flags = READ_ONCE(entry->w_ctx.flags);
Javier González10888122017-06-30 17:56:37 +0200567 if (!(flags & PBLK_WRITTEN_DATA)) {
568 io_schedule();
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200569 goto try;
Javier González10888122017-06-30 17:56:37 +0200570 }
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200571
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200572 page = virt_to_page(entry->data);
573 if (!page) {
Matias Bjørling4e495a42018-07-13 10:48:42 +0200574 pblk_err(pblk, "could not allocate write bio page\n");
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200575 flags &= ~PBLK_WRITTEN_DATA;
576 flags |= PBLK_SUBMITTED_ENTRY;
577 /* Release flags on context. Protect from writes */
578 smp_store_release(&entry->w_ctx.flags, flags);
Javier Gonzálezd624f372017-06-26 11:57:15 +0200579 return NVM_IO_ERR;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200580 }
581
Javier Gonzálezd624f372017-06-26 11:57:15 +0200582 if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
583 rb->seg_size) {
Matias Bjørling4e495a42018-07-13 10:48:42 +0200584 pblk_err(pblk, "could not add page to write bio\n");
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200585 flags &= ~PBLK_WRITTEN_DATA;
586 flags |= PBLK_SUBMITTED_ENTRY;
587 /* Release flags on context. Protect from writes */
588 smp_store_release(&entry->w_ctx.flags, flags);
Javier Gonzálezd624f372017-06-26 11:57:15 +0200589 return NVM_IO_ERR;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200590 }
591
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200592 flags &= ~PBLK_WRITTEN_DATA;
593 flags |= PBLK_SUBMITTED_ENTRY;
594
595 /* Release flags on context. Protect from writes */
596 smp_store_release(&entry->w_ctx.flags, flags);
597
Javier González40b86572018-10-09 13:12:09 +0200598 pos = pblk_rb_ptr_wrap(rb, pos, 1);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200599 }
600
Javier Gonzálezd624f372017-06-26 11:57:15 +0200601 if (pad) {
602 if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
Matias Bjørling4e495a42018-07-13 10:48:42 +0200603 pblk_err(pblk, "could not pad page in write bio\n");
Javier Gonzálezd624f372017-06-26 11:57:15 +0200604 return NVM_IO_ERR;
605 }
Hans Holmberg5d149bf2018-03-30 00:04:55 +0200606
607 if (pad < pblk->min_write_pgs)
608 atomic64_inc(&pblk->pad_dist[pad - 1]);
609 else
Matias Bjørling4e495a42018-07-13 10:48:42 +0200610 pblk_warn(pblk, "padding more than min. sectors\n");
Hans Holmberg5d149bf2018-03-30 00:04:55 +0200611
612 atomic64_add(pad, &pblk->pad_wa);
Javier Gonzálezd624f372017-06-26 11:57:15 +0200613 }
614
Matias Bjørling880eda52018-07-13 10:48:37 +0200615#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg5d149bf2018-03-30 00:04:55 +0200616 atomic_long_add(pad, &pblk->padded_writes);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200617#endif
Javier Gonzálezd624f372017-06-26 11:57:15 +0200618
619 return NVM_IO_OK;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200620}
621
622/*
623 * Copy to bio only if the lba matches the one on the given cache entry.
624 * Otherwise, it means that the entry has been overwritten, and the bio should
625 * be directed to disk.
626 */
627int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
Javier González75cb8e92017-07-28 15:13:16 +0200628 struct ppa_addr ppa, int bio_iter, bool advanced_bio)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200629{
Javier González076984662017-06-30 17:56:42 +0200630 struct pblk *pblk = container_of(rb, struct pblk, rwb);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200631 struct pblk_rb_entry *entry;
632 struct pblk_w_ctx *w_ctx;
Javier González076984662017-06-30 17:56:42 +0200633 struct ppa_addr l2p_ppa;
634 u64 pos = pblk_addr_to_cacheline(ppa);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200635 void *data;
636 int flags;
637 int ret = 1;
638
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200639
Matias Bjørling880eda52018-07-13 10:48:37 +0200640#ifdef CONFIG_NVM_PBLK_DEBUG
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200641 /* Caller must ensure that the access will not cause an overflow */
642 BUG_ON(pos >= rb->nr_entries);
643#endif
644 entry = &rb->entries[pos];
645 w_ctx = &entry->w_ctx;
646 flags = READ_ONCE(w_ctx->flags);
647
Javier González076984662017-06-30 17:56:42 +0200648 spin_lock(&rb->w_lock);
649 spin_lock(&pblk->trans_lock);
650 l2p_ppa = pblk_trans_map_get(pblk, lba);
651 spin_unlock(&pblk->trans_lock);
652
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200653 /* Check if the entry has been overwritten or is scheduled to be */
Javier González076984662017-06-30 17:56:42 +0200654 if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
655 flags & PBLK_WRITABLE_ENTRY) {
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200656 ret = 0;
657 goto out;
658 }
659
660 /* Only advance the bio if it hasn't been advanced already. If advanced,
661 * this bio is at least a partial bio (i.e., it has partially been
662 * filled with data from the cache). If part of the data resides on the
663 * media, we will read later on
664 */
Javier González75cb8e92017-07-28 15:13:16 +0200665 if (unlikely(!advanced_bio))
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200666 bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
667
668 data = bio_data(bio);
669 memcpy(data, entry->data, rb->seg_size);
670
671out:
672 spin_unlock(&rb->w_lock);
673 return ret;
674}
675
676struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
677{
Javier González40b86572018-10-09 13:12:09 +0200678 unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200679
680 return &rb->entries[entry].w_ctx;
681}
682
683unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
684 __acquires(&rb->s_lock)
685{
686 if (flags)
687 spin_lock_irqsave(&rb->s_lock, *flags);
688 else
689 spin_lock_irq(&rb->s_lock);
690
691 return rb->sync;
692}
693
694void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
695 __releases(&rb->s_lock)
696{
697 lockdep_assert_held(&rb->s_lock);
698
699 if (flags)
700 spin_unlock_irqrestore(&rb->s_lock, *flags);
701 else
702 spin_unlock_irq(&rb->s_lock);
703}
704
705unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
706{
Hans Holmberg533657c12018-01-05 14:16:10 +0100707 unsigned int sync, flush_point;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200708 lockdep_assert_held(&rb->s_lock);
709
710 sync = READ_ONCE(rb->sync);
Hans Holmberg533657c12018-01-05 14:16:10 +0100711 flush_point = READ_ONCE(rb->flush_point);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200712
Hans Holmberg533657c12018-01-05 14:16:10 +0100713 if (flush_point != EMPTY_ENTRY) {
714 unsigned int secs_to_flush;
715
716 secs_to_flush = pblk_rb_ring_count(flush_point, sync,
717 rb->nr_entries);
718 if (secs_to_flush < nr_entries) {
719 /* Protect flush points */
720 smp_store_release(&rb->flush_point, EMPTY_ENTRY);
721 }
722 }
723
Javier González40b86572018-10-09 13:12:09 +0200724 sync = pblk_rb_ptr_wrap(rb, sync, nr_entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200725
726 /* Protect from counts */
727 smp_store_release(&rb->sync, sync);
728
729 return sync;
730}
731
Hans Holmberg533657c12018-01-05 14:16:10 +0100732/* Calculate how many sectors to submit up to the current flush point. */
Hans Holmberg8154d292018-01-05 14:16:09 +0100733unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200734{
Hans Holmberg533657c12018-01-05 14:16:10 +0100735 unsigned int subm, sync, flush_point;
736 unsigned int submitted, to_flush;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200737
Hans Holmberg8154d292018-01-05 14:16:09 +0100738 /* Protect flush points */
739 flush_point = smp_load_acquire(&rb->flush_point);
740 if (flush_point == EMPTY_ENTRY)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200741 return 0;
742
Hans Holmberg533657c12018-01-05 14:16:10 +0100743 /* Protect syncs */
744 sync = smp_load_acquire(&rb->sync);
745
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200746 subm = READ_ONCE(rb->subm);
Hans Holmberg533657c12018-01-05 14:16:10 +0100747 submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200748
749 /* The sync point itself counts as a sector to sync */
Hans Holmberg533657c12018-01-05 14:16:10 +0100750 to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200751
Hans Holmberg533657c12018-01-05 14:16:10 +0100752 return (submitted < to_flush) ? (to_flush - submitted) : 0;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200753}
754
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200755int pblk_rb_tear_down_check(struct pblk_rb *rb)
756{
757 struct pblk_rb_entry *entry;
758 int i;
759 int ret = 0;
760
761 spin_lock(&rb->w_lock);
762 spin_lock_irq(&rb->s_lock);
763
764 if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
765 (rb->sync == rb->l2p_update) &&
Hans Holmberg8154d292018-01-05 14:16:09 +0100766 (rb->flush_point == EMPTY_ENTRY)) {
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200767 goto out;
768 }
769
770 if (!rb->entries) {
771 ret = 1;
772 goto out;
773 }
774
775 for (i = 0; i < rb->nr_entries; i++) {
776 entry = &rb->entries[i];
777
778 if (!entry->data) {
779 ret = 1;
780 goto out;
781 }
782 }
783
784out:
785 spin_unlock(&rb->w_lock);
786 spin_unlock_irq(&rb->s_lock);
787
788 return ret;
789}
790
791unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
792{
793 return (pos & (rb->nr_entries - 1));
794}
795
796int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
797{
798 return (pos >= rb->nr_entries);
799}
800
801ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
802{
803 struct pblk *pblk = container_of(rb, struct pblk, rwb);
804 struct pblk_c_ctx *c;
805 ssize_t offset;
806 int queued_entries = 0;
807
808 spin_lock_irq(&rb->s_lock);
809 list_for_each_entry(c, &pblk->compl_list, list)
810 queued_entries++;
811 spin_unlock_irq(&rb->s_lock);
812
Hans Holmberg8154d292018-01-05 14:16:09 +0100813 if (rb->flush_point != EMPTY_ENTRY)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200814 offset = scnprintf(buf, PAGE_SIZE,
815 "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
816 rb->nr_entries,
817 rb->mem,
818 rb->subm,
819 rb->sync,
820 rb->l2p_update,
Matias Bjørling880eda52018-07-13 10:48:37 +0200821#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg8154d292018-01-05 14:16:09 +0100822 atomic_read(&rb->inflight_flush_point),
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200823#else
824 0,
825#endif
Hans Holmberg8154d292018-01-05 14:16:09 +0100826 rb->flush_point,
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200827 pblk_rb_read_count(rb),
828 pblk_rb_space(rb),
Hans Holmberg8154d292018-01-05 14:16:09 +0100829 pblk_rb_flush_point_count(rb),
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200830 queued_entries);
831 else
832 offset = scnprintf(buf, PAGE_SIZE,
833 "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
834 rb->nr_entries,
835 rb->mem,
836 rb->subm,
837 rb->sync,
838 rb->l2p_update,
Matias Bjørling880eda52018-07-13 10:48:37 +0200839#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg8154d292018-01-05 14:16:09 +0100840 atomic_read(&rb->inflight_flush_point),
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200841#else
842 0,
843#endif
844 pblk_rb_read_count(rb),
845 pblk_rb_space(rb),
Hans Holmberg8154d292018-01-05 14:16:09 +0100846 pblk_rb_flush_point_count(rb),
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200847 queued_entries);
848
849 return offset;
850}