blob: 03c241b340ea7abf016d72d126818c35f833c83d [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *
 * Based upon the circular ringbuffer.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-rb.c - pblk's write buffer
 */
19
20#include <linux/circ_buf.h>
21
22#include "pblk.h"
23
/*
 * Taken for writing while the list of page sets backing a write buffer
 * (rb->pages) is built in pblk_rb_init() or torn down in pblk_rb_data_free().
 */
static DECLARE_RWSEM(pblk_rb_lock);
25
Javier González9bd1f872018-10-09 13:12:10 +020026static void pblk_rb_data_free(struct pblk_rb *rb)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020027{
28 struct pblk_rb_pages *p, *t;
29
30 down_write(&pblk_rb_lock);
31 list_for_each_entry_safe(p, t, &rb->pages, list) {
32 free_pages((unsigned long)page_address(p->pages), p->order);
33 list_del(&p->list);
34 kfree(p);
35 }
36 up_write(&pblk_rb_lock);
37}
38
Javier González9bd1f872018-10-09 13:12:10 +020039void pblk_rb_free(struct pblk_rb *rb)
40{
41 pblk_rb_data_free(rb);
42 vfree(rb->entries);
43}
44
45/*
46 * pblk_rb_calculate_size -- calculate the size of the write buffer
47 */
Javier Gonzálezb4cdc422019-02-11 13:25:08 +010048static unsigned int pblk_rb_calculate_size(unsigned int nr_entries,
49 unsigned int threshold)
Javier González9bd1f872018-10-09 13:12:10 +020050{
Javier Gonzálezb4cdc422019-02-11 13:25:08 +010051 unsigned int thr_sz = 1 << (get_count_order(threshold + NVM_MAX_VLBA));
52 unsigned int max_sz = max(thr_sz, nr_entries);
53 unsigned int max_io;
54
55 /* Alloc a write buffer that can (i) fit at least two split bios
56 * (considering max I/O size NVM_MAX_VLBA, and (ii) guarantee that the
57 * threshold will be respected
58 */
59 max_io = (1 << max((int)(get_count_order(max_sz)),
60 (int)(get_count_order(NVM_MAX_VLBA << 1))));
61 if ((threshold + NVM_MAX_VLBA) >= max_io)
62 max_io <<= 1;
63
64 return max_io;
Javier González9bd1f872018-10-09 13:12:10 +020065}
66
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020067/*
68 * Initialize ring buffer. The data and metadata buffers must be previously
69 * allocated and their size must be a power of two
Mauro Carvalho Chehab5fb94e92018-05-08 15:14:57 -030070 * (Documentation/core-api/circular-buffers.rst)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020071 */
Javier González766c8ce2018-10-09 13:12:15 +020072int pblk_rb_init(struct pblk_rb *rb, unsigned int size, unsigned int threshold,
73 unsigned int seg_size)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020074{
75 struct pblk *pblk = container_of(rb, struct pblk, rwb);
Javier González9bd1f872018-10-09 13:12:10 +020076 struct pblk_rb_entry *entries;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020077 unsigned int init_entry = 0;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020078 unsigned int max_order = MAX_ORDER - 1;
Javier González9bd1f872018-10-09 13:12:10 +020079 unsigned int power_size, power_seg_sz;
80 unsigned int alloc_order, order, iter;
81 unsigned int nr_entries;
82
Javier Gonzálezb4cdc422019-02-11 13:25:08 +010083 nr_entries = pblk_rb_calculate_size(size, threshold);
Javier González9bd1f872018-10-09 13:12:10 +020084 entries = vzalloc(array_size(nr_entries, sizeof(struct pblk_rb_entry)));
85 if (!entries)
86 return -ENOMEM;
87
Javier Gonzálezb4cdc422019-02-11 13:25:08 +010088 power_size = get_count_order(nr_entries);
Javier González9bd1f872018-10-09 13:12:10 +020089 power_seg_sz = get_count_order(seg_size);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020090
91 down_write(&pblk_rb_lock);
Javier González9bd1f872018-10-09 13:12:10 +020092 rb->entries = entries;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020093 rb->seg_size = (1 << power_seg_sz);
94 rb->nr_entries = (1 << power_size);
95 rb->mem = rb->subm = rb->sync = rb->l2p_update = 0;
Javier González766c8ce2018-10-09 13:12:15 +020096 rb->back_thres = threshold;
Hans Holmberg8154d292018-01-05 14:16:09 +010097 rb->flush_point = EMPTY_ENTRY;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +020098
99 spin_lock_init(&rb->w_lock);
100 spin_lock_init(&rb->s_lock);
101
102 INIT_LIST_HEAD(&rb->pages);
103
Javier González9bd1f872018-10-09 13:12:10 +0200104 alloc_order = power_size;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200105 if (alloc_order >= max_order) {
106 order = max_order;
107 iter = (1 << (alloc_order - max_order));
108 } else {
109 order = alloc_order;
110 iter = 1;
111 }
112
113 do {
114 struct pblk_rb_entry *entry;
115 struct pblk_rb_pages *page_set;
116 void *kaddr;
117 unsigned long set_size;
118 int i;
119
120 page_set = kmalloc(sizeof(struct pblk_rb_pages), GFP_KERNEL);
121 if (!page_set) {
122 up_write(&pblk_rb_lock);
Javier González9bd1f872018-10-09 13:12:10 +0200123 vfree(entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200124 return -ENOMEM;
125 }
126
127 page_set->order = order;
128 page_set->pages = alloc_pages(GFP_KERNEL, order);
129 if (!page_set->pages) {
130 kfree(page_set);
131 pblk_rb_data_free(rb);
132 up_write(&pblk_rb_lock);
Javier González9bd1f872018-10-09 13:12:10 +0200133 vfree(entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200134 return -ENOMEM;
135 }
136 kaddr = page_address(page_set->pages);
137
138 entry = &rb->entries[init_entry];
139 entry->data = kaddr;
140 entry->cacheline = pblk_cacheline_to_addr(init_entry++);
141 entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
142
143 set_size = (1 << order);
144 for (i = 1; i < set_size; i++) {
145 entry = &rb->entries[init_entry];
146 entry->cacheline = pblk_cacheline_to_addr(init_entry++);
147 entry->data = kaddr + (i * rb->seg_size);
148 entry->w_ctx.flags = PBLK_WRITABLE_ENTRY;
149 bio_list_init(&entry->w_ctx.bios);
150 }
151
152 list_add_tail(&page_set->list, &rb->pages);
153 iter--;
154 } while (iter > 0);
155 up_write(&pblk_rb_lock);
156
Matias Bjørling880eda52018-07-13 10:48:37 +0200157#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg8154d292018-01-05 14:16:09 +0100158 atomic_set(&rb->inflight_flush_point, 0);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200159#endif
160
161 /*
162 * Initialize rate-limiter, which controls access to the write buffer
Hua Su6e82f0b2018-12-11 20:16:17 +0100163 * by user and GC I/O
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200164 */
Javier Gonzálezb4cdc422019-02-11 13:25:08 +0100165 pblk_rl_init(&pblk->rl, rb->nr_entries, threshold);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200166
167 return 0;
168}
169
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200170static void clean_wctx(struct pblk_w_ctx *w_ctx)
171{
172 int flags;
173
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200174 flags = READ_ONCE(w_ctx->flags);
Javier Gonzáleze37d0792018-06-01 15:04:19 +0200175 WARN_ONCE(!(flags & PBLK_SUBMITTED_ENTRY),
176 "pblk: overwriting unsubmitted data\n");
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200177
178 /* Release flags on context. Protect from writes and reads */
179 smp_store_release(&w_ctx->flags, PBLK_WRITABLE_ENTRY);
180 pblk_ppa_set_empty(&w_ctx->ppa);
Javier González076984662017-06-30 17:56:42 +0200181 w_ctx->lba = ADDR_EMPTY;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200182}
183
/* Number of entries between @tail and @head in a ring of @size entries. */
#define pblk_rb_ring_count(head, tail, size) \
					CIRC_CNT(head, tail, size)

/* Free entries left in a ring of @size entries; @rb is not used. */
#define pblk_rb_ring_space(rb, head, tail, size) \
					(CIRC_SPACE(head, tail, size))
187
188/*
189 * Buffer space is calculated with respect to the back pointer signaling
190 * synchronized entries to the media.
191 */
192static unsigned int pblk_rb_space(struct pblk_rb *rb)
193{
194 unsigned int mem = READ_ONCE(rb->mem);
195 unsigned int sync = READ_ONCE(rb->sync);
196
197 return pblk_rb_ring_space(rb, mem, sync, rb->nr_entries);
198}
199
Javier González40b86572018-10-09 13:12:09 +0200200unsigned int pblk_rb_ptr_wrap(struct pblk_rb *rb, unsigned int p,
201 unsigned int nr_entries)
202{
203 return (p + nr_entries) & (rb->nr_entries - 1);
204}
205
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200206/*
207 * Buffer count is calculated with respect to the submission entry signaling the
208 * entries that are available to send to the media
209 */
210unsigned int pblk_rb_read_count(struct pblk_rb *rb)
211{
212 unsigned int mem = READ_ONCE(rb->mem);
213 unsigned int subm = READ_ONCE(rb->subm);
214
215 return pblk_rb_ring_count(mem, subm, rb->nr_entries);
216}
217
Javier Gonzálezee8d5c12017-06-30 17:56:40 +0200218unsigned int pblk_rb_sync_count(struct pblk_rb *rb)
219{
220 unsigned int mem = READ_ONCE(rb->mem);
221 unsigned int sync = READ_ONCE(rb->sync);
222
223 return pblk_rb_ring_count(mem, sync, rb->nr_entries);
224}
225
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200226unsigned int pblk_rb_read_commit(struct pblk_rb *rb, unsigned int nr_entries)
227{
228 unsigned int subm;
229
230 subm = READ_ONCE(rb->subm);
231 /* Commit read means updating submission pointer */
Javier González40b86572018-10-09 13:12:09 +0200232 smp_store_release(&rb->subm, pblk_rb_ptr_wrap(rb, subm, nr_entries));
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200233
234 return subm;
235}
236
Rakesh Pandit05ed3442017-10-13 14:46:32 +0200237static int __pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int to_update)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200238{
239 struct pblk *pblk = container_of(rb, struct pblk, rwb);
240 struct pblk_line *line;
241 struct pblk_rb_entry *entry;
242 struct pblk_w_ctx *w_ctx;
Javier Gonzálezb20ba1b2017-06-26 11:57:27 +0200243 unsigned int user_io = 0, gc_io = 0;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200244 unsigned int i;
Javier Gonzálezb20ba1b2017-06-26 11:57:27 +0200245 int flags;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200246
247 for (i = 0; i < to_update; i++) {
Rakesh Pandit05ed3442017-10-13 14:46:32 +0200248 entry = &rb->entries[rb->l2p_update];
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200249 w_ctx = &entry->w_ctx;
250
Javier Gonzálezb20ba1b2017-06-26 11:57:27 +0200251 flags = READ_ONCE(entry->w_ctx.flags);
252 if (flags & PBLK_IOTYPE_USER)
253 user_io++;
254 else if (flags & PBLK_IOTYPE_GC)
255 gc_io++;
256 else
257 WARN(1, "pblk: unknown IO type\n");
258
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200259 pblk_update_map_dev(pblk, w_ctx->lba, w_ctx->ppa,
260 entry->cacheline);
261
Javier Gonzálezcb216652018-10-09 13:11:42 +0200262 line = pblk_ppa_to_line(pblk, w_ctx->ppa);
Heiner Litz05869422019-02-11 13:25:09 +0100263 atomic_dec(&line->sec_to_update);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200264 kref_put(&line->ref, pblk_line_put);
265 clean_wctx(w_ctx);
Javier González40b86572018-10-09 13:12:09 +0200266 rb->l2p_update = pblk_rb_ptr_wrap(rb, rb->l2p_update, 1);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200267 }
268
Javier Gonzálezb20ba1b2017-06-26 11:57:27 +0200269 pblk_rl_out(&pblk->rl, user_io, gc_io);
270
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200271 return 0;
272}
273
274/*
275 * When we move the l2p_update pointer, we update the l2p table - lookups will
276 * point to the physical address instead of to the cacheline in the write buffer
277 * from this moment on.
278 */
279static int pblk_rb_update_l2p(struct pblk_rb *rb, unsigned int nr_entries,
280 unsigned int mem, unsigned int sync)
281{
282 unsigned int space, count;
283 int ret = 0;
284
285 lockdep_assert_held(&rb->w_lock);
286
287 /* Update l2p only as buffer entries are being overwritten */
288 space = pblk_rb_ring_space(rb, mem, rb->l2p_update, rb->nr_entries);
289 if (space > nr_entries)
290 goto out;
291
292 count = nr_entries - space;
293 /* l2p_update used exclusively under rb->w_lock */
Rakesh Pandit05ed3442017-10-13 14:46:32 +0200294 ret = __pblk_rb_update_l2p(rb, count);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200295
296out:
297 return ret;
298}
299
300/*
301 * Update the l2p entry for all sectors stored on the write buffer. This means
302 * that all future lookups to the l2p table will point to a device address, not
303 * to the cacheline in the write buffer.
304 */
305void pblk_rb_sync_l2p(struct pblk_rb *rb)
306{
307 unsigned int sync;
308 unsigned int to_update;
309
310 spin_lock(&rb->w_lock);
311
312 /* Protect from reads and writes */
313 sync = smp_load_acquire(&rb->sync);
314
315 to_update = pblk_rb_ring_count(sync, rb->l2p_update, rb->nr_entries);
Rakesh Pandit05ed3442017-10-13 14:46:32 +0200316 __pblk_rb_update_l2p(rb, to_update);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200317
318 spin_unlock(&rb->w_lock);
319}
320
321/*
322 * Write @nr_entries to ring buffer from @data buffer if there is enough space.
323 * Typically, 4KB data chunks coming from a bio will be copied to the ring
324 * buffer, thus the write will fail if not all incoming data can be copied.
325 *
326 */
327static void __pblk_rb_write_entry(struct pblk_rb *rb, void *data,
328 struct pblk_w_ctx w_ctx,
329 struct pblk_rb_entry *entry)
330{
331 memcpy(entry->data, data, rb->seg_size);
332
333 entry->w_ctx.lba = w_ctx.lba;
334 entry->w_ctx.ppa = w_ctx.ppa;
335}
336
337void pblk_rb_write_entry_user(struct pblk_rb *rb, void *data,
338 struct pblk_w_ctx w_ctx, unsigned int ring_pos)
339{
340 struct pblk *pblk = container_of(rb, struct pblk, rwb);
341 struct pblk_rb_entry *entry;
342 int flags;
343
344 entry = &rb->entries[ring_pos];
345 flags = READ_ONCE(entry->w_ctx.flags);
Matias Bjørling880eda52018-07-13 10:48:37 +0200346#ifdef CONFIG_NVM_PBLK_DEBUG
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200347 /* Caller must guarantee that the entry is free */
348 BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
349#endif
350
351 __pblk_rb_write_entry(rb, data, w_ctx, entry);
352
353 pblk_update_map_cache(pblk, w_ctx.lba, entry->cacheline);
354 flags = w_ctx.flags | PBLK_WRITTEN_DATA;
355
356 /* Release flags on write context. Protect from writes */
357 smp_store_release(&entry->w_ctx.flags, flags);
358}
359
360void pblk_rb_write_entry_gc(struct pblk_rb *rb, void *data,
Javier Gonzálezd3401212017-10-13 14:46:14 +0200361 struct pblk_w_ctx w_ctx, struct pblk_line *line,
362 u64 paddr, unsigned int ring_pos)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200363{
364 struct pblk *pblk = container_of(rb, struct pblk, rwb);
365 struct pblk_rb_entry *entry;
366 int flags;
367
368 entry = &rb->entries[ring_pos];
369 flags = READ_ONCE(entry->w_ctx.flags);
Matias Bjørling880eda52018-07-13 10:48:37 +0200370#ifdef CONFIG_NVM_PBLK_DEBUG
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200371 /* Caller must guarantee that the entry is free */
372 BUG_ON(!(flags & PBLK_WRITABLE_ENTRY));
373#endif
374
375 __pblk_rb_write_entry(rb, data, w_ctx, entry);
376
Javier Gonzálezd3401212017-10-13 14:46:14 +0200377 if (!pblk_update_map_gc(pblk, w_ctx.lba, entry->cacheline, line, paddr))
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200378 entry->w_ctx.lba = ADDR_EMPTY;
379
380 flags = w_ctx.flags | PBLK_WRITTEN_DATA;
381
382 /* Release flags on write context. Protect from writes */
383 smp_store_release(&entry->w_ctx.flags, flags);
384}
385
Hans Holmberg8154d292018-01-05 14:16:09 +0100386static int pblk_rb_flush_point_set(struct pblk_rb *rb, struct bio *bio,
Hans Holmbergcc9c9a02018-06-01 16:41:13 +0200387 unsigned int pos)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200388{
389 struct pblk_rb_entry *entry;
Hans Holmberg533657c12018-01-05 14:16:10 +0100390 unsigned int sync, flush_point;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200391
Hans Holmberg3c05ef112018-03-30 00:04:58 +0200392 pblk_rb_sync_init(rb, NULL);
Hans Holmberg533657c12018-01-05 14:16:10 +0100393 sync = READ_ONCE(rb->sync);
394
Hans Holmberg3c05ef112018-03-30 00:04:58 +0200395 if (pos == sync) {
396 pblk_rb_sync_end(rb, NULL);
Hans Holmberg533657c12018-01-05 14:16:10 +0100397 return 0;
Hans Holmberg3c05ef112018-03-30 00:04:58 +0200398 }
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200399
Matias Bjørling880eda52018-07-13 10:48:37 +0200400#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg8154d292018-01-05 14:16:09 +0100401 atomic_inc(&rb->inflight_flush_point);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200402#endif
403
Hans Holmberg8154d292018-01-05 14:16:09 +0100404 flush_point = (pos == 0) ? (rb->nr_entries - 1) : (pos - 1);
405 entry = &rb->entries[flush_point];
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200406
Hans Holmberg8154d292018-01-05 14:16:09 +0100407 /* Protect flush points */
408 smp_store_release(&rb->flush_point, flush_point);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200409
Hans Holmbergb36bbf92018-01-05 14:16:11 +0100410 if (bio)
411 bio_list_add(&entry->w_ctx.bios, bio);
Javier González588726d32017-06-26 11:57:29 +0200412
Hans Holmbergb36bbf92018-01-05 14:16:11 +0100413 pblk_rb_sync_end(rb, NULL);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200414
Hans Holmbergb36bbf92018-01-05 14:16:11 +0100415 return bio ? 1 : 0;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200416}
417
418static int __pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
419 unsigned int *pos)
420{
421 unsigned int mem;
422 unsigned int sync;
Javier González766c8ce2018-10-09 13:12:15 +0200423 unsigned int threshold;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200424
425 sync = READ_ONCE(rb->sync);
426 mem = READ_ONCE(rb->mem);
427
Javier González766c8ce2018-10-09 13:12:15 +0200428 threshold = nr_entries + rb->back_thres;
429
430 if (pblk_rb_ring_space(rb, mem, sync, rb->nr_entries) < threshold)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200431 return 0;
432
433 if (pblk_rb_update_l2p(rb, nr_entries, mem, sync))
434 return 0;
435
436 *pos = mem;
437
438 return 1;
439}
440
441static int pblk_rb_may_write(struct pblk_rb *rb, unsigned int nr_entries,
442 unsigned int *pos)
443{
444 if (!__pblk_rb_may_write(rb, nr_entries, pos))
445 return 0;
446
447 /* Protect from read count */
Javier González40b86572018-10-09 13:12:09 +0200448 smp_store_release(&rb->mem, pblk_rb_ptr_wrap(rb, *pos, nr_entries));
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200449 return 1;
450}
451
Javier González588726d32017-06-26 11:57:29 +0200452void pblk_rb_flush(struct pblk_rb *rb)
453{
454 struct pblk *pblk = container_of(rb, struct pblk, rwb);
455 unsigned int mem = READ_ONCE(rb->mem);
456
Hans Holmberg8154d292018-01-05 14:16:09 +0100457 if (pblk_rb_flush_point_set(rb, NULL, mem))
Javier González588726d32017-06-26 11:57:29 +0200458 return;
459
Hans Holmbergcc9c9a02018-06-01 16:41:13 +0200460 pblk_write_kick(pblk);
Javier González588726d32017-06-26 11:57:29 +0200461}
462
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200463static int pblk_rb_may_write_flush(struct pblk_rb *rb, unsigned int nr_entries,
464 unsigned int *pos, struct bio *bio,
465 int *io_ret)
466{
467 unsigned int mem;
468
469 if (!__pblk_rb_may_write(rb, nr_entries, pos))
470 return 0;
471
Javier González40b86572018-10-09 13:12:09 +0200472 mem = pblk_rb_ptr_wrap(rb, *pos, nr_entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200473 *io_ret = NVM_IO_DONE;
474
475 if (bio->bi_opf & REQ_PREFLUSH) {
476 struct pblk *pblk = container_of(rb, struct pblk, rwb);
477
Hans Holmberg5d149bf2018-03-30 00:04:55 +0200478 atomic64_inc(&pblk->nr_flush);
Hans Holmberg8154d292018-01-05 14:16:09 +0100479 if (pblk_rb_flush_point_set(&pblk->rwb, bio, mem))
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200480 *io_ret = NVM_IO_OK;
481 }
482
483 /* Protect from read count */
484 smp_store_release(&rb->mem, mem);
Javier González6ca2f712017-10-13 14:46:17 +0200485
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200486 return 1;
487}
488
489/*
490 * Atomically check that (i) there is space on the write buffer for the
491 * incoming I/O, and (ii) the current I/O type has enough budget in the write
492 * buffer (rate-limiter).
493 */
494int pblk_rb_may_write_user(struct pblk_rb *rb, struct bio *bio,
495 unsigned int nr_entries, unsigned int *pos)
496{
497 struct pblk *pblk = container_of(rb, struct pblk, rwb);
Javier González588726d32017-06-26 11:57:29 +0200498 int io_ret;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200499
500 spin_lock(&rb->w_lock);
Javier González588726d32017-06-26 11:57:29 +0200501 io_ret = pblk_rl_user_may_insert(&pblk->rl, nr_entries);
502 if (io_ret) {
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200503 spin_unlock(&rb->w_lock);
Javier González588726d32017-06-26 11:57:29 +0200504 return io_ret;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200505 }
506
Javier González588726d32017-06-26 11:57:29 +0200507 if (!pblk_rb_may_write_flush(rb, nr_entries, pos, bio, &io_ret)) {
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200508 spin_unlock(&rb->w_lock);
509 return NVM_IO_REQUEUE;
510 }
511
512 pblk_rl_user_in(&pblk->rl, nr_entries);
513 spin_unlock(&rb->w_lock);
514
Javier González588726d32017-06-26 11:57:29 +0200515 return io_ret;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200516}
517
518/*
519 * Look at pblk_rb_may_write_user comment
520 */
521int pblk_rb_may_write_gc(struct pblk_rb *rb, unsigned int nr_entries,
522 unsigned int *pos)
523{
524 struct pblk *pblk = container_of(rb, struct pblk, rwb);
525
526 spin_lock(&rb->w_lock);
527 if (!pblk_rl_gc_may_insert(&pblk->rl, nr_entries)) {
528 spin_unlock(&rb->w_lock);
529 return 0;
530 }
531
532 if (!pblk_rb_may_write(rb, nr_entries, pos)) {
533 spin_unlock(&rb->w_lock);
534 return 0;
535 }
536
537 pblk_rl_gc_in(&pblk->rl, nr_entries);
538 spin_unlock(&rb->w_lock);
539
540 return 1;
541}
542
543/*
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200544 * Read available entries on rb and add them to the given bio. To avoid a memory
545 * copy, a page reference to the write buffer is used to be added to the bio.
546 *
547 * This function is used by the write thread to form the write bio that will
548 * persist data on the write buffer to the media.
549 */
Javier Gonzálezd624f372017-06-26 11:57:15 +0200550unsigned int pblk_rb_read_to_bio(struct pblk_rb *rb, struct nvm_rq *rqd,
Javier González875d94f2017-10-13 14:46:18 +0200551 unsigned int pos, unsigned int nr_entries,
552 unsigned int count)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200553{
554 struct pblk *pblk = container_of(rb, struct pblk, rwb);
Javier Gonzálezd624f372017-06-26 11:57:15 +0200555 struct request_queue *q = pblk->dev->q;
556 struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
Javier González875d94f2017-10-13 14:46:18 +0200557 struct bio *bio = rqd->bio;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200558 struct pblk_rb_entry *entry;
559 struct page *page;
Javier Gonzálezd624f372017-06-26 11:57:15 +0200560 unsigned int pad = 0, to_read = nr_entries;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200561 unsigned int i;
562 int flags;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200563
564 if (count < nr_entries) {
565 pad = nr_entries - count;
566 to_read = count;
567 }
568
Igor Konopko55d8ec32018-12-11 20:16:26 +0100569 /* Add space for packed metadata if in use*/
570 pad += (pblk->min_write_pgs - pblk->min_write_pgs_data);
571
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200572 c_ctx->sentry = pos;
573 c_ctx->nr_valid = to_read;
574 c_ctx->nr_padded = pad;
575
576 for (i = 0; i < to_read; i++) {
577 entry = &rb->entries[pos];
578
579 /* A write has been allowed into the buffer, but data is still
580 * being copied to it. It is ok to busy wait.
581 */
582try:
583 flags = READ_ONCE(entry->w_ctx.flags);
Javier González10888122017-06-30 17:56:37 +0200584 if (!(flags & PBLK_WRITTEN_DATA)) {
585 io_schedule();
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200586 goto try;
Javier González10888122017-06-30 17:56:37 +0200587 }
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200588
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200589 page = virt_to_page(entry->data);
590 if (!page) {
Matias Bjørling4e495a42018-07-13 10:48:42 +0200591 pblk_err(pblk, "could not allocate write bio page\n");
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200592 flags &= ~PBLK_WRITTEN_DATA;
593 flags |= PBLK_SUBMITTED_ENTRY;
594 /* Release flags on context. Protect from writes */
595 smp_store_release(&entry->w_ctx.flags, flags);
Javier Gonzálezd624f372017-06-26 11:57:15 +0200596 return NVM_IO_ERR;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200597 }
598
Javier Gonzálezd624f372017-06-26 11:57:15 +0200599 if (bio_add_pc_page(q, bio, page, rb->seg_size, 0) !=
600 rb->seg_size) {
Matias Bjørling4e495a42018-07-13 10:48:42 +0200601 pblk_err(pblk, "could not add page to write bio\n");
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200602 flags &= ~PBLK_WRITTEN_DATA;
603 flags |= PBLK_SUBMITTED_ENTRY;
604 /* Release flags on context. Protect from writes */
605 smp_store_release(&entry->w_ctx.flags, flags);
Javier Gonzálezd624f372017-06-26 11:57:15 +0200606 return NVM_IO_ERR;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200607 }
608
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200609 flags &= ~PBLK_WRITTEN_DATA;
610 flags |= PBLK_SUBMITTED_ENTRY;
611
612 /* Release flags on context. Protect from writes */
613 smp_store_release(&entry->w_ctx.flags, flags);
614
Javier González40b86572018-10-09 13:12:09 +0200615 pos = pblk_rb_ptr_wrap(rb, pos, 1);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200616 }
617
Javier Gonzálezd624f372017-06-26 11:57:15 +0200618 if (pad) {
619 if (pblk_bio_add_pages(pblk, bio, GFP_KERNEL, pad)) {
Matias Bjørling4e495a42018-07-13 10:48:42 +0200620 pblk_err(pblk, "could not pad page in write bio\n");
Javier Gonzálezd624f372017-06-26 11:57:15 +0200621 return NVM_IO_ERR;
622 }
Hans Holmberg5d149bf2018-03-30 00:04:55 +0200623
624 if (pad < pblk->min_write_pgs)
625 atomic64_inc(&pblk->pad_dist[pad - 1]);
626 else
Matias Bjørling4e495a42018-07-13 10:48:42 +0200627 pblk_warn(pblk, "padding more than min. sectors\n");
Hans Holmberg5d149bf2018-03-30 00:04:55 +0200628
629 atomic64_add(pad, &pblk->pad_wa);
Javier Gonzálezd624f372017-06-26 11:57:15 +0200630 }
631
Matias Bjørling880eda52018-07-13 10:48:37 +0200632#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg5d149bf2018-03-30 00:04:55 +0200633 atomic_long_add(pad, &pblk->padded_writes);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200634#endif
Javier Gonzálezd624f372017-06-26 11:57:15 +0200635
636 return NVM_IO_OK;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200637}
638
639/*
640 * Copy to bio only if the lba matches the one on the given cache entry.
641 * Otherwise, it means that the entry has been overwritten, and the bio should
642 * be directed to disk.
643 */
644int pblk_rb_copy_to_bio(struct pblk_rb *rb, struct bio *bio, sector_t lba,
Javier González75cb8e92017-07-28 15:13:16 +0200645 struct ppa_addr ppa, int bio_iter, bool advanced_bio)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200646{
Javier González076984662017-06-30 17:56:42 +0200647 struct pblk *pblk = container_of(rb, struct pblk, rwb);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200648 struct pblk_rb_entry *entry;
649 struct pblk_w_ctx *w_ctx;
Javier González076984662017-06-30 17:56:42 +0200650 struct ppa_addr l2p_ppa;
651 u64 pos = pblk_addr_to_cacheline(ppa);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200652 void *data;
653 int flags;
654 int ret = 1;
655
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200656
Matias Bjørling880eda52018-07-13 10:48:37 +0200657#ifdef CONFIG_NVM_PBLK_DEBUG
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200658 /* Caller must ensure that the access will not cause an overflow */
659 BUG_ON(pos >= rb->nr_entries);
660#endif
661 entry = &rb->entries[pos];
662 w_ctx = &entry->w_ctx;
663 flags = READ_ONCE(w_ctx->flags);
664
Javier González076984662017-06-30 17:56:42 +0200665 spin_lock(&rb->w_lock);
666 spin_lock(&pblk->trans_lock);
667 l2p_ppa = pblk_trans_map_get(pblk, lba);
668 spin_unlock(&pblk->trans_lock);
669
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200670 /* Check if the entry has been overwritten or is scheduled to be */
Javier González076984662017-06-30 17:56:42 +0200671 if (!pblk_ppa_comp(l2p_ppa, ppa) || w_ctx->lba != lba ||
672 flags & PBLK_WRITABLE_ENTRY) {
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200673 ret = 0;
674 goto out;
675 }
676
677 /* Only advance the bio if it hasn't been advanced already. If advanced,
678 * this bio is at least a partial bio (i.e., it has partially been
679 * filled with data from the cache). If part of the data resides on the
680 * media, we will read later on
681 */
Javier González75cb8e92017-07-28 15:13:16 +0200682 if (unlikely(!advanced_bio))
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200683 bio_advance(bio, bio_iter * PBLK_EXPOSED_PAGE_SIZE);
684
685 data = bio_data(bio);
686 memcpy(data, entry->data, rb->seg_size);
687
688out:
689 spin_unlock(&rb->w_lock);
690 return ret;
691}
692
693struct pblk_w_ctx *pblk_rb_w_ctx(struct pblk_rb *rb, unsigned int pos)
694{
Javier González40b86572018-10-09 13:12:09 +0200695 unsigned int entry = pblk_rb_ptr_wrap(rb, pos, 0);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200696
697 return &rb->entries[entry].w_ctx;
698}
699
700unsigned int pblk_rb_sync_init(struct pblk_rb *rb, unsigned long *flags)
701 __acquires(&rb->s_lock)
702{
703 if (flags)
704 spin_lock_irqsave(&rb->s_lock, *flags);
705 else
706 spin_lock_irq(&rb->s_lock);
707
708 return rb->sync;
709}
710
711void pblk_rb_sync_end(struct pblk_rb *rb, unsigned long *flags)
712 __releases(&rb->s_lock)
713{
714 lockdep_assert_held(&rb->s_lock);
715
716 if (flags)
717 spin_unlock_irqrestore(&rb->s_lock, *flags);
718 else
719 spin_unlock_irq(&rb->s_lock);
720}
721
722unsigned int pblk_rb_sync_advance(struct pblk_rb *rb, unsigned int nr_entries)
723{
Hans Holmberg533657c12018-01-05 14:16:10 +0100724 unsigned int sync, flush_point;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200725 lockdep_assert_held(&rb->s_lock);
726
727 sync = READ_ONCE(rb->sync);
Hans Holmberg533657c12018-01-05 14:16:10 +0100728 flush_point = READ_ONCE(rb->flush_point);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200729
Hans Holmberg533657c12018-01-05 14:16:10 +0100730 if (flush_point != EMPTY_ENTRY) {
731 unsigned int secs_to_flush;
732
733 secs_to_flush = pblk_rb_ring_count(flush_point, sync,
734 rb->nr_entries);
735 if (secs_to_flush < nr_entries) {
736 /* Protect flush points */
737 smp_store_release(&rb->flush_point, EMPTY_ENTRY);
738 }
739 }
740
Javier González40b86572018-10-09 13:12:09 +0200741 sync = pblk_rb_ptr_wrap(rb, sync, nr_entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200742
743 /* Protect from counts */
744 smp_store_release(&rb->sync, sync);
745
746 return sync;
747}
748
Hans Holmberg533657c12018-01-05 14:16:10 +0100749/* Calculate how many sectors to submit up to the current flush point. */
Hans Holmberg8154d292018-01-05 14:16:09 +0100750unsigned int pblk_rb_flush_point_count(struct pblk_rb *rb)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200751{
Hans Holmberg533657c12018-01-05 14:16:10 +0100752 unsigned int subm, sync, flush_point;
753 unsigned int submitted, to_flush;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200754
Hans Holmberg8154d292018-01-05 14:16:09 +0100755 /* Protect flush points */
756 flush_point = smp_load_acquire(&rb->flush_point);
757 if (flush_point == EMPTY_ENTRY)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200758 return 0;
759
Hans Holmberg533657c12018-01-05 14:16:10 +0100760 /* Protect syncs */
761 sync = smp_load_acquire(&rb->sync);
762
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200763 subm = READ_ONCE(rb->subm);
Hans Holmberg533657c12018-01-05 14:16:10 +0100764 submitted = pblk_rb_ring_count(subm, sync, rb->nr_entries);
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200765
766 /* The sync point itself counts as a sector to sync */
Hans Holmberg533657c12018-01-05 14:16:10 +0100767 to_flush = pblk_rb_ring_count(flush_point, sync, rb->nr_entries) + 1;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200768
Hans Holmberg533657c12018-01-05 14:16:10 +0100769 return (submitted < to_flush) ? (to_flush - submitted) : 0;
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200770}
771
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200772int pblk_rb_tear_down_check(struct pblk_rb *rb)
773{
774 struct pblk_rb_entry *entry;
775 int i;
776 int ret = 0;
777
778 spin_lock(&rb->w_lock);
779 spin_lock_irq(&rb->s_lock);
780
781 if ((rb->mem == rb->subm) && (rb->subm == rb->sync) &&
782 (rb->sync == rb->l2p_update) &&
Hans Holmberg8154d292018-01-05 14:16:09 +0100783 (rb->flush_point == EMPTY_ENTRY)) {
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200784 goto out;
785 }
786
787 if (!rb->entries) {
788 ret = 1;
789 goto out;
790 }
791
792 for (i = 0; i < rb->nr_entries; i++) {
793 entry = &rb->entries[i];
794
795 if (!entry->data) {
796 ret = 1;
797 goto out;
798 }
799 }
800
801out:
802 spin_unlock(&rb->w_lock);
803 spin_unlock_irq(&rb->s_lock);
804
805 return ret;
806}
807
808unsigned int pblk_rb_wrap_pos(struct pblk_rb *rb, unsigned int pos)
809{
810 return (pos & (rb->nr_entries - 1));
811}
812
813int pblk_rb_pos_oob(struct pblk_rb *rb, u64 pos)
814{
815 return (pos >= rb->nr_entries);
816}
817
818ssize_t pblk_rb_sysfs(struct pblk_rb *rb, char *buf)
819{
820 struct pblk *pblk = container_of(rb, struct pblk, rwb);
821 struct pblk_c_ctx *c;
822 ssize_t offset;
823 int queued_entries = 0;
824
825 spin_lock_irq(&rb->s_lock);
826 list_for_each_entry(c, &pblk->compl_list, list)
827 queued_entries++;
828 spin_unlock_irq(&rb->s_lock);
829
Hans Holmberg8154d292018-01-05 14:16:09 +0100830 if (rb->flush_point != EMPTY_ENTRY)
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200831 offset = scnprintf(buf, PAGE_SIZE,
832 "%u\t%u\t%u\t%u\t%u\t%u\t%u - %u/%u/%u - %d\n",
833 rb->nr_entries,
834 rb->mem,
835 rb->subm,
836 rb->sync,
837 rb->l2p_update,
Matias Bjørling880eda52018-07-13 10:48:37 +0200838#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg8154d292018-01-05 14:16:09 +0100839 atomic_read(&rb->inflight_flush_point),
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200840#else
841 0,
842#endif
Hans Holmberg8154d292018-01-05 14:16:09 +0100843 rb->flush_point,
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200844 pblk_rb_read_count(rb),
845 pblk_rb_space(rb),
Hans Holmberg8154d292018-01-05 14:16:09 +0100846 pblk_rb_flush_point_count(rb),
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200847 queued_entries);
848 else
849 offset = scnprintf(buf, PAGE_SIZE,
850 "%u\t%u\t%u\t%u\t%u\t%u\tNULL - %u/%u/%u - %d\n",
851 rb->nr_entries,
852 rb->mem,
853 rb->subm,
854 rb->sync,
855 rb->l2p_update,
Matias Bjørling880eda52018-07-13 10:48:37 +0200856#ifdef CONFIG_NVM_PBLK_DEBUG
Hans Holmberg8154d292018-01-05 14:16:09 +0100857 atomic_read(&rb->inflight_flush_point),
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200858#else
859 0,
860#endif
861 pblk_rb_read_count(rb),
862 pblk_rb_space(rb),
Hans Holmberg8154d292018-01-05 14:16:09 +0100863 pblk_rb_flush_point_count(rb),
Javier Gonzáleza4bd2172017-04-15 20:55:50 +0200864 queued_entries);
865
866 return offset;
867}