blob: 753986ea1583b3139e22ef6edb94f57d9a0beec9 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-or-later
/* kiocb-using read/write
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
7
8#include <linux/mount.h>
9#include <linux/slab.h>
10#include <linux/file.h>
11#include <linux/uio.h>
12#include <linux/falloc.h>
13#include <linux/sched/mm.h>
14#include <trace/events/fscache.h>
15#include "internal.h"
16
David Howells047487c2021-10-21 00:19:46 +010017struct cachefiles_kiocb {
18 struct kiocb iocb;
19 refcount_t ki_refcnt;
20 loff_t start;
21 union {
22 size_t skipped;
23 size_t len;
24 };
25 struct cachefiles_object *object;
26 netfs_io_terminated_t term_func;
27 void *term_func_priv;
28 bool was_async;
29 unsigned int inval_counter; /* Copy of cookie->inval_counter */
30 u64 b_writing;
31};
32
33static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
34{
35 if (refcount_dec_and_test(&ki->ki_refcnt)) {
36 cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
37 fput(ki->iocb.ki_filp);
38 kfree(ki);
39 }
40}
41
42/*
43 * Handle completion of a read from the cache.
44 */
45static void cachefiles_read_complete(struct kiocb *iocb, long ret)
46{
47 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
48 struct inode *inode = file_inode(ki->iocb.ki_filp);
49
50 _enter("%ld", ret);
51
52 if (ret < 0)
53 trace_cachefiles_io_error(ki->object, inode, ret,
54 cachefiles_trace_read_error);
55
56 if (ki->term_func) {
57 if (ret >= 0) {
58 if (ki->object->cookie->inval_counter == ki->inval_counter)
59 ki->skipped += ret;
60 else
61 ret = -ESTALE;
62 }
63
64 ki->term_func(ki->term_func_priv, ret, ki->was_async);
65 }
66
67 cachefiles_put_kiocb(ki);
68}
69
70/*
71 * Initiate a read from the cache.
72 */
73static int cachefiles_read(struct netfs_cache_resources *cres,
74 loff_t start_pos,
75 struct iov_iter *iter,
76 enum netfs_read_from_hole read_hole,
77 netfs_io_terminated_t term_func,
78 void *term_func_priv)
79{
80 struct cachefiles_object *object;
81 struct cachefiles_kiocb *ki;
82 struct file *file;
83 unsigned int old_nofs;
84 ssize_t ret = -ENOBUFS;
85 size_t len = iov_iter_count(iter), skipped = 0;
86
87 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
88 goto presubmission_error;
89
90 fscache_count_read();
91 object = cachefiles_cres_object(cres);
92 file = cachefiles_cres_file(cres);
93
94 _enter("%pD,%li,%llx,%zx/%llx",
95 file, file_inode(file)->i_ino, start_pos, len,
96 i_size_read(file_inode(file)));
97
98 /* If the caller asked us to seek for data before doing the read, then
99 * we should do that now. If we find a gap, we fill it with zeros.
100 */
101 if (read_hole != NETFS_READ_HOLE_IGNORE) {
102 loff_t off = start_pos, off2;
103
104 off2 = cachefiles_inject_read_error();
105 if (off2 == 0)
106 off2 = vfs_llseek(file, off, SEEK_DATA);
107 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
108 skipped = 0;
109 ret = off2;
110 goto presubmission_error;
111 }
112
113 if (off2 == -ENXIO || off2 >= start_pos + len) {
114 /* The region is beyond the EOF or there's no more data
115 * in the region, so clear the rest of the buffer and
116 * return success.
117 */
118 ret = -ENODATA;
119 if (read_hole == NETFS_READ_HOLE_FAIL)
120 goto presubmission_error;
121
122 iov_iter_zero(len, iter);
123 skipped = len;
124 ret = 0;
125 goto presubmission_error;
126 }
127
128 skipped = off2 - off;
129 iov_iter_zero(skipped, iter);
130 }
131
132 ret = -ENOMEM;
133 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
134 if (!ki)
135 goto presubmission_error;
136
137 refcount_set(&ki->ki_refcnt, 2);
138 ki->iocb.ki_filp = file;
139 ki->iocb.ki_pos = start_pos + skipped;
140 ki->iocb.ki_flags = IOCB_DIRECT;
141 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
142 ki->iocb.ki_ioprio = get_current_ioprio();
143 ki->skipped = skipped;
144 ki->object = object;
145 ki->inval_counter = cres->inval_counter;
146 ki->term_func = term_func;
147 ki->term_func_priv = term_func_priv;
148 ki->was_async = true;
149
150 if (ki->term_func)
151 ki->iocb.ki_complete = cachefiles_read_complete;
152
153 get_file(ki->iocb.ki_filp);
154 cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
155
156 trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
157 old_nofs = memalloc_nofs_save();
158 ret = cachefiles_inject_read_error();
159 if (ret == 0)
160 ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
161 memalloc_nofs_restore(old_nofs);
162 switch (ret) {
163 case -EIOCBQUEUED:
164 goto in_progress;
165
166 case -ERESTARTSYS:
167 case -ERESTARTNOINTR:
168 case -ERESTARTNOHAND:
169 case -ERESTART_RESTARTBLOCK:
170 /* There's no easy way to restart the syscall since other AIO's
171 * may be already running. Just fail this IO with EINTR.
172 */
173 ret = -EINTR;
174 fallthrough;
175 default:
176 ki->was_async = false;
177 cachefiles_read_complete(&ki->iocb, ret);
178 if (ret > 0)
179 ret = 0;
180 break;
181 }
182
183in_progress:
184 cachefiles_put_kiocb(ki);
185 _leave(" = %zd", ret);
186 return ret;
187
188presubmission_error:
189 if (term_func)
190 term_func(term_func_priv, ret < 0 ? ret : skipped, false);
191 return ret;
192}
193
194/*
David Howellsbee9f652022-01-27 16:02:50 +0000195 * Query the occupancy of the cache in a region, returning where the next chunk
196 * of data starts and how long it is.
197 */
198static int cachefiles_query_occupancy(struct netfs_cache_resources *cres,
199 loff_t start, size_t len, size_t granularity,
200 loff_t *_data_start, size_t *_data_len)
201{
202 struct cachefiles_object *object;
203 struct file *file;
204 loff_t off, off2;
205
206 *_data_start = -1;
207 *_data_len = 0;
208
209 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
210 return -ENOBUFS;
211
212 object = cachefiles_cres_object(cres);
213 file = cachefiles_cres_file(cres);
214 granularity = max_t(size_t, object->volume->cache->bsize, granularity);
215
216 _enter("%pD,%li,%llx,%zx/%llx",
217 file, file_inode(file)->i_ino, start, len,
218 i_size_read(file_inode(file)));
219
220 off = cachefiles_inject_read_error();
221 if (off == 0)
222 off = vfs_llseek(file, start, SEEK_DATA);
223 if (off == -ENXIO)
224 return -ENODATA; /* Beyond EOF */
225 if (off < 0 && off >= (loff_t)-MAX_ERRNO)
226 return -ENOBUFS; /* Error. */
227 if (round_up(off, granularity) >= start + len)
228 return -ENODATA; /* No data in range */
229
230 off2 = cachefiles_inject_read_error();
231 if (off2 == 0)
232 off2 = vfs_llseek(file, off, SEEK_HOLE);
233 if (off2 == -ENXIO)
234 return -ENODATA; /* Beyond EOF */
235 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO)
236 return -ENOBUFS; /* Error. */
237
238 /* Round away partial blocks */
239 off = round_up(off, granularity);
240 off2 = round_down(off2, granularity);
241 if (off2 <= off)
242 return -ENODATA;
243
244 *_data_start = off;
245 if (off2 > start + len)
246 *_data_len = len;
247 else
248 *_data_len = off2 - off;
249 return 0;
250}
251
252/*
David Howells047487c2021-10-21 00:19:46 +0100253 * Handle completion of a write to the cache.
254 */
255static void cachefiles_write_complete(struct kiocb *iocb, long ret)
256{
257 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
258 struct cachefiles_object *object = ki->object;
259 struct inode *inode = file_inode(ki->iocb.ki_filp);
260
261 _enter("%ld", ret);
262
263 /* Tell lockdep we inherited freeze protection from submission thread */
264 __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
265 __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
266
267 if (ret < 0)
268 trace_cachefiles_io_error(object, inode, ret,
269 cachefiles_trace_write_error);
270
271 atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
272 set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
273 if (ki->term_func)
274 ki->term_func(ki->term_func_priv, ret, ki->was_async);
275 cachefiles_put_kiocb(ki);
276}
277
278/*
279 * Initiate a write to the cache.
280 */
281static int cachefiles_write(struct netfs_cache_resources *cres,
282 loff_t start_pos,
283 struct iov_iter *iter,
284 netfs_io_terminated_t term_func,
285 void *term_func_priv)
286{
287 struct cachefiles_object *object;
288 struct cachefiles_cache *cache;
289 struct cachefiles_kiocb *ki;
290 struct inode *inode;
291 struct file *file;
292 unsigned int old_nofs;
293 ssize_t ret = -ENOBUFS;
294 size_t len = iov_iter_count(iter);
295
296 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
297 goto presubmission_error;
298 fscache_count_write();
299 object = cachefiles_cres_object(cres);
300 cache = object->volume->cache;
301 file = cachefiles_cres_file(cres);
302
303 _enter("%pD,%li,%llx,%zx/%llx",
304 file, file_inode(file)->i_ino, start_pos, len,
305 i_size_read(file_inode(file)));
306
307 ret = -ENOMEM;
308 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
309 if (!ki)
310 goto presubmission_error;
311
312 refcount_set(&ki->ki_refcnt, 2);
313 ki->iocb.ki_filp = file;
314 ki->iocb.ki_pos = start_pos;
315 ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
316 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
317 ki->iocb.ki_ioprio = get_current_ioprio();
318 ki->object = object;
319 ki->inval_counter = cres->inval_counter;
320 ki->start = start_pos;
321 ki->len = len;
322 ki->term_func = term_func;
323 ki->term_func_priv = term_func_priv;
324 ki->was_async = true;
David Howells5638b062022-01-14 14:13:59 +0000325 ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
David Howells047487c2021-10-21 00:19:46 +0100326
327 if (ki->term_func)
328 ki->iocb.ki_complete = cachefiles_write_complete;
329 atomic_long_add(ki->b_writing, &cache->b_writing);
330
331 /* Open-code file_start_write here to grab freeze protection, which
332 * will be released by another thread in aio_complete_rw(). Fool
333 * lockdep by telling it the lock got released so that it doesn't
334 * complain about the held lock when we return to userspace.
335 */
336 inode = file_inode(file);
337 __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
338 __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
339
340 get_file(ki->iocb.ki_filp);
341 cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
342
343 trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
344 old_nofs = memalloc_nofs_save();
345 ret = cachefiles_inject_write_error();
346 if (ret == 0)
347 ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
348 memalloc_nofs_restore(old_nofs);
349 switch (ret) {
350 case -EIOCBQUEUED:
351 goto in_progress;
352
353 case -ERESTARTSYS:
354 case -ERESTARTNOINTR:
355 case -ERESTARTNOHAND:
356 case -ERESTART_RESTARTBLOCK:
357 /* There's no easy way to restart the syscall since other AIO's
358 * may be already running. Just fail this IO with EINTR.
359 */
360 ret = -EINTR;
361 fallthrough;
362 default:
363 ki->was_async = false;
364 cachefiles_write_complete(&ki->iocb, ret);
365 if (ret > 0)
366 ret = 0;
367 break;
368 }
369
370in_progress:
371 cachefiles_put_kiocb(ki);
372 _leave(" = %zd", ret);
373 return ret;
374
375presubmission_error:
376 if (term_func)
377 term_func(term_func_priv, ret, false);
378 return ret;
379}
380
381/*
382 * Prepare a read operation, shortening it to a cached/uncached
383 * boundary as appropriate.
384 */
385static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq,
386 loff_t i_size)
387{
388 enum cachefiles_prepare_read_trace why;
389 struct netfs_read_request *rreq = subreq->rreq;
390 struct netfs_cache_resources *cres = &rreq->cache_resources;
391 struct cachefiles_object *object;
392 struct cachefiles_cache *cache;
393 struct fscache_cookie *cookie = fscache_cres_cookie(cres);
394 const struct cred *saved_cred;
395 struct file *file = cachefiles_cres_file(cres);
396 enum netfs_read_source ret = NETFS_DOWNLOAD_FROM_SERVER;
397 loff_t off, to;
398 ino_t ino = file ? file_inode(file)->i_ino : 0;
399
400 _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
401
402 if (subreq->start >= i_size) {
403 ret = NETFS_FILL_WITH_ZEROES;
404 why = cachefiles_trace_read_after_eof;
405 goto out_no_object;
406 }
407
408 if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
409 __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
410 why = cachefiles_trace_read_no_data;
411 goto out_no_object;
412 }
413
414 /* The object and the file may be being created in the background. */
415 if (!file) {
416 why = cachefiles_trace_read_no_file;
417 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
418 goto out_no_object;
419 file = cachefiles_cres_file(cres);
420 if (!file)
421 goto out_no_object;
422 ino = file_inode(file)->i_ino;
423 }
424
425 object = cachefiles_cres_object(cres);
426 cache = object->volume->cache;
427 cachefiles_begin_secure(cache, &saved_cred);
428
429 off = cachefiles_inject_read_error();
430 if (off == 0)
431 off = vfs_llseek(file, subreq->start, SEEK_DATA);
432 if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
433 if (off == (loff_t)-ENXIO) {
434 why = cachefiles_trace_read_seek_nxio;
435 goto download_and_store;
436 }
437 trace_cachefiles_io_error(object, file_inode(file), off,
438 cachefiles_trace_seek_error);
439 why = cachefiles_trace_read_seek_error;
440 goto out;
441 }
442
443 if (off >= subreq->start + subreq->len) {
444 why = cachefiles_trace_read_found_hole;
445 goto download_and_store;
446 }
447
448 if (off > subreq->start) {
449 off = round_up(off, cache->bsize);
450 subreq->len = off - subreq->start;
451 why = cachefiles_trace_read_found_part;
452 goto download_and_store;
453 }
454
455 to = cachefiles_inject_read_error();
456 if (to == 0)
457 to = vfs_llseek(file, subreq->start, SEEK_HOLE);
458 if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
459 trace_cachefiles_io_error(object, file_inode(file), to,
460 cachefiles_trace_seek_error);
461 why = cachefiles_trace_read_seek_error;
462 goto out;
463 }
464
465 if (to < subreq->start + subreq->len) {
466 if (subreq->start + subreq->len >= i_size)
467 to = round_up(to, cache->bsize);
468 else
469 to = round_down(to, cache->bsize);
470 subreq->len = to - subreq->start;
471 }
472
473 why = cachefiles_trace_read_have_data;
474 ret = NETFS_READ_FROM_CACHE;
475 goto out;
476
477download_and_store:
478 __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
479out:
480 cachefiles_end_secure(cache, saved_cred);
481out_no_object:
482 trace_cachefiles_prep_read(subreq, ret, why, ino);
483 return ret;
484}
485
486/*
487 * Prepare for a write to occur.
488 */
489static int __cachefiles_prepare_write(struct netfs_cache_resources *cres,
490 loff_t *_start, size_t *_len, loff_t i_size,
491 bool no_space_allocated_yet)
492{
493 struct cachefiles_object *object = cachefiles_cres_object(cres);
494 struct cachefiles_cache *cache = object->volume->cache;
495 struct file *file = cachefiles_cres_file(cres);
496 loff_t start = *_start, pos;
497 size_t len = *_len, down;
498 int ret;
499
500 /* Round to DIO size */
501 down = start - round_down(start, PAGE_SIZE);
502 *_start = start - down;
503 *_len = round_up(down + len, PAGE_SIZE);
504
505 /* We need to work out whether there's sufficient disk space to perform
506 * the write - but we can skip that check if we have space already
507 * allocated.
508 */
509 if (no_space_allocated_yet)
510 goto check_space;
511
512 pos = cachefiles_inject_read_error();
513 if (pos == 0)
514 pos = vfs_llseek(file, *_start, SEEK_DATA);
515 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
516 if (pos == -ENXIO)
517 goto check_space; /* Unallocated tail */
518 trace_cachefiles_io_error(object, file_inode(file), pos,
519 cachefiles_trace_seek_error);
520 return pos;
521 }
522 if ((u64)pos >= (u64)*_start + *_len)
523 goto check_space; /* Unallocated region */
524
525 /* We have a block that's at least partially filled - if we're low on
526 * space, we need to see if it's fully allocated. If it's not, we may
527 * want to cull it.
528 */
David Howells3929eca2021-10-21 21:58:29 +0100529 if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
530 cachefiles_has_space_check) == 0)
David Howells047487c2021-10-21 00:19:46 +0100531 return 0; /* Enough space to simply overwrite the whole block */
532
533 pos = cachefiles_inject_read_error();
534 if (pos == 0)
535 pos = vfs_llseek(file, *_start, SEEK_HOLE);
536 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
537 trace_cachefiles_io_error(object, file_inode(file), pos,
538 cachefiles_trace_seek_error);
539 return pos;
540 }
541 if ((u64)pos >= (u64)*_start + *_len)
542 return 0; /* Fully allocated */
543
544 /* Partially allocated, but insufficient space: cull. */
David Howells3929eca2021-10-21 21:58:29 +0100545 fscache_count_no_write_space();
David Howells047487c2021-10-21 00:19:46 +0100546 ret = cachefiles_inject_remove_error();
547 if (ret == 0)
548 ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
549 *_start, *_len);
550 if (ret < 0) {
551 trace_cachefiles_io_error(object, file_inode(file), ret,
552 cachefiles_trace_fallocate_error);
553 cachefiles_io_error_obj(object,
554 "CacheFiles: fallocate failed (%d)\n", ret);
555 ret = -EIO;
556 }
557
558 return ret;
559
560check_space:
David Howells3929eca2021-10-21 21:58:29 +0100561 return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
562 cachefiles_has_space_for_write);
David Howells047487c2021-10-21 00:19:46 +0100563}
564
565static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
566 loff_t *_start, size_t *_len, loff_t i_size,
567 bool no_space_allocated_yet)
568{
569 struct cachefiles_object *object = cachefiles_cres_object(cres);
570 struct cachefiles_cache *cache = object->volume->cache;
571 const struct cred *saved_cred;
572 int ret;
573
574 if (!cachefiles_cres_file(cres)) {
575 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
576 return -ENOBUFS;
577 if (!cachefiles_cres_file(cres))
578 return -ENOBUFS;
579 }
580
581 cachefiles_begin_secure(cache, &saved_cred);
582 ret = __cachefiles_prepare_write(cres, _start, _len, i_size,
583 no_space_allocated_yet);
584 cachefiles_end_secure(cache, saved_cred);
585 return ret;
586}
587
David Howells287fd612021-10-21 11:05:53 +0100588/*
589 * Clean up an operation.
590 */
591static void cachefiles_end_operation(struct netfs_cache_resources *cres)
592{
593 struct file *file = cachefiles_cres_file(cres);
594
595 if (file)
596 fput(file);
597 fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
598}
599
600static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
601 .end_operation = cachefiles_end_operation,
David Howells047487c2021-10-21 00:19:46 +0100602 .read = cachefiles_read,
603 .write = cachefiles_write,
604 .prepare_read = cachefiles_prepare_read,
605 .prepare_write = cachefiles_prepare_write,
David Howellsbee9f652022-01-27 16:02:50 +0000606 .query_occupancy = cachefiles_query_occupancy,
David Howells287fd612021-10-21 11:05:53 +0100607};
608
609/*
610 * Open the cache file when beginning a cache operation.
611 */
612bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
613 enum fscache_want_state want_state)
614{
615 struct cachefiles_object *object = cachefiles_cres_object(cres);
616
617 if (!cachefiles_cres_file(cres)) {
618 cres->ops = &cachefiles_netfs_cache_ops;
619 if (object->file) {
620 spin_lock(&object->lock);
621 if (!cres->cache_priv2 && object->file)
622 cres->cache_priv2 = get_file(object->file);
623 spin_unlock(&object->lock);
624 }
625 }
626
627 if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
628 pr_err("failed to get cres->file\n");
629 return false;
630 }
631
632 return true;
633}