// SPDX-License-Identifier: GPL-2.0-or-later
/* kiocb-using read/write
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */
7
8#include <linux/mount.h>
9#include <linux/slab.h>
10#include <linux/file.h>
11#include <linux/uio.h>
12#include <linux/sched/mm.h>
13#include <linux/netfs.h>
14#include "internal.h"
15
16struct cachefiles_kiocb {
17 struct kiocb iocb;
18 refcount_t ki_refcnt;
19 loff_t start;
20 union {
21 size_t skipped;
22 size_t len;
23 };
24 netfs_io_terminated_t term_func;
25 void *term_func_priv;
26 bool was_async;
27};
28
29static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
30{
31 if (refcount_dec_and_test(&ki->ki_refcnt)) {
32 fput(ki->iocb.ki_filp);
33 kfree(ki);
34 }
35}
36
37/*
38 * Handle completion of a read from the cache.
39 */
Jens Axboe6b19b762021-10-21 09:22:35 -060040static void cachefiles_read_complete(struct kiocb *iocb, long ret)
David Howells26aaeff2021-02-22 11:39:47 +000041{
42 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
43
Jens Axboe6b19b762021-10-21 09:22:35 -060044 _enter("%ld", ret);
David Howells26aaeff2021-02-22 11:39:47 +000045
46 if (ki->term_func) {
47 if (ret >= 0)
48 ret += ki->skipped;
49 ki->term_func(ki->term_func_priv, ret, ki->was_async);
50 }
51
52 cachefiles_put_kiocb(ki);
53}
54
55/*
56 * Initiate a read from the cache.
57 */
58static int cachefiles_read(struct netfs_cache_resources *cres,
59 loff_t start_pos,
60 struct iov_iter *iter,
61 bool seek_data,
62 netfs_io_terminated_t term_func,
63 void *term_func_priv)
64{
65 struct cachefiles_kiocb *ki;
66 struct file *file = cres->cache_priv2;
67 unsigned int old_nofs;
68 ssize_t ret = -ENOBUFS;
69 size_t len = iov_iter_count(iter), skipped = 0;
70
71 _enter("%pD,%li,%llx,%zx/%llx",
72 file, file_inode(file)->i_ino, start_pos, len,
David Howells18598192021-06-03 10:51:28 +010073 i_size_read(file_inode(file)));
David Howells26aaeff2021-02-22 11:39:47 +000074
75 /* If the caller asked us to seek for data before doing the read, then
76 * we should do that now. If we find a gap, we fill it with zeros.
77 */
78 if (seek_data) {
79 loff_t off = start_pos, off2;
80
81 off2 = vfs_llseek(file, off, SEEK_DATA);
82 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
83 skipped = 0;
84 ret = off2;
85 goto presubmission_error;
86 }
87
88 if (off2 == -ENXIO || off2 >= start_pos + len) {
89 /* The region is beyond the EOF or there's no more data
90 * in the region, so clear the rest of the buffer and
91 * return success.
92 */
93 iov_iter_zero(len, iter);
94 skipped = len;
95 ret = 0;
96 goto presubmission_error;
97 }
98
99 skipped = off2 - off;
100 iov_iter_zero(skipped, iter);
101 }
102
103 ret = -ENOBUFS;
104 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
105 if (!ki)
106 goto presubmission_error;
107
108 refcount_set(&ki->ki_refcnt, 2);
109 ki->iocb.ki_filp = file;
110 ki->iocb.ki_pos = start_pos + skipped;
111 ki->iocb.ki_flags = IOCB_DIRECT;
112 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
113 ki->iocb.ki_ioprio = get_current_ioprio();
114 ki->skipped = skipped;
115 ki->term_func = term_func;
116 ki->term_func_priv = term_func_priv;
117 ki->was_async = true;
118
119 if (ki->term_func)
120 ki->iocb.ki_complete = cachefiles_read_complete;
121
122 get_file(ki->iocb.ki_filp);
123
124 old_nofs = memalloc_nofs_save();
125 ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
126 memalloc_nofs_restore(old_nofs);
127 switch (ret) {
128 case -EIOCBQUEUED:
129 goto in_progress;
130
131 case -ERESTARTSYS:
132 case -ERESTARTNOINTR:
133 case -ERESTARTNOHAND:
134 case -ERESTART_RESTARTBLOCK:
135 /* There's no easy way to restart the syscall since other AIO's
136 * may be already running. Just fail this IO with EINTR.
137 */
138 ret = -EINTR;
139 fallthrough;
140 default:
141 ki->was_async = false;
Jens Axboe6b19b762021-10-21 09:22:35 -0600142 cachefiles_read_complete(&ki->iocb, ret);
David Howells26aaeff2021-02-22 11:39:47 +0000143 if (ret > 0)
144 ret = 0;
145 break;
146 }
147
148in_progress:
149 cachefiles_put_kiocb(ki);
150 _leave(" = %zd", ret);
151 return ret;
152
153presubmission_error:
154 if (term_func)
155 term_func(term_func_priv, ret < 0 ? ret : skipped, false);
156 return ret;
157}
158
159/*
160 * Handle completion of a write to the cache.
161 */
Jens Axboe6b19b762021-10-21 09:22:35 -0600162static void cachefiles_write_complete(struct kiocb *iocb, long ret)
David Howells26aaeff2021-02-22 11:39:47 +0000163{
164 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
165 struct inode *inode = file_inode(ki->iocb.ki_filp);
166
Jens Axboe6b19b762021-10-21 09:22:35 -0600167 _enter("%ld", ret);
David Howells26aaeff2021-02-22 11:39:47 +0000168
169 /* Tell lockdep we inherited freeze protection from submission thread */
170 __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
171 __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
172
173 if (ki->term_func)
174 ki->term_func(ki->term_func_priv, ret, ki->was_async);
175
176 cachefiles_put_kiocb(ki);
177}
178
179/*
180 * Initiate a write to the cache.
181 */
182static int cachefiles_write(struct netfs_cache_resources *cres,
183 loff_t start_pos,
184 struct iov_iter *iter,
185 netfs_io_terminated_t term_func,
186 void *term_func_priv)
187{
188 struct cachefiles_kiocb *ki;
189 struct inode *inode;
190 struct file *file = cres->cache_priv2;
191 unsigned int old_nofs;
192 ssize_t ret = -ENOBUFS;
193 size_t len = iov_iter_count(iter);
194
195 _enter("%pD,%li,%llx,%zx/%llx",
196 file, file_inode(file)->i_ino, start_pos, len,
David Howells18598192021-06-03 10:51:28 +0100197 i_size_read(file_inode(file)));
David Howells26aaeff2021-02-22 11:39:47 +0000198
199 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
200 if (!ki)
201 goto presubmission_error;
202
203 refcount_set(&ki->ki_refcnt, 2);
204 ki->iocb.ki_filp = file;
205 ki->iocb.ki_pos = start_pos;
206 ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
207 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
208 ki->iocb.ki_ioprio = get_current_ioprio();
209 ki->start = start_pos;
210 ki->len = len;
211 ki->term_func = term_func;
212 ki->term_func_priv = term_func_priv;
213 ki->was_async = true;
214
215 if (ki->term_func)
216 ki->iocb.ki_complete = cachefiles_write_complete;
217
218 /* Open-code file_start_write here to grab freeze protection, which
219 * will be released by another thread in aio_complete_rw(). Fool
220 * lockdep by telling it the lock got released so that it doesn't
221 * complain about the held lock when we return to userspace.
222 */
223 inode = file_inode(file);
224 __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
225 __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
226
227 get_file(ki->iocb.ki_filp);
228
229 old_nofs = memalloc_nofs_save();
230 ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
231 memalloc_nofs_restore(old_nofs);
232 switch (ret) {
233 case -EIOCBQUEUED:
234 goto in_progress;
235
236 case -ERESTARTSYS:
237 case -ERESTARTNOINTR:
238 case -ERESTARTNOHAND:
239 case -ERESTART_RESTARTBLOCK:
240 /* There's no easy way to restart the syscall since other AIO's
241 * may be already running. Just fail this IO with EINTR.
242 */
243 ret = -EINTR;
244 fallthrough;
245 default:
246 ki->was_async = false;
Jens Axboe6b19b762021-10-21 09:22:35 -0600247 cachefiles_write_complete(&ki->iocb, ret);
David Howells26aaeff2021-02-22 11:39:47 +0000248 if (ret > 0)
249 ret = 0;
250 break;
251 }
252
253in_progress:
254 cachefiles_put_kiocb(ki);
255 _leave(" = %zd", ret);
256 return ret;
257
258presubmission_error:
259 if (term_func)
260 term_func(term_func_priv, -ENOMEM, false);
261 return -ENOMEM;
262}
263
264/*
265 * Prepare a read operation, shortening it to a cached/uncached
266 * boundary as appropriate.
267 */
268static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq,
269 loff_t i_size)
270{
271 struct fscache_retrieval *op = subreq->rreq->cache_resources.cache_priv;
272 struct cachefiles_object *object;
273 struct cachefiles_cache *cache;
274 const struct cred *saved_cred;
275 struct file *file = subreq->rreq->cache_resources.cache_priv2;
276 loff_t off, to;
277
278 _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
279
280 object = container_of(op->op.object,
281 struct cachefiles_object, fscache);
282 cache = container_of(object->fscache.cache,
283 struct cachefiles_cache, cache);
284
285 if (!file)
286 goto cache_fail_nosec;
287
288 if (subreq->start >= i_size)
289 return NETFS_FILL_WITH_ZEROES;
290
291 cachefiles_begin_secure(cache, &saved_cred);
292
293 off = vfs_llseek(file, subreq->start, SEEK_DATA);
294 if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
295 if (off == (loff_t)-ENXIO)
296 goto download_and_store;
297 goto cache_fail;
298 }
299
300 if (off >= subreq->start + subreq->len)
301 goto download_and_store;
302
303 if (off > subreq->start) {
304 off = round_up(off, cache->bsize);
305 subreq->len = off - subreq->start;
306 goto download_and_store;
307 }
308
309 to = vfs_llseek(file, subreq->start, SEEK_HOLE);
310 if (to < 0 && to >= (loff_t)-MAX_ERRNO)
311 goto cache_fail;
312
313 if (to < subreq->start + subreq->len) {
314 if (subreq->start + subreq->len >= i_size)
315 to = round_up(to, cache->bsize);
316 else
317 to = round_down(to, cache->bsize);
318 subreq->len = to - subreq->start;
319 }
320
321 cachefiles_end_secure(cache, saved_cred);
322 return NETFS_READ_FROM_CACHE;
323
324download_and_store:
325 if (cachefiles_has_space(cache, 0, (subreq->len + PAGE_SIZE - 1) / PAGE_SIZE) == 0)
326 __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
327cache_fail:
328 cachefiles_end_secure(cache, saved_cred);
329cache_fail_nosec:
330 return NETFS_DOWNLOAD_FROM_SERVER;
331}
332
333/*
334 * Prepare for a write to occur.
335 */
336static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
337 loff_t *_start, size_t *_len, loff_t i_size)
338{
339 loff_t start = *_start;
340 size_t len = *_len, down;
341
342 /* Round to DIO size */
343 down = start - round_down(start, PAGE_SIZE);
344 *_start = start - down;
345 *_len = round_up(down + len, PAGE_SIZE);
346 return 0;
347}
348
349/*
350 * Clean up an operation.
351 */
352static void cachefiles_end_operation(struct netfs_cache_resources *cres)
353{
354 struct fscache_retrieval *op = cres->cache_priv;
355 struct file *file = cres->cache_priv2;
356
357 _enter("");
358
359 if (file)
360 fput(file);
361 if (op) {
362 fscache_op_complete(&op->op, false);
363 fscache_put_retrieval(op);
364 }
365
366 _leave("");
367}
368
369static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
370 .end_operation = cachefiles_end_operation,
371 .read = cachefiles_read,
372 .write = cachefiles_write,
373 .prepare_read = cachefiles_prepare_read,
374 .prepare_write = cachefiles_prepare_write,
375};
376
377/*
378 * Open the cache file when beginning a cache operation.
379 */
380int cachefiles_begin_read_operation(struct netfs_read_request *rreq,
381 struct fscache_retrieval *op)
382{
383 struct cachefiles_object *object;
384 struct cachefiles_cache *cache;
385 struct path path;
386 struct file *file;
387
388 _enter("");
389
390 object = container_of(op->op.object,
391 struct cachefiles_object, fscache);
392 cache = container_of(object->fscache.cache,
393 struct cachefiles_cache, cache);
394
395 path.mnt = cache->mnt;
396 path.dentry = object->backer;
397 file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
398 d_inode(object->backer), cache->cache_cred);
399 if (IS_ERR(file))
400 return PTR_ERR(file);
401 if (!S_ISREG(file_inode(file)->i_mode))
402 goto error_file;
403 if (unlikely(!file->f_op->read_iter) ||
404 unlikely(!file->f_op->write_iter)) {
405 pr_notice("Cache does not support read_iter and write_iter\n");
406 goto error_file;
407 }
408
409 fscache_get_retrieval(op);
410 rreq->cache_resources.cache_priv = op;
411 rreq->cache_resources.cache_priv2 = file;
412 rreq->cache_resources.ops = &cachefiles_netfs_cache_ops;
David Howellsa7e20e32021-05-12 14:10:09 +0100413 rreq->cache_resources.debug_id = object->fscache.debug_id;
David Howells26aaeff2021-02-22 11:39:47 +0000414 _leave("");
415 return 0;
416
417error_file:
418 fput(file);
419 return -EIO;
420}