David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | /* kiocb-using read/write |
| 3 | * |
| 4 | * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. |
| 5 | * Written by David Howells (dhowells@redhat.com) |
| 6 | */ |
| 7 | |
| 8 | #include <linux/mount.h> |
| 9 | #include <linux/slab.h> |
| 10 | #include <linux/file.h> |
| 11 | #include <linux/uio.h> |
| 12 | #include <linux/sched/mm.h> |
| 13 | #include <linux/netfs.h> |
| 14 | #include "internal.h" |
| 15 | |
| 16 | struct cachefiles_kiocb { |
| 17 | struct kiocb iocb; |
| 18 | refcount_t ki_refcnt; |
| 19 | loff_t start; |
| 20 | union { |
| 21 | size_t skipped; |
| 22 | size_t len; |
| 23 | }; |
| 24 | netfs_io_terminated_t term_func; |
| 25 | void *term_func_priv; |
| 26 | bool was_async; |
| 27 | }; |
| 28 | |
| 29 | static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) |
| 30 | { |
| 31 | if (refcount_dec_and_test(&ki->ki_refcnt)) { |
| 32 | fput(ki->iocb.ki_filp); |
| 33 | kfree(ki); |
| 34 | } |
| 35 | } |
| 36 | |
| 37 | /* |
| 38 | * Handle completion of a read from the cache. |
| 39 | */ |
Jens Axboe | 6b19b76 | 2021-10-21 09:22:35 -0600 | [diff] [blame] | 40 | static void cachefiles_read_complete(struct kiocb *iocb, long ret) |
David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 41 | { |
| 42 | struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); |
| 43 | |
Jens Axboe | 6b19b76 | 2021-10-21 09:22:35 -0600 | [diff] [blame] | 44 | _enter("%ld", ret); |
David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 45 | |
| 46 | if (ki->term_func) { |
| 47 | if (ret >= 0) |
| 48 | ret += ki->skipped; |
| 49 | ki->term_func(ki->term_func_priv, ret, ki->was_async); |
| 50 | } |
| 51 | |
| 52 | cachefiles_put_kiocb(ki); |
| 53 | } |
| 54 | |
| 55 | /* |
| 56 | * Initiate a read from the cache. |
| 57 | */ |
| 58 | static int cachefiles_read(struct netfs_cache_resources *cres, |
| 59 | loff_t start_pos, |
| 60 | struct iov_iter *iter, |
| 61 | bool seek_data, |
| 62 | netfs_io_terminated_t term_func, |
| 63 | void *term_func_priv) |
| 64 | { |
| 65 | struct cachefiles_kiocb *ki; |
| 66 | struct file *file = cres->cache_priv2; |
| 67 | unsigned int old_nofs; |
| 68 | ssize_t ret = -ENOBUFS; |
| 69 | size_t len = iov_iter_count(iter), skipped = 0; |
| 70 | |
| 71 | _enter("%pD,%li,%llx,%zx/%llx", |
| 72 | file, file_inode(file)->i_ino, start_pos, len, |
David Howells | 1859819 | 2021-06-03 10:51:28 +0100 | [diff] [blame] | 73 | i_size_read(file_inode(file))); |
David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 74 | |
| 75 | /* If the caller asked us to seek for data before doing the read, then |
| 76 | * we should do that now. If we find a gap, we fill it with zeros. |
| 77 | */ |
| 78 | if (seek_data) { |
| 79 | loff_t off = start_pos, off2; |
| 80 | |
| 81 | off2 = vfs_llseek(file, off, SEEK_DATA); |
| 82 | if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) { |
| 83 | skipped = 0; |
| 84 | ret = off2; |
| 85 | goto presubmission_error; |
| 86 | } |
| 87 | |
| 88 | if (off2 == -ENXIO || off2 >= start_pos + len) { |
| 89 | /* The region is beyond the EOF or there's no more data |
| 90 | * in the region, so clear the rest of the buffer and |
| 91 | * return success. |
| 92 | */ |
| 93 | iov_iter_zero(len, iter); |
| 94 | skipped = len; |
| 95 | ret = 0; |
| 96 | goto presubmission_error; |
| 97 | } |
| 98 | |
| 99 | skipped = off2 - off; |
| 100 | iov_iter_zero(skipped, iter); |
| 101 | } |
| 102 | |
| 103 | ret = -ENOBUFS; |
| 104 | ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); |
| 105 | if (!ki) |
| 106 | goto presubmission_error; |
| 107 | |
| 108 | refcount_set(&ki->ki_refcnt, 2); |
| 109 | ki->iocb.ki_filp = file; |
| 110 | ki->iocb.ki_pos = start_pos + skipped; |
| 111 | ki->iocb.ki_flags = IOCB_DIRECT; |
| 112 | ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); |
| 113 | ki->iocb.ki_ioprio = get_current_ioprio(); |
| 114 | ki->skipped = skipped; |
| 115 | ki->term_func = term_func; |
| 116 | ki->term_func_priv = term_func_priv; |
| 117 | ki->was_async = true; |
| 118 | |
| 119 | if (ki->term_func) |
| 120 | ki->iocb.ki_complete = cachefiles_read_complete; |
| 121 | |
| 122 | get_file(ki->iocb.ki_filp); |
| 123 | |
| 124 | old_nofs = memalloc_nofs_save(); |
| 125 | ret = vfs_iocb_iter_read(file, &ki->iocb, iter); |
| 126 | memalloc_nofs_restore(old_nofs); |
| 127 | switch (ret) { |
| 128 | case -EIOCBQUEUED: |
| 129 | goto in_progress; |
| 130 | |
| 131 | case -ERESTARTSYS: |
| 132 | case -ERESTARTNOINTR: |
| 133 | case -ERESTARTNOHAND: |
| 134 | case -ERESTART_RESTARTBLOCK: |
| 135 | /* There's no easy way to restart the syscall since other AIO's |
| 136 | * may be already running. Just fail this IO with EINTR. |
| 137 | */ |
| 138 | ret = -EINTR; |
| 139 | fallthrough; |
| 140 | default: |
| 141 | ki->was_async = false; |
Jens Axboe | 6b19b76 | 2021-10-21 09:22:35 -0600 | [diff] [blame] | 142 | cachefiles_read_complete(&ki->iocb, ret); |
David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 143 | if (ret > 0) |
| 144 | ret = 0; |
| 145 | break; |
| 146 | } |
| 147 | |
| 148 | in_progress: |
| 149 | cachefiles_put_kiocb(ki); |
| 150 | _leave(" = %zd", ret); |
| 151 | return ret; |
| 152 | |
| 153 | presubmission_error: |
| 154 | if (term_func) |
| 155 | term_func(term_func_priv, ret < 0 ? ret : skipped, false); |
| 156 | return ret; |
| 157 | } |
| 158 | |
| 159 | /* |
| 160 | * Handle completion of a write to the cache. |
| 161 | */ |
Jens Axboe | 6b19b76 | 2021-10-21 09:22:35 -0600 | [diff] [blame] | 162 | static void cachefiles_write_complete(struct kiocb *iocb, long ret) |
David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 163 | { |
| 164 | struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); |
| 165 | struct inode *inode = file_inode(ki->iocb.ki_filp); |
| 166 | |
Jens Axboe | 6b19b76 | 2021-10-21 09:22:35 -0600 | [diff] [blame] | 167 | _enter("%ld", ret); |
David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 168 | |
| 169 | /* Tell lockdep we inherited freeze protection from submission thread */ |
| 170 | __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); |
| 171 | __sb_end_write(inode->i_sb, SB_FREEZE_WRITE); |
| 172 | |
| 173 | if (ki->term_func) |
| 174 | ki->term_func(ki->term_func_priv, ret, ki->was_async); |
| 175 | |
| 176 | cachefiles_put_kiocb(ki); |
| 177 | } |
| 178 | |
| 179 | /* |
| 180 | * Initiate a write to the cache. |
| 181 | */ |
| 182 | static int cachefiles_write(struct netfs_cache_resources *cres, |
| 183 | loff_t start_pos, |
| 184 | struct iov_iter *iter, |
| 185 | netfs_io_terminated_t term_func, |
| 186 | void *term_func_priv) |
| 187 | { |
| 188 | struct cachefiles_kiocb *ki; |
| 189 | struct inode *inode; |
| 190 | struct file *file = cres->cache_priv2; |
| 191 | unsigned int old_nofs; |
| 192 | ssize_t ret = -ENOBUFS; |
| 193 | size_t len = iov_iter_count(iter); |
| 194 | |
| 195 | _enter("%pD,%li,%llx,%zx/%llx", |
| 196 | file, file_inode(file)->i_ino, start_pos, len, |
David Howells | 1859819 | 2021-06-03 10:51:28 +0100 | [diff] [blame] | 197 | i_size_read(file_inode(file))); |
David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 198 | |
| 199 | ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); |
| 200 | if (!ki) |
| 201 | goto presubmission_error; |
| 202 | |
| 203 | refcount_set(&ki->ki_refcnt, 2); |
| 204 | ki->iocb.ki_filp = file; |
| 205 | ki->iocb.ki_pos = start_pos; |
| 206 | ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; |
| 207 | ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); |
| 208 | ki->iocb.ki_ioprio = get_current_ioprio(); |
| 209 | ki->start = start_pos; |
| 210 | ki->len = len; |
| 211 | ki->term_func = term_func; |
| 212 | ki->term_func_priv = term_func_priv; |
| 213 | ki->was_async = true; |
| 214 | |
| 215 | if (ki->term_func) |
| 216 | ki->iocb.ki_complete = cachefiles_write_complete; |
| 217 | |
| 218 | /* Open-code file_start_write here to grab freeze protection, which |
| 219 | * will be released by another thread in aio_complete_rw(). Fool |
| 220 | * lockdep by telling it the lock got released so that it doesn't |
| 221 | * complain about the held lock when we return to userspace. |
| 222 | */ |
| 223 | inode = file_inode(file); |
| 224 | __sb_start_write(inode->i_sb, SB_FREEZE_WRITE); |
| 225 | __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); |
| 226 | |
| 227 | get_file(ki->iocb.ki_filp); |
| 228 | |
| 229 | old_nofs = memalloc_nofs_save(); |
| 230 | ret = vfs_iocb_iter_write(file, &ki->iocb, iter); |
| 231 | memalloc_nofs_restore(old_nofs); |
| 232 | switch (ret) { |
| 233 | case -EIOCBQUEUED: |
| 234 | goto in_progress; |
| 235 | |
| 236 | case -ERESTARTSYS: |
| 237 | case -ERESTARTNOINTR: |
| 238 | case -ERESTARTNOHAND: |
| 239 | case -ERESTART_RESTARTBLOCK: |
| 240 | /* There's no easy way to restart the syscall since other AIO's |
| 241 | * may be already running. Just fail this IO with EINTR. |
| 242 | */ |
| 243 | ret = -EINTR; |
| 244 | fallthrough; |
| 245 | default: |
| 246 | ki->was_async = false; |
Jens Axboe | 6b19b76 | 2021-10-21 09:22:35 -0600 | [diff] [blame] | 247 | cachefiles_write_complete(&ki->iocb, ret); |
David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 248 | if (ret > 0) |
| 249 | ret = 0; |
| 250 | break; |
| 251 | } |
| 252 | |
| 253 | in_progress: |
| 254 | cachefiles_put_kiocb(ki); |
| 255 | _leave(" = %zd", ret); |
| 256 | return ret; |
| 257 | |
| 258 | presubmission_error: |
| 259 | if (term_func) |
| 260 | term_func(term_func_priv, -ENOMEM, false); |
| 261 | return -ENOMEM; |
| 262 | } |
| 263 | |
| 264 | /* |
| 265 | * Prepare a read operation, shortening it to a cached/uncached |
| 266 | * boundary as appropriate. |
| 267 | */ |
| 268 | static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq, |
| 269 | loff_t i_size) |
| 270 | { |
| 271 | struct fscache_retrieval *op = subreq->rreq->cache_resources.cache_priv; |
| 272 | struct cachefiles_object *object; |
| 273 | struct cachefiles_cache *cache; |
| 274 | const struct cred *saved_cred; |
| 275 | struct file *file = subreq->rreq->cache_resources.cache_priv2; |
| 276 | loff_t off, to; |
| 277 | |
| 278 | _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); |
| 279 | |
| 280 | object = container_of(op->op.object, |
| 281 | struct cachefiles_object, fscache); |
| 282 | cache = container_of(object->fscache.cache, |
| 283 | struct cachefiles_cache, cache); |
| 284 | |
| 285 | if (!file) |
| 286 | goto cache_fail_nosec; |
| 287 | |
| 288 | if (subreq->start >= i_size) |
| 289 | return NETFS_FILL_WITH_ZEROES; |
| 290 | |
| 291 | cachefiles_begin_secure(cache, &saved_cred); |
| 292 | |
| 293 | off = vfs_llseek(file, subreq->start, SEEK_DATA); |
| 294 | if (off < 0 && off >= (loff_t)-MAX_ERRNO) { |
| 295 | if (off == (loff_t)-ENXIO) |
| 296 | goto download_and_store; |
| 297 | goto cache_fail; |
| 298 | } |
| 299 | |
| 300 | if (off >= subreq->start + subreq->len) |
| 301 | goto download_and_store; |
| 302 | |
| 303 | if (off > subreq->start) { |
| 304 | off = round_up(off, cache->bsize); |
| 305 | subreq->len = off - subreq->start; |
| 306 | goto download_and_store; |
| 307 | } |
| 308 | |
| 309 | to = vfs_llseek(file, subreq->start, SEEK_HOLE); |
| 310 | if (to < 0 && to >= (loff_t)-MAX_ERRNO) |
| 311 | goto cache_fail; |
| 312 | |
| 313 | if (to < subreq->start + subreq->len) { |
| 314 | if (subreq->start + subreq->len >= i_size) |
| 315 | to = round_up(to, cache->bsize); |
| 316 | else |
| 317 | to = round_down(to, cache->bsize); |
| 318 | subreq->len = to - subreq->start; |
| 319 | } |
| 320 | |
| 321 | cachefiles_end_secure(cache, saved_cred); |
| 322 | return NETFS_READ_FROM_CACHE; |
| 323 | |
| 324 | download_and_store: |
| 325 | if (cachefiles_has_space(cache, 0, (subreq->len + PAGE_SIZE - 1) / PAGE_SIZE) == 0) |
| 326 | __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); |
| 327 | cache_fail: |
| 328 | cachefiles_end_secure(cache, saved_cred); |
| 329 | cache_fail_nosec: |
| 330 | return NETFS_DOWNLOAD_FROM_SERVER; |
| 331 | } |
| 332 | |
| 333 | /* |
| 334 | * Prepare for a write to occur. |
| 335 | */ |
| 336 | static int cachefiles_prepare_write(struct netfs_cache_resources *cres, |
| 337 | loff_t *_start, size_t *_len, loff_t i_size) |
| 338 | { |
| 339 | loff_t start = *_start; |
| 340 | size_t len = *_len, down; |
| 341 | |
| 342 | /* Round to DIO size */ |
| 343 | down = start - round_down(start, PAGE_SIZE); |
| 344 | *_start = start - down; |
| 345 | *_len = round_up(down + len, PAGE_SIZE); |
| 346 | return 0; |
| 347 | } |
| 348 | |
| 349 | /* |
| 350 | * Clean up an operation. |
| 351 | */ |
| 352 | static void cachefiles_end_operation(struct netfs_cache_resources *cres) |
| 353 | { |
| 354 | struct fscache_retrieval *op = cres->cache_priv; |
| 355 | struct file *file = cres->cache_priv2; |
| 356 | |
| 357 | _enter(""); |
| 358 | |
| 359 | if (file) |
| 360 | fput(file); |
| 361 | if (op) { |
| 362 | fscache_op_complete(&op->op, false); |
| 363 | fscache_put_retrieval(op); |
| 364 | } |
| 365 | |
| 366 | _leave(""); |
| 367 | } |
| 368 | |
| 369 | static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { |
| 370 | .end_operation = cachefiles_end_operation, |
| 371 | .read = cachefiles_read, |
| 372 | .write = cachefiles_write, |
| 373 | .prepare_read = cachefiles_prepare_read, |
| 374 | .prepare_write = cachefiles_prepare_write, |
| 375 | }; |
| 376 | |
| 377 | /* |
| 378 | * Open the cache file when beginning a cache operation. |
| 379 | */ |
| 380 | int cachefiles_begin_read_operation(struct netfs_read_request *rreq, |
| 381 | struct fscache_retrieval *op) |
| 382 | { |
| 383 | struct cachefiles_object *object; |
| 384 | struct cachefiles_cache *cache; |
| 385 | struct path path; |
| 386 | struct file *file; |
| 387 | |
| 388 | _enter(""); |
| 389 | |
| 390 | object = container_of(op->op.object, |
| 391 | struct cachefiles_object, fscache); |
| 392 | cache = container_of(object->fscache.cache, |
| 393 | struct cachefiles_cache, cache); |
| 394 | |
| 395 | path.mnt = cache->mnt; |
| 396 | path.dentry = object->backer; |
| 397 | file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, |
| 398 | d_inode(object->backer), cache->cache_cred); |
| 399 | if (IS_ERR(file)) |
| 400 | return PTR_ERR(file); |
| 401 | if (!S_ISREG(file_inode(file)->i_mode)) |
| 402 | goto error_file; |
| 403 | if (unlikely(!file->f_op->read_iter) || |
| 404 | unlikely(!file->f_op->write_iter)) { |
| 405 | pr_notice("Cache does not support read_iter and write_iter\n"); |
| 406 | goto error_file; |
| 407 | } |
| 408 | |
| 409 | fscache_get_retrieval(op); |
| 410 | rreq->cache_resources.cache_priv = op; |
| 411 | rreq->cache_resources.cache_priv2 = file; |
| 412 | rreq->cache_resources.ops = &cachefiles_netfs_cache_ops; |
David Howells | a7e20e3 | 2021-05-12 14:10:09 +0100 | [diff] [blame] | 413 | rreq->cache_resources.debug_id = object->fscache.debug_id; |
David Howells | 26aaeff | 2021-02-22 11:39:47 +0000 | [diff] [blame] | 414 | _leave(""); |
| 415 | return 0; |
| 416 | |
| 417 | error_file: |
| 418 | fput(file); |
| 419 | return -EIO; |
| 420 | } |