Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 2 | /* |
| 3 | * (C) 2001 Clemson University and The University of Chicago |
Martin Brandenburg | 85ac799 | 2018-02-22 18:10:43 +0000 | [diff] [blame] | 4 | * Copyright 2018 Omnibond Systems, L.L.C. |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 5 | * |
| 6 | * See COPYING in top-level directory. |
| 7 | */ |
| 8 | |
| 9 | /* |
| 10 | * Linux VFS file operations. |
| 11 | */ |
| 12 | |
| 13 | #include "protocol.h" |
Mike Marshall | 575e946 | 2015-12-04 12:56:14 -0500 | [diff] [blame] | 14 | #include "orangefs-kernel.h" |
| 15 | #include "orangefs-bufmap.h" |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 16 | #include <linux/fs.h> |
| 17 | #include <linux/pagemap.h> |
| 18 | |
Martin Brandenburg | ed1e158 | 2016-08-02 16:32:15 -0400 | [diff] [blame] | 19 | static int flush_racache(struct inode *inode) |
| 20 | { |
| 21 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 22 | struct orangefs_kernel_op_s *new_op; |
| 23 | int ret; |
| 24 | |
| 25 | gossip_debug(GOSSIP_UTILS_DEBUG, |
| 26 | "%s: %pU: Handle is %pU | fs_id %d\n", __func__, |
| 27 | get_khandle_from_ino(inode), &orangefs_inode->refn.khandle, |
| 28 | orangefs_inode->refn.fs_id); |
| 29 | |
| 30 | new_op = op_alloc(ORANGEFS_VFS_OP_RA_FLUSH); |
| 31 | if (!new_op) |
| 32 | return -ENOMEM; |
| 33 | new_op->upcall.req.ra_cache_flush.refn = orangefs_inode->refn; |
| 34 | |
| 35 | ret = service_operation(new_op, "orangefs_flush_racache", |
| 36 | get_interruptible_flag(inode)); |
| 37 | |
| 38 | gossip_debug(GOSSIP_UTILS_DEBUG, "%s: got return value of %d\n", |
| 39 | __func__, ret); |
| 40 | |
| 41 | op_release(new_op); |
| 42 | return ret; |
| 43 | } |
| 44 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 45 | /* |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 46 | * Post and wait for the I/O upcall to finish |
| 47 | */ |
Martin Brandenburg | c453dcf | 2018-02-16 20:51:24 +0000 | [diff] [blame] | 48 | ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode, |
Mike Marshall | f9bbb68 | 2019-11-26 12:39:37 -0500 | [diff] [blame] | 49 | loff_t *offset, struct iov_iter *iter, size_t total_size, |
| 50 | loff_t readahead_size, struct orangefs_write_range *wr, |
| 51 | int *index_return, struct file *file) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 52 | { |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 53 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 54 | struct orangefs_khandle *handle = &orangefs_inode->refn.khandle; |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 55 | struct orangefs_kernel_op_s *new_op = NULL; |
Colin Ian King | f10789e | 2019-05-11 14:27:00 +0100 | [diff] [blame] | 56 | int buffer_index; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 57 | ssize_t ret; |
Mike Marshall | dd59a64 | 2019-03-25 18:59:29 -0400 | [diff] [blame] | 58 | size_t copy_amount; |
Mike Marshall | f9bbb68 | 2019-11-26 12:39:37 -0500 | [diff] [blame] | 59 | int open_for_read; |
| 60 | int open_for_write; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 61 | |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 62 | new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO); |
Al Viro | ed42fe0 | 2016-01-22 19:47:47 -0500 | [diff] [blame] | 63 | if (!new_op) |
| 64 | return -ENOMEM; |
| 65 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 66 | /* synchronous I/O */ |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 67 | new_op->upcall.req.io.readahead_size = readahead_size; |
| 68 | new_op->upcall.req.io.io_type = type; |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 69 | new_op->upcall.req.io.refn = orangefs_inode->refn; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 70 | |
| 71 | populate_shared_memory: |
| 72 | /* get a shared buffer index */ |
Al Viro | b8a99a8 | 2016-02-16 20:10:26 -0500 | [diff] [blame] | 73 | buffer_index = orangefs_bufmap_get(); |
| 74 | if (buffer_index < 0) { |
| 75 | ret = buffer_index; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 76 | gossip_debug(GOSSIP_FILE_DEBUG, |
Al Viro | b8a99a8 | 2016-02-16 20:10:26 -0500 | [diff] [blame] | 77 | "%s: orangefs_bufmap_get failure (%zd)\n", |
| 78 | __func__, ret); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 79 | goto out; |
| 80 | } |
| 81 | gossip_debug(GOSSIP_FILE_DEBUG, |
| 82 | "%s(%pU): GET op %p -> buffer_index %d\n", |
| 83 | __func__, |
| 84 | handle, |
| 85 | new_op, |
| 86 | buffer_index); |
| 87 | |
| 88 | new_op->uses_shared_memory = 1; |
| 89 | new_op->upcall.req.io.buf_index = buffer_index; |
| 90 | new_op->upcall.req.io.count = total_size; |
| 91 | new_op->upcall.req.io.offset = *offset; |
Martin Brandenburg | 52e2d0a | 2018-12-14 15:24:43 -0500 | [diff] [blame] | 92 | if (type == ORANGEFS_IO_WRITE && wr) { |
| 93 | new_op->upcall.uid = from_kuid(&init_user_ns, wr->uid); |
| 94 | new_op->upcall.gid = from_kgid(&init_user_ns, wr->gid); |
| 95 | } |
Mike Marshall | f9bbb68 | 2019-11-26 12:39:37 -0500 | [diff] [blame] | 96 | /* |
| 97 | * Orangefs has no open, and orangefs checks file permissions |
| 98 | * on each file access. Posix requires that file permissions |
| 99 | * be checked on open and nowhere else. Orangefs-through-the-kernel |
| 100 | * needs to seem posix compliant. |
| 101 | * |
| 102 | * The VFS opens files, even if the filesystem provides no |
| 103 | * method. We can see if a file was successfully opened for |
| 104 | * read and or for write by looking at file->f_mode. |
| 105 | * |
| 106 | * When writes are flowing from the page cache, file is no |
| 107 | * longer available. We can trust the VFS to have checked |
| 108 | * file->f_mode before writing to the page cache. |
| 109 | * |
| 110 | * The mode of a file might change between when it is opened |
| 111 | * and IO commences, or it might be created with an arbitrary mode. |
| 112 | * |
| 113 | * We'll make sure we don't hit EACCES during the IO stage by |
| 114 | * using UID 0. Some of the time we have access without changing |
| 115 | * to UID 0 - how to check? |
| 116 | */ |
| 117 | if (file) { |
| 118 | open_for_write = file->f_mode & FMODE_WRITE; |
| 119 | open_for_read = file->f_mode & FMODE_READ; |
| 120 | } else { |
| 121 | open_for_write = 1; |
| 122 | open_for_read = 0; /* not relevant? */ |
| 123 | } |
| 124 | if ((type == ORANGEFS_IO_WRITE) && open_for_write) |
| 125 | new_op->upcall.uid = 0; |
| 126 | if ((type == ORANGEFS_IO_READ) && open_for_read) |
| 127 | new_op->upcall.uid = 0; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 128 | |
| 129 | gossip_debug(GOSSIP_FILE_DEBUG, |
Al Viro | 3c2fcfc | 2015-10-08 18:00:26 -0400 | [diff] [blame] | 130 | "%s(%pU): offset: %llu total_size: %zd\n", |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 131 | __func__, |
| 132 | handle, |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 133 | llu(*offset), |
| 134 | total_size); |
| 135 | /* |
| 136 | * Stage 1: copy the buffers into client-core's address space |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 137 | */ |
Martin Brandenburg | dbcb5e7 | 2018-03-20 17:00:12 +0000 | [diff] [blame] | 138 | if (type == ORANGEFS_IO_WRITE && total_size) { |
| 139 | ret = orangefs_bufmap_copy_from_iovec(iter, buffer_index, |
| 140 | total_size); |
| 141 | if (ret < 0) { |
| 142 | gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n", |
| 143 | __func__, (long)ret); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 144 | goto out; |
Martin Brandenburg | dbcb5e7 | 2018-03-20 17:00:12 +0000 | [diff] [blame] | 145 | } |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 146 | } |
| 147 | |
| 148 | gossip_debug(GOSSIP_FILE_DEBUG, |
| 149 | "%s(%pU): Calling post_io_request with tag (%llu)\n", |
| 150 | __func__, |
| 151 | handle, |
| 152 | llu(new_op->tag)); |
| 153 | |
| 154 | /* Stage 2: Service the I/O operation */ |
| 155 | ret = service_operation(new_op, |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 156 | type == ORANGEFS_IO_WRITE ? |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 157 | "file_write" : |
| 158 | "file_read", |
| 159 | get_interruptible_flag(inode)); |
| 160 | |
| 161 | /* |
| 162 | * If service_operation() returns -EAGAIN #and# the operation was |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 163 | * purged from orangefs_request_list or htable_ops_in_progress, then |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 164 | * we know that the client was restarted, causing the shared memory |
| 165 | * area to be wiped clean. To restart a write operation in this |
| 166 | * case, we must re-copy the data from the user's iovec to a NEW |
| 167 | * shared memory location. To restart a read operation, we must get |
| 168 | * a new shared memory location. |
| 169 | */ |
| 170 | if (ret == -EAGAIN && op_state_purged(new_op)) { |
Al Viro | 1357d06 | 2016-02-11 21:34:52 -0500 | [diff] [blame] | 171 | orangefs_bufmap_put(buffer_index); |
Al Viro | 7b9761a | 2016-02-07 01:25:06 -0500 | [diff] [blame] | 172 | if (type == ORANGEFS_IO_WRITE) |
Al Viro | c63ed80 | 2017-04-13 03:12:24 -0400 | [diff] [blame] | 173 | iov_iter_revert(iter, total_size); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 174 | gossip_debug(GOSSIP_FILE_DEBUG, |
| 175 | "%s:going to repopulate_shared_memory.\n", |
| 176 | __func__); |
| 177 | goto populate_shared_memory; |
| 178 | } |
| 179 | |
| 180 | if (ret < 0) { |
Mike Marshall | 162ada7 | 2016-03-09 13:12:37 -0500 | [diff] [blame] | 181 | if (ret == -EINTR) { |
| 182 | /* |
| 183 | * We can't return EINTR if any data was written, |
| 184 | * it's not POSIX. It is minimally acceptable |
| 185 | * to give a partial write, the way NFS does. |
| 186 | * |
| 187 | * It would be optimal to return all or nothing, |
| 188 | * but if a userspace write is bigger than |
| 189 | * an IO buffer, and the interrupt occurs |
| 190 | * between buffer writes, that would not be |
| 191 | * possible. |
| 192 | */ |
| 193 | switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) { |
| 194 | /* |
| 195 | * If the op was waiting when the interrupt |
| 196 | * occurred, then the client-core did not |
| 197 | * trigger the write. |
| 198 | */ |
| 199 | case OP_VFS_STATE_WAITING: |
| 200 | if (*offset == 0) |
| 201 | ret = -EINTR; |
| 202 | else |
| 203 | ret = 0; |
| 204 | break; |
Mike Marshall | 95f5f88 | 2018-05-11 17:11:48 -0400 | [diff] [blame] | 205 | /* |
Mike Marshall | 162ada7 | 2016-03-09 13:12:37 -0500 | [diff] [blame] | 206 | * If the op was in progress when the interrupt |
| 207 | * occurred, then the client-core was able to |
| 208 | * trigger the write. |
| 209 | */ |
| 210 | case OP_VFS_STATE_INPROGR: |
Martin Brandenburg | 43f3457 | 2018-11-06 19:51:39 +0000 | [diff] [blame] | 211 | if (type == ORANGEFS_IO_READ) |
| 212 | ret = -EINTR; |
| 213 | else |
| 214 | ret = total_size; |
Mike Marshall | 162ada7 | 2016-03-09 13:12:37 -0500 | [diff] [blame] | 215 | break; |
| 216 | default: |
| 217 | gossip_err("%s: unexpected op state :%d:.\n", |
| 218 | __func__, |
| 219 | new_op->op_state); |
| 220 | ret = 0; |
| 221 | break; |
| 222 | } |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 223 | gossip_debug(GOSSIP_FILE_DEBUG, |
Mike Marshall | 162ada7 | 2016-03-09 13:12:37 -0500 | [diff] [blame] | 224 | "%s: got EINTR, state:%d: %p\n", |
| 225 | __func__, |
| 226 | new_op->op_state, |
| 227 | new_op); |
| 228 | } else { |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 229 | gossip_err("%s: error in %s handle %pU, returning %zd\n", |
| 230 | __func__, |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 231 | type == ORANGEFS_IO_READ ? |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 232 | "read from" : "write to", |
| 233 | handle, ret); |
Mike Marshall | 162ada7 | 2016-03-09 13:12:37 -0500 | [diff] [blame] | 234 | } |
Al Viro | 78699e2 | 2016-02-11 23:07:19 -0500 | [diff] [blame] | 235 | if (orangefs_cancel_op_in_progress(new_op)) |
| 236 | return ret; |
| 237 | |
Al Viro | 897c5df | 2016-02-13 21:06:50 -0500 | [diff] [blame] | 238 | goto out; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 239 | } |
| 240 | |
| 241 | /* |
| 242 | * Stage 3: Post copy buffers from client-core's address space |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 243 | */ |
Martin Brandenburg | dbcb5e7 | 2018-03-20 17:00:12 +0000 | [diff] [blame] | 244 | if (type == ORANGEFS_IO_READ && new_op->downcall.resp.io.amt_complete) { |
| 245 | /* |
| 246 | * NOTE: the iovector can either contain addresses which |
| 247 | * can futher be kernel-space or user-space addresses. |
| 248 | * or it can pointers to struct page's |
| 249 | */ |
Mike Marshall | dd59a64 | 2019-03-25 18:59:29 -0400 | [diff] [blame] | 250 | |
Mike Marshall | 0c4b7ca | 2021-03-28 17:18:11 -0400 | [diff] [blame] | 251 | copy_amount = new_op->downcall.resp.io.amt_complete; |
Mike Marshall | dd59a64 | 2019-03-25 18:59:29 -0400 | [diff] [blame] | 252 | |
Martin Brandenburg | dbcb5e7 | 2018-03-20 17:00:12 +0000 | [diff] [blame] | 253 | ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index, |
Mike Marshall | dd59a64 | 2019-03-25 18:59:29 -0400 | [diff] [blame] | 254 | copy_amount); |
Martin Brandenburg | dbcb5e7 | 2018-03-20 17:00:12 +0000 | [diff] [blame] | 255 | if (ret < 0) { |
| 256 | gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n", |
| 257 | __func__, (long)ret); |
Al Viro | 897c5df | 2016-02-13 21:06:50 -0500 | [diff] [blame] | 258 | goto out; |
Martin Brandenburg | dbcb5e7 | 2018-03-20 17:00:12 +0000 | [diff] [blame] | 259 | } |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 260 | } |
| 261 | gossip_debug(GOSSIP_FILE_DEBUG, |
Mike Marshall | 9d9e7ba | 2016-03-03 13:46:48 -0500 | [diff] [blame] | 262 | "%s(%pU): Amount %s, returned by the sys-io call:%d\n", |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 263 | __func__, |
| 264 | handle, |
Mike Marshall | 9d9e7ba | 2016-03-03 13:46:48 -0500 | [diff] [blame] | 265 | type == ORANGEFS_IO_READ ? "read" : "written", |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 266 | (int)new_op->downcall.resp.io.amt_complete); |
| 267 | |
| 268 | ret = new_op->downcall.resp.io.amt_complete; |
| 269 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 270 | out: |
| 271 | if (buffer_index >= 0) { |
Mike Marshall | 0c4b7ca | 2021-03-28 17:18:11 -0400 | [diff] [blame] | 272 | orangefs_bufmap_put(buffer_index); |
| 273 | gossip_debug(GOSSIP_FILE_DEBUG, |
| 274 | "%s(%pU): PUT buffer_index %d\n", |
| 275 | __func__, handle, buffer_index); |
| 276 | buffer_index = -1; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 277 | } |
Al Viro | ed42fe0 | 2016-01-22 19:47:47 -0500 | [diff] [blame] | 278 | op_release(new_op); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 279 | return ret; |
| 280 | } |
| 281 | |
Martin Brandenburg | 8f04e1b | 2019-02-12 20:19:06 +0000 | [diff] [blame] | 282 | int orangefs_revalidate_mapping(struct inode *inode) |
| 283 | { |
| 284 | struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); |
| 285 | struct address_space *mapping = inode->i_mapping; |
| 286 | unsigned long *bitlock = &orangefs_inode->bitlock; |
| 287 | int ret; |
| 288 | |
| 289 | while (1) { |
| 290 | ret = wait_on_bit(bitlock, 1, TASK_KILLABLE); |
| 291 | if (ret) |
| 292 | return ret; |
| 293 | spin_lock(&inode->i_lock); |
| 294 | if (test_bit(1, bitlock)) { |
| 295 | spin_unlock(&inode->i_lock); |
| 296 | continue; |
| 297 | } |
| 298 | if (!time_before(jiffies, orangefs_inode->mapping_time)) |
| 299 | break; |
| 300 | spin_unlock(&inode->i_lock); |
| 301 | return 0; |
| 302 | } |
| 303 | |
| 304 | set_bit(1, bitlock); |
| 305 | smp_wmb(); |
| 306 | spin_unlock(&inode->i_lock); |
| 307 | |
| 308 | unmap_mapping_range(mapping, 0, 0, 0); |
| 309 | ret = filemap_write_and_wait(mapping); |
| 310 | if (!ret) |
| 311 | ret = invalidate_inode_pages2(mapping); |
| 312 | |
| 313 | orangefs_inode->mapping_time = jiffies + |
| 314 | orangefs_cache_timeout_msecs*HZ/1000; |
| 315 | |
| 316 | clear_bit(1, bitlock); |
| 317 | smp_mb__after_atomic(); |
| 318 | wake_up_bit(bitlock, 1); |
| 319 | |
| 320 | return ret; |
| 321 | } |
| 322 | |
Martin Brandenburg | c453dcf | 2018-02-16 20:51:24 +0000 | [diff] [blame] | 323 | static ssize_t orangefs_file_read_iter(struct kiocb *iocb, |
| 324 | struct iov_iter *iter) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 325 | { |
Martin Brandenburg | 8f04e1b | 2019-02-12 20:19:06 +0000 | [diff] [blame] | 326 | int ret; |
Martin Brandenburg | 889d5f1 | 2016-08-15 15:33:42 -0400 | [diff] [blame] | 327 | orangefs_stats.reads++; |
Martin Brandenburg | 8f04e1b | 2019-02-12 20:19:06 +0000 | [diff] [blame] | 328 | |
| 329 | down_read(&file_inode(iocb->ki_filp)->i_rwsem); |
| 330 | ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); |
| 331 | if (ret) |
| 332 | goto out; |
| 333 | |
| 334 | ret = generic_file_read_iter(iocb, iter); |
| 335 | out: |
| 336 | up_read(&file_inode(iocb->ki_filp)->i_rwsem); |
| 337 | return ret; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 338 | } |
| 339 | |
Martin Brandenburg | 85ac799 | 2018-02-22 18:10:43 +0000 | [diff] [blame] | 340 | static ssize_t orangefs_file_write_iter(struct kiocb *iocb, |
| 341 | struct iov_iter *iter) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 342 | { |
Martin Brandenburg | 8f04e1b | 2019-02-12 20:19:06 +0000 | [diff] [blame] | 343 | int ret; |
Martin Brandenburg | 889d5f1 | 2016-08-15 15:33:42 -0400 | [diff] [blame] | 344 | orangefs_stats.writes++; |
Martin Brandenburg | 8f04e1b | 2019-02-12 20:19:06 +0000 | [diff] [blame] | 345 | |
| 346 | if (iocb->ki_pos > i_size_read(file_inode(iocb->ki_filp))) { |
| 347 | ret = orangefs_revalidate_mapping(file_inode(iocb->ki_filp)); |
| 348 | if (ret) |
| 349 | return ret; |
| 350 | } |
| 351 | |
| 352 | ret = generic_file_write_iter(iocb, iter); |
| 353 | return ret; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 354 | } |
| 355 | |
Souptick Joarder | 8bf782f | 2018-06-29 00:12:40 +0530 | [diff] [blame] | 356 | static vm_fault_t orangefs_fault(struct vm_fault *vmf) |
Martin Brandenburg | a5135ee | 2018-04-03 16:27:12 +0000 | [diff] [blame] | 357 | { |
| 358 | struct file *file = vmf->vma->vm_file; |
Souptick Joarder | 8bf782f | 2018-06-29 00:12:40 +0530 | [diff] [blame] | 359 | int ret; |
Martin Brandenburg | 8b60785 | 2018-02-07 18:44:50 +0000 | [diff] [blame] | 360 | ret = orangefs_inode_getattr(file->f_mapping->host, |
| 361 | ORANGEFS_GETATTR_SIZE); |
Souptick Joarder | 8bf782f | 2018-06-29 00:12:40 +0530 | [diff] [blame] | 362 | if (ret == -ESTALE) |
| 363 | ret = -EIO; |
| 364 | if (ret) { |
Martin Brandenburg | 8b60785 | 2018-02-07 18:44:50 +0000 | [diff] [blame] | 365 | gossip_err("%s: orangefs_inode_getattr failed, " |
| 366 | "ret:%d:.\n", __func__, ret); |
Souptick Joarder | 8bf782f | 2018-06-29 00:12:40 +0530 | [diff] [blame] | 367 | return VM_FAULT_SIGBUS; |
Martin Brandenburg | a5135ee | 2018-04-03 16:27:12 +0000 | [diff] [blame] | 368 | } |
| 369 | return filemap_fault(vmf); |
| 370 | } |
| 371 | |
Colin Ian King | ec62e95a | 2018-04-05 11:50:10 +0100 | [diff] [blame] | 372 | static const struct vm_operations_struct orangefs_file_vm_ops = { |
Martin Brandenburg | a5135ee | 2018-04-03 16:27:12 +0000 | [diff] [blame] | 373 | .fault = orangefs_fault, |
| 374 | .map_pages = filemap_map_pages, |
Martin Brandenburg | 52e2d0a | 2018-12-14 15:24:43 -0500 | [diff] [blame] | 375 | .page_mkwrite = orangefs_page_mkwrite, |
Martin Brandenburg | a5135ee | 2018-04-03 16:27:12 +0000 | [diff] [blame] | 376 | }; |
| 377 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 378 | /* |
| 379 | * Memory map a region of a file. |
| 380 | */ |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 381 | static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 382 | { |
Martin Brandenburg | 8f04e1b | 2019-02-12 20:19:06 +0000 | [diff] [blame] | 383 | int ret; |
| 384 | |
| 385 | ret = orangefs_revalidate_mapping(file_inode(file)); |
| 386 | if (ret) |
| 387 | return ret; |
| 388 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 389 | gossip_debug(GOSSIP_FILE_DEBUG, |
Al Viro | d675684 | 2021-01-06 21:59:52 -0500 | [diff] [blame] | 390 | "orangefs_file_mmap: called on %pD\n", file); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 391 | |
| 392 | /* set the sequential readahead hint */ |
| 393 | vma->vm_flags |= VM_SEQ_READ; |
| 394 | vma->vm_flags &= ~VM_RAND_READ; |
Martin Brandenburg | 3539080 | 2015-09-30 13:11:54 -0400 | [diff] [blame] | 395 | |
Martin Brandenburg | a5135ee | 2018-04-03 16:27:12 +0000 | [diff] [blame] | 396 | file_accessed(file); |
| 397 | vma->vm_ops = &orangefs_file_vm_ops; |
| 398 | return 0; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 399 | } |
| 400 | |
| 401 | #define mapping_nrpages(idata) ((idata)->nrpages) |
| 402 | |
| 403 | /* |
| 404 | * Called to notify the module that there are no more references to |
| 405 | * this file (i.e. no processes have it open). |
| 406 | * |
| 407 | * \note Not called when each file is closed. |
| 408 | */ |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 409 | static int orangefs_file_release(struct inode *inode, struct file *file) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 410 | { |
| 411 | gossip_debug(GOSSIP_FILE_DEBUG, |
Al Viro | f66debf | 2016-08-07 12:20:01 -0400 | [diff] [blame] | 412 | "orangefs_file_release: called on %pD\n", |
| 413 | file); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 414 | |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 415 | /* |
Martin Brandenburg | 6eaff8c | 2016-08-02 14:31:05 -0400 | [diff] [blame] | 416 | * remove all associated inode pages from the page cache and |
Mike Marshall | 5480494 | 2015-10-05 13:44:24 -0400 | [diff] [blame] | 417 | * readahead cache (if any); this forces an expensive refresh of |
| 418 | * data for the next caller of mmap (or 'get_block' accesses) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 419 | */ |
Amir Goldstein | d62a902 | 2016-10-21 07:33:57 +0300 | [diff] [blame] | 420 | if (file_inode(file) && |
| 421 | file_inode(file)->i_mapping && |
| 422 | mapping_nrpages(&file_inode(file)->i_data)) { |
Martin Brandenburg | c51e012 | 2016-08-12 16:12:09 -0400 | [diff] [blame] | 423 | if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) { |
| 424 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 425 | "calling flush_racache on %pU\n", |
| 426 | get_khandle_from_ino(inode)); |
| 427 | flush_racache(inode); |
| 428 | gossip_debug(GOSSIP_INODE_DEBUG, |
| 429 | "flush_racache finished\n"); |
| 430 | } |
Martin Brandenburg | c472ebc | 2018-12-14 17:04:21 -0500 | [diff] [blame] | 431 | |
Martin Brandenburg | ed1e158 | 2016-08-02 16:32:15 -0400 | [diff] [blame] | 432 | } |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 433 | return 0; |
| 434 | } |
| 435 | |
| 436 | /* |
| 437 | * Push all data for a specific file onto permanent storage. |
| 438 | */ |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 439 | static int orangefs_fsync(struct file *file, |
Mike Marshall | 84d0215 | 2015-07-28 13:27:51 -0400 | [diff] [blame] | 440 | loff_t start, |
| 441 | loff_t end, |
| 442 | int datasync) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 443 | { |
Jeff Layton | 49e5571 | 2017-04-12 08:06:02 -0400 | [diff] [blame] | 444 | int ret; |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 445 | struct orangefs_inode_s *orangefs_inode = |
Amir Goldstein | d62a902 | 2016-10-21 07:33:57 +0300 | [diff] [blame] | 446 | ORANGEFS_I(file_inode(file)); |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 447 | struct orangefs_kernel_op_s *new_op = NULL; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 448 | |
Martin Brandenburg | 85ac799 | 2018-02-22 18:10:43 +0000 | [diff] [blame] | 449 | ret = filemap_write_and_wait_range(file_inode(file)->i_mapping, |
| 450 | start, end); |
| 451 | if (ret < 0) |
| 452 | return ret; |
| 453 | |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 454 | new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 455 | if (!new_op) |
| 456 | return -ENOMEM; |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 457 | new_op->upcall.req.fsync.refn = orangefs_inode->refn; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 458 | |
| 459 | ret = service_operation(new_op, |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 460 | "orangefs_fsync", |
Amir Goldstein | d62a902 | 2016-10-21 07:33:57 +0300 | [diff] [blame] | 461 | get_interruptible_flag(file_inode(file))); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 462 | |
| 463 | gossip_debug(GOSSIP_FILE_DEBUG, |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 464 | "orangefs_fsync got return value of %d\n", |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 465 | ret); |
| 466 | |
| 467 | op_release(new_op); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 468 | return ret; |
| 469 | } |
| 470 | |
| 471 | /* |
| 472 | * Change the file pointer position for an instance of an open file. |
| 473 | * |
| 474 | * \note If .llseek is overriden, we must acquire lock as described in |
Mauro Carvalho Chehab | ec23eb5 | 2019-07-26 09:51:27 -0300 | [diff] [blame] | 475 | * Documentation/filesystems/locking.rst. |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 476 | * |
| 477 | * Future upgrade could support SEEK_DATA and SEEK_HOLE but would |
| 478 | * require much changes to the FS |
| 479 | */ |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 480 | static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 481 | { |
| 482 | int ret = -EINVAL; |
Al Viro | 177f8fc | 2016-02-16 20:25:19 -0500 | [diff] [blame] | 483 | struct inode *inode = file_inode(file); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 484 | |
Al Viro | 177f8fc | 2016-02-16 20:25:19 -0500 | [diff] [blame] | 485 | if (origin == SEEK_END) { |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 486 | /* |
| 487 | * revalidate the inode's file size. |
| 488 | * NOTE: We are only interested in file size here, |
| 489 | * so we set mask accordingly. |
| 490 | */ |
Martin Brandenburg | 8b60785 | 2018-02-07 18:44:50 +0000 | [diff] [blame] | 491 | ret = orangefs_inode_getattr(file->f_mapping->host, |
| 492 | ORANGEFS_GETATTR_SIZE); |
Martin Brandenburg | e2f7f0d | 2016-03-15 12:33:20 -0400 | [diff] [blame] | 493 | if (ret == -ESTALE) |
| 494 | ret = -EIO; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 495 | if (ret) { |
| 496 | gossip_debug(GOSSIP_FILE_DEBUG, |
| 497 | "%s:%s:%d calling make bad inode\n", |
| 498 | __FILE__, |
| 499 | __func__, |
| 500 | __LINE__); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 501 | return ret; |
| 502 | } |
| 503 | } |
| 504 | |
| 505 | gossip_debug(GOSSIP_FILE_DEBUG, |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 506 | "orangefs_file_llseek: offset is %ld | origin is %d" |
Mike Marshall | 5480494 | 2015-10-05 13:44:24 -0400 | [diff] [blame] | 507 | " | inode size is %lu\n", |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 508 | (long)offset, |
| 509 | origin, |
Al Viro | 177f8fc | 2016-02-16 20:25:19 -0500 | [diff] [blame] | 510 | (unsigned long)i_size_read(inode)); |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 511 | |
| 512 | return generic_file_llseek(file, offset, origin); |
| 513 | } |
| 514 | |
| 515 | /* |
| 516 | * Support local locks (locks that only this kernel knows about) |
| 517 | * if Orangefs was mounted -o local_lock. |
| 518 | */ |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 519 | static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl) |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 520 | { |
Mike Marshall | f957ae2 | 2015-09-24 12:53:05 -0400 | [diff] [blame] | 521 | int rc = -EINVAL; |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 522 | |
Al Viro | 4506309 | 2016-12-04 18:24:56 -0500 | [diff] [blame] | 523 | if (ORANGEFS_SB(file_inode(filp)->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) { |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 524 | if (cmd == F_GETLK) { |
| 525 | rc = 0; |
| 526 | posix_test_lock(filp, fl); |
| 527 | } else { |
| 528 | rc = posix_lock_file(filp, fl, NULL); |
| 529 | } |
| 530 | } |
| 531 | |
| 532 | return rc; |
| 533 | } |
| 534 | |
Martin Brandenburg | 85ac799 | 2018-02-22 18:10:43 +0000 | [diff] [blame] | 535 | static int orangefs_flush(struct file *file, fl_owner_t id) |
| 536 | { |
Martin Brandenburg | 90fc070 | 2018-03-26 18:58:11 +0000 | [diff] [blame] | 537 | /* |
| 538 | * This is vfs_fsync_range(file, 0, LLONG_MAX, 0) without the |
| 539 | * service_operation in orangefs_fsync. |
| 540 | * |
| 541 | * Do not send fsync to OrangeFS server on a close. Do send fsync |
| 542 | * on an explicit fsync call. This duplicates historical OrangeFS |
| 543 | * behavior. |
| 544 | */ |
Martin Brandenburg | 90fc070 | 2018-03-26 18:58:11 +0000 | [diff] [blame] | 545 | int r; |
| 546 | |
Martin Brandenburg | 90fc070 | 2018-03-26 18:58:11 +0000 | [diff] [blame] | 547 | r = filemap_write_and_wait_range(file->f_mapping, 0, LLONG_MAX); |
| 548 | if (r > 0) |
| 549 | return 0; |
| 550 | else |
| 551 | return r; |
Martin Brandenburg | 85ac799 | 2018-02-22 18:10:43 +0000 | [diff] [blame] | 552 | } |
| 553 | |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 554 | /** ORANGEFS implementation of VFS file operations */ |
| 555 | const struct file_operations orangefs_file_operations = { |
| 556 | .llseek = orangefs_file_llseek, |
| 557 | .read_iter = orangefs_file_read_iter, |
| 558 | .write_iter = orangefs_file_write_iter, |
| 559 | .lock = orangefs_lock, |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 560 | .mmap = orangefs_file_mmap, |
Mike Marshall | ec95f1d | 2020-04-08 08:52:40 -0400 | [diff] [blame] | 561 | .open = generic_file_open, |
Mike Marshall | c104882 | 2020-12-16 16:14:08 -0500 | [diff] [blame] | 562 | .splice_read = generic_file_splice_read, |
| 563 | .splice_write = iter_file_splice_write, |
Martin Brandenburg | 85ac799 | 2018-02-22 18:10:43 +0000 | [diff] [blame] | 564 | .flush = orangefs_flush, |
Yi Liu | 8bb8aef | 2015-11-24 15:12:14 -0500 | [diff] [blame] | 565 | .release = orangefs_file_release, |
| 566 | .fsync = orangefs_fsync, |
Mike Marshall | 5db11c2 | 2015-07-17 10:38:12 -0400 | [diff] [blame] | 567 | }; |