blob: 1cd37ebc4f25febcefa5baea7f9980353b273d3a [file] [log] [blame]
Mike Marshall5db11c22015-07-17 10:38:12 -04001/*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7/*
8 * Linux VFS file operations.
9 */
10
11#include "protocol.h"
Mike Marshall575e9462015-12-04 12:56:14 -050012#include "orangefs-kernel.h"
13#include "orangefs-bufmap.h"
Mike Marshall5db11c22015-07-17 10:38:12 -040014#include <linux/fs.h>
15#include <linux/pagemap.h>
16
Martin Brandenburged1e1582016-08-02 16:32:15 -040017static int flush_racache(struct inode *inode)
18{
19 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
20 struct orangefs_kernel_op_s *new_op;
21 int ret;
22
23 gossip_debug(GOSSIP_UTILS_DEBUG,
24 "%s: %pU: Handle is %pU | fs_id %d\n", __func__,
25 get_khandle_from_ino(inode), &orangefs_inode->refn.khandle,
26 orangefs_inode->refn.fs_id);
27
28 new_op = op_alloc(ORANGEFS_VFS_OP_RA_FLUSH);
29 if (!new_op)
30 return -ENOMEM;
31 new_op->upcall.req.ra_cache_flush.refn = orangefs_inode->refn;
32
33 ret = service_operation(new_op, "orangefs_flush_racache",
34 get_interruptible_flag(inode));
35
36 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: got return value of %d\n",
37 __func__, ret);
38
39 op_release(new_op);
40 return ret;
41}
42
Mike Marshall5db11c22015-07-17 10:38:12 -040043/*
44 * Copy to client-core's address space from the buffers specified
45 * by the iovec upto total_size bytes.
46 * NOTE: the iovector can either contain addresses which
47 * can futher be kernel-space or user-space addresses.
48 * or it can pointers to struct page's
49 */
Al Virobf6bf602016-02-16 20:06:19 -050050static int precopy_buffers(int buffer_index,
Al Viroa5c126a2015-10-08 17:54:31 -040051 struct iov_iter *iter,
Mike Marshall4d1c4402015-09-04 10:31:16 -040052 size_t total_size)
Mike Marshall5db11c22015-07-17 10:38:12 -040053{
54 int ret = 0;
Mike Marshall5db11c22015-07-17 10:38:12 -040055 /*
56 * copy data from application/kernel by pulling it out
57 * of the iovec.
58 */
Mike Marshall4d1c4402015-09-04 10:31:16 -040059
60
61 if (total_size) {
Al Virobf6bf602016-02-16 20:06:19 -050062 ret = orangefs_bufmap_copy_from_iovec(iter,
Yi Liu8bb8aef2015-11-24 15:12:14 -050063 buffer_index,
64 total_size);
Mike Marshall4d1c4402015-09-04 10:31:16 -040065 if (ret < 0)
66 gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
67 __func__,
68 (long)ret);
Mike Marshall4d1c4402015-09-04 10:31:16 -040069 }
70
Mike Marshall5db11c22015-07-17 10:38:12 -040071 if (ret < 0)
72 gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
73 __func__,
74 (long)ret);
75 return ret;
76}
77
78/*
79 * Copy from client-core's address space to the buffers specified
80 * by the iovec upto total_size bytes.
81 * NOTE: the iovector can either contain addresses which
82 * can futher be kernel-space or user-space addresses.
83 * or it can pointers to struct page's
84 */
Al Virobf6bf602016-02-16 20:06:19 -050085static int postcopy_buffers(int buffer_index,
Al Viro5f0e3c92015-10-08 17:52:44 -040086 struct iov_iter *iter,
Mike Marshall4d1c4402015-09-04 10:31:16 -040087 size_t total_size)
Mike Marshall5db11c22015-07-17 10:38:12 -040088{
89 int ret = 0;
Mike Marshall5db11c22015-07-17 10:38:12 -040090 /*
91 * copy data to application/kernel by pushing it out to
92 * the iovec. NOTE; target buffers can be addresses or
93 * struct page pointers.
94 */
95 if (total_size) {
Al Virobf6bf602016-02-16 20:06:19 -050096 ret = orangefs_bufmap_copy_to_iovec(iter,
Yi Liu8bb8aef2015-11-24 15:12:14 -050097 buffer_index,
98 total_size);
Mike Marshall5db11c22015-07-17 10:38:12 -040099 if (ret < 0)
Mike Marshall4d1c4402015-09-04 10:31:16 -0400100 gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400101 __func__,
102 (long)ret);
103 }
104 return ret;
105}
106
107/*
108 * Post and wait for the I/O upcall to finish
109 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500110static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
Al Viro3c2fcfc2015-10-08 18:00:26 -0400111 loff_t *offset, struct iov_iter *iter,
Mike Marshall4d1c4402015-09-04 10:31:16 -0400112 size_t total_size, loff_t readahead_size)
Mike Marshall5db11c22015-07-17 10:38:12 -0400113{
Yi Liu8bb8aef2015-11-24 15:12:14 -0500114 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
115 struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500116 struct orangefs_kernel_op_s *new_op = NULL;
Mike Marshall5db11c22015-07-17 10:38:12 -0400117 int buffer_index = -1;
118 ssize_t ret;
119
Yi Liu8bb8aef2015-11-24 15:12:14 -0500120 new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
Al Viroed42fe02016-01-22 19:47:47 -0500121 if (!new_op)
122 return -ENOMEM;
123
Mike Marshall5db11c22015-07-17 10:38:12 -0400124 /* synchronous I/O */
Mike Marshall5db11c22015-07-17 10:38:12 -0400125 new_op->upcall.req.io.readahead_size = readahead_size;
126 new_op->upcall.req.io.io_type = type;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500127 new_op->upcall.req.io.refn = orangefs_inode->refn;
Mike Marshall5db11c22015-07-17 10:38:12 -0400128
129populate_shared_memory:
130 /* get a shared buffer index */
Al Virob8a99a82016-02-16 20:10:26 -0500131 buffer_index = orangefs_bufmap_get();
132 if (buffer_index < 0) {
133 ret = buffer_index;
Mike Marshall5db11c22015-07-17 10:38:12 -0400134 gossip_debug(GOSSIP_FILE_DEBUG,
Al Virob8a99a82016-02-16 20:10:26 -0500135 "%s: orangefs_bufmap_get failure (%zd)\n",
136 __func__, ret);
Mike Marshall5db11c22015-07-17 10:38:12 -0400137 goto out;
138 }
139 gossip_debug(GOSSIP_FILE_DEBUG,
140 "%s(%pU): GET op %p -> buffer_index %d\n",
141 __func__,
142 handle,
143 new_op,
144 buffer_index);
145
146 new_op->uses_shared_memory = 1;
147 new_op->upcall.req.io.buf_index = buffer_index;
148 new_op->upcall.req.io.count = total_size;
149 new_op->upcall.req.io.offset = *offset;
150
151 gossip_debug(GOSSIP_FILE_DEBUG,
Al Viro3c2fcfc2015-10-08 18:00:26 -0400152 "%s(%pU): offset: %llu total_size: %zd\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400153 __func__,
154 handle,
Mike Marshall5db11c22015-07-17 10:38:12 -0400155 llu(*offset),
156 total_size);
157 /*
158 * Stage 1: copy the buffers into client-core's address space
159 * precopy_buffers only pertains to writes.
160 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500161 if (type == ORANGEFS_IO_WRITE) {
Al Virobf6bf602016-02-16 20:06:19 -0500162 ret = precopy_buffers(buffer_index,
Al Viro3c2fcfc2015-10-08 18:00:26 -0400163 iter,
Mike Marshall4d1c4402015-09-04 10:31:16 -0400164 total_size);
Mike Marshall5db11c22015-07-17 10:38:12 -0400165 if (ret < 0)
166 goto out;
167 }
168
169 gossip_debug(GOSSIP_FILE_DEBUG,
170 "%s(%pU): Calling post_io_request with tag (%llu)\n",
171 __func__,
172 handle,
173 llu(new_op->tag));
174
175 /* Stage 2: Service the I/O operation */
176 ret = service_operation(new_op,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500177 type == ORANGEFS_IO_WRITE ?
Mike Marshall5db11c22015-07-17 10:38:12 -0400178 "file_write" :
179 "file_read",
180 get_interruptible_flag(inode));
181
182 /*
183 * If service_operation() returns -EAGAIN #and# the operation was
Yi Liu8bb8aef2015-11-24 15:12:14 -0500184 * purged from orangefs_request_list or htable_ops_in_progress, then
Mike Marshall5db11c22015-07-17 10:38:12 -0400185 * we know that the client was restarted, causing the shared memory
186 * area to be wiped clean. To restart a write operation in this
187 * case, we must re-copy the data from the user's iovec to a NEW
188 * shared memory location. To restart a read operation, we must get
189 * a new shared memory location.
190 */
191 if (ret == -EAGAIN && op_state_purged(new_op)) {
Al Viro1357d062016-02-11 21:34:52 -0500192 orangefs_bufmap_put(buffer_index);
Al Viroe17be9f2016-02-06 14:59:38 -0500193 buffer_index = -1;
Al Viro7b9761a2016-02-07 01:25:06 -0500194 if (type == ORANGEFS_IO_WRITE)
Al Viroc63ed802017-04-13 03:12:24 -0400195 iov_iter_revert(iter, total_size);
Mike Marshall5db11c22015-07-17 10:38:12 -0400196 gossip_debug(GOSSIP_FILE_DEBUG,
197 "%s:going to repopulate_shared_memory.\n",
198 __func__);
199 goto populate_shared_memory;
200 }
201
202 if (ret < 0) {
Mike Marshall162ada72016-03-09 13:12:37 -0500203 if (ret == -EINTR) {
204 /*
205 * We can't return EINTR if any data was written,
206 * it's not POSIX. It is minimally acceptable
207 * to give a partial write, the way NFS does.
208 *
209 * It would be optimal to return all or nothing,
210 * but if a userspace write is bigger than
211 * an IO buffer, and the interrupt occurs
212 * between buffer writes, that would not be
213 * possible.
214 */
215 switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) {
216 /*
217 * If the op was waiting when the interrupt
218 * occurred, then the client-core did not
219 * trigger the write.
220 */
221 case OP_VFS_STATE_WAITING:
222 if (*offset == 0)
223 ret = -EINTR;
224 else
225 ret = 0;
226 break;
227 /*
228 * If the op was in progress when the interrupt
229 * occurred, then the client-core was able to
230 * trigger the write.
231 */
232 case OP_VFS_STATE_INPROGR:
233 ret = total_size;
234 break;
235 default:
236 gossip_err("%s: unexpected op state :%d:.\n",
237 __func__,
238 new_op->op_state);
239 ret = 0;
240 break;
241 }
Mike Marshall5db11c22015-07-17 10:38:12 -0400242 gossip_debug(GOSSIP_FILE_DEBUG,
Mike Marshall162ada72016-03-09 13:12:37 -0500243 "%s: got EINTR, state:%d: %p\n",
244 __func__,
245 new_op->op_state,
246 new_op);
247 } else {
Mike Marshall5db11c22015-07-17 10:38:12 -0400248 gossip_err("%s: error in %s handle %pU, returning %zd\n",
249 __func__,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500250 type == ORANGEFS_IO_READ ?
Mike Marshall5db11c22015-07-17 10:38:12 -0400251 "read from" : "write to",
252 handle, ret);
Mike Marshall162ada72016-03-09 13:12:37 -0500253 }
Al Viro78699e22016-02-11 23:07:19 -0500254 if (orangefs_cancel_op_in_progress(new_op))
255 return ret;
256
Al Viro897c5df2016-02-13 21:06:50 -0500257 goto out;
Mike Marshall5db11c22015-07-17 10:38:12 -0400258 }
259
260 /*
261 * Stage 3: Post copy buffers from client-core's address space
262 * postcopy_buffers only pertains to reads.
263 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500264 if (type == ORANGEFS_IO_READ) {
Al Virobf6bf602016-02-16 20:06:19 -0500265 ret = postcopy_buffers(buffer_index,
Al Viro3c2fcfc2015-10-08 18:00:26 -0400266 iter,
Mike Marshall4d1c4402015-09-04 10:31:16 -0400267 new_op->downcall.resp.io.amt_complete);
Al Viroc0eae8c2016-02-11 21:28:52 -0500268 if (ret < 0)
Al Viro897c5df2016-02-13 21:06:50 -0500269 goto out;
Mike Marshall5db11c22015-07-17 10:38:12 -0400270 }
271 gossip_debug(GOSSIP_FILE_DEBUG,
Mike Marshall9d9e7ba2016-03-03 13:46:48 -0500272 "%s(%pU): Amount %s, returned by the sys-io call:%d\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400273 __func__,
274 handle,
Mike Marshall9d9e7ba2016-03-03 13:46:48 -0500275 type == ORANGEFS_IO_READ ? "read" : "written",
Mike Marshall5db11c22015-07-17 10:38:12 -0400276 (int)new_op->downcall.resp.io.amt_complete);
277
278 ret = new_op->downcall.resp.io.amt_complete;
279
Mike Marshall5db11c22015-07-17 10:38:12 -0400280out:
281 if (buffer_index >= 0) {
Al Viro1357d062016-02-11 21:34:52 -0500282 orangefs_bufmap_put(buffer_index);
Mike Marshall5db11c22015-07-17 10:38:12 -0400283 gossip_debug(GOSSIP_FILE_DEBUG,
284 "%s(%pU): PUT buffer_index %d\n",
285 __func__, handle, buffer_index);
286 buffer_index = -1;
287 }
Al Viroed42fe02016-01-22 19:47:47 -0500288 op_release(new_op);
Mike Marshall5db11c22015-07-17 10:38:12 -0400289 return ret;
290}
291
292/*
Mike Marshall5db11c22015-07-17 10:38:12 -0400293 * Common entry point for read/write/readv/writev
294 * This function will dispatch it to either the direct I/O
295 * or buffered I/O path depending on the mount options and/or
296 * augmented/extended metadata attached to the file.
297 * Note: File extended attributes override any mount options.
298 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500299static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file,
Al Viro0071ed12015-10-08 18:22:08 -0400300 loff_t *offset, struct iov_iter *iter)
Mike Marshall5db11c22015-07-17 10:38:12 -0400301{
302 struct inode *inode = file->f_mapping->host;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500303 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
304 struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
Al Viro0071ed12015-10-08 18:22:08 -0400305 size_t count = iov_iter_count(iter);
Al Virodc4067f2015-10-08 18:17:26 -0400306 ssize_t total_count = 0;
307 ssize_t ret = -EINVAL;
Mike Marshall5db11c22015-07-17 10:38:12 -0400308
309 gossip_debug(GOSSIP_FILE_DEBUG,
310 "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
311 __func__,
312 handle,
313 (int)count);
314
Yi Liu8bb8aef2015-11-24 15:12:14 -0500315 if (type == ORANGEFS_IO_WRITE) {
Mike Marshall5db11c22015-07-17 10:38:12 -0400316 gossip_debug(GOSSIP_FILE_DEBUG,
317 "%s(%pU): proceeding with offset : %llu, "
318 "size %d\n",
319 __func__,
320 handle,
321 llu(*offset),
322 (int)count);
323 }
324
325 if (count == 0) {
326 ret = 0;
327 goto out;
328 }
329
Al Viro0071ed12015-10-08 18:22:08 -0400330 while (iov_iter_count(iter)) {
331 size_t each_count = iov_iter_count(iter);
Mike Marshall5db11c22015-07-17 10:38:12 -0400332 size_t amt_complete;
333
334 /* how much to transfer in this loop iteration */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500335 if (each_count > orangefs_bufmap_size_query())
336 each_count = orangefs_bufmap_size_query();
Mike Marshall5db11c22015-07-17 10:38:12 -0400337
338 gossip_debug(GOSSIP_FILE_DEBUG,
339 "%s(%pU): size of each_count(%d)\n",
340 __func__,
341 handle,
342 (int)each_count);
343 gossip_debug(GOSSIP_FILE_DEBUG,
344 "%s(%pU): BEFORE wait_for_io: offset is %d\n",
345 __func__,
346 handle,
347 (int)*offset);
348
Al Viro0071ed12015-10-08 18:22:08 -0400349 ret = wait_for_direct_io(type, inode, offset, iter,
Al Viro3c2fcfc2015-10-08 18:00:26 -0400350 each_count, 0);
Mike Marshall5db11c22015-07-17 10:38:12 -0400351 gossip_debug(GOSSIP_FILE_DEBUG,
352 "%s(%pU): return from wait_for_io:%d\n",
353 __func__,
354 handle,
355 (int)ret);
356
357 if (ret < 0)
358 goto out;
359
Mike Marshall5db11c22015-07-17 10:38:12 -0400360 *offset += ret;
361 total_count += ret;
362 amt_complete = ret;
363
364 gossip_debug(GOSSIP_FILE_DEBUG,
365 "%s(%pU): AFTER wait_for_io: offset is %d\n",
366 __func__,
367 handle,
368 (int)*offset);
369
370 /*
371 * if we got a short I/O operations,
372 * fall out and return what we got so far
373 */
374 if (amt_complete < each_count)
375 break;
376 } /*end while */
377
Al Viro6d4c1a32016-02-16 20:15:43 -0500378out:
Mike Marshall5db11c22015-07-17 10:38:12 -0400379 if (total_count > 0)
380 ret = total_count;
Mike Marshall5db11c22015-07-17 10:38:12 -0400381 if (ret > 0) {
Yi Liu8bb8aef2015-11-24 15:12:14 -0500382 if (type == ORANGEFS_IO_READ) {
Mike Marshall5db11c22015-07-17 10:38:12 -0400383 file_accessed(file);
384 } else {
Yi Liu8bb8aef2015-11-24 15:12:14 -0500385 SetMtimeFlag(orangefs_inode);
Deepa Dinamani078cd822016-09-14 07:48:04 -0700386 inode->i_mtime = current_time(inode);
Mike Marshall5db11c22015-07-17 10:38:12 -0400387 mark_inode_dirty_sync(inode);
388 }
389 }
390
391 gossip_debug(GOSSIP_FILE_DEBUG,
392 "%s(%pU): Value(%d) returned.\n",
393 __func__,
394 handle,
395 (int)ret);
396
397 return ret;
398}
399
400/*
401 * Read data from a specified offset in a file (referenced by inode).
402 * Data may be placed either in a user or kernel buffer.
403 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500404ssize_t orangefs_inode_read(struct inode *inode,
405 struct iov_iter *iter,
406 loff_t *offset,
407 loff_t readahead_size)
Mike Marshall5db11c22015-07-17 10:38:12 -0400408{
Yi Liu8bb8aef2015-11-24 15:12:14 -0500409 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
Al Viro74f68fc2015-10-08 18:31:05 -0400410 size_t count = iov_iter_count(iter);
Mike Marshall5db11c22015-07-17 10:38:12 -0400411 size_t bufmap_size;
Mike Marshall5db11c22015-07-17 10:38:12 -0400412 ssize_t ret = -EINVAL;
413
Martin Brandenburg889d5f12016-08-15 15:33:42 -0400414 orangefs_stats.reads++;
Mike Marshall5db11c22015-07-17 10:38:12 -0400415
Yi Liu8bb8aef2015-11-24 15:12:14 -0500416 bufmap_size = orangefs_bufmap_size_query();
Mike Marshall5db11c22015-07-17 10:38:12 -0400417 if (count > bufmap_size) {
418 gossip_debug(GOSSIP_FILE_DEBUG,
419 "%s: count is too large (%zd/%zd)!\n",
420 __func__, count, bufmap_size);
421 return -EINVAL;
422 }
423
424 gossip_debug(GOSSIP_FILE_DEBUG,
425 "%s(%pU) %zd@%llu\n",
426 __func__,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500427 &orangefs_inode->refn.khandle,
Mike Marshall5db11c22015-07-17 10:38:12 -0400428 count,
429 llu(*offset));
430
Yi Liu8bb8aef2015-11-24 15:12:14 -0500431 ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter,
Mike Marshall4d1c4402015-09-04 10:31:16 -0400432 count, readahead_size);
Mike Marshall5db11c22015-07-17 10:38:12 -0400433 if (ret > 0)
434 *offset += ret;
435
436 gossip_debug(GOSSIP_FILE_DEBUG,
437 "%s(%pU): Value(%zd) returned.\n",
438 __func__,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500439 &orangefs_inode->refn.khandle,
Mike Marshall5db11c22015-07-17 10:38:12 -0400440 ret);
441
442 return ret;
443}
444
Yi Liu8bb8aef2015-11-24 15:12:14 -0500445static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
Mike Marshall5db11c22015-07-17 10:38:12 -0400446{
447 struct file *file = iocb->ki_filp;
448 loff_t pos = *(&iocb->ki_pos);
449 ssize_t rc = 0;
Mike Marshall5db11c22015-07-17 10:38:12 -0400450
451 BUG_ON(iocb->private);
452
Yi Liu8bb8aef2015-11-24 15:12:14 -0500453 gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n");
Mike Marshall5db11c22015-07-17 10:38:12 -0400454
Martin Brandenburg889d5f12016-08-15 15:33:42 -0400455 orangefs_stats.reads++;
Mike Marshall5db11c22015-07-17 10:38:12 -0400456
Yi Liu8bb8aef2015-11-24 15:12:14 -0500457 rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter);
Mike Marshall5db11c22015-07-17 10:38:12 -0400458 iocb->ki_pos = pos;
459
460 return rc;
461}
462
Yi Liu8bb8aef2015-11-24 15:12:14 -0500463static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
Mike Marshall5db11c22015-07-17 10:38:12 -0400464{
465 struct file *file = iocb->ki_filp;
Mike Marshall3f1b6942015-11-13 13:05:11 -0500466 loff_t pos;
Mike Marshall5db11c22015-07-17 10:38:12 -0400467 ssize_t rc;
468
469 BUG_ON(iocb->private);
470
Yi Liu8bb8aef2015-11-24 15:12:14 -0500471 gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n");
Mike Marshall5db11c22015-07-17 10:38:12 -0400472
Al Viro5ecfcb22016-04-12 00:43:20 -0400473 inode_lock(file->f_mapping->host);
Mike Marshall5db11c22015-07-17 10:38:12 -0400474
475 /* Make sure generic_write_checks sees an up to date inode size. */
476 if (file->f_flags & O_APPEND) {
Martin Brandenburge2f7f0d2016-03-15 12:33:20 -0400477 rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
478 if (rc == -ESTALE)
479 rc = -EIO;
Mike Marshall5db11c22015-07-17 10:38:12 -0400480 if (rc) {
Martin Brandenburge2f7f0d2016-03-15 12:33:20 -0400481 gossip_err("%s: orangefs_inode_getattr failed, "
482 "rc:%zd:.\n", __func__, rc);
Mike Marshall5db11c22015-07-17 10:38:12 -0400483 goto out;
484 }
485 }
486
487 if (file->f_pos > i_size_read(file->f_mapping->host))
Yi Liu8bb8aef2015-11-24 15:12:14 -0500488 orangefs_i_size_write(file->f_mapping->host, file->f_pos);
Mike Marshall5db11c22015-07-17 10:38:12 -0400489
490 rc = generic_write_checks(iocb, iter);
491
492 if (rc <= 0) {
493 gossip_err("%s: generic_write_checks failed, rc:%zd:.\n",
494 __func__, rc);
495 goto out;
496 }
497
Mike Marshall3f1b6942015-11-13 13:05:11 -0500498 /*
499 * if we are appending, generic_write_checks would have updated
500 * pos to the end of the file, so we will wait till now to set
501 * pos...
502 */
503 pos = *(&iocb->ki_pos);
504
Yi Liu8bb8aef2015-11-24 15:12:14 -0500505 rc = do_readv_writev(ORANGEFS_IO_WRITE,
Mike Marshall5db11c22015-07-17 10:38:12 -0400506 file,
507 &pos,
Al Viro0071ed12015-10-08 18:22:08 -0400508 iter);
Mike Marshall5db11c22015-07-17 10:38:12 -0400509 if (rc < 0) {
510 gossip_err("%s: do_readv_writev failed, rc:%zd:.\n",
511 __func__, rc);
512 goto out;
513 }
514
515 iocb->ki_pos = pos;
Martin Brandenburg889d5f12016-08-15 15:33:42 -0400516 orangefs_stats.writes++;
Mike Marshall5db11c22015-07-17 10:38:12 -0400517
518out:
519
Al Viro5ecfcb22016-04-12 00:43:20 -0400520 inode_unlock(file->f_mapping->host);
Mike Marshall5db11c22015-07-17 10:38:12 -0400521 return rc;
522}
523
524/*
525 * Perform a miscellaneous operation on a file.
526 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500527static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
Mike Marshall5db11c22015-07-17 10:38:12 -0400528{
529 int ret = -ENOTTY;
530 __u64 val = 0;
531 unsigned long uval;
532
533 gossip_debug(GOSSIP_FILE_DEBUG,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500534 "orangefs_ioctl: called with cmd %d\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400535 cmd);
536
537 /*
538 * we understand some general ioctls on files, such as the immutable
539 * and append flags
540 */
541 if (cmd == FS_IOC_GETFLAGS) {
542 val = 0;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500543 ret = orangefs_inode_getxattr(file_inode(file),
Yi Liu8bb8aef2015-11-24 15:12:14 -0500544 "user.pvfs2.meta_hint",
545 &val, sizeof(val));
Mike Marshall5db11c22015-07-17 10:38:12 -0400546 if (ret < 0 && ret != -ENODATA)
547 return ret;
548 else if (ret == -ENODATA)
549 val = 0;
550 uval = val;
551 gossip_debug(GOSSIP_FILE_DEBUG,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500552 "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400553 (unsigned long long)uval);
554 return put_user(uval, (int __user *)arg);
555 } else if (cmd == FS_IOC_SETFLAGS) {
556 ret = 0;
557 if (get_user(uval, (int __user *)arg))
558 return -EFAULT;
559 /*
Yi Liu8bb8aef2015-11-24 15:12:14 -0500560 * ORANGEFS_MIRROR_FL is set internally when the mirroring mode
Mike Marshall5db11c22015-07-17 10:38:12 -0400561 * is turned on for a file. The user is not allowed to turn
562 * on this bit, but the bit is present if the user first gets
563 * the flags and then updates the flags with some new
564 * settings. So, we ignore it in the following edit. bligon.
565 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500566 if ((uval & ~ORANGEFS_MIRROR_FL) &
Mike Marshall5db11c22015-07-17 10:38:12 -0400567 (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) {
Yi Liu8bb8aef2015-11-24 15:12:14 -0500568 gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n");
Mike Marshall5db11c22015-07-17 10:38:12 -0400569 return -EINVAL;
570 }
571 val = uval;
572 gossip_debug(GOSSIP_FILE_DEBUG,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500573 "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400574 (unsigned long long)val);
Yi Liu8bb8aef2015-11-24 15:12:14 -0500575 ret = orangefs_inode_setxattr(file_inode(file),
Yi Liu8bb8aef2015-11-24 15:12:14 -0500576 "user.pvfs2.meta_hint",
577 &val, sizeof(val), 0);
Mike Marshall5db11c22015-07-17 10:38:12 -0400578 }
579
580 return ret;
581}
582
583/*
584 * Memory map a region of a file.
585 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500586static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
Mike Marshall5db11c22015-07-17 10:38:12 -0400587{
588 gossip_debug(GOSSIP_FILE_DEBUG,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500589 "orangefs_file_mmap: called on %s\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400590 (file ?
591 (char *)file->f_path.dentry->d_name.name :
592 (char *)"Unknown"));
593
594 /* set the sequential readahead hint */
595 vma->vm_flags |= VM_SEQ_READ;
596 vma->vm_flags &= ~VM_RAND_READ;
Martin Brandenburg35390802015-09-30 13:11:54 -0400597
598 /* Use readonly mmap since we cannot support writable maps. */
599 return generic_file_readonly_mmap(file, vma);
Mike Marshall5db11c22015-07-17 10:38:12 -0400600}
601
602#define mapping_nrpages(idata) ((idata)->nrpages)
603
604/*
605 * Called to notify the module that there are no more references to
606 * this file (i.e. no processes have it open).
607 *
608 * \note Not called when each file is closed.
609 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500610static int orangefs_file_release(struct inode *inode, struct file *file)
Mike Marshall5db11c22015-07-17 10:38:12 -0400611{
612 gossip_debug(GOSSIP_FILE_DEBUG,
Al Virof66debf2016-08-07 12:20:01 -0400613 "orangefs_file_release: called on %pD\n",
614 file);
Mike Marshall5db11c22015-07-17 10:38:12 -0400615
Yi Liu8bb8aef2015-11-24 15:12:14 -0500616 orangefs_flush_inode(inode);
Mike Marshall5db11c22015-07-17 10:38:12 -0400617
618 /*
Martin Brandenburg6eaff8c2016-08-02 14:31:05 -0400619 * remove all associated inode pages from the page cache and
Mike Marshall54804942015-10-05 13:44:24 -0400620 * readahead cache (if any); this forces an expensive refresh of
621 * data for the next caller of mmap (or 'get_block' accesses)
Mike Marshall5db11c22015-07-17 10:38:12 -0400622 */
Amir Goldsteind62a9022016-10-21 07:33:57 +0300623 if (file_inode(file) &&
624 file_inode(file)->i_mapping &&
625 mapping_nrpages(&file_inode(file)->i_data)) {
Martin Brandenburgc51e0122016-08-12 16:12:09 -0400626 if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) {
627 gossip_debug(GOSSIP_INODE_DEBUG,
628 "calling flush_racache on %pU\n",
629 get_khandle_from_ino(inode));
630 flush_racache(inode);
631 gossip_debug(GOSSIP_INODE_DEBUG,
632 "flush_racache finished\n");
633 }
Amir Goldsteind62a9022016-10-21 07:33:57 +0300634 truncate_inode_pages(file_inode(file)->i_mapping,
Mike Marshall5db11c22015-07-17 10:38:12 -0400635 0);
Martin Brandenburged1e1582016-08-02 16:32:15 -0400636 }
Mike Marshall5db11c22015-07-17 10:38:12 -0400637 return 0;
638}
639
640/*
641 * Push all data for a specific file onto permanent storage.
642 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500643static int orangefs_fsync(struct file *file,
Mike Marshall84d02152015-07-28 13:27:51 -0400644 loff_t start,
645 loff_t end,
646 int datasync)
Mike Marshall5db11c22015-07-17 10:38:12 -0400647{
648 int ret = -EINVAL;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500649 struct orangefs_inode_s *orangefs_inode =
Amir Goldsteind62a9022016-10-21 07:33:57 +0300650 ORANGEFS_I(file_inode(file));
Yi Liu8bb8aef2015-11-24 15:12:14 -0500651 struct orangefs_kernel_op_s *new_op = NULL;
Mike Marshall5db11c22015-07-17 10:38:12 -0400652
653 /* required call */
654 filemap_write_and_wait_range(file->f_mapping, start, end);
655
Yi Liu8bb8aef2015-11-24 15:12:14 -0500656 new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC);
Mike Marshall5db11c22015-07-17 10:38:12 -0400657 if (!new_op)
658 return -ENOMEM;
Yi Liu8bb8aef2015-11-24 15:12:14 -0500659 new_op->upcall.req.fsync.refn = orangefs_inode->refn;
Mike Marshall5db11c22015-07-17 10:38:12 -0400660
661 ret = service_operation(new_op,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500662 "orangefs_fsync",
Amir Goldsteind62a9022016-10-21 07:33:57 +0300663 get_interruptible_flag(file_inode(file)));
Mike Marshall5db11c22015-07-17 10:38:12 -0400664
665 gossip_debug(GOSSIP_FILE_DEBUG,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500666 "orangefs_fsync got return value of %d\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400667 ret);
668
669 op_release(new_op);
670
Amir Goldsteind62a9022016-10-21 07:33:57 +0300671 orangefs_flush_inode(file_inode(file));
Mike Marshall5db11c22015-07-17 10:38:12 -0400672 return ret;
673}
674
675/*
676 * Change the file pointer position for an instance of an open file.
677 *
678 * \note If .llseek is overriden, we must acquire lock as described in
679 * Documentation/filesystems/Locking.
680 *
681 * Future upgrade could support SEEK_DATA and SEEK_HOLE but would
682 * require much changes to the FS
683 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500684static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin)
Mike Marshall5db11c22015-07-17 10:38:12 -0400685{
686 int ret = -EINVAL;
Al Viro177f8fc2016-02-16 20:25:19 -0500687 struct inode *inode = file_inode(file);
Mike Marshall5db11c22015-07-17 10:38:12 -0400688
Al Viro177f8fc2016-02-16 20:25:19 -0500689 if (origin == SEEK_END) {
Mike Marshall5db11c22015-07-17 10:38:12 -0400690 /*
691 * revalidate the inode's file size.
692 * NOTE: We are only interested in file size here,
693 * so we set mask accordingly.
694 */
Martin Brandenburge2f7f0d2016-03-15 12:33:20 -0400695 ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
696 if (ret == -ESTALE)
697 ret = -EIO;
Mike Marshall5db11c22015-07-17 10:38:12 -0400698 if (ret) {
699 gossip_debug(GOSSIP_FILE_DEBUG,
700 "%s:%s:%d calling make bad inode\n",
701 __FILE__,
702 __func__,
703 __LINE__);
Mike Marshall5db11c22015-07-17 10:38:12 -0400704 return ret;
705 }
706 }
707
708 gossip_debug(GOSSIP_FILE_DEBUG,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500709 "orangefs_file_llseek: offset is %ld | origin is %d"
Mike Marshall54804942015-10-05 13:44:24 -0400710 " | inode size is %lu\n",
Mike Marshall5db11c22015-07-17 10:38:12 -0400711 (long)offset,
712 origin,
Al Viro177f8fc2016-02-16 20:25:19 -0500713 (unsigned long)i_size_read(inode));
Mike Marshall5db11c22015-07-17 10:38:12 -0400714
715 return generic_file_llseek(file, offset, origin);
716}
717
718/*
719 * Support local locks (locks that only this kernel knows about)
720 * if Orangefs was mounted -o local_lock.
721 */
Yi Liu8bb8aef2015-11-24 15:12:14 -0500722static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl)
Mike Marshall5db11c22015-07-17 10:38:12 -0400723{
Mike Marshallf957ae22015-09-24 12:53:05 -0400724 int rc = -EINVAL;
Mike Marshall5db11c22015-07-17 10:38:12 -0400725
Al Viro45063092016-12-04 18:24:56 -0500726 if (ORANGEFS_SB(file_inode(filp)->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) {
Mike Marshall5db11c22015-07-17 10:38:12 -0400727 if (cmd == F_GETLK) {
728 rc = 0;
729 posix_test_lock(filp, fl);
730 } else {
731 rc = posix_lock_file(filp, fl, NULL);
732 }
733 }
734
735 return rc;
736}
737
Yi Liu8bb8aef2015-11-24 15:12:14 -0500738/** ORANGEFS implementation of VFS file operations */
739const struct file_operations orangefs_file_operations = {
740 .llseek = orangefs_file_llseek,
741 .read_iter = orangefs_file_read_iter,
742 .write_iter = orangefs_file_write_iter,
743 .lock = orangefs_lock,
744 .unlocked_ioctl = orangefs_ioctl,
745 .mmap = orangefs_file_mmap,
Mike Marshall5db11c22015-07-17 10:38:12 -0400746 .open = generic_file_open,
Yi Liu8bb8aef2015-11-24 15:12:14 -0500747 .release = orangefs_file_release,
748 .fsync = orangefs_fsync,
Mike Marshall5db11c22015-07-17 10:38:12 -0400749};