/*
 * (C) 2001 Clemson University and The University of Chicago
 *
 * Changes by Acxiom Corporation to add protocol version to kernel
 * communication, Copyright Acxiom Corporation, 2005.
 *
 * See COPYING in top-level directory.
 */

#include "protocol.h"
#include "orangefs-kernel.h"
#include "orangefs-dev-proto.h"
#include "orangefs-bufmap.h"

#include <linux/debugfs.h>
#include <linux/slab.h>

/* this file implements the /dev/pvfs2-req device node */

static int open_access_count;

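/*
 * Complain loudly when a second process tries to open the request
 * device; only one client-core instance may hold it open at a time.
 */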
#define DUMP_DEVICE_ERROR()                                                   \
do {                                                                          \
	gossip_err("*****************************************************\n");\
	gossip_err("ORANGEFS Device Error: You cannot open the device file "); \
	gossip_err("\n/dev/%s more than once. Please make sure that\nthere "  \
		   "are no ", ORANGEFS_REQDEVICE_NAME);                       \
	gossip_err("instances of a program using this device\ncurrently "     \
		   "running. (You must verify this!)\n");                     \
	gossip_err("For example, you can use the lsof program as follows:\n");\
	gossip_err("'lsof | grep %s' (run this as root)\n",                   \
		   ORANGEFS_REQDEVICE_NAME);                                  \
	gossip_err("  open_access_count = %d\n", open_access_count);          \
	gossip_err("*****************************************************\n");\
} while (0)

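/*
 * do_div() divides tag by table_size in place and hands back the
 * remainder, so the bucket index below is simply tag % table_size.
 */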
static int hash_func(__u64 tag, int table_size)
{
	return do_div(tag, (unsigned int)table_size);
}

static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op)
{
	int index = hash_func(op->tag, hash_table_size);

	spin_lock(&htable_ops_in_progress_lock);
	list_add_tail(&op->list, &htable_ops_in_progress[index]);
	spin_unlock(&htable_ops_in_progress_lock);
}

static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag)
{
	struct orangefs_kernel_op_s *op, *next;
	int index;

	index = hash_func(tag, hash_table_size);

	spin_lock(&htable_ops_in_progress_lock);
	list_for_each_entry_safe(op,
				 next,
				 &htable_ops_in_progress[index],
				 list) {
		if (op->tag == tag) {
			list_del(&op->list);
			spin_unlock(&htable_ops_in_progress_lock);
			return op;
		}
	}

	spin_unlock(&htable_ops_in_progress_lock);
	return NULL;
}

static int orangefs_devreq_open(struct inode *inode, struct file *file)
{
	int ret = -EINVAL;

	if (!(file->f_flags & O_NONBLOCK)) {
		gossip_err("%s: device cannot be opened in blocking mode\n",
			   __func__);
		goto out;
	}
	ret = -EACCES;
	gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n");
	mutex_lock(&devreq_mutex);

	if (open_access_count == 0) {
		open_access_count = 1;
		ret = 0;
	} else {
		DUMP_DEVICE_ERROR();
	}
	mutex_unlock(&devreq_mutex);

out:

	gossip_debug(GOSSIP_DEV_DEBUG,
		     "pvfs2-client-core: open device complete (ret = %d)\n",
		     ret);
	return ret;
}

/* Function for read() callers into the device */
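/*
 * Each successful read hands the client-core exactly one upcall, laid
 * out as: a __s32 protocol version, a __s32 magic number, the __u64 op
 * tag, and then the struct orangefs_upcall_s itself.
 */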
static ssize_t orangefs_devreq_read(struct file *file,
				    char __user *buf,
				    size_t count, loff_t *offset)
{
	struct orangefs_kernel_op_s *op, *temp;
	__s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
	static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
	struct orangefs_kernel_op_s *cur_op = NULL;
	unsigned long ret;

	/* We do not support blocking IO. */
	if (!(file->f_flags & O_NONBLOCK)) {
		gossip_err("%s: blocking read from client-core.\n",
			   __func__);
		return -EINVAL;
	}

	/*
	 * The client will do an ioctl to find MAX_DEV_REQ_UPSIZE, then
	 * always read with that size buffer.
	 */
	if (count != MAX_DEV_REQ_UPSIZE) {
		gossip_err("orangefs: client-core tried to read wrong size\n");
		return -EINVAL;
	}

	/* Get next op (if any) from top of list. */
	spin_lock(&orangefs_request_list_lock);
	list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
		__s32 fsid;
		/* This lock is held past the end of the loop when we break. */
		spin_lock(&op->lock);

		fsid = fsid_of_op(op);
		if (fsid != ORANGEFS_FS_ID_NULL) {
			int ret;
			/* Skip ops whose filesystem needs to be mounted. */
			ret = fs_mount_pending(fsid);
			if (ret == 1) {
				gossip_debug(GOSSIP_DEV_DEBUG,
					     "orangefs: skipping op tag %llu %s\n",
					     llu(op->tag), get_opname_string(op));
				spin_unlock(&op->lock);
				continue;
			/*
			 * Skip ops whose filesystem we don't know about unless
			 * it is being mounted.
			 */
			/* XXX: is there a better way to detect this? */
			} else if (ret == -1 &&
				   !(op->upcall.type ==
				     ORANGEFS_VFS_OP_FS_MOUNT ||
				     op->upcall.type ==
				     ORANGEFS_VFS_OP_GETATTR)) {
				gossip_debug(GOSSIP_DEV_DEBUG,
					     "orangefs: skipping op tag %llu %s\n",
					     llu(op->tag), get_opname_string(op));
				gossip_err(
					"orangefs: ERROR: fs_mount_pending %d\n",
					fsid);
				spin_unlock(&op->lock);
				continue;
			}
		}
		/*
		 * Either this op does not pertain to a filesystem, is mounting
		 * a filesystem, or pertains to a mounted filesystem. Let it
		 * through.
		 */
		cur_op = op;
		break;
	}

	/*
	 * At this point we either have a valid op and can continue or have not
	 * found an op and must ask the client to try again later.
	 */
	if (!cur_op) {
		spin_unlock(&orangefs_request_list_lock);
		return -EAGAIN;
	}

	gossip_debug(GOSSIP_DEV_DEBUG, "orangefs: reading op tag %llu %s\n",
		     llu(cur_op->tag), get_opname_string(cur_op));

	/*
	 * Such an op should never be on the list in the first place. If so, we
	 * will abort.
	 */
	if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
		gossip_err("orangefs: ERROR: Current op already queued.\n");
		list_del(&cur_op->list);
		spin_unlock(&cur_op->lock);
		spin_unlock(&orangefs_request_list_lock);
		return -EAGAIN;
	}

	/*
	 * Set the operation to be in progress and move it between lists since
	 * it has been sent to the client.
	 */
	set_op_state_inprogress(cur_op);

	list_del(&cur_op->list);
	spin_unlock(&orangefs_request_list_lock);
	orangefs_devreq_add_op(cur_op);
	spin_unlock(&cur_op->lock);

	/* Push the upcall out. */
	ret = copy_to_user(buf, &proto_ver, sizeof(__s32));
	if (ret != 0)
		goto error;
	ret = copy_to_user(buf + sizeof(__s32), &magic, sizeof(__s32));
	if (ret != 0)
		goto error;
	ret = copy_to_user(buf + 2 * sizeof(__s32), &cur_op->tag, sizeof(__u64));
	if (ret != 0)
		goto error;
	ret = copy_to_user(buf + 2 * sizeof(__s32) + sizeof(__u64), &cur_op->upcall,
			   sizeof(struct orangefs_upcall_s));
	if (ret != 0)
		goto error;

	/* The client only asks to read one size buffer. */
	return MAX_DEV_REQ_UPSIZE;
error:
	/*
	 * We were unable to copy the op data to the client. Put the op back in
	 * list. If client has crashed, the op will be purged later when the
	 * device is released.
	 */
	gossip_err("orangefs: Failed to copy data to user space\n");
	spin_lock(&orangefs_request_list_lock);
	spin_lock(&cur_op->lock);
	set_op_state_waiting(cur_op);
	orangefs_devreq_remove_op(cur_op->tag);
	list_add(&cur_op->list, &orangefs_request_list);
	spin_unlock(&cur_op->lock);
	spin_unlock(&orangefs_request_list_lock);
	return -EFAULT;
}

/*
 * Function for writev() callers into the device.
 *
 * Userspace should have written:
 *  - __u32 version
 *  - __u32 magic
 *  - __u64 tag
 *  - struct orangefs_downcall_s
 *  - trailer buffer (in the case of READDIR operations)
 */
static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
					  struct iov_iter *iter)
{
	ssize_t ret;
	struct orangefs_kernel_op_s *op = NULL;
	struct {
		__u32 version;
		__u32 magic;
		__u64 tag;
	} head;
	int total = ret = iov_iter_count(iter);
	int n;
	int downcall_size = sizeof(struct orangefs_downcall_s);
	int head_size = sizeof(head);

	gossip_debug(GOSSIP_DEV_DEBUG, "%s: total:%d: ret:%zd:\n",
		     __func__,
		     total,
		     ret);

	if (total < MAX_DEV_REQ_DOWNSIZE) {
		gossip_err("%s: total:%d: must be at least:%u:\n",
			   __func__,
			   total,
			   (unsigned int) MAX_DEV_REQ_DOWNSIZE);
		ret = -EFAULT;
		goto out;
	}

	n = copy_from_iter(&head, head_size, iter);
	if (n < head_size) {
		gossip_err("%s: failed to copy head.\n", __func__);
		ret = -EFAULT;
		goto out;
	}

	if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) {
		gossip_err("%s: userspace claims version "
			   "%d, minimum version required: %d.\n",
			   __func__,
			   head.version,
			   ORANGEFS_MINIMUM_USERSPACE_VERSION);
		ret = -EPROTO;
		goto out;
	}

	if (head.magic != ORANGEFS_DEVREQ_MAGIC) {
		gossip_err("Error: Device magic number does not match.\n");
		ret = -EPROTO;
		goto out;
	}

	op = orangefs_devreq_remove_op(head.tag);
	if (!op) {
		gossip_err("WARNING: No one's waiting for tag %llu\n",
			   llu(head.tag));
		goto out;
	}

	get_op(op); /* increase ref count. */
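	/* Matched by put_op() on the failure paths below. */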

	n = copy_from_iter(&op->downcall, downcall_size, iter);
	if (n != downcall_size) {
		gossip_err("%s: failed to copy downcall.\n", __func__);
		put_op(op);
		ret = -EFAULT;
		goto out;
	}

	if (op->downcall.status)
		goto wakeup;

	/*
	 * We've successfully peeled off the head and the downcall.
	 * Something has gone awry if total doesn't equal the
	 * sum of head_size, downcall_size and trailer_size.
	 */
	if ((head_size + downcall_size + op->downcall.trailer_size) != total) {
		gossip_err("%s: funky write, head_size:%d"
			   ": downcall_size:%d: trailer_size:%lld"
			   ": total size:%d:\n",
			   __func__,
			   head_size,
			   downcall_size,
			   op->downcall.trailer_size,
			   total);
		put_op(op);
		ret = -EFAULT;
		goto out;
	}

	/* Only READDIR operations should have trailers. */
	if ((op->downcall.type != ORANGEFS_VFS_OP_READDIR) &&
	    (op->downcall.trailer_size != 0)) {
		gossip_err("%s: %x operation with trailer.",
			   __func__,
			   op->downcall.type);
		put_op(op);
		ret = -EFAULT;
		goto out;
	}

	/* READDIR operations should always have trailers. */
	if ((op->downcall.type == ORANGEFS_VFS_OP_READDIR) &&
	    (op->downcall.trailer_size == 0)) {
		gossip_err("%s: %x operation with no trailer.",
			   __func__,
			   op->downcall.type);
		put_op(op);
		ret = -EFAULT;
		goto out;
	}

	if (op->downcall.type != ORANGEFS_VFS_OP_READDIR)
		goto wakeup;

	op->downcall.trailer_buf =
		vmalloc(op->downcall.trailer_size);
	if (op->downcall.trailer_buf == NULL) {
		gossip_err("%s: failed trailer vmalloc.\n",
			   __func__);
		put_op(op);
		ret = -ENOMEM;
		goto out;
	}
	memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
	n = copy_from_iter(op->downcall.trailer_buf,
			   op->downcall.trailer_size,
			   iter);
	if (n != op->downcall.trailer_size) {
		gossip_err("%s: failed to copy trailer.\n", __func__);
		vfree(op->downcall.trailer_buf);
		put_op(op);
		ret = -EFAULT;
		goto out;
	}

wakeup:

	/*
	 * If this operation is an I/O operation we need to wait
	 * for all data to be copied before we can return to avoid
	 * buffer corruption and races that can pull the buffers
	 * out from under us.
	 *
	 * Essentially we're synchronizing with other parts of the
	 * vfs implicitly by not allowing the user space
	 * application reading/writing this device to return until
	 * the buffers are done being used.
	 */
	if (op->downcall.type == ORANGEFS_VFS_OP_FILE_IO) {
		int timed_out = 0;
		DEFINE_WAIT(wait_entry);

		/*
		 * tell the vfs op waiting on a waitqueue
		 * that this op is done
		 */
		spin_lock(&op->lock);
		set_op_state_serviced(op);
		spin_unlock(&op->lock);

		while (1) {
			spin_lock(&op->lock);
			prepare_to_wait_exclusive(
				&op->io_completion_waitq,
				&wait_entry,
				TASK_INTERRUPTIBLE);
			if (op->io_completed) {
				spin_unlock(&op->lock);
				break;
			}
			spin_unlock(&op->lock);

			if (!signal_pending(current)) {
				int timeout =
				    MSECS_TO_JIFFIES(1000 *
						     op_timeout_secs);
				if (!schedule_timeout(timeout)) {
					gossip_debug(GOSSIP_DEV_DEBUG,
						     "%s: timed out.\n",
						     __func__);
					timed_out = 1;
					break;
				}
				continue;
			}

			gossip_debug(GOSSIP_DEV_DEBUG,
				     "%s: signal on I/O wait, aborting\n",
				     __func__);
			break;
		}

		spin_lock(&op->lock);
		finish_wait(&op->io_completion_waitq, &wait_entry);
		spin_unlock(&op->lock);

		/* NOTE: for I/O operations we handle releasing the op
		 * object except in the case of timeout.  the reason we
		 * can't free the op in timeout cases is that the op
		 * service logic in the vfs retries operations using
		 * the same op ptr, thus it can't be freed.
		 */
		if (!timed_out)
			op_release(op);
	} else {
		/*
		 * tell the vfs op waiting on a waitqueue that
		 * this op is done -
		 * for every other operation (i.e. non-I/O), we need to
		 * wake up the callers for downcall completion
		 * notification
		 */
		spin_lock(&op->lock);
		set_op_state_serviced(op);
		spin_unlock(&op->lock);
	}
out:
	return ret;
}

/*
 * Marks all mounted filesystems as pending a remount; returns 1 if none
 * were mounted.
 */
static int mark_all_pending_mounts(void)
{
	int unmounted = 1;
	struct orangefs_sb_info_s *orangefs_sb = NULL;

	spin_lock(&orangefs_superblocks_lock);
	list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
		/* All of these filesystems require a remount */
		orangefs_sb->mount_pending = 1;
		unmounted = 0;
	}
	spin_unlock(&orangefs_superblocks_lock);
	return unmounted;
}

/*
 * Determine if a given file system needs to be remounted or not
 *  Returns -1 on error
 *           0 if already mounted
 *           1 if needs remount
 */
int fs_mount_pending(__s32 fsid)
{
	int mount_pending = -1;
	struct orangefs_sb_info_s *orangefs_sb = NULL;

	spin_lock(&orangefs_superblocks_lock);
	list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
		if (orangefs_sb->fs_id == fsid) {
			mount_pending = orangefs_sb->mount_pending;
			break;
		}
	}
	spin_unlock(&orangefs_superblocks_lock);
	return mount_pending;
}

/*
 * NOTE: gets called when the last reference to this device is dropped.
 * Using the open_access_count variable, we enforce a reference count
 * on this file so that it can be opened by only one process at a time.
 * the devreq_mutex is used to make sure all i/o has completed
 * before we call orangefs_bufmap_finalize, and similar such tricky
 * situations
 */
static int orangefs_devreq_release(struct inode *inode, struct file *file)
{
	int unmounted = 0;

	gossip_debug(GOSSIP_DEV_DEBUG,
		     "%s:pvfs2-client-core: exiting, closing device\n",
		     __func__);

	mutex_lock(&devreq_mutex);
	if (orangefs_get_bufmap_init())
		orangefs_bufmap_finalize();

	open_access_count = -1;

	unmounted = mark_all_pending_mounts();
	gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n",
		     (unmounted ? "UNMOUNTED" : "MOUNTED"));

	/*
	 * Walk through the list of ops in the request list, mark them
	 * as purged and wake them up.
	 */
	purge_waiting_ops();
	/*
	 * Walk through the hash table of in progress operations; mark
	 * them as purged and wake them up
	 */
	purge_inprogress_ops();
	gossip_debug(GOSSIP_DEV_DEBUG,
		     "pvfs2-client-core: device close complete\n");
	open_access_count = 0;
	mutex_unlock(&devreq_mutex);
	return 0;
}

int is_daemon_in_service(void)
{
	int in_service;

	/*
	 * Check whether the client-core is alive, based on the access
	 * count we maintain on the device.
	 */
	mutex_lock(&devreq_mutex);
	in_service = open_access_count == 1 ? 0 : -EIO;
	mutex_unlock(&devreq_mutex);
	return in_service;
}

static inline long check_ioctl_command(unsigned int command)
{
	/* Check for valid ioctl codes */
	if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) {
		gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
			   command,
			   _IOC_TYPE(command),
			   ORANGEFS_DEV_MAGIC);
		return -EINVAL;
	}
	/* and valid ioctl commands */
	if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
		gossip_err("Invalid ioctl command number [%d >= %d]\n",
			   _IOC_NR(command), ORANGEFS_DEV_MAXNR);
		return -ENOIOCTLCMD;
	}
	return 0;
}

static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
{
	static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
	static __s32 max_up_size = MAX_DEV_REQ_UPSIZE;
	static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE;
	struct ORANGEFS_dev_map_desc user_desc;
	int ret = 0;
	struct dev_mask_info_s mask_info = { 0 };
	struct dev_mask2_info_s mask2_info = { 0, 0 };
	int upstream_kmod = 1;
	struct list_head *tmp = NULL;
	struct orangefs_sb_info_s *orangefs_sb = NULL;

	/* mtmoore: add locking here */

	switch (command) {
	case ORANGEFS_DEV_GET_MAGIC:
		return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ?
			-EIO :
			0);
	case ORANGEFS_DEV_GET_MAX_UPSIZE:
		return ((put_user(max_up_size,
				  (__s32 __user *) arg) == -EFAULT) ?
			-EIO :
			0);
	case ORANGEFS_DEV_GET_MAX_DOWNSIZE:
		return ((put_user(max_down_size,
				  (__s32 __user *) arg) == -EFAULT) ?
			-EIO :
			0);
	case ORANGEFS_DEV_MAP:
		ret = copy_from_user(&user_desc,
				     (struct ORANGEFS_dev_map_desc __user *)
				     arg,
				     sizeof(struct ORANGEFS_dev_map_desc));
		if (orangefs_get_bufmap_init()) {
			return -EINVAL;
		} else {
			return ret ?
			       -EIO :
			       orangefs_bufmap_initialize(&user_desc);
		}
	case ORANGEFS_DEV_REMOUNT_ALL:
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "%s: got ORANGEFS_DEV_REMOUNT_ALL\n",
			     __func__);

		/*
		 * remount all mounted orangefs volumes to regain the lost
		 * dynamic mount tables (if any) -- NOTE: this is done
		 * without keeping the superblock list locked due to the
		 * upcall/downcall waiting.  also, the request semaphore is
		 * used to ensure that no operations will be serviced until
		 * all of the remounts are serviced (to avoid ops between
		 * mounts to fail)
		 */
		ret = mutex_lock_interruptible(&request_mutex);
		if (ret < 0)
			return ret;
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "%s: priority remount in progress\n",
			     __func__);
		list_for_each(tmp, &orangefs_superblocks) {
			orangefs_sb =
				list_entry(tmp,
					   struct orangefs_sb_info_s,
					   list);
			if (orangefs_sb && (orangefs_sb->sb)) {
				gossip_debug(GOSSIP_DEV_DEBUG,
					     "%s: Remounting SB %p\n",
					     __func__,
					     orangefs_sb);

				ret = orangefs_remount(orangefs_sb->sb);
				if (ret) {
					gossip_debug(GOSSIP_DEV_DEBUG,
						     "SB %p remount failed\n",
						     orangefs_sb);
					break;
				}
			}
		}
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "%s: priority remount complete\n",
			     __func__);
		mutex_unlock(&request_mutex);
		return ret;

	case ORANGEFS_DEV_UPSTREAM:
		ret = copy_to_user((void __user *)arg,
				   &upstream_kmod,
				   sizeof(upstream_kmod));

		if (ret != 0)
			return -EIO;
		else
			return ret;

	case ORANGEFS_DEV_CLIENT_MASK:
		ret = copy_from_user(&mask2_info,
				     (void __user *)arg,
				     sizeof(struct dev_mask2_info_s));

		if (ret != 0)
			return -EIO;

		client_debug_mask.mask1 = mask2_info.mask1_value;
		client_debug_mask.mask2 = mask2_info.mask2_value;

		pr_info("%s: client debug mask has been received "
			":%llx: :%llx:\n",
			__func__,
			(unsigned long long)client_debug_mask.mask1,
			(unsigned long long)client_debug_mask.mask2);

		return ret;

	case ORANGEFS_DEV_CLIENT_STRING:
		ret = copy_from_user(&client_debug_array_string,
				     (void __user *)arg,
				     ORANGEFS_MAX_DEBUG_STRING_LEN);
		if (ret != 0) {
			pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
				__func__);
			return -EIO;
		}

		pr_info("%s: client debug array string has been received.\n",
			__func__);

		if (!help_string_initialized) {

			/* Free the "we don't know yet" default string... */
			kfree(debug_help_string);

			/* build a proper debug help string */
			if (orangefs_prepare_debugfs_help_string(0)) {
				gossip_err("%s: no debug help string \n",
					   __func__);
				return -EIO;
			}

			/* Replace the boilerplate boot-time debug-help file. */
			debugfs_remove(help_file_dentry);

			help_file_dentry =
				debugfs_create_file(
					ORANGEFS_KMOD_DEBUG_HELP_FILE,
					0444,
					debug_dir,
					debug_help_string,
					&debug_help_fops);

			if (!help_file_dentry) {
				gossip_err("%s: debugfs_create_file failed for"
					   " :%s:!\n",
					   __func__,
					   ORANGEFS_KMOD_DEBUG_HELP_FILE);
				return -EIO;
			}
		}

		debug_mask_to_string(&client_debug_mask, 1);

		debugfs_remove(client_debug_dentry);

		orangefs_client_debug_init();

		help_string_initialized++;

		return ret;

	case ORANGEFS_DEV_DEBUG:
		ret = copy_from_user(&mask_info,
				     (void __user *)arg,
				     sizeof(mask_info));

		if (ret != 0)
			return -EIO;

		if (mask_info.mask_type == KERNEL_MASK) {
			if ((mask_info.mask_value == 0)
			    && (kernel_mask_set_mod_init)) {
				/*
				 * the kernel debug mask was set when the
				 * kernel module was loaded; don't override
				 * it if the client-core was started without
				 * a value for ORANGEFS_KMODMASK.
				 */
				return 0;
			}
			debug_mask_to_string(&mask_info.mask_value,
					     mask_info.mask_type);
			gossip_debug_mask = mask_info.mask_value;
			pr_info("%s: kernel debug mask has been modified to "
				":%s: :%llx:\n",
				__func__,
				kernel_debug_string,
				(unsigned long long)gossip_debug_mask);
		} else if (mask_info.mask_type == CLIENT_MASK) {
			debug_mask_to_string(&mask_info.mask_value,
					     mask_info.mask_type);
			pr_info("%s: client debug mask has been modified to"
				":%s: :%llx:\n",
				__func__,
				client_debug_string,
				llu(mask_info.mask_value));
		} else {
			gossip_lerr("Invalid mask type....\n");
			return -EINVAL;
		}

		return ret;

	default:
		return -ENOIOCTLCMD;
	}
	return -ENOIOCTLCMD;
}

static long orangefs_devreq_ioctl(struct file *file,
				  unsigned int command, unsigned long arg)
{
	long ret;

	/* Check for properly constructed commands */
	ret = check_ioctl_command(command);
	if (ret < 0)
		return (int)ret;

	return (int)dispatch_ioctl_command(command, arg);
}

#ifdef CONFIG_COMPAT		/* CONFIG_COMPAT is in .config */

/* Compat structure for the ORANGEFS_DEV_MAP ioctl */
struct ORANGEFS_dev_map_desc32 {
	compat_uptr_t ptr;
	__s32 total_size;
	__s32 size;
	__s32 count;
};

static unsigned long translate_dev_map26(unsigned long args, long *error)
{
	struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args;
	/*
	 * Depending on the architecture, allocate some space on the
	 * user-call-stack based on our expected layout.
	 */
	struct ORANGEFS_dev_map_desc __user *p =
		compat_alloc_user_space(sizeof(*p));
	compat_uptr_t addr;

	*error = 0;
	/* get the ptr from the 32 bit user-space */
	if (get_user(addr, &p32->ptr))
		goto err;
	/* try to put that into a 64-bit layout */
	if (put_user(compat_ptr(addr), &p->ptr))
		goto err;
	/* copy the remaining fields */
	if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32)))
		goto err;
	if (copy_in_user(&p->size, &p32->size, sizeof(__s32)))
		goto err;
	if (copy_in_user(&p->count, &p32->count, sizeof(__s32)))
		goto err;
	return (unsigned long)p;
err:
	*error = -EFAULT;
	return 0;
}

/*
 * 32 bit user-space apps' ioctl handlers when the kernel module
 * is compiled as a 64 bit one
 */
static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
					 unsigned long args)
{
	long ret;
	unsigned long arg = args;

	/* Check for properly constructed commands */
	ret = check_ioctl_command(cmd);
	if (ret < 0)
		return ret;
	if (cmd == ORANGEFS_DEV_MAP) {
		/*
		 * convert the arguments to what we expect internally
		 * in kernel space
		 */
		arg = translate_dev_map26(args, &ret);
		if (ret < 0) {
			gossip_err("Could not translate dev map\n");
			return ret;
		}
	}
	/* no other ioctl requires translation */
	return dispatch_ioctl_command(cmd, arg);
}

#endif /* CONFIG_COMPAT is in .config */

/* the assigned character device major number */
static int orangefs_dev_major;

/*
 * Initialize orangefs device specific state:
 * Must be called at module load time only
 */
int orangefs_dev_init(void)
{
	/* register orangefs-req device */
	orangefs_dev_major = register_chrdev(0,
					     ORANGEFS_REQDEVICE_NAME,
					     &orangefs_devreq_file_operations);
	if (orangefs_dev_major < 0) {
		gossip_debug(GOSSIP_DEV_DEBUG,
			     "Failed to register /dev/%s (error %d)\n",
			     ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
		return orangefs_dev_major;
	}

	gossip_debug(GOSSIP_DEV_DEBUG,
		     "*** /dev/%s character device registered ***\n",
		     ORANGEFS_REQDEVICE_NAME);
	gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n",
		     ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
	return 0;
}

void orangefs_dev_cleanup(void)
{
	unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME);
	gossip_debug(GOSSIP_DEV_DEBUG,
		     "*** /dev/%s character device unregistered ***\n",
		     ORANGEFS_REQDEVICE_NAME);
}

static unsigned int orangefs_devreq_poll(struct file *file,
					 struct poll_table_struct *poll_table)
{
	int poll_revent_mask = 0;

	poll_wait(file, &orangefs_request_list_waitq, poll_table);

	if (!list_empty(&orangefs_request_list))
		poll_revent_mask |= POLL_IN;
	return poll_revent_mask;
}

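/*
 * File operations for the ORANGEFS request device node; hooked up in
 * orangefs_dev_init() via register_chrdev().
 */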
const struct file_operations orangefs_devreq_file_operations = {
	.owner = THIS_MODULE,
	.read = orangefs_devreq_read,
	.write_iter = orangefs_devreq_write_iter,
	.open = orangefs_devreq_open,
	.release = orangefs_devreq_release,
	.unlocked_ioctl = orangefs_devreq_ioctl,

#ifdef CONFIG_COMPAT		/* CONFIG_COMPAT is in .config */
	.compat_ioctl = orangefs_devreq_compat_ioctl,
#endif
	.poll = orangefs_devreq_poll
};