blob: de4a0a0bda8a6bd55a792a492c8ce1acb6eda20b [file] [log] [blame]
Miklos Szeredi334f4852005-09-09 13:10:27 -07001/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2005 Miklos Szeredi <miklos@szeredi.hu>
4
5 This program can be distributed under the terms of the GNU GPL.
6 See the file COPYING.
7*/
8
9#include "fuse_i.h"
10
11#include <linux/init.h>
12#include <linux/module.h>
13#include <linux/poll.h>
14#include <linux/uio.h>
15#include <linux/miscdevice.h>
16#include <linux/pagemap.h>
17#include <linux/file.h>
18#include <linux/slab.h>
19
20MODULE_ALIAS_MISCDEV(FUSE_MINOR);
21
22static kmem_cache_t *fuse_req_cachep;
23
Miklos Szeredi8bfc0162006-01-16 22:14:28 -080024static struct fuse_conn *fuse_get_conn(struct file *file)
Miklos Szeredi334f4852005-09-09 13:10:27 -070025{
26 struct fuse_conn *fc;
27 spin_lock(&fuse_lock);
28 fc = file->private_data;
Miklos Szeredi9ba7cbb2006-01-16 22:14:34 -080029 if (fc && !fc->connected)
Miklos Szeredi334f4852005-09-09 13:10:27 -070030 fc = NULL;
31 spin_unlock(&fuse_lock);
32 return fc;
33}
34
Miklos Szeredi8bfc0162006-01-16 22:14:28 -080035static void fuse_request_init(struct fuse_req *req)
Miklos Szeredi334f4852005-09-09 13:10:27 -070036{
37 memset(req, 0, sizeof(*req));
38 INIT_LIST_HEAD(&req->list);
39 init_waitqueue_head(&req->waitq);
40 atomic_set(&req->count, 1);
41}
42
43struct fuse_req *fuse_request_alloc(void)
44{
45 struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, SLAB_KERNEL);
46 if (req)
47 fuse_request_init(req);
48 return req;
49}
50
51void fuse_request_free(struct fuse_req *req)
52{
53 kmem_cache_free(fuse_req_cachep, req);
54}
55
Miklos Szeredi8bfc0162006-01-16 22:14:28 -080056static void block_sigs(sigset_t *oldset)
Miklos Szeredi334f4852005-09-09 13:10:27 -070057{
58 sigset_t mask;
59
60 siginitsetinv(&mask, sigmask(SIGKILL));
61 sigprocmask(SIG_BLOCK, &mask, oldset);
62}
63
Miklos Szeredi8bfc0162006-01-16 22:14:28 -080064static void restore_sigs(sigset_t *oldset)
Miklos Szeredi334f4852005-09-09 13:10:27 -070065{
66 sigprocmask(SIG_SETMASK, oldset, NULL);
67}
68
69void fuse_reset_request(struct fuse_req *req)
70{
71 int preallocated = req->preallocated;
72 BUG_ON(atomic_read(&req->count) != 1);
73 fuse_request_init(req);
74 req->preallocated = preallocated;
75}
76
77static void __fuse_get_request(struct fuse_req *req)
78{
79 atomic_inc(&req->count);
80}
81
82/* Must be called with > 1 refcount */
83static void __fuse_put_request(struct fuse_req *req)
84{
85 BUG_ON(atomic_read(&req->count) < 2);
86 atomic_dec(&req->count);
87}
88
89static struct fuse_req *do_get_request(struct fuse_conn *fc)
90{
91 struct fuse_req *req;
92
93 spin_lock(&fuse_lock);
94 BUG_ON(list_empty(&fc->unused_list));
95 req = list_entry(fc->unused_list.next, struct fuse_req, list);
96 list_del_init(&req->list);
97 spin_unlock(&fuse_lock);
98 fuse_request_init(req);
99 req->preallocated = 1;
100 req->in.h.uid = current->fsuid;
101 req->in.h.gid = current->fsgid;
102 req->in.h.pid = current->pid;
103 return req;
104}
105
Miklos Szeredi7c352bd2005-09-09 13:10:39 -0700106/* This can return NULL, but only in case it's interrupted by a SIGKILL */
Miklos Szeredi334f4852005-09-09 13:10:27 -0700107struct fuse_req *fuse_get_request(struct fuse_conn *fc)
108{
Miklos Szeredi334f4852005-09-09 13:10:27 -0700109 int intr;
110 sigset_t oldset;
111
112 block_sigs(&oldset);
113 intr = down_interruptible(&fc->outstanding_sem);
114 restore_sigs(&oldset);
115 return intr ? NULL : do_get_request(fc);
116}
117
118static void fuse_putback_request(struct fuse_conn *fc, struct fuse_req *req)
119{
120 spin_lock(&fuse_lock);
121 if (req->preallocated)
122 list_add(&req->list, &fc->unused_list);
123 else
124 fuse_request_free(req);
125
126 /* If we are in debt decrease that first */
127 if (fc->outstanding_debt)
128 fc->outstanding_debt--;
129 else
130 up(&fc->outstanding_sem);
131 spin_unlock(&fuse_lock);
132}
133
134void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
135{
136 if (atomic_dec_and_test(&req->count))
137 fuse_putback_request(fc, req);
138}
139
Miklos Szeredi1e9a4ed2005-09-09 13:10:31 -0700140void fuse_release_background(struct fuse_req *req)
141{
142 iput(req->inode);
143 iput(req->inode2);
144 if (req->file)
145 fput(req->file);
146 spin_lock(&fuse_lock);
147 list_del(&req->bg_entry);
148 spin_unlock(&fuse_lock);
149}
150
Miklos Szeredi3ec870d2006-01-06 00:19:41 -0800151static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
152{
153 int i;
154 struct fuse_init_out *arg = &req->misc.init_out;
155
Miklos Szeredib3bebd92006-01-16 22:14:27 -0800156 if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
Miklos Szeredi3ec870d2006-01-06 00:19:41 -0800157 fc->conn_error = 1;
158 else {
159 fc->minor = arg->minor;
160 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
161 }
162
163 /* After INIT reply is received other requests can go
164 out. So do (FUSE_MAX_OUTSTANDING - 1) number of
165 up()s on outstanding_sem. The last up() is done in
166 fuse_putback_request() */
167 for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
168 up(&fc->outstanding_sem);
169}
170
Miklos Szeredi334f4852005-09-09 13:10:27 -0700171/*
172 * This function is called when a request is finished. Either a reply
173 * has arrived or it was interrupted (and not yet sent) or some error
Miklos Szeredif43b1552006-01-16 22:14:26 -0800174 * occurred during communication with userspace, or the device file
175 * was closed. In case of a background request the reference to the
176 * stored objects are released. The requester thread is woken up (if
177 * still waiting), and finally the reference to the request is
178 * released
Miklos Szeredi334f4852005-09-09 13:10:27 -0700179 *
180 * Called with fuse_lock, unlocks it
181 */
182static void request_end(struct fuse_conn *fc, struct fuse_req *req)
183{
Miklos Szeredid77a1d52006-01-16 22:14:31 -0800184 list_del(&req->list);
Miklos Szeredi83cfd492006-01-16 22:14:31 -0800185 req->state = FUSE_REQ_FINISHED;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700186 spin_unlock(&fuse_lock);
187 if (req->background) {
Miklos Szeredi1e9a4ed2005-09-09 13:10:31 -0700188 down_read(&fc->sbput_sem);
189 if (fc->mounted)
190 fuse_release_background(req);
191 up_read(&fc->sbput_sem);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700192 }
193 wake_up(&req->waitq);
Miklos Szeredi3ec870d2006-01-06 00:19:41 -0800194 if (req->in.h.opcode == FUSE_INIT)
195 process_init_reply(fc, req);
196 else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) {
Miklos Szeredifd72faa2005-11-07 00:59:51 -0800197 /* Special case for failed iget in CREATE */
198 u64 nodeid = req->in.h.nodeid;
Miklos Szeredifd72faa2005-11-07 00:59:51 -0800199 fuse_reset_request(req);
200 fuse_send_forget(fc, req, nodeid, 1);
Miklos Szeredif43b1552006-01-16 22:14:26 -0800201 return;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700202 }
Miklos Szeredif43b1552006-01-16 22:14:26 -0800203 fuse_put_request(fc, req);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700204}
205
Miklos Szeredi1e9a4ed2005-09-09 13:10:31 -0700206/*
207 * Unfortunately request interruption not just solves the deadlock
208 * problem, it causes problems too. These stem from the fact, that an
209 * interrupted request is continued to be processed in userspace,
210 * while all the locks and object references (inode and file) held
211 * during the operation are released.
212 *
213 * To release the locks is exactly why there's a need to interrupt the
214 * request, so there's not a lot that can be done about this, except
215 * introduce additional locking in userspace.
216 *
217 * More important is to keep inode and file references until userspace
218 * has replied, otherwise FORGET and RELEASE could be sent while the
219 * inode/file is still used by the filesystem.
220 *
221 * For this reason the concept of "background" request is introduced.
222 * An interrupted request is backgrounded if it has been already sent
223 * to userspace. Backgrounding involves getting an extra reference to
224 * inode(s) or file used in the request, and adding the request to
225 * fc->background list. When a reply is received for a background
226 * request, the object references are released, and the request is
227 * removed from the list. If the filesystem is unmounted while there
228 * are still background requests, the list is walked and references
229 * are released as if a reply was received.
230 *
231 * There's one more use for a background request. The RELEASE message is
232 * always sent as background, since it doesn't return an error or
233 * data.
234 */
235static void background_request(struct fuse_conn *fc, struct fuse_req *req)
Miklos Szeredi334f4852005-09-09 13:10:27 -0700236{
Miklos Szeredi334f4852005-09-09 13:10:27 -0700237 req->background = 1;
Miklos Szeredi1e9a4ed2005-09-09 13:10:31 -0700238 list_add(&req->bg_entry, &fc->background);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700239 if (req->inode)
240 req->inode = igrab(req->inode);
241 if (req->inode2)
242 req->inode2 = igrab(req->inode2);
243 if (req->file)
244 get_file(req->file);
245}
246
Miklos Szeredi334f4852005-09-09 13:10:27 -0700247/* Called with fuse_lock held. Releases, and then reacquires it. */
Miklos Szeredi7c352bd2005-09-09 13:10:39 -0700248static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
Miklos Szeredi334f4852005-09-09 13:10:27 -0700249{
Miklos Szeredi7c352bd2005-09-09 13:10:39 -0700250 sigset_t oldset;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700251
252 spin_unlock(&fuse_lock);
Miklos Szeredi7c352bd2005-09-09 13:10:39 -0700253 block_sigs(&oldset);
Miklos Szeredi83cfd492006-01-16 22:14:31 -0800254 wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
Miklos Szeredi7c352bd2005-09-09 13:10:39 -0700255 restore_sigs(&oldset);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700256 spin_lock(&fuse_lock);
Miklos Szeredi83cfd492006-01-16 22:14:31 -0800257 if (req->state == FUSE_REQ_FINISHED)
Miklos Szeredi334f4852005-09-09 13:10:27 -0700258 return;
259
Miklos Szeredi7c352bd2005-09-09 13:10:39 -0700260 req->out.h.error = -EINTR;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700261 req->interrupted = 1;
262 if (req->locked) {
263 /* This is uninterruptible sleep, because data is
264 being copied to/from the buffers of req. During
265 locked state, there mustn't be any filesystem
266 operation (e.g. page fault), since that could lead
267 to deadlock */
268 spin_unlock(&fuse_lock);
269 wait_event(req->waitq, !req->locked);
270 spin_lock(&fuse_lock);
271 }
Miklos Szeredi83cfd492006-01-16 22:14:31 -0800272 if (req->state == FUSE_REQ_PENDING) {
Miklos Szeredi334f4852005-09-09 13:10:27 -0700273 list_del(&req->list);
274 __fuse_put_request(req);
Miklos Szeredi83cfd492006-01-16 22:14:31 -0800275 } else if (req->state == FUSE_REQ_SENT)
Miklos Szeredi1e9a4ed2005-09-09 13:10:31 -0700276 background_request(fc, req);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700277}
278
279static unsigned len_args(unsigned numargs, struct fuse_arg *args)
280{
281 unsigned nbytes = 0;
282 unsigned i;
283
284 for (i = 0; i < numargs; i++)
285 nbytes += args[i].size;
286
287 return nbytes;
288}
289
290static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
291{
292 fc->reqctr++;
293 /* zero is special */
294 if (fc->reqctr == 0)
295 fc->reqctr = 1;
296 req->in.h.unique = fc->reqctr;
297 req->in.h.len = sizeof(struct fuse_in_header) +
298 len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
299 if (!req->preallocated) {
300 /* If request is not preallocated (either FORGET or
301 RELEASE), then still decrease outstanding_sem, so
302 user can't open infinite number of files while not
303 processing the RELEASE requests. However for
304 efficiency do it without blocking, so if down()
305 would block, just increase the debt instead */
306 if (down_trylock(&fc->outstanding_sem))
307 fc->outstanding_debt++;
308 }
309 list_add_tail(&req->list, &fc->pending);
Miklos Szeredi83cfd492006-01-16 22:14:31 -0800310 req->state = FUSE_REQ_PENDING;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700311 wake_up(&fc->waitq);
312}
313
/*
 * Send a request and wait for the answer.  If the connection is gone
 * or flagged with an error, the request is not queued and
 * out.h.error is set to -ENOTCONN or -ECONNREFUSED respectively.
 *
 * The wait can only be interrupted by a SIGKILL.
 */
void request_send(struct fuse_conn *fc, struct fuse_req *req)
{
	req->isreply = 1;

	spin_lock(&fuse_lock);
	if (!fc->connected) {
		req->out.h.error = -ENOTCONN;
	} else if (fc->conn_error) {
		req->out.h.error = -ECONNREFUSED;
	} else {
		queue_request(fc, req);
		/*
		 * The caller still needs the request after request_end()
		 * has dropped its reference, so take an extra one.
		 */
		__fuse_get_request(req);

		request_wait_answer(fc, req);
	}
	spin_unlock(&fuse_lock);
}
335
Miklos Szeredi334f4852005-09-09 13:10:27 -0700336static void request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
337{
338 spin_lock(&fuse_lock);
Miklos Szeredi1e9a4ed2005-09-09 13:10:31 -0700339 if (fc->connected) {
Miklos Szeredi334f4852005-09-09 13:10:27 -0700340 queue_request(fc, req);
341 spin_unlock(&fuse_lock);
342 } else {
343 req->out.h.error = -ENOTCONN;
344 request_end(fc, req);
345 }
346}
347
348void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
349{
350 req->isreply = 0;
351 request_send_nowait(fc, req);
352}
353
354void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
355{
356 req->isreply = 1;
Miklos Szeredi1e9a4ed2005-09-09 13:10:31 -0700357 spin_lock(&fuse_lock);
358 background_request(fc, req);
359 spin_unlock(&fuse_lock);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700360 request_send_nowait(fc, req);
361}
362
363void fuse_send_init(struct fuse_conn *fc)
364{
365 /* This is called from fuse_read_super() so there's guaranteed
Miklos Szeredi6383bda2006-01-16 22:14:29 -0800366 to be exactly one request available */
367 struct fuse_req *req = fuse_get_request(fc);
Miklos Szeredi3ec870d2006-01-06 00:19:41 -0800368 struct fuse_init_in *arg = &req->misc.init_in;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700369 arg->major = FUSE_KERNEL_VERSION;
370 arg->minor = FUSE_KERNEL_MINOR_VERSION;
371 req->in.h.opcode = FUSE_INIT;
372 req->in.numargs = 1;
373 req->in.args[0].size = sizeof(*arg);
374 req->in.args[0].value = arg;
375 req->out.numargs = 1;
Miklos Szeredi3ec870d2006-01-06 00:19:41 -0800376 /* Variable length arguement used for backward compatibility
377 with interface version < 7.5. Rest of init_out is zeroed
378 by do_get_request(), so a short reply is not a problem */
379 req->out.argvar = 1;
380 req->out.args[0].size = sizeof(struct fuse_init_out);
381 req->out.args[0].value = &req->misc.init_out;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700382 request_send_background(fc, req);
383}
384
385/*
386 * Lock the request. Up to the next unlock_request() there mustn't be
387 * anything that could cause a page-fault. If the request was already
388 * interrupted bail out.
389 */
Miklos Szeredi8bfc0162006-01-16 22:14:28 -0800390static int lock_request(struct fuse_req *req)
Miklos Szeredi334f4852005-09-09 13:10:27 -0700391{
392 int err = 0;
393 if (req) {
394 spin_lock(&fuse_lock);
395 if (req->interrupted)
396 err = -ENOENT;
397 else
398 req->locked = 1;
399 spin_unlock(&fuse_lock);
400 }
401 return err;
402}
403
404/*
405 * Unlock request. If it was interrupted during being locked, the
406 * requester thread is currently waiting for it to be unlocked, so
407 * wake it up.
408 */
Miklos Szeredi8bfc0162006-01-16 22:14:28 -0800409static void unlock_request(struct fuse_req *req)
Miklos Szeredi334f4852005-09-09 13:10:27 -0700410{
411 if (req) {
412 spin_lock(&fuse_lock);
413 req->locked = 0;
414 if (req->interrupted)
415 wake_up(&req->waitq);
416 spin_unlock(&fuse_lock);
417 }
418}
419
/* Progress of a copy between a request and a userspace iovec buffer */
struct fuse_copy_state {
	/* Non-zero when data is copied into the userspace buffer */
	int write;
	/* Request locked/unlocked around each chunk; may be NULL */
	struct fuse_req *req;
	/* Next iovec segment to consume, and how many remain */
	const struct iovec *iov;
	unsigned long nr_segs;
	/* Bytes left in the current segment */
	unsigned long seglen;
	/* Userspace address at which the next chunk starts */
	unsigned long addr;
	/* Currently pinned userspace page and its kernel mapping */
	struct page *pg;
	void *mapaddr;
	/* Cursor within the mapped page and bytes left in this chunk */
	void *buf;
	unsigned len;
};
432
433static void fuse_copy_init(struct fuse_copy_state *cs, int write,
434 struct fuse_req *req, const struct iovec *iov,
435 unsigned long nr_segs)
436{
437 memset(cs, 0, sizeof(*cs));
438 cs->write = write;
439 cs->req = req;
440 cs->iov = iov;
441 cs->nr_segs = nr_segs;
442}
443
444/* Unmap and put previous page of userspace buffer */
Miklos Szeredi8bfc0162006-01-16 22:14:28 -0800445static void fuse_copy_finish(struct fuse_copy_state *cs)
Miklos Szeredi334f4852005-09-09 13:10:27 -0700446{
447 if (cs->mapaddr) {
448 kunmap_atomic(cs->mapaddr, KM_USER0);
449 if (cs->write) {
450 flush_dcache_page(cs->pg);
451 set_page_dirty_lock(cs->pg);
452 }
453 put_page(cs->pg);
454 cs->mapaddr = NULL;
455 }
456}
457
458/*
459 * Get another pagefull of userspace buffer, and map it to kernel
460 * address space, and lock request
461 */
462static int fuse_copy_fill(struct fuse_copy_state *cs)
463{
464 unsigned long offset;
465 int err;
466
467 unlock_request(cs->req);
468 fuse_copy_finish(cs);
469 if (!cs->seglen) {
470 BUG_ON(!cs->nr_segs);
471 cs->seglen = cs->iov[0].iov_len;
472 cs->addr = (unsigned long) cs->iov[0].iov_base;
473 cs->iov ++;
474 cs->nr_segs --;
475 }
476 down_read(&current->mm->mmap_sem);
477 err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
478 &cs->pg, NULL);
479 up_read(&current->mm->mmap_sem);
480 if (err < 0)
481 return err;
482 BUG_ON(err != 1);
483 offset = cs->addr % PAGE_SIZE;
484 cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
485 cs->buf = cs->mapaddr + offset;
486 cs->len = min(PAGE_SIZE - offset, cs->seglen);
487 cs->seglen -= cs->len;
488 cs->addr += cs->len;
489
490 return lock_request(cs->req);
491}
492
493/* Do as much copy to/from userspace buffer as we can */
Miklos Szeredi8bfc0162006-01-16 22:14:28 -0800494static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
Miklos Szeredi334f4852005-09-09 13:10:27 -0700495{
496 unsigned ncpy = min(*size, cs->len);
497 if (val) {
498 if (cs->write)
499 memcpy(cs->buf, *val, ncpy);
500 else
501 memcpy(*val, cs->buf, ncpy);
502 *val += ncpy;
503 }
504 *size -= ncpy;
505 cs->len -= ncpy;
506 cs->buf += ncpy;
507 return ncpy;
508}
509
510/*
511 * Copy a page in the request to/from the userspace buffer. Must be
512 * done atomically
513 */
Miklos Szeredi8bfc0162006-01-16 22:14:28 -0800514static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
515 unsigned offset, unsigned count, int zeroing)
Miklos Szeredi334f4852005-09-09 13:10:27 -0700516{
517 if (page && zeroing && count < PAGE_SIZE) {
518 void *mapaddr = kmap_atomic(page, KM_USER1);
519 memset(mapaddr, 0, PAGE_SIZE);
520 kunmap_atomic(mapaddr, KM_USER1);
521 }
522 while (count) {
523 int err;
524 if (!cs->len && (err = fuse_copy_fill(cs)))
525 return err;
526 if (page) {
527 void *mapaddr = kmap_atomic(page, KM_USER1);
528 void *buf = mapaddr + offset;
529 offset += fuse_copy_do(cs, &buf, &count);
530 kunmap_atomic(mapaddr, KM_USER1);
531 } else
532 offset += fuse_copy_do(cs, NULL, &count);
533 }
534 if (page && !cs->write)
535 flush_dcache_page(page);
536 return 0;
537}
538
539/* Copy pages in the request to/from userspace buffer */
540static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
541 int zeroing)
542{
543 unsigned i;
544 struct fuse_req *req = cs->req;
545 unsigned offset = req->page_offset;
546 unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);
547
548 for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
549 struct page *page = req->pages[i];
550 int err = fuse_copy_page(cs, page, offset, count, zeroing);
551 if (err)
552 return err;
553
554 nbytes -= count;
555 count = min(nbytes, (unsigned) PAGE_SIZE);
556 offset = 0;
557 }
558 return 0;
559}
560
561/* Copy a single argument in the request to/from userspace buffer */
562static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
563{
564 while (size) {
565 int err;
566 if (!cs->len && (err = fuse_copy_fill(cs)))
567 return err;
568 fuse_copy_do(cs, &val, &size);
569 }
570 return 0;
571}
572
573/* Copy request arguments to/from userspace buffer */
574static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
575 unsigned argpages, struct fuse_arg *args,
576 int zeroing)
577{
578 int err = 0;
579 unsigned i;
580
581 for (i = 0; !err && i < numargs; i++) {
582 struct fuse_arg *arg = &args[i];
583 if (i == numargs - 1 && argpages)
584 err = fuse_copy_pages(cs, arg->size, zeroing);
585 else
586 err = fuse_copy_one(cs, arg->value, arg->size);
587 }
588 return err;
589}
590
591/* Wait until a request is available on the pending list */
592static void request_wait(struct fuse_conn *fc)
593{
594 DECLARE_WAITQUEUE(wait, current);
595
596 add_wait_queue_exclusive(&fc->waitq, &wait);
Miklos Szeredi9ba7cbb2006-01-16 22:14:34 -0800597 while (fc->connected && list_empty(&fc->pending)) {
Miklos Szeredi334f4852005-09-09 13:10:27 -0700598 set_current_state(TASK_INTERRUPTIBLE);
599 if (signal_pending(current))
600 break;
601
602 spin_unlock(&fuse_lock);
603 schedule();
604 spin_lock(&fuse_lock);
605 }
606 set_current_state(TASK_RUNNING);
607 remove_wait_queue(&fc->waitq, &wait);
608}
609
610/*
611 * Read a single request into the userspace filesystem's buffer. This
612 * function waits until a request is available, then removes it from
613 * the pending list and copies request data to userspace buffer. If
614 * no reply is needed (FORGET) or request has been interrupted or
615 * there was an error during the copying then it's finished by calling
616 * request_end(). Otherwise add it to the processing list, and set
617 * the 'sent' flag.
618 */
619static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
620 unsigned long nr_segs, loff_t *off)
621{
622 int err;
623 struct fuse_conn *fc;
624 struct fuse_req *req;
625 struct fuse_in *in;
626 struct fuse_copy_state cs;
627 unsigned reqsize;
628
Miklos Szeredi1d3d7522006-01-06 00:19:40 -0800629 restart:
Miklos Szeredi334f4852005-09-09 13:10:27 -0700630 spin_lock(&fuse_lock);
631 fc = file->private_data;
632 err = -EPERM;
633 if (!fc)
634 goto err_unlock;
635 request_wait(fc);
636 err = -ENODEV;
Miklos Szeredi9ba7cbb2006-01-16 22:14:34 -0800637 if (!fc->connected)
Miklos Szeredi334f4852005-09-09 13:10:27 -0700638 goto err_unlock;
639 err = -ERESTARTSYS;
640 if (list_empty(&fc->pending))
641 goto err_unlock;
642
643 req = list_entry(fc->pending.next, struct fuse_req, list);
Miklos Szeredi83cfd492006-01-16 22:14:31 -0800644 req->state = FUSE_REQ_READING;
Miklos Szeredid77a1d52006-01-16 22:14:31 -0800645 list_move(&req->list, &fc->io);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700646
647 in = &req->in;
Miklos Szeredi1d3d7522006-01-06 00:19:40 -0800648 reqsize = in->h.len;
649 /* If request is too large, reply with an error and restart the read */
650 if (iov_length(iov, nr_segs) < reqsize) {
651 req->out.h.error = -EIO;
652 /* SETXATTR is special, since it may contain too large data */
653 if (in->h.opcode == FUSE_SETXATTR)
654 req->out.h.error = -E2BIG;
655 request_end(fc, req);
656 goto restart;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700657 }
Miklos Szeredi1d3d7522006-01-06 00:19:40 -0800658 spin_unlock(&fuse_lock);
659 fuse_copy_init(&cs, 1, req, iov, nr_segs);
660 err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
661 if (!err)
662 err = fuse_copy_args(&cs, in->numargs, in->argpages,
663 (struct fuse_arg *) in->args, 0);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700664 fuse_copy_finish(&cs);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700665 spin_lock(&fuse_lock);
666 req->locked = 0;
667 if (!err && req->interrupted)
668 err = -ENOENT;
669 if (err) {
670 if (!req->interrupted)
671 req->out.h.error = -EIO;
672 request_end(fc, req);
673 return err;
674 }
675 if (!req->isreply)
676 request_end(fc, req);
677 else {
Miklos Szeredi83cfd492006-01-16 22:14:31 -0800678 req->state = FUSE_REQ_SENT;
Miklos Szeredid77a1d52006-01-16 22:14:31 -0800679 list_move_tail(&req->list, &fc->processing);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700680 spin_unlock(&fuse_lock);
681 }
682 return reqsize;
683
684 err_unlock:
685 spin_unlock(&fuse_lock);
686 return err;
687}
688
689static ssize_t fuse_dev_read(struct file *file, char __user *buf,
690 size_t nbytes, loff_t *off)
691{
692 struct iovec iov;
693 iov.iov_len = nbytes;
694 iov.iov_base = buf;
695 return fuse_dev_readv(file, &iov, 1, off);
696}
697
698/* Look up request on processing list by unique ID */
699static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
700{
701 struct list_head *entry;
702
703 list_for_each(entry, &fc->processing) {
704 struct fuse_req *req;
705 req = list_entry(entry, struct fuse_req, list);
706 if (req->in.h.unique == unique)
707 return req;
708 }
709 return NULL;
710}
711
712static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
713 unsigned nbytes)
714{
715 unsigned reqsize = sizeof(struct fuse_out_header);
716
717 if (out->h.error)
718 return nbytes != reqsize ? -EINVAL : 0;
719
720 reqsize += len_args(out->numargs, out->args);
721
722 if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
723 return -EINVAL;
724 else if (reqsize > nbytes) {
725 struct fuse_arg *lastarg = &out->args[out->numargs-1];
726 unsigned diffsize = reqsize - nbytes;
727 if (diffsize > lastarg->size)
728 return -EINVAL;
729 lastarg->size -= diffsize;
730 }
731 return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
732 out->page_zeroing);
733}
734
735/*
736 * Write a single reply to a request. First the header is copied from
737 * the write buffer. The request is then searched on the processing
738 * list by the unique ID found in the header. If found, then remove
739 * it from the list and copy the rest of the buffer to the request.
740 * The request is finished by calling request_end()
741 */
742static ssize_t fuse_dev_writev(struct file *file, const struct iovec *iov,
743 unsigned long nr_segs, loff_t *off)
744{
745 int err;
746 unsigned nbytes = iov_length(iov, nr_segs);
747 struct fuse_req *req;
748 struct fuse_out_header oh;
749 struct fuse_copy_state cs;
750 struct fuse_conn *fc = fuse_get_conn(file);
751 if (!fc)
752 return -ENODEV;
753
754 fuse_copy_init(&cs, 0, NULL, iov, nr_segs);
755 if (nbytes < sizeof(struct fuse_out_header))
756 return -EINVAL;
757
758 err = fuse_copy_one(&cs, &oh, sizeof(oh));
759 if (err)
760 goto err_finish;
761 err = -EINVAL;
762 if (!oh.unique || oh.error <= -1000 || oh.error > 0 ||
763 oh.len != nbytes)
764 goto err_finish;
765
766 spin_lock(&fuse_lock);
767 req = request_find(fc, oh.unique);
768 err = -EINVAL;
769 if (!req)
770 goto err_unlock;
771
Miklos Szeredi334f4852005-09-09 13:10:27 -0700772 if (req->interrupted) {
Miklos Szeredi222f1d62006-01-16 22:14:25 -0800773 spin_unlock(&fuse_lock);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700774 fuse_copy_finish(&cs);
Miklos Szeredi222f1d62006-01-16 22:14:25 -0800775 spin_lock(&fuse_lock);
776 request_end(fc, req);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700777 return -ENOENT;
778 }
Miklos Szeredid77a1d52006-01-16 22:14:31 -0800779 list_move(&req->list, &fc->io);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700780 req->out.h = oh;
781 req->locked = 1;
782 cs.req = req;
783 spin_unlock(&fuse_lock);
784
785 err = copy_out_args(&cs, &req->out, nbytes);
786 fuse_copy_finish(&cs);
787
788 spin_lock(&fuse_lock);
789 req->locked = 0;
790 if (!err) {
791 if (req->interrupted)
792 err = -ENOENT;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700793 } else if (!req->interrupted)
794 req->out.h.error = -EIO;
795 request_end(fc, req);
796
797 return err ? err : nbytes;
798
799 err_unlock:
800 spin_unlock(&fuse_lock);
801 err_finish:
802 fuse_copy_finish(&cs);
803 return err;
804}
805
806static ssize_t fuse_dev_write(struct file *file, const char __user *buf,
807 size_t nbytes, loff_t *off)
808{
809 struct iovec iov;
810 iov.iov_len = nbytes;
811 iov.iov_base = (char __user *) buf;
812 return fuse_dev_writev(file, &iov, 1, off);
813}
814
815static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
816{
817 struct fuse_conn *fc = fuse_get_conn(file);
818 unsigned mask = POLLOUT | POLLWRNORM;
819
820 if (!fc)
821 return -ENODEV;
822
823 poll_wait(file, &fc->waitq, wait);
824
825 spin_lock(&fuse_lock);
826 if (!list_empty(&fc->pending))
827 mask |= POLLIN | POLLRDNORM;
828 spin_unlock(&fuse_lock);
829
830 return mask;
831}
832
833/* Abort all requests on the given list (pending or processing) */
834static void end_requests(struct fuse_conn *fc, struct list_head *head)
835{
836 while (!list_empty(head)) {
837 struct fuse_req *req;
838 req = list_entry(head->next, struct fuse_req, list);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700839 req->out.h.error = -ECONNABORTED;
840 request_end(fc, req);
841 spin_lock(&fuse_lock);
842 }
843}
844
845static int fuse_dev_release(struct inode *inode, struct file *file)
846{
847 struct fuse_conn *fc;
848
849 spin_lock(&fuse_lock);
850 fc = file->private_data;
851 if (fc) {
Miklos Szeredi1e9a4ed2005-09-09 13:10:31 -0700852 fc->connected = 0;
Miklos Szeredi334f4852005-09-09 13:10:27 -0700853 end_requests(fc, &fc->pending);
854 end_requests(fc, &fc->processing);
Miklos Szeredi334f4852005-09-09 13:10:27 -0700855 }
856 spin_unlock(&fuse_lock);
Miklos Szeredif543f252006-01-16 22:14:35 -0800857 if (fc)
858 kobject_put(&fc->kobj);
859
Miklos Szeredi334f4852005-09-09 13:10:27 -0700860 return 0;
861}
862
863struct file_operations fuse_dev_operations = {
864 .owner = THIS_MODULE,
865 .llseek = no_llseek,
866 .read = fuse_dev_read,
867 .readv = fuse_dev_readv,
868 .write = fuse_dev_write,
869 .writev = fuse_dev_writev,
870 .poll = fuse_dev_poll,
871 .release = fuse_dev_release,
872};
873
874static struct miscdevice fuse_miscdevice = {
875 .minor = FUSE_MINOR,
876 .name = "fuse",
877 .fops = &fuse_dev_operations,
878};
879
880int __init fuse_dev_init(void)
881{
882 int err = -ENOMEM;
883 fuse_req_cachep = kmem_cache_create("fuse_request",
884 sizeof(struct fuse_req),
885 0, 0, NULL, NULL);
886 if (!fuse_req_cachep)
887 goto out;
888
889 err = misc_register(&fuse_miscdevice);
890 if (err)
891 goto out_cache_clean;
892
893 return 0;
894
895 out_cache_clean:
896 kmem_cache_destroy(fuse_req_cachep);
897 out:
898 return err;
899}
900
901void fuse_dev_cleanup(void)
902{
903 misc_deregister(&fuse_miscdevice);
904 kmem_cache_destroy(fuse_req_cachep);
905}