blob: 036536f072c98b64218e2a9b4115b8a3ac4d4c7b [file] [log] [blame]
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */
6
7#include <linux/mm.h>
8#include <linux/file.h>
9#include <linux/poll.h>
10#include <linux/slab.h>
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14#include <linux/mount.h>
15#include <linux/pipe_fs_i.h>
16#include <linux/uio.h>
17#include <linux/highmem.h>
Jens Axboe5274f052006-03-30 15:15:30 +020018#include <linux/pagemap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019
20#include <asm/uaccess.h>
21#include <asm/ioctls.h>
22
/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */
37
38/* Drop the inode semaphore and wait for a pipe event, atomically */
Ingo Molnar3a326a22006-04-10 15:18:35 +020039void pipe_wait(struct pipe_inode_info *pipe)
Linus Torvalds1da177e2005-04-16 15:20:36 -070040{
41 DEFINE_WAIT(wait);
42
Ingo Molnard79fc0f2005-09-10 00:26:12 -070043 /*
44 * Pipes are system-local resources, so sleeping on them
45 * is considered a noninteractive wait:
46 */
Ingo Molnar3a326a22006-04-10 15:18:35 +020047 prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
48 if (pipe->inode)
49 mutex_unlock(&pipe->inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070050 schedule();
Ingo Molnar3a326a22006-04-10 15:18:35 +020051 finish_wait(&pipe->wait, &wait);
52 if (pipe->inode)
53 mutex_lock(&pipe->inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070054}
55
Arjan van de Ven858119e2006-01-14 13:20:43 -080056static int
Linus Torvalds1da177e2005-04-16 15:20:36 -070057pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
58{
59 unsigned long copy;
60
61 while (len > 0) {
62 while (!iov->iov_len)
63 iov++;
64 copy = min_t(unsigned long, len, iov->iov_len);
65
66 if (copy_from_user(to, iov->iov_base, copy))
67 return -EFAULT;
68 to += copy;
69 len -= copy;
70 iov->iov_base += copy;
71 iov->iov_len -= copy;
72 }
73 return 0;
74}
75
Arjan van de Ven858119e2006-01-14 13:20:43 -080076static int
Linus Torvalds1da177e2005-04-16 15:20:36 -070077pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
78{
79 unsigned long copy;
80
81 while (len > 0) {
82 while (!iov->iov_len)
83 iov++;
84 copy = min_t(unsigned long, len, iov->iov_len);
85
86 if (copy_to_user(iov->iov_base, from, copy))
87 return -EFAULT;
88 from += copy;
89 len -= copy;
90 iov->iov_base += copy;
91 iov->iov_len -= copy;
92 }
93 return 0;
94}
95
96static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
97{
98 struct page *page = buf->page;
99
Jens Axboe3e7ee3e2006-04-02 23:11:04 +0200100 buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
101
Jens Axboe5274f052006-03-30 15:15:30 +0200102 /*
103 * If nobody else uses this page, and we don't already have a
104 * temporary page, let's keep track of it as a one-deep
105 * allocation cache
106 */
107 if (page_count(page) == 1 && !info->tmp_page) {
108 info->tmp_page = page;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109 return;
110 }
Jens Axboe5274f052006-03-30 15:15:30 +0200111
112 /*
113 * Otherwise just release our reference to it
114 */
115 page_cache_release(page);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116}
117
118static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
119{
120 return kmap(buf->page);
121}
122
123static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
124{
125 kunmap(buf->page);
126}
127
Jens Axboe5abc97a2006-03-30 15:16:46 +0200128static int anon_pipe_buf_steal(struct pipe_inode_info *info,
129 struct pipe_buffer *buf)
130{
Jens Axboe3e7ee3e2006-04-02 23:11:04 +0200131 buf->flags |= PIPE_BUF_FLAG_STOLEN;
Jens Axboe5abc97a2006-03-30 15:16:46 +0200132 return 0;
133}
134
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135static struct pipe_buf_operations anon_pipe_buf_ops = {
136 .can_merge = 1,
137 .map = anon_pipe_buf_map,
138 .unmap = anon_pipe_buf_unmap,
139 .release = anon_pipe_buf_release,
Jens Axboe5abc97a2006-03-30 15:16:46 +0200140 .steal = anon_pipe_buf_steal,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141};
142
143static ssize_t
144pipe_readv(struct file *filp, const struct iovec *_iov,
145 unsigned long nr_segs, loff_t *ppos)
146{
147 struct inode *inode = filp->f_dentry->d_inode;
148 struct pipe_inode_info *info;
149 int do_wakeup;
150 ssize_t ret;
151 struct iovec *iov = (struct iovec *)_iov;
152 size_t total_len;
153
154 total_len = iov_length(iov, nr_segs);
155 /* Null read succeeds. */
156 if (unlikely(total_len == 0))
157 return 0;
158
159 do_wakeup = 0;
160 ret = 0;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800161 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 info = inode->i_pipe;
163 for (;;) {
164 int bufs = info->nrbufs;
165 if (bufs) {
166 int curbuf = info->curbuf;
167 struct pipe_buffer *buf = info->bufs + curbuf;
168 struct pipe_buf_operations *ops = buf->ops;
169 void *addr;
170 size_t chars = buf->len;
171 int error;
172
173 if (chars > total_len)
174 chars = total_len;
175
176 addr = ops->map(filp, info, buf);
Jens Axboe5274f052006-03-30 15:15:30 +0200177 if (IS_ERR(addr)) {
178 if (!ret)
179 ret = PTR_ERR(addr);
180 break;
181 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
183 ops->unmap(info, buf);
184 if (unlikely(error)) {
185 if (!ret) ret = -EFAULT;
186 break;
187 }
188 ret += chars;
189 buf->offset += chars;
190 buf->len -= chars;
191 if (!buf->len) {
192 buf->ops = NULL;
193 ops->release(info, buf);
194 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
195 info->curbuf = curbuf;
196 info->nrbufs = --bufs;
197 do_wakeup = 1;
198 }
199 total_len -= chars;
200 if (!total_len)
201 break; /* common path: read succeeded */
202 }
203 if (bufs) /* More to do? */
204 continue;
205 if (!PIPE_WRITERS(*inode))
206 break;
207 if (!PIPE_WAITING_WRITERS(*inode)) {
208 /* syscall merging: Usually we must not sleep
209 * if O_NONBLOCK is set, or if we got some data.
210 * But if a writer sleeps in kernel space, then
211 * we can wait for that data without violating POSIX.
212 */
213 if (ret)
214 break;
215 if (filp->f_flags & O_NONBLOCK) {
216 ret = -EAGAIN;
217 break;
218 }
219 }
220 if (signal_pending(current)) {
221 if (!ret) ret = -ERESTARTSYS;
222 break;
223 }
224 if (do_wakeup) {
225 wake_up_interruptible_sync(PIPE_WAIT(*inode));
226 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
227 }
Ingo Molnar3a326a22006-04-10 15:18:35 +0200228 pipe_wait(inode->i_pipe);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 }
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800230 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231 /* Signal writers asynchronously that there is more room. */
232 if (do_wakeup) {
233 wake_up_interruptible(PIPE_WAIT(*inode));
234 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
235 }
236 if (ret > 0)
237 file_accessed(filp);
238 return ret;
239}
240
241static ssize_t
242pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
243{
244 struct iovec iov = { .iov_base = buf, .iov_len = count };
245 return pipe_readv(filp, &iov, 1, ppos);
246}
247
248static ssize_t
249pipe_writev(struct file *filp, const struct iovec *_iov,
250 unsigned long nr_segs, loff_t *ppos)
251{
252 struct inode *inode = filp->f_dentry->d_inode;
253 struct pipe_inode_info *info;
254 ssize_t ret;
255 int do_wakeup;
256 struct iovec *iov = (struct iovec *)_iov;
257 size_t total_len;
258 ssize_t chars;
259
260 total_len = iov_length(iov, nr_segs);
261 /* Null write succeeds. */
262 if (unlikely(total_len == 0))
263 return 0;
264
265 do_wakeup = 0;
266 ret = 0;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800267 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 info = inode->i_pipe;
269
270 if (!PIPE_READERS(*inode)) {
271 send_sig(SIGPIPE, current, 0);
272 ret = -EPIPE;
273 goto out;
274 }
275
276 /* We try to merge small writes */
277 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
278 if (info->nrbufs && chars != 0) {
279 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
280 struct pipe_buffer *buf = info->bufs + lastbuf;
281 struct pipe_buf_operations *ops = buf->ops;
282 int offset = buf->offset + buf->len;
283 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
Jens Axboe5274f052006-03-30 15:15:30 +0200284 void *addr;
285 int error;
286
287 addr = ops->map(filp, info, buf);
288 if (IS_ERR(addr)) {
289 error = PTR_ERR(addr);
290 goto out;
291 }
292 error = pipe_iov_copy_from_user(offset + addr, iov,
293 chars);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 ops->unmap(info, buf);
295 ret = error;
296 do_wakeup = 1;
297 if (error)
298 goto out;
299 buf->len += chars;
300 total_len -= chars;
301 ret = chars;
302 if (!total_len)
303 goto out;
304 }
305 }
306
307 for (;;) {
308 int bufs;
309 if (!PIPE_READERS(*inode)) {
310 send_sig(SIGPIPE, current, 0);
311 if (!ret) ret = -EPIPE;
312 break;
313 }
314 bufs = info->nrbufs;
315 if (bufs < PIPE_BUFFERS) {
316 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
317 struct pipe_buffer *buf = info->bufs + newbuf;
318 struct page *page = info->tmp_page;
319 int error;
320
321 if (!page) {
322 page = alloc_page(GFP_HIGHUSER);
323 if (unlikely(!page)) {
324 ret = ret ? : -ENOMEM;
325 break;
326 }
327 info->tmp_page = page;
328 }
329 /* Always wakeup, even if the copy fails. Otherwise
330 * we lock up (O_NONBLOCK-)readers that sleep due to
331 * syscall merging.
332 * FIXME! Is this really true?
333 */
334 do_wakeup = 1;
335 chars = PAGE_SIZE;
336 if (chars > total_len)
337 chars = total_len;
338
339 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
340 kunmap(page);
341 if (unlikely(error)) {
342 if (!ret) ret = -EFAULT;
343 break;
344 }
345 ret += chars;
346
347 /* Insert it into the buffer array */
348 buf->page = page;
349 buf->ops = &anon_pipe_buf_ops;
350 buf->offset = 0;
351 buf->len = chars;
352 info->nrbufs = ++bufs;
353 info->tmp_page = NULL;
354
355 total_len -= chars;
356 if (!total_len)
357 break;
358 }
359 if (bufs < PIPE_BUFFERS)
360 continue;
361 if (filp->f_flags & O_NONBLOCK) {
362 if (!ret) ret = -EAGAIN;
363 break;
364 }
365 if (signal_pending(current)) {
366 if (!ret) ret = -ERESTARTSYS;
367 break;
368 }
369 if (do_wakeup) {
370 wake_up_interruptible_sync(PIPE_WAIT(*inode));
371 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
372 do_wakeup = 0;
373 }
374 PIPE_WAITING_WRITERS(*inode)++;
Ingo Molnar3a326a22006-04-10 15:18:35 +0200375 pipe_wait(inode->i_pipe);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 PIPE_WAITING_WRITERS(*inode)--;
377 }
378out:
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800379 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380 if (do_wakeup) {
381 wake_up_interruptible(PIPE_WAIT(*inode));
382 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
383 }
384 if (ret > 0)
Christoph Hellwig870f4812006-01-09 20:52:01 -0800385 file_update_time(filp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386 return ret;
387}
388
389static ssize_t
390pipe_write(struct file *filp, const char __user *buf,
391 size_t count, loff_t *ppos)
392{
393 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
394 return pipe_writev(filp, &iov, 1, ppos);
395}
396
397static ssize_t
398bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
399{
400 return -EBADF;
401}
402
403static ssize_t
404bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
405{
406 return -EBADF;
407}
408
409static int
410pipe_ioctl(struct inode *pino, struct file *filp,
411 unsigned int cmd, unsigned long arg)
412{
413 struct inode *inode = filp->f_dentry->d_inode;
414 struct pipe_inode_info *info;
415 int count, buf, nrbufs;
416
417 switch (cmd) {
418 case FIONREAD:
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800419 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 info = inode->i_pipe;
421 count = 0;
422 buf = info->curbuf;
423 nrbufs = info->nrbufs;
424 while (--nrbufs >= 0) {
425 count += info->bufs[buf].len;
426 buf = (buf+1) & (PIPE_BUFFERS-1);
427 }
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800428 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 return put_user(count, (int __user *)arg);
430 default:
431 return -EINVAL;
432 }
433}
434
435/* No kernel lock held - fine */
436static unsigned int
437pipe_poll(struct file *filp, poll_table *wait)
438{
439 unsigned int mask;
440 struct inode *inode = filp->f_dentry->d_inode;
441 struct pipe_inode_info *info = inode->i_pipe;
442 int nrbufs;
443
444 poll_wait(filp, PIPE_WAIT(*inode), wait);
445
446 /* Reading only -- no need for acquiring the semaphore. */
447 nrbufs = info->nrbufs;
448 mask = 0;
449 if (filp->f_mode & FMODE_READ) {
450 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
451 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
452 mask |= POLLHUP;
453 }
454
455 if (filp->f_mode & FMODE_WRITE) {
456 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700457 /*
458 * Most Unices do not set POLLERR for FIFOs but on Linux they
459 * behave exactly like pipes for poll().
460 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700461 if (!PIPE_READERS(*inode))
462 mask |= POLLERR;
463 }
464
465 return mask;
466}
467
Linus Torvalds1da177e2005-04-16 15:20:36 -0700468static int
469pipe_release(struct inode *inode, int decr, int decw)
470{
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800471 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472 PIPE_READERS(*inode) -= decr;
473 PIPE_WRITERS(*inode) -= decw;
474 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
475 free_pipe_info(inode);
476 } else {
477 wake_up_interruptible(PIPE_WAIT(*inode));
478 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
479 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
480 }
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800481 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482
483 return 0;
484}
485
486static int
487pipe_read_fasync(int fd, struct file *filp, int on)
488{
489 struct inode *inode = filp->f_dentry->d_inode;
490 int retval;
491
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800492 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800494 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700495
496 if (retval < 0)
497 return retval;
498
499 return 0;
500}
501
502
503static int
504pipe_write_fasync(int fd, struct file *filp, int on)
505{
506 struct inode *inode = filp->f_dentry->d_inode;
507 int retval;
508
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800509 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800511 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512
513 if (retval < 0)
514 return retval;
515
516 return 0;
517}
518
519
520static int
521pipe_rdwr_fasync(int fd, struct file *filp, int on)
522{
523 struct inode *inode = filp->f_dentry->d_inode;
524 int retval;
525
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800526 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
528 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
529
530 if (retval >= 0)
531 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
532
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800533 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534
535 if (retval < 0)
536 return retval;
537
538 return 0;
539}
540
541
/*
 * pipe_read_release - ->release() for the read end
 *
 * Removes @filp from the reader fasync list, then drops one reader via
 * pipe_release().
 */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	const int readers_gone = 1, writers_gone = 0;

	pipe_read_fasync(-1, filp, 0);

	return pipe_release(inode, readers_gone, writers_gone);
}
548
/*
 * pipe_write_release - ->release() for the write end
 *
 * Removes @filp from the writer fasync list, then drops one writer via
 * pipe_release().
 */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	const int readers_gone = 0, writers_gone = 1;

	pipe_write_fasync(-1, filp, 0);

	return pipe_release(inode, readers_gone, writers_gone);
}
555
556static int
557pipe_rdwr_release(struct inode *inode, struct file *filp)
558{
559 int decr, decw;
560
561 pipe_rdwr_fasync(-1, filp, 0);
562 decr = (filp->f_mode & FMODE_READ) != 0;
563 decw = (filp->f_mode & FMODE_WRITE) != 0;
564 return pipe_release(inode, decr, decw);
565}
566
567static int
568pipe_read_open(struct inode *inode, struct file *filp)
569{
570 /* We could have perhaps used atomic_t, but this and friends
571 below are the only places. So it doesn't seem worthwhile. */
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800572 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700573 PIPE_READERS(*inode)++;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800574 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700575
576 return 0;
577}
578
579static int
580pipe_write_open(struct inode *inode, struct file *filp)
581{
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800582 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 PIPE_WRITERS(*inode)++;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800584 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585
586 return 0;
587}
588
589static int
590pipe_rdwr_open(struct inode *inode, struct file *filp)
591{
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800592 mutex_lock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700593 if (filp->f_mode & FMODE_READ)
594 PIPE_READERS(*inode)++;
595 if (filp->f_mode & FMODE_WRITE)
596 PIPE_WRITERS(*inode)++;
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800597 mutex_unlock(PIPE_MUTEX(*inode));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598
599 return 0;
600}
601
602/*
603 * The file_operations structs are not static because they
604 * are also used in linux/fs/fifo.c to do operations on FIFOs.
605 */
Arjan van de Ven4b6f5d22006-03-28 01:56:42 -0800606const struct file_operations read_fifo_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700607 .llseek = no_llseek,
608 .read = pipe_read,
609 .readv = pipe_readv,
610 .write = bad_pipe_w,
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700611 .poll = pipe_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 .ioctl = pipe_ioctl,
613 .open = pipe_read_open,
614 .release = pipe_read_release,
615 .fasync = pipe_read_fasync,
616};
617
Arjan van de Ven4b6f5d22006-03-28 01:56:42 -0800618const struct file_operations write_fifo_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700619 .llseek = no_llseek,
620 .read = bad_pipe_r,
621 .write = pipe_write,
622 .writev = pipe_writev,
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700623 .poll = pipe_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700624 .ioctl = pipe_ioctl,
625 .open = pipe_write_open,
626 .release = pipe_write_release,
627 .fasync = pipe_write_fasync,
628};
629
Arjan van de Ven4b6f5d22006-03-28 01:56:42 -0800630const struct file_operations rdwr_fifo_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 .llseek = no_llseek,
632 .read = pipe_read,
633 .readv = pipe_readv,
634 .write = pipe_write,
635 .writev = pipe_writev,
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700636 .poll = pipe_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700637 .ioctl = pipe_ioctl,
638 .open = pipe_rdwr_open,
639 .release = pipe_rdwr_release,
640 .fasync = pipe_rdwr_fasync,
641};
642
Linus Torvaldsa19cbd42006-03-08 14:03:09 -0800643static struct file_operations read_pipe_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700644 .llseek = no_llseek,
645 .read = pipe_read,
646 .readv = pipe_readv,
647 .write = bad_pipe_w,
648 .poll = pipe_poll,
649 .ioctl = pipe_ioctl,
650 .open = pipe_read_open,
651 .release = pipe_read_release,
652 .fasync = pipe_read_fasync,
653};
654
Linus Torvaldsa19cbd42006-03-08 14:03:09 -0800655static struct file_operations write_pipe_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700656 .llseek = no_llseek,
657 .read = bad_pipe_r,
658 .write = pipe_write,
659 .writev = pipe_writev,
660 .poll = pipe_poll,
661 .ioctl = pipe_ioctl,
662 .open = pipe_write_open,
663 .release = pipe_write_release,
664 .fasync = pipe_write_fasync,
665};
666
Linus Torvaldsa19cbd42006-03-08 14:03:09 -0800667static struct file_operations rdwr_pipe_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700668 .llseek = no_llseek,
669 .read = pipe_read,
670 .readv = pipe_readv,
671 .write = pipe_write,
672 .writev = pipe_writev,
673 .poll = pipe_poll,
674 .ioctl = pipe_ioctl,
675 .open = pipe_rdwr_open,
676 .release = pipe_rdwr_release,
677 .fasync = pipe_rdwr_fasync,
678};
679
Ingo Molnar3a326a22006-04-10 15:18:35 +0200680struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
681{
682 struct pipe_inode_info *info;
683
684 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
685 if (info) {
686 init_waitqueue_head(&info->wait);
687 info->r_counter = info->w_counter = 1;
688 info->inode = inode;
689 }
690
691 return info;
692}
693
Jens Axboeb92ce552006-04-11 13:52:07 +0200694void __free_pipe_info(struct pipe_inode_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695{
696 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 for (i = 0; i < PIPE_BUFFERS; i++) {
699 struct pipe_buffer *buf = info->bufs + i;
700 if (buf->ops)
701 buf->ops->release(info, buf);
702 }
703 if (info->tmp_page)
704 __free_page(info->tmp_page);
705 kfree(info);
706}
707
Jens Axboeb92ce552006-04-11 13:52:07 +0200708void free_pipe_info(struct inode *inode)
709{
710 __free_pipe_info(inode->i_pipe);
711 inode->i_pipe = NULL;
712}
713
Eric Dumazetfa3536c2006-03-26 01:37:24 -0800714static struct vfsmount *pipe_mnt __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715static int pipefs_delete_dentry(struct dentry *dentry)
716{
717 return 1;
718}
719static struct dentry_operations pipefs_dentry_operations = {
720 .d_delete = pipefs_delete_dentry,
721};
722
723static struct inode * get_pipe_inode(void)
724{
725 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
726
727 if (!inode)
728 goto fail_inode;
729
Ingo Molnar3a326a22006-04-10 15:18:35 +0200730 inode->i_pipe = alloc_pipe_info(inode);
731 if (!inode->i_pipe)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 goto fail_iput;
Ingo Molnar3a326a22006-04-10 15:18:35 +0200733
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
735 inode->i_fop = &rdwr_pipe_fops;
736
737 /*
738 * Mark the inode dirty from the very beginning,
739 * that way it will never be moved to the dirty
740 * list because "mark_inode_dirty()" will think
741 * that it already _is_ on the dirty list.
742 */
743 inode->i_state = I_DIRTY;
744 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
745 inode->i_uid = current->fsuid;
746 inode->i_gid = current->fsgid;
747 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
748 inode->i_blksize = PAGE_SIZE;
749 return inode;
750
751fail_iput:
752 iput(inode);
753fail_inode:
754 return NULL;
755}
756
757int do_pipe(int *fd)
758{
759 struct qstr this;
760 char name[32];
761 struct dentry *dentry;
762 struct inode * inode;
763 struct file *f1, *f2;
764 int error;
765 int i,j;
766
767 error = -ENFILE;
768 f1 = get_empty_filp();
769 if (!f1)
770 goto no_files;
771
772 f2 = get_empty_filp();
773 if (!f2)
774 goto close_f1;
775
776 inode = get_pipe_inode();
777 if (!inode)
778 goto close_f12;
779
780 error = get_unused_fd();
781 if (error < 0)
782 goto close_f12_inode;
783 i = error;
784
785 error = get_unused_fd();
786 if (error < 0)
787 goto close_f12_inode_i;
788 j = error;
789
790 error = -ENOMEM;
791 sprintf(name, "[%lu]", inode->i_ino);
792 this.name = name;
793 this.len = strlen(name);
794 this.hash = inode->i_ino; /* will go */
795 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
796 if (!dentry)
797 goto close_f12_inode_i_j;
798 dentry->d_op = &pipefs_dentry_operations;
799 d_add(dentry, inode);
800 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
801 f1->f_dentry = f2->f_dentry = dget(dentry);
802 f1->f_mapping = f2->f_mapping = inode->i_mapping;
803
804 /* read file */
805 f1->f_pos = f2->f_pos = 0;
806 f1->f_flags = O_RDONLY;
807 f1->f_op = &read_pipe_fops;
808 f1->f_mode = FMODE_READ;
809 f1->f_version = 0;
810
811 /* write file */
812 f2->f_flags = O_WRONLY;
813 f2->f_op = &write_pipe_fops;
814 f2->f_mode = FMODE_WRITE;
815 f2->f_version = 0;
816
817 fd_install(i, f1);
818 fd_install(j, f2);
819 fd[0] = i;
820 fd[1] = j;
821 return 0;
822
823close_f12_inode_i_j:
824 put_unused_fd(j);
825close_f12_inode_i:
826 put_unused_fd(i);
827close_f12_inode:
828 free_pipe_info(inode);
829 iput(inode);
830close_f12:
831 put_filp(f2);
832close_f1:
833 put_filp(f1);
834no_files:
835 return error;
836}
837
838/*
839 * pipefs should _never_ be mounted by userland - too much of security hassle,
840 * no real gain from having the whole whorehouse mounted. So we don't need
841 * any operations on the root directory. However, we need a non-trivial
842 * d_name - pipe: will go nicely and kill the special-casing in procfs.
843 */
844
845static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
846 int flags, const char *dev_name, void *data)
847{
848 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
849}
850
851static struct file_system_type pipe_fs_type = {
852 .name = "pipefs",
853 .get_sb = pipefs_get_sb,
854 .kill_sb = kill_anon_super,
855};
856
857static int __init init_pipe_fs(void)
858{
859 int err = register_filesystem(&pipe_fs_type);
860 if (!err) {
861 pipe_mnt = kern_mount(&pipe_fs_type);
862 if (IS_ERR(pipe_mnt)) {
863 err = PTR_ERR(pipe_mnt);
864 unregister_filesystem(&pipe_fs_type);
865 }
866 }
867 return err;
868}
869
870static void __exit exit_pipe_fs(void)
871{
872 unregister_filesystem(&pipe_fs_type);
873 mntput(pipe_mnt);
874}
875
876fs_initcall(init_pipe_fs);
877module_exit(exit_pipe_fs);