blob: acc73fe686983d4f71333a636cadd1d3f17e743e [file] [log] [blame]
/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
4 * Authors: Alan Cox, <alan.cox@linux.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12 *
13 * Fixes:
14 * Linus Torvalds : Assorted bug cures.
15 * Niibe Yutaka : async I/O support.
16 * Carsten Paeth : PF_UNIX check, address fixes.
17 * Alan Cox : Limit size of allocated blocks.
18 * Alan Cox : Fixed the stupid socketpair bug.
19 * Alan Cox : BSD compatibility fine tuning.
20 * Alan Cox : Fixed a bug in connect when interrupted.
21 * Alan Cox : Sorted out a proper draft version of
22 * file descriptor passing hacked up from
23 * Mike Shaver's work.
24 * Marty Leisner : Fixes to fd passing
25 * Nick Nevin : recvmsg bugfix.
26 * Alan Cox : Started proper garbage collector
27 * Heiko EiBfeldt : Missing verify_area check
28 * Alan Cox : Started POSIXisms
29 * Andreas Schwab : Replace inode by dentry for proper
30 * reference counting
31 * Kirk Petersen : Made this a module
32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
33 * Lots of bug fixes.
 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
 * by the above two patches.
 * Andrea Arcangeli : If possible we block in connect(2)
 * if the max backlog of the listen socket
 * has been reached. This won't break
 * old apps and it will avoid a huge number
 * of socks hashed (this is for unix_gc()
 * performance reasons).
42 * Security fix that limits the max
43 * number of socks to 2*max_files and
44 * the number of skb queueable in the
45 * dgram receiver.
46 * Artur Skawina : Hash function optimizations
47 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
48 * Malcolm Beattie : Set peercred for socketpair
49 * Michal Ostrowski : Module initialization cleanup.
50 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
51 * the core infrastructure is doing that
52 * for all net proto families now (2.5.69+)
53 *
54 *
55 * Known differences from reference BSD that was tested:
56 *
57 * [TO FIX]
 * ECONNREFUSED is not returned from one end of a connected socket to the
 * other at the moment one end closes.
60 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
61 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
62 * [NOT TO FIX]
63 * accept() returns a path name even if the connecting socket has closed
64 * in the meantime (BSD loses the path and gives up).
65 * accept() returns 0 length path for an unbound connector. BSD returns 16
66 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
68 * BSD af_unix apparently has connect forgetting to block properly.
69 * (need to check this with the POSIX spec in detail)
70 *
71 * Differences from 2.0.0-11-... (ANK)
72 * Bug fixes and improvements.
73 * - client shutdown killed server socket.
74 * - removed all useless cli/sti pairs.
75 *
76 * Semantic changes/extensions.
77 * - generic control message passing.
78 * - SCM_CREDENTIALS control message.
79 * - "Abstract" (not FS based) socket bindings.
80 * Abstract names are sequences of bytes (not zero terminated)
81 * started by 0, so that this name space does not intersect
82 * with BSD names.
83 */
84
85#include <linux/module.h>
86#include <linux/config.h>
87#include <linux/kernel.h>
88#include <linux/major.h>
89#include <linux/signal.h>
90#include <linux/sched.h>
91#include <linux/errno.h>
92#include <linux/string.h>
93#include <linux/stat.h>
94#include <linux/dcache.h>
95#include <linux/namei.h>
96#include <linux/socket.h>
97#include <linux/un.h>
98#include <linux/fcntl.h>
99#include <linux/termios.h>
100#include <linux/sockios.h>
101#include <linux/net.h>
102#include <linux/in.h>
103#include <linux/fs.h>
104#include <linux/slab.h>
105#include <asm/uaccess.h>
106#include <linux/skbuff.h>
107#include <linux/netdevice.h>
108#include <net/sock.h>
109#include <linux/tcp.h>
110#include <net/af_unix.h>
111#include <linux/proc_fs.h>
112#include <linux/seq_file.h>
113#include <net/scm.h>
114#include <linux/init.h>
115#include <linux/poll.h>
116#include <linux/smp_lock.h>
117#include <linux/rtnetlink.h>
118#include <linux/mount.h>
119#include <net/checksum.h>
120#include <linux/security.h>
121
/* Default queue limit; copied into sk_max_ack_backlog of each new socket. */
int sysctl_unix_max_dgram_qlen = 10;

/*
 * Socket hash table.  The extra slot at index UNIX_HASH_SIZE is the list
 * of sockets that have no address yet (see unix_sockets_unbound below).
 */
struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
/* Protects unix_socket_table. */
DEFINE_RWLOCK(unix_table_lock);
/* Count of unix sockets; raised in unix_create1(), dropped in the destructor. */
static atomic_t unix_nr_socks = ATOMIC_INIT(0);

/* List head used for sockets that are not bound to any name. */
#define unix_sockets_unbound	(&unix_socket_table[UNIX_HASH_SIZE])

/*
 * True when the bound address is an abstract name.  Filesystem names are
 * recorded by unix_bind() with hash == UNIX_HASH_SIZE.
 */
#define UNIX_ABSTRACT(sk)	(unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
131
132/*
133 * SMP locking strategy:
134 * hash table is protected with rwlock unix_table_lock
135 * each socket state is protected by separate rwlock.
136 */
137
138static inline unsigned unix_hash_fold(unsigned hash)
139{
140 hash ^= hash>>16;
141 hash ^= hash>>8;
142 return hash&(UNIX_HASH_SIZE-1);
143}
144
/* Peer pointer of a unix socket; expands to an lvalue, so it is also assigned through. */
#define unix_peer(sk) (unix_sk(sk)->peer)
146
/* Return nonzero when @osk currently has @sk recorded as its peer. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	struct sock *peer_of_other = unix_peer(osk);

	return peer_of_other == sk;
}
151
152static inline int unix_may_send(struct sock *sk, struct sock *osk)
153{
154 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
155}
156
157static struct sock *unix_peer_get(struct sock *s)
158{
159 struct sock *peer;
160
161 unix_state_rlock(s);
162 peer = unix_peer(s);
163 if (peer)
164 sock_hold(peer);
165 unix_state_runlock(s);
166 return peer;
167}
168
169static inline void unix_release_addr(struct unix_address *addr)
170{
171 if (atomic_dec_and_test(&addr->refcnt))
172 kfree(addr);
173}
174
/*
 * Check a unix socket name and normalise its length:
 *	- it must be longer than the bare family field and no longer
 *	  than struct sockaddr_un.
 *	- if the first path byte is non-zero it is a filesystem name; it is
 *	  NUL terminated here and the length is recomputed from the string.
 *	- if the first path byte is zero it is an abstract name; all @len
 *	  bytes are significant and are hashed.
 *
 * Returns the (possibly adjusted) address length, or -EINVAL.
 * On success *hashp is always written: the folded checksum for an
 * abstract name, 0 for a filesystem name.
 */

static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
{
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		/*
		 * unix_bind() reads the hash before it knows which kind of
		 * name it has, so never leave the output indeterminate.
		 */
		*hashp = 0;
		return strlen(sunaddr->sun_path) + 1 + sizeof(short);
	}

	*hashp = unix_hash_fold(csum_partial((char *)sunaddr, len, 0));
	return len;
}
204
/*
 * Unlink @sk from whatever hash list it is on.  Takes no lock itself;
 * the callers in this file hold unix_table_lock for writing.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
209
/*
 * Add @sk to @list.  The socket must not be hashed already (checked with
 * BUG_TRAP).  Takes no lock itself; the callers in this file hold
 * unix_table_lock for writing.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));
	sk_add_node(sk, list);
}
215
/* Locked variant: unhash @sk while holding unix_table_lock for writing. */
static inline void unix_remove_socket(struct sock *sk)
{
	write_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	write_unlock(&unix_table_lock);
}
222
/* Locked variant: add @sk to @list while holding unix_table_lock for writing. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	write_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	write_unlock(&unix_table_lock);
}
229
230static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
231 int len, int type, unsigned hash)
232{
233 struct sock *s;
234 struct hlist_node *node;
235
236 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
237 struct unix_sock *u = unix_sk(s);
238
239 if (u->addr->len == len &&
240 !memcmp(u->addr->name, sunname, len))
241 goto found;
242 }
243 s = NULL;
244found:
245 return s;
246}
247
248static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
249 int len, int type,
250 unsigned hash)
251{
252 struct sock *s;
253
254 read_lock(&unix_table_lock);
255 s = __unix_find_socket_byname(sunname, len, type, hash);
256 if (s)
257 sock_hold(s);
258 read_unlock(&unix_table_lock);
259 return s;
260}
261
262static struct sock *unix_find_socket_byinode(struct inode *i)
263{
264 struct sock *s;
265 struct hlist_node *node;
266
267 read_lock(&unix_table_lock);
268 sk_for_each(s, node,
269 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
270 struct dentry *dentry = unix_sk(s)->dentry;
271
272 if(dentry && dentry->d_inode == i)
273 {
274 sock_hold(s);
275 goto found;
276 }
277 }
278 s = NULL;
279found:
280 read_unlock(&unix_table_lock);
281 return s;
282}
283
284static inline int unix_writable(struct sock *sk)
285{
286 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
287}
288
289static void unix_write_space(struct sock *sk)
290{
291 read_lock(&sk->sk_callback_lock);
292 if (unix_writable(sk)) {
293 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
294 wake_up_interruptible(sk->sk_sleep);
295 sk_wake_async(sk, 2, POLL_OUT);
296 }
297 read_unlock(&sk->sk_callback_lock);
298}
299
300/* When dgram socket disconnects (or changes its peer), we clear its receive
301 * queue of packets arrived from previous peer. First, it allows to do
302 * flow control based only on wmem_alloc; second, sk connected to peer
303 * may receive messages only from that peer. */
304static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
305{
306 if (skb_queue_len(&sk->sk_receive_queue)) {
307 skb_queue_purge(&sk->sk_receive_queue);
308 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
309
310 /* If one link of bidirectional dgram pipe is disconnected,
311 * we signal error. Messages are lost. Do not make this,
312 * when peer was not connected to us.
313 */
314 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
315 other->sk_err = ECONNRESET;
316 other->sk_error_report(other);
317 }
318 }
319}
320
321static void unix_sock_destructor(struct sock *sk)
322{
323 struct unix_sock *u = unix_sk(sk);
324
325 skb_queue_purge(&sk->sk_receive_queue);
326
327 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
328 BUG_TRAP(sk_unhashed(sk));
329 BUG_TRAP(!sk->sk_socket);
330 if (!sock_flag(sk, SOCK_DEAD)) {
331 printk("Attempt to release alive unix socket: %p\n", sk);
332 return;
333 }
334
335 if (u->addr)
336 unix_release_addr(u->addr);
337
338 atomic_dec(&unix_nr_socks);
339#ifdef UNIX_REFCNT_DEBUG
340 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
341#endif
342}
343
/*
 * Tear down a unix socket.
 *
 * @sk:      socket being released
 * @embrion: non-zero when @sk is a queued, not yet accepted connection
 *           that is being discarded together with its listener (see the
 *           recursive call below)
 *
 * Unhashes the socket, marks it orphaned and closed, notifies a connected
 * stream/seqpacket peer, drains the receive queue, drops the dentry and
 * mount references, and finally drops a reference on @sk.
 * Always returns 0.
 */
static int unix_release_sock (struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct dentry *dentry;
	struct vfsmount *mnt;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_wlock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Take over the filesystem references; they are put after the lock is dropped. */
	dentry = u->dentry;
	u->dentry = NULL;
	mnt = u->mnt;
	u->mnt = NULL;
	/* Remember the old state: a listener's queue needs special draining below. */
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_wunlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair=unix_peer(sk);

	if (skpair!=NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_wlock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			/* Unread data, or a never-accepted connection: the peer sees a reset. */
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_wunlock(skpair);
			skpair->sk_state_change(skpair);
			read_lock(&skpair->sk_callback_lock);
			sk_wake_async(skpair,1,POLL_HUP);
			read_unlock(&skpair->sk_callback_lock);
		}
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* On a listener each queued skb stands for a pending connection; release its socket too. */
		if (state==TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		kfree_skb(skb);
	}

	if (dentry) {
		dput(dentry);
		mntput(mnt);
	}

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to use get
	 * ECONNRESET and we die on the spot. In Linux we behave
	 * like files and pipes do and wait for the last
	 * dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What the above comment does talk about? --ANK(980817)
	 */

	if (atomic_read(&unix_tot_inflight))
		unix_gc(); /* Garbage collect fds */

	return 0;
}
422
423static int unix_listen(struct socket *sock, int backlog)
424{
425 int err;
426 struct sock *sk = sock->sk;
427 struct unix_sock *u = unix_sk(sk);
428
429 err = -EOPNOTSUPP;
430 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
431 goto out; /* Only stream/seqpacket sockets accept */
432 err = -EINVAL;
433 if (!u->addr)
434 goto out; /* No listens on an unbound socket */
435 unix_state_wlock(sk);
436 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
437 goto out_unlock;
438 if (backlog > sk->sk_max_ack_backlog)
439 wake_up_interruptible_all(&u->peer_wait);
440 sk->sk_max_ack_backlog = backlog;
441 sk->sk_state = TCP_LISTEN;
442 /* set credentials so connect can copy them */
443 sk->sk_peercred.pid = current->tgid;
444 sk->sk_peercred.uid = current->euid;
445 sk->sk_peercred.gid = current->egid;
446 err = 0;
447
448out_unlock:
449 unix_state_wunlock(sk);
450out:
451 return err;
452}
453
/*
 * Forward declarations of the socket operations referenced by the
 * proto_ops tables that follow.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
			       int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t);
static int unix_stream_recvmsg(struct kiocb *, struct socket *,
			       struct msghdr *, size_t, int);
static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
			      struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
			      int, int);
static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
				  struct msghdr *, size_t);
476
/*
 * Operations for SOCK_STREAM sockets (installed by unix_create()):
 * connection oriented, with listen/accept, its own poll routine and the
 * stream send/receive paths.  Socket options, mmap and sendpage use the
 * generic sock_no_* stubs.
 */
static struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
497
/*
 * Operations for SOCK_DGRAM sockets (SOCK_RAW is mapped onto these by
 * unix_create()).  accept and listen use the sock_no_* stubs, connect
 * only selects a peer, and polling uses the generic datagram_poll.
 */
static struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		datagram_poll,
	.ioctl =	unix_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
518
/*
 * Operations for SOCK_SEQPACKET sockets: connection setup
 * (connect/listen/accept) is shared with the stream table, while
 * receiving and polling use the datagram routines and sending has its
 * own entry point.
 */
static struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		datagram_poll,
	.ioctl =	unix_ioctl,
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
539
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(); obj_size
 * makes each allocation large enough for a struct unix_sock.
 */
static struct proto unix_proto = {
	.name	  = "UNIX",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct unix_sock),
};
545
546static struct sock * unix_create1(struct socket *sock)
547{
548 struct sock *sk = NULL;
549 struct unix_sock *u;
550
551 if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
552 goto out;
553
554 sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
555 if (!sk)
556 goto out;
557
558 atomic_inc(&unix_nr_socks);
559
560 sock_init_data(sock,sk);
561
562 sk->sk_write_space = unix_write_space;
563 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
564 sk->sk_destruct = unix_sock_destructor;
565 u = unix_sk(sk);
566 u->dentry = NULL;
567 u->mnt = NULL;
568 rwlock_init(&u->lock);
569 atomic_set(&u->inflight, sock ? 0 : -1);
570 init_MUTEX(&u->readsem); /* single task reading lock */
571 init_waitqueue_head(&u->peer_wait);
572 unix_insert_socket(unix_sockets_unbound, sk);
573out:
574 return sk;
575}
576
577static int unix_create(struct socket *sock, int protocol)
578{
579 if (protocol && protocol != PF_UNIX)
580 return -EPROTONOSUPPORT;
581
582 sock->state = SS_UNCONNECTED;
583
584 switch (sock->type) {
585 case SOCK_STREAM:
586 sock->ops = &unix_stream_ops;
587 break;
588 /*
589 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
590 * nothing uses it.
591 */
592 case SOCK_RAW:
593 sock->type=SOCK_DGRAM;
594 case SOCK_DGRAM:
595 sock->ops = &unix_dgram_ops;
596 break;
597 case SOCK_SEQPACKET:
598 sock->ops = &unix_seqpacket_ops;
599 break;
600 default:
601 return -ESOCKTNOSUPPORT;
602 }
603
604 return unix_create1(sock) ? 0 : -ENOMEM;
605}
606
607static int unix_release(struct socket *sock)
608{
609 struct sock *sk = sock->sk;
610
611 if (!sk)
612 return 0;
613
614 sock->sk = NULL;
615
616 return unix_release_sock (sk, 0);
617}
618
619static int unix_autobind(struct socket *sock)
620{
621 struct sock *sk = sock->sk;
622 struct unix_sock *u = unix_sk(sk);
623 static u32 ordernum = 1;
624 struct unix_address * addr;
625 int err;
626
627 down(&u->readsem);
628
629 err = 0;
630 if (u->addr)
631 goto out;
632
633 err = -ENOMEM;
634 addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
635 if (!addr)
636 goto out;
637
638 memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
639 addr->name->sun_family = AF_UNIX;
640 atomic_set(&addr->refcnt, 1);
641
642retry:
643 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
644 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
645
646 write_lock(&unix_table_lock);
647 ordernum = (ordernum+1)&0xFFFFF;
648
649 if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
650 addr->hash)) {
651 write_unlock(&unix_table_lock);
652 /* Sanity yield. It is unusual case, but yet... */
653 if (!(ordernum&0xFF))
654 yield();
655 goto retry;
656 }
657 addr->hash ^= sk->sk_type;
658
659 __unix_remove_socket(sk);
660 u->addr = addr;
661 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
662 write_unlock(&unix_table_lock);
663 err = 0;
664
665out: up(&u->readsem);
666 return err;
667}
668
/*
 * Look up the socket that a name refers to.
 *
 * @sunname, @len: the name and its length
 * @type:          socket type the caller wants to talk to
 * @hash:          name hash, used only for abstract names
 * @error:         receives a negative errno when NULL is returned; it is
 *                 not written on success
 *
 * A filesystem name is resolved through the VFS, needs write permission
 * on the inode and must be a socket inode of matching type.  An abstract
 * name is looked up in the hash table.
 *
 * Returns the socket with a reference held, or NULL.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct nameidata nd;
	int err = 0;

	if (sunname->sun_path[0]) {
		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
		if (err)
			goto fail;
		err = permission(nd.dentry->d_inode,MAY_WRITE, &nd);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
			goto put_fail;
		u=unix_find_socket_byinode(nd.dentry->d_inode);
		if (!u)
			goto put_fail;

		/* Update the access time only when the type matches. */
		if (u->sk_type == type)
			touch_atime(nd.mnt, nd.dentry);

		path_release(&nd);

		/* The path is already released here, so bail out through "fail". */
		err=-EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u=unix_find_socket_byname(sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->dentry;
			if (dentry)
				touch_atime(unix_sk(u)->mnt, dentry);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_release(&nd);
fail:
	*error=err;
	return NULL;
}
720
721
/*
 * bind() for unix sockets.
 *
 * An address consisting of just the family field requests an autobind.
 * A filesystem name creates a socket inode with vfs_mknod() and hashes
 * the socket by inode number.  An abstract name is hashed by its
 * checksum and must not be in use already.
 *
 * Returns 0 or a negative errno: -EINVAL for a bad address or a socket
 * that is already bound, -ENOMEM, -EADDRINUSE when the name is taken
 * (-EEXIST from the filesystem path is translated to this), or an error
 * from the VFS calls.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct dentry * dentry = NULL;
	struct nameidata nd;
	int err;
	unsigned hash;
	struct unix_address *addr;
	struct hlist_head *list;

	err = -EINVAL;
	if (sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len==sizeof(short)) {
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* readsem serialises address changes on this socket (unix_autobind() takes it too). */
	down(&u->readsem);

	err = -EINVAL;
	if (u->addr)
		goto out_up;

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	/* For a filesystem name this value is replaced by UNIX_HASH_SIZE below. */
	addr->hash = hash ^ sk->sk_type;
	atomic_set(&addr->refcnt, 1);

	if (sunaddr->sun_path[0]) {
		unsigned int mode;
		err = 0;
		/*
		 * Get the parent directory, calculate the hash for last
		 * component.
		 */
		err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
		if (err)
			goto out_mknod_parent;
		/*
		 * Yucky last component or no last component at all?
		 * (foo/., foo/.., /////)
		 */
		err = -EEXIST;
		if (nd.last_type != LAST_NORM)
			goto out_mknod;
		/*
		 * Lock the directory.
		 */
		down(&nd.dentry->d_inode->i_sem);
		/*
		 * Do the final lookup.
		 */
		dentry = lookup_hash(&nd.last, nd.dentry);
		err = PTR_ERR(dentry);
		if (IS_ERR(dentry))
			goto out_mknod_unlock;
		err = -ENOENT;
		/*
		 * Special case - lookup gave negative, but... we had foo/bar/
		 * From the vfs_mknod() POV we just have a negative dentry -
		 * all is fine. Let's be bastards - you had / on the end, you've
		 * been asking for (non-existent) directory. -ENOENT for you.
		 */
		if (nd.last.name[nd.last.len] && !dentry->d_inode)
			goto out_mknod_dput;
		/*
		 * All right, let's create it.
		 */
		mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
		err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
		if (err)
			goto out_mknod_dput;
		up(&nd.dentry->d_inode->i_sem);
		/* Swap the parent dentry in nd for the new socket dentry; nd.mnt is kept. */
		dput(nd.dentry);
		nd.dentry = dentry;

		addr->hash = UNIX_HASH_SIZE;
	}

	write_lock(&unix_table_lock);

	if (!sunaddr->sun_path[0]) {
		/* Abstract name: refuse it if another socket of this type has it. */
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	} else {
		/* Filesystem name: bucket by inode number; the socket takes over the nd references. */
		list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
		u->dentry = nd.dentry;
		u->mnt    = nd.mnt;
	}

	err = 0;
	/* Move the socket from its current list to the list for its name. */
	__unix_remove_socket(sk);
	u->addr = addr;
	__unix_insert_socket(list, sk);

out_unlock:
	write_unlock(&unix_table_lock);
out_up:
	up(&u->readsem);
out:
	return err;

	/* Unwind for the filesystem-name path; the labels fall through in reverse order of acquisition. */
out_mknod_dput:
	dput(dentry);
out_mknod_unlock:
	up(&nd.dentry->d_inode->i_sem);
out_mknod:
	path_release(&nd);
out_mknod_parent:
	if (err==-EEXIST)
		err=-EADDRINUSE;
	unix_release_addr(addr);
	goto out_up;
}
857
/*
 * connect() for datagram sockets: set, change or clear the peer.
 *
 * With an AF_UNSPEC address the current peer is dropped.  Otherwise the
 * target is looked up and becomes the peer, provided unix_may_send() and
 * the security hook allow it.
 *
 * Returns 0 or a negative errno (-EPERM when the target is connected to
 * some other socket, or an error from name checking, autobind, lookup or
 * the security hook).
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
	struct sock *other;
	unsigned hash;
	int err;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* With SOCK_PASSCRED set, an unbound socket is given an automatic name first. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

		/* On success "other" carries a reference that becomes the peer reference. */
		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_wlock(sk);

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_wlock(sk);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk)=other;
		unix_state_wunlock(sk);

		/* Changing peer: purge what the old peer queued, then drop the old peer reference. */
		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk)=other;
		unix_state_wunlock(sk);
	}
	return 0;

out_unlock:
	unix_state_wunlock(sk);
	sock_put(other);
out:
	return err;
}
922
/*
 * Sleep on @other's peer_wait queue until woken or until @timeo expires.
 *
 * The state read lock on @other is released here, so the caller is
 * expected to hold it on entry (unix_stream_connect() does).  The task
 * only sleeps when @other is not dead, has not shut down receiving, and
 * its receive queue is longer than sk_max_ack_backlog.
 *
 * Returns the remaining timeout (unchanged when no sleep took place).
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	/* Queue ourselves before testing the condition (presumably so that a wakeup in between is not lost). */
	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		(skb_queue_len(&other->sk_receive_queue) >
		 other->sk_max_ack_backlog);

	unix_state_runlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
944
/*
 * connect() for stream and seqpacket sockets.
 *
 * A new sock (the future server-side end) and a one-byte skb are
 * allocated up front.  The listener is then looked up; if its queue is
 * over the backlog the caller waits (or fails with -EAGAIN when
 * non-blocking).  Once both sockets are locked, the new sock is paired
 * with @sock and the skb carrying it is queued on the listener for
 * unix_accept() to pick up.
 *
 * Returns 0 or a negative errno: -ENOMEM, -ECONNREFUSED when the target
 * is not listening, -EAGAIN, -EISCONN, -EINVAL, a signal-related error
 * from sock_intr_errno(), or an error from name checking, autobind,
 * lookup or the security hook.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* With SOCK_PASSCRED set, an unbound socket is given an automatic name first. */
	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* unix_wait_for_peer() drops the read lock on "other" itself. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_wlock(sk);

	/* Our state changed while we were taking the lock: start over. */
	if (sk->sk_state != st) {
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	/* newsk -> sk peer link, with its own reference on sk. */
	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= current->tgid;
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	/* sk -> newsk peer link, with its own reference on newsk. */
	sock_hold(newsk);
	unix_peer(sk)	= newsk;
	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;

	unix_state_wunlock(sk);

	/* take ten and and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/* Undo artificially decreased inflight after embrion
	 * is installed to listening socket. */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	/* Common failure exit: free whatever was allocated or referenced so far. */
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1121
1122static int unix_socketpair(struct socket *socka, struct socket *sockb)
1123{
1124 struct sock *ska=socka->sk, *skb = sockb->sk;
1125
1126 /* Join our sockets back to back */
1127 sock_hold(ska);
1128 sock_hold(skb);
1129 unix_peer(ska)=skb;
1130 unix_peer(skb)=ska;
1131 ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1132 ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1133 ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1134
1135 if (ska->sk_type != SOCK_DGRAM) {
1136 ska->sk_state = TCP_ESTABLISHED;
1137 skb->sk_state = TCP_ESTABLISHED;
1138 socka->state = SS_CONNECTED;
1139 sockb->state = SS_CONNECTED;
1140 }
1141 return 0;
1142}
1143
1144static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1145{
1146 struct sock *sk = sock->sk;
1147 struct sock *tsk;
1148 struct sk_buff *skb;
1149 int err;
1150
1151 err = -EOPNOTSUPP;
1152 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1153 goto out;
1154
1155 err = -EINVAL;
1156 if (sk->sk_state != TCP_LISTEN)
1157 goto out;
1158
1159 /* If socket state is TCP_LISTEN it cannot change (for now...),
1160 * so that no locks are necessary.
1161 */
1162
1163 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1164 if (!skb) {
1165 /* This means receive shutdown. */
1166 if (err == 0)
1167 err = -EINVAL;
1168 goto out;
1169 }
1170
1171 tsk = skb->sk;
1172 skb_free_datagram(sk, skb);
1173 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1174
1175 /* attach accepted sock to socket */
1176 unix_state_wlock(tsk);
1177 newsock->state = SS_CONNECTED;
1178 sock_graft(tsk, newsock);
1179 unix_state_wunlock(tsk);
1180 return 0;
1181
1182out:
1183 return err;
1184}
1185
1186
1187static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1188{
1189 struct sock *sk = sock->sk;
1190 struct unix_sock *u;
1191 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1192 int err = 0;
1193
1194 if (peer) {
1195 sk = unix_peer_get(sk);
1196
1197 err = -ENOTCONN;
1198 if (!sk)
1199 goto out;
1200 err = 0;
1201 } else {
1202 sock_hold(sk);
1203 }
1204
1205 u = unix_sk(sk);
1206 unix_state_rlock(sk);
1207 if (!u->addr) {
1208 sunaddr->sun_family = AF_UNIX;
1209 sunaddr->sun_path[0] = 0;
1210 *uaddr_len = sizeof(short);
1211 } else {
1212 struct unix_address *addr = u->addr;
1213
1214 *uaddr_len = addr->len;
1215 memcpy(sunaddr, addr->name, *uaddr_len);
1216 }
1217 unix_state_runlock(sk);
1218 sock_put(sk);
1219out:
1220 return err;
1221}
1222
1223static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1224{
1225 int i;
1226
1227 scm->fp = UNIXCB(skb).fp;
1228 skb->destructor = sock_wfree;
1229 UNIXCB(skb).fp = NULL;
1230
1231 for (i=scm->fp->count-1; i>=0; i--)
1232 unix_notinflight(scm->fp->fp[i]);
1233}
1234
1235static void unix_destruct_fds(struct sk_buff *skb)
1236{
1237 struct scm_cookie scm;
1238 memset(&scm, 0, sizeof(scm));
1239 unix_detach_fds(&scm, skb);
1240
1241 /* Alas, it calls VFS */
1242 /* So fscking what? fput() had been SMP-safe since the last Summer */
1243 scm_destroy(&scm);
1244 sock_wfree(skb);
1245}
1246
1247static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1248{
1249 int i;
1250 for (i=scm->fp->count-1; i>=0; i--)
1251 unix_inflight(scm->fp->fp[i]);
1252 UNIXCB(skb).fp = scm->fp;
1253 skb->destructor = unix_destruct_fds;
1254 scm->fp = NULL;
1255}
1256
1257/*
1258 * Send AF_UNIX data.
1259 */
1260
1261static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1262 struct msghdr *msg, size_t len)
1263{
1264 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1265 struct sock *sk = sock->sk;
1266 struct unix_sock *u = unix_sk(sk);
1267 struct sockaddr_un *sunaddr=msg->msg_name;
1268 struct sock *other = NULL;
1269 int namelen = 0; /* fake GCC */
1270 int err;
1271 unsigned hash;
1272 struct sk_buff *skb;
1273 long timeo;
1274 struct scm_cookie tmp_scm;
1275
1276 if (NULL == siocb->scm)
1277 siocb->scm = &tmp_scm;
1278 err = scm_send(sock, msg, siocb->scm);
1279 if (err < 0)
1280 return err;
1281
1282 err = -EOPNOTSUPP;
1283 if (msg->msg_flags&MSG_OOB)
1284 goto out;
1285
1286 if (msg->msg_namelen) {
1287 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1288 if (err < 0)
1289 goto out;
1290 namelen = err;
1291 } else {
1292 sunaddr = NULL;
1293 err = -ENOTCONN;
1294 other = unix_peer_get(sk);
1295 if (!other)
1296 goto out;
1297 }
1298
1299 if (test_bit(SOCK_PASSCRED, &sock->flags)
1300 && !u->addr && (err = unix_autobind(sock)) != 0)
1301 goto out;
1302
1303 err = -EMSGSIZE;
1304 if (len > sk->sk_sndbuf - 32)
1305 goto out;
1306
1307 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1308 if (skb==NULL)
1309 goto out;
1310
1311 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1312 if (siocb->scm->fp)
1313 unix_attach_fds(siocb->scm, skb);
1314
1315 skb->h.raw = skb->data;
1316 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1317 if (err)
1318 goto out_free;
1319
1320 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1321
1322restart:
1323 if (!other) {
1324 err = -ECONNRESET;
1325 if (sunaddr == NULL)
1326 goto out_free;
1327
1328 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1329 hash, &err);
1330 if (other==NULL)
1331 goto out_free;
1332 }
1333
1334 unix_state_rlock(other);
1335 err = -EPERM;
1336 if (!unix_may_send(sk, other))
1337 goto out_unlock;
1338
1339 if (sock_flag(other, SOCK_DEAD)) {
1340 /*
1341 * Check with 1003.1g - what should
1342 * datagram error
1343 */
1344 unix_state_runlock(other);
1345 sock_put(other);
1346
1347 err = 0;
1348 unix_state_wlock(sk);
1349 if (unix_peer(sk) == other) {
1350 unix_peer(sk)=NULL;
1351 unix_state_wunlock(sk);
1352
1353 unix_dgram_disconnected(sk, other);
1354 sock_put(other);
1355 err = -ECONNREFUSED;
1356 } else {
1357 unix_state_wunlock(sk);
1358 }
1359
1360 other = NULL;
1361 if (err)
1362 goto out_free;
1363 goto restart;
1364 }
1365
1366 err = -EPIPE;
1367 if (other->sk_shutdown & RCV_SHUTDOWN)
1368 goto out_unlock;
1369
1370 if (sk->sk_type != SOCK_SEQPACKET) {
1371 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1372 if (err)
1373 goto out_unlock;
1374 }
1375
1376 if (unix_peer(other) != sk &&
1377 (skb_queue_len(&other->sk_receive_queue) >
1378 other->sk_max_ack_backlog)) {
1379 if (!timeo) {
1380 err = -EAGAIN;
1381 goto out_unlock;
1382 }
1383
1384 timeo = unix_wait_for_peer(other, timeo);
1385
1386 err = sock_intr_errno(timeo);
1387 if (signal_pending(current))
1388 goto out_free;
1389
1390 goto restart;
1391 }
1392
1393 skb_queue_tail(&other->sk_receive_queue, skb);
1394 unix_state_runlock(other);
1395 other->sk_data_ready(other, len);
1396 sock_put(other);
1397 scm_destroy(siocb->scm);
1398 return len;
1399
1400out_unlock:
1401 unix_state_runlock(other);
1402out_free:
1403 kfree_skb(skb);
1404out:
1405 if (other)
1406 sock_put(other);
1407 scm_destroy(siocb->scm);
1408 return err;
1409}
1410
1411
1412static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1413 struct msghdr *msg, size_t len)
1414{
1415 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1416 struct sock *sk = sock->sk;
1417 struct sock *other = NULL;
1418 struct sockaddr_un *sunaddr=msg->msg_name;
1419 int err,size;
1420 struct sk_buff *skb;
1421 int sent=0;
1422 struct scm_cookie tmp_scm;
1423
1424 if (NULL == siocb->scm)
1425 siocb->scm = &tmp_scm;
1426 err = scm_send(sock, msg, siocb->scm);
1427 if (err < 0)
1428 return err;
1429
1430 err = -EOPNOTSUPP;
1431 if (msg->msg_flags&MSG_OOB)
1432 goto out_err;
1433
1434 if (msg->msg_namelen) {
1435 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1436 goto out_err;
1437 } else {
1438 sunaddr = NULL;
1439 err = -ENOTCONN;
1440 other = unix_peer_get(sk);
1441 if (!other)
1442 goto out_err;
1443 }
1444
1445 if (sk->sk_shutdown & SEND_SHUTDOWN)
1446 goto pipe_err;
1447
1448 while(sent < len)
1449 {
1450 /*
1451 * Optimisation for the fact that under 0.01% of X messages typically
1452 * need breaking up.
1453 */
1454
1455 size=len-sent;
1456
1457 /* Keep two messages in the pipe so it schedules better */
1458 if (size > sk->sk_sndbuf / 2 - 64)
1459 size = sk->sk_sndbuf / 2 - 64;
1460
1461 if (size > SKB_MAX_ALLOC)
1462 size = SKB_MAX_ALLOC;
1463
1464 /*
1465 * Grab a buffer
1466 */
1467
1468 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1469
1470 if (skb==NULL)
1471 goto out_err;
1472
1473 /*
1474 * If you pass two values to the sock_alloc_send_skb
1475 * it tries to grab the large buffer with GFP_NOFS
1476 * (which can fail easily), and if it fails grab the
1477 * fallback size buffer which is under a page and will
1478 * succeed. [Alan]
1479 */
1480 size = min_t(int, size, skb_tailroom(skb));
1481
1482 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1483 if (siocb->scm->fp)
1484 unix_attach_fds(siocb->scm, skb);
1485
1486 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1487 kfree_skb(skb);
1488 goto out_err;
1489 }
1490
1491 unix_state_rlock(other);
1492
1493 if (sock_flag(other, SOCK_DEAD) ||
1494 (other->sk_shutdown & RCV_SHUTDOWN))
1495 goto pipe_err_free;
1496
1497 skb_queue_tail(&other->sk_receive_queue, skb);
1498 unix_state_runlock(other);
1499 other->sk_data_ready(other, size);
1500 sent+=size;
1501 }
1502 sock_put(other);
1503
1504 scm_destroy(siocb->scm);
1505 siocb->scm = NULL;
1506
1507 return sent;
1508
1509pipe_err_free:
1510 unix_state_runlock(other);
1511 kfree_skb(skb);
1512pipe_err:
1513 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1514 send_sig(SIGPIPE,current,0);
1515 err = -EPIPE;
1516out_err:
1517 if (other)
1518 sock_put(other);
1519 scm_destroy(siocb->scm);
1520 siocb->scm = NULL;
1521 return sent ? : err;
1522}
1523
1524static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1525 struct msghdr *msg, size_t len)
1526{
1527 int err;
1528 struct sock *sk = sock->sk;
1529
1530 err = sock_error(sk);
1531 if (err)
1532 return err;
1533
1534 if (sk->sk_state != TCP_ESTABLISHED)
1535 return -ENOTCONN;
1536
1537 if (msg->msg_namelen)
1538 msg->msg_namelen = 0;
1539
1540 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1541}
1542
1543static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1544{
1545 struct unix_sock *u = unix_sk(sk);
1546
1547 msg->msg_namelen = 0;
1548 if (u->addr) {
1549 msg->msg_namelen = u->addr->len;
1550 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1551 }
1552}
1553
1554static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1555 struct msghdr *msg, size_t size,
1556 int flags)
1557{
1558 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1559 struct scm_cookie tmp_scm;
1560 struct sock *sk = sock->sk;
1561 struct unix_sock *u = unix_sk(sk);
1562 int noblock = flags & MSG_DONTWAIT;
1563 struct sk_buff *skb;
1564 int err;
1565
1566 err = -EOPNOTSUPP;
1567 if (flags&MSG_OOB)
1568 goto out;
1569
1570 msg->msg_namelen = 0;
1571
1572 down(&u->readsem);
1573
1574 skb = skb_recv_datagram(sk, flags, noblock, &err);
1575 if (!skb)
1576 goto out_unlock;
1577
1578 wake_up_interruptible(&u->peer_wait);
1579
1580 if (msg->msg_name)
1581 unix_copy_addr(msg, skb->sk);
1582
1583 if (size > skb->len)
1584 size = skb->len;
1585 else if (size < skb->len)
1586 msg->msg_flags |= MSG_TRUNC;
1587
1588 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1589 if (err)
1590 goto out_free;
1591
1592 if (!siocb->scm) {
1593 siocb->scm = &tmp_scm;
1594 memset(&tmp_scm, 0, sizeof(tmp_scm));
1595 }
1596 siocb->scm->creds = *UNIXCREDS(skb);
1597
1598 if (!(flags & MSG_PEEK))
1599 {
1600 if (UNIXCB(skb).fp)
1601 unix_detach_fds(siocb->scm, skb);
1602 }
1603 else
1604 {
1605 /* It is questionable: on PEEK we could:
1606 - do not return fds - good, but too simple 8)
1607 - return fds, and do not return them on read (old strategy,
1608 apparently wrong)
1609 - clone fds (I chose it for now, it is the most universal
1610 solution)
1611
1612 POSIX 1003.1g does not actually define this clearly
1613 at all. POSIX 1003.1g doesn't define a lot of things
1614 clearly however!
1615
1616 */
1617 if (UNIXCB(skb).fp)
1618 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1619 }
1620 err = size;
1621
1622 scm_recv(sock, msg, siocb->scm, flags);
1623
1624out_free:
1625 skb_free_datagram(sk,skb);
1626out_unlock:
1627 up(&u->readsem);
1628out:
1629 return err;
1630}
1631
/*
 *	Sleep until data has arrived, but check for races.
 */
1635
1636static long unix_stream_data_wait(struct sock * sk, long timeo)
1637{
1638 DEFINE_WAIT(wait);
1639
1640 unix_state_rlock(sk);
1641
1642 for (;;) {
1643 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1644
1645 if (skb_queue_len(&sk->sk_receive_queue) ||
1646 sk->sk_err ||
1647 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1648 signal_pending(current) ||
1649 !timeo)
1650 break;
1651
1652 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1653 unix_state_runlock(sk);
1654 timeo = schedule_timeout(timeo);
1655 unix_state_rlock(sk);
1656 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1657 }
1658
1659 finish_wait(sk->sk_sleep, &wait);
1660 unix_state_runlock(sk);
1661 return timeo;
1662}
1663
1664
1665
1666static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1667 struct msghdr *msg, size_t size,
1668 int flags)
1669{
1670 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1671 struct scm_cookie tmp_scm;
1672 struct sock *sk = sock->sk;
1673 struct unix_sock *u = unix_sk(sk);
1674 struct sockaddr_un *sunaddr=msg->msg_name;
1675 int copied = 0;
1676 int check_creds = 0;
1677 int target;
1678 int err = 0;
1679 long timeo;
1680
1681 err = -EINVAL;
1682 if (sk->sk_state != TCP_ESTABLISHED)
1683 goto out;
1684
1685 err = -EOPNOTSUPP;
1686 if (flags&MSG_OOB)
1687 goto out;
1688
1689 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1690 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1691
1692 msg->msg_namelen = 0;
1693
1694 /* Lock the socket to prevent queue disordering
1695 * while sleeps in memcpy_tomsg
1696 */
1697
1698 if (!siocb->scm) {
1699 siocb->scm = &tmp_scm;
1700 memset(&tmp_scm, 0, sizeof(tmp_scm));
1701 }
1702
1703 down(&u->readsem);
1704
1705 do
1706 {
1707 int chunk;
1708 struct sk_buff *skb;
1709
1710 skb = skb_dequeue(&sk->sk_receive_queue);
1711 if (skb==NULL)
1712 {
1713 if (copied >= target)
1714 break;
1715
1716 /*
1717 * POSIX 1003.1g mandates this order.
1718 */
1719
1720 if ((err = sock_error(sk)) != 0)
1721 break;
1722 if (sk->sk_shutdown & RCV_SHUTDOWN)
1723 break;
1724 err = -EAGAIN;
1725 if (!timeo)
1726 break;
1727 up(&u->readsem);
1728
1729 timeo = unix_stream_data_wait(sk, timeo);
1730
1731 if (signal_pending(current)) {
1732 err = sock_intr_errno(timeo);
1733 goto out;
1734 }
1735 down(&u->readsem);
1736 continue;
1737 }
1738
1739 if (check_creds) {
1740 /* Never glue messages from different writers */
1741 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1742 skb_queue_head(&sk->sk_receive_queue, skb);
1743 break;
1744 }
1745 } else {
1746 /* Copy credentials */
1747 siocb->scm->creds = *UNIXCREDS(skb);
1748 check_creds = 1;
1749 }
1750
1751 /* Copy address just once */
1752 if (sunaddr)
1753 {
1754 unix_copy_addr(msg, skb->sk);
1755 sunaddr = NULL;
1756 }
1757
1758 chunk = min_t(unsigned int, skb->len, size);
1759 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1760 skb_queue_head(&sk->sk_receive_queue, skb);
1761 if (copied == 0)
1762 copied = -EFAULT;
1763 break;
1764 }
1765 copied += chunk;
1766 size -= chunk;
1767
1768 /* Mark read part of skb as used */
1769 if (!(flags & MSG_PEEK))
1770 {
1771 skb_pull(skb, chunk);
1772
1773 if (UNIXCB(skb).fp)
1774 unix_detach_fds(siocb->scm, skb);
1775
1776 /* put the skb back if we didn't use it up.. */
1777 if (skb->len)
1778 {
1779 skb_queue_head(&sk->sk_receive_queue, skb);
1780 break;
1781 }
1782
1783 kfree_skb(skb);
1784
1785 if (siocb->scm->fp)
1786 break;
1787 }
1788 else
1789 {
1790 /* It is questionable, see note in unix_dgram_recvmsg.
1791 */
1792 if (UNIXCB(skb).fp)
1793 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1794
1795 /* put message back and return */
1796 skb_queue_head(&sk->sk_receive_queue, skb);
1797 break;
1798 }
1799 } while (size);
1800
1801 up(&u->readsem);
1802 scm_recv(sock, msg, siocb->scm, flags);
1803out:
1804 return copied ? : err;
1805}
1806
1807static int unix_shutdown(struct socket *sock, int mode)
1808{
1809 struct sock *sk = sock->sk;
1810 struct sock *other;
1811
1812 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1813
1814 if (mode) {
1815 unix_state_wlock(sk);
1816 sk->sk_shutdown |= mode;
1817 other=unix_peer(sk);
1818 if (other)
1819 sock_hold(other);
1820 unix_state_wunlock(sk);
1821 sk->sk_state_change(sk);
1822
1823 if (other &&
1824 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1825
1826 int peer_mode = 0;
1827
1828 if (mode&RCV_SHUTDOWN)
1829 peer_mode |= SEND_SHUTDOWN;
1830 if (mode&SEND_SHUTDOWN)
1831 peer_mode |= RCV_SHUTDOWN;
1832 unix_state_wlock(other);
1833 other->sk_shutdown |= peer_mode;
1834 unix_state_wunlock(other);
1835 other->sk_state_change(other);
1836 read_lock(&other->sk_callback_lock);
1837 if (peer_mode == SHUTDOWN_MASK)
1838 sk_wake_async(other,1,POLL_HUP);
1839 else if (peer_mode & RCV_SHUTDOWN)
1840 sk_wake_async(other,1,POLL_IN);
1841 read_unlock(&other->sk_callback_lock);
1842 }
1843 if (other)
1844 sock_put(other);
1845 }
1846 return 0;
1847}
1848
1849static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1850{
1851 struct sock *sk = sock->sk;
1852 long amount=0;
1853 int err;
1854
1855 switch(cmd)
1856 {
1857 case SIOCOUTQ:
1858 amount = atomic_read(&sk->sk_wmem_alloc);
1859 err = put_user(amount, (int __user *)arg);
1860 break;
1861 case SIOCINQ:
1862 {
1863 struct sk_buff *skb;
1864
1865 if (sk->sk_state == TCP_LISTEN) {
1866 err = -EINVAL;
1867 break;
1868 }
1869
1870 spin_lock(&sk->sk_receive_queue.lock);
1871 if (sk->sk_type == SOCK_STREAM ||
1872 sk->sk_type == SOCK_SEQPACKET) {
1873 skb_queue_walk(&sk->sk_receive_queue, skb)
1874 amount += skb->len;
1875 } else {
1876 skb = skb_peek(&sk->sk_receive_queue);
1877 if (skb)
1878 amount=skb->len;
1879 }
1880 spin_unlock(&sk->sk_receive_queue.lock);
1881 err = put_user(amount, (int __user *)arg);
1882 break;
1883 }
1884
1885 default:
1886 err = dev_ioctl(cmd, (void __user *)arg);
1887 break;
1888 }
1889 return err;
1890}
1891
1892static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1893{
1894 struct sock *sk = sock->sk;
1895 unsigned int mask;
1896
1897 poll_wait(file, sk->sk_sleep, wait);
1898 mask = 0;
1899
1900 /* exceptional events? */
1901 if (sk->sk_err)
1902 mask |= POLLERR;
1903 if (sk->sk_shutdown == SHUTDOWN_MASK)
1904 mask |= POLLHUP;
1905
1906 /* readable? */
1907 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1908 (sk->sk_shutdown & RCV_SHUTDOWN))
1909 mask |= POLLIN | POLLRDNORM;
1910
1911 /* Connection-based need to check for termination and startup */
1912 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1913 mask |= POLLHUP;
1914
1915 /*
1916 * we set writable also when the other side has shut down the
1917 * connection. This prevents stuck sockets.
1918 */
1919 if (unix_writable(sk))
1920 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1921
1922 return mask;
1923}
1924
1925
1926#ifdef CONFIG_PROC_FS
1927static struct sock *unix_seq_idx(int *iter, loff_t pos)
1928{
1929 loff_t off = 0;
1930 struct sock *s;
1931
1932 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1933 if (off == pos)
1934 return s;
1935 ++off;
1936 }
1937 return NULL;
1938}
1939
1940
1941static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1942{
1943 read_lock(&unix_table_lock);
1944 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1945}
1946
1947static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1948{
1949 ++*pos;
1950
1951 if (v == (void *)1)
1952 return first_unix_socket(seq->private);
1953 return next_unix_socket(seq->private, v);
1954}
1955
1956static void unix_seq_stop(struct seq_file *seq, void *v)
1957{
1958 read_unlock(&unix_table_lock);
1959}
1960
1961static int unix_seq_show(struct seq_file *seq, void *v)
1962{
1963
1964 if (v == (void *)1)
1965 seq_puts(seq, "Num RefCount Protocol Flags Type St "
1966 "Inode Path\n");
1967 else {
1968 struct sock *s = v;
1969 struct unix_sock *u = unix_sk(s);
1970 unix_state_rlock(s);
1971
1972 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1973 s,
1974 atomic_read(&s->sk_refcnt),
1975 0,
1976 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1977 s->sk_type,
1978 s->sk_socket ?
1979 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1980 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1981 sock_i_ino(s));
1982
1983 if (u->addr) {
1984 int i, len;
1985 seq_putc(seq, ' ');
1986
1987 i = 0;
1988 len = u->addr->len - sizeof(short);
1989 if (!UNIX_ABSTRACT(s))
1990 len--;
1991 else {
1992 seq_putc(seq, '@');
1993 i++;
1994 }
1995 for ( ; i < len; i++)
1996 seq_putc(seq, u->addr->name->sun_path[i]);
1997 }
1998 unix_state_runlock(s);
1999 seq_putc(seq, '\n');
2000 }
2001
2002 return 0;
2003}
2004
2005static struct seq_operations unix_seq_ops = {
2006 .start = unix_seq_start,
2007 .next = unix_seq_next,
2008 .stop = unix_seq_stop,
2009 .show = unix_seq_show,
2010};
2011
2012
2013static int unix_seq_open(struct inode *inode, struct file *file)
2014{
2015 struct seq_file *seq;
2016 int rc = -ENOMEM;
2017 int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2018
2019 if (!iter)
2020 goto out;
2021
2022 rc = seq_open(file, &unix_seq_ops);
2023 if (rc)
2024 goto out_kfree;
2025
2026 seq = file->private_data;
2027 seq->private = iter;
2028 *iter = 0;
2029out:
2030 return rc;
2031out_kfree:
2032 kfree(iter);
2033 goto out;
2034}
2035
2036static struct file_operations unix_seq_fops = {
2037 .owner = THIS_MODULE,
2038 .open = unix_seq_open,
2039 .read = seq_read,
2040 .llseek = seq_lseek,
2041 .release = seq_release_private,
2042};
2043
2044#endif
2045
2046static struct net_proto_family unix_family_ops = {
2047 .family = PF_UNIX,
2048 .create = unix_create,
2049 .owner = THIS_MODULE,
2050};
2051
#ifndef CONFIG_SYSCTL
/* Sysctl support compiled out: (un)registration is a no-op. */
static inline void unix_sysctl_register(void) { }
static inline void unix_sysctl_unregister(void) { }
#else
/* Only declared here; the definitions are not in this part of the file. */
extern void unix_sysctl_register(void);
extern void unix_sysctl_unregister(void);
#endif
2059
2060static int __init af_unix_init(void)
2061{
2062 int rc = -1;
2063 struct sk_buff *dummy_skb;
2064
2065 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2066 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2067 goto out;
2068 }
2069
2070 rc = proto_register(&unix_proto, 1);
2071 if (rc != 0) {
2072 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2073 __FUNCTION__);
2074 goto out;
2075 }
2076
2077 sock_register(&unix_family_ops);
2078#ifdef CONFIG_PROC_FS
2079 proc_net_fops_create("unix", 0, &unix_seq_fops);
2080#endif
2081 unix_sysctl_register();
2082out:
2083 return rc;
2084}
2085
2086static void __exit af_unix_exit(void)
2087{
2088 sock_unregister(PF_UNIX);
2089 unix_sysctl_unregister();
2090 proc_net_remove("unix");
2091 proto_unregister(&unix_proto);
2092}
2093
2094module_init(af_unix_init);
2095module_exit(af_unix_exit);
2096
2097MODULE_LICENSE("GPL");
2098MODULE_ALIAS_NETPROTO(PF_UNIX);