blob: 4455b62317d4796635a3cbdcc5f62e3b9a9f5ac0 [file] [log] [blame]
Thomas Gleixner2874c5f2019-05-27 08:55:01 +02001// SPDX-License-Identifier: GPL-2.0-or-later
Linus Torvalds1da177e2005-04-16 15:20:36 -07002/*
3 * NET4: Implementation of BSD Unix domain sockets.
4 *
Alan Cox113aa832008-10-13 19:01:08 -07005 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
Linus Torvalds1da177e2005-04-16 15:20:36 -07006 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 * Fixes:
8 * Linus Torvalds : Assorted bug cures.
9 * Niibe Yutaka : async I/O support.
10 * Carsten Paeth : PF_UNIX check, address fixes.
11 * Alan Cox : Limit size of allocated blocks.
12 * Alan Cox : Fixed the stupid socketpair bug.
13 * Alan Cox : BSD compatibility fine tuning.
14 * Alan Cox : Fixed a bug in connect when interrupted.
15 * Alan Cox : Sorted out a proper draft version of
16 * file descriptor passing hacked up from
17 * Mike Shaver's work.
18 * Marty Leisner : Fixes to fd passing
19 * Nick Nevin : recvmsg bugfix.
20 * Alan Cox : Started proper garbage collector
21 * Heiko EiBfeldt : Missing verify_area check
22 * Alan Cox : Started POSIXisms
23 * Andreas Schwab : Replace inode by dentry for proper
24 * reference counting
25 * Kirk Petersen : Made this a module
26 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
27 * Lots of bug fixes.
28 * Alexey Kuznetosv : Repaired (I hope) bugs introduces
29 * by above two patches.
30 * Andrea Arcangeli : If possible we block in connect(2)
31 * if the max backlog of the listen socket
 *				has been reached. This won't break
33 * old apps and it will avoid huge amount
34 * of socks hashed (this for unix_gc()
35 * performances reasons).
36 * Security fix that limits the max
37 * number of socks to 2*max_files and
38 * the number of skb queueable in the
39 * dgram receiver.
40 * Artur Skawina : Hash function optimizations
41 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
42 * Malcolm Beattie : Set peercred for socketpair
43 * Michal Ostrowski : Module initialization cleanup.
44 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
45 * the core infrastructure is doing that
46 * for all net proto families now (2.5.69+)
47 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070048 * Known differences from reference BSD that was tested:
49 *
50 * [TO FIX]
51 * ECONNREFUSED is not returned from one end of a connected() socket to the
52 * other the moment one end closes.
53 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
54 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
55 * [NOT TO FIX]
56 * accept() returns a path name even if the connecting socket has closed
57 * in the meantime (BSD loses the path and gives up).
58 * accept() returns 0 length path for an unbound connector. BSD returns 16
59 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
61 * BSD af_unix apparently has connect forgetting to block properly.
62 * (need to check this with the POSIX spec in detail)
63 *
64 * Differences from 2.0.0-11-... (ANK)
65 * Bug fixes and improvements.
66 * - client shutdown killed server socket.
67 * - removed all useless cli/sti pairs.
68 *
69 * Semantic changes/extensions.
70 * - generic control message passing.
71 * - SCM_CREDENTIALS control message.
72 * - "Abstract" (not FS based) socket bindings.
73 * Abstract names are sequences of bytes (not zero terminated)
74 * started by 0, so that this name space does not intersect
75 * with BSD names.
76 */
77
wangweidong5cc208b2013-12-06 18:03:36 +080078#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
Linus Torvalds1da177e2005-04-16 15:20:36 -070080#include <linux/module.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070081#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070082#include <linux/signal.h>
Ingo Molnar3f07c012017-02-08 18:51:30 +010083#include <linux/sched/signal.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070084#include <linux/errno.h>
85#include <linux/string.h>
86#include <linux/stat.h>
87#include <linux/dcache.h>
88#include <linux/namei.h>
89#include <linux/socket.h>
90#include <linux/un.h>
91#include <linux/fcntl.h>
92#include <linux/termios.h>
93#include <linux/sockios.h>
94#include <linux/net.h>
95#include <linux/in.h>
96#include <linux/fs.h>
97#include <linux/slab.h>
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080098#include <linux/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070099#include <linux/skbuff.h>
100#include <linux/netdevice.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +0200101#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700102#include <net/sock.h>
Arnaldo Carvalho de Meloc752f072005-08-09 20:08:28 -0700103#include <net/tcp_states.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104#include <net/af_unix.h>
105#include <linux/proc_fs.h>
106#include <linux/seq_file.h>
107#include <net/scm.h>
108#include <linux/init.h>
109#include <linux/poll.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700110#include <linux/rtnetlink.h>
111#include <linux/mount.h>
112#include <net/checksum.h>
113#include <linux/security.h>
Colin Cross2b15af62013-05-06 23:50:21 +0000114#include <linux/freezer.h>
Andrey Vaginba94f302017-02-01 11:00:45 -0800115#include <linux/file.h>
Kuniyuki Iwashima2c860a42021-08-14 10:57:15 +0900116#include <linux/btf_ids.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700117
Jens Axboef4e65872019-02-08 09:01:44 -0700118#include "scm.h"
119
Eric Dumazet7123aaa2012-06-08 05:03:21 +0000120struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
Pavel Emelyanovfa7ff562011-12-15 02:44:03 +0000121EXPORT_SYMBOL_GPL(unix_socket_table);
122DEFINE_SPINLOCK(unix_table_lock);
123EXPORT_SYMBOL_GPL(unix_table_lock);
Eric Dumazet518de9b2010-10-26 14:22:44 -0700124static atomic_long_t unix_nr_socks;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125
Linus Torvalds1da177e2005-04-16 15:20:36 -0700126
Eric Dumazet7123aaa2012-06-08 05:03:21 +0000127static struct hlist_head *unix_sockets_unbound(void *addr)
128{
129 unsigned long hash = (unsigned long)addr;
130
131 hash ^= hash >> 16;
132 hash ^= hash >> 8;
133 hash %= UNIX_HASH_SIZE;
134 return &unix_socket_table[UNIX_HASH_SIZE + hash];
135}
136
137#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700138
#ifdef CONFIG_SECURITY_NETWORK
/* Stash the sender's LSM security ID from the scm cookie into the skb
 * control block so it travels with the message.
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

/* Copy the security ID carried by the skb back into the receiver's scm
 * cookie (inverse of unix_get_secdata()).
 */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

/* True when the skb carries the same security ID as the scm cookie. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
/* Without CONFIG_SECURITY_NETWORK the get/set hooks are no-ops and
 * unix_secdata_eq() always matches.
 */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
166
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167/*
168 * SMP locking strategy:
David S. Millerfbe9cc42005-12-13 23:26:29 -0800169 * hash table is protected with spinlock unix_table_lock
Stephen Hemminger663717f2010-02-18 14:12:06 -0800170 * each socket state is protected by separate spin lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700171 */
172
Eric Dumazet95c96172012-04-15 05:58:06 +0000173static inline unsigned int unix_hash_fold(__wsum n)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174{
Anton Blanchard0a134042014-03-05 14:29:58 +1100175 unsigned int hash = (__force unsigned int)csum_fold(n);
Eric Dumazet95c96172012-04-15 05:58:06 +0000176
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177 hash ^= hash>>8;
178 return hash&(UNIX_HASH_SIZE-1);
179}
180
181#define unix_peer(sk) (unix_sk(sk)->peer)
182
/* True if @osk currently points back at @sk as its peer. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	struct sock *peer_of_other = unix_peer(osk);

	return peer_of_other == sk;
}
187
188static inline int unix_may_send(struct sock *sk, struct sock *osk)
189{
Eric Dumazet6eba6a32008-11-16 22:58:44 -0800190 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191}
192
Qian Cai86b18aa2020-02-04 13:40:29 -0500193static inline int unix_recvq_full(const struct sock *sk)
Rainer Weikusat3c734192008-06-17 22:28:05 -0700194{
195 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
196}
197
/* Lockless variant of unix_recvq_full(): both the queue length and the
 * backlog limit are sampled with single one-shot loads, so this can run
 * without the queue lock.  The answer is advisory only.
 */
static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}
203
/* Return the peer of @s with a reference held, or NULL if unconnected.
 * The per-socket state lock makes reading the peer and taking the
 * reference atomic wrt concurrent connect/disconnect.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216
217static inline void unix_release_addr(struct unix_address *addr)
218{
Reshetova, Elena8c9814b2017-06-30 13:08:05 +0300219 if (refcount_dec_and_test(&addr->refcnt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220 kfree(addr);
221}
222
223/*
224 * Check unix socket name:
225 * - should be not zero length.
226 * - if started by not zero, should be NULL terminated (FS object)
227 * - if started by zero, it is abstract name.
228 */
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +0900229
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	/* Need more than the family field, and must fit in sockaddr_un. */
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		/* Recompute the length from the now NUL-terminated path. */
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	/* Abstract name: raw (not NUL-terminated) bytes; hash them here
	 * since there is no filesystem inode to hash by.
	 */
	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
254
/* Unlink @sk from its hash chain; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
259
/* Add @sk to hash chain @list; caller holds unix_table_lock.  The
 * socket must not already be hashed.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
265
/* Bind @sk to @addr and move it to hash bucket @hash.  The release
 * ordering on the addr store lets lockless readers that observe the
 * new addr also see its contents.  Caller holds unix_table_lock.
 */
static void __unix_set_addr(struct sock *sk, struct unix_address *addr,
			    unsigned hash)
{
	__unix_remove_socket(sk);
	smp_store_release(&unix_sk(sk)->addr, addr);
	__unix_insert_socket(&unix_socket_table[hash], sk);
}
273
/* Locked wrapper: unhash @sk under unix_table_lock. */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}
280
/* Locked wrapper: hash @sk onto @list under unix_table_lock. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
287
Denis V. Lunev097e66c2007-11-19 22:29:30 -0800288static struct sock *__unix_find_socket_byname(struct net *net,
289 struct sockaddr_un *sunname,
Al Virobe752282021-06-19 03:50:33 +0000290 int len, unsigned int hash)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291{
292 struct sock *s;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700293
Al Virobe752282021-06-19 03:50:33 +0000294 sk_for_each(s, &unix_socket_table[hash]) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700295 struct unix_sock *u = unix_sk(s);
296
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +0900297 if (!net_eq(sock_net(s), net))
Denis V. Lunev097e66c2007-11-19 22:29:30 -0800298 continue;
299
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300 if (u->addr->len == len &&
301 !memcmp(u->addr->name, sunname, len))
Vito Caputo262ce0a2019-10-09 20:43:47 -0700302 return s;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 }
Vito Caputo262ce0a2019-10-09 20:43:47 -0700304 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305}
306
Denis V. Lunev097e66c2007-11-19 22:29:30 -0800307static inline struct sock *unix_find_socket_byname(struct net *net,
308 struct sockaddr_un *sunname,
Al Virobe752282021-06-19 03:50:33 +0000309 int len, unsigned int hash)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700310{
311 struct sock *s;
312
David S. Millerfbe9cc42005-12-13 23:26:29 -0800313 spin_lock(&unix_table_lock);
Al Virobe752282021-06-19 03:50:33 +0000314 s = __unix_find_socket_byname(net, sunname, len, hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 if (s)
316 sock_hold(s);
David S. Millerfbe9cc42005-12-13 23:26:29 -0800317 spin_unlock(&unix_table_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318 return s;
319}
320
/* Find the socket bound to the filesystem inode @i, taking a reference
 * on it.  Returns NULL when no socket is bound there.  The bucket is
 * chosen by inode number, matching the hashing done at bind time.
 */
static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		/* Compare the backing inode, not the dentry, so overlay
		 * style stacking still matches.
		 */
		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}
340
Rainer Weikusat7d267272015-11-20 22:07:23 +0000341/* Support code for asymmetrically connected dgram sockets
342 *
343 * If a datagram socket is connected to a socket not itself connected
344 * to the first socket (eg, /dev/log), clients may only enqueue more
345 * messages if the present receive queue of the server socket is not
346 * "too large". This means there's a second writeability condition
347 * poll and sendmsg need to test. The dgram recv code will do a wake
348 * up on the peer_wait wait queue of a socket upon reception of a
349 * datagram which needs to be propagated to sleeping would-be writers
350 * since these might not have sent anything so far. This can't be
351 * accomplished via poll_wait because the lifetime of the server
352 * socket might be less than that of its clients if these break their
353 * association with it or if the server socket is closed while clients
354 * are still connected to it and there's no way to inform "a polling
355 * implementation" that it should let go of a certain wait queue
356 *
Ingo Molnarac6424b2017-06-20 12:06:13 +0200357 * In order to propagate a wake up, a wait_queue_entry_t of the client
Rainer Weikusat7d267272015-11-20 22:07:23 +0000358 * socket is enqueued on the peer_wait queue of the server socket
359 * whose wake function does a wake_up on the ordinary client socket
360 * wait queue. This connection is established whenever a write (or
361 * poll for write) hit the flow control condition and broken when the
362 * association to the server socket is dissolved or after a wake up
363 * was relayed.
364 */
365
/* Wake function installed on a server socket's peer_wait queue (see the
 * block comment above): detach our entry and relay the wake up to the
 * client socket's own wait queue.  Runs under the peer_wait queue lock,
 * which keeps the dequeue and the private-pointer clear atomic.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}
385
386static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
387{
388 struct unix_sock *u, *u_other;
389 int rc;
390
391 u = unix_sk(sk);
392 u_other = unix_sk(other);
393 rc = 0;
394 spin_lock(&u_other->peer_wait.lock);
395
396 if (!u->peer_wake.private) {
397 u->peer_wake.private = other;
398 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
399
400 rc = 1;
401 }
402
403 spin_unlock(&u_other->peer_wait.lock);
404 return rc;
405}
406
407static void unix_dgram_peer_wake_disconnect(struct sock *sk,
408 struct sock *other)
409{
410 struct unix_sock *u, *u_other;
411
412 u = unix_sk(sk);
413 u_other = unix_sk(other);
414 spin_lock(&u_other->peer_wait.lock);
415
416 if (u->peer_wake.private == other) {
417 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
418 u->peer_wake.private = NULL;
419 }
420
421 spin_unlock(&u_other->peer_wait.lock);
422}
423
424static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
425 struct sock *other)
426{
427 unix_dgram_peer_wake_disconnect(sk, other);
428 wake_up_interruptible_poll(sk_sleep(sk),
Linus Torvaldsa9a08842018-02-11 14:34:03 -0800429 EPOLLOUT |
430 EPOLLWRNORM |
431 EPOLLWRBAND);
Rainer Weikusat7d267272015-11-20 22:07:23 +0000432}
433
/* preconditions:
 * - unix_peer(sk) == other
 * - association is stable
 *
 * Returns 1 when the caller must sleep (a wake-up relay is left armed),
 * 0 when writing may proceed (any relay just armed is torn down again).
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and its full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
457
Eric Dumazet1586a582015-10-23 10:59:16 -0700458static int unix_writable(const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459{
Eric Dumazet1586a582015-10-23 10:59:16 -0700460 return sk->sk_state != TCP_LISTEN &&
Reshetova, Elena14afee42017-06-30 13:08:00 +0300461 (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462}
463
/* sk->sk_write_space callback: when the socket becomes writable, wake
 * poll()ers/writers and deliver SIGIO.  The socket wq is RCU protected,
 * hence the rcu_read_lock() around the dereference.
 */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
478
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* Writers blocked on our full queue can give up now. */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			sk_error_report(other);
		}
	}
	/* Both ends drop back to the unconnected state. */
	sk->sk_state = other->sk_state = TCP_CLOSE;
}
500
/* sk->sk_destruct callback, run when the last reference on @sk drops.
 * Purges pending receive data (in-flight fds are erased by the skb
 * destructor hook), releases the bound address and updates global
 * socket accounting.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	if (u->oob_skb) {
		kfree_skb(u->oob_skb);
		u->oob_skb = NULL;
	}
#endif
	/* Sanity: no queued write memory, unhashed, detached from socket. */
	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	/* sock_prot_inuse_add() touches per-cpu data; keep BHs off. */
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
533
/* Core of close(2) on a unix socket: unhash @sk, mark it dead, notify
 * and detach the peer, flush queued data and drop the filesystem
 * binding.  @embrion is true for never-accepted embryo sockets found on
 * a listener's queue; their peer then gets ECONNRESET.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Detach the fs binding under the lock; the path_put() happens
	 * later, outside the lock.
	 */
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;

	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;

	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listener's queue holds embryo sockets: release them too. */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
608
/* Record the current task's tgid and credentials as the socket's peer
 * identity (for SO_PEERCRED), dropping any previously stored ones.
 * NOTE(review): the stores are not serialized against concurrent
 * readers here — presumably callers hold the relevant socket lock;
 * confirm against call sites.
 */
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}
617
/* Copy @peersk's peer identity into @sk, taking fresh references and
 * dropping whatever @sk held before (used when accepting a connection).
 */
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
626
/* listen(2): only bound stream/seqpacket sockets may listen.  Raising
 * the backlog wakes connectors blocked on a previously-full accept
 * queue.  Returns 0 or a negative errno.
 */
static int unix_listen(struct socket *sock, int backlog)
{
	int err;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;	/* Only stream/seqpacket sockets accept */
	err = -EINVAL;
	if (!u->addr)
		goto out;	/* No listens on an unbound socket */
	unix_state_lock(sk);
	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (backlog > sk->sk_max_ack_backlog)
		wake_up_interruptible_all(&u->peer_wait);
	sk->sk_max_ack_backlog	= backlog;
	sk->sk_state		= TCP_LISTEN;
	/* set credentials so connect can copy them */
	init_peercred(sk);
	err = 0;

out_unlock:
	unix_state_unlock(sk);
out:
	return err;
}
655
656static int unix_release(struct socket *);
657static int unix_bind(struct socket *, struct sockaddr *, int);
658static int unix_stream_connect(struct socket *, struct sockaddr *,
659 int addr_len, int flags);
660static int unix_socketpair(struct socket *, struct socket *);
David Howellscdfbabf2017-03-09 08:09:05 +0000661static int unix_accept(struct socket *, struct socket *, int, bool);
Denys Vlasenko9b2c45d2018-02-12 20:00:20 +0100662static int unix_getname(struct socket *, struct sockaddr *, int);
Linus Torvaldsa11e1d42018-06-28 09:43:44 -0700663static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
664static __poll_t unix_dgram_poll(struct file *, struct socket *,
665 poll_table *);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666static int unix_ioctl(struct socket *, unsigned int, unsigned long);
Arnd Bergmann5f6beb92019-06-03 22:03:44 +0200667#ifdef CONFIG_COMPAT
668static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
669#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670static int unix_shutdown(struct socket *, int);
Ying Xue1b784142015-03-02 15:37:48 +0800671static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
672static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
Hannes Frederic Sowa869e7c62015-05-21 16:59:59 +0200673static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
674 size_t size, int flags);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +0200675static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
676 struct pipe_inode_info *, size_t size,
677 unsigned int flags);
Ying Xue1b784142015-03-02 15:37:48 +0800678static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
679static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
Cong Wang29df44f2021-07-04 12:02:44 -0700680static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
681 sk_read_actor_t recv_actor);
Jiang Wang77462de2021-08-16 19:03:20 +0000682static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
683 sk_read_actor_t recv_actor);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684static int unix_dgram_connect(struct socket *, struct sockaddr *,
685 int, int);
Ying Xue1b784142015-03-02 15:37:48 +0800686static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
687static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
688 int);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689
Sasha Levin12663bf2013-12-07 17:26:27 -0500690static int unix_set_peek_off(struct sock *sk, int val)
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +0000691{
692 struct unix_sock *u = unix_sk(sk);
693
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -0700694 if (mutex_lock_interruptible(&u->iolock))
Sasha Levin12663bf2013-12-07 17:26:27 -0500695 return -EINTR;
696
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +0000697 sk->sk_peek_off = val;
Linus Torvalds6e1ce3c2016-09-01 14:43:53 -0700698 mutex_unlock(&u->iolock);
Sasha Levin12663bf2013-12-07 17:26:27 -0500699
700 return 0;
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +0000701}
702
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo hook: emit the number of SCM_RIGHTS file descriptors
 * currently queued on this socket into /proc/<pid>/fdinfo/<fd>.
 */
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct unix_sock *u;

	if (sk) {
		u = unix_sk(sock->sk);
		seq_printf(m, "scm_fds: %u\n",
			   atomic_read(&u->scm_stat.nr_fds));
	}
}
#else
#define unix_show_fdinfo NULL
#endif
Pavel Emelyanovf55bb7f2012-02-21 07:31:51 +0000718
/*
 * proto_ops vector for SOCK_STREAM AF_UNIX sockets (connection-oriented
 * byte stream).  Operations without a stream-specific implementation use
 * the generic sock_no_* stubs.
 */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.read_sock =	unix_stream_read_sock,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
744
/*
 * proto_ops vector for SOCK_DGRAM AF_UNIX sockets (connectionless
 * datagrams).  accept/listen are meaningless for datagram sockets and
 * use the sock_no_* stubs.
 */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_dgram_sendmsg,
	.read_sock =	unix_read_sock,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
769
/*
 * proto_ops vector for SOCK_SEQPACKET AF_UNIX sockets: connection
 * oriented like a stream (shares unix_stream_connect), but with datagram
 * message boundaries and datagram-style poll.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
793
/*
 * struct proto ->close() stub.  AF_UNIX needs no per-protocol close work;
 * this exists only so sockmap/BPF can override sk_prot and still have a
 * valid ->close to chain to (see unix_release()).
 */
static void unix_close(struct sock *sk, long timeout)
{
	/* Nothing to do here, unix socket does not need a ->close().
	 * This is merely for sockmap.
	 */
}
800
/*
 * struct proto shared by all AF_UNIX socket types.  Non-static because
 * the BPF/sockmap code references it when swapping sk->sk_prot.
 */
struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_bpf_update_proto,
#endif
};
810
/*
 * Allocate and initialize a new AF_UNIX sock and insert it into the
 * unbound hash chain.  Returns the new sock, or NULL on failure (global
 * socket limit hit or allocation failure).
 *
 * @sock may be NULL (e.g. for the server side of unix_stream_connect()).
 */
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	/* Optimistically charge the global counter; undone at 'out' if we
	 * fail.  Cap total unix socks at twice the system file limit.
	 */
	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);	/* for the fd-passing GC */
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);	/* undo the charge */
	else {
		/* per-netns protocol accounting for /proc */
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
852
Eric Paris3f378b62009-11-05 22:18:14 -0800853static int unix_create(struct net *net, struct socket *sock, int protocol,
854 int kern)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855{
856 if (protocol && protocol != PF_UNIX)
857 return -EPROTONOSUPPORT;
858
859 sock->state = SS_UNCONNECTED;
860
861 switch (sock->type) {
862 case SOCK_STREAM:
863 sock->ops = &unix_stream_ops;
864 break;
865 /*
866 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
867 * nothing uses it.
868 */
869 case SOCK_RAW:
Jianjun Konge27dfce2008-11-01 21:38:31 -0700870 sock->type = SOCK_DGRAM;
Gustavo A. R. Silvadf561f662020-08-23 17:36:59 -0500871 fallthrough;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700872 case SOCK_DGRAM:
873 sock->ops = &unix_dgram_ops;
874 break;
875 case SOCK_SEQPACKET:
876 sock->ops = &unix_seqpacket_ops;
877 break;
878 default:
879 return -ESOCKTNOSUPPORT;
880 }
881
Eric W. Biederman11aa9c22015-05-08 21:09:13 -0500882 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883}
884
885static int unix_release(struct socket *sock)
886{
887 struct sock *sk = sock->sk;
888
889 if (!sk)
890 return 0;
891
Cong Wangc7272e12021-07-04 12:02:46 -0700892 sk->sk_prot->close(sk, 0);
Paul Mooreded34e02013-03-25 03:18:33 +0000893 unix_release_sock(sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700894 sock->sk = NULL;
895
Paul Mooreded34e02013-03-25 03:18:33 +0000896 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700897}
898
/*
 * Assign an automatically generated abstract address ("\0XXXXX", five hex
 * digits) to an unbound socket, as done for the first send on an unbound
 * SOCK_PASSCRED socket or an explicit zero-length bind.
 *
 * Returns 0 on success, -EINTR if interrupted waiting for the bind mutex,
 * -ENOMEM on allocation failure, or -ENOSPC when the whole 2^20 namespace
 * appears to be in use.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;	/* next candidate; protected by unix_table_lock */
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	/* Lost a race with a concurrent bind: already has an address. */
	if (u->addr)
		goto out;

	err = -ENOMEM;
	/* sun_family + '\0' + 5 hex digits + NUL fits in sizeof(short) + 16 */
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	/* Abstract name: leading NUL, then "%05x" of the candidate number. */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
	addr->hash ^= sk->sk_type;

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();
		/* Give up if all names seems to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}

	/* Still holding unix_table_lock: publish address and rehash. */
	__unix_set_addr(sk, addr, addr->hash);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
955
/*
 * Resolve a sockaddr_un to the bound peer socket.
 *
 * Filesystem names (sun_path[0] != '\0') go through a path lookup with a
 * write-permission check on the socket inode; abstract names are looked
 * up directly in the hash table.  On success returns the peer with a
 * reference held.  On failure returns NULL and stores the error
 * (-ECONNREFUSED, -EPROTOTYPE, or the path lookup error) in *error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		/* connecting requires write permission on the socket inode */
		err = path_permission(&path, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		/* takes a reference on the found sock */
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		/* right inode, wrong socket type (e.g. stream vs dgram) */
		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		/* abstract namespace: hash lookup only, type folded into hash */
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type ^ hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
1010
/*
 * Bind @sk to a filesystem (non-abstract) address: create the socket
 * inode with mknod, then publish @addr under the socket's bind mutex and
 * the global table lock.
 *
 * On success takes ownership of @addr and pins the created path in
 * u->path.  On failure the caller frees @addr; if mknod succeeded but the
 * bind lost a race (socket already bound), the freshly created inode is
 * unlinked again.  -EEXIST from mknod is translated to -EADDRINUSE by the
 * caller, unix_bind().
 */
static int unix_bind_bsd(struct sock *sk, struct unix_address *addr)
{
	struct unix_sock *u = unix_sk(sk);
	umode_t mode = S_IFSOCK |
	       (SOCK_INODE(sk->sk_socket)->i_mode & ~current_umask());
	struct user_namespace *ns; // barf...
	struct path parent;
	struct dentry *dentry;
	unsigned int hash;
	int err;

	/*
	 * Get the parent directory, calculate the hash for last
	 * component.
	 */
	dentry = kern_path_create(AT_FDCWD, addr->name->sun_path, &parent, 0);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);
	ns = mnt_user_ns(parent.mnt);

	/*
	 * All right, let's create it.
	 */
	err = security_path_mknod(&parent, dentry, mode, 0);
	if (!err)
		err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
	if (err)
		goto out;
	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_unlink;
	if (u->addr)
		goto out_unlock;

	/* filesystem sockets hash by inode number, not by name */
	addr->hash = UNIX_HASH_SIZE;
	hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
	spin_lock(&unix_table_lock);
	u->path.mnt = mntget(parent.mnt);
	u->path.dentry = dget(dentry);
	__unix_set_addr(sk, addr, hash);
	spin_unlock(&unix_table_lock);
	mutex_unlock(&u->bindlock);
	done_path_create(&parent, dentry);
	return 0;

out_unlock:
	mutex_unlock(&u->bindlock);
	err = -EINVAL;
out_unlink:
	/* failed after successful mknod? unlink what we'd created... */
	vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
out:
	done_path_create(&parent, dentry);
	return err;
}
1066
Al Virobe752282021-06-19 03:50:33 +00001067static int unix_bind_abstract(struct sock *sk, struct unix_address *addr)
Al Virofa42d912021-06-19 03:50:29 +00001068{
1069 struct unix_sock *u = unix_sk(sk);
1070 int err;
1071
1072 err = mutex_lock_interruptible(&u->bindlock);
1073 if (err)
1074 return err;
1075
1076 if (u->addr) {
1077 mutex_unlock(&u->bindlock);
1078 return -EINVAL;
1079 }
1080
1081 spin_lock(&unix_table_lock);
1082 if (__unix_find_socket_byname(sock_net(sk), addr->name, addr->len,
Al Virobe752282021-06-19 03:50:33 +00001083 addr->hash)) {
Al Virofa42d912021-06-19 03:50:29 +00001084 spin_unlock(&unix_table_lock);
1085 mutex_unlock(&u->bindlock);
1086 return -EADDRINUSE;
1087 }
1088 __unix_set_addr(sk, addr, addr->hash);
1089 spin_unlock(&unix_table_lock);
1090 mutex_unlock(&u->bindlock);
1091 return 0;
1092}
1093
/*
 * ->bind() hook: validate the address, build a refcounted unix_address
 * and hand it to the filesystem or abstract bind helper.  A bind with
 * only sun_family present triggers autobind.  On helper failure the
 * address is released here; -EEXIST (mknod collision) is reported to
 * userspace as -EADDRINUSE.
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;

	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		return -EINVAL;

	/* family only, no name: pick an abstract name automatically */
	if (addr_len == sizeof(short))
		return unix_autobind(sock);

	/* validates the name and returns the canonicalized length */
	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		return err;
	addr_len = err;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		return -ENOMEM;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0])
		err = unix_bind_bsd(sk, addr);
	else
		err = unix_bind_abstract(sk, addr);
	if (err)
		unix_release_addr(addr);
	return err == -EEXIST ? -EADDRINUSE : err;
}
1131
/*
 * Take the unix state locks of two sockets without risking an ABBA
 * deadlock: always lock the lower-addressed sock first.  @sk2 may be
 * NULL or equal to @sk1, in which case only @sk1 is locked (paired with
 * unix_state_double_unlock()).
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	struct sock *first, *second;

	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	if (sk1 < sk2) {
		first = sk1;
		second = sk2;
	} else {
		first = sk2;
		second = sk1;
	}
	unix_state_lock(first);
	unix_state_lock_nested(second);
}
1146
/*
 * Release the locks taken by unix_state_double_lock(); only one lock was
 * taken when @sk2 is NULL or identical to @sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);
	if (sk2 && likely(sk1 != sk2))
		unix_state_unlock(sk2);
}
1156
/*
 * connect(2) for SOCK_DGRAM sockets: set (or, with AF_UNSPEC, clear) the
 * default peer.  Both sockets' state locks are taken together via
 * unix_state_double_lock() so the old peer can be swapped out atomically.
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* passing creds requires our own address; autobind if unbound */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		/* takes a reference on 'other' on success */
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 * 1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;	/* transfers our ref on 'other' */
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}

	if (unix_peer(sk))
		sk->sk_state = other->sk_state = TCP_ESTABLISHED;
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1239
/*
 * Sleep until the listener's receive queue may have drained, or @timeo
 * expires.  Called with other's state lock held; drops it (hence the
 * sparse __releases annotation) before sleeping.  Returns the remaining
 * timeout from schedule_timeout(), or @timeo unchanged if no sleep was
 * needed (peer died, shut down, or queue no longer full).
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
	__releases(&unix_sk(other)->lock)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	/* queue ourselves before re-checking the condition to avoid
	 * missing a wakeup between the check and the sleep */
	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1261
/*
 * connect(2) for SOCK_STREAM / SOCK_SEQPACKET sockets.
 *
 * Allocates the server-side sock and a one-byte skb up front, finds the
 * listener, and - under both state locks, listener first - links the new
 * sock to the client and queues the skb on the listener so accept() can
 * pick it up.  Blocks (honoring the send timeout) when the listener's
 * backlog is full.  Returns 0 or a negative errno.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* passing creds requires our own address; autobind if unbound */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/* Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* drops other's state lock (see its __releases annotation) */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	/* state changed while we were unlocked: start over */
	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock. Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire(). IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
	unix_peer(sk) = newsk;

	unix_state_unlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1450
1451static int unix_socketpair(struct socket *socka, struct socket *sockb)
1452{
Jianjun Konge27dfce2008-11-01 21:38:31 -07001453 struct sock *ska = socka->sk, *skb = sockb->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454
1455 /* Join our sockets back to back */
1456 sock_hold(ska);
1457 sock_hold(skb);
Jianjun Konge27dfce2008-11-01 21:38:31 -07001458 unix_peer(ska) = skb;
1459 unix_peer(skb) = ska;
Eric W. Biederman109f6e32010-06-13 03:30:14 +00001460 init_peercred(ska);
1461 init_peercred(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462
Cong Wang83301b52021-07-04 12:02:45 -07001463 ska->sk_state = TCP_ESTABLISHED;
1464 skb->sk_state = TCP_ESTABLISHED;
1465 socka->state = SS_CONNECTED;
1466 sockb->state = SS_CONNECTED;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001467 return 0;
1468}
1469
Daniel Borkmann90c6bd32013-10-17 22:51:31 +02001470static void unix_sock_inherit_flags(const struct socket *old,
1471 struct socket *new)
1472{
1473 if (test_bit(SOCK_PASSCRED, &old->flags))
1474 set_bit(SOCK_PASSCRED, &new->flags);
1475 if (test_bit(SOCK_PASSSEC, &old->flags))
1476 set_bit(SOCK_PASSSEC, &new->flags);
1477}
1478
/* Accept a pending connection on a listening SOCK_STREAM/SOCK_SEQPACKET
 * socket.  Each pending connection sits in the listener's receive queue
 * as an skb whose skb->sk is the embryonic peer sock; dequeuing one and
 * grafting that sock onto @newsock completes the accept.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	/* Blocks unless O_NONBLOCK was requested; err is filled in on
	 * failure (e.g. -EAGAIN when nonblocking and queue empty).
	 */
	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	/* The dequeued skb only carried the embryonic sock. */
	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* A connect()er blocked on a full backlog may now proceed. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1522
1523
/* Fill in @uaddr with the local (peer == 0) or remote (peer != 0) address
 * and return the address length, or a negative error.  An unbound socket
 * reports just the AF_UNIX family (length == sizeof(short)).
 */
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
	struct sock *sk = sock->sk;
	struct unix_address *addr;
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
	int err = 0;

	if (peer) {
		/* unix_peer_get() takes its own reference on the peer. */
		sk = unix_peer_get(sk);

		err = -ENOTCONN;
		if (!sk)
			goto out;
		err = 0;
	} else {
		/* Pin sk so the unconditional sock_put() below is balanced. */
		sock_hold(sk);
	}

	/* Acquire pairs with the smp_store_release() publishing ->addr
	 * (see unix_bind()/unix_autobind()); guarantees addr contents are
	 * fully visible once the pointer is seen non-NULL.
	 */
	addr = smp_load_acquire(&unix_sk(sk)->addr);
	if (!addr) {
		sunaddr->sun_family = AF_UNIX;
		sunaddr->sun_path[0] = 0;
		err = sizeof(short);
	} else {
		err = addr->len;
		memcpy(sunaddr, addr->name, addr->len);
	}
	sock_put(sk);
out:
	return err;
}
1555
/* Duplicate the file references attached to @skb into @scm for an
 * MSG_PEEK receive, synchronizing with the unix garbage collector
 * (see the detailed explanation below).
 */
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->fp = scm_fp_dup(UNIXCB(skb).fp);

	/*
	 * Garbage collection of unix sockets starts by selecting a set of
	 * candidate sockets which have reference only from being in flight
	 * (total_refs == inflight_refs). This condition is checked once during
	 * the candidate collection phase, and candidates are marked as such, so
	 * that non-candidates can later be ignored. While inflight_refs is
	 * protected by unix_gc_lock, total_refs (file count) is not, hence this
	 * is an instantaneous decision.
	 *
	 * Once a candidate, however, the socket must not be reinstalled into a
	 * file descriptor while the garbage collection is in progress.
	 *
	 * If the above conditions are met, then the directed graph of
	 * candidates (*) does not change while unix_gc_lock is held.
	 *
	 * Any operations that changes the file count through file descriptors
	 * (dup, close, sendmsg) does not change the graph since candidates are
	 * not installed in fds.
	 *
	 * Dequeing a candidate via recvmsg would install it into an fd, but
	 * that takes unix_gc_lock to decrement the inflight count, so it's
	 * serialized with garbage collection.
	 *
	 * MSG_PEEK is special in that it does not change the inflight count,
	 * yet does install the socket into an fd. The following lock/unlock
	 * pair is to ensure serialization with garbage collection. It must be
	 * done between incrementing the file count and installing the file into
	 * an fd.
	 *
	 * If garbage collection starts after the barrier provided by the
	 * lock/unlock, then it will see the elevated refcount and not mark this
	 * as a candidate. If a garbage collection is already in progress
	 * before the file count was incremented, then the lock/unlock pair will
	 * ensure that garbage collection is finished before progressing to
	 * installing the fd.
	 *
	 * (*) A -> B where B is on the queue of A or B is on the queue of C
	 * which is on the queue of listening socket A.
	 */
	spin_lock(&unix_gc_lock);
	spin_unlock(&unix_gc_lock);
}
1602
David S. Millerf78a5fd2011-09-16 19:34:00 -04001603static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
Eric W. Biederman7361c362010-06-13 03:34:33 +00001604{
1605 int err = 0;
Eric Dumazet16e57262011-09-19 05:52:27 +00001606
David S. Millerf78a5fd2011-09-16 19:34:00 -04001607 UNIXCB(skb).pid = get_pid(scm->pid);
Eric W. Biederman6b0ee8c02013-04-03 17:28:16 +00001608 UNIXCB(skb).uid = scm->creds.uid;
1609 UNIXCB(skb).gid = scm->creds.gid;
Eric W. Biederman7361c362010-06-13 03:34:33 +00001610 UNIXCB(skb).fp = NULL;
Stephen Smalley37a9a8d2015-06-10 08:44:59 -04001611 unix_get_secdata(scm, skb);
Eric W. Biederman7361c362010-06-13 03:34:33 +00001612 if (scm->fp && send_fds)
1613 err = unix_attach_fds(scm, skb);
1614
1615 skb->destructor = unix_destruct_scm;
1616 return err;
1617}
1618
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001619static bool unix_passcred_enabled(const struct socket *sock,
1620 const struct sock *other)
1621{
1622 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1623 !other->sk_socket ||
1624 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1625}
1626
Linus Torvalds1da177e2005-04-16 15:20:36 -07001627/*
Eric Dumazet16e57262011-09-19 05:52:27 +00001628 * Some apps rely on write() giving SCM_CREDENTIALS
1629 * We include credentials if source or destination socket
1630 * asserted SOCK_PASSCRED.
1631 */
1632static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1633 const struct sock *other)
1634{
Eric W. Biederman6b0ee8c02013-04-03 17:28:16 +00001635 if (UNIXCB(skb).pid)
Eric Dumazet16e57262011-09-19 05:52:27 +00001636 return;
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001637 if (unix_passcred_enabled(sock, other)) {
Eric Dumazet16e57262011-09-19 05:52:27 +00001638 UNIXCB(skb).pid = get_pid(task_tgid(current));
David S. Miller6e0895c2013-04-22 20:32:51 -04001639 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
Eric Dumazet16e57262011-09-19 05:52:27 +00001640 }
1641}
1642
Hannes Frederic Sowa9490f882015-11-26 12:08:18 +01001643static int maybe_init_creds(struct scm_cookie *scm,
1644 struct socket *socket,
1645 const struct sock *other)
1646{
1647 int err;
1648 struct msghdr msg = { .msg_controllen = 0 };
1649
1650 err = scm_send(socket, &msg, scm, false);
1651 if (err)
1652 return err;
1653
1654 if (unix_passcred_enabled(socket, other)) {
1655 scm->pid = get_pid(task_tgid(current));
1656 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1657 }
1658 return err;
1659}
1660
1661static bool unix_skb_scm_eq(struct sk_buff *skb,
1662 struct scm_cookie *scm)
1663{
1664 const struct unix_skb_parms *u = &UNIXCB(skb);
1665
1666 return u->pid == scm->pid &&
1667 uid_eq(u->uid, scm->creds.uid) &&
1668 gid_eq(u->gid, scm->creds.gid) &&
1669 unix_secdata_eq(scm, skb);
1670}
1671
Kirill Tkhai3c32da12019-12-09 13:03:46 +03001672static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1673{
1674 struct scm_fp_list *fp = UNIXCB(skb).fp;
1675 struct unix_sock *u = unix_sk(sk);
1676
Kirill Tkhai3c32da12019-12-09 13:03:46 +03001677 if (unlikely(fp && fp->count))
Paolo Abeni77820402020-02-28 14:45:21 +01001678 atomic_add(fp->count, &u->scm_stat.nr_fds);
Kirill Tkhai3c32da12019-12-09 13:03:46 +03001679}
1680
1681static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1682{
1683 struct scm_fp_list *fp = UNIXCB(skb).fp;
1684 struct unix_sock *u = unix_sk(sk);
1685
Kirill Tkhai3c32da12019-12-09 13:03:46 +03001686 if (unlikely(fp && fp->count))
Paolo Abeni77820402020-02-28 14:45:21 +01001687 atomic_sub(fp->count, &u->scm_stat.nr_fds);
Kirill Tkhai3c32da12019-12-09 13:03:46 +03001688}
1689
Eric Dumazet16e57262011-09-19 05:52:27 +00001690/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 * Send AF_UNIX data.
1692 */
1693
/* Send a datagram (SOCK_DGRAM, or SOCK_SEQPACKET via the seqpacket
 * wrapper).  The destination is either msg_name or the connected peer.
 * Handles peer death, receive-queue backpressure (blocking or -EAGAIN)
 * and credential/fd attachment.  Returns bytes sent or negative error.
 */
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			      size_t len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
	struct sock *other = NULL;
	int namelen = 0; /* fake GCC */
	int err;
	unsigned int hash;
	struct sk_buff *skb;
	long timeo;
	struct scm_cookie scm;
	int data_len = 0;
	int sk_locked;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out;

	if (msg->msg_namelen) {
		/* Explicit destination address. */
		err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
		if (err < 0)
			goto out;
		namelen = err;
	} else {
		/* No address: must be connected; takes a ref on the peer. */
		sunaddr = NULL;
		err = -ENOTCONN;
		other = unix_peer_get(sk);
		if (!other)
			goto out;
	}

	/* SOCK_PASSCRED requires a bound address so the receiver can see
	 * who sent; autobind if the sender is still unbound.
	 */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
	    && (err = unix_autobind(sock)) != 0)
		goto out;

	err = -EMSGSIZE;
	if (len > sk->sk_sndbuf - 32)
		goto out;

	if (len > SKB_MAX_ALLOC) {
		/* Large message: put the tail into page fragments. */
		data_len = min_t(size_t,
				 len - SKB_MAX_ALLOC,
				 MAX_SKB_FRAGS * PAGE_SIZE);
		data_len = PAGE_ALIGN(data_len);

		BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
	}

	skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
				   msg->msg_flags & MSG_DONTWAIT, &err,
				   PAGE_ALLOC_COSTLY_ORDER);
	if (skb == NULL)
		goto out;

	/* Datagrams always carry the fds (send_fds == true). */
	err = unix_scm_to_skb(&scm, skb, true);
	if (err < 0)
		goto out_free;

	skb_put(skb, len - data_len);
	skb->data_len = data_len;
	skb->len = len;
	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
	if (err)
		goto out_free;

	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

restart:
	/* Re-lookup is needed after discovering a dead peer below. */
	if (!other) {
		err = -ECONNRESET;
		if (sunaddr == NULL)
			goto out_free;

		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
					hash, &err);
		if (other == NULL)
			goto out_free;
	}

	if (sk_filter(other, skb) < 0) {
		/* Toss the packet but do not return any error to the sender */
		err = len;
		goto out_free;
	}

	sk_locked = 0;
	unix_state_lock(other);
restart_locked:
	err = -EPERM;
	if (!unix_may_send(sk, other))
		goto out_unlock;

	if (unlikely(sock_flag(other, SOCK_DEAD))) {
		/*
		 * Check with 1003.1g - what should
		 * datagram error
		 */
		unix_state_unlock(other);
		sock_put(other);

		if (!sk_locked)
			unix_state_lock(sk);

		err = 0;
		if (unix_peer(sk) == other) {
			/* Our connected peer died: disconnect and report
			 * -ECONNREFUSED, per the comment above.
			 */
			unix_peer(sk) = NULL;
			unix_dgram_peer_wake_disconnect_wakeup(sk, other);

			unix_state_unlock(sk);

			unix_dgram_disconnected(sk, other);
			sock_put(other);
			err = -ECONNREFUSED;
		} else {
			unix_state_unlock(sk);
		}

		other = NULL;
		if (err)
			goto out_free;
		/* Dead sock found by address lookup: try the lookup again. */
		goto restart;
	}

	err = -EPIPE;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (sk->sk_type != SOCK_SEQPACKET) {
		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;
	}

	/* other == sk && unix_peer(other) != sk if
	 * - unix_peer(sk) == NULL, destination address bound to sk
	 * - unix_peer(sk) == sk by time of get but disconnected before lock
	 */
	if (other != sk &&
	    unlikely(unix_peer(other) != sk &&
	    unix_recvq_full_lockless(other))) {
		/* Receiver queue full and it is not connected back to us:
		 * block (if allowed), or register on the peer-wake wait
		 * queue and return -EAGAIN.
		 */
		if (timeo) {
			timeo = unix_wait_for_peer(other, timeo);

			err = sock_intr_errno(timeo);
			if (signal_pending(current))
				goto out_free;

			goto restart;
		}

		if (!sk_locked) {
			/* Need both locks, in the canonical order, to
			 * safely register on the peer's wake queue.
			 */
			unix_state_unlock(other);
			unix_state_double_lock(sk, other);
		}

		if (unix_peer(sk) != other ||
		    unix_dgram_peer_wake_me(sk, other)) {
			err = -EAGAIN;
			sk_locked = 1;
			goto out_unlock;
		}

		if (!sk_locked) {
			/* Re-validate peer state now that both are locked. */
			sk_locked = 1;
			goto restart_locked;
		}
	}

	if (unlikely(sk_locked))
		unix_state_unlock(sk);

	if (sock_flag(other, SOCK_RCVTSTAMP))
		__net_timestamp(skb);
	maybe_add_creds(skb, sock, other);
	scm_stat_add(other, skb);
	skb_queue_tail(&other->sk_receive_queue, skb);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	scm_destroy(&scm);
	return len;

out_unlock:
	if (sk_locked)
		unix_state_unlock(sk);
	unix_state_unlock(other);
out_free:
	kfree_skb(skb);
out:
	if (other)
		sock_put(other);
	scm_destroy(&scm);
	return err;
}
1896
Eric Dumazete370a722013-08-08 14:37:32 -07001897/* We use paged skbs for stream sockets, and limit occupancy to 32768
Tobias Klauserd4e9a402018-02-13 11:11:30 +01001898 * bytes, and a minimum of a full page.
Eric Dumazete370a722013-08-08 14:37:32 -07001899 */
1900#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09001901
Rao Shoaib314001f2021-08-01 00:57:07 -07001902#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
1903static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other)
1904{
1905 struct unix_sock *ousk = unix_sk(other);
1906 struct sk_buff *skb;
1907 int err = 0;
1908
1909 skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err);
1910
1911 if (!skb)
1912 return err;
1913
1914 skb_put(skb, 1);
1915 skb->len = 1;
1916 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1);
1917
1918 if (err) {
1919 kfree_skb(skb);
1920 return err;
1921 }
1922
1923 unix_state_lock(other);
1924 maybe_add_creds(skb, sock, other);
1925 skb_get(skb);
1926
1927 if (ousk->oob_skb)
1928 kfree_skb(ousk->oob_skb);
1929
1930 ousk->oob_skb = skb;
1931
1932 scm_stat_add(other, skb);
1933 skb_queue_tail(&other->sk_receive_queue, skb);
1934 sk_send_sigurg(other);
1935 unix_state_unlock(other);
1936 other->sk_data_ready(other);
1937
1938 return err;
1939}
1940#endif
1941
/* Send on a connected SOCK_STREAM socket.  Data is split into skbs of
 * at most half the send buffer (so two messages fit in the pipe) and
 * queued on the peer; file descriptors ride only on the first skb.
 * With CONFIG_AF_UNIX_OOB, MSG_OOB reserves the final byte and queues
 * it separately via queue_oob().  Returns bytes sent or an error.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	err = -EOPNOTSUPP;
	if (msg->msg_flags & MSG_OOB) {
#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
		/* Hold back the last byte for queue_oob() below;
		 * a zero-length MSG_OOB send is rejected.
		 */
		if (len)
			len--;
		else
#endif
			goto out_err;
	}

	if (msg->msg_namelen) {
		/* Stream sockets take no destination address. */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* Peer may have died or shut down receive since last check. */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		scm_stat_add(other, skb);
		skb_queue_tail(&other->sk_receive_queue, skb);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

#if (IS_ENABLED(CONFIG_AF_UNIX_OOB))
	if (msg->msg_flags & MSG_OOB) {
		/* Deliver the byte held back at the top as OOB data. */
		err = queue_oob(sock, msg, other);
		if (err)
			goto out_err;
		sent++;
	}
#endif

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* SIGPIPE only if nothing was sent and not suppressed. */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	/* Partial success reports the byte count, not the error. */
	return sent ? : err;
}
2056
/* sendpage() for stream sockets: append a page fragment either to the
 * last skb already queued on the peer (when its ancillary data matches)
 * or to a freshly allocated skb.  Allocation must happen with all locks
 * dropped, hence the "if (false) { alloc_skb: ... }" re-entry block
 * that the gotos below jump back into.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Never entered on the first pass; jumped into via "goto
	 * alloc_skb" after dropping both locks so that the (possibly
	 * sleeping) allocation happens lock-free.
	 */
	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	/* Done once per call (we may pass here again after alloc_skb). */
	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	/* Decide which skb receives the fragment: the queued tail (fast
	 * path, matching scm), or the newly allocated one.  "tail" records
	 * the skb we saw before dropping locks; if it is still the queue
	 * tail we must not coalesce into it again.
	 */
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		/* Fragment slots exhausted: retry with a fresh skb. */
		tail = skb;
		goto alloc_skb;
	}

	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	/* A new skb must get ancillary data and be queued; a coalesced
	 * one is already on the queue.
	 */
	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
2172
Ying Xue1b784142015-03-02 15:37:48 +08002173static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2174 size_t len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002175{
2176 int err;
2177 struct sock *sk = sock->sk;
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002178
Linus Torvalds1da177e2005-04-16 15:20:36 -07002179 err = sock_error(sk);
2180 if (err)
2181 return err;
2182
2183 if (sk->sk_state != TCP_ESTABLISHED)
2184 return -ENOTCONN;
2185
2186 if (msg->msg_namelen)
2187 msg->msg_namelen = 0;
2188
Ying Xue1b784142015-03-02 15:37:48 +08002189 return unix_dgram_sendmsg(sock, msg, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002190}
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09002191
Ying Xue1b784142015-03-02 15:37:48 +08002192static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2193 size_t size, int flags)
Eric W. Biedermana05d2ad2011-04-24 01:54:57 +00002194{
2195 struct sock *sk = sock->sk;
2196
2197 if (sk->sk_state != TCP_ESTABLISHED)
2198 return -ENOTCONN;
2199
Ying Xue1b784142015-03-02 15:37:48 +08002200 return unix_dgram_recvmsg(sock, msg, size, flags);
Eric W. Biedermana05d2ad2011-04-24 01:54:57 +00002201}
2202
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2204{
Al Viroae3b5642019-02-15 20:09:35 +00002205 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206
Al Viroae3b5642019-02-15 20:09:35 +00002207 if (addr) {
2208 msg->msg_namelen = addr->len;
2209 memcpy(msg->msg_name, addr->name, addr->len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210 }
2211}
2212
/* Core datagram receive path, shared by SOCK_DGRAM and SOCK_SEQPACKET
 * (and callable from the BPF sockmap path, hence non-static).
 *
 * Dequeues (or peeks, with MSG_PEEK) one datagram, copies up to @size
 * bytes to @msg, and transfers ancillary data (credentials, security
 * label, passed file descriptors) via an scm_cookie.
 *
 * Locking: u->iolock serializes receivers; it is held from a successful
 * __skb_try_recv_datagram() until the skb is released at out_free.
 *
 * Returns bytes copied (or skb length when MSG_TRUNC is set), 0 for EOF
 * on a disconnected SEQPACKET socket, or a negative errno.
 */
int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
			 int flags)
{
	struct scm_cookie scm;
	struct socket *sock = sk->sk_socket;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	/* Datagram sockets have no out-of-band data. */
	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* Try to dequeue under iolock; on an empty queue, drop the lock and
	 * (for blocking sockets) wait for more packets, then retry.
	 */
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
					      &skip, &err, &last);
		if (skb) {
			/* A real dequeue removes the skb's fd accounting. */
			if (!(flags & MSG_PEEK))
				scm_stat_del(sk, skb);
			break;
		}

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
					      &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* A slot just opened in our receive queue: wake writers blocked in
	 * connect/sendmsg on our peer_wait queue.
	 */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* Clamp to the datagram's remaining payload; flag truncation when
	 * the caller's buffer is smaller than what is available.
	 */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* Consuming read: take ownership of any passed fds and
		 * rewind the persistent peek offset past this datagram.
		 */
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			unix_peek_fds(&scm, skb);
	}
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	/* Deliver ancillary data (creds, fds, security) to the caller. */
	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2319
/* recvmsg() entry point for SOCK_DGRAM sockets.
 *
 * When BPF sockmap has replaced sk->sk_prot (sk_prot != &unix_proto),
 * the receive is routed through the substituted protocol's recvmsg so
 * redirected/filtered traffic is handled there; otherwise fall through
 * to the normal AF_UNIX datagram receive path.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
			      int flags)
{
	struct sock *sk = sock->sk;

#ifdef CONFIG_BPF_SYSCALL
	if (sk->sk_prot != &unix_proto)
		return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
					    flags & ~MSG_DONTWAIT, NULL);
#endif
	return __unix_dgram_recvmsg(sk, msg, size, flags);
}
2332
/* read_sock-style pump for datagram sockets (used by BPF sockmap).
 *
 * Repeatedly dequeues one datagram (non-blocking, under iolock) and
 * hands it to @recv_actor.  Stops when the queue is empty, the actor
 * returns <= 0, or desc->count is exhausted.
 *
 * Returns the accumulated byte count reported by the actor, the actor's
 * non-positive result if it failed on the first datagram, or the
 * skb_recv_datagram() errno when nothing could be dequeued.
 */
static int unix_read_sock(struct sock *sk, read_descriptor_t *desc,
			  sk_read_actor_t recv_actor)
{
	int copied = 0;

	while (1) {
		struct unix_sock *u = unix_sk(sk);
		struct sk_buff *skb;
		int used, err;

		/* iolock serializes with concurrent recvmsg() callers. */
		mutex_lock(&u->iolock);
		skb = skb_recv_datagram(sk, 0, 1, &err);
		mutex_unlock(&u->iolock);
		if (!skb)
			return err;

		used = recv_actor(desc, skb, 0, skb->len);
		if (used <= 0) {
			/* Actor consumed nothing; surface its result only
			 * if we have not copied anything yet.
			 */
			if (!copied)
				copied = used;
			kfree_skb(skb);
			break;
		} else if (used <= skb->len) {
			copied += used;
		}
		/* NOTE(review): used > skb->len is silently not counted in
		 * copied but the skb is still freed — presumably the actor
		 * never reports more than skb->len; confirm against callers.
		 */

		kfree_skb(skb);
		if (!desc->count)
			break;
	}

	return copied;
}
2366
/*
 * Sleep until more data has arrived. But check for races..
 *
 * Called without locks held; takes unix_state_lock(sk) itself.  Sleeps
 * until the receive queue changes relative to (@last, @last_len) — i.e.
 * a new tail skb appears or the tail grows — or an error, shutdown,
 * signal, or timeout ends the wait.  @freezable selects a freezer-aware
 * sleep (used by recvmsg, not by splice).
 *
 * Returns the remaining timeout (0 on expiry).
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
				  struct sk_buff *last, unsigned int last_len,
				  bool freezable)
{
	struct sk_buff *tail;
	DEFINE_WAIT(wait);

	unix_state_lock(sk);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

		/* Wake conditions: queue changed, pending error, receive
		 * side shut down, signal pending, or timeout exhausted.
		 */
		tail = skb_peek_tail(&sk->sk_receive_queue);
		if (tail != last ||
		    (tail && tail->len != last_len) ||
		    sk->sk_err ||
		    (sk->sk_shutdown & RCV_SHUTDOWN) ||
		    signal_pending(current) ||
		    !timeo)
			break;

		/* Drop the state lock across the actual sleep. */
		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		unix_state_unlock(sk);
		if (freezable)
			timeo = freezable_schedule_timeout(timeo);
		else
			timeo = schedule_timeout(timeo);
		unix_state_lock(sk);

		/* Socket may have been killed while we slept. */
		if (sock_flag(sk, SOCK_DEAD))
			break;

		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	}

	finish_wait(sk_sleep(sk), &wait);
	unix_state_unlock(sk);
	return timeo;
}
2409
Eric Dumazete370a722013-08-08 14:37:32 -07002410static unsigned int unix_skb_len(const struct sk_buff *skb)
2411{
2412 return skb->len - UNIXCB(skb).consumed;
2413}
2414
/* Parameter block for unix_stream_read_generic(): bundles the receive
 * request and the per-consumer "actor" that moves payload out of skbs
 * (copy-to-user for recvmsg, splice-to-pipe for splice_read).
 */
struct unix_stream_read_state {
	/* Moves up to @chunk bytes of @skb starting at @skip; returns
	 * bytes transferred or a negative errno.
	 */
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;	/* socket being read */
	struct msghdr *msg;	/* recvmsg destination (NULL for splice) */
	struct pipe_inode_info *pipe;	/* splice destination (NULL for recvmsg) */
	size_t size;		/* total bytes requested */
	int flags;		/* MSG_* receive flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags for the splice path */
};
2425
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
/* Handle recvmsg(MSG_OOB) on a stream socket: deliver the single byte
 * held in u->oob_skb via the state's recv_actor.
 *
 * Fails with -EINVAL when there is no pending OOB byte or the socket is
 * in SOCK_URGINLINE mode (then the byte is read from the normal stream).
 * On a consuming (non-PEEK) read the socket's oob_skb reference is
 * dropped.  Returns 1 (one OOB byte) with MSG_OOB set in msg_flags,
 * or a negative errno.
 */
static int unix_stream_recv_urg(struct unix_stream_read_state *state)
{
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int chunk = 1;
	struct sk_buff *oob_skb;

	/* iolock orders us against normal stream readers; state lock
	 * protects u->oob_skb itself.
	 */
	mutex_lock(&u->iolock);
	unix_state_lock(sk);

	if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) {
		unix_state_unlock(sk);
		mutex_unlock(&u->iolock);
		return -EINVAL;
	}

	oob_skb = u->oob_skb;

	if (!(state->flags & MSG_PEEK)) {
		u->oob_skb = NULL;
	}

	unix_state_unlock(sk);

	chunk = state->recv_actor(oob_skb, 0, chunk, state);

	if (!(state->flags & MSG_PEEK)) {
		UNIXCB(oob_skb).consumed += 1;
		/* Drops the reference that u->oob_skb held; the skb is
		 * presumably still reachable via the receive queue until
		 * the regular reader unlinks it — TODO confirm lifetime.
		 */
		kfree_skb(oob_skb);
	}

	mutex_unlock(&u->iolock);

	if (chunk < 0)
		return -EFAULT;

	state->msg->msg_flags |= MSG_OOB;
	return 1;
}

/* Decide how the head skb of the receive queue interacts with a pending
 * OOB byte during a normal (non-MSG_OOB) stream read.
 *
 * A fully-consumed head skb is unlinked and freed on a consuming read.
 * When the head IS the OOB skb:
 *   - with data already copied, stop here (skb = NULL) so the OOB byte
 *     marks a read boundary;
 *   - with SOCK_URGINLINE, let it be read inline (dropping the oob_skb
 *     reference on a consuming read);
 *   - otherwise skip past it, removing it from the queue on a consuming
 *     read, and continue with the next skb.
 *
 * Called with unix_state_lock(sk) held; returns the skb the caller
 * should read from, or NULL to stop/retry.
 */
static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
				  int flags, int copied)
{
	struct unix_sock *u = unix_sk(sk);

	if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) {
		skb_unlink(skb, &sk->sk_receive_queue);
		consume_skb(skb);
		skb = NULL;
	} else {
		if (skb == u->oob_skb) {
			if (copied) {
				skb = NULL;
			} else if (sock_flag(sk, SOCK_URGINLINE)) {
				if (!(flags & MSG_PEEK)) {
					u->oob_skb = NULL;
					consume_skb(skb);
				}
			} else if (!(flags & MSG_PEEK)) {
				skb_unlink(skb, &sk->sk_receive_queue);
				consume_skb(skb);
				skb = skb_peek(&sk->sk_receive_queue);
			}
		}
	}
	return skb;
}
#endif
2496
Jiang Wang77462de2021-08-16 19:03:20 +00002497static int unix_stream_read_sock(struct sock *sk, read_descriptor_t *desc,
2498 sk_read_actor_t recv_actor)
2499{
2500 if (unlikely(sk->sk_state != TCP_ESTABLISHED))
2501 return -ENOTCONN;
2502
2503 return unix_read_sock(sk, desc, recv_actor);
2504}
2505
/* Core receive loop for SOCK_STREAM sockets, shared by recvmsg() and
 * splice_read() via state->recv_actor.
 *
 * Walks the receive queue, handing each skb's unread payload to the
 * actor, gluing consecutive skbs together as a byte stream.  Handles
 * MSG_PEEK with a persistent peek offset, SO_PASSCRED boundaries
 * (never merges data from different writers), fd passing, the OOB
 * byte (CONFIG_AF_UNIX_OOB), and blocking with @freezable sleeps.
 *
 * Locking: u->iolock is held across the whole copy loop to keep
 * readers ordered; unix_state_lock(sk) is taken only around queue
 * inspection/maintenance.
 *
 * Returns bytes copied, or a negative errno if nothing was copied.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;		/* min bytes before returning (SO_RCVLOWAT) */
	int err = 0;
	long timeo;
	int skip;		/* bytes of queue head already peeked past */
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		err = unix_stream_recv_urg(state);
#endif
		goto out;
	}

	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		/* (last, last_len) snapshot the queue head so the wait
		 * below can detect any change.
		 */
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;

#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
		if (skb) {
			skb = manage_oob(skb, sk, flags, copied);
			if (!skb) {
				unix_state_unlock(sk);
				if (copied)
					break;
				goto redo;
			}
		}
#endif
again:
		if (skb == NULL) {
			/* Queue empty: return if the low-water target is
			 * met, otherwise report errors/shutdown or sleep.
			 */
			if (copied >= target)
				goto unlock;

			/*
			 * POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			/* Drop iolock while sleeping so writers and other
			 * readers can make progress.
			 */
			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Advance past skbs fully covered by the peek offset. */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		/* Hold an extra ref across the actor: a concurrent reader
		 * may consume and unlink this skb while we copy from it.
		 */
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			if (UNIXCB(skb).fp) {
				scm_stat_del(sk, skb);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))
				break;

			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Stop at an fd boundary so fds are delivered with
			 * exactly the data they were sent with.
			 */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				unix_peek_fds(&scm, skb);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2719
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002720static int unix_stream_read_actor(struct sk_buff *skb,
2721 int skip, int chunk,
2722 struct unix_stream_read_state *state)
2723{
2724 int ret;
2725
2726 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2727 state->msg, chunk);
2728 return ret ?: chunk;
2729}
2730
2731static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2732 size_t size, int flags)
2733{
2734 struct unix_stream_read_state state = {
2735 .recv_actor = unix_stream_read_actor,
2736 .socket = sock,
2737 .msg = msg,
2738 .size = size,
2739 .flags = flags
2740 };
2741
WANG Cong06a77b02016-11-17 15:55:26 -08002742 return unix_stream_read_generic(&state, true);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002743}
2744
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002745static int unix_stream_splice_actor(struct sk_buff *skb,
2746 int skip, int chunk,
2747 struct unix_stream_read_state *state)
2748{
2749 return skb_splice_bits(skb, state->socket->sk,
2750 UNIXCB(skb).consumed + skip,
Al Viro25869262016-09-17 21:02:10 -04002751 state->pipe, chunk, state->splice_flags);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002752}
2753
2754static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2755 struct pipe_inode_info *pipe,
2756 size_t size, unsigned int flags)
2757{
2758 struct unix_stream_read_state state = {
2759 .recv_actor = unix_stream_splice_actor,
2760 .socket = sock,
2761 .pipe = pipe,
2762 .size = size,
2763 .splice_flags = flags,
2764 };
2765
2766 if (unlikely(*ppos))
2767 return -ESPIPE;
2768
2769 if (sock->file->f_flags & O_NONBLOCK ||
2770 flags & SPLICE_F_NONBLOCK)
2771 state.flags = MSG_DONTWAIT;
2772
WANG Cong06a77b02016-11-17 15:55:26 -08002773 return unix_stream_read_generic(&state, false);
Hannes Frederic Sowa2b514572015-05-21 17:00:01 +02002774}
2775
/* shutdown(2) for AF_UNIX sockets.
 *
 * Records the requested shutdown direction(s) on @sk and, for
 * connection-oriented sockets, mirrors the complementary direction
 * onto the peer (our RCV shutdown is the peer's SEND shutdown and
 * vice versa), waking both sides' state-change waiters.
 *
 * The two state locks are never held simultaneously: @sk is updated
 * and unlocked before the peer (held via sock_hold) is locked.
 */
static int unix_shutdown(struct socket *sock, int mode)
{
	struct sock *sk = sock->sk;
	struct sock *other;

	if (mode < SHUT_RD || mode > SHUT_RDWR)
		return -EINVAL;
	/* This maps:
	 * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
	 * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
	 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
	 */
	++mode;

	unix_state_lock(sk);
	sk->sk_shutdown |= mode;
	other = unix_peer(sk);
	if (other)
		sock_hold(other);	/* keep peer alive across unlock */
	unix_state_unlock(sk);
	sk->sk_state_change(sk);

	if (other &&
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;

		/* Our read shutdown is the peer's write shutdown, etc. */
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
			peer_mode |= RCV_SHUTDOWN;
		unix_state_lock(other);
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		/* POLL_HUP for full shutdown, POLL_IN when the peer can no
		 * longer receive (its readers should see EOF).
		 */
		if (peer_mode == SHUTDOWN_MASK)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
	}
	if (other)
		sock_put(other);

	return 0;
}
2821
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002822long unix_inq_len(struct sock *sk)
2823{
2824 struct sk_buff *skb;
2825 long amount = 0;
2826
2827 if (sk->sk_state == TCP_LISTEN)
2828 return -EINVAL;
2829
2830 spin_lock(&sk->sk_receive_queue.lock);
2831 if (sk->sk_type == SOCK_STREAM ||
2832 sk->sk_type == SOCK_SEQPACKET) {
2833 skb_queue_walk(&sk->sk_receive_queue, skb)
Eric Dumazete370a722013-08-08 14:37:32 -07002834 amount += unix_skb_len(skb);
Pavel Emelyanov885ee742011-12-30 00:54:11 +00002835 } else {
2836 skb = skb_peek(&sk->sk_receive_queue);
2837 if (skb)
2838 amount = skb->len;
2839 }
2840 spin_unlock(&sk->sk_receive_queue.lock);
2841
2842 return amount;
2843}
2844EXPORT_SYMBOL_GPL(unix_inq_len);
2845
/* Bytes queued for sending on @sk (SIOCOUTQ): the socket's current
 * write-memory allocation.
 */
long unix_outq_len(struct sock *sk)
{
	long queued = sk_wmem_alloc_get(sk);

	return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2851
/* SIOCUNIXFILE: open the filesystem object a bound AF_UNIX socket is
 * attached to and return it to the caller as an O_PATH fd.
 *
 * Requires CAP_NET_ADMIN in the socket's network namespace.  Fails with
 * -ENOENT for unbound or abstract-address sockets (no dentry).
 *
 * Returns the new fd on success, negative errno on failure.
 */
static int unix_open_file(struct sock *sk)
{
	struct path path;
	struct file *f;
	int fd;

	if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	/* Acquire pairs with the release store at bind time. */
	if (!smp_load_acquire(&unix_sk(sk)->addr))
		return -ENOENT;

	path = unix_sk(sk)->path;
	if (!path.dentry)
		return -ENOENT;

	/* Take our own path reference for the dentry_open below. */
	path_get(&path);

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		goto out;

	f = dentry_open(&path, O_PATH, current_cred());
	if (IS_ERR(f)) {
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	fd_install(fd, f);
out:
	/* dentry_open took its own reference; drop ours either way. */
	path_put(&path);

	return fd;
}
2887
/* ioctl(2) for AF_UNIX sockets.
 *
 * Supports SIOCOUTQ (pending send bytes), SIOCINQ (pending receive
 * bytes), SIOCUNIXFILE (open the bound inode as an O_PATH fd) and,
 * with CONFIG_AF_UNIX_OOB, SIOCATMARK (is the next byte the OOB byte).
 * Anything else returns -ENOIOCTLCMD so common socket ioctls can be
 * handled by the caller.
 */
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	long amount = 0;
	int err;

	switch (cmd) {
	case SIOCOUTQ:
		amount = unix_outq_len(sk);
		err = put_user(amount, (int __user *)arg);
		break;
	case SIOCINQ:
		amount = unix_inq_len(sk);
		if (amount < 0)
			err = amount;	/* e.g. -EINVAL on a listener */
		else
			err = put_user(amount, (int __user *)arg);
		break;
	case SIOCUNIXFILE:
		err = unix_open_file(sk);
		break;
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
	case SIOCATMARK:
		{
			struct sk_buff *skb;
			struct unix_sock *u = unix_sk(sk);
			int answ = 0;

			/* At the mark iff the queue head is the OOB skb. */
			skb = skb_peek(&sk->sk_receive_queue);
			if (skb && skb == u->oob_skb)
				answ = 1;
			err = put_user(answ, (int __user *)arg);
		}
		break;
#endif
	default:
		err = -ENOIOCTLCMD;
		break;
	}
	return err;
}
2929
#ifdef CONFIG_COMPAT
/* 32-bit compat ioctl: all supported commands take an int pointer, so
 * converting the user pointer with compat_ptr() is sufficient.
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
2936
/* poll() for SOCK_STREAM / SOCK_SEQPACKET AF_UNIX sockets.
 *
 * Registers the waiter first (sock_poll_wait) and only then samples
 * socket state, so no event can be missed between the check and the
 * sleep.  Returns the EPOLL* readiness mask.
 */
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? (lockless peek is fine for a readiness hint) */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}
2971
/* poll() support for datagram-style AF_UNIX sockets.  Unlike unix_poll(),
 * writability additionally depends on the connected peer: when the peer's
 * receive queue is full this socket is parked on the peer's wake list
 * (unix_dgram_peer_wake_me()) so it gets re-polled once the peer drains.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		other = unix_peer(sk);
		/* A peer that is not symmetrically connected back to us and
		 * whose receive queue is full suppresses EPOLLOUT; arming
		 * the peer-wake link guarantees a later wakeup on drain.
		 */
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07003029
3030#ifdef CONFIG_PROC_FS
Pavel Emelyanova53eb3f2007-11-23 20:30:01 +08003031
/* The /proc/net/unix seq_file position (*pos) encodes both a hash-bucket
 * index and a 1-based offset within that bucket: the bucket occupies the
 * high bits, the offset the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
Pavel Emelyanova53eb3f2007-11-23 20:30:01 +08003037
/* Return the socket at 1-based offset get_offset(*pos) within hash bucket
 * get_bucket(*pos), counting only sockets that belong to the iterator's
 * network namespace; NULL when the bucket holds fewer matching sockets.
 * Caller holds unix_table_lock.
 */
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		/* Skip sockets from other network namespaces. */
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}
3054
/* Advance the seq_file iterator: first continue within @sk's bucket, then
 * walk subsequent buckets until a socket in the iterator's namespace is
 * found, or return NULL when the whole table is exhausted.
 * Caller holds unix_table_lock.  Note the goto target sits inside the
 * do/while so an exhausted bucket falls straight into the bucket advance.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		/* Reposition at the first entry (offset 1) of the next bucket. */
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
3081
/* seq_file ->start: take the global table lock for the duration of the
 * traversal (released in unix_seq_stop()) and position the iterator.
 * *pos == 0 yields the header token.
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	/* Past the last bucket: iteration is complete. */
	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}
3095
3096static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3097{
3098 ++*pos;
Eric Dumazet7123aaa2012-06-08 05:03:21 +00003099 return unix_next_socket(seq, v, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003100}
3101
/* seq_file ->stop: drop the lock acquired in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
3107
/* seq_file ->show: emit the /proc/net/unix header for SEQ_START_TOKEN,
 * otherwise one line per socket (address, refcount, flags, type, state,
 * inode and, when bound, the socket name).  Abstract names are printed
 * with a leading '@' and embedded NUL bytes rendered as '@'.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num RefCount Protocol Flags Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		/* Third field (Protocol) is always 0 for AF_UNIX. */
		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* u->addr is stable under unix_table_lock here */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* Name length excludes the sun_family prefix ... */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;	/* ... and the trailing NUL for pathname sockets */
			else {
				seq_putc(seq, '@');
				i++;	/* skip the leading NUL of an abstract name */
			}
			for ( ; i < len; i++)
			seq_putc(seq, u->addr->name->sun_path[i] ?:
				 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
3152
/* /proc/net/unix iteration callbacks; traversal runs under unix_table_lock. */
static const struct seq_operations unix_seq_ops = {
	.start = unix_seq_start,
	.next = unix_seq_next,
	.stop = unix_seq_stop,
	.show = unix_seq_show,
};
Kuniyuki Iwashima2c860a42021-08-14 10:57:15 +09003159
#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
/* Context handed to BPF programs attached to the "unix" bpf_iter target. */
struct bpf_iter__unix {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct unix_sock *, unix_sk);
	uid_t uid __aligned(8);
};
3166
3167static int unix_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
3168 struct unix_sock *unix_sk, uid_t uid)
3169{
3170 struct bpf_iter__unix ctx;
3171
3172 meta->seq_num--; /* skip SEQ_START_TOKEN */
3173 ctx.meta = meta;
3174 ctx.unix_sk = unix_sk;
3175 ctx.uid = uid;
3176 return bpf_iter_run_prog(prog, &ctx);
3177}
3178
3179static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
3180{
3181 struct bpf_iter_meta meta;
3182 struct bpf_prog *prog;
3183 struct sock *sk = v;
3184 uid_t uid;
3185
3186 if (v == SEQ_START_TOKEN)
3187 return 0;
3188
3189 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
3190 meta.seq = seq;
3191 prog = bpf_iter_get_info(&meta, false);
3192 return unix_prog_seq_show(prog, &meta, v, uid);
3193}
3194
/* seq_file ->stop for the BPF iterator: when iteration ended (v == NULL),
 * give the program one final invocation with no socket so it can flush
 * any per-run state, then release the table lock via unix_seq_stop().
 */
static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	if (!v) {
		meta.seq = seq;
		prog = bpf_iter_get_info(&meta, true);
		if (prog)
			(void)unix_prog_seq_show(prog, &meta, v, 0);
	}

	unix_seq_stop(seq, v);
}
3209
/* Iteration callbacks for the "unix" bpf_iter target; start/next are
 * shared with the plain /proc/net/unix seq_file implementation.
 */
static const struct seq_operations bpf_iter_unix_seq_ops = {
	.start	= unix_seq_start,
	.next	= unix_seq_next,
	.stop	= bpf_iter_unix_seq_stop,
	.show	= bpf_iter_unix_seq_show,
};
3216#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07003217#endif
3218
/* socket(AF_UNIX, ...) dispatch table: creation handled by unix_create(). */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner	= THIS_MODULE,
};
3224
Denis V. Lunev097e66c2007-11-19 22:29:30 -08003225
/* Per-network-namespace setup: default sysctl values, the sysctl table
 * and the /proc/net/unix entry.  Returns 0 on success or -ENOMEM,
 * unwinding the sysctl registration if proc creation fails.
 */
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			     sizeof(struct seq_net_private))) {
		/* Roll back the sysctl registration done above. */
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}
3245
/* Per-network-namespace teardown: mirror of unix_net_init(). */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
3251
/* Hooks invoked on network-namespace creation and destruction. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
3256
#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
/* Declare the bpf_iter target signature matching struct bpf_iter__unix. */
DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
		     struct unix_sock *unix_sk, uid_t uid)

/* Per-iterator seq_file wiring; private data is a struct seq_net_private
 * so iteration is scoped to the opener's network namespace.
 */
static const struct bpf_iter_seq_info unix_seq_info = {
	.seq_ops		= &bpf_iter_unix_seq_ops,
	.init_seq_private	= bpf_iter_init_seq_net,
	.fini_seq_private	= bpf_iter_fini_seq_net,
	.seq_priv_size		= sizeof(struct seq_net_private),
};

/* Registration record for the "unix" target; unix_sk may be NULL in the
 * program context (PTR_TO_BTF_ID_OR_NULL), e.g. on the final stop call.
 */
static struct bpf_iter_reg unix_reg_info = {
	.target			= "unix",
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__unix, unix_sk),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &unix_seq_info,
};

/* Resolve the BTF id for struct unix_sock and register the iter target;
 * failure is non-fatal (the iterator is simply unavailable).
 */
static void __init bpf_iter_register(void)
{
	unix_reg_info.ctx_arg_info[0].btf_id = btf_sock_ids[BTF_SOCK_TYPE_UNIX];
	if (bpf_iter_reg_target(&unix_reg_info))
		pr_warn("Warning: could not register bpf iterator unix\n");
}
#endif
3285
Linus Torvalds1da177e2005-04-16 15:20:36 -07003286static int __init af_unix_init(void)
3287{
3288 int rc = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003289
Pankaj Bharadiyac5936422019-12-09 10:31:43 -08003290 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
Linus Torvalds1da177e2005-04-16 15:20:36 -07003291
3292 rc = proto_register(&unix_proto, 1);
YOSHIFUJI Hideakiac7bfa62007-02-09 23:25:23 +09003293 if (rc != 0) {
wangweidong5cc208b2013-12-06 18:03:36 +08003294 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003295 goto out;
3296 }
3297
3298 sock_register(&unix_family_ops);
Denis V. Lunev097e66c2007-11-19 22:29:30 -08003299 register_pernet_subsys(&unix_net_ops);
Cong Wangc6382912021-07-04 12:02:47 -07003300 unix_bpf_build_proto();
Kuniyuki Iwashima2c860a42021-08-14 10:57:15 +09003301
3302#if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
3303 bpf_iter_register();
3304#endif
3305
Linus Torvalds1da177e2005-04-16 15:20:36 -07003306out:
3307 return rc;
3308}
3309
/* Module unload: unwind the registrations performed in af_unix_init(). */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
3316
/* Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket. But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);