Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* Multipath TCP |
| 3 | * |
| 4 | * Copyright (c) 2021, Red Hat. |
| 5 | */ |
| 6 | |
| 7 | #define pr_fmt(fmt) "MPTCP: " fmt |
| 8 | |
| 9 | #include <linux/kernel.h> |
| 10 | #include <linux/module.h> |
| 11 | #include <net/sock.h> |
| 12 | #include <net/protocol.h> |
| 13 | #include <net/tcp.h> |
| 14 | #include <net/mptcp.h> |
| 15 | #include "protocol.h" |
| 16 | |
| 17 | static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) |
| 18 | { |
| 19 | sock_owned_by_me((const struct sock *)msk); |
| 20 | |
| 21 | if (likely(!__mptcp_check_fallback(msk))) |
| 22 | return NULL; |
| 23 | |
| 24 | return msk->first; |
| 25 | } |
| 26 | |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 27 | static u32 sockopt_seq_reset(const struct sock *sk) |
| 28 | { |
| 29 | sock_owned_by_me(sk); |
| 30 | |
| 31 | /* Highbits contain state. Allows to distinguish sockopt_seq |
| 32 | * of listener and established: |
| 33 | * s0 = new_listener() |
| 34 | * sockopt(s0) - seq is 1 |
| 35 | * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) |
| 36 | * sockopt(s0) - seq increments to 2 on s0 |
| 37 | * sockopt(s1) // seq increments to 2 on s1 (different option) |
| 38 | * new ssk completes join, inherits options from s0 // seq 2 |
| 39 | * Needs sync from mptcp join logic, but ssk->seq == msk->seq |
| 40 | * |
| 41 | * Set High order bits to sk_state so ssk->seq == msk->seq test |
| 42 | * will fail. |
| 43 | */ |
| 44 | |
| 45 | return (u32)sk->sk_state << 24u; |
| 46 | } |
| 47 | |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 48 | static void sockopt_seq_inc(struct mptcp_sock *msk) |
| 49 | { |
| 50 | u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; |
| 51 | |
| 52 | msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; |
| 53 | } |
| 54 | |
| 55 | static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, |
| 56 | unsigned int optlen, int *val) |
| 57 | { |
| 58 | if (optlen < sizeof(int)) |
| 59 | return -EINVAL; |
| 60 | |
| 61 | if (copy_from_sockptr(val, optval, sizeof(*val))) |
| 62 | return -EFAULT; |
| 63 | |
| 64 | return 0; |
| 65 | } |
| 66 | |
| 67 | static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) |
| 68 | { |
| 69 | struct mptcp_subflow_context *subflow; |
| 70 | struct sock *sk = (struct sock *)msk; |
| 71 | |
| 72 | lock_sock(sk); |
| 73 | sockopt_seq_inc(msk); |
| 74 | |
| 75 | mptcp_for_each_subflow(msk, subflow) { |
| 76 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
| 77 | bool slow = lock_sock_fast(ssk); |
| 78 | |
| 79 | switch (optname) { |
Florian Westphal | a03c99b | 2021-04-15 16:45:00 -0700 | [diff] [blame] | 80 | case SO_DEBUG: |
| 81 | sock_valbool_flag(ssk, SOCK_DBG, !!val); |
| 82 | break; |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 83 | case SO_KEEPALIVE: |
| 84 | if (ssk->sk_prot->keepalive) |
| 85 | ssk->sk_prot->keepalive(ssk, !!val); |
| 86 | sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); |
| 87 | break; |
| 88 | case SO_PRIORITY: |
| 89 | ssk->sk_priority = val; |
| 90 | break; |
Florian Westphal | 5d0a6bc82d | 2021-04-15 16:44:56 -0700 | [diff] [blame] | 91 | case SO_SNDBUF: |
| 92 | case SO_SNDBUFFORCE: |
| 93 | ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; |
| 94 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); |
| 95 | break; |
| 96 | case SO_RCVBUF: |
| 97 | case SO_RCVBUFFORCE: |
| 98 | ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; |
| 99 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); |
| 100 | break; |
Florian Westphal | 3670441 | 2021-04-15 16:44:58 -0700 | [diff] [blame] | 101 | case SO_MARK: |
| 102 | if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { |
| 103 | ssk->sk_mark = sk->sk_mark; |
| 104 | sk_dst_reset(ssk); |
| 105 | } |
| 106 | break; |
Florian Westphal | 6f0d719 | 2021-04-15 16:44:59 -0700 | [diff] [blame] | 107 | case SO_INCOMING_CPU: |
| 108 | WRITE_ONCE(ssk->sk_incoming_cpu, val); |
| 109 | break; |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 110 | } |
| 111 | |
| 112 | subflow->setsockopt_seq = msk->setsockopt_seq; |
| 113 | unlock_sock_fast(ssk, slow); |
| 114 | } |
| 115 | |
| 116 | release_sock(sk); |
| 117 | } |
| 118 | |
| 119 | static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) |
| 120 | { |
| 121 | sockptr_t optval = KERNEL_SOCKPTR(&val); |
| 122 | struct sock *sk = (struct sock *)msk; |
| 123 | int ret; |
| 124 | |
| 125 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, |
| 126 | optval, sizeof(val)); |
| 127 | if (ret) |
| 128 | return ret; |
| 129 | |
| 130 | mptcp_sol_socket_sync_intval(msk, optname, val); |
| 131 | return 0; |
| 132 | } |
| 133 | |
Florian Westphal | 6f0d719 | 2021-04-15 16:44:59 -0700 | [diff] [blame] | 134 | static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) |
| 135 | { |
| 136 | struct sock *sk = (struct sock *)msk; |
| 137 | |
| 138 | WRITE_ONCE(sk->sk_incoming_cpu, val); |
| 139 | |
| 140 | mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); |
| 141 | } |
| 142 | |
Florian Westphal | 9061f24 | 2021-06-03 16:24:29 -0700 | [diff] [blame] | 143 | static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) |
| 144 | { |
| 145 | sockptr_t optval = KERNEL_SOCKPTR(&val); |
| 146 | struct mptcp_subflow_context *subflow; |
| 147 | struct sock *sk = (struct sock *)msk; |
| 148 | int ret; |
| 149 | |
| 150 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, |
| 151 | optval, sizeof(val)); |
| 152 | if (ret) |
| 153 | return ret; |
| 154 | |
| 155 | lock_sock(sk); |
| 156 | mptcp_for_each_subflow(msk, subflow) { |
| 157 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
| 158 | bool slow = lock_sock_fast(ssk); |
| 159 | |
| 160 | switch (optname) { |
| 161 | case SO_TIMESTAMP_OLD: |
| 162 | case SO_TIMESTAMP_NEW: |
| 163 | case SO_TIMESTAMPNS_OLD: |
| 164 | case SO_TIMESTAMPNS_NEW: |
| 165 | sock_set_timestamp(sk, optname, !!val); |
| 166 | break; |
| 167 | case SO_TIMESTAMPING_NEW: |
| 168 | case SO_TIMESTAMPING_OLD: |
| 169 | sock_set_timestamping(sk, optname, val); |
| 170 | break; |
| 171 | } |
| 172 | |
| 173 | unlock_sock_fast(ssk, slow); |
| 174 | } |
| 175 | |
| 176 | release_sock(sk); |
| 177 | return 0; |
| 178 | } |
| 179 | |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 180 | static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, |
| 181 | sockptr_t optval, unsigned int optlen) |
| 182 | { |
| 183 | int val, ret; |
| 184 | |
| 185 | ret = mptcp_get_int_option(msk, optval, optlen, &val); |
| 186 | if (ret) |
| 187 | return ret; |
| 188 | |
| 189 | switch (optname) { |
| 190 | case SO_KEEPALIVE: |
| 191 | mptcp_sol_socket_sync_intval(msk, optname, val); |
| 192 | return 0; |
Florian Westphal | a03c99b | 2021-04-15 16:45:00 -0700 | [diff] [blame] | 193 | case SO_DEBUG: |
Florian Westphal | 3670441 | 2021-04-15 16:44:58 -0700 | [diff] [blame] | 194 | case SO_MARK: |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 195 | case SO_PRIORITY: |
Florian Westphal | 5d0a6bc82d | 2021-04-15 16:44:56 -0700 | [diff] [blame] | 196 | case SO_SNDBUF: |
| 197 | case SO_SNDBUFFORCE: |
| 198 | case SO_RCVBUF: |
| 199 | case SO_RCVBUFFORCE: |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 200 | return mptcp_sol_socket_intval(msk, optname, val); |
Florian Westphal | 6f0d719 | 2021-04-15 16:44:59 -0700 | [diff] [blame] | 201 | case SO_INCOMING_CPU: |
| 202 | mptcp_so_incoming_cpu(msk, val); |
| 203 | return 0; |
Florian Westphal | 9061f24 | 2021-06-03 16:24:29 -0700 | [diff] [blame] | 204 | case SO_TIMESTAMP_OLD: |
| 205 | case SO_TIMESTAMP_NEW: |
| 206 | case SO_TIMESTAMPNS_OLD: |
| 207 | case SO_TIMESTAMPNS_NEW: |
| 208 | case SO_TIMESTAMPING_OLD: |
| 209 | case SO_TIMESTAMPING_NEW: |
| 210 | return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 211 | } |
| 212 | |
| 213 | return -ENOPROTOOPT; |
| 214 | } |
| 215 | |
Florian Westphal | 268b123 | 2021-04-15 16:44:57 -0700 | [diff] [blame] | 216 | static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, |
| 217 | unsigned int optlen) |
| 218 | { |
| 219 | struct mptcp_subflow_context *subflow; |
| 220 | struct sock *sk = (struct sock *)msk; |
| 221 | struct linger ling; |
| 222 | sockptr_t kopt; |
| 223 | int ret; |
| 224 | |
| 225 | if (optlen < sizeof(ling)) |
| 226 | return -EINVAL; |
| 227 | |
| 228 | if (copy_from_sockptr(&ling, optval, sizeof(ling))) |
| 229 | return -EFAULT; |
| 230 | |
| 231 | kopt = KERNEL_SOCKPTR(&ling); |
| 232 | ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); |
| 233 | if (ret) |
| 234 | return ret; |
| 235 | |
| 236 | lock_sock(sk); |
| 237 | sockopt_seq_inc(msk); |
| 238 | mptcp_for_each_subflow(msk, subflow) { |
| 239 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
| 240 | bool slow = lock_sock_fast(ssk); |
| 241 | |
| 242 | if (!ling.l_onoff) { |
| 243 | sock_reset_flag(ssk, SOCK_LINGER); |
| 244 | } else { |
| 245 | ssk->sk_lingertime = sk->sk_lingertime; |
| 246 | sock_set_flag(ssk, SOCK_LINGER); |
| 247 | } |
| 248 | |
| 249 | subflow->setsockopt_seq = msk->setsockopt_seq; |
| 250 | unlock_sock_fast(ssk, slow); |
| 251 | } |
| 252 | |
| 253 | release_sock(sk); |
| 254 | return 0; |
| 255 | } |
| 256 | |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 257 | static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, |
| 258 | sockptr_t optval, unsigned int optlen) |
| 259 | { |
| 260 | struct sock *sk = (struct sock *)msk; |
| 261 | struct socket *ssock; |
| 262 | int ret; |
| 263 | |
| 264 | switch (optname) { |
| 265 | case SO_REUSEPORT: |
| 266 | case SO_REUSEADDR: |
Florian Westphal | 5d0a6bc82d | 2021-04-15 16:44:56 -0700 | [diff] [blame] | 267 | case SO_BINDTODEVICE: |
| 268 | case SO_BINDTOIFINDEX: |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 269 | lock_sock(sk); |
| 270 | ssock = __mptcp_nmpc_socket(msk); |
| 271 | if (!ssock) { |
| 272 | release_sock(sk); |
| 273 | return -EINVAL; |
| 274 | } |
| 275 | |
| 276 | ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); |
| 277 | if (ret == 0) { |
| 278 | if (optname == SO_REUSEPORT) |
| 279 | sk->sk_reuseport = ssock->sk->sk_reuseport; |
| 280 | else if (optname == SO_REUSEADDR) |
| 281 | sk->sk_reuse = ssock->sk->sk_reuse; |
Florian Westphal | 5d0a6bc82d | 2021-04-15 16:44:56 -0700 | [diff] [blame] | 282 | else if (optname == SO_BINDTODEVICE) |
| 283 | sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; |
| 284 | else if (optname == SO_BINDTOIFINDEX) |
| 285 | sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 286 | } |
| 287 | release_sock(sk); |
| 288 | return ret; |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 289 | case SO_KEEPALIVE: |
| 290 | case SO_PRIORITY: |
Florian Westphal | 5d0a6bc82d | 2021-04-15 16:44:56 -0700 | [diff] [blame] | 291 | case SO_SNDBUF: |
| 292 | case SO_SNDBUFFORCE: |
| 293 | case SO_RCVBUF: |
| 294 | case SO_RCVBUFFORCE: |
Florian Westphal | 3670441 | 2021-04-15 16:44:58 -0700 | [diff] [blame] | 295 | case SO_MARK: |
Florian Westphal | 6f0d719 | 2021-04-15 16:44:59 -0700 | [diff] [blame] | 296 | case SO_INCOMING_CPU: |
Florian Westphal | a03c99b | 2021-04-15 16:45:00 -0700 | [diff] [blame] | 297 | case SO_DEBUG: |
Florian Westphal | 9061f24 | 2021-06-03 16:24:29 -0700 | [diff] [blame] | 298 | case SO_TIMESTAMP_OLD: |
| 299 | case SO_TIMESTAMP_NEW: |
| 300 | case SO_TIMESTAMPNS_OLD: |
| 301 | case SO_TIMESTAMPNS_NEW: |
| 302 | case SO_TIMESTAMPING_OLD: |
| 303 | case SO_TIMESTAMPING_NEW: |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 304 | return mptcp_setsockopt_sol_socket_int(msk, optname, optval, optlen); |
Florian Westphal | 268b123 | 2021-04-15 16:44:57 -0700 | [diff] [blame] | 305 | case SO_LINGER: |
| 306 | return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); |
Florian Westphal | 7a009a7 | 2021-06-03 16:24:30 -0700 | [diff] [blame^] | 307 | case SO_RCVLOWAT: |
| 308 | case SO_RCVTIMEO_OLD: |
| 309 | case SO_RCVTIMEO_NEW: |
| 310 | case SO_BUSY_POLL: |
| 311 | case SO_PREFER_BUSY_POLL: |
| 312 | case SO_BUSY_POLL_BUDGET: |
| 313 | /* No need to copy: only relevant for msk */ |
| 314 | return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); |
Florian Westphal | a03c99b | 2021-04-15 16:45:00 -0700 | [diff] [blame] | 315 | case SO_NO_CHECK: |
| 316 | case SO_DONTROUTE: |
| 317 | case SO_BROADCAST: |
| 318 | case SO_BSDCOMPAT: |
| 319 | case SO_PASSCRED: |
| 320 | case SO_PASSSEC: |
| 321 | case SO_RXQ_OVFL: |
| 322 | case SO_WIFI_STATUS: |
| 323 | case SO_NOFCS: |
| 324 | case SO_SELECT_ERR_QUEUE: |
| 325 | return 0; |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 326 | } |
| 327 | |
Florian Westphal | 7a009a7 | 2021-06-03 16:24:30 -0700 | [diff] [blame^] | 328 | /* SO_OOBINLINE is not supported, let's avoid the related mess |
| 329 | * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, |
| 330 | * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, |
| 331 | * we must be careful with subflows |
| 332 | * |
| 333 | * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks |
| 334 | * explicitly the sk_protocol field |
| 335 | * |
| 336 | * SO_PEEK_OFF is unsupported, as it is for plain TCP |
| 337 | * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows |
| 338 | * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, |
| 339 | * but likely needs careful design |
| 340 | * |
| 341 | * SO_ZEROCOPY is currently unsupported, TODO in sndmsg |
| 342 | * SO_TXTIME is currently unsupported |
| 343 | */ |
| 344 | |
| 345 | return -EOPNOTSUPP; |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 346 | } |
| 347 | |
| 348 | static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, |
| 349 | sockptr_t optval, unsigned int optlen) |
| 350 | { |
| 351 | struct sock *sk = (struct sock *)msk; |
| 352 | int ret = -EOPNOTSUPP; |
| 353 | struct socket *ssock; |
| 354 | |
| 355 | switch (optname) { |
| 356 | case IPV6_V6ONLY: |
| 357 | lock_sock(sk); |
| 358 | ssock = __mptcp_nmpc_socket(msk); |
| 359 | if (!ssock) { |
| 360 | release_sock(sk); |
| 361 | return -EINVAL; |
| 362 | } |
| 363 | |
| 364 | ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); |
| 365 | if (ret == 0) |
| 366 | sk->sk_ipv6only = ssock->sk->sk_ipv6only; |
| 367 | |
| 368 | release_sock(sk); |
| 369 | break; |
| 370 | } |
| 371 | |
| 372 | return ret; |
| 373 | } |
| 374 | |
Paolo Abeni | d9e4c12 | 2021-04-15 16:44:52 -0700 | [diff] [blame] | 375 | static bool mptcp_supported_sockopt(int level, int optname) |
| 376 | { |
Paolo Abeni | d9e4c12 | 2021-04-15 16:44:52 -0700 | [diff] [blame] | 377 | if (level == SOL_IP) { |
| 378 | switch (optname) { |
| 379 | /* should work fine */ |
| 380 | case IP_FREEBIND: |
| 381 | case IP_TRANSPARENT: |
| 382 | |
| 383 | /* the following are control cmsg related */ |
| 384 | case IP_PKTINFO: |
| 385 | case IP_RECVTTL: |
| 386 | case IP_RECVTOS: |
| 387 | case IP_RECVOPTS: |
| 388 | case IP_RETOPTS: |
| 389 | case IP_PASSSEC: |
| 390 | case IP_RECVORIGDSTADDR: |
| 391 | case IP_CHECKSUM: |
| 392 | case IP_RECVFRAGSIZE: |
| 393 | |
| 394 | /* common stuff that need some love */ |
| 395 | case IP_TOS: |
| 396 | case IP_TTL: |
| 397 | case IP_BIND_ADDRESS_NO_PORT: |
| 398 | case IP_MTU_DISCOVER: |
| 399 | case IP_RECVERR: |
| 400 | |
| 401 | /* possibly less common may deserve some love */ |
| 402 | case IP_MINTTL: |
| 403 | |
| 404 | /* the following is apparently a no-op for plain TCP */ |
| 405 | case IP_RECVERR_RFC4884: |
| 406 | return true; |
| 407 | } |
| 408 | |
| 409 | /* IP_OPTIONS is not supported, needs subflow care */ |
| 410 | /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ |
| 411 | /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, |
| 412 | * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, |
| 413 | * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, |
| 414 | * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, |
| 415 | * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, |
| 416 | * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal |
| 417 | * with mcast stuff |
| 418 | */ |
| 419 | /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ |
| 420 | return false; |
| 421 | } |
| 422 | if (level == SOL_IPV6) { |
| 423 | switch (optname) { |
| 424 | case IPV6_V6ONLY: |
| 425 | |
| 426 | /* the following are control cmsg related */ |
| 427 | case IPV6_RECVPKTINFO: |
| 428 | case IPV6_2292PKTINFO: |
| 429 | case IPV6_RECVHOPLIMIT: |
| 430 | case IPV6_2292HOPLIMIT: |
| 431 | case IPV6_RECVRTHDR: |
| 432 | case IPV6_2292RTHDR: |
| 433 | case IPV6_RECVHOPOPTS: |
| 434 | case IPV6_2292HOPOPTS: |
| 435 | case IPV6_RECVDSTOPTS: |
| 436 | case IPV6_2292DSTOPTS: |
| 437 | case IPV6_RECVTCLASS: |
| 438 | case IPV6_FLOWINFO: |
| 439 | case IPV6_RECVPATHMTU: |
| 440 | case IPV6_RECVORIGDSTADDR: |
| 441 | case IPV6_RECVFRAGSIZE: |
| 442 | |
| 443 | /* the following ones need some love but are quite common */ |
| 444 | case IPV6_TCLASS: |
| 445 | case IPV6_TRANSPARENT: |
| 446 | case IPV6_FREEBIND: |
| 447 | case IPV6_PKTINFO: |
| 448 | case IPV6_2292PKTOPTIONS: |
| 449 | case IPV6_UNICAST_HOPS: |
| 450 | case IPV6_MTU_DISCOVER: |
| 451 | case IPV6_MTU: |
| 452 | case IPV6_RECVERR: |
| 453 | case IPV6_FLOWINFO_SEND: |
| 454 | case IPV6_FLOWLABEL_MGR: |
| 455 | case IPV6_MINHOPCOUNT: |
| 456 | case IPV6_DONTFRAG: |
| 457 | case IPV6_AUTOFLOWLABEL: |
| 458 | |
| 459 | /* the following one is a no-op for plain TCP */ |
| 460 | case IPV6_RECVERR_RFC4884: |
| 461 | return true; |
| 462 | } |
| 463 | |
| 464 | /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are |
| 465 | * not supported |
| 466 | */ |
| 467 | /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, |
| 468 | * IPV6_MULTICAST_IF, IPV6_ADDRFORM, |
| 469 | * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, |
| 470 | * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, |
| 471 | * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, |
| 472 | * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER |
| 473 | * are not supported better not deal with mcast |
| 474 | */ |
| 475 | /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ |
| 476 | |
| 477 | /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ |
| 478 | /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ |
| 479 | return false; |
| 480 | } |
| 481 | if (level == SOL_TCP) { |
| 482 | switch (optname) { |
| 483 | /* the following are no-op or should work just fine */ |
| 484 | case TCP_THIN_DUPACK: |
| 485 | case TCP_DEFER_ACCEPT: |
| 486 | |
| 487 | /* the following need some love */ |
| 488 | case TCP_MAXSEG: |
| 489 | case TCP_NODELAY: |
| 490 | case TCP_THIN_LINEAR_TIMEOUTS: |
| 491 | case TCP_CONGESTION: |
| 492 | case TCP_ULP: |
| 493 | case TCP_CORK: |
| 494 | case TCP_KEEPIDLE: |
| 495 | case TCP_KEEPINTVL: |
| 496 | case TCP_KEEPCNT: |
| 497 | case TCP_SYNCNT: |
| 498 | case TCP_SAVE_SYN: |
| 499 | case TCP_LINGER2: |
| 500 | case TCP_WINDOW_CLAMP: |
| 501 | case TCP_QUICKACK: |
| 502 | case TCP_USER_TIMEOUT: |
| 503 | case TCP_TIMESTAMP: |
| 504 | case TCP_NOTSENT_LOWAT: |
| 505 | case TCP_TX_DELAY: |
| 506 | return true; |
| 507 | } |
| 508 | |
| 509 | /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ |
| 510 | |
| 511 | /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, |
| 512 | * TCP_REPAIR_WINDOW are not supported, better avoid this mess |
| 513 | */ |
| 514 | /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE, |
| 515 | * are not supported fastopen is currently unsupported |
| 516 | */ |
| 517 | /* TCP_INQ is currently unsupported, needs some recvmsg work */ |
| 518 | } |
| 519 | return false; |
| 520 | } |
| 521 | |
Florian Westphal | aa1fbd9 | 2021-04-15 16:45:01 -0700 | [diff] [blame] | 522 | static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, |
| 523 | unsigned int optlen) |
| 524 | { |
| 525 | struct mptcp_subflow_context *subflow; |
| 526 | struct sock *sk = (struct sock *)msk; |
| 527 | char name[TCP_CA_NAME_MAX]; |
| 528 | bool cap_net_admin; |
| 529 | int ret; |
| 530 | |
| 531 | if (optlen < 1) |
| 532 | return -EINVAL; |
| 533 | |
| 534 | ret = strncpy_from_sockptr(name, optval, |
| 535 | min_t(long, TCP_CA_NAME_MAX - 1, optlen)); |
| 536 | if (ret < 0) |
| 537 | return -EFAULT; |
| 538 | |
| 539 | name[ret] = 0; |
| 540 | |
| 541 | cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); |
| 542 | |
| 543 | ret = 0; |
| 544 | lock_sock(sk); |
| 545 | sockopt_seq_inc(msk); |
| 546 | mptcp_for_each_subflow(msk, subflow) { |
| 547 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
| 548 | int err; |
| 549 | |
| 550 | lock_sock(ssk); |
| 551 | err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); |
| 552 | if (err < 0 && ret == 0) |
| 553 | ret = err; |
| 554 | subflow->setsockopt_seq = msk->setsockopt_seq; |
| 555 | release_sock(ssk); |
| 556 | } |
| 557 | |
| 558 | if (ret == 0) |
Paolo Abeni | 20b5759 | 2021-05-25 14:23:10 -0700 | [diff] [blame] | 559 | strcpy(msk->ca_name, name); |
Florian Westphal | aa1fbd9 | 2021-04-15 16:45:01 -0700 | [diff] [blame] | 560 | |
| 561 | release_sock(sk); |
| 562 | return ret; |
| 563 | } |
| 564 | |
| 565 | static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
| 566 | sockptr_t optval, unsigned int optlen) |
| 567 | { |
| 568 | switch (optname) { |
| 569 | case TCP_ULP: |
| 570 | return -EOPNOTSUPP; |
| 571 | case TCP_CONGESTION: |
| 572 | return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); |
| 573 | } |
| 574 | |
| 575 | return -EOPNOTSUPP; |
| 576 | } |
| 577 | |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 578 | int mptcp_setsockopt(struct sock *sk, int level, int optname, |
| 579 | sockptr_t optval, unsigned int optlen) |
| 580 | { |
| 581 | struct mptcp_sock *msk = mptcp_sk(sk); |
| 582 | struct sock *ssk; |
| 583 | |
| 584 | pr_debug("msk=%p", msk); |
| 585 | |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 586 | if (level == SOL_SOCKET) |
| 587 | return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); |
| 588 | |
Florian Westphal | 7a009a7 | 2021-06-03 16:24:30 -0700 | [diff] [blame^] | 589 | if (!mptcp_supported_sockopt(level, optname)) |
| 590 | return -ENOPROTOOPT; |
| 591 | |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 592 | /* @@ the meaning of setsockopt() when the socket is connected and |
| 593 | * there are multiple subflows is not yet defined. It is up to the |
| 594 | * MPTCP-level socket to configure the subflows until the subflow |
| 595 | * is in TCP fallback, when TCP socket options are passed through |
| 596 | * to the one remaining subflow. |
| 597 | */ |
| 598 | lock_sock(sk); |
| 599 | ssk = __mptcp_tcp_fallback(msk); |
| 600 | release_sock(sk); |
| 601 | if (ssk) |
| 602 | return tcp_setsockopt(ssk, level, optname, optval, optlen); |
| 603 | |
| 604 | if (level == SOL_IPV6) |
| 605 | return mptcp_setsockopt_v6(msk, optname, optval, optlen); |
| 606 | |
Florian Westphal | aa1fbd9 | 2021-04-15 16:45:01 -0700 | [diff] [blame] | 607 | if (level == SOL_TCP) |
| 608 | return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); |
| 609 | |
| 610 | return -EOPNOTSUPP; |
| 611 | } |
| 612 | |
| 613 | static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, |
| 614 | char __user *optval, int __user *optlen) |
| 615 | { |
| 616 | struct sock *sk = (struct sock *)msk; |
| 617 | struct socket *ssock; |
| 618 | int ret = -EINVAL; |
| 619 | struct sock *ssk; |
| 620 | |
| 621 | lock_sock(sk); |
| 622 | ssk = msk->first; |
| 623 | if (ssk) { |
| 624 | ret = tcp_getsockopt(ssk, level, optname, optval, optlen); |
| 625 | goto out; |
| 626 | } |
| 627 | |
| 628 | ssock = __mptcp_nmpc_socket(msk); |
| 629 | if (!ssock) |
| 630 | goto out; |
| 631 | |
| 632 | ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); |
| 633 | |
| 634 | out: |
| 635 | release_sock(sk); |
| 636 | return ret; |
| 637 | } |
| 638 | |
| 639 | static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, |
| 640 | char __user *optval, int __user *optlen) |
| 641 | { |
| 642 | switch (optname) { |
| 643 | case TCP_ULP: |
| 644 | case TCP_CONGESTION: |
| 645 | case TCP_INFO: |
| 646 | case TCP_CC_INFO: |
| 647 | return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, |
| 648 | optval, optlen); |
| 649 | } |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 650 | return -EOPNOTSUPP; |
| 651 | } |
| 652 | |
| 653 | int mptcp_getsockopt(struct sock *sk, int level, int optname, |
| 654 | char __user *optval, int __user *option) |
| 655 | { |
| 656 | struct mptcp_sock *msk = mptcp_sk(sk); |
| 657 | struct sock *ssk; |
| 658 | |
| 659 | pr_debug("msk=%p", msk); |
| 660 | |
| 661 | /* @@ the meaning of setsockopt() when the socket is connected and |
| 662 | * there are multiple subflows is not yet defined. It is up to the |
| 663 | * MPTCP-level socket to configure the subflows until the subflow |
| 664 | * is in TCP fallback, when socket options are passed through |
| 665 | * to the one remaining subflow. |
| 666 | */ |
| 667 | lock_sock(sk); |
| 668 | ssk = __mptcp_tcp_fallback(msk); |
| 669 | release_sock(sk); |
| 670 | if (ssk) |
| 671 | return tcp_getsockopt(ssk, level, optname, optval, option); |
| 672 | |
Florian Westphal | aa1fbd9 | 2021-04-15 16:45:01 -0700 | [diff] [blame] | 673 | if (level == SOL_TCP) |
| 674 | return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); |
Paolo Abeni | 0abdde8 | 2021-04-15 16:44:51 -0700 | [diff] [blame] | 675 | return -EOPNOTSUPP; |
| 676 | } |
| 677 | |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 678 | static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) |
| 679 | { |
Florian Westphal | 5d0a6bc82d | 2021-04-15 16:44:56 -0700 | [diff] [blame] | 680 | static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 681 | struct sock *sk = (struct sock *)msk; |
| 682 | |
| 683 | if (ssk->sk_prot->keepalive) { |
| 684 | if (sock_flag(sk, SOCK_KEEPOPEN)) |
| 685 | ssk->sk_prot->keepalive(ssk, 1); |
| 686 | else |
| 687 | ssk->sk_prot->keepalive(ssk, 0); |
| 688 | } |
| 689 | |
| 690 | ssk->sk_priority = sk->sk_priority; |
Florian Westphal | 5d0a6bc82d | 2021-04-15 16:44:56 -0700 | [diff] [blame] | 691 | ssk->sk_bound_dev_if = sk->sk_bound_dev_if; |
| 692 | ssk->sk_incoming_cpu = sk->sk_incoming_cpu; |
| 693 | |
| 694 | if (sk->sk_userlocks & tx_rx_locks) { |
| 695 | ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; |
| 696 | if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) |
| 697 | WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); |
| 698 | if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) |
| 699 | WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); |
| 700 | } |
| 701 | |
| 702 | if (sock_flag(sk, SOCK_LINGER)) { |
| 703 | ssk->sk_lingertime = sk->sk_lingertime; |
| 704 | sock_set_flag(ssk, SOCK_LINGER); |
| 705 | } else { |
| 706 | sock_reset_flag(ssk, SOCK_LINGER); |
| 707 | } |
| 708 | |
| 709 | if (sk->sk_mark != ssk->sk_mark) { |
| 710 | ssk->sk_mark = sk->sk_mark; |
| 711 | sk_dst_reset(ssk); |
| 712 | } |
| 713 | |
| 714 | sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); |
| 715 | |
| 716 | if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) |
Paolo Abeni | 20b5759 | 2021-05-25 14:23:10 -0700 | [diff] [blame] | 717 | tcp_set_congestion_control(ssk, msk->ca_name, false, true); |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 718 | } |
| 719 | |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 720 | static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
| 721 | { |
Florian Westphal | 1b3e7ed | 2021-04-15 16:44:55 -0700 | [diff] [blame] | 722 | bool slow = lock_sock_fast(ssk); |
| 723 | |
| 724 | sync_socket_options(msk, ssk); |
| 725 | |
| 726 | unlock_sock_fast(ssk, slow); |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 727 | } |
| 728 | |
Florian Westphal | 7896248 | 2021-04-15 16:44:53 -0700 | [diff] [blame] | 729 | void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) |
| 730 | { |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 731 | struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); |
| 732 | |
Florian Westphal | 7896248 | 2021-04-15 16:44:53 -0700 | [diff] [blame] | 733 | msk_owned_by_me(msk); |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 734 | |
| 735 | if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { |
| 736 | __mptcp_sockopt_sync(msk, ssk); |
| 737 | |
| 738 | subflow->setsockopt_seq = msk->setsockopt_seq; |
| 739 | } |
Florian Westphal | 7896248 | 2021-04-15 16:44:53 -0700 | [diff] [blame] | 740 | } |
| 741 | |
| 742 | void mptcp_sockopt_sync_all(struct mptcp_sock *msk) |
| 743 | { |
| 744 | struct mptcp_subflow_context *subflow; |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 745 | struct sock *sk = (struct sock *)msk; |
| 746 | u32 seq; |
Florian Westphal | 7896248 | 2021-04-15 16:44:53 -0700 | [diff] [blame] | 747 | |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 748 | seq = sockopt_seq_reset(sk); |
Florian Westphal | 7896248 | 2021-04-15 16:44:53 -0700 | [diff] [blame] | 749 | |
| 750 | mptcp_for_each_subflow(msk, subflow) { |
| 751 | struct sock *ssk = mptcp_subflow_tcp_sock(subflow); |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 752 | u32 sseq = READ_ONCE(subflow->setsockopt_seq); |
Florian Westphal | 7896248 | 2021-04-15 16:44:53 -0700 | [diff] [blame] | 753 | |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 754 | if (sseq != msk->setsockopt_seq) { |
| 755 | __mptcp_sockopt_sync(msk, ssk); |
| 756 | WRITE_ONCE(subflow->setsockopt_seq, seq); |
| 757 | } else if (sseq != seq) { |
| 758 | WRITE_ONCE(subflow->setsockopt_seq, seq); |
| 759 | } |
Florian Westphal | 7896248 | 2021-04-15 16:44:53 -0700 | [diff] [blame] | 760 | |
| 761 | cond_resched(); |
| 762 | } |
Florian Westphal | df00b08 | 2021-04-15 16:44:54 -0700 | [diff] [blame] | 763 | |
| 764 | msk->setsockopt_seq = seq; |
Florian Westphal | 7896248 | 2021-04-15 16:44:53 -0700 | [diff] [blame] | 765 | } |