// SPDX-License-Identifier: GPL-2.0
/* XDP sockets
 *
 * AF_XDP sockets allow a channel between XDP programs and userspace
 * applications.
 * Copyright(c) 2018 Intel Corporation.
 *
 * Author(s): Björn Töpel <bjorn.topel@intel.com>
 *	      Magnus Karlsson <magnus.karlsson@intel.com>
 */

#define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__

#include <linux/if_xdp.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <net/xdp_sock.h>
#include <net/xdp.h>

#include "xsk_queue.h"
#include "xdp_umem.h"

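/* Upper bound on the number of Tx descriptors handled in one sendmsg() call
 * before the caller is asked to retry with -EAGAIN.
 */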
#define TX_BATCH_SIZE 16

static struct xdp_sock *xdp_sk(struct sock *sk)
{
	return (struct xdp_sock *)sk;
}

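/* A socket is only usable as an XSKMAP redirect target once both its Rx ring
 * and the umem fill ring have been created.
 */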
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
	return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
		READ_ONCE(xs->umem->fq);
}

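/* Helpers exported to zero-copy capable drivers: peek at the next address on
 * the umem fill ring and, once the buffer has been consumed, discard that
 * entry.
 */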
u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
	return xskq_peek_addr(umem->fq, addr);
}
EXPORT_SYMBOL(xsk_umem_peek_addr);

void xsk_umem_discard_addr(struct xdp_umem *umem)
{
	xskq_discard_addr(umem->fq);
}
EXPORT_SYMBOL(xsk_umem_discard_addr);

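/* Copy-mode receive: grab an address from the fill ring, copy the packet into
 * the umem and post a descriptor on the Rx ring. The frame is dropped if no
 * fill entry is available or if it does not fit in a chunk.
 */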
static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	void *buffer;
	u64 addr;
	int err;

	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
	    len > xs->umem->chunk_size_nohr) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	addr += xs->umem->headroom;

	buffer = xdp_umem_get_data(xs->umem, addr);
	memcpy(buffer, xdp->data, len);
	err = xskq_produce_batch_desc(xs->rx, addr, len);
	if (!err) {
		xskq_discard_addr(xs->umem->fq);
		xdp_return_buff(xdp);
		return 0;
	}

	xs->rx_dropped++;
	return err;
}

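/* Zero-copy receive: the frame already lives in the umem, so only a
 * descriptor carrying its handle has to be posted on the Rx ring.
 */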
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
	int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len);

	if (err) {
		xdp_return_buff(xdp);
		xs->rx_dropped++;
	}

	return err;
}

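/* Receive entry point for the XDP_REDIRECT path. The packet must arrive on
 * the device and queue the socket is bound to; it is then handed to the
 * zero-copy or the copy path depending on the memory model of the Rx queue.
 */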
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	u32 len;

	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
		return -EINVAL;

	len = xdp->data_end - xdp->data;

	return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
}

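/* Make freshly produced Rx descriptors visible to user space and wake up any
 * process waiting on the socket.
 */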
void xsk_flush(struct xdp_sock *xs)
{
	xskq_produce_flush_desc(xs->rx);
	xs->sk.sk_data_ready(&xs->sk);
}

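/* Receive path for generic (SKB-mode) XDP: same copy scheme as __xsk_rcv(),
 * but the Rx ring is flushed and the receiver woken up right away.
 */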
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
{
	u32 len = xdp->data_end - xdp->data;
	void *buffer;
	u64 addr;
	int err;

	if (!xskq_peek_addr(xs->umem->fq, &addr) ||
	    len > xs->umem->chunk_size_nohr) {
		xs->rx_dropped++;
		return -ENOSPC;
	}

	addr += xs->umem->headroom;

	buffer = xdp_umem_get_data(xs->umem, addr);
	memcpy(buffer, xdp->data, len);
	err = xskq_produce_batch_desc(xs->rx, addr, len);
	if (!err) {
		xskq_discard_addr(xs->umem->fq);
		xsk_flush(xs);
		return 0;
	}

	xs->rx_dropped++;
	return err;
}

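/* skb destructor for copy-mode Tx: complete the frame by placing its umem
 * address on the completion ring, then release the socket send buffer space.
 */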
static void xsk_destruct_skb(struct sk_buff *skb)
{
	u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
	struct xdp_sock *xs = xdp_sk(skb->sk);

	WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));

	sock_wfree(skb);
}

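/* Copy-mode transmit: for each descriptor on the Tx ring, reserve a slot on
 * the completion ring, copy the frame into a newly allocated skb and send it
 * with dev_direct_xmit() on the bound queue. At most TX_BATCH_SIZE frames are
 * handled per call.
 */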
static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
			    size_t total_len)
{
	bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
	u32 max_batch = TX_BATCH_SIZE;
	struct xdp_sock *xs = xdp_sk(sk);
	bool sent_frame = false;
	struct xdp_desc desc;
	struct sk_buff *skb;
	int err = 0;

	if (unlikely(!xs->tx))
		return -ENOBUFS;
	if (need_wait)
		return -EOPNOTSUPP;

	mutex_lock(&xs->mutex);

	while (xskq_peek_desc(xs->tx, &desc)) {
		char *buffer;
		u64 addr;
		u32 len;

		if (max_batch-- == 0) {
			err = -EAGAIN;
			goto out;
		}

		if (xskq_reserve_addr(xs->umem->cq)) {
			err = -EAGAIN;
			goto out;
		}

		len = desc.len;
		if (unlikely(len > xs->dev->mtu)) {
			err = -EMSGSIZE;
			goto out;
		}

		if (xs->queue_id >= xs->dev->real_num_tx_queues) {
			err = -ENXIO;
			goto out;
		}

		skb = sock_alloc_send_skb(sk, len, !need_wait, &err);
		if (unlikely(!skb)) {
			err = -EAGAIN;
			goto out;
		}

		skb_put(skb, len);
		addr = desc.addr;
		buffer = xdp_umem_get_data(xs->umem, addr);
		err = skb_store_bits(skb, 0, buffer, len);
		if (unlikely(err)) {
			kfree_skb(skb);
			goto out;
		}

		skb->dev = xs->dev;
		skb->priority = sk->sk_priority;
		skb->mark = sk->sk_mark;
		skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
		skb->destructor = xsk_destruct_skb;

		err = dev_direct_xmit(skb, xs->queue_id);
		/* Ignore NET_XMIT_CN as packet might have been sent */
		if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
			err = -EAGAIN;
			/* SKB consumed by dev_direct_xmit() */
			goto out;
		}

		sent_frame = true;
		xskq_discard_desc(xs->tx);
	}

out:
	if (sent_frame)
		sk->sk_write_space(sk);

	mutex_unlock(&xs->mutex);
	return err;
}

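/* sendmsg() is only supported in non-blocking mode and requires the socket to
 * be bound to a device that is up.
 */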
static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);

	if (unlikely(!xs->dev))
		return -ENXIO;
	if (unlikely(!(xs->dev->flags & IFF_UP)))
		return -ENETDOWN;

	return xsk_generic_xmit(sk, m, total_len);
}

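/* Report the socket as readable when the Rx ring has descriptors to consume
 * and as writable while the Tx ring still has free entries.
 */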
static unsigned int xsk_poll(struct file *file, struct socket *sock,
			     struct poll_table_struct *wait)
{
	unsigned int mask = datagram_poll(file, sock, wait);
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);

	if (xs->rx && !xskq_empty_desc(xs->rx))
		mask |= POLLIN | POLLRDNORM;
	if (xs->tx && !xskq_full_desc(xs->tx))
		mask |= POLLOUT | POLLWRNORM;

	return mask;
}

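/* Allocate a ring with a power-of-2 number of entries and publish it. The
 * write barrier makes sure the queue is fully initialized before the pointer
 * becomes visible to other readers.
 */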
static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
			  bool umem_queue)
{
	struct xsk_queue *q;

	if (entries == 0 || *queue || !is_power_of_2(entries))
		return -EINVAL;

	q = xskq_create(entries, umem_queue);
	if (!q)
		return -ENOMEM;

	/* Make sure queue is ready before it can be seen by others */
	smp_wmb();
	*queue = q;
	return 0;
}

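/* Release the socket: detach it from its device, waiting for any in-flight
 * driver access to finish, and drop the protocol's in-use accounting.
 */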
static int xsk_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net *net;

	if (!sk)
		return 0;

	net = sock_net(sk);

	local_bh_disable();
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	local_bh_enable();

	if (xs->dev) {
		/* Wait for driver to stop using the xdp socket. */
		synchronize_net();
		dev_put(xs->dev);
		xs->dev = NULL;
	}

	sock_orphan(sk);
	sock->sk = NULL;

	sk_refcnt_debug_release(sk);
	sock_put(sk);

	return 0;
}

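/* Resolve a user-supplied file descriptor to an AF_XDP socket, taking a
 * reference on it. Used by XDP_SHARED_UMEM to find the socket to share with.
 */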
static struct socket *xsk_lookup_xsk_from_fd(int fd)
{
	struct socket *sock;
	int err;

	sock = sockfd_lookup(fd, &err);
	if (!sock)
		return ERR_PTR(-ENOTSOCK);

	if (sock->sk->sk_family != PF_XDP) {
		sockfd_put(sock);
		return ERR_PTR(-ENOPROTOOPT);
	}

	return sock;
}

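/* Bind the socket to a device and queue id. Either the umem registered on
 * this socket is attached to that device/queue, or, with XDP_SHARED_UMEM, the
 * umem of another socket already bound to the same device and queue is
 * inherited.
 */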
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	struct net_device *dev;
	u32 flags, qid;
	int err = 0;

	if (addr_len < sizeof(struct sockaddr_xdp))
		return -EINVAL;
	if (sxdp->sxdp_family != AF_XDP)
		return -EINVAL;

	mutex_lock(&xs->mutex);
	if (xs->dev) {
		err = -EBUSY;
		goto out_release;
	}

	dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
	if (!dev) {
		err = -ENODEV;
		goto out_release;
	}

	if (!xs->rx && !xs->tx) {
		err = -EINVAL;
		goto out_unlock;
	}

	qid = sxdp->sxdp_queue_id;

	if ((xs->rx && qid >= dev->real_num_rx_queues) ||
	    (xs->tx && qid >= dev->real_num_tx_queues)) {
		err = -EINVAL;
		goto out_unlock;
	}

	flags = sxdp->sxdp_flags;

	if (flags & XDP_SHARED_UMEM) {
		struct xdp_sock *umem_xs;
		struct socket *sock;

		if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) {
			/* Cannot specify flags for shared sockets. */
			err = -EINVAL;
			goto out_unlock;
		}

		if (xs->umem) {
			/* We already have our own. */
			err = -EINVAL;
			goto out_unlock;
		}

		sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
		if (IS_ERR(sock)) {
			err = PTR_ERR(sock);
			goto out_unlock;
		}

		umem_xs = xdp_sk(sock->sk);
		if (!umem_xs->umem) {
			/* No umem to inherit. */
			err = -EBADF;
			sockfd_put(sock);
			goto out_unlock;
		} else if (umem_xs->dev != dev || umem_xs->queue_id != qid) {
			err = -EINVAL;
			sockfd_put(sock);
			goto out_unlock;
		}

		xdp_get_umem(umem_xs->umem);
		xs->umem = umem_xs->umem;
		sockfd_put(sock);
	} else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
		err = -EINVAL;
		goto out_unlock;
	} else {
		/* This xsk has its own umem. */
		xskq_set_umem(xs->umem->fq, &xs->umem->props);
		xskq_set_umem(xs->umem->cq, &xs->umem->props);

		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
		if (err)
			goto out_unlock;
	}

	xs->dev = dev;
	xs->queue_id = sxdp->sxdp_queue_id;

	xskq_set_umem(xs->rx, &xs->umem->props);
	xskq_set_umem(xs->tx, &xs->umem->props);

out_unlock:
	if (err)
		dev_put(dev);
out_release:
	mutex_unlock(&xs->mutex);
	return err;
}

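/* Socket options: XDP_RX_RING/XDP_TX_RING create the descriptor rings,
 * XDP_UMEM_REG registers a umem, and XDP_UMEM_FILL_RING and
 * XDP_UMEM_COMPLETION_RING create the rings used to pass umem addresses back
 * and forth between kernel and user space.
 */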
static int xsk_setsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int err;

	if (level != SOL_XDP)
		return -ENOPROTOOPT;

	switch (optname) {
	case XDP_RX_RING:
	case XDP_TX_RING:
	{
		struct xsk_queue **q;
		int entries;

		if (optlen < sizeof(entries))
			return -EINVAL;
		if (copy_from_user(&entries, optval, sizeof(entries)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		q = (optname == XDP_TX_RING) ? &xs->tx : &xs->rx;
		err = xsk_init_queue(entries, q, false);
		mutex_unlock(&xs->mutex);
		return err;
	}
	case XDP_UMEM_REG:
	{
		struct xdp_umem_reg mr;
		struct xdp_umem *umem;

		if (copy_from_user(&mr, optval, sizeof(mr)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (xs->umem) {
			mutex_unlock(&xs->mutex);
			return -EBUSY;
		}

		umem = xdp_umem_create(&mr);
		if (IS_ERR(umem)) {
			mutex_unlock(&xs->mutex);
			return PTR_ERR(umem);
		}

		/* Make sure umem is ready before it can be seen by others */
		smp_wmb();
		xs->umem = umem;
		mutex_unlock(&xs->mutex);
		return 0;
	}
	case XDP_UMEM_FILL_RING:
	case XDP_UMEM_COMPLETION_RING:
	{
		struct xsk_queue **q;
		int entries;

		if (copy_from_user(&entries, optval, sizeof(entries)))
			return -EFAULT;

		mutex_lock(&xs->mutex);
		if (!xs->umem) {
			mutex_unlock(&xs->mutex);
			return -EINVAL;
		}

		q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
			&xs->umem->cq;
		err = xsk_init_queue(entries, q, true);
		mutex_unlock(&xs->mutex);
		return err;
	}
	default:
		break;
	}

	return -ENOPROTOOPT;
}

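/* Socket options readable by user space: drop/invalid-descriptor statistics
 * and the ring offsets needed to mmap() and use the four rings.
 */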
static int xsk_getsockopt(struct socket *sock, int level, int optname,
			  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	struct xdp_sock *xs = xdp_sk(sk);
	int len;

	if (level != SOL_XDP)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case XDP_STATISTICS:
	{
		struct xdp_statistics stats;

		if (len < sizeof(stats))
			return -EINVAL;

		mutex_lock(&xs->mutex);
		stats.rx_dropped = xs->rx_dropped;
		stats.rx_invalid_descs = xskq_nb_invalid_descs(xs->rx);
		stats.tx_invalid_descs = xskq_nb_invalid_descs(xs->tx);
		mutex_unlock(&xs->mutex);

		if (copy_to_user(optval, &stats, sizeof(stats)))
			return -EFAULT;
		if (put_user(sizeof(stats), optlen))
			return -EFAULT;

		return 0;
	}
	case XDP_MMAP_OFFSETS:
	{
		struct xdp_mmap_offsets off;

		if (len < sizeof(off))
			return -EINVAL;

		off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
		off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
		off.rx.desc = offsetof(struct xdp_rxtx_ring, desc);
		off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer);
		off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer);
		off.tx.desc = offsetof(struct xdp_rxtx_ring, desc);

		off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
		off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
		off.fr.desc = offsetof(struct xdp_umem_ring, desc);
		off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer);
		off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer);
		off.cr.desc = offsetof(struct xdp_umem_ring, desc);

		len = sizeof(off);
		if (copy_to_user(optval, &off, len))
			return -EFAULT;
		if (put_user(len, optlen))
			return -EFAULT;

		return 0;
	}
	default:
		break;
	}

	return -EOPNOTSUPP;
}

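/* Map one of the four rings into user space. The page offset passed in the
 * vma selects which ring is mapped: Rx, Tx, or the umem fill and completion
 * rings.
 */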
static int xsk_mmap(struct file *file, struct socket *sock,
		    struct vm_area_struct *vma)
{
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
	unsigned long size = vma->vm_end - vma->vm_start;
	struct xdp_sock *xs = xdp_sk(sock->sk);
	struct xsk_queue *q = NULL;
	struct xdp_umem *umem;
	unsigned long pfn;
	struct page *qpg;

	if (offset == XDP_PGOFF_RX_RING) {
		q = READ_ONCE(xs->rx);
	} else if (offset == XDP_PGOFF_TX_RING) {
		q = READ_ONCE(xs->tx);
	} else {
		umem = READ_ONCE(xs->umem);
		if (!umem)
			return -EINVAL;

		if (offset == XDP_UMEM_PGOFF_FILL_RING)
			q = READ_ONCE(umem->fq);
		else if (offset == XDP_UMEM_PGOFF_COMPLETION_RING)
			q = READ_ONCE(umem->cq);
	}

	if (!q)
		return -EINVAL;

	qpg = virt_to_head_page(q->ring);
	if (size > (PAGE_SIZE << compound_order(qpg)))
		return -EINVAL;

	pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
	return remap_pfn_range(vma, vma->vm_start, pfn,
			       size, vma->vm_page_prot);
}

static struct proto xsk_proto = {
	.name =		"XDP",
	.owner =	THIS_MODULE,
	.obj_size =	sizeof(struct xdp_sock),
};

static const struct proto_ops xsk_proto_ops = {
	.family		= PF_XDP,
	.owner		= THIS_MODULE,
	.release	= xsk_release,
	.bind		= xsk_bind,
	.connect	= sock_no_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= sock_no_accept,
	.getname	= sock_no_getname,
	.poll		= xsk_poll,
	.ioctl		= sock_no_ioctl,
	.listen		= sock_no_listen,
	.shutdown	= sock_no_shutdown,
	.setsockopt	= xsk_setsockopt,
	.getsockopt	= xsk_getsockopt,
	.sendmsg	= xsk_sendmsg,
	.recvmsg	= sock_no_recvmsg,
	.mmap		= xsk_mmap,
	.sendpage	= sock_no_sendpage,
};

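/* Final teardown of the sock: free both descriptor rings and drop the umem
 * reference once the socket is dead.
 */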
static void xsk_destruct(struct sock *sk)
{
	struct xdp_sock *xs = xdp_sk(sk);

	if (!sock_flag(sk, SOCK_DEAD))
		return;

	xskq_destroy(xs->rx);
	xskq_destroy(xs->tx);
	xdp_put_umem(xs->umem);

	sk_refcnt_debug_dec(sk);
}

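/* Create a new AF_XDP socket. Requires CAP_NET_RAW and only supports the
 * SOCK_RAW type with protocol 0.
 */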
static int xsk_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	struct sock *sk;
	struct xdp_sock *xs;

	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_RAW)
		return -ESOCKTNOSUPPORT;

	if (protocol)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
	if (!sk)
		return -ENOBUFS;

	sock->ops = &xsk_proto_ops;

	sock_init_data(sock, sk);

	sk->sk_family = PF_XDP;

	sk->sk_destruct = xsk_destruct;
	sk_refcnt_debug_inc(sk);

	xs = xdp_sk(sk);
	mutex_init(&xs->mutex);

	local_bh_disable();
	sock_prot_inuse_add(net, &xsk_proto, 1);
	local_bh_enable();

	return 0;
}

static const struct net_proto_family xsk_family_ops = {
	.family = PF_XDP,
	.create = xsk_create,
	.owner	= THIS_MODULE,
};

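/* Module init: register the XDP protocol and the PF_XDP socket family. */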
static int __init xsk_init(void)
{
	int err;

	err = proto_register(&xsk_proto, 0 /* no slab */);
	if (err)
		goto out;

	err = sock_register(&xsk_family_ops);
	if (err)
		goto out_proto;

	return 0;

out_proto:
	proto_unregister(&xsk_proto);
out:
	return err;
}

fs_initcall(xsk_init);