// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

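/* Track the AF_XDP sockets sharing this umem. The list is RCU-protected,
 * so the spinlock only needs to serialize writers; readers can traverse
 * the list under rcu_read_lock().
 */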
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_add_rcu(&xs->list, &umem->xsk_list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_del_rcu(&xs->list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

/* The umem is stored both in the _rx struct and the _tx struct as we do
 * not know if the device has more tx queues than rx, or the opposite.
 * This might also change during run time.
 */
static int xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
			       u16 queue_id)
{
	if (queue_id >= max_t(unsigned int,
			      dev->real_num_rx_queues,
			      dev->real_num_tx_queues))
		return -EINVAL;

	if (queue_id < dev->real_num_rx_queues)
		dev->_rx[queue_id].umem = umem;
	if (queue_id < dev->real_num_tx_queues)
		dev->_tx[queue_id].umem = umem;

	return 0;
}

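/* Look up the umem bound to @queue_id on @dev, checking both the rx and
 * the tx side. Returns NULL if no umem is registered for that queue.
 */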
struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
				       u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		return dev->_rx[queue_id].umem;
	if (queue_id < dev->real_num_tx_queues)
		return dev->_tx[queue_id].umem;

	return NULL;
}

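/* Remove the umem pointer for @queue_id again, on both the rx and the tx
 * side.
 */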
static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		dev->_rx[queue_id].umem = NULL;
	if (queue_id < dev->real_num_tx_queues)
		dev->_tx[queue_id].umem = NULL;
}

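/* Bind the umem to one queue of a netdev. XDP_ZEROCOPY and XDP_COPY are
 * mutually exclusive. If neither flag is given, zero-copy is attempted
 * first and we silently fall back to copy mode when the driver lacks the
 * required ndo callbacks or rejects the XDP_SETUP_XSK_UMEM command.
 */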
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			u16 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err = 0;

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	rtnl_lock();
	if (xdp_get_umem_from_qid(dev, queue_id)) {
		err = -EBUSY;
		goto out_rtnl_unlock;
	}

	err = xdp_reg_umem_at_qid(dev, umem, queue_id);
	if (err)
		goto out_rtnl_unlock;

	umem->dev = dev;
	umem->queue_id = queue_id;
	if (force_copy)
		/* For copy-mode, we are done. */
		goto out_rtnl_unlock;

	if (!dev->netdev_ops->ndo_bpf ||
	    !dev->netdev_ops->ndo_xsk_async_xmit) {
		err = -EOPNOTSUPP;
		goto err_unreg_umem;
	}

	bpf.command = XDP_SETUP_XSK_UMEM;
	bpf.xsk.umem = umem;
	bpf.xsk.queue_id = queue_id;

	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
	if (err)
		goto err_unreg_umem;
	rtnl_unlock();

	dev_hold(dev);
	umem->zc = true;
	return 0;

err_unreg_umem:
	xdp_clear_umem_at_qid(dev, queue_id);
	if (!force_zc)
		err = 0; /* fallback to copy mode */
out_rtnl_unlock:
	rtnl_unlock();
	return err;
}

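/* Undo xdp_umem_assign_dev(): ask the driver to tear down its zero-copy
 * state, clear the queue's umem pointers and drop the netdev reference
 * taken for zero-copy mode.
 */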
static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
	struct netdev_bpf bpf;
	int err;

	if (umem->zc) {
		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = NULL;
		bpf.xsk.queue_id = umem->queue_id;

		rtnl_lock();
		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
		rtnl_unlock();

		if (err)
			WARN(1, "failed to disable umem!\n");
	}

	if (umem->dev) {
		rtnl_lock();
		xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
		rtnl_unlock();
	}

	if (umem->zc) {
		dev_put(umem->dev);
		umem->zc = false;
	}
}

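/* Release the pages pinned by xdp_umem_pin_pages(). The pages are marked
 * dirty before the pin is dropped, since packet data may have been
 * written into them.
 */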
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

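/* Return the pinned pages to the user's RLIMIT_MEMLOCK accounting. */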
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

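/* Final teardown of a umem: detach it from the device, destroy the fill
 * and completion rings, unpin and unaccount the user pages, and free the
 * umem itself. Runs from a workqueue, so sleeping is allowed.
 */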
static void xdp_umem_release(struct xdp_umem *umem)
{
	struct task_struct *task;
	struct mm_struct *mm;

	xdp_umem_clear_dev(umem);

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xsk_reuseq_destroy(umem);

	xdp_umem_unpin_pages(umem);

	task = get_pid_task(umem->pid, PIDTYPE_PID);
	put_pid(umem->pid);
	if (!task)
		goto out;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		goto out;

	mmput(mm);
	kfree(umem->pages);
	umem->pages = NULL;

	xdp_umem_unaccount_pages(umem);
out:
	kfree(umem);
}

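/* Work item wrapper so that the release runs in process context. */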
static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

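/* Drop a reference to the umem. The final put defers the actual release
 * to a workqueue, as the teardown takes the rtnl lock and may sleep.
 */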
void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}

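/* Pin the umem's user pages with get_user_pages() so their addresses stay
 * valid while the kernel and the device use them. A partial pin is undone
 * before returning an error.
 */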
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
			    GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

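/* Charge the pinned pages against the user's RLIMIT_MEMLOCK limit, unless
 * the caller has CAP_IPC_LOCK. The cmpxchg loop makes the check-and-add
 * of locked_vm atomic with respect to concurrent registrations.
 */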
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

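/* Validate an XDP_UMEM_REG request and initialize the umem: the area must
 * be page aligned, the chunk size a power of two between
 * XDP_UMEM_MIN_CHUNK_SIZE and PAGE_SIZE, and each chunk must have room
 * for the requested headroom plus XDP_PACKET_HEADROOM. On success the
 * pages are accounted, pinned and their kernel addresses cached in
 * umem->pages.
 */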
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	unsigned int chunks, chunks_per_page;
	u64 addr = mr->addr, size = mr->len;
	int size_chk, err, i;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	chunks_per_page = PAGE_SIZE / chunk_size;
	if (chunks < chunks_per_page || chunks % chunks_per_page)
		return -EINVAL;

	headroom = ALIGN(headroom, 64);

	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->pid = get_task_pid(current, PIDTYPE_PID);
	umem->address = (unsigned long)addr;
	umem->chunk_mask = ~((u64)chunk_size - 1);
	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size_nohr = chunk_size - headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;
	INIT_LIST_HEAD(&umem->xsk_list);
	spin_lock_init(&umem->xsk_list_lock);

	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		goto out;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;

	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
	if (!umem->pages) {
		err = -ENOMEM;
		goto out_account;
	}

	for (i = 0; i < umem->npgs; i++)
		umem->pages[i].addr = page_address(umem->pgs[i]);

	return 0;

out_account:
	xdp_umem_unaccount_pages(umem);
out:
	put_pid(umem->pid);
	return err;
}

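/* Allocate a umem and register it according to @mr. Returns the new umem
 * on success or an ERR_PTR() on failure.
 */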
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

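/* A umem is only ready for use once both its fill ring (fq) and
 * completion ring (cq) have been created.
 */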
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}