// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>

#include "xdp_umem.h"

#define XDP_UMEM_MIN_FRAME_SIZE 2048

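/* Allocate a zeroed umem handle; returns -ENOMEM if the allocation
 * fails. The handle is filled in by a later xdp_umem_reg() call.
 */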
int xdp_umem_create(struct xdp_umem **umem)
{
	*umem = kzalloc(sizeof(**umem), GFP_KERNEL);

	if (!*umem)
		return -ENOMEM;

	return 0;
}

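/* Undo xdp_umem_pin_pages(): mark every pinned page dirty, drop the
 * page references and free the page array.
 */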
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	if (umem->pgs) {
		for (i = 0; i < umem->npgs; i++) {
			struct page *page = umem->pgs[i];

			set_page_dirty_lock(page);
			put_page(page);
		}

		kfree(umem->pgs);
		umem->pgs = NULL;
	}
}

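/* Undo xdp_umem_account_pages(): subtract the umem's page count from
 * the owner's locked_vm and drop the uid reference.
 */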
static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

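/* Tear down a umem: destroy its fill and completion queues, unpin the
 * user pages and undo the locked memory accounting. Note that the
 * accounting step is skipped when the owning task or its mm has
 * already gone away.
 */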
static void xdp_umem_release(struct xdp_umem *umem)
{
	struct task_struct *task;
	struct mm_struct *mm;

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	if (umem->pgs) {
		xdp_umem_unpin_pages(umem);

		task = get_pid_task(umem->pid, PIDTYPE_PID);
		put_pid(umem->pid);
		if (!task)
			goto out;
		mm = get_task_mm(task);
		put_task_struct(task);
		if (!mm)
			goto out;

		mmput(mm);
		umem->pgs = NULL;
	}

	xdp_umem_unaccount_pages(umem);
out:
	kfree(umem);
}

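/* Work item callback that runs the actual release; see xdp_put_umem(). */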
static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

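/* Take a reference on the umem. */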
void xdp_get_umem(struct xdp_umem *umem)
{
	atomic_inc(&umem->users);
}

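/* Drop a reference on the umem. The final put defers the teardown to
 * a workqueue, presumably because the release path can sleep while
 * the last put may come from a context that cannot.
 */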
void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (atomic_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}

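/* Pin the umem's pages with get_user_pages() so they stay resident.
 * A partial pin is rolled back and reported as -ENOMEM.
 */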
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

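/* Charge the umem's pages against the owner's RLIMIT_MEMLOCK. Tasks
 * with CAP_IPC_LOCK are exempt. The cmpxchg loop updates locked_vm
 * without holding a lock.
 */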
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

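/* Validate a registration request from user space and set up the
 * umem: check the frame geometry, record the properties, account the
 * pages against RLIMIT_MEMLOCK and pin them in memory.
 */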
int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 frame_size = mr->frame_size, frame_headroom = mr->frame_headroom;
	u64 addr = mr->addr, size = mr->len, nframes64;
	unsigned int nframes, nfpp;
	int size_chk, err;

	if (!umem)
		return -EINVAL;

	if (frame_size < XDP_UMEM_MIN_FRAME_SIZE || frame_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(frame_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change in the future.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	/* Check the frame count before truncating to unsigned int, so
	 * that overly large areas are rejected instead of wrapping.
	 */
	nframes64 = div_u64(size, frame_size);
	if (nframes64 == 0 || nframes64 > UINT_MAX)
		return -EINVAL;
	nframes = (unsigned int)nframes64;

	nfpp = PAGE_SIZE / frame_size;
	if (nframes < nfpp || nframes % nfpp)
		return -EINVAL;

	frame_headroom = ALIGN(frame_headroom, 64);

	size_chk = frame_size - frame_headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->pid = get_task_pid(current, PIDTYPE_PID);
	umem->size = (size_t)size;
	umem->address = (unsigned long)addr;
	umem->props.frame_size = frame_size;
	umem->props.nframes = nframes;
	umem->frame_headroom = frame_headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;

	umem->frame_size_log2 = ilog2(frame_size);
	umem->nfpp_mask = nfpp - 1;
	umem->nfpplog2 = ilog2(nfpp);
	atomic_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		goto out;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;
	return 0;

out_account:
	xdp_umem_unaccount_pages(umem);
out:
	put_pid(umem->pid);
	return err;
}

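/* A umem is usable for a socket only once both its fill queue and
 * completion queue have been created.
 */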
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}