// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ftrace.h>

/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);

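/* Allocate one executable page to hold a trampoline image. The page is kept
 * writable (see the comment below) and its permissions are flushed and reset
 * when the page is eventually freed.
 */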
void *bpf_jit_alloc_exec_page(void)
{
	void *image;

	image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!image)
		return NULL;

	set_vm_flush_reset_perms(image);
	/* Keep image writable. The alternative is to keep flipping ro/rw
	 * every time a new program is attached or detached.
	 */
	set_memory_x((long)image, 1);
	return image;
}

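/* Find the trampoline for @key under trampoline_mutex and take a reference,
 * or, if none exists yet, allocate a new one together with its image page.
 * Returns NULL if either allocation fails.
 */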
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
	struct bpf_trampoline *tr;
	struct hlist_head *head;
	void *image;
	int i;

	mutex_lock(&trampoline_mutex);
	head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)];
	hlist_for_each_entry(tr, head, hlist) {
		if (tr->key == key) {
			refcount_inc(&tr->refcnt);
			goto out;
		}
	}
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out;

	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
	image = bpf_jit_alloc_exec_page();
	if (!image) {
		kfree(tr);
		tr = NULL;
		goto out;
	}

	tr->key = key;
	INIT_HLIST_NODE(&tr->hlist);
	hlist_add_head(&tr->hlist, head);
	refcount_set(&tr->refcnt, 1);
	mutex_init(&tr->mutex);
	for (i = 0; i < BPF_TRAMP_MAX; i++)
		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
	tr->image = image;
out:
	mutex_unlock(&trampoline_mutex);
	return tr;
}

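/* The trampoline is attached to the target function's fentry nop either via
 * ftrace's direct call API, when ftrace already manages that location, or by
 * patching the call site with bpf_arch_text_poke(). The helpers below pick
 * the mechanism based on tr->func.ftrace_managed, which register_fentry()
 * initializes from is_ftrace_location().
 */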
static int is_ftrace_location(void *ip)
{
	long addr;

	addr = ftrace_location((long)ip);
	if (!addr)
		return 0;
	if (WARN_ON_ONCE(addr != (long)ip))
		return -EFAULT;
	return 1;
}

static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = unregister_ftrace_direct((long)ip, (long)old_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
	return ret;
}

static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	if (tr->func.ftrace_managed)
		ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
	return ret;
}

/* first time registering */
static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
{
	void *ip = tr->func.addr;
	int ret;

	ret = is_ftrace_location(ip);
	if (ret < 0)
		return ret;
	tr->func.ftrace_managed = ret;

	if (tr->func.ftrace_managed)
		ret = register_ftrace_direct((long)ip, (long)new_addr);
	else
		ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
	return ret;
}

/* Each 'call __bpf_prog_enter' + 'call bpf_func' + 'call __bpf_prog_exit'
 * sequence is ~50 bytes on x86. Pick a limit so that all attached programs
 * fit into PAGE_SIZE / 2.
 */
#define BPF_MAX_TRAMP_PROGS 40

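/* Regenerate the trampoline for the current set of attached programs.
 * The image page is split into two halves and tr->selector tracks which half
 * is live, so the new trampoline is always built in the currently unused
 * half before the fentry call site is switched over to it.
 * Called with tr->mutex held.
 */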
static int bpf_trampoline_update(struct bpf_trampoline *tr)
{
	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
	struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
	int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
	int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
	struct bpf_prog **progs, **fentry, **fexit;
	u32 flags = BPF_TRAMP_F_RESTORE_REGS;
	struct bpf_prog_aux *aux;
	int err;

	if (fentry_cnt + fexit_cnt == 0) {
		err = unregister_fentry(tr, old_image);
		tr->selector = 0;
		goto out;
	}

	/* populate fentry progs */
	fentry = progs = progs_to_run;
	hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
		*progs++ = aux->prog;

	/* populate fexit progs */
	fexit = progs;
	hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
		*progs++ = aux->prog;

	if (fexit_cnt)
		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;

	/* Though the second half of the trampoline page is unused, a task
	 * could be preempted in the middle of the first half, and two updates
	 * to the trampoline would then change the code from underneath the
	 * preempted task. Hence wait for tasks to voluntarily schedule or go
	 * to userspace.
	 */
	synchronize_rcu_tasks();

	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
					  &tr->func.model, flags,
					  fentry, fentry_cnt,
					  fexit, fexit_cnt,
					  tr->func.addr);
	if (err < 0)
		goto out;

	if (tr->selector)
		/* progs already running at this address */
		err = modify_fentry(tr, old_image, new_image);
	else
		/* first time registering */
		err = register_fentry(tr, new_image);
	if (err)
		goto out;
	tr->selector++;
out:
	return err;
}

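/* Map the uapi attach type to the trampoline list it belongs to:
 * BPF_TRACE_FENTRY programs run before the traced function, everything else
 * is treated as fexit.
 */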
static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t)
{
	switch (t) {
	case BPF_TRACE_FENTRY:
		return BPF_TRAMP_FENTRY;
	default:
		return BPF_TRAMP_FEXIT;
	}
}

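/* Attach @prog to its trampoline: add it to the fentry or fexit list and
 * regenerate the trampoline image. Fails with -E2BIG when the combined
 * program count would exceed BPF_MAX_TRAMP_PROGS and with -EBUSY when the
 * program is already linked.
 */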
int bpf_trampoline_link_prog(struct bpf_prog *prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_trampoline *tr;
	int err = 0;

	tr = prog->aux->trampoline;
	kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
	mutex_lock(&tr->mutex);
	if (tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]
	    >= BPF_MAX_TRAMP_PROGS) {
		err = -E2BIG;
		goto out;
	}
	if (!hlist_unhashed(&prog->aux->tramp_hlist)) {
		/* prog already linked */
		err = -EBUSY;
		goto out;
	}
	hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
	tr->progs_cnt[kind]++;
	err = bpf_trampoline_update(prog->aux->trampoline);
	if (err) {
		hlist_del(&prog->aux->tramp_hlist);
		tr->progs_cnt[kind]--;
	}
out:
	mutex_unlock(&tr->mutex);
	return err;
}

/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
{
	enum bpf_tramp_prog_type kind;
	struct bpf_trampoline *tr;
	int err;

	tr = prog->aux->trampoline;
	kind = bpf_attach_type_to_tramp(prog->expected_attach_type);
	mutex_lock(&tr->mutex);
	hlist_del(&prog->aux->tramp_hlist);
	tr->progs_cnt[kind]--;
	err = bpf_trampoline_update(prog->aux->trampoline);
	mutex_unlock(&tr->mutex);
	return err;
}

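/* Drop a reference to @tr and free it once the last reference is gone and no
 * programs remain attached. Freeing waits for all tasks to leave the
 * trampoline before releasing the executable page.
 */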
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
	if (!tr)
		return;
	mutex_lock(&trampoline_mutex);
	if (!refcount_dec_and_test(&tr->refcnt))
		goto out;
	WARN_ON_ONCE(mutex_is_locked(&tr->mutex));
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY])))
		goto out;
	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
		goto out;
	/* wait for tasks to get out of trampoline before freeing it */
	synchronize_rcu_tasks();
	bpf_jit_free_exec(tr->image);
	hlist_del(&tr->hlist);
	kfree(tr);
out:
	mutex_unlock(&trampoline_mutex);
}

/* The logic is similar to BPF_PROG_RUN, but with the explicit rcu and preempt
 * handling that the trampoline needs. The macro is split into
 * call __bpf_prog_enter
 * call prog->bpf_func
 * call __bpf_prog_exit
 */
u64 notrace __bpf_prog_enter(void)
{
	u64 start = 0;

	rcu_read_lock();
	preempt_disable();
	if (static_branch_unlikely(&bpf_stats_enabled_key))
		start = sched_clock();
	return start;
}

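/* Counterpart of __bpf_prog_enter(): account the run time in the per-cpu
 * stats when stats collection is enabled, then release the preempt and rcu
 * protection taken on entry.
 */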
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
{
	struct bpf_prog_stats *stats;

	if (static_branch_unlikely(&bpf_stats_enabled_key) &&
	    /* static_key could be enabled in __bpf_prog_enter
	     * and disabled in __bpf_prog_exit.
	     * And vice versa.
	     * Hence check that 'start' is not zero.
	     */
	    start) {
		stats = this_cpu_ptr(prog->aux->stats);
		u64_stats_update_begin(&stats->syncp);
		stats->cnt++;
		stats->nsecs += sched_clock() - start;
		u64_stats_update_end(&stats->syncp);
	}
	preempt_enable();
	rcu_read_unlock();
}

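/* Architectures with trampoline support override this weak stub and emit the
 * actual trampoline code into [image, image_end). The default merely reports
 * that trampolines are not supported.
 */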
int __weak
arch_prepare_bpf_trampoline(void *image, void *image_end,
			    const struct btf_func_model *m, u32 flags,
			    struct bpf_prog **fentry_progs, int fentry_cnt,
			    struct bpf_prog **fexit_progs, int fexit_cnt,
			    void *orig_call)
{
	return -ENOTSUPP;
}

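/* Initialize the trampoline hash table buckets once at boot. */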
static int __init init_trampolines(void)
{
	int i;

	for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++)
		INIT_HLIST_HEAD(&trampoline_table[i]);
	return 0;
}
late_initcall(init_trampolines);