Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
| 2 | /* Copyright (c) 2019 Facebook */ |
| 3 | #include <linux/hash.h> |
| 4 | #include <linux/bpf.h> |
| 5 | #include <linux/filter.h> |
Alexei Starovoitov | b91e014 | 2019-12-08 16:01:13 -0800 | [diff] [blame] | 6 | #include <linux/ftrace.h> |
Jiri Olsa | e9b4e60 | 2020-01-23 17:15:07 +0100 | [diff] [blame] | 7 | #include <linux/rbtree_latch.h> |
Jiri Olsa | a108f7d | 2020-03-12 20:56:05 +0100 | [diff] [blame] | 8 | #include <linux/perf_event.h> |
KP Singh | 9e4e01d | 2020-03-29 01:43:52 +0100 | [diff] [blame] | 9 | #include <linux/btf.h> |
Alexei Starovoitov | 1e6c62a | 2020-08-27 15:01:11 -0700 | [diff] [blame] | 10 | #include <linux/rcupdate_trace.h> |
| 11 | #include <linux/rcupdate_wait.h> |
Jiri Olsa | 861de02 | 2021-03-26 11:59:00 +0100 | [diff] [blame] | 12 | #include <linux/module.h> |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 13 | |
Alexei Starovoitov | be8704f | 2020-01-20 16:53:46 -0800 | [diff] [blame] | 14 | /* dummy _ops. The verifier will operate on target program's ops. */ |
| 15 | const struct bpf_verifier_ops bpf_extension_verifier_ops = { |
| 16 | }; |
| 17 | const struct bpf_prog_ops bpf_extension_prog_ops = { |
| 18 | }; |
| 19 | |
/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
#define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)

/* Hash table of all trampolines; the bucket index is
 * hash_64(key, TRAMPOLINE_HASH_BITS).
 */
static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];

/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);
| 28 | |
Jiri Olsa | 7ac88eb | 2020-03-12 20:56:07 +0100 | [diff] [blame] | 29 | void *bpf_jit_alloc_exec_page(void) |
Björn Töpel | 98e8627 | 2019-12-13 18:51:07 +0100 | [diff] [blame] | 30 | { |
| 31 | void *image; |
| 32 | |
| 33 | image = bpf_jit_alloc_exec(PAGE_SIZE); |
| 34 | if (!image) |
| 35 | return NULL; |
| 36 | |
| 37 | set_vm_flush_reset_perms(image); |
| 38 | /* Keep image as writeable. The alternative is to keep flipping ro/rw |
| 39 | * everytime new program is attached or detached. |
| 40 | */ |
| 41 | set_memory_x((long)image, 1); |
| 42 | return image; |
| 43 | } |
| 44 | |
Jiri Olsa | a108f7d | 2020-03-12 20:56:05 +0100 | [diff] [blame] | 45 | void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym) |
| 46 | { |
| 47 | ksym->start = (unsigned long) data; |
Jiri Olsa | 7ac88eb | 2020-03-12 20:56:07 +0100 | [diff] [blame] | 48 | ksym->end = ksym->start + PAGE_SIZE; |
Jiri Olsa | a108f7d | 2020-03-12 20:56:05 +0100 | [diff] [blame] | 49 | bpf_ksym_add(ksym); |
| 50 | perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start, |
Jiri Olsa | 7ac88eb | 2020-03-12 20:56:07 +0100 | [diff] [blame] | 51 | PAGE_SIZE, false, ksym->name); |
Jiri Olsa | a108f7d | 2020-03-12 20:56:05 +0100 | [diff] [blame] | 52 | } |
| 53 | |
| 54 | void bpf_image_ksym_del(struct bpf_ksym *ksym) |
| 55 | { |
| 56 | bpf_ksym_del(ksym); |
| 57 | perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start, |
Jiri Olsa | 7ac88eb | 2020-03-12 20:56:07 +0100 | [diff] [blame] | 58 | PAGE_SIZE, true, ksym->name); |
Jiri Olsa | a108f7d | 2020-03-12 20:56:05 +0100 | [diff] [blame] | 59 | } |
| 60 | |
Toke Høiland-Jørgensen | f7b12b6 | 2020-09-25 23:25:02 +0200 | [diff] [blame] | 61 | static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 62 | { |
| 63 | struct bpf_trampoline *tr; |
| 64 | struct hlist_head *head; |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 65 | int i; |
| 66 | |
| 67 | mutex_lock(&trampoline_mutex); |
| 68 | head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)]; |
| 69 | hlist_for_each_entry(tr, head, hlist) { |
| 70 | if (tr->key == key) { |
| 71 | refcount_inc(&tr->refcnt); |
| 72 | goto out; |
| 73 | } |
| 74 | } |
| 75 | tr = kzalloc(sizeof(*tr), GFP_KERNEL); |
| 76 | if (!tr) |
| 77 | goto out; |
| 78 | |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 79 | tr->key = key; |
| 80 | INIT_HLIST_NODE(&tr->hlist); |
| 81 | hlist_add_head(&tr->hlist, head); |
| 82 | refcount_set(&tr->refcnt, 1); |
| 83 | mutex_init(&tr->mutex); |
| 84 | for (i = 0; i < BPF_TRAMP_MAX; i++) |
| 85 | INIT_HLIST_HEAD(&tr->progs_hlist[i]); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 86 | out: |
| 87 | mutex_unlock(&trampoline_mutex); |
| 88 | return tr; |
| 89 | } |
| 90 | |
Jiri Olsa | 861de02 | 2021-03-26 11:59:00 +0100 | [diff] [blame] | 91 | static int bpf_trampoline_module_get(struct bpf_trampoline *tr) |
| 92 | { |
| 93 | struct module *mod; |
| 94 | int err = 0; |
| 95 | |
| 96 | preempt_disable(); |
| 97 | mod = __module_text_address((unsigned long) tr->func.addr); |
| 98 | if (mod && !try_module_get(mod)) |
| 99 | err = -ENOENT; |
| 100 | preempt_enable(); |
| 101 | tr->mod = mod; |
| 102 | return err; |
| 103 | } |
| 104 | |
| 105 | static void bpf_trampoline_module_put(struct bpf_trampoline *tr) |
| 106 | { |
| 107 | module_put(tr->mod); |
| 108 | tr->mod = NULL; |
| 109 | } |
| 110 | |
Alexei Starovoitov | b91e014 | 2019-12-08 16:01:13 -0800 | [diff] [blame] | 111 | static int is_ftrace_location(void *ip) |
| 112 | { |
| 113 | long addr; |
| 114 | |
| 115 | addr = ftrace_location((long)ip); |
| 116 | if (!addr) |
| 117 | return 0; |
| 118 | if (WARN_ON_ONCE(addr != (long)ip)) |
| 119 | return -EFAULT; |
| 120 | return 1; |
| 121 | } |
| 122 | |
| 123 | static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) |
| 124 | { |
| 125 | void *ip = tr->func.addr; |
| 126 | int ret; |
| 127 | |
| 128 | if (tr->func.ftrace_managed) |
| 129 | ret = unregister_ftrace_direct((long)ip, (long)old_addr); |
| 130 | else |
| 131 | ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL); |
Jiri Olsa | 861de02 | 2021-03-26 11:59:00 +0100 | [diff] [blame] | 132 | |
| 133 | if (!ret) |
| 134 | bpf_trampoline_module_put(tr); |
Alexei Starovoitov | b91e014 | 2019-12-08 16:01:13 -0800 | [diff] [blame] | 135 | return ret; |
| 136 | } |
| 137 | |
| 138 | static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr) |
| 139 | { |
| 140 | void *ip = tr->func.addr; |
| 141 | int ret; |
| 142 | |
| 143 | if (tr->func.ftrace_managed) |
| 144 | ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr); |
| 145 | else |
| 146 | ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr); |
| 147 | return ret; |
| 148 | } |
| 149 | |
| 150 | /* first time registering */ |
| 151 | static int register_fentry(struct bpf_trampoline *tr, void *new_addr) |
| 152 | { |
| 153 | void *ip = tr->func.addr; |
| 154 | int ret; |
| 155 | |
| 156 | ret = is_ftrace_location(ip); |
| 157 | if (ret < 0) |
| 158 | return ret; |
| 159 | tr->func.ftrace_managed = ret; |
| 160 | |
Jiri Olsa | 861de02 | 2021-03-26 11:59:00 +0100 | [diff] [blame] | 161 | if (bpf_trampoline_module_get(tr)) |
| 162 | return -ENOENT; |
| 163 | |
Alexei Starovoitov | b91e014 | 2019-12-08 16:01:13 -0800 | [diff] [blame] | 164 | if (tr->func.ftrace_managed) |
| 165 | ret = register_ftrace_direct((long)ip, (long)new_addr); |
| 166 | else |
| 167 | ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr); |
Jiri Olsa | 861de02 | 2021-03-26 11:59:00 +0100 | [diff] [blame] | 168 | |
| 169 | if (ret) |
| 170 | bpf_trampoline_module_put(tr); |
Alexei Starovoitov | b91e014 | 2019-12-08 16:01:13 -0800 | [diff] [blame] | 171 | return ret; |
| 172 | } |
| 173 | |
KP Singh | 88fd9e5 | 2020-03-04 20:18:47 +0100 | [diff] [blame] | 174 | static struct bpf_tramp_progs * |
| 175 | bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total) |
| 176 | { |
| 177 | const struct bpf_prog_aux *aux; |
| 178 | struct bpf_tramp_progs *tprogs; |
| 179 | struct bpf_prog **progs; |
| 180 | int kind; |
| 181 | |
| 182 | *total = 0; |
| 183 | tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL); |
| 184 | if (!tprogs) |
| 185 | return ERR_PTR(-ENOMEM); |
| 186 | |
| 187 | for (kind = 0; kind < BPF_TRAMP_MAX; kind++) { |
| 188 | tprogs[kind].nr_progs = tr->progs_cnt[kind]; |
| 189 | *total += tr->progs_cnt[kind]; |
| 190 | progs = tprogs[kind].progs; |
| 191 | |
| 192 | hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist) |
| 193 | *progs++ = aux->prog; |
| 194 | } |
| 195 | return tprogs; |
| 196 | } |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 197 | |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 198 | static void __bpf_tramp_image_put_deferred(struct work_struct *work) |
| 199 | { |
| 200 | struct bpf_tramp_image *im; |
| 201 | |
| 202 | im = container_of(work, struct bpf_tramp_image, work); |
| 203 | bpf_image_ksym_del(&im->ksym); |
| 204 | bpf_jit_free_exec(im->image); |
| 205 | bpf_jit_uncharge_modmem(1); |
| 206 | percpu_ref_exit(&im->pcref); |
| 207 | kfree_rcu(im, rcu); |
| 208 | } |
| 209 | |
| 210 | /* callback, fexit step 3 or fentry step 2 */ |
| 211 | static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu) |
| 212 | { |
| 213 | struct bpf_tramp_image *im; |
| 214 | |
| 215 | im = container_of(rcu, struct bpf_tramp_image, rcu); |
| 216 | INIT_WORK(&im->work, __bpf_tramp_image_put_deferred); |
| 217 | schedule_work(&im->work); |
| 218 | } |
| 219 | |
| 220 | /* callback, fexit step 2. Called after percpu_ref_kill confirms. */ |
| 221 | static void __bpf_tramp_image_release(struct percpu_ref *pcref) |
| 222 | { |
| 223 | struct bpf_tramp_image *im; |
| 224 | |
| 225 | im = container_of(pcref, struct bpf_tramp_image, pcref); |
| 226 | call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu); |
| 227 | } |
| 228 | |
| 229 | /* callback, fexit or fentry step 1 */ |
| 230 | static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu) |
| 231 | { |
| 232 | struct bpf_tramp_image *im; |
| 233 | |
| 234 | im = container_of(rcu, struct bpf_tramp_image, rcu); |
| 235 | if (im->ip_after_call) |
| 236 | /* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */ |
| 237 | percpu_ref_kill(&im->pcref); |
| 238 | else |
| 239 | /* the case of fentry trampoline */ |
| 240 | call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu); |
| 241 | } |
| 242 | |
| 243 | static void bpf_tramp_image_put(struct bpf_tramp_image *im) |
| 244 | { |
| 245 | /* The trampoline image that calls original function is using: |
| 246 | * rcu_read_lock_trace to protect sleepable bpf progs |
| 247 | * rcu_read_lock to protect normal bpf progs |
| 248 | * percpu_ref to protect trampoline itself |
| 249 | * rcu tasks to protect trampoline asm not covered by percpu_ref |
| 250 | * (which are few asm insns before __bpf_tramp_enter and |
| 251 | * after __bpf_tramp_exit) |
| 252 | * |
| 253 | * The trampoline is unreachable before bpf_tramp_image_put(). |
| 254 | * |
| 255 | * First, patch the trampoline to avoid calling into fexit progs. |
| 256 | * The progs will be freed even if the original function is still |
| 257 | * executing or sleeping. |
| 258 | * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on |
| 259 | * first few asm instructions to execute and call into |
| 260 | * __bpf_tramp_enter->percpu_ref_get. |
| 261 | * Then use percpu_ref_kill to wait for the trampoline and the original |
| 262 | * function to finish. |
| 263 | * Then use call_rcu_tasks() to make sure few asm insns in |
| 264 | * the trampoline epilogue are done as well. |
| 265 | * |
| 266 | * In !PREEMPT case the task that got interrupted in the first asm |
| 267 | * insns won't go through an RCU quiescent state which the |
| 268 | * percpu_ref_kill will be waiting for. Hence the first |
| 269 | * call_rcu_tasks() is not necessary. |
| 270 | */ |
| 271 | if (im->ip_after_call) { |
| 272 | int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP, |
| 273 | NULL, im->ip_epilogue); |
| 274 | WARN_ON(err); |
| 275 | if (IS_ENABLED(CONFIG_PREEMPTION)) |
| 276 | call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks); |
| 277 | else |
| 278 | percpu_ref_kill(&im->pcref); |
| 279 | return; |
| 280 | } |
| 281 | |
| 282 | /* The trampoline without fexit and fmod_ret progs doesn't call original |
| 283 | * function and doesn't use percpu_ref. |
| 284 | * Use call_rcu_tasks_trace() to wait for sleepable progs to finish. |
| 285 | * Then use call_rcu_tasks() to wait for the rest of trampoline asm |
| 286 | * and normal progs. |
| 287 | */ |
| 288 | call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks); |
| 289 | } |
| 290 | |
| 291 | static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) |
| 292 | { |
| 293 | struct bpf_tramp_image *im; |
| 294 | struct bpf_ksym *ksym; |
| 295 | void *image; |
| 296 | int err = -ENOMEM; |
| 297 | |
| 298 | im = kzalloc(sizeof(*im), GFP_KERNEL); |
| 299 | if (!im) |
| 300 | goto out; |
| 301 | |
| 302 | err = bpf_jit_charge_modmem(1); |
| 303 | if (err) |
| 304 | goto out_free_im; |
| 305 | |
| 306 | err = -ENOMEM; |
| 307 | im->image = image = bpf_jit_alloc_exec_page(); |
| 308 | if (!image) |
| 309 | goto out_uncharge; |
| 310 | |
| 311 | err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL); |
| 312 | if (err) |
| 313 | goto out_free_image; |
| 314 | |
| 315 | ksym = &im->ksym; |
| 316 | INIT_LIST_HEAD_RCU(&ksym->lnode); |
| 317 | snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx); |
| 318 | bpf_image_ksym_add(image, ksym); |
| 319 | return im; |
| 320 | |
| 321 | out_free_image: |
| 322 | bpf_jit_free_exec(im->image); |
| 323 | out_uncharge: |
| 324 | bpf_jit_uncharge_modmem(1); |
| 325 | out_free_im: |
| 326 | kfree(im); |
| 327 | out: |
| 328 | return ERR_PTR(err); |
| 329 | } |
| 330 | |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 331 | static int bpf_trampoline_update(struct bpf_trampoline *tr) |
| 332 | { |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 333 | struct bpf_tramp_image *im; |
KP Singh | 88fd9e5 | 2020-03-04 20:18:47 +0100 | [diff] [blame] | 334 | struct bpf_tramp_progs *tprogs; |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 335 | u32 flags = BPF_TRAMP_F_RESTORE_REGS; |
KP Singh | 88fd9e5 | 2020-03-04 20:18:47 +0100 | [diff] [blame] | 336 | int err, total; |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 337 | |
KP Singh | 88fd9e5 | 2020-03-04 20:18:47 +0100 | [diff] [blame] | 338 | tprogs = bpf_trampoline_get_progs(tr, &total); |
| 339 | if (IS_ERR(tprogs)) |
| 340 | return PTR_ERR(tprogs); |
| 341 | |
| 342 | if (total == 0) { |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 343 | err = unregister_fentry(tr, tr->cur_image->image); |
| 344 | bpf_tramp_image_put(tr->cur_image); |
| 345 | tr->cur_image = NULL; |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 346 | tr->selector = 0; |
| 347 | goto out; |
| 348 | } |
| 349 | |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 350 | im = bpf_tramp_image_alloc(tr->key, tr->selector); |
| 351 | if (IS_ERR(im)) { |
| 352 | err = PTR_ERR(im); |
| 353 | goto out; |
| 354 | } |
| 355 | |
KP Singh | ae24082 | 2020-03-04 20:18:49 +0100 | [diff] [blame] | 356 | if (tprogs[BPF_TRAMP_FEXIT].nr_progs || |
| 357 | tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 358 | flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; |
| 359 | |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 360 | err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, |
KP Singh | 88fd9e5 | 2020-03-04 20:18:47 +0100 | [diff] [blame] | 361 | &tr->func.model, flags, tprogs, |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 362 | tr->func.addr); |
Martin KaFai Lau | 85d33df | 2020-01-08 16:35:05 -0800 | [diff] [blame] | 363 | if (err < 0) |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 364 | goto out; |
| 365 | |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 366 | WARN_ON(tr->cur_image && tr->selector == 0); |
| 367 | WARN_ON(!tr->cur_image && tr->selector); |
| 368 | if (tr->cur_image) |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 369 | /* progs already running at this address */ |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 370 | err = modify_fentry(tr, tr->cur_image->image, im->image); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 371 | else |
| 372 | /* first time registering */ |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 373 | err = register_fentry(tr, im->image); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 374 | if (err) |
| 375 | goto out; |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 376 | if (tr->cur_image) |
| 377 | bpf_tramp_image_put(tr->cur_image); |
| 378 | tr->cur_image = im; |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 379 | tr->selector++; |
| 380 | out: |
KP Singh | 88fd9e5 | 2020-03-04 20:18:47 +0100 | [diff] [blame] | 381 | kfree(tprogs); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 382 | return err; |
| 383 | } |
| 384 | |
KP Singh | 9e4e01d | 2020-03-29 01:43:52 +0100 | [diff] [blame] | 385 | static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 386 | { |
KP Singh | 9e4e01d | 2020-03-29 01:43:52 +0100 | [diff] [blame] | 387 | switch (prog->expected_attach_type) { |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 388 | case BPF_TRACE_FENTRY: |
| 389 | return BPF_TRAMP_FENTRY; |
KP Singh | ae24082 | 2020-03-04 20:18:49 +0100 | [diff] [blame] | 390 | case BPF_MODIFY_RETURN: |
| 391 | return BPF_TRAMP_MODIFY_RETURN; |
Alexei Starovoitov | be8704f | 2020-01-20 16:53:46 -0800 | [diff] [blame] | 392 | case BPF_TRACE_FEXIT: |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 393 | return BPF_TRAMP_FEXIT; |
KP Singh | 9e4e01d | 2020-03-29 01:43:52 +0100 | [diff] [blame] | 394 | case BPF_LSM_MAC: |
| 395 | if (!prog->aux->attach_func_proto->type) |
| 396 | /* The function returns void, we cannot modify its |
| 397 | * return value. |
| 398 | */ |
| 399 | return BPF_TRAMP_FEXIT; |
| 400 | else |
| 401 | return BPF_TRAMP_MODIFY_RETURN; |
Alexei Starovoitov | be8704f | 2020-01-20 16:53:46 -0800 | [diff] [blame] | 402 | default: |
| 403 | return BPF_TRAMP_REPLACE; |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 404 | } |
| 405 | } |
| 406 | |
Toke Høiland-Jørgensen | 3aac1ea | 2020-09-29 14:45:50 +0200 | [diff] [blame] | 407 | int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 408 | { |
| 409 | enum bpf_tramp_prog_type kind; |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 410 | int err = 0; |
Alexei Starovoitov | be8704f | 2020-01-20 16:53:46 -0800 | [diff] [blame] | 411 | int cnt; |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 412 | |
KP Singh | 9e4e01d | 2020-03-29 01:43:52 +0100 | [diff] [blame] | 413 | kind = bpf_attach_type_to_tramp(prog); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 414 | mutex_lock(&tr->mutex); |
Alexei Starovoitov | be8704f | 2020-01-20 16:53:46 -0800 | [diff] [blame] | 415 | if (tr->extension_prog) { |
| 416 | /* cannot attach fentry/fexit if extension prog is attached. |
| 417 | * cannot overwrite extension prog either. |
| 418 | */ |
| 419 | err = -EBUSY; |
| 420 | goto out; |
| 421 | } |
| 422 | cnt = tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT]; |
| 423 | if (kind == BPF_TRAMP_REPLACE) { |
| 424 | /* Cannot attach extension if fentry/fexit are in use. */ |
| 425 | if (cnt) { |
| 426 | err = -EBUSY; |
| 427 | goto out; |
| 428 | } |
| 429 | tr->extension_prog = prog; |
| 430 | err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, NULL, |
| 431 | prog->bpf_func); |
| 432 | goto out; |
| 433 | } |
| 434 | if (cnt >= BPF_MAX_TRAMP_PROGS) { |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 435 | err = -E2BIG; |
| 436 | goto out; |
| 437 | } |
| 438 | if (!hlist_unhashed(&prog->aux->tramp_hlist)) { |
| 439 | /* prog already linked */ |
| 440 | err = -EBUSY; |
| 441 | goto out; |
| 442 | } |
| 443 | hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]); |
| 444 | tr->progs_cnt[kind]++; |
Toke Høiland-Jørgensen | 3aac1ea | 2020-09-29 14:45:50 +0200 | [diff] [blame] | 445 | err = bpf_trampoline_update(tr); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 446 | if (err) { |
Jiri Olsa | f3a9507 | 2021-04-14 21:51:41 +0200 | [diff] [blame] | 447 | hlist_del_init(&prog->aux->tramp_hlist); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 448 | tr->progs_cnt[kind]--; |
| 449 | } |
| 450 | out: |
| 451 | mutex_unlock(&tr->mutex); |
| 452 | return err; |
| 453 | } |
| 454 | |
| 455 | /* bpf_trampoline_unlink_prog() should never fail. */ |
Toke Høiland-Jørgensen | 3aac1ea | 2020-09-29 14:45:50 +0200 | [diff] [blame] | 456 | int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 457 | { |
| 458 | enum bpf_tramp_prog_type kind; |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 459 | int err; |
| 460 | |
KP Singh | 9e4e01d | 2020-03-29 01:43:52 +0100 | [diff] [blame] | 461 | kind = bpf_attach_type_to_tramp(prog); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 462 | mutex_lock(&tr->mutex); |
Alexei Starovoitov | be8704f | 2020-01-20 16:53:46 -0800 | [diff] [blame] | 463 | if (kind == BPF_TRAMP_REPLACE) { |
| 464 | WARN_ON_ONCE(!tr->extension_prog); |
| 465 | err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_JUMP, |
| 466 | tr->extension_prog->bpf_func, NULL); |
| 467 | tr->extension_prog = NULL; |
| 468 | goto out; |
| 469 | } |
Jiri Olsa | f3a9507 | 2021-04-14 21:51:41 +0200 | [diff] [blame] | 470 | hlist_del_init(&prog->aux->tramp_hlist); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 471 | tr->progs_cnt[kind]--; |
Toke Høiland-Jørgensen | 3aac1ea | 2020-09-29 14:45:50 +0200 | [diff] [blame] | 472 | err = bpf_trampoline_update(tr); |
Alexei Starovoitov | be8704f | 2020-01-20 16:53:46 -0800 | [diff] [blame] | 473 | out: |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 474 | mutex_unlock(&tr->mutex); |
| 475 | return err; |
| 476 | } |
| 477 | |
Toke Høiland-Jørgensen | f7b12b6 | 2020-09-25 23:25:02 +0200 | [diff] [blame] | 478 | struct bpf_trampoline *bpf_trampoline_get(u64 key, |
| 479 | struct bpf_attach_target_info *tgt_info) |
| 480 | { |
| 481 | struct bpf_trampoline *tr; |
| 482 | |
| 483 | tr = bpf_trampoline_lookup(key); |
| 484 | if (!tr) |
| 485 | return NULL; |
| 486 | |
| 487 | mutex_lock(&tr->mutex); |
| 488 | if (tr->func.addr) |
| 489 | goto out; |
| 490 | |
| 491 | memcpy(&tr->func.model, &tgt_info->fmodel, sizeof(tgt_info->fmodel)); |
| 492 | tr->func.addr = (void *)tgt_info->tgt_addr; |
| 493 | out: |
| 494 | mutex_unlock(&tr->mutex); |
| 495 | return tr; |
| 496 | } |
| 497 | |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 498 | void bpf_trampoline_put(struct bpf_trampoline *tr) |
| 499 | { |
| 500 | if (!tr) |
| 501 | return; |
| 502 | mutex_lock(&trampoline_mutex); |
| 503 | if (!refcount_dec_and_test(&tr->refcnt)) |
| 504 | goto out; |
| 505 | WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); |
| 506 | if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) |
| 507 | goto out; |
| 508 | if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) |
| 509 | goto out; |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 510 | /* This code will be executed even when the last bpf_tramp_image |
| 511 | * is alive. All progs are detached from the trampoline and the |
| 512 | * trampoline image is patched with jmp into epilogue to skip |
| 513 | * fexit progs. The fentry-only trampoline will be freed via |
| 514 | * multiple rcu callbacks. |
Alexei Starovoitov | 1e6c62a | 2020-08-27 15:01:11 -0700 | [diff] [blame] | 515 | */ |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 516 | hlist_del(&tr->hlist); |
| 517 | kfree(tr); |
| 518 | out: |
| 519 | mutex_unlock(&trampoline_mutex); |
| 520 | } |
| 521 | |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 522 | #define NO_START_TIME 1 |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 523 | static u64 notrace bpf_prog_start_time(void) |
| 524 | { |
| 525 | u64 start = NO_START_TIME; |
| 526 | |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 527 | if (static_branch_unlikely(&bpf_stats_enabled_key)) { |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 528 | start = sched_clock(); |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 529 | if (unlikely(!start)) |
| 530 | start = NO_START_TIME; |
| 531 | } |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 532 | return start; |
| 533 | } |
| 534 | |
Alexei Starovoitov | 9ed9e9b | 2021-02-09 19:36:31 -0800 | [diff] [blame] | 535 | static void notrace inc_misses_counter(struct bpf_prog *prog) |
| 536 | { |
| 537 | struct bpf_prog_stats *stats; |
| 538 | |
| 539 | stats = this_cpu_ptr(prog->stats); |
| 540 | u64_stats_update_begin(&stats->syncp); |
| 541 | stats->misses++; |
| 542 | u64_stats_update_end(&stats->syncp); |
| 543 | } |
| 544 | |
David Miller | 02ad059 | 2020-02-24 15:01:45 +0100 | [diff] [blame] | 545 | /* The logic is similar to BPF_PROG_RUN, but with an explicit |
| 546 | * rcu_read_lock() and migrate_disable() which are required |
| 547 | * for the trampoline. The macro is split into |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 548 | * call __bpf_prog_enter |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 549 | * call prog->bpf_func |
| 550 | * call __bpf_prog_exit |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 551 | * |
| 552 | * __bpf_prog_enter returns: |
| 553 | * 0 - skip execution of the bpf prog |
| 554 | * 1 - execute bpf prog |
Zhen Lei | 8fb33b6 | 2021-05-25 10:56:59 +0800 | [diff] [blame] | 555 | * [2..MAX_U64] - execute bpf prog and record execution time. |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 556 | * This is start time. |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 557 | */ |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 558 | u64 notrace __bpf_prog_enter(struct bpf_prog *prog) |
Jules Irenge | dcce11d | 2020-03-11 01:09:01 +0000 | [diff] [blame] | 559 | __acquires(RCU) |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 560 | { |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 561 | rcu_read_lock(); |
David Miller | 02ad059 | 2020-02-24 15:01:45 +0100 | [diff] [blame] | 562 | migrate_disable(); |
Alexei Starovoitov | 9ed9e9b | 2021-02-09 19:36:31 -0800 | [diff] [blame] | 563 | if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { |
| 564 | inc_misses_counter(prog); |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 565 | return 0; |
Alexei Starovoitov | 9ed9e9b | 2021-02-09 19:36:31 -0800 | [diff] [blame] | 566 | } |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 567 | return bpf_prog_start_time(); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 568 | } |
| 569 | |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 570 | static void notrace update_prog_stats(struct bpf_prog *prog, |
| 571 | u64 start) |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 572 | { |
| 573 | struct bpf_prog_stats *stats; |
| 574 | |
| 575 | if (static_branch_unlikely(&bpf_stats_enabled_key) && |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 576 | /* static_key could be enabled in __bpf_prog_enter* |
| 577 | * and disabled in __bpf_prog_exit*. |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 578 | * And vice versa. |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 579 | * Hence check that 'start' is valid. |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 580 | */ |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 581 | start > NO_START_TIME) { |
Alexei Starovoitov | 700d479 | 2021-02-09 19:36:26 -0800 | [diff] [blame] | 582 | stats = this_cpu_ptr(prog->stats); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 583 | u64_stats_update_begin(&stats->syncp); |
| 584 | stats->cnt++; |
| 585 | stats->nsecs += sched_clock() - start; |
| 586 | u64_stats_update_end(&stats->syncp); |
| 587 | } |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 588 | } |
| 589 | |
| 590 | void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) |
| 591 | __releases(RCU) |
| 592 | { |
| 593 | update_prog_stats(prog, start); |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 594 | __this_cpu_dec(*(prog->active)); |
David Miller | 02ad059 | 2020-02-24 15:01:45 +0100 | [diff] [blame] | 595 | migrate_enable(); |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 596 | rcu_read_unlock(); |
| 597 | } |
| 598 | |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 599 | u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog) |
Alexei Starovoitov | 1e6c62a | 2020-08-27 15:01:11 -0700 | [diff] [blame] | 600 | { |
| 601 | rcu_read_lock_trace(); |
Alexei Starovoitov | 031d6e0 | 2021-02-09 19:36:27 -0800 | [diff] [blame] | 602 | migrate_disable(); |
Alexei Starovoitov | f56407f | 2020-08-31 13:16:51 -0700 | [diff] [blame] | 603 | might_fault(); |
Alexei Starovoitov | 9ed9e9b | 2021-02-09 19:36:31 -0800 | [diff] [blame] | 604 | if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) { |
| 605 | inc_misses_counter(prog); |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 606 | return 0; |
Alexei Starovoitov | 9ed9e9b | 2021-02-09 19:36:31 -0800 | [diff] [blame] | 607 | } |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 608 | return bpf_prog_start_time(); |
Alexei Starovoitov | 1e6c62a | 2020-08-27 15:01:11 -0700 | [diff] [blame] | 609 | } |
| 610 | |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 611 | void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start) |
Alexei Starovoitov | 1e6c62a | 2020-08-27 15:01:11 -0700 | [diff] [blame] | 612 | { |
Alexei Starovoitov | f2dd3b3 | 2021-02-09 19:36:28 -0800 | [diff] [blame] | 613 | update_prog_stats(prog, start); |
Alexei Starovoitov | ca06f55 | 2021-02-09 19:36:29 -0800 | [diff] [blame] | 614 | __this_cpu_dec(*(prog->active)); |
Alexei Starovoitov | 031d6e0 | 2021-02-09 19:36:27 -0800 | [diff] [blame] | 615 | migrate_enable(); |
Alexei Starovoitov | 1e6c62a | 2020-08-27 15:01:11 -0700 | [diff] [blame] | 616 | rcu_read_unlock_trace(); |
| 617 | } |
| 618 | |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 619 | void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr) |
| 620 | { |
| 621 | percpu_ref_get(&tr->pcref); |
| 622 | } |
| 623 | |
| 624 | void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr) |
| 625 | { |
| 626 | percpu_ref_put(&tr->pcref); |
| 627 | } |
| 628 | |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 629 | int __weak |
Alexei Starovoitov | e21aa34 | 2021-03-16 14:00:07 -0700 | [diff] [blame] | 630 | arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, |
Martin KaFai Lau | 85d33df | 2020-01-08 16:35:05 -0800 | [diff] [blame] | 631 | const struct btf_func_model *m, u32 flags, |
KP Singh | 88fd9e5 | 2020-03-04 20:18:47 +0100 | [diff] [blame] | 632 | struct bpf_tramp_progs *tprogs, |
Alexei Starovoitov | fec56f5 | 2019-11-14 10:57:04 -0800 | [diff] [blame] | 633 | void *orig_call) |
| 634 | { |
| 635 | return -ENOTSUPP; |
| 636 | } |
| 637 | |
| 638 | static int __init init_trampolines(void) |
| 639 | { |
| 640 | int i; |
| 641 | |
| 642 | for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++) |
| 643 | INIT_HLIST_HEAD(&trampoline_table[i]); |
| 644 | return 0; |
| 645 | } |
| 646 | late_initcall(init_trampolines); |