/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/kprobes.h>
#include <linux/syscalls.h>
#include <linux/error-injection.h>

#include "trace_probe.h"
#include "trace.h"

u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);

/**
 * trace_call_bpf - invoke BPF program
 * @call: tracepoint event
 * @ctx: opaque context pointer
 *
 * kprobe handlers execute BPF programs via this helper.
 * Can be used from static tracepoints in the future.
 *
 * Return: BPF programs always return an integer which is interpreted by
 * kprobe handler as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1
 */
unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
{
	unsigned int ret;

	if (in_nmi()) /* not supported yet */
		return 1;

	preempt_disable();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * since some bpf program is already running on this cpu,
		 * don't call into another bpf program (same or different)
		 * and don't send kprobe event into ring-buffer,
		 * so return zero here
		 */
		ret = 0;
		goto out;
	}

	/*
	 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
	 * to all call sites, we do a bpf_prog_array_valid() check there to
	 * see whether call->prog_array is empty or not, which is
	 * a heuristic to speed up execution.
	 *
	 * If the prog_array fetched by bpf_prog_array_valid() was
	 * non-NULL, we enter trace_call_bpf() and do the proper
	 * rcu_dereference() under the RCU lock. If it turns out that the
	 * prog_array is NULL, we bail out.
	 * Conversely, if the pointer fetched by bpf_prog_array_valid()
	 * was NULL, we skip the prog_array at the risk of missing events
	 * that were added between that check and the rcu_dereference(),
	 * which is an accepted risk.
	 */
	ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);

 out:
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	return ret;
}
EXPORT_SYMBOL_GPL(trace_call_bpf);

#ifdef CONFIG_BPF_KPROBE_OVERRIDE
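/*
 * bpf_override_return() - make the probed function return @rc without
 * executing its body. Only usable from kprobes placed on the function
 * entry of functions that opted in to error injection; see the checks in
 * perf_event_attach_bpf_prog() below.
 */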
BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
{
	regs_set_return_value(regs, rc);
	override_function_with_return(regs);
	return 0;
}

static const struct bpf_func_proto bpf_override_return_proto = {
	.func		= bpf_override_return,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};
#endif

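/*
 * bpf_probe_read() - safely copy @size bytes from an arbitrary kernel
 * address @unsafe_ptr into the BPF-supplied buffer @dst. On a fault the
 * destination is zeroed so programs that ignore the return value never
 * observe uninitialized data.
 *
 * BPF program side usage (sketch):
 *
 *	struct task_struct *task = (void *) bpf_get_current_task();
 *	int prio;
 *	bpf_probe_read(&prio, sizeof(prio), &task->prio);
 */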
BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
{
	int ret;

	ret = probe_kernel_read(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

static const struct bpf_func_proto bpf_probe_read_proto = {
	.func		= bpf_probe_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

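/*
 * bpf_probe_write_user() - best-effort write of @size bytes from @src to
 * the user-space address @unsafe_ptr in the current task's address space.
 * Restricted to process context with a real user mm; meant for
 * experiments and debugging, hence the loud warning printed at attach
 * time by bpf_get_probe_write_proto() below.
 */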
BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
	   u32, size)
{
	/*
	 * Ensure we're in user context which is safe for the helper to
	 * run. This helper has no business in a kthread.
	 *
	 * access_ok() should prevent writing to non-user memory, but in
	 * some situations (nommu, temporary switch, etc) access_ok() does
	 * not provide enough validation, hence the check on KERNEL_DS.
	 */

	if (unlikely(in_interrupt() ||
		     current->flags & (PF_KTHREAD | PF_EXITING)))
		return -EPERM;
	if (unlikely(uaccess_kernel()))
		return -EPERM;
	if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
		return -EPERM;

	return probe_kernel_write(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
	.func		= bpf_probe_write_user,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_ANYTHING,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
{
	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
			    current->comm, task_pid_nr(current));

	return &bpf_probe_write_user_proto;
}

/*
 * Only limited trace_printk() conversion specifiers allowed:
 * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
 */
BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
	   u64, arg2, u64, arg3)
{
	bool str_seen = false;
	int mod[3] = {};
	int fmt_cnt = 0;
	u64 unsafe_addr;
	char buf[64];
	int i;

	/*
	 * bpf_check()->check_func_arg()->check_stack_boundary()
	 * guarantees that fmt points to bpf program stack,
	 * fmt_size bytes of it were initialized and fmt_size > 0
	 */
	if (fmt[--fmt_size] != 0)
		return -EINVAL;

	/* check format string for allowed specifiers */
	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
			return -EINVAL;

		if (fmt[i] != '%')
			continue;

		if (fmt_cnt >= 3)
			return -EINVAL;

		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
		i++;
		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		} else if (fmt[i] == 'p' || fmt[i] == 's') {
			mod[fmt_cnt]++;
			i++;
			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
				return -EINVAL;
			fmt_cnt++;
			if (fmt[i - 1] == 's') {
				if (str_seen)
					/* allow only one '%s' per fmt string */
					return -EINVAL;
				str_seen = true;

				switch (fmt_cnt) {
				case 1:
					unsafe_addr = arg1;
					arg1 = (long) buf;
					break;
				case 2:
					unsafe_addr = arg2;
					arg2 = (long) buf;
					break;
				case 3:
					unsafe_addr = arg3;
					arg3 = (long) buf;
					break;
				}
				buf[0] = 0;
				strncpy_from_unsafe(buf,
						    (void *) (long) unsafe_addr,
						    sizeof(buf));
			}
			continue;
		}

		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		}

		if (fmt[i] != 'i' && fmt[i] != 'd' &&
		    fmt[i] != 'u' && fmt[i] != 'x')
			return -EINVAL;
		fmt_cnt++;
	}

/* Horrid workaround for getting va_list handling working with different
 * argument type combinations generically for 32 and 64 bit archs.
 */
#define __BPF_TP_EMIT()	__BPF_ARG3_TP()
#define __BPF_TP(...)						\
	__trace_printk(0 /* Fake ip */,				\
		       fmt, ##__VA_ARGS__)

#define __BPF_ARG1_TP(...)						\
	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_TP(arg1, ##__VA_ARGS__)				\
	  : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_TP((long)arg1, ##__VA_ARGS__)			\
	      : __BPF_TP((u32)arg1, ##__VA_ARGS__)))

#define __BPF_ARG2_TP(...)						\
	((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__)				\
	  : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__)		\
	      : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))

#define __BPF_ARG3_TP(...)						\
	((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64))	\
	  ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__)				\
	  : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32))	\
	      ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__)		\
	      : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))

	return __BPF_TP_EMIT();
}

static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM,
	.arg2_type	= ARG_CONST_SIZE,
};

const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
{
	/*
	 * this program might be calling bpf_trace_printk,
	 * so allocate per-cpu printk buffers
	 */
	trace_printk_init_buffers();

	return &bpf_trace_printk_proto;
}

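/*
 * Common helper for the perf_event_read*() helpers below: resolve @flags
 * (an index into the BPF_MAP_TYPE_PERF_EVENT_ARRAY @map, or
 * BPF_F_CURRENT_CPU) to a perf event and read its counter on the local
 * CPU.
 */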
static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
		     u64 *value, u64 *enabled, u64 *running)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;
	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	return perf_event_read_local(ee->event, value, enabled, running);
}

BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
{
	u64 value = 0;
	int err;

	err = get_map_perf_counter(map, flags, &value, NULL, NULL);
	/*
	 * this api is ugly since we miss [-22..-2] range of valid
	 * counter values, but that's uapi
	 */
	if (err)
		return err;
	return value;
}

static const struct bpf_func_proto bpf_perf_event_read_proto = {
	.func		= bpf_perf_event_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

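/*
 * Like bpf_perf_event_read(), but returns the counter together with the
 * enabled/running times in @buf and keeps the error code separate from
 * the counter value. @buf is cleared on any error.
 */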
BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
				   &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
	.func		= bpf_perf_event_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);

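/*
 * Core of the perf_event_output helpers: resolve @flags to a perf event
 * in the BPF_MAP_TYPE_PERF_EVENT_ARRAY @map (honouring
 * BPF_F_CURRENT_CPU), check that it is a PERF_COUNT_SW_BPF_OUTPUT event
 * owned by this CPU and emit the prepared sample @sd into its ring
 * buffer.
 */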
static __always_inline u64
__bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
			u64 flags, struct perf_sample_data *sd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	unsigned int cpu = smp_processor_id();
	u64 index = flags & BPF_F_INDEX_MASK;
	struct bpf_event_entry *ee;
	struct perf_event *event;

	if (index == BPF_F_CURRENT_CPU)
		index = cpu;
	if (unlikely(index >= array->map.max_entries))
		return -E2BIG;

	ee = READ_ONCE(array->ptrs[index]);
	if (!ee)
		return -ENOENT;

	event = ee->event;
	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
		     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
		return -EINVAL;

	if (unlikely(event->oncpu != cpu))
		return -EOPNOTSUPP;

	perf_event_output(event, sd, regs);
	return 0;
}

BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
	struct perf_raw_record raw = {
		.frag = {
			.size = size,
			.data = data,
		},
	};

	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
		return -EINVAL;

	perf_sample_data_init(sd, 0, 0);
	sd->raw = &raw;

	return __bpf_perf_event_output(regs, map, flags, sd);
}

static const struct bpf_func_proto bpf_perf_event_output_proto = {
	.func		= bpf_perf_event_output,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);

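/*
 * bpf_event_output() - variant of bpf_perf_event_output() for callers
 * without a tracing context (e.g. the skb/xdp event output helpers):
 * @meta plus an optional @ctx fragment copied via @ctx_copy form the raw
 * sample, and pt_regs are captured here with perf_fetch_caller_regs().
 */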
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
	struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
	struct perf_raw_frag frag = {
		.copy		= ctx_copy,
		.size		= ctx_size,
		.data		= ctx,
	};
	struct perf_raw_record raw = {
		.frag = {
			{
				.next	= ctx_size ? &frag : NULL,
			},
			.size	= meta_size,
			.data	= meta,
		},
	};

	perf_fetch_caller_regs(regs);
	perf_sample_data_init(sd, 0, 0);
	sd->raw = &raw;

	return __bpf_perf_event_output(regs, map, flags, sd);
}

BPF_CALL_0(bpf_get_current_task)
{
	return (long) current;
}

static const struct bpf_func_proto bpf_get_current_task_proto = {
	.func		= bpf_get_current_task,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
};

BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct cgroup *cgrp;

	if (unlikely(idx >= array->map.max_entries))
		return -E2BIG;

	cgrp = READ_ONCE(array->ptrs[idx]);
	if (unlikely(!cgrp))
		return -EAGAIN;

	return task_under_cgroup_hierarchy(current, cgrp);
}

static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
	.func		= bpf_current_task_under_cgroup,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_CONST_MAP_PTR,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
	   const void *, unsafe_ptr)
{
	int ret;

	/*
	 * The strncpy_from_unsafe() call will likely not fill the entire
	 * buffer, but that's okay in this circumstance as we're probing
	 * arbitrary memory anyway similar to bpf_probe_read() and might
	 * as well probe the stack. Thus, memory is explicitly cleared
	 * only in error case, so that improper users ignoring return
	 * code altogether don't copy garbage; otherwise length of string
	 * is returned that can be used for bpf_perf_event_output() et al.
	 */
	ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
	if (unlikely(ret < 0))
		memset(dst, 0, size);

	return ret;
}

static const struct bpf_func_proto bpf_probe_read_str_proto = {
	.func		= bpf_probe_read_str,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_probe_read:
		return &bpf_probe_read_proto;
	case BPF_FUNC_ktime_get_ns:
		return &bpf_ktime_get_ns_proto;
	case BPF_FUNC_tail_call:
		return &bpf_tail_call_proto;
	case BPF_FUNC_get_current_pid_tgid:
		return &bpf_get_current_pid_tgid_proto;
	case BPF_FUNC_get_current_task:
		return &bpf_get_current_task_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_current_comm:
		return &bpf_get_current_comm_proto;
	case BPF_FUNC_trace_printk:
		return bpf_get_trace_printk_proto();
	case BPF_FUNC_get_smp_processor_id:
		return &bpf_get_smp_processor_id_proto;
	case BPF_FUNC_get_numa_node_id:
		return &bpf_get_numa_node_id_proto;
	case BPF_FUNC_perf_event_read:
		return &bpf_perf_event_read_proto;
	case BPF_FUNC_probe_write_user:
		return bpf_get_probe_write_proto();
	case BPF_FUNC_current_task_under_cgroup:
		return &bpf_current_task_under_cgroup_proto;
	case BPF_FUNC_get_prandom_u32:
		return &bpf_get_prandom_u32_proto;
	case BPF_FUNC_probe_read_str:
		return &bpf_probe_read_str_proto;
	default:
		return NULL;
	}
}

static const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto;
	case BPF_FUNC_perf_event_read_value:
		return &bpf_perf_event_read_value_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
	case BPF_FUNC_override_return:
		return &bpf_override_return_proto;
#endif
	default:
		return tracing_func_proto(func_id, prog);
	}
}

/* bpf+kprobe programs can access fields of 'struct pt_regs' */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	/*
	 * Assertion for 32 bit to make sure last 8 byte access
	 * (BPF_DW) to the last 4 byte member is disallowed.
	 */
	if (off + size > sizeof(struct pt_regs))
		return false;

	return true;
}

const struct bpf_verifier_ops kprobe_verifier_ops = {
	.get_func_proto  = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

const struct bpf_prog_ops kprobe_prog_ops = {
};

BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * r1 points to perf tracepoint buffer where first 8 bytes are hidden
	 * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it
	 * from there and call the same bpf_perf_event_output() helper inline.
	 */
	return ____bpf_perf_event_output(regs, map, flags, data, size);
}

static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
	.func		= bpf_perf_event_output_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	/*
	 * Same comment as in bpf_perf_event_output_tp(), only that this time
	 * the other helper's function body cannot be inlined due to being
	 * external, thus we need to call the raw helper function.
	 */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}

static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
	.func		= bpf_get_stackid_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
	   u64, flags)
{
	struct pt_regs *regs = *(struct pt_regs **)tp_buff;

	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			     (unsigned long) size, flags, 0);
}

static const struct bpf_func_proto bpf_get_stack_proto_tp = {
	.func		= bpf_get_stack_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_tp;
	default:
		return tracing_func_proto(func_id, prog);
	}
}

static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
	return true;
}

const struct bpf_verifier_ops tracepoint_verifier_ops = {
	.get_func_proto  = tp_prog_func_proto,
	.is_valid_access = tp_prog_is_valid_access,
};

const struct bpf_prog_ops tracepoint_prog_ops = {
};

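/*
 * bpf_perf_prog_read_value() - for programs attached to a perf event:
 * read the counter, enabled and running times of the event the program
 * runs on, taken from the program context rather than from a map.
 */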
BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_perf_event_value *, buf, u32, size)
{
	int err = -EINVAL;

	if (unlikely(size != sizeof(struct bpf_perf_event_value)))
		goto clear;
	err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
				    &buf->running);
	if (unlikely(err))
		goto clear;
	return 0;
clear:
	memset(buf, 0, size);
	return err;
}

static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
	.func		= bpf_perf_prog_read_value,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *
pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_tp;
	case BPF_FUNC_perf_prog_read_value:
		return &bpf_perf_prog_read_value_proto;
	default:
		return tracing_func_proto(func_id, prog);
	}
}

/*
 * bpf_raw_tp_regs are separate from bpf_pt_regs used from skb/xdp
 * to avoid potential recursive reuse issue when/if tracepoints are added
 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
 */
static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags, void *, data, u64, size)
{
	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);

	perf_fetch_caller_regs(regs);
	return ____bpf_perf_event_output(regs, map, flags, data, size);
}

static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
	.func		= bpf_perf_event_output_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_PTR_TO_MEM,
	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
};

BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   struct bpf_map *, map, u64, flags)
{
	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);

	perf_fetch_caller_regs(regs);
	/* similar to bpf_perf_event_output_tp, but pt_regs fetched differently */
	return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
			       flags, 0, 0);
}

static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
	.func		= bpf_get_stackid_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);

	perf_fetch_caller_regs(regs);
	return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
			     (unsigned long) size, flags, 0);
}

static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
	.func		= bpf_get_stack_raw_tp,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

static const struct bpf_func_proto *
raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_perf_event_output:
		return &bpf_perf_event_output_proto_raw_tp;
	case BPF_FUNC_get_stackid:
		return &bpf_get_stackid_proto_raw_tp;
	case BPF_FUNC_get_stack:
		return &bpf_get_stack_proto_raw_tp;
	default:
		return tracing_func_proto(func_id, prog);
	}
}

static bool raw_tp_prog_is_valid_access(int off, int size,
					enum bpf_access_type type,
					const struct bpf_prog *prog,
					struct bpf_insn_access_aux *info)
{
	/* largest tracepoint in the kernel has 12 args */
	if (off < 0 || off >= sizeof(__u64) * 12)
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;
	return true;
}

const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
	.get_func_proto  = raw_tp_prog_func_proto,
	.is_valid_access = raw_tp_prog_is_valid_access,
};

const struct bpf_prog_ops raw_tracepoint_prog_ops = {
};

static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	const int size_u64 = sizeof(u64);

	if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
		return false;
	if (type != BPF_READ)
		return false;
	if (off % size != 0)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	case bpf_ctx_range(struct bpf_perf_event_data, addr):
		bpf_ctx_record_field_size(info, size_u64);
		if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
			return false;
		break;
	default:
		if (size != sizeof(long))
			return false;
	}

	return true;
}

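/*
 * Rewrite loads from struct bpf_perf_event_data (the user-visible context
 * of perf_event programs) into loads from the in-kernel
 * struct bpf_perf_event_data_kern: sample_period and addr are fetched
 * through the perf_sample_data pointer, every other offset is treated as
 * an offset into the saved pt_regs.
 */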
static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
				      const struct bpf_insn *si,
				      struct bpf_insn *insn_buf,
				      struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_perf_event_data, sample_period):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, period, 8,
						     target_size));
		break;
	case offsetof(struct bpf_perf_event_data, addr):
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       data), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, data));
		*insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
				      bpf_target_off(struct perf_sample_data, addr, 8,
						     target_size));
		break;
	default:
		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
						       regs), si->dst_reg, si->src_reg,
				      offsetof(struct bpf_perf_event_data_kern, regs));
		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
				      si->off);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops perf_event_verifier_ops = {
	.get_func_proto		= pe_prog_func_proto,
	.is_valid_access	= pe_prog_is_valid_access,
	.convert_ctx_access	= pe_prog_convert_ctx_access,
};

const struct bpf_prog_ops perf_event_prog_ops = {
};

static DEFINE_MUTEX(bpf_event_mutex);

#define BPF_TRACE_MAX_PROGS 64

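/*
 * Attach @prog to the trace event behind @event. Programs are collected
 * in a copy-on-write bpf_prog_array hanging off event->tp_event, capped
 * at BPF_TRACE_MAX_PROGS entries, and executed by trace_call_bpf() above.
 */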
int perf_event_attach_bpf_prog(struct perf_event *event,
			       struct bpf_prog *prog)
{
	struct bpf_prog_array __rcu *old_array;
	struct bpf_prog_array *new_array;
	int ret = -EEXIST;

	/*
	 * Kprobe override only works if the kprobe is placed on the
	 * function entry, and only if the function is on the
	 * error-injection opt-in list.
	 */
	if (prog->kprobe_override &&
	    (!trace_kprobe_on_func_entry(event->tp_event) ||
	     !trace_kprobe_error_injectable(event->tp_event)))
		return -EINVAL;

	mutex_lock(&bpf_event_mutex);

	if (event->prog)
		goto unlock;

	old_array = event->tp_event->prog_array;
	if (old_array &&
	    bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
		ret = -E2BIG;
		goto unlock;
	}

	ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
	if (ret < 0)
		goto unlock;

	/* set the new array to event->tp_event and set event->prog */
	event->prog = prog;
	rcu_assign_pointer(event->tp_event->prog_array, new_array);
	bpf_prog_array_free(old_array);

unlock:
	mutex_unlock(&bpf_event_mutex);
	return ret;
}

void perf_event_detach_bpf_prog(struct perf_event *event)
{
	struct bpf_prog_array __rcu *old_array;
	struct bpf_prog_array *new_array;
	int ret;

	mutex_lock(&bpf_event_mutex);

	if (!event->prog)
		goto unlock;

	old_array = event->tp_event->prog_array;
	ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
	if (ret < 0) {
		bpf_prog_array_delete_safe(old_array, event->prog);
	} else {
		rcu_assign_pointer(event->tp_event->prog_array, new_array);
		bpf_prog_array_free(old_array);
	}

	bpf_prog_put(event->prog);
	event->prog = NULL;

unlock:
	mutex_unlock(&bpf_event_mutex);
}

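/*
 * Back end of the PERF_EVENT_IOC_QUERY_BPF ioctl: copy the ids of the
 * programs currently attached to @event's tracepoint back to user space,
 * together with the total program count.
 */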
int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
	struct perf_event_query_bpf __user *uquery = info;
	struct perf_event_query_bpf query = {};
	u32 *ids, prog_cnt, ids_len;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (event->attr.type != PERF_TYPE_TRACEPOINT)
		return -EINVAL;
	if (copy_from_user(&query, uquery, sizeof(query)))
		return -EFAULT;

	ids_len = query.ids_len;
	if (ids_len > BPF_TRACE_MAX_PROGS)
		return -E2BIG;
	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
	if (!ids)
		return -ENOMEM;
	/*
	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
	 * is required when user only wants to check for uquery->prog_cnt.
	 * There is no need to check for it since the case is handled
	 * gracefully in bpf_prog_array_copy_info.
	 */

	mutex_lock(&bpf_event_mutex);
	ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
				       ids,
				       ids_len,
				       &prog_cnt);
	mutex_unlock(&bpf_event_mutex);

	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
		ret = -EFAULT;

	kfree(ids);
	return ret;
}

extern struct bpf_raw_event_map __start__bpf_raw_tp[];
extern struct bpf_raw_event_map __stop__bpf_raw_tp[];

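/*
 * Raw tracepoints: look up a tracepoint by name in the __bpf_raw_tp
 * section, which holds one bpf_raw_event_map entry per tracepoint
 * (emitted by the tracepoint definition macros).
 */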
struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
{
	struct bpf_raw_event_map *btp = __start__bpf_raw_tp;

	for (; btp < __stop__bpf_raw_tp; btp++) {
		if (!strcmp(btp->tp->name, name))
			return btp;
	}
	return NULL;
}

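/*
 * Run @prog for a raw tracepoint with @args (an array holding the raw
 * tracepoint arguments) as its context, under rcu_read_lock() and with
 * preemption disabled.
 */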
static __always_inline
void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
{
	rcu_read_lock();
	preempt_disable();
	(void) BPF_PROG_RUN(prog, args);
	preempt_enable();
	rcu_read_unlock();
}

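/*
 * The macros below generate bpf_trace_run1() .. bpf_trace_run12(), the
 * functions the raw tracepoint probes call with 1..12 u64 arguments.
 * REPEAT(x, SARG, ...) expands to the parameter list "u64 arg0, ..." and
 * REPEAT(x, COPY, ...) to the statements copying those arguments into the
 * args[] array handed to __bpf_trace_run(). For example,
 * BPF_TRACE_DEFN_x(2) expands roughly to:
 *
 *	void bpf_trace_run2(struct bpf_prog *prog, u64 arg0, u64 arg1)
 *	{
 *		u64 args[2];
 *		args[0] = arg0; args[1] = arg1;
 *		__bpf_trace_run(prog, args);
 *	}
 *	EXPORT_SYMBOL_GPL(bpf_trace_run2);
 */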
#define UNPACK(...)			__VA_ARGS__
#define REPEAT_1(FN, DL, X, ...)	FN(X)
#define REPEAT_2(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
#define REPEAT_3(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
#define REPEAT_4(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
#define REPEAT_5(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
#define REPEAT_6(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
#define REPEAT_7(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
#define REPEAT_8(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
#define REPEAT_9(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
#define REPEAT_10(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
#define REPEAT_11(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
#define REPEAT_12(FN, DL, X, ...)	FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
#define REPEAT(X, FN, DL, ...)		REPEAT_##X(FN, DL, __VA_ARGS__)

#define SARG(X)		u64 arg##X
#define COPY(X)		args[X] = arg##X

#define __DL_COM	(,)
#define __DL_SEM	(;)

#define __SEQ_0_11	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11

#define BPF_TRACE_DEFN_x(x)						\
	void bpf_trace_run##x(struct bpf_prog *prog,			\
			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
	{								\
		u64 args[x];						\
		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
		__bpf_trace_run(prog, args);				\
	}								\
	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
BPF_TRACE_DEFN_x(1);
BPF_TRACE_DEFN_x(2);
BPF_TRACE_DEFN_x(3);
BPF_TRACE_DEFN_x(4);
BPF_TRACE_DEFN_x(5);
BPF_TRACE_DEFN_x(6);
BPF_TRACE_DEFN_x(7);
BPF_TRACE_DEFN_x(8);
BPF_TRACE_DEFN_x(9);
BPF_TRACE_DEFN_x(10);
BPF_TRACE_DEFN_x(11);
BPF_TRACE_DEFN_x(12);

static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	struct tracepoint *tp = btp->tp;

	/*
	 * check that program doesn't access arguments beyond what's
	 * available in this tracepoint
	 */
	if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
		return -EINVAL;

	return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
}

int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	int err;

	mutex_lock(&bpf_event_mutex);
	err = __bpf_probe_register(btp, prog);
	mutex_unlock(&bpf_event_mutex);
	return err;
}

int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
{
	int err;

	mutex_lock(&bpf_event_mutex);
	err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
	mutex_unlock(&bpf_event_mutex);
	return err;
}

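/*
 * Fill in the information reported for a perf event with an attached BPF
 * program (the BPF_TASK_FD_QUERY path): program id, the kind of probe in
 * *fd_type, its name and, for kprobes/uprobes, the probe offset/address.
 */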
int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
			    u32 *fd_type, const char **buf,
			    u64 *probe_offset, u64 *probe_addr)
{
	bool is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;
	int flags, err = 0;

	prog = event->prog;
	if (!prog)
		return -ENOENT;

	/* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
	if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
		return -EOPNOTSUPP;

	*prog_id = prog->aux->id;
	flags = event->tp_event->flags;
	is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
	is_syscall_tp = is_syscall_trace_event(event->tp_event);

	if (is_tracepoint || is_syscall_tp) {
		*buf = is_tracepoint ? event->tp_event->tp->name
				     : event->tp_event->name;
		*fd_type = BPF_FD_TYPE_TRACEPOINT;
		*probe_offset = 0x0;
		*probe_addr = 0x0;
	} else {
		/* kprobe/uprobe */
		err = -EOPNOTSUPP;
#ifdef CONFIG_KPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_KPROBE)
			err = bpf_get_kprobe_info(event, fd_type, buf,
						  probe_offset, probe_addr,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
#ifdef CONFIG_UPROBE_EVENTS
		if (flags & TRACE_EVENT_FL_UPROBE)
			err = bpf_get_uprobe_info(event, fd_type, buf,
						  probe_offset,
						  event->attr.type == PERF_TYPE_TRACEPOINT);
#endif
	}

	return err;
}