// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/init.h>
#include <linux/namei.h>
#include <linux/pid_namespace.h>
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
#include <linux/btf_ids.h>
#include "mmap_unlock_work.h"

struct bpf_iter_seq_task_common {
	struct pid_namespace *ns;
};

struct bpf_iter_seq_task_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by the {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	u32 tid;
};

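/* Find the next task in @ns with tid >= *tid. On success, *tid is
 * updated to the tid that was found and the task is returned with a
 * reference held (drop it with put_task_struct()). When
 * skip_if_dup_files is true, threads that share their group leader's
 * files_struct are skipped, so each files table is visited only once.
 */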
static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
					     u32 *tid,
					     bool skip_if_dup_files)
{
	struct task_struct *task = NULL;
	struct pid *pid;

	rcu_read_lock();
retry:
	pid = find_ge_pid(*tid, ns);
	if (pid) {
		*tid = pid_nr_ns(pid, ns);
		task = get_pid_task(pid, PIDTYPE_PID);
		if (!task) {
			++*tid;
			goto retry;
		} else if (skip_if_dup_files && !thread_group_leader(task) &&
			   task->files == task->group_leader->files) {
			put_task_struct(task);
			task = NULL;
			++*tid;
			goto retry;
		}
	}
	rcu_read_unlock();

	return task;
}

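/* seq_file ->start() callback: fetch the first (or resumed) task at or
 * after info->tid. The *pos bump keeps the position counter non-zero
 * once the first object has been produced.
 */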
static void *task_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	task = task_seq_get_next(info->common.ns, &info->tid, false);
	if (!task)
		return NULL;

	if (*pos == 0)
		++*pos;
	return task;
}

static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_info *info = seq->private;
	struct task_struct *task;

	++*pos;
	++info->tid;
	put_task_struct((struct task_struct *)v);
	task = task_seq_get_next(info->common.ns, &info->tid, false);
	if (!task)
		return NULL;

	return task;
}

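/* Context passed to BPF programs attached to the "task" iterator.
 * DEFINE_BPF_ITER_FUNC() below emits the BTF-visible signature that
 * such programs attach to.
 */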
struct bpf_iter__task {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
};

DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)
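
/* A minimal BPF-side sketch of a program for this target (illustrative
 * only; it follows the style of the bpf selftests and assumes the
 * usual libbpf helpers):
 *
 *	SEC("iter/task")
 *	int dump_task(struct bpf_iter__task *ctx)
 *	{
 *		struct seq_file *seq = ctx->meta->seq;
 *		struct task_struct *task = ctx->task;
 *
 *		if (task)
 *			BPF_SEQ_PRINTF(seq, "%d %s\n", task->pid, task->comm);
 *		return 0;
 *	}
 */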

static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
			   bool in_stop)
{
	struct bpf_iter_meta meta;
	struct bpf_iter__task ctx;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	meta.seq = seq;
	ctx.meta = &meta;
	ctx.task = task;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_seq_show(struct seq_file *seq, void *v)
{
	return __task_seq_show(seq, v, false);
}

static void task_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		(void)__task_seq_show(seq, v, true);
	else
		put_task_struct((struct task_struct *)v);
}

static const struct seq_operations task_seq_ops = {
	.start	= task_seq_start,
	.next	= task_seq_next,
	.stop	= task_seq_stop,
	.show	= task_seq_show,
};

struct bpf_iter_seq_task_file_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by the {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	u32 tid;
	u32 fd;
};

static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
	struct pid_namespace *ns = info->common.ns;
	u32 curr_tid = info->tid;
	struct task_struct *curr_task;
	unsigned int curr_fd = info->fd;

	/* If this function returns a non-NULL file object,
	 * it holds a reference to the task/file.
	 * Otherwise, it does not hold any reference.
	 */
again:
	if (info->task) {
		curr_task = info->task;
		curr_fd = info->fd;
	} else {
		curr_task = task_seq_get_next(ns, &curr_tid, true);
		if (!curr_task) {
			info->task = NULL;
			info->tid = curr_tid;
			return NULL;
		}

		/* set info->task and info->tid */
		info->task = curr_task;
		if (curr_tid == info->tid) {
			curr_fd = info->fd;
		} else {
			info->tid = curr_tid;
			curr_fd = 0;
		}
	}

	rcu_read_lock();
	for (;; curr_fd++) {
		struct file *f;

		f = task_lookup_next_fd_rcu(curr_task, &curr_fd);
		if (!f)
			break;
		if (!get_file_rcu(f))
			continue;

		/* set info->fd */
		info->fd = curr_fd;
		rcu_read_unlock();
		return f;
	}

	/* the current task is done, go to the next task */
	rcu_read_unlock();
	put_task_struct(curr_task);
	info->task = NULL;
	info->fd = 0;
	curr_tid = ++(info->tid);
	goto again;
}

static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct file *file;

	info->task = NULL;
	file = task_file_seq_get_next(info);
	if (file && *pos == 0)
		++*pos;

	return file;
}

static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	++*pos;
	++info->fd;
	fput((struct file *)v);
	return task_file_seq_get_next(info);
}

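/* Context passed to BPF programs attached to the "task_file" iterator:
 * one invocation per (task, fd, file) triple.
 */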
struct bpf_iter__task_file {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	u32 fd __aligned(8);
	__bpf_md_ptr(struct file *, file);
};

DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
		     struct task_struct *task, u32 fd,
		     struct file *file)

static int __task_file_seq_show(struct seq_file *seq, struct file *file,
				bool in_stop)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;
	struct bpf_iter__task_file ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.fd = info->fd;
	ctx.file = file;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_file_seq_show(struct seq_file *seq, void *v)
{
	return __task_file_seq_show(seq, v, false);
}

static void task_file_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_file_info *info = seq->private;

	if (!v) {
		(void)__task_file_seq_show(seq, v, true);
	} else {
		fput((struct file *)v);
		put_task_struct(info->task);
		info->task = NULL;
	}
}

static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	common->ns = get_pid_ns(task_active_pid_ns(current));
	return 0;
}

static void fini_seq_pidns(void *priv_data)
{
	struct bpf_iter_seq_task_common *common = priv_data;

	put_pid_ns(common->ns);
}

static const struct seq_operations task_file_seq_ops = {
	.start	= task_file_seq_start,
	.next	= task_file_seq_next,
	.stop	= task_file_seq_stop,
	.show	= task_file_seq_show,
};

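/* Iterator state for the "task_vma" target. prev_vm_start/prev_vm_end
 * remember the range that was last processed so that, after mmap_lock
 * is dropped and re-taken, find_vma() can locate the next vma to visit.
 */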
struct bpf_iter_seq_task_vma_info {
	/* The first field must be struct bpf_iter_seq_task_common.
	 * This is assumed by the {init, fini}_seq_pidns() callback functions.
	 */
	struct bpf_iter_seq_task_common common;
	struct task_struct *task;
	struct vm_area_struct *vma;
	u32 tid;
	unsigned long prev_vm_start;
	unsigned long prev_vm_end;
};

enum bpf_task_vma_iter_find_op {
	task_vma_iter_first_vma,   /* use mm->mmap */
	task_vma_iter_next_vma,    /* use curr_vma->vm_next */
	task_vma_iter_find_vma,    /* use find_vma() to find next vma */
};

static struct vm_area_struct *
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
{
	struct pid_namespace *ns = info->common.ns;
	enum bpf_task_vma_iter_find_op op;
	struct vm_area_struct *curr_vma;
	struct task_struct *curr_task;
	u32 curr_tid = info->tid;

	/* If this function returns a non-NULL vma, it holds a reference to
	 * the task_struct and holds the read lock on vma->mm->mmap_lock.
	 * If this function returns NULL, it does not hold any reference or
	 * lock.
	 */
	if (info->task) {
		curr_task = info->task;
		curr_vma = info->vma;
		/* In case of lock contention, drop mmap_lock to unblock
		 * the writer.
		 *
		 * After relocking, call find_vma(mm, prev_vm_end - 1) to
		 * find the new vma to process.
		 *
		 *   +------+------+-----------+
		 *   | VMA1 | VMA2 |    VMA3   |
		 *   +------+------+-----------+
		 *   |      |      |           |
		 *  4k     8k     16k         400k
		 *
		 * For example, curr_vma == VMA2. Before unlock, we set
		 *
		 *    prev_vm_start = 8k
		 *    prev_vm_end   = 16k
		 *
		 * There are a few cases:
		 *
		 * 1) VMA2 is freed, but VMA3 exists.
		 *
		 *    find_vma() will return VMA3, just process VMA3.
		 *
		 * 2) VMA2 still exists.
		 *
		 *    find_vma() will return VMA2, process VMA2->vm_next.
		 *
		 * 3) no more vma in this mm.
		 *
		 *    Process the next task.
		 *
		 * 4) find_vma() returns a different vma, VMA2'.
		 *
		 *    4.1) If VMA2 covers the same range as VMA2', skip VMA2',
		 *         because we already covered the range;
		 *    4.2) VMA2 and VMA2' cover different ranges, process
		 *         VMA2'.
		 */
		if (mmap_lock_is_contended(curr_task->mm)) {
			info->prev_vm_start = curr_vma->vm_start;
			info->prev_vm_end = curr_vma->vm_end;
			op = task_vma_iter_find_vma;
			mmap_read_unlock(curr_task->mm);
			if (mmap_read_lock_killable(curr_task->mm))
				goto finish;
		} else {
			op = task_vma_iter_next_vma;
		}
	} else {
again:
		curr_task = task_seq_get_next(ns, &curr_tid, true);
		if (!curr_task) {
			info->tid = curr_tid + 1;
			goto finish;
		}

		if (curr_tid != info->tid) {
			info->tid = curr_tid;
			/* new task, process the first vma */
			op = task_vma_iter_first_vma;
		} else {
			/* Found the same tid, which means user space has
			 * finished the data in the previous buffer and is
			 * reading more. We dropped mmap_lock before
			 * returning to user space, so it is necessary to
			 * use find_vma() to find the next vma to process.
			 */
			op = task_vma_iter_find_vma;
		}

		if (!curr_task->mm)
			goto next_task;

		if (mmap_read_lock_killable(curr_task->mm))
			goto finish;
	}

	switch (op) {
	case task_vma_iter_first_vma:
		curr_vma = curr_task->mm->mmap;
		break;
	case task_vma_iter_next_vma:
		curr_vma = curr_vma->vm_next;
		break;
	case task_vma_iter_find_vma:
		/* We dropped mmap_lock so it is necessary to use find_vma()
		 * to find the next vma. This is similar to the mechanism
		 * in show_smaps_rollup().
		 */
		curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1);
		/* case 1) and 4.2) above just use curr_vma */

		/* check for case 2) or case 4.1) above */
		if (curr_vma &&
		    curr_vma->vm_start == info->prev_vm_start &&
		    curr_vma->vm_end == info->prev_vm_end)
			curr_vma = curr_vma->vm_next;
		break;
	}
	if (!curr_vma) {
		/* case 3) above, or case 2) 4.1) with vm_next == NULL */
		mmap_read_unlock(curr_task->mm);
		goto next_task;
	}
	info->task = curr_task;
	info->vma = curr_vma;
	return curr_vma;

next_task:
	put_task_struct(curr_task);
	info->task = NULL;
	curr_tid++;
	goto again;

finish:
	if (curr_task)
		put_task_struct(curr_task);
	info->task = NULL;
	info->vma = NULL;
	return NULL;
}

static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct vm_area_struct *vma;

	vma = task_vma_seq_get_next(info);
	if (vma && *pos == 0)
		++*pos;

	return vma;
}

static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	++*pos;
	return task_vma_seq_get_next(info);
}

struct bpf_iter__task_vma {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct task_struct *, task);
	__bpf_md_ptr(struct vm_area_struct *, vma);
};

DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
		     struct task_struct *task, struct vm_area_struct *vma)

static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;
	struct bpf_iter__task_vma ctx;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, in_stop);
	if (!prog)
		return 0;

	ctx.meta = &meta;
	ctx.task = info->task;
	ctx.vma = info->vma;
	return bpf_iter_run_prog(prog, &ctx);
}

static int task_vma_seq_show(struct seq_file *seq, void *v)
{
	return __task_vma_seq_show(seq, false);
}

static void task_vma_seq_stop(struct seq_file *seq, void *v)
{
	struct bpf_iter_seq_task_vma_info *info = seq->private;

	if (!v) {
		(void)__task_vma_seq_show(seq, true);
	} else {
		/* info->vma has not been seen by the BPF program. If
		 * user space reads more, task_vma_seq_get_next() should
		 * return this vma again. Set prev_vm_start to ~0UL,
		 * so that we don't skip the vma returned by the next
		 * find_vma() (case task_vma_iter_find_vma in
		 * task_vma_seq_get_next()).
		 */
		info->prev_vm_start = ~0UL;
		info->prev_vm_end = info->vma->vm_end;
		mmap_read_unlock(info->task->mm);
		put_task_struct(info->task);
		info->task = NULL;
	}
}

static const struct seq_operations task_vma_seq_ops = {
	.start	= task_vma_seq_start,
	.next	= task_vma_seq_next,
	.stop	= task_vma_seq_stop,
	.show	= task_vma_seq_show,
};

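/* Registration glue: seq_ops plus private-state init/fini for each of
 * the three iterator targets.
 */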
static const struct bpf_iter_seq_info task_seq_info = {
	.seq_ops		= &task_seq_ops,
	.init_seq_private	= init_seq_pidns,
	.fini_seq_private	= fini_seq_pidns,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
};

static struct bpf_iter_reg task_reg_info = {
	.target			= "task",
	.feature		= BPF_ITER_RESCHED,
	.ctx_arg_info_size	= 1,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__task, task),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &task_seq_info,
};

static const struct bpf_iter_seq_info task_file_seq_info = {
	.seq_ops		= &task_file_seq_ops,
	.init_seq_private	= init_seq_pidns,
	.fini_seq_private	= fini_seq_pidns,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_file_info),
};

static struct bpf_iter_reg task_file_reg_info = {
	.target			= "task_file",
	.feature		= BPF_ITER_RESCHED,
	.ctx_arg_info_size	= 2,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__task_file, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_file, file),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &task_file_seq_info,
};

static const struct bpf_iter_seq_info task_vma_seq_info = {
	.seq_ops		= &task_vma_seq_ops,
	.init_seq_private	= init_seq_pidns,
	.fini_seq_private	= fini_seq_pidns,
	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_vma_info),
};

static struct bpf_iter_reg task_vma_reg_info = {
	.target			= "task_vma",
	.feature		= BPF_ITER_RESCHED,
	.ctx_arg_info_size	= 2,
	.ctx_arg_info		= {
		{ offsetof(struct bpf_iter__task_vma, task),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__task_vma, vma),
		  PTR_TO_BTF_ID_OR_NULL },
	},
	.seq_info		= &task_vma_seq_info,
};

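/* bpf_find_vma helper: find the vma covering @start in @task's mm and,
 * if one exists, run @callback_fn(task, vma, callback_ctx, 0, 0) while
 * holding mmap_lock for read. Returns 0 on success, -EINVAL for
 * non-zero flags, -ENOENT when there is no task/mm/matching vma, and
 * -EBUSY when the lock (or the per-CPU unlock irq_work) is unavailable.
 */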
BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
	   bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
{
	struct mmap_unlock_irq_work *work = NULL;
	struct vm_area_struct *vma;
	bool irq_work_busy = false;
	struct mm_struct *mm;
	int ret = -ENOENT;

	if (flags)
		return -EINVAL;

	if (!task)
		return -ENOENT;

	mm = task->mm;
	if (!mm)
		return -ENOENT;

	irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);

	if (irq_work_busy || !mmap_read_trylock(mm))
		return -EBUSY;

	vma = find_vma(mm, start);

	if (vma && vma->vm_start <= start && vma->vm_end > start) {
		callback_fn((u64)(long)task, (u64)(long)vma,
			    (u64)(long)callback_ctx, 0, 0);
		ret = 0;
	}
	bpf_mmap_unlock_mm(work, mm);
	return ret;
}

const struct bpf_func_proto bpf_find_vma_proto = {
	.func		= bpf_find_vma,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_FUNC,
	.arg4_type	= ARG_PTR_TO_STACK_OR_NULL,
	.arg5_type	= ARG_ANYTHING,
};
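
/* A minimal BPF-side usage sketch (illustrative only; the callback
 * must follow bpf_callback_t's calling convention):
 *
 *	static long check_vma(struct task_struct *task,
 *			      struct vm_area_struct *vma, void *data)
 *	{
 *		...inspect vma->vm_start, vma->vm_end, vma->vm_flags...
 *		return 0;
 *	}
 *
 *	err = bpf_find_vma(task, addr, check_vma, &data, 0);
 */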

DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);

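/* irq_work callback that drops the mmap_lock read lock on behalf of a
 * context that could not release it directly (see mmap_unlock_work.h).
 * The WARN documents that this path is never expected on PREEMPT_RT.
 */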
static void do_mmap_read_unlock(struct irq_work *entry)
{
	struct mmap_unlock_irq_work *work;

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
		return;

	work = container_of(entry, struct mmap_unlock_irq_work, irq_work);
	mmap_read_unlock_non_owner(work->mm);
}

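/* Wire up the per-CPU unlock irq_work and register the task, task_file
 * and task_vma iterator targets, resolving the BTF ids of their ctx
 * pointer arguments.
 */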
static int __init task_iter_init(void)
{
	struct mmap_unlock_irq_work *work;
	int ret, cpu;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&mmap_unlock_work, cpu);
		init_irq_work(&work->irq_work, do_mmap_read_unlock);
	}

	task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	ret = bpf_iter_reg_target(&task_reg_info);
	if (ret)
		return ret;

	task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE];
	ret = bpf_iter_reg_target(&task_file_reg_info);
	if (ret)
		return ret;

	task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
	task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
	return bpf_iter_reg_target(&task_vma_reg_info);
}
late_initcall(task_iter_init);