blob: 05853b0b883181fa4dc784166f2828fb17cbbedf [file] [log] [blame]
Roman Gushchin84092dbc2018-05-11 19:03:49 +01001/* SPDX-License-Identifier: GPL-2.0 */
2
3#define _GNU_SOURCE
4
5#include <errno.h>
6#include <fcntl.h>
7#include <linux/limits.h>
8#include <signal.h>
9#include <stdio.h>
10#include <stdlib.h>
11#include <string.h>
12#include <sys/stat.h>
13#include <sys/types.h>
14#include <sys/wait.h>
15#include <unistd.h>
16
17#include "cgroup_util.h"
Christian Brauner9bd59102020-02-05 14:26:23 +010018#include "../clone3/clone3_selftests.h"
Roman Gushchin84092dbc2018-05-11 19:03:49 +010019
/*
 * read_text() - read the start of a file into a NUL-terminated buffer.
 * @path:    file to open read-only.
 * @buf:     destination buffer.
 * @max_len: size of @buf in bytes, including room for the terminator.
 *
 * A single read() of at most @max_len - 1 bytes is issued; anything beyond
 * that is not read.
 *
 * Return: number of bytes stored in @buf (terminator excluded), or a negative
 * value if @max_len is zero or if open()/read() fails.
 */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
	ssize_t len;
	int fd;

	/* max_len - 1 would wrap around and let read() overrun buf. */
	if (!max_len) {
		errno = EINVAL;
		return -1;
	}

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return fd;

	len = read(fd, buf, max_len - 1);
	if (len >= 0)
		buf[len] = '\0';

	close(fd);
	return len;
}
38
/*
 * write_text() - append a buffer to an existing file.
 * @path: file to open write-only in append mode (it is not created).
 * @buf:  data to write.
 * @len:  number of bytes of @buf to write.
 *
 * Return: the result of the single write() call (bytes written, or negative
 * on error), or the negative open() result if the file cannot be opened.
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int fd = open(path, O_WRONLY | O_APPEND);

	if (fd < 0)
		return fd;

	written = write(fd, buf, len);
	close(fd);

	return written;
}
57
/*
 * cg_name() - build the path "<root>/<name>" in a freshly allocated string.
 * @root: leading path component.
 * @name: trailing path component.
 *
 * Return: a malloc()ed string owned by the caller (release with free()), or
 * NULL if the allocation fails.
 */
char *cg_name(const char *root, const char *name)
{
	/* +2: one byte for the '/' separator, one for the terminator. */
	size_t len = strlen(root) + strlen(name) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", root, name);

	return ret;
}
67
/*
 * cg_name_indexed() - build "<root>/<name>_<index>" in a freshly allocated
 * string.
 * @root:  leading path component.
 * @name:  base name of the trailing component.
 * @index: number appended after an underscore.
 *
 * The buffer is sized from the formatted length, so any int value of @index
 * (including negative ones) is rendered without truncation.
 *
 * Return: a malloc()ed string owned by the caller (release with free()), or
 * NULL if formatting or allocation fails.
 */
char *cg_name_indexed(const char *root, const char *name, int index)
{
	char *ret;
	int need;

	/* Dry run: ask snprintf() how many characters the result needs. */
	need = snprintf(NULL, 0, "%s/%s_%d", root, name, index);
	if (need < 0)
		return NULL;

	ret = malloc((size_t)need + 1);
	if (!ret)
		return NULL;

	snprintf(ret, (size_t)need + 1, "%s/%s_%d", root, name, index);

	return ret;
}
77
/*
 * cg_control() - build the path "<cgroup>/<control>" in a freshly allocated
 * string.
 * @cgroup:  cgroup directory path.
 * @control: name of the file inside that directory.
 *
 * Return: a malloc()ed string owned by the caller (release with free()), or
 * NULL if the allocation fails.
 */
char *cg_control(const char *cgroup, const char *control)
{
	/* +2: one byte for the '/' separator, one for the terminator. */
	size_t len = strlen(cgroup) + strlen(control) + 2;
	char *ret = malloc(len);

	if (!ret)
		return NULL;

	snprintf(ret, len, "%s/%s", cgroup, control);

	return ret;
}
87
/*
 * cg_read() - read the file "<cgroup>/<control>" into @buf.
 * @cgroup:  cgroup directory path.
 * @control: name of the file inside that directory.
 * @buf:     destination buffer, handed to read_text().
 * @len:     size of @buf in bytes.
 *
 * Return: 0 if read_text() reports a non-negative length, -1 otherwise.
 */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
	char path[PATH_MAX];
	ssize_t nread;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	nread = read_text(path, buf, len);

	return nread < 0 ? -1 : 0;
}
99
/*
 * cg_read_strcmp() - compare a control file's content with @expected.
 * @cgroup:   cgroup directory path.
 * @control:  name of the file inside that directory.
 * @expected: string to compare against; NULL is rejected.
 *
 * The read buffer is sized to strlen(@expected) + 1, so only that much of
 * the file is fetched through cg_read() before the strcmp().
 *
 * Return: the strcmp() result (0 on match), or -1 if @expected is NULL, the
 * allocation fails, or the file cannot be read.
 */
int cg_read_strcmp(const char *cgroup, const char *control,
		   const char *expected)
{
	size_t size;
	char *buf;
	int ret = -1;

	if (!expected)
		return -1;

	size = strlen(expected) + 1;
	buf = malloc(size);
	if (!buf)
		return -1;

	if (cg_read(cgroup, control, buf, size) == 0)
		ret = strcmp(expected, buf);

	free(buf);
	return ret;
}
126
127int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
128{
129 char buf[PAGE_SIZE];
130
131 if (cg_read(cgroup, control, buf, sizeof(buf)))
132 return -1;
133
134 return strstr(buf, needle) ? 0 : -1;
135}
136
/*
 * cg_read_long() - read a control file and parse it as a long.
 * @cgroup:  cgroup directory path.
 * @control: name of the file inside that directory.
 *
 * Return: atol() of the text read (up to 127 bytes), or -1 if the file
 * cannot be read. A stored value of -1 is indistinguishable from failure.
 */
long cg_read_long(const char *cgroup, const char *control)
{
	char buf[128];

	return cg_read(cgroup, control, buf, sizeof(buf)) ? -1 : atol(buf);
}
146
147long cg_read_key_long(const char *cgroup, const char *control, const char *key)
148{
149 char buf[PAGE_SIZE];
150 char *ptr;
151
152 if (cg_read(cgroup, control, buf, sizeof(buf)))
153 return -1;
154
155 ptr = strstr(buf, key);
156 if (!ptr)
157 return -1;
158
159 return atol(ptr + strlen(key));
160}
161
Michal Koutný11318982019-10-04 12:57:42 +0200162long cg_read_lc(const char *cgroup, const char *control)
163{
164 char buf[PAGE_SIZE];
165 const char delim[] = "\n";
166 char *line;
167 long cnt = 0;
168
169 if (cg_read(cgroup, control, buf, sizeof(buf)))
170 return -1;
171
172 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
173 cnt++;
174
175 return cnt;
176}
177
/*
 * cg_write() - write a string to the file "<cgroup>/<control>".
 * @cgroup:  cgroup directory path.
 * @control: name of the file inside that directory.
 * @buf:     NUL-terminated text to write (the terminator is not written).
 *
 * Return: 0 if write_text() reports that the whole string was written,
 * -1 otherwise.
 */
int cg_write(const char *cgroup, const char *control, char *buf)
{
	char path[PATH_MAX];
	ssize_t want = strlen(buf);
	ssize_t done;

	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
	done = write_text(path, buf, want);

	return done == want ? 0 : -1;
}
190
191int cg_find_unified_root(char *root, size_t len)
192{
193 char buf[10 * PAGE_SIZE];
194 char *fs, *mount, *type;
195 const char delim[] = "\n\t ";
196
197 if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
198 return -1;
199
200 /*
201 * Example:
202 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
203 */
204 for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
205 mount = strtok(NULL, delim);
206 type = strtok(NULL, delim);
207 strtok(NULL, delim);
208 strtok(NULL, delim);
209 strtok(NULL, delim);
210
Chris Downb59b1ba2019-08-02 21:49:15 -0700211 if (strcmp(type, "cgroup2") == 0) {
Roman Gushchin84092dbc2018-05-11 19:03:49 +0100212 strncpy(root, mount, len);
213 return 0;
214 }
215 }
216
217 return -1;
218}
219
/*
 * cg_create() - create the directory for a cgroup.
 * @cgroup: path of the directory to create.
 *
 * The directory is requested with mode 0755 so that it carries the search
 * (execute) bits needed to traverse it.
 *
 * Return: the mkdir() result (0 on success, -1 with errno set on failure).
 */
int cg_create(const char *cgroup)
{
	return mkdir(cgroup, 0755);
}
224
Roman Gushchin5313bfe2019-04-19 10:03:06 -0700225int cg_wait_for_proc_count(const char *cgroup, int count)
226{
227 char buf[10 * PAGE_SIZE] = {0};
228 int attempts;
229 char *ptr;
230
231 for (attempts = 10; attempts >= 0; attempts--) {
232 int nr = 0;
233
234 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
235 break;
236
237 for (ptr = buf; *ptr; ptr++)
238 if (*ptr == '\n')
239 nr++;
240
241 if (nr >= count)
242 return 0;
243
244 usleep(100000);
245 }
246
247 return -1;
248}
249
250int cg_killall(const char *cgroup)
Roman Gushchin84092dbc2018-05-11 19:03:49 +0100251{
252 char buf[PAGE_SIZE];
253 char *ptr = buf;
254
255 if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
256 return -1;
257
258 while (ptr < buf + sizeof(buf)) {
259 int pid = strtol(ptr, &ptr, 10);
260
261 if (pid == 0)
262 break;
263 if (*ptr)
264 ptr++;
265 else
266 break;
267 if (kill(pid, SIGKILL))
268 return -1;
269 }
270
271 return 0;
272}
273
/*
 * cg_destroy() - remove a cgroup directory, killing its processes if needed.
 * @cgroup: path of the directory to remove.
 *
 * While rmdir() fails with EBUSY, cg_killall() is invoked, the caller sleeps
 * for 100 microseconds, and rmdir() is tried again; there is no retry limit.
 *
 * Return: 0 if the directory was removed or did not exist (ENOENT);
 * otherwise the failing rmdir() result.
 */
int cg_destroy(const char *cgroup)
{
	int ret;

	for (;;) {
		ret = rmdir(cgroup);
		if (ret == 0 || errno != EBUSY)
			break;

		cg_killall(cgroup);
		usleep(100);
	}

	if (ret != 0 && errno == ENOENT)
		ret = 0;

	return ret;
}
291
/*
 * cg_enter() - write a PID to a cgroup's cgroup.procs file.
 * @cgroup: cgroup directory path.
 * @pid:    PID to write, formatted in decimal.
 *
 * Return: the cg_write() result (0 on success, -1 on failure).
 */
int cg_enter(const char *cgroup, int pid)
{
	char text[64];

	snprintf(text, sizeof(text), "%d", pid);

	return cg_write(cgroup, "cgroup.procs", text);
}
299
/*
 * cg_enter_current() - write "0" to a cgroup's cgroup.procs file.
 * @cgroup: cgroup directory path.
 *
 * NOTE(review): "0" presumably stands for the calling process; confirm
 * against the cgroup documentation.
 *
 * Return: the cg_write() result (0 on success, -1 on failure).
 */
int cg_enter_current(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.procs", self);
}
Claudiod863cb03fc2018-07-18 19:33:58 +0200304
/*
 * cg_enter_current_thread() - write "0" to a cgroup's cgroup.threads file.
 * @cgroup: cgroup directory path.
 *
 * NOTE(review): "0" presumably stands for the calling thread; confirm
 * against the cgroup documentation.
 *
 * Return: the cg_write() result (0 on success, -1 on failure).
 */
int cg_enter_current_thread(const char *cgroup)
{
	char self[] = "0";

	return cg_write(cgroup, "cgroup.threads", self);
}
309
/*
 * cg_run() - run a callback in a forked child placed in @cgroup and wait
 * for it.
 * @cgroup: cgroup directory path, also passed to @fn.
 * @fn:     callback executed in the child; its return value becomes the
 *          child's exit status.
 * @arg:    opaque argument passed to @fn.
 *
 * The child writes its own PID to cgroup.procs before calling @fn and exits
 * with EXIT_FAILURE if that write fails.
 *
 * Return: the child's exit status if it exited normally; the negative fork()
 * result if fork() fails; -1 if waiting fails or the child did not exit
 * normally.
 */
int cg_run(const char *cgroup,
	   int (*fn)(const char *cgroup, void *arg),
	   void *arg)
{
	int pid, retcode;

	pid = fork();
	if (pid < 0)
		return pid;

	if (pid == 0) {
		char buf[64];

		snprintf(buf, sizeof(buf), "%d", getpid());
		if (cg_write(cgroup, "cgroup.procs", buf))
			exit(EXIT_FAILURE);
		exit(fn(cgroup, arg));
	}

	/*
	 * retcode is only meaningful once waitpid() succeeds; restart the
	 * wait if a signal interrupts it and give up on any other error.
	 */
	while (waitpid(pid, &retcode, 0) < 0) {
		if (errno != EINTR)
			return -1;
	}

	return WIFEXITED(retcode) ? WEXITSTATUS(retcode) : -1;
}
334
/*
 * clone_into_cgroup() - create a child directly inside a cgroup via clone3().
 * @cgroup_fd: file descriptor passed as the target cgroup.
 *
 * When CLONE_ARGS_SIZE_VER2 is not defined at build time, or when clone3()
 * fails with ENOSYS or E2BIG (clone3() or CLONE_INTO_CGROUP unavailable),
 * the failure is reported uniformly as ENOSYS.
 *
 * Return: whatever sys_clone3() returned in every other case; otherwise
 * -ENOSYS with errno set to ENOSYS.
 */
pid_t clone_into_cgroup(int cgroup_fd)
{
#ifdef CLONE_ARGS_SIZE_VER2
	struct clone_args args = {
		.flags = CLONE_INTO_CGROUP,
		.exit_signal = SIGCHLD,
		.cgroup = cgroup_fd,
	};
	pid_t pid = sys_clone3(&args, sizeof(struct clone_args));

	/*
	 * Anything other than "feature not available" is handed back as is:
	 * ENOSYS -> clone3() not available
	 * E2BIG  -> CLONE_INTO_CGROUP not available
	 */
	if (pid >= 0 || (errno != ENOSYS && errno != E2BIG))
		return pid;
#endif
	errno = ENOSYS;
	return -ENOSYS;
}
362
/*
 * clone_reap() - wait for a state change of a child and decode it.
 * @pid:     child to wait for.
 * @options: waitid() option flags (WEXITED, WSTOPPED, WCONTINUED, ...);
 *           __WALL and __WNOTHREAD are always added.
 *
 * waitid() reports the kind of event in siginfo_t.si_code and the plain exit
 * code or signal number in si_status, so the result is decoded from those
 * two fields. The wait is restarted when interrupted by a signal.
 *
 * Return: the exit code for a child that exited (with WEXITED), the signal
 * number for a stopped or trapped child (with WSTOPPED), 0 for a continued
 * child (with WCONTINUED), and -1 on waitid() failure or any other event.
 */
int clone_reap(pid_t pid, int options)
{
	siginfo_t info = {
		.si_signo = 0,
	};
	int ret;

	do {
		ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
	} while (ret < 0 && errno == EINTR);

	if (ret < 0)
		return -1;

	switch (info.si_code) {
	case CLD_EXITED:
		return (options & WEXITED) ? info.si_status : -1;
	case CLD_STOPPED:
	case CLD_TRAPPED:
		return (options & WSTOPPED) ? info.si_status : -1;
	case CLD_CONTINUED:
		return (options & WCONTINUED) ? 0 : -1;
	default:
		/* Killed or dumped child, or no state change was reported. */
		return -1;
	}
}
395
/*
 * dirfd_open_opath() - open a directory as an O_PATH descriptor.
 * @dir: path of the directory.
 *
 * The descriptor is opened with O_DIRECTORY, O_CLOEXEC and O_NOFOLLOW in
 * addition to O_PATH.
 *
 * Return: the open() result (a descriptor, or -1 with errno set).
 */
int dirfd_open_opath(const char *dir)
{
	const int flags = O_PATH | O_DIRECTORY | O_NOFOLLOW | O_CLOEXEC;

	return open(dir, flags);
}
400
/*
 * close_prot_errno() - close @fd if it is non-negative, preserving errno.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement (for
 * instance as the body of an unbraced if/else); @fd is evaluated exactly
 * once.
 */
#define close_prot_errno(fd)			\
	do {					\
		int _fd_ = (fd);		\
		if (_fd_ >= 0) {		\
			int _e_ = errno;	\
			close(_fd_);		\
			errno = _e_;		\
		}				\
	} while (0)
407
/*
 * clone_into_cgroup_run_nowait() - start @fn in a child cloned straight into
 * @cgroup, without waiting for it.
 * @cgroup: cgroup directory path, also passed to @fn.
 * @fn:     callback run in the child; its return value is the exit status.
 * @arg:    opaque argument passed to @fn.
 *
 * Return: in the parent, the clone_into_cgroup() result (child PID or a
 * negative value, with errno preserved across closing the descriptor);
 * -1 if the cgroup directory cannot be opened. The child never returns.
 */
static int clone_into_cgroup_run_nowait(const char *cgroup,
					int (*fn)(const char *cgroup, void *arg),
					void *arg)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	if (child != 0)
		return child;

	exit(fn(cgroup, arg));
}
426
/*
 * cg_run_nowait() - start @fn in a child placed in @cgroup, without waiting.
 * @cgroup: cgroup directory path, also passed to @fn.
 * @fn:     callback run in the child; its return value is the exit status.
 * @arg:    opaque argument passed to @fn.
 *
 * clone_into_cgroup_run_nowait() is tried first. If it reports ENOSYS, the
 * function falls back to fork(), after which the child writes its own PID to
 * cgroup.procs (exiting with EXIT_FAILURE if that fails) and runs @fn.
 *
 * Return: the child's PID in the parent; -1 on a non-ENOSYS clone failure;
 * the fork() result if the fallback fork() fails.
 */
int cg_run_nowait(const char *cgroup,
		  int (*fn)(const char *cgroup, void *arg),
		  void *arg)
{
	char buf[64];
	int pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);

	if (pid > 0)
		return pid;

	/* Genuine test failure. */
	if (pid < 0 && errno != ENOSYS)
		return -1;

	pid = fork();
	if (pid != 0)
		return pid;

	/* Child: join the cgroup, then hand over to the callback. */
	snprintf(buf, sizeof(buf), "%d", getpid());
	if (cg_write(cgroup, "cgroup.procs", buf))
		exit(EXIT_FAILURE);
	exit(fn(cgroup, arg));
}
453
/*
 * get_temp_fd() - open an unnamed temporary file in the current directory.
 *
 * Return: the open() result for "." with O_TMPFILE | O_RDWR | O_EXCL
 * (a descriptor, or -1 with errno set).
 */
int get_temp_fd(void)
{
	const int flags = O_TMPFILE | O_RDWR | O_EXCL;

	return open(".", flags);
}
458
459int alloc_pagecache(int fd, size_t size)
460{
461 char buf[PAGE_SIZE];
462 struct stat st;
463 int i;
464
465 if (fstat(fd, &st))
466 goto cleanup;
467
468 size += st.st_size;
469
470 if (ftruncate(fd, size))
471 goto cleanup;
472
473 for (i = 0; i < size; i += sizeof(buf))
474 read(fd, buf, sizeof(buf));
475
476 return 0;
477
478cleanup:
479 return -1;
480}
481
482int alloc_anon(const char *cgroup, void *arg)
483{
484 size_t size = (unsigned long)arg;
485 char *buf, *ptr;
486
487 buf = malloc(size);
488 for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
489 *ptr = 0;
490
491 free(buf);
492 return 0;
493}
Mike Rapoport478b2782018-05-15 19:05:53 +0300494
495int is_swap_enabled(void)
496{
497 char buf[PAGE_SIZE];
498 const char delim[] = "\n";
499 int cnt = 0;
500 char *line;
501
502 if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
503 return -1;
504
505 for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
506 cnt++;
507
508 return cnt > 1;
509}
Jay Kamata9877852018-09-07 14:34:05 -0700510
/*
 * set_oom_adj_score() - write a score to /proc/<pid>/oom_score_adj.
 * @pid:   process whose file is written.
 * @score: value written in decimal.
 *
 * Return: 0 on success; the negative open() or dprintf() result on failure.
 */
int set_oom_adj_score(int pid, int score)
{
	char path[PATH_MAX];
	int fd, written;

	sprintf(path, "/proc/%d/oom_score_adj", pid);

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return fd;

	written = dprintf(fd, "%d", score);
	close(fd);

	return written < 0 ? written : 0;
}
Roman Gushchin5313bfe2019-04-19 10:03:06 -0700531
/*
 * proc_read_text() - read a file from a process's /proc directory.
 * @pid:    process to inspect; 0 selects the caller via /proc/self or
 *          /proc/thread-self.
 * @thread: with @pid == 0, use "thread-self" instead of "self".
 * @item:   file name below the /proc/<...>/ directory.
 * @buf:    destination buffer.
 * @size:   size of @buf in bytes.
 *
 * Return: the read_text() result for the composed path.
 */
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
	char path[PATH_MAX];

	if (pid) {
		snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
	} else {
		const char *self = thread ? "thread-self" : "self";

		snprintf(path, sizeof(path), "/proc/%s/%s", self, item);
	}

	return read_text(path, buf, size);
}
Michal Koutný11318982019-10-04 12:57:42 +0200544
545int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
546{
547 char buf[PAGE_SIZE];
548
549 if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0)
550 return -1;
551
552 return strstr(buf, needle) ? 0 : -1;
553}
Christian Brauner9bd59102020-02-05 14:26:23 +0100554
/*
 * clone_into_cgroup_run_wait() - check that a child can be cloned straight
 * into @cgroup.
 * @cgroup: cgroup directory path.
 *
 * The child exits immediately with EXIT_SUCCESS; the parent reaps it and
 * ignores the outcome of the reaping.
 *
 * Return: 0 if the clone succeeded; -1 if the cgroup directory cannot be
 * opened or clone_into_cgroup() fails.
 */
int clone_into_cgroup_run_wait(const char *cgroup)
{
	pid_t child;
	int cg_fd = dirfd_open_opath(cgroup);

	if (cg_fd < 0)
		return -1;

	child = clone_into_cgroup(cg_fd);
	close_prot_errno(cg_fd);

	if (child < 0)
		return -1;

	if (!child)
		exit(EXIT_SUCCESS);

	/* Only the success of the clone matters, not how the child ended. */
	(void)clone_reap(child, WEXITED);
	return 0;
}