Blame - kernel/cgroup/pids.c - SHIFTPHONES/mainline/linux

blob: 511af87f685e8ec6bffc482a1f2941379ce36a05 [file] [log] [blame]

Thomas Gleixner	f85d208	2019-06-04 10:10:45 +0200	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0-only
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	2	/*
				3	* Process number limiting controller for cgroups.
				4	*
				5	* Used to allow a cgroup hierarchy to stop any new processes from fork()ing
				6	* after a certain limit is reached.
				7	*
				8	* Since it is trivial to hit the task limit without hitting any kmemcg limits
				9	* in place, PIDs are a fundamental resource. As such, PID exhaustion must be
				10	* preventable in the scope of a cgroup hierarchy by allowing resource limiting
				11	* of the number of tasks in a cgroup.
				12	*
				13	* In order to use the `pids` controller, set the maximum number of tasks in
				14	* pids.max (this is not available in the root cgroup for obvious reasons). The
				15	* number of processes currently in the cgroup is given by pids.current.
				16	* Organisational operations are not blocked by cgroup policies, so it is
				17	* possible to have pids.current > pids.max. However, it is not possible to
				18	* violate a cgroup policy through fork(). fork() will return -EAGAIN if forking
				19	* would cause a cgroup policy to be violated.
				20	*
				21	* To set a cgroup to have no limit, set pids.max to "max". This is the default
				22	* for all new cgroups (N.B. that PID limits are hierarchical, so the most
				23	* stringent limit in the hierarchy is followed).
				24	*
				25	* pids.current tracks all child cgroup hierarchies, so parent/pids.current is
				26	* a superset of parent/child/pids.current.
				27	*
				28	* Copyright (C) 2015 Aleksa Sarai <cyphar@cyphar.com>
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	29	*/
				30
				31	#include <linux/kernel.h>
				32	#include <linux/threads.h>
				33	#include <linux/atomic.h>
				34	#include <linux/cgroup.h>
				35	#include <linux/slab.h>
Christian Brauner	ef2c41c	2020-02-05 14:26:22 +0100	[diff] [blame]	36	#include <linux/sched/task.h>
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	37
				38	#define PIDS_MAX (PID_MAX_LIMIT + 1ULL)
				39	#define PIDS_MAX_STR "max"
				40
				41	struct pids_cgroup {
				42	struct cgroup_subsys_state css;
				43
				44	/*
				45	* Use 64-bit types so that we can safely represent "max" as
				46	* %PIDS_MAX = (%PID_MAX_LIMIT + 1).
				47	*/
				48	atomic64_t counter;
Aleksa Sarai	a713af3	2019-10-17 02:50:01 +1100	[diff] [blame]	49	atomic64_t limit;
Kenny Yu	135b8b3	2016-06-21 14:04:36 -0400	[diff] [blame]	50
				51	/* Handle for "pids.events" */
				52	struct cgroup_file events_file;
				53
				54	/* Number of times fork failed because limit was hit. */
				55	atomic64_t events_limit;
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	56	};
				57
				58	static struct pids_cgroup css_pids(struct cgroup_subsys_state css)
				59	{
				60	return container_of(css, struct pids_cgroup, css);
				61	}
				62
				63	static struct pids_cgroup parent_pids(struct pids_cgroup pids)
				64	{
				65	return css_pids(pids->css.parent);
				66	}
				67
				68	static struct cgroup_subsys_state *
				69	pids_css_alloc(struct cgroup_subsys_state *parent)
				70	{
				71	struct pids_cgroup *pids;
				72
				73	pids = kzalloc(sizeof(struct pids_cgroup), GFP_KERNEL);
				74	if (!pids)
				75	return ERR_PTR(-ENOMEM);
				76
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	77	atomic64_set(&pids->counter, 0);
Aleksa Sarai	a713af3	2019-10-17 02:50:01 +1100	[diff] [blame]	78	atomic64_set(&pids->limit, PIDS_MAX);
Kenny Yu	135b8b3	2016-06-21 14:04:36 -0400	[diff] [blame]	79	atomic64_set(&pids->events_limit, 0);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	80	return &pids->css;
				81	}
				82
				83	static void pids_css_free(struct cgroup_subsys_state *css)
				84	{
				85	kfree(css_pids(css));
				86	}
				87
				88	/**
				89	* pids_cancel - uncharge the local pid count
				90	* @pids: the pid cgroup state
				91	* @num: the number of pids to cancel
				92	*
				93	* This function will WARN if the pid count goes under 0, because such a case is
				94	* a bug in the pids controller proper.
				95	*/
				96	static void pids_cancel(struct pids_cgroup *pids, int num)
				97	{
				98	/*
				99	* A negative count (or overflow for that matter) is invalid,
				100	* and indicates a bug in the `pids` controller proper.
				101	*/
				102	WARN_ON_ONCE(atomic64_add_negative(-num, &pids->counter));
				103	}
				104
				105	/**
				106	* pids_uncharge - hierarchically uncharge the pid count
				107	* @pids: the pid cgroup state
				108	* @num: the number of pids to uncharge
				109	*/
				110	static void pids_uncharge(struct pids_cgroup *pids, int num)
				111	{
				112	struct pids_cgroup *p;
				113
Tejun Heo	67cde9c	2015-12-03 10:18:21 -0500	[diff] [blame]	114	for (p = pids; parent_pids(p); p = parent_pids(p))
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	115	pids_cancel(p, num);
				116	}
				117
				118	/**
				119	* pids_charge - hierarchically charge the pid count
				120	* @pids: the pid cgroup state
				121	* @num: the number of pids to charge
				122	*
				123	* This function does not follow the pid limit set. It cannot fail and the new
				124	* pid count may exceed the limit. This is only used for reverting failed
				125	* attaches, where there is no other way out than violating the limit.
				126	*/
				127	static void pids_charge(struct pids_cgroup *pids, int num)
				128	{
				129	struct pids_cgroup *p;
				130
Tejun Heo	67cde9c	2015-12-03 10:18:21 -0500	[diff] [blame]	131	for (p = pids; parent_pids(p); p = parent_pids(p))
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	132	atomic64_add(num, &p->counter);
				133	}
				134
				135	/**
				136	* pids_try_charge - hierarchically try to charge the pid count
				137	* @pids: the pid cgroup state
				138	* @num: the number of pids to charge
				139	*
				140	* This function follows the set limit. It will fail if the charge would cause
				141	* the new value to exceed the hierarchical limit. Returns 0 if the charge
Rami Rosen	fccd3af	2015-12-13 22:13:08 +0200	[diff] [blame]	142	* succeeded, otherwise -EAGAIN.
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	143	*/
				144	static int pids_try_charge(struct pids_cgroup *pids, int num)
				145	{
				146	struct pids_cgroup p, q;
				147
Tejun Heo	67cde9c	2015-12-03 10:18:21 -0500	[diff] [blame]	148	for (p = pids; parent_pids(p); p = parent_pids(p)) {
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	149	int64_t new = atomic64_add_return(num, &p->counter);
Aleksa Sarai	a713af3	2019-10-17 02:50:01 +1100	[diff] [blame]	150	int64_t limit = atomic64_read(&p->limit);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	151
				152	/*
				153	* Since new is capped to the maximum number of pid_t, if
				154	* p->limit is %PIDS_MAX then we know that this test will never
				155	* fail.
				156	*/
Aleksa Sarai	a713af3	2019-10-17 02:50:01 +1100	[diff] [blame]	157	if (new > limit)
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	158	goto revert;
				159	}
				160
				161	return 0;
				162
				163	revert:
				164	for (q = pids; q != p; q = parent_pids(q))
				165	pids_cancel(q, num);
				166	pids_cancel(p, num);
				167
				168	return -EAGAIN;
				169	}
				170
Tejun Heo	1f7dd3e5	2015-12-03 10:18:21 -0500	[diff] [blame]	171	static int pids_can_attach(struct cgroup_taskset *tset)
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	172	{
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	173	struct task_struct *task;
Tejun Heo	1f7dd3e5	2015-12-03 10:18:21 -0500	[diff] [blame]	174	struct cgroup_subsys_state *dst_css;
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	175
Tejun Heo	1f7dd3e5	2015-12-03 10:18:21 -0500	[diff] [blame]	176	cgroup_taskset_for_each(task, dst_css, tset) {
				177	struct pids_cgroup *pids = css_pids(dst_css);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	178	struct cgroup_subsys_state *old_css;
				179	struct pids_cgroup *old_pids;
				180
				181	/*
Aleksa Sarai	ce52399	2015-08-25 12:50:44 +1000	[diff] [blame]	182	* No need to pin @old_css between here and cancel_attach()
				183	* because cgroup core protects it from being freed before
				184	* the migration completes or fails.
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	185	*/
Aleksa Sarai	ce52399	2015-08-25 12:50:44 +1000	[diff] [blame]	186	old_css = task_css(task, pids_cgrp_id);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	187	old_pids = css_pids(old_css);
				188
				189	pids_charge(pids, 1);
				190	pids_uncharge(old_pids, 1);
				191	}
				192
				193	return 0;
				194	}
				195
Tejun Heo	1f7dd3e5	2015-12-03 10:18:21 -0500	[diff] [blame]	196	static void pids_cancel_attach(struct cgroup_taskset *tset)
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	197	{
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	198	struct task_struct *task;
Tejun Heo	1f7dd3e5	2015-12-03 10:18:21 -0500	[diff] [blame]	199	struct cgroup_subsys_state *dst_css;
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	200
Tejun Heo	1f7dd3e5	2015-12-03 10:18:21 -0500	[diff] [blame]	201	cgroup_taskset_for_each(task, dst_css, tset) {
				202	struct pids_cgroup *pids = css_pids(dst_css);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	203	struct cgroup_subsys_state *old_css;
				204	struct pids_cgroup *old_pids;
				205
				206	old_css = task_css(task, pids_cgrp_id);
				207	old_pids = css_pids(old_css);
				208
				209	pids_charge(old_pids, 1);
				210	pids_uncharge(pids, 1);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	211	}
				212	}
				213
Oleg Nesterov	afbcb36	2015-11-27 19:57:22 +0100	[diff] [blame]	214	/*
				215	* task_css_check(true) in pids_can_fork() and pids_cancel_fork() relies
Ingo Molnar	780de9d	2017-02-02 11:50:56 +0100	[diff] [blame]	216	* on cgroup_threadgroup_change_begin() held by the copy_process().
Oleg Nesterov	afbcb36	2015-11-27 19:57:22 +0100	[diff] [blame]	217	*/
Christian Brauner	ef2c41c	2020-02-05 14:26:22 +0100	[diff] [blame]	218	static int pids_can_fork(struct task_struct task, struct css_set cset)
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	219	{
				220	struct cgroup_subsys_state *css;
				221	struct pids_cgroup *pids;
Kenny Yu	135b8b3	2016-06-21 14:04:36 -0400	[diff] [blame]	222	int err;
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	223
Christian Brauner	ef2c41c	2020-02-05 14:26:22 +0100	[diff] [blame]	224	if (cset)
				225	css = cset->subsys[pids_cgrp_id];
				226	else
				227	css = task_css_check(current, pids_cgrp_id, true);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	228	pids = css_pids(css);
Kenny Yu	135b8b3	2016-06-21 14:04:36 -0400	[diff] [blame]	229	err = pids_try_charge(pids, 1);
				230	if (err) {
				231	/* Only log the first time events_limit is incremented. */
				232	if (atomic64_inc_return(&pids->events_limit) == 1) {
				233	pr_info("cgroup: fork rejected by pids controller in ");
Tejun Heo	1d18c27	2017-03-01 15:39:07 -0500	[diff] [blame]	234	pr_cont_cgroup_path(css->cgroup);
Kenny Yu	135b8b3	2016-06-21 14:04:36 -0400	[diff] [blame]	235	pr_cont("\n");
				236	}
				237	cgroup_file_notify(&pids->events_file);
				238	}
				239	return err;
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	240	}
				241
Christian Brauner	ef2c41c	2020-02-05 14:26:22 +0100	[diff] [blame]	242	static void pids_cancel_fork(struct task_struct task, struct css_set cset)
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	243	{
Oleg Nesterov	afbcb36	2015-11-27 19:57:22 +0100	[diff] [blame]	244	struct cgroup_subsys_state *css;
				245	struct pids_cgroup *pids;
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	246
Christian Brauner	ef2c41c	2020-02-05 14:26:22 +0100	[diff] [blame]	247	if (cset)
				248	css = cset->subsys[pids_cgrp_id];
				249	else
				250	css = task_css_check(current, pids_cgrp_id, true);
Oleg Nesterov	afbcb36	2015-11-27 19:57:22 +0100	[diff] [blame]	251	pids = css_pids(css);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	252	pids_uncharge(pids, 1);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	253	}
				254
Oleg Nesterov	51bee5a	2019-01-28 17:00:13 +0100	[diff] [blame]	255	static void pids_release(struct task_struct *task)
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	256	{
Tejun Heo	2e91fa7	2015-10-15 16:41:53 -0400	[diff] [blame]	257	struct pids_cgroup *pids = css_pids(task_css(task, pids_cgrp_id));
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	258
				259	pids_uncharge(pids, 1);
				260	}
				261
				262	static ssize_t pids_max_write(struct kernfs_open_file of, char buf,
				263	size_t nbytes, loff_t off)
				264	{
				265	struct cgroup_subsys_state *css = of_css(of);
				266	struct pids_cgroup *pids = css_pids(css);
				267	int64_t limit;
				268	int err;
				269
				270	buf = strstrip(buf);
				271	if (!strcmp(buf, PIDS_MAX_STR)) {
				272	limit = PIDS_MAX;
				273	goto set_limit;
				274	}
				275
				276	err = kstrtoll(buf, 0, &limit);
				277	if (err)
				278	return err;
				279
				280	if (limit < 0 \|\| limit >= PIDS_MAX)
				281	return -EINVAL;
				282
				283	set_limit:
				284	/*
				285	* Limit updates don't need to be mutex'd, since it isn't
				286	* critical that any racing fork()s follow the new limit.
				287	*/
Aleksa Sarai	a713af3	2019-10-17 02:50:01 +1100	[diff] [blame]	288	atomic64_set(&pids->limit, limit);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	289	return nbytes;
				290	}
				291
				292	static int pids_max_show(struct seq_file sf, void v)
				293	{
				294	struct cgroup_subsys_state *css = seq_css(sf);
				295	struct pids_cgroup *pids = css_pids(css);
Aleksa Sarai	a713af3	2019-10-17 02:50:01 +1100	[diff] [blame]	296	int64_t limit = atomic64_read(&pids->limit);
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	297
				298	if (limit >= PIDS_MAX)
				299	seq_printf(sf, "%s\n", PIDS_MAX_STR);
				300	else
				301	seq_printf(sf, "%lld\n", limit);
				302
				303	return 0;
				304	}
				305
				306	static s64 pids_current_read(struct cgroup_subsys_state *css,
				307	struct cftype *cft)
				308	{
				309	struct pids_cgroup *pids = css_pids(css);
				310
				311	return atomic64_read(&pids->counter);
				312	}
				313
Kenny Yu	135b8b3	2016-06-21 14:04:36 -0400	[diff] [blame]	314	static int pids_events_show(struct seq_file sf, void v)
				315	{
				316	struct pids_cgroup *pids = css_pids(seq_css(sf));
				317
Kenny Yu	9f6870d	2016-06-21 11:55:35 -0700	[diff] [blame]	318	seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events_limit));
Kenny Yu	135b8b3	2016-06-21 14:04:36 -0400	[diff] [blame]	319	return 0;
				320	}
				321
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	322	static struct cftype pids_files[] = {
				323	{
				324	.name = "max",
				325	.write = pids_max_write,
				326	.seq_show = pids_max_show,
				327	.flags = CFTYPE_NOT_ON_ROOT,
				328	},
				329	{
				330	.name = "current",
				331	.read_s64 = pids_current_read,
Tejun Heo	67cde9c	2015-12-03 10:18:21 -0500	[diff] [blame]	332	.flags = CFTYPE_NOT_ON_ROOT,
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	333	},
Kenny Yu	135b8b3	2016-06-21 14:04:36 -0400	[diff] [blame]	334	{
				335	.name = "events",
				336	.seq_show = pids_events_show,
				337	.file_offset = offsetof(struct pids_cgroup, events_file),
				338	.flags = CFTYPE_NOT_ON_ROOT,
				339	},
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	340	{ } /* terminate */
				341	};
				342
				343	struct cgroup_subsys pids_cgrp_subsys = {
				344	.css_alloc = pids_css_alloc,
				345	.css_free = pids_css_free,
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	346	.can_attach = pids_can_attach,
				347	.cancel_attach = pids_cancel_attach,
				348	.can_fork = pids_can_fork,
				349	.cancel_fork = pids_cancel_fork,
Oleg Nesterov	51bee5a	2019-01-28 17:00:13 +0100	[diff] [blame]	350	.release = pids_release,
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	351	.legacy_cftypes = pids_files,
				352	.dfl_cftypes = pids_files,
Tejun Heo	8cfd814	2017-07-21 11:14:51 -0400	[diff] [blame]	353	.threaded = true,
Aleksa Sarai	49b786e	2015-06-09 21:32:10 +1000	[diff] [blame]	354	};