Merge branch 'cgroup/for-4.11-rdmacg' into cgroup/for-4.11
Merge in to resolve conflicts in Documentation/cgroup-v2.txt. The
conflicts are from multiple section additions and trivial to resolve.
Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt
index 94350d7..3b8449f 100644
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -47,8 +47,12 @@
5-3. IO
5-3-1. IO Interface Files
5-3-2. Writeback
- 5-4. RDMA
- 5-4-1. RDMA Interface Files
+ 5-4. PID
+ 5-4-1. PID Interface Files
+ 5-5. RDMA
+ 5-5-1. RDMA Interface Files
+ 5-6. Misc
+ 5-6-1. perf_event
6. Namespace
6-1. Basics
6-2. The Root and Views
@@ -330,14 +334,12 @@
cgroup by writing its PID to the "cgroup.procs" file, the following
conditions must be met.
-- The writer's euid must match either uid or suid of the target process.
-
- The writer must have write access to the "cgroup.procs" file.
- The writer must have write access to the "cgroup.procs" file of the
common ancestor of the source and destination cgroups.
-The above three constraints ensure that while a delegatee may migrate
+The above two constraints ensure that while a delegatee may migrate
processes around freely in the delegated sub-hierarchy it can't pull
in from or push out to outside the sub-hierarchy.
@@ -352,10 +354,10 @@
Let's also say U0 wants to write the PID of a process which is
currently in C10 into "C00/cgroup.procs". U0 has write access to the
-file and uid match on the process; however, the common ancestor of the
-source cgroup C10 and the destination cgroup C00 is above the points
-of delegation and U0 would not have write access to its "cgroup.procs"
-files and thus the write will be denied with -EACCES.
+file; however, the common ancestor of the source cgroup C10 and the
+destination cgroup C00 is above the points of delegation and U0 would
+not have write access to its "cgroup.procs" files and thus the write
+will be denied with -EACCES.
2-6. Guidelines
@@ -1121,12 +1123,51 @@
vm.dirty[_background]_ratio.
-5-4. RDMA
+5-4. PID
+
+The process number controller is used to allow a cgroup to stop any
+new tasks from being fork()'d or clone()'d after a specified limit is
+reached.
+
+The number of tasks in a cgroup can be exhausted in ways which other
+controllers cannot prevent, thus warranting its own controller. For
+example, a fork bomb is likely to exhaust the number of tasks before
+hitting memory restrictions.
+
+Note that PIDs used in this controller refer to TIDs (thread IDs), i.e.
+process IDs as used by the kernel.
+
+
+5-4-1. PID Interface Files
+
+ pids.max
+
+ A read-write single value file which exists on non-root cgroups. The
+ default is "max".
+
+ Hard limit of number of processes.
+
+ pids.current
+
+ A read-only single value file which exists on all cgroups.
+
+ The number of processes currently in the cgroup and its descendants.
+
+Organisational operations are not blocked by cgroup policies, so it is
+possible to have pids.current > pids.max. This can be done by either
+setting the limit to be smaller than pids.current, or attaching enough
+processes to the cgroup such that pids.current is larger than
+pids.max. However, it is not possible to violate a cgroup PID policy
+through fork() or clone(). These will return -EAGAIN if the creation
+of a new process would cause a cgroup policy to be violated.
+
+
+5-5. RDMA
The "rdma" controller regulates the distribution and accounting of
of RDMA resources.
-5-4-1. RDMA Interface Files
+5-5-1. RDMA Interface Files
rdma.max
A readwrite nested-keyed file that exists for all the cgroups
@@ -1157,6 +1198,16 @@
ocrdma1 hca_handle=1 hca_object=23
+5-6. Misc
+
+5-6-1. perf_event
+
+The perf_event controller, if not mounted on a legacy hierarchy, is
+automatically enabled on the v2 hierarchy so that perf events can
+always be filtered by cgroup v2 path. The controller can still be
+moved to a legacy hierarchy after the v2 hierarchy is populated.
+
+
6. Namespace
6-1. Basics
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 589b0e7..9203bfb 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -26,6 +26,64 @@ struct cgrp_cset_link {
struct list_head cgrp_link;
};
+/* used to track tasks and csets during migration */
+struct cgroup_taskset {
+ /* the src and dst cset list running through cset->mg_node */
+ struct list_head src_csets;
+ struct list_head dst_csets;
+
+ /* the subsys currently being processed */
+ int ssid;
+
+ /*
+ * Fields for cgroup_taskset_*() iteration.
+ *
+ * Before migration is committed, the target migration tasks are on
+ * ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of
+ * the csets on ->dst_csets. ->csets point to either ->src_csets
+ * or ->dst_csets depending on whether migration is committed.
+ *
+ * ->cur_cset and ->cur_task point to the current task position
+ * during iteration.
+ */
+ struct list_head *csets;
+ struct css_set *cur_cset;
+ struct task_struct *cur_task;
+};
+
+/* migration context also tracks preloading */
+struct cgroup_mgctx {
+ /*
+ * Preloaded source and destination csets. Used to guarantee
+ * atomic success or failure on actual migration.
+ */
+ struct list_head preloaded_src_csets;
+ struct list_head preloaded_dst_csets;
+
+ /* tasks and csets to migrate */
+ struct cgroup_taskset tset;
+
+ /* subsystems affected by migration */
+ u16 ss_mask;
+};
+
+#define CGROUP_TASKSET_INIT(tset) \
+{ \
+ .src_csets = LIST_HEAD_INIT(tset.src_csets), \
+ .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \
+ .csets = &tset.src_csets, \
+}
+
+#define CGROUP_MGCTX_INIT(name) \
+{ \
+ LIST_HEAD_INIT(name.preloaded_src_csets), \
+ LIST_HEAD_INIT(name.preloaded_dst_csets), \
+ CGROUP_TASKSET_INIT(name.tset), \
+}
+
+#define DEFINE_CGROUP_MGCTX(name) \
+ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name)
+
struct cgroup_sb_opts {
u16 subsys_mask;
unsigned int flags;
@@ -112,13 +170,12 @@ struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
struct cgroup_namespace *ns);
bool cgroup_may_migrate_to(struct cgroup *dst_cgrp);
-void cgroup_migrate_finish(struct list_head *preloaded_csets);
-void cgroup_migrate_add_src(struct css_set *src_cset,
- struct cgroup *dst_cgrp,
- struct list_head *preloaded_csets);
-int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets);
+void cgroup_migrate_finish(struct cgroup_mgctx *mgctx);
+void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp,
+ struct cgroup_mgctx *mgctx);
+int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx);
int cgroup_migrate(struct task_struct *leader, bool threadgroup,
- struct cgroup_root *root);
+ struct cgroup_mgctx *mgctx);
int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
bool threadgroup);
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 465b101..fc34bcf 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -87,7 +87,7 @@ EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
*/
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
{
- LIST_HEAD(preloaded_csets);
+ DEFINE_CGROUP_MGCTX(mgctx);
struct cgrp_cset_link *link;
struct css_task_iter it;
struct task_struct *task;
@@ -106,10 +106,10 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
/* all tasks in @from are being moved, all csets are source */
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &from->cset_links, cset_link)
- cgroup_migrate_add_src(link->cset, to, &preloaded_csets);
+ cgroup_migrate_add_src(link->cset, to, &mgctx);
spin_unlock_irq(&css_set_lock);
- ret = cgroup_migrate_prepare_dst(&preloaded_csets);
+ ret = cgroup_migrate_prepare_dst(&mgctx);
if (ret)
goto out_err;
@@ -125,14 +125,14 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
css_task_iter_end(&it);
if (task) {
- ret = cgroup_migrate(task, false, to->root);
+ ret = cgroup_migrate(task, false, &mgctx);
if (!ret)
trace_cgroup_transfer_tasks(to, task, false);
put_task_struct(task);
}
} while (task && !ret);
out_err:
- cgroup_migrate_finish(&preloaded_csets);
+ cgroup_migrate_finish(&mgctx);
percpu_up_write(&cgroup_threadgroup_rwsem);
mutex_unlock(&cgroup_mutex);
return ret;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index d9d82e9..fe374f8 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -159,7 +159,7 @@ static bool cgrp_dfl_visible;
static u16 cgrp_dfl_inhibit_ss_mask;
/* some controllers are implicitly enabled on the default hierarchy */
-static unsigned long cgrp_dfl_implicit_ss_mask;
+static u16 cgrp_dfl_implicit_ss_mask;
/* The list of hierarchy roots */
LIST_HEAD(cgroup_roots);
@@ -178,13 +178,13 @@ static DEFINE_IDR(cgroup_hierarchy_idr);
static u64 css_serial_nr_next = 1;
/*
- * These bitmask flags indicate whether tasks in the fork and exit paths have
- * fork/exit handlers to call. This avoids us having to do extra work in the
- * fork/exit path to check which subsystems have fork/exit callbacks.
+ * These bitmasks identify subsystems with specific features to avoid
+ * having to do iterative checks repeatedly.
*/
static u16 have_fork_callback __read_mostly;
static u16 have_exit_callback __read_mostly;
static u16 have_free_callback __read_mostly;
+static u16 have_canfork_callback __read_mostly;
/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
@@ -195,9 +195,6 @@ struct cgroup_namespace init_cgroup_ns = {
.root_cset = &init_css_set,
};
-/* Ditto for the can_fork callback. */
-static u16 have_canfork_callback __read_mostly;
-
static struct file_system_type cgroup2_fs_type;
static struct cftype cgroup_base_files[];
@@ -1916,49 +1913,18 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
-/* used to track tasks and other necessary states during migration */
-struct cgroup_taskset {
- /* the src and dst cset list running through cset->mg_node */
- struct list_head src_csets;
- struct list_head dst_csets;
-
- /* the subsys currently being processed */
- int ssid;
-
- /*
- * Fields for cgroup_taskset_*() iteration.
- *
- * Before migration is committed, the target migration tasks are on
- * ->mg_tasks of the csets on ->src_csets. After, on ->mg_tasks of
- * the csets on ->dst_csets. ->csets point to either ->src_csets
- * or ->dst_csets depending on whether migration is committed.
- *
- * ->cur_csets and ->cur_task point to the current task position
- * during iteration.
- */
- struct list_head *csets;
- struct css_set *cur_cset;
- struct task_struct *cur_task;
-};
-
-#define CGROUP_TASKSET_INIT(tset) (struct cgroup_taskset){ \
- .src_csets = LIST_HEAD_INIT(tset.src_csets), \
- .dst_csets = LIST_HEAD_INIT(tset.dst_csets), \
- .csets = &tset.src_csets, \
-}
-
/**
- * cgroup_taskset_add - try to add a migration target task to a taskset
+ * cgroup_migrate_add_task - add a migration target task to a migration context
* @task: target task
- * @tset: target taskset
+ * @mgctx: target migration context
*
- * Add @task, which is a migration target, to @tset. This function becomes
- * noop if @task doesn't need to be migrated. @task's css_set should have
- * been added as a migration source and @task->cg_list will be moved from
- * the css_set's tasks list to mg_tasks one.
+ * Add @task, which is a migration target, to @mgctx->tset. This function
+ * becomes noop if @task doesn't need to be migrated. @task's css_set
+ * should have been added as a migration source and @task->cg_list will be
+ * moved from the css_set's tasks list to mg_tasks one.
*/
-static void cgroup_taskset_add(struct task_struct *task,
- struct cgroup_taskset *tset)
+static void cgroup_migrate_add_task(struct task_struct *task,
+ struct cgroup_mgctx *mgctx)
{
struct css_set *cset;
@@ -1978,10 +1944,11 @@ static void cgroup_taskset_add(struct task_struct *task,
list_move_tail(&task->cg_list, &cset->mg_tasks);
if (list_empty(&cset->mg_node))
- list_add_tail(&cset->mg_node, &tset->src_csets);
+ list_add_tail(&cset->mg_node,
+ &mgctx->tset.src_csets);
if (list_empty(&cset->mg_dst_cset->mg_node))
- list_move_tail(&cset->mg_dst_cset->mg_node,
- &tset->dst_csets);
+ list_add_tail(&cset->mg_dst_cset->mg_node,
+ &mgctx->tset.dst_csets);
}
/**
@@ -2048,17 +2015,16 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
/**
* cgroup_taskset_migrate - migrate a taskset
- * @tset: taget taskset
- * @root: cgroup root the migration is taking place on
+ * @mgctx: migration context
*
- * Migrate tasks in @tset as setup by migration preparation functions.
+ * Migrate tasks in @mgctx as setup by migration preparation functions.
* This function fails iff one of the ->can_attach callbacks fails and
- * guarantees that either all or none of the tasks in @tset are migrated.
- * @tset is consumed regardless of success.
+ * guarantees that either all or none of the tasks in @mgctx are migrated.
+ * @mgctx is consumed regardless of success.
*/
-static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
- struct cgroup_root *root)
+static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
{
+ struct cgroup_taskset *tset = &mgctx->tset;
struct cgroup_subsys *ss;
struct task_struct *task, *tmp_task;
struct css_set *cset, *tmp_cset;
@@ -2069,7 +2035,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
return 0;
/* check that we can legitimately attach to the cgroup */
- do_each_subsys_mask(ss, ssid, root->subsys_mask) {
+ do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
if (ss->can_attach) {
tset->ssid = ssid;
ret = ss->can_attach(tset);
@@ -2105,7 +2071,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
*/
tset->csets = &tset->dst_csets;
- do_each_subsys_mask(ss, ssid, root->subsys_mask) {
+ do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
if (ss->attach) {
tset->ssid = ssid;
ss->attach(tset);
@@ -2116,7 +2082,7 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset,
goto out_release_tset;
out_cancel_attach:
- do_each_subsys_mask(ss, ssid, root->subsys_mask) {
+ do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
if (ssid == failed_ssid)
break;
if (ss->cancel_attach) {
@@ -2151,25 +2117,31 @@ bool cgroup_may_migrate_to(struct cgroup *dst_cgrp)
/**
* cgroup_migrate_finish - cleanup after attach
- * @preloaded_csets: list of preloaded css_sets
+ * @mgctx: migration context
*
* Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst(). See
* those functions for details.
*/
-void cgroup_migrate_finish(struct list_head *preloaded_csets)
+void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
{
+ LIST_HEAD(preloaded);
struct css_set *cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
- list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) {
+
+ list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
+ list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
+
+ list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
cset->mg_src_cgrp = NULL;
cset->mg_dst_cgrp = NULL;
cset->mg_dst_cset = NULL;
list_del_init(&cset->mg_preload_node);
put_css_set_locked(cset);
}
+
spin_unlock_irq(&css_set_lock);
}
@@ -2177,10 +2149,10 @@ void cgroup_migrate_finish(struct list_head *preloaded_csets)
* cgroup_migrate_add_src - add a migration source css_set
* @src_cset: the source css_set to add
* @dst_cgrp: the destination cgroup
- * @preloaded_csets: list of preloaded css_sets
+ * @mgctx: migration context
*
* Tasks belonging to @src_cset are about to be migrated to @dst_cgrp. Pin
- * @src_cset and add it to @preloaded_csets, which should later be cleaned
+ * @src_cset and add it to @mgctx->preloaded_src_csets, which should later be cleaned
* up by cgroup_migrate_finish().
*
* This function may be called without holding cgroup_threadgroup_rwsem
@@ -2191,7 +2163,7 @@ void cgroup_migrate_finish(struct list_head *preloaded_csets)
*/
void cgroup_migrate_add_src(struct css_set *src_cset,
struct cgroup *dst_cgrp,
- struct list_head *preloaded_csets)
+ struct cgroup_mgctx *mgctx)
{
struct cgroup *src_cgrp;
@@ -2219,33 +2191,35 @@ void cgroup_migrate_add_src(struct css_set *src_cset,
src_cset->mg_src_cgrp = src_cgrp;
src_cset->mg_dst_cgrp = dst_cgrp;
get_css_set(src_cset);
- list_add(&src_cset->mg_preload_node, preloaded_csets);
+ list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
}
/**
* cgroup_migrate_prepare_dst - prepare destination css_sets for migration
- * @preloaded_csets: list of preloaded source css_sets
+ * @mgctx: migration context
*
* Tasks are about to be moved and all the source css_sets have been
- * preloaded to @preloaded_csets. This function looks up and pins all
- * destination css_sets, links each to its source, and append them to
- * @preloaded_csets.
+ * preloaded to @mgctx->preloaded_src_csets. This function looks up and
+ * pins all destination css_sets, links each to its source, and appends them
+ * to @mgctx->preloaded_dst_csets.
*
* This function must be called after cgroup_migrate_add_src() has been
* called on each migration source css_set. After migration is performed
* using cgroup_migrate(), cgroup_migrate_finish() must be called on
- * @preloaded_csets.
+ * @mgctx.
*/
-int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets)
+int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
{
- LIST_HEAD(csets);
struct css_set *src_cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
/* look up the dst cset for each src cset and link it to src */
- list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) {
+ list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
+ mg_preload_node) {
struct css_set *dst_cset;
+ struct cgroup_subsys *ss;
+ int ssid;
dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
if (!dst_cset)
@@ -2270,15 +2244,19 @@ int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets)
src_cset->mg_dst_cset = dst_cset;
if (list_empty(&dst_cset->mg_preload_node))
- list_add(&dst_cset->mg_preload_node, &csets);
+ list_add_tail(&dst_cset->mg_preload_node,
+ &mgctx->preloaded_dst_csets);
else
put_css_set(dst_cset);
+
+ for_each_subsys(ss, ssid)
+ if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
+ mgctx->ss_mask |= 1 << ssid;
}
- list_splice_tail(&csets, preloaded_csets);
return 0;
err:
- cgroup_migrate_finish(&csets);
+ cgroup_migrate_finish(mgctx);
return -ENOMEM;
}
@@ -2286,7 +2264,7 @@ int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets)
* cgroup_migrate - migrate a process or task to a cgroup
* @leader: the leader of the process or the task to migrate
* @threadgroup: whether @leader points to the whole process or a single task
- * @root: cgroup root migration is taking place on
+ * @mgctx: migration context
*
* Migrate a process or task denoted by @leader. If migrating a process,
* the caller must be holding cgroup_threadgroup_rwsem. The caller is also
@@ -2301,9 +2279,8 @@ int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets)
* actually starting migrating.
*/
int cgroup_migrate(struct task_struct *leader, bool threadgroup,
- struct cgroup_root *root)
+ struct cgroup_mgctx *mgctx)
{
- struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
struct task_struct *task;
/*
@@ -2315,14 +2292,14 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,
rcu_read_lock();
task = leader;
do {
- cgroup_taskset_add(task, &tset);
+ cgroup_migrate_add_task(task, mgctx);
if (!threadgroup)
break;
} while_each_thread(leader, task);
rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
- return cgroup_taskset_migrate(&tset, root);
+ return cgroup_migrate_execute(mgctx);
}
/**
@@ -2336,7 +2313,7 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,
int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
bool threadgroup)
{
- LIST_HEAD(preloaded_csets);
+ DEFINE_CGROUP_MGCTX(mgctx);
struct task_struct *task;
int ret;
@@ -2348,8 +2325,7 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
rcu_read_lock();
task = leader;
do {
- cgroup_migrate_add_src(task_css_set(task), dst_cgrp,
- &preloaded_csets);
+ cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
if (!threadgroup)
break;
} while_each_thread(leader, task);
@@ -2357,11 +2333,11 @@ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
spin_unlock_irq(&css_set_lock);
/* prepare dst csets and commit */
- ret = cgroup_migrate_prepare_dst(&preloaded_csets);
+ ret = cgroup_migrate_prepare_dst(&mgctx);
if (!ret)
- ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
+ ret = cgroup_migrate(leader, threadgroup, &mgctx);
- cgroup_migrate_finish(&preloaded_csets);
+ cgroup_migrate_finish(&mgctx);
if (!ret)
trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
@@ -2373,20 +2349,9 @@ static int cgroup_procs_write_permission(struct task_struct *task,
struct cgroup *dst_cgrp,
struct kernfs_open_file *of)
{
- const struct cred *cred = current_cred();
- const struct cred *tcred = get_task_cred(task);
int ret = 0;
- /*
- * even if we're attaching all tasks in the thread group, we only
- * need to check permissions on one of them.
- */
- if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
- !uid_eq(cred->euid, tcred->uid) &&
- !uid_eq(cred->euid, tcred->suid))
- ret = -EACCES;
-
- if (!ret && cgroup_on_dfl(dst_cgrp)) {
+ if (cgroup_on_dfl(dst_cgrp)) {
struct super_block *sb = of->file->f_path.dentry->d_sb;
struct cgroup *cgrp;
struct inode *inode;
@@ -2404,9 +2369,21 @@ static int cgroup_procs_write_permission(struct task_struct *task,
ret = inode_permission(inode, MAY_WRITE);
iput(inode);
}
+ } else {
+ const struct cred *cred = current_cred();
+ const struct cred *tcred = get_task_cred(task);
+
+ /*
+ * even if we're attaching all tasks in the thread group,
+ * we only need to check permissions on one of them.
+ */
+ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
+ !uid_eq(cred->euid, tcred->uid) &&
+ !uid_eq(cred->euid, tcred->suid))
+ ret = -EACCES;
+ put_cred(tcred);
}
- put_cred(tcred);
return ret;
}
@@ -2528,8 +2505,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
*/
static int cgroup_update_dfl_csses(struct cgroup *cgrp)
{
- LIST_HEAD(preloaded_csets);
- struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset);
+ DEFINE_CGROUP_MGCTX(mgctx);
struct cgroup_subsys_state *d_css;
struct cgroup *dsct;
struct css_set *src_cset;
@@ -2545,33 +2521,28 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
struct cgrp_cset_link *link;
list_for_each_entry(link, &dsct->cset_links, cset_link)
- cgroup_migrate_add_src(link->cset, dsct,
- &preloaded_csets);
+ cgroup_migrate_add_src(link->cset, dsct, &mgctx);
}
spin_unlock_irq(&css_set_lock);
/* NULL dst indicates self on default hierarchy */
- ret = cgroup_migrate_prepare_dst(&preloaded_csets);
+ ret = cgroup_migrate_prepare_dst(&mgctx);
if (ret)
goto out_finish;
spin_lock_irq(&css_set_lock);
- list_for_each_entry(src_cset, &preloaded_csets, mg_preload_node) {
+ list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
struct task_struct *task, *ntask;
- /* src_csets precede dst_csets, break on the first dst_cset */
- if (!src_cset->mg_src_cgrp)
- break;
-
/* all tasks in src_csets need to be migrated */
list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
- cgroup_taskset_add(task, &tset);
+ cgroup_migrate_add_task(task, &mgctx);
}
spin_unlock_irq(&css_set_lock);
- ret = cgroup_taskset_migrate(&tset, cgrp->root);
+ ret = cgroup_migrate_execute(&mgctx);
out_finish:
- cgroup_migrate_finish(&preloaded_csets);
+ cgroup_migrate_finish(&mgctx);
percpu_up_write(&cgroup_threadgroup_rwsem);
return ret;
}
@@ -4098,6 +4069,11 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
return ERR_PTR(err);
}
+/*
+ * The returned cgroup is fully initialized including its control mask, but
+ * it isn't associated with its kernfs_node and doesn't have the control
+ * mask applied.
+ */
static struct cgroup *cgroup_create(struct cgroup *parent)
{
struct cgroup_root *root = parent->root;
@@ -4165,11 +4141,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
cgroup_propagate_control(cgrp);
- /* @cgrp doesn't have dir yet so the following will only create csses */
- ret = cgroup_apply_control_enable(cgrp);
- if (ret)
- goto out_destroy;
-
return cgrp;
out_cancel_ref:
@@ -4177,9 +4148,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
out_free_cgrp:
kfree(cgrp);
return ERR_PTR(ret);
-out_destroy:
- cgroup_destroy_locked(cgrp);
- return ERR_PTR(ret);
}
int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index ab15509..d72128d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10792,5 +10792,11 @@ struct cgroup_subsys perf_event_cgrp_subsys = {
.css_alloc = perf_cgroup_css_alloc,
.css_free = perf_cgroup_css_free,
.attach = perf_cgroup_attach,
+ /*
+ * Implicitly enable on dfl hierarchy so that perf events can
+ * always be filtered by cgroup2 path as long as perf_event
+ * controller is not mounted on a legacy hierarchy.
+ */
+ .implicit_on_dfl = true,
};
#endif /* CONFIG_CGROUP_PERF */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 8fdee24..eafbf114 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -12,8 +12,8 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
{
FILE *fp;
char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
+ char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
char *token, *saved_ptr = NULL;
- int found = 0;
fp = fopen("/proc/mounts", "r");
if (!fp)
@@ -24,31 +24,43 @@ cgroupfs_find_mountpoint(char *buf, size_t maxlen)
* and inspect every cgroupfs mount point to find one that has
* perf_event subsystem
*/
+ path_v1[0] = '\0';
+ path_v2[0] = '\0';
+
while (fscanf(fp, "%*s %"STR(PATH_MAX)"s %"STR(PATH_MAX)"s %"
STR(PATH_MAX)"s %*d %*d\n",
mountpoint, type, tokens) == 3) {
- if (!strcmp(type, "cgroup")) {
+ if (!path_v1[0] && !strcmp(type, "cgroup")) {
token = strtok_r(tokens, ",", &saved_ptr);
while (token != NULL) {
if (!strcmp(token, "perf_event")) {
- found = 1;
+ strcpy(path_v1, mountpoint);
break;
}
token = strtok_r(NULL, ",", &saved_ptr);
}
}
- if (found)
+
+ if (!path_v2[0] && !strcmp(type, "cgroup2"))
+ strcpy(path_v2, mountpoint);
+
+ if (path_v1[0] && path_v2[0])
break;
}
fclose(fp);
- if (!found)
+
+ if (path_v1[0])
+ path = path_v1;
+ else if (path_v2[0])
+ path = path_v2;
+ else
return -1;
- if (strlen(mountpoint) < maxlen) {
- strcpy(buf, mountpoint);
+ if (strlen(path) < maxlen) {
+ strcpy(buf, path);
return 0;
}
return -1;