Merge branch 'for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
"Nothing too interesting. The only two noticeable changes are a subtle
cpuset behavior fix and trace event id field being expanded to u64
from int. Most others are code cleanups"
* 'for-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cpuset: convert 'allowed' in __cpuset_node_allowed() to be boolean
cgroup/rstat: check updated_next only for root
cgroup: rstat: explicitly put loop variant in while
cgroup: return early if it is already on preloaded list
cgroup/cpuset: Don't let child cpusets restrict parent in default hierarchy
cgroup: Trace event cgroup id fields should be u64
cgroup: fix a typo in comment
cgroup: get the wrong css for css_alloc() during cgroup_init_subsys()
cgroup: rstat: Mark benign data race to silence KCSAN
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 411684c..1bfcfb1 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -413,7 +413,7 @@ struct cgroup {
/*
* The bitmask of subsystems enabled on the child cgroups.
* ->subtree_control is the one configured through
- * "cgroup.subtree_control" while ->child_ss_mask is the effective
+ * "cgroup.subtree_control" while ->subtree_ss_mask is the effective
* one which may have more subsystems enabled. Controller knobs
* are made available iff it's enabled in ->subtree_control.
*/
diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h
index 7f42a3d..dd7d7c9 100644
--- a/include/trace/events/cgroup.h
+++ b/include/trace/events/cgroup.h
@@ -59,8 +59,8 @@ DECLARE_EVENT_CLASS(cgroup,
TP_STRUCT__entry(
__field( int, root )
- __field( int, id )
__field( int, level )
+ __field( u64, id )
__string( path, path )
),
@@ -71,7 +71,7 @@ DECLARE_EVENT_CLASS(cgroup,
__assign_str(path, path);
),
- TP_printk("root=%d id=%d level=%d path=%s",
+ TP_printk("root=%d id=%llu level=%d path=%s",
__entry->root, __entry->id, __entry->level, __get_str(path))
);
@@ -126,8 +126,8 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
TP_STRUCT__entry(
__field( int, dst_root )
- __field( int, dst_id )
__field( int, dst_level )
+ __field( u64, dst_id )
__field( int, pid )
__string( dst_path, path )
__string( comm, task->comm )
@@ -142,7 +142,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate,
__assign_str(comm, task->comm);
),
- TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s",
+ TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s",
__entry->dst_root, __entry->dst_id, __entry->dst_level,
__get_str(dst_path), __entry->pid, __get_str(comm))
);
@@ -171,8 +171,8 @@ DECLARE_EVENT_CLASS(cgroup_event,
TP_STRUCT__entry(
__field( int, root )
- __field( int, id )
__field( int, level )
+ __field( u64, id )
__string( path, path )
__field( int, val )
),
@@ -185,7 +185,7 @@ DECLARE_EVENT_CLASS(cgroup_event,
__entry->val = val;
),
- TP_printk("root=%d id=%d level=%d path=%s val=%d",
+ TP_printk("root=%d id=%llu level=%d path=%s val=%d",
__entry->root, __entry->id, __entry->level, __get_str(path),
__entry->val)
);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 581ca5a..b31e146 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2651,11 +2651,11 @@ void cgroup_migrate_add_src(struct css_set *src_cset,
if (src_cset->dead)
return;
- src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
-
if (!list_empty(&src_cset->mg_preload_node))
return;
+ src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
+
WARN_ON(src_cset->mg_src_cgrp);
WARN_ON(src_cset->mg_dst_cgrp);
WARN_ON(!list_empty(&src_cset->mg_tasks));
@@ -5743,7 +5743,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
/* Create the root cgroup state for this subsystem */
ss->root = &cgrp_dfl_root;
- css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
+ css = ss->css_alloc(NULL);
/* We don't handle early failures gracefully */
BUG_ON(IS_ERR(css));
init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index d0e163a..dc653ab 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -616,19 +616,11 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
struct cpuset *c, *par;
int ret;
- rcu_read_lock();
-
- /* Each of our child cpusets must be a subset of us */
- ret = -EBUSY;
- cpuset_for_each_child(c, css, cur)
- if (!is_cpuset_subset(c, trial))
- goto out;
-
- /* Remaining checks don't apply to root cpuset */
- ret = 0;
+ /* The checks don't apply to root cpuset */
if (cur == &top_cpuset)
- goto out;
+ return 0;
+ rcu_read_lock();
par = parent_cs(cur);
/* On legacy hierarchy, we must be a subset of our parent cpuset. */
@@ -3536,7 +3528,7 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
{
struct cpuset *cs; /* current cpuset ancestors */
- int allowed; /* is allocation in zone z allowed? */
+ bool allowed; /* is allocation in zone z allowed? */
unsigned long flags;
if (in_interrupt())
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 1486768..9d331ba 100644
--- a/kernel/cgroup/rstat.c
+++ b/kernel/cgroup/rstat.c
@@ -35,7 +35,7 @@ void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
* instead of NULL, we can tell whether @cgrp is on the list by
* testing the next pointer for NULL.
*/
- if (cgroup_rstat_cpu(cgrp, cpu)->updated_next)
+ if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
return;
raw_spin_lock_irqsave(cpu_lock, flags);
@@ -88,6 +88,7 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
struct cgroup *root, int cpu)
{
struct cgroup_rstat_cpu *rstatc;
+ struct cgroup *parent;
if (pos == root)
return NULL;
@@ -96,10 +97,14 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
* We're gonna walk down to the first leaf and visit/remove it. We
* can pick whatever unvisited node as the starting point.
*/
- if (!pos)
+ if (!pos) {
pos = root;
- else
+ /* return NULL if this subtree is not on-list */
+ if (!cgroup_rstat_cpu(pos, cpu)->updated_next)
+ return NULL;
+ } else {
pos = cgroup_parent(pos);
+ }
/* walk down to the first leaf */
while (true) {
@@ -115,33 +120,25 @@ static struct cgroup *cgroup_rstat_cpu_pop_updated(struct cgroup *pos,
* However, due to the way we traverse, @pos will be the first
* child in most cases. The only exception is @root.
*/
- if (rstatc->updated_next) {
- struct cgroup *parent = cgroup_parent(pos);
+ parent = cgroup_parent(pos);
+ if (parent) {
+ struct cgroup_rstat_cpu *prstatc;
+ struct cgroup **nextp;
- if (parent) {
- struct cgroup_rstat_cpu *prstatc;
- struct cgroup **nextp;
+ prstatc = cgroup_rstat_cpu(parent, cpu);
+ nextp = &prstatc->updated_children;
+ while (*nextp != pos) {
+ struct cgroup_rstat_cpu *nrstatc;
- prstatc = cgroup_rstat_cpu(parent, cpu);
- nextp = &prstatc->updated_children;
- while (true) {
- struct cgroup_rstat_cpu *nrstatc;
-
- nrstatc = cgroup_rstat_cpu(*nextp, cpu);
- if (*nextp == pos)
- break;
- WARN_ON_ONCE(*nextp == parent);
- nextp = &nrstatc->updated_next;
- }
- *nextp = rstatc->updated_next;
+ nrstatc = cgroup_rstat_cpu(*nextp, cpu);
+ WARN_ON_ONCE(*nextp == parent);
+ nextp = &nrstatc->updated_next;
}
-
- rstatc->updated_next = NULL;
- return pos;
+ *nextp = rstatc->updated_next;
}
- /* only happens for @root */
- return NULL;
+ rstatc->updated_next = NULL;
+ return pos;
}
/* see cgroup_rstat_flush() */