cgroup: pin cgroup_subsys_state when opening a cgroupfs file
Previously, each file read/write operation relied on the inode
reference count pinning the cgroup and simply checked whether the
cgroup was marked dead before proceeding to invoke the per-subsystem
callback. This was rather silly as it didn't have any synchronization
or css pinning around the check and the cgroup may be removed and all
css refs drained between the DEAD check and actual method invocation.
This patch pins the css between open() and release() so that it is
guaranteed to be alive for all file operations and removes the silly
DEAD checks from cgroup_file_read/write().
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c4bc8da..583f8f6 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2277,6 +2277,17 @@
return 0;
}
+/* return the css for the given cgroup file, or NULL if it has no subsystem */
+static struct cgroup_subsys_state *cgroup_file_css(struct cfent *cfe)
+{
+ struct cftype *cft = cfe->type;
+ struct cgroup *cgrp = __d_cgrp(cfe->dentry->d_parent);
+
+ if (cft->ss)
+ return cgrp->subsys[cft->ss->subsys_id];
+ return NULL;
+}
+
/* A buffer size big enough for numbers or short strings */
#define CGROUP_LOCAL_BUFFER_SIZE 64
@@ -2354,8 +2365,6 @@
struct cftype *cft = __d_cft(file->f_dentry);
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- if (cgroup_is_dead(cgrp))
- return -ENODEV;
if (cft->write)
return cft->write(cgrp, cft, file, buf, nbytes, ppos);
if (cft->write_u64 || cft->write_s64)
@@ -2399,9 +2408,6 @@
struct cftype *cft = __d_cft(file->f_dentry);
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- if (cgroup_is_dead(cgrp))
- return -ENODEV;
-
if (cft->read)
return cft->read(cgrp, cft, file, buf, nbytes, ppos);
if (cft->read_u64)
@@ -2447,15 +2453,22 @@
static int cgroup_file_open(struct inode *inode, struct file *file)
{
+ struct cfent *cfe = __d_cfe(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_dentry);
+ struct cgroup_subsys_state *css = cgroup_file_css(cfe);
int err;
- struct cfent *cfe;
- struct cftype *cft;
err = generic_file_open(inode, file);
if (err)
return err;
- cfe = __d_cfe(file->f_dentry);
- cft = cfe->type;
+
+ /*
+ * If the file belongs to a subsystem, pin the css. Will be
+ * unpinned either on open failure or release. This ensures that
+ * @css stays alive for all file operations.
+ */
+ if (css && !css_tryget(css))
+ return -ENODEV;
if (cft->read_map || cft->read_seq_string) {
file->f_op = &cgroup_seqfile_operations;
@@ -2464,15 +2477,23 @@
err = cft->open(inode, file);
}
+ if (css && err)
+ css_put(css);
return err;
}
static int cgroup_file_release(struct inode *inode, struct file *file)
{
+ struct cfent *cfe = __d_cfe(file->f_dentry);
struct cftype *cft = __d_cft(file->f_dentry);
+ struct cgroup_subsys_state *css = cgroup_file_css(cfe);
+ int ret = 0;
+
if (cft->release)
- return cft->release(inode, file);
- return 0;
+ ret = cft->release(inode, file);
+ if (css)
+ css_put(css);
+ return ret;
}
/*