cgroups: revert "cgroups: fix pid namespace bug"
The following series adds a "cgroup.procs" file to each cgroup that
reports unique tgids rather than pids, and allows all threads in a
threadgroup to be atomically moved to a new cgroup.
The subsystem "attach" interface is modified to support attaching whole
threadgroups at a time, which could introduce potential problems if any
subsystem were to need to access the old cgroup of every thread being
moved. The attach interface may need to be revised if this becomes the
case.
Also added is functionality for read/write locking all CLONE_THREAD
fork()ing within a threadgroup, by means of an rwsem that lives in the
sighand_struct, for per-threadgroup-ness and also for sharing a cacheline
with the sighand's atomic count. This scheme should introduce no extra
overhead in the fork path when there's no contention.
The final patch reveals potential for a race when forking before a
subsystem's attach function is called - one potential solution in case any
subsystem has this problem is to hang on to the group's fork mutex through
the attach() calls, though no subsystem yet demonstrates need for an
extended critical section.
This patch:
Revert
commit 096b7fe012d66ed55e98bc8022405ede0cc80e96
Author: Li Zefan <lizf@cn.fujitsu.com>
AuthorDate: Wed Jul 29 15:04:04 2009 -0700
Commit: Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Wed Jul 29 19:10:35 2009 -0700
cgroups: fix pid namespace bug
This is in preparation for some clashing cgroups changes that subsume the
original commit's functionaliy.
The original commit fixed a pid namespace bug which Ben Blum fixed
independently (in the same way, but with different code) as part of a
series of patches. I played around with trying to reconcile Ben's patch
series with Li's patch, but concluded that it was simpler to just revert
Li's, given that Ben's patch series contained essentially the same fix.
Signed-off-by: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 14efffe..22db0a7 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1121,7 +1121,6 @@
INIT_LIST_HEAD(&cgrp->children);
INIT_LIST_HEAD(&cgrp->css_sets);
INIT_LIST_HEAD(&cgrp->release_list);
- INIT_LIST_HEAD(&cgrp->pids_list);
init_rwsem(&cgrp->pids_mutex);
}
@@ -2431,30 +2430,12 @@
return ret;
}
-/*
- * Cache pids for all threads in the same pid namespace that are
- * opening the same "tasks" file.
- */
-struct cgroup_pids {
- /* The node in cgrp->pids_list */
- struct list_head list;
- /* The cgroup those pids belong to */
- struct cgroup *cgrp;
- /* The namepsace those pids belong to */
- struct pid_namespace *ns;
- /* Array of process ids in the cgroup */
- pid_t *tasks_pids;
- /* How many files are using the this tasks_pids array */
- int use_count;
- /* Length of the current tasks_pids array */
- int length;
-};
-
static int cmppid(const void *a, const void *b)
{
return *(pid_t *)a - *(pid_t *)b;
}
+
/*
* seq_file methods for the "tasks" file. The seq_file position is the
* next pid to display; the seq_file iterator is a pointer to the pid
@@ -2469,47 +2450,45 @@
* after a seek to the start). Use a binary-search to find the
* next pid to display, if any
*/
- struct cgroup_pids *cp = s->private;
- struct cgroup *cgrp = cp->cgrp;
+ struct cgroup *cgrp = s->private;
int index = 0, pid = *pos;
int *iter;
down_read(&cgrp->pids_mutex);
if (pid) {
- int end = cp->length;
+ int end = cgrp->pids_length;
while (index < end) {
int mid = (index + end) / 2;
- if (cp->tasks_pids[mid] == pid) {
+ if (cgrp->tasks_pids[mid] == pid) {
index = mid;
break;
- } else if (cp->tasks_pids[mid] <= pid)
+ } else if (cgrp->tasks_pids[mid] <= pid)
index = mid + 1;
else
end = mid;
}
}
/* If we're off the end of the array, we're done */
- if (index >= cp->length)
+ if (index >= cgrp->pids_length)
return NULL;
/* Update the abstract position to be the actual pid that we found */
- iter = cp->tasks_pids + index;
+ iter = cgrp->tasks_pids + index;
*pos = *iter;
return iter;
}
static void cgroup_tasks_stop(struct seq_file *s, void *v)
{
- struct cgroup_pids *cp = s->private;
- struct cgroup *cgrp = cp->cgrp;
+ struct cgroup *cgrp = s->private;
up_read(&cgrp->pids_mutex);
}
static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct cgroup_pids *cp = s->private;
+ struct cgroup *cgrp = s->private;
int *p = v;
- int *end = cp->tasks_pids + cp->length;
+ int *end = cgrp->tasks_pids + cgrp->pids_length;
/*
* Advance to the next pid in the array. If this goes off the
@@ -2536,33 +2515,26 @@
.show = cgroup_tasks_show,
};
-static void release_cgroup_pid_array(struct cgroup_pids *cp)
+static void release_cgroup_pid_array(struct cgroup *cgrp)
{
- struct cgroup *cgrp = cp->cgrp;
-
down_write(&cgrp->pids_mutex);
- BUG_ON(!cp->use_count);
- if (!--cp->use_count) {
- list_del(&cp->list);
- put_pid_ns(cp->ns);
- kfree(cp->tasks_pids);
- kfree(cp);
+ BUG_ON(!cgrp->pids_use_count);
+ if (!--cgrp->pids_use_count) {
+ kfree(cgrp->tasks_pids);
+ cgrp->tasks_pids = NULL;
+ cgrp->pids_length = 0;
}
up_write(&cgrp->pids_mutex);
}
static int cgroup_tasks_release(struct inode *inode, struct file *file)
{
- struct seq_file *seq;
- struct cgroup_pids *cp;
+ struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
if (!(file->f_mode & FMODE_READ))
return 0;
- seq = file->private_data;
- cp = seq->private;
-
- release_cgroup_pid_array(cp);
+ release_cgroup_pid_array(cgrp);
return seq_release(inode, file);
}
@@ -2581,8 +2553,6 @@
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
- struct pid_namespace *ns = current->nsproxy->pid_ns;
- struct cgroup_pids *cp;
pid_t *pidarray;
int npids;
int retval;
@@ -2609,37 +2579,20 @@
* array if necessary
*/
down_write(&cgrp->pids_mutex);
-
- list_for_each_entry(cp, &cgrp->pids_list, list) {
- if (ns == cp->ns)
- goto found;
- }
-
- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
- if (!cp) {
- up_write(&cgrp->pids_mutex);
- kfree(pidarray);
- return -ENOMEM;
- }
- cp->cgrp = cgrp;
- cp->ns = ns;
- get_pid_ns(ns);
- list_add(&cp->list, &cgrp->pids_list);
-found:
- kfree(cp->tasks_pids);
- cp->tasks_pids = pidarray;
- cp->length = npids;
- cp->use_count++;
+ kfree(cgrp->tasks_pids);
+ cgrp->tasks_pids = pidarray;
+ cgrp->pids_length = npids;
+ cgrp->pids_use_count++;
up_write(&cgrp->pids_mutex);
file->f_op = &cgroup_tasks_operations;
retval = seq_open(file, &cgroup_tasks_seq_operations);
if (retval) {
- release_cgroup_pid_array(cp);
+ release_cgroup_pid_array(cgrp);
return retval;
}
- ((struct seq_file *)file->private_data)->private = cp;
+ ((struct seq_file *)file->private_data)->private = cgrp;
return 0;
}