Blame - kernel/exit.c - SHIFTPHONES/mainline/linux

blob: aecb48ca7370800e0e0d802cfbf64867d78cef4b [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/kernel/exit.c
				3	*
				4	* Copyright (C) 1991, 1992 Linus Torvalds
				5	*/
				6
				7	#include <linux/config.h>
				8	#include <linux/mm.h>
				9	#include <linux/slab.h>
				10	#include <linux/interrupt.h>
				11	#include <linux/smp_lock.h>
				12	#include <linux/module.h>
Randy.Dunlap	c59ede7	2006-01-11 12:17:46 -0800	[diff] [blame]	13	#include <linux/capability.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	14	#include <linux/completion.h>
				15	#include <linux/personality.h>
				16	#include <linux/tty.h>
				17	#include <linux/namespace.h>
				18	#include <linux/key.h>
				19	#include <linux/security.h>
				20	#include <linux/cpu.h>
				21	#include <linux/acct.h>
				22	#include <linux/file.h>
				23	#include <linux/binfmts.h>
				24	#include <linux/ptrace.h>
				25	#include <linux/profile.h>
				26	#include <linux/mount.h>
				27	#include <linux/proc_fs.h>
				28	#include <linux/mempolicy.h>
				29	#include <linux/cpuset.h>
				30	#include <linux/syscalls.h>
Jesper Juhl	7ed20e1	2005-05-01 08:59:14 -0700	[diff] [blame]	31	#include <linux/signal.h>
Matt Helsley	9f46080	2005-11-07 00:59:16 -0800	[diff] [blame]	32	#include <linux/cn_proc.h>
Ingo Molnar	de5097c	2006-01-09 15:59:21 -0800	[diff] [blame]	33	#include <linux/mutex.h>
Ingo Molnar	0771dfe	2006-03-27 01:16:22 -0800	[diff] [blame^]	34	#include <linux/futex.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	35
				36	#include <asm/uaccess.h>
				37	#include <asm/unistd.h>
				38	#include <asm/pgtable.h>
				39	#include <asm/mmu_context.h>
				40
				41	extern void sem_exit (void);
				42	extern struct task_struct *child_reaper;
				43
				44	int getrusage(struct task_struct , int, struct rusage __user );
				45
Adrian Bunk	408b664	2005-05-01 08:59:29 -0700	[diff] [blame]	46	static void exit_mm(struct task_struct * tsk);
				47
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	48	static void __unhash_process(struct task_struct *p)
				49	{
				50	nr_threads--;
				51	detach_pid(p, PIDTYPE_PID);
				52	detach_pid(p, PIDTYPE_TGID);
				53	if (thread_group_leader(p)) {
				54	detach_pid(p, PIDTYPE_PGID);
				55	detach_pid(p, PIDTYPE_SID);
				56	if (p->pid)
				57	__get_cpu_var(process_counts)--;
				58	}
				59
				60	REMOVE_LINKS(p);
				61	}
				62
				63	void release_task(struct task_struct * p)
				64	{
				65	int zap_leader;
				66	task_t *leader;
				67	struct dentry *proc_dentry;
				68
				69	repeat:
				70	atomic_dec(&p->user->processes);
				71	spin_lock(&p->proc_lock);
				72	proc_dentry = proc_pid_unhash(p);
				73	write_lock_irq(&tasklist_lock);
				74	if (unlikely(p->ptrace))
				75	__ptrace_unlink(p);
				76	BUG_ON(!list_empty(&p->ptrace_list) \|\| !list_empty(&p->ptrace_children));
				77	__exit_signal(p);
Christoph Lameter	71a2224	2005-06-23 00:10:05 -0700	[diff] [blame]	78	/*
				79	* Note that the fastpath in sys_times depends on __exit_signal having
				80	* updated the counters before a task is removed from the tasklist of
				81	* the process by __unhash_process.
				82	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	83	__unhash_process(p);
				84
				85	/*
				86	* If we are the last non-leader member of the thread
				87	* group, and the leader is zombie, then notify the
				88	* group leader's parent process. (if it wants notification.)
				89	*/
				90	zap_leader = 0;
				91	leader = p->group_leader;
				92	if (leader != p && thread_group_empty(leader) && leader->exit_state == EXIT_ZOMBIE) {
				93	BUG_ON(leader->exit_signal == -1);
				94	do_notify_parent(leader, leader->exit_signal);
				95	/*
				96	* If we were the last child thread and the leader has
				97	* exited already, and the leader's parent ignores SIGCHLD,
				98	* then we are the one who should release the leader.
				99	*
				100	* do_notify_parent() will have marked it self-reaping in
				101	* that case.
				102	*/
				103	zap_leader = (leader->exit_signal == -1);
				104	}
				105
				106	sched_exit(p);
				107	write_unlock_irq(&tasklist_lock);
				108	spin_unlock(&p->proc_lock);
				109	proc_pid_flush(proc_dentry);
				110	release_thread(p);
				111	put_task_struct(p);
				112
				113	p = leader;
				114	if (unlikely(zap_leader))
				115	goto repeat;
				116	}
				117
				118	/* we are using it only for SMP init */
				119
				120	void unhash_process(struct task_struct *p)
				121	{
				122	struct dentry *proc_dentry;
				123
				124	spin_lock(&p->proc_lock);
				125	proc_dentry = proc_pid_unhash(p);
				126	write_lock_irq(&tasklist_lock);
				127	__unhash_process(p);
				128	write_unlock_irq(&tasklist_lock);
				129	spin_unlock(&p->proc_lock);
				130	proc_pid_flush(proc_dentry);
				131	}
				132
				133	/*
				134	* This checks not only the pgrp, but falls back on the pid if no
				135	* satisfactory pgrp is found. I dunno - gdb doesn't work correctly
				136	* without this...
				137	*/
				138	int session_of_pgrp(int pgrp)
				139	{
				140	struct task_struct *p;
				141	int sid = -1;
				142
				143	read_lock(&tasklist_lock);
				144	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
				145	if (p->signal->session > 0) {
				146	sid = p->signal->session;
				147	goto out;
				148	}
				149	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
				150	p = find_task_by_pid(pgrp);
				151	if (p)
				152	sid = p->signal->session;
				153	out:
				154	read_unlock(&tasklist_lock);
				155
				156	return sid;
				157	}
				158
				159	/*
				160	* Determine if a process group is "orphaned", according to the POSIX
				161	* definition in 2.2.2.52. Orphaned process groups are not to be affected
				162	* by terminal-generated stop signals. Newly orphaned process groups are
				163	* to receive a SIGHUP and a SIGCONT.
				164	*
				165	* "I ask you, have you ever known what it is to be an orphan?"
				166	*/
				167	static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
				168	{
				169	struct task_struct *p;
				170	int ret = 1;
				171
				172	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
				173	if (p == ignored_task
				174	\|\| p->exit_state
				175	\|\| p->real_parent->pid == 1)
				176	continue;
				177	if (process_group(p->real_parent) != pgrp
				178	&& p->real_parent->signal->session == p->signal->session) {
				179	ret = 0;
				180	break;
				181	}
				182	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
				183	return ret; /* (sighing) "Often!" */
				184	}
				185
				186	int is_orphaned_pgrp(int pgrp)
				187	{
				188	int retval;
				189
				190	read_lock(&tasklist_lock);
				191	retval = will_become_orphaned_pgrp(pgrp, NULL);
				192	read_unlock(&tasklist_lock);
				193
				194	return retval;
				195	}
				196
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	197	static int has_stopped_jobs(int pgrp)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	198	{
				199	int retval = 0;
				200	struct task_struct *p;
				201
				202	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
				203	if (p->state != TASK_STOPPED)
				204	continue;
				205
				206	/* If p is stopped by a debugger on a signal that won't
				207	stop it, then don't count p as stopped. This isn't
				208	perfect but it's a good approximation. */
				209	if (unlikely (p->ptrace)
				210	&& p->exit_code != SIGSTOP
				211	&& p->exit_code != SIGTSTP
				212	&& p->exit_code != SIGTTOU
				213	&& p->exit_code != SIGTTIN)
				214	continue;
				215
				216	retval = 1;
				217	break;
				218	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
				219	return retval;
				220	}
				221
				222	/**
Pavel Pisa	4dc3b16	2005-05-01 08:59:25 -0700	[diff] [blame]	223	* reparent_to_init - Reparent the calling kernel thread to the init task.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	224	*
				225	* If a kernel thread is launched as a result of a system call, or if
				226	* it ever exits, it should generally reparent itself to init so that
				227	* it is correctly cleaned up on exit.
				228	*
				229	* The various task state such as scheduling policy and priority may have
				230	* been inherited from a user process, so we reset them to sane values here.
				231	*
				232	* NOTE that reparent_to_init() gives the caller full capabilities.
				233	*/
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	234	static void reparent_to_init(void)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	235	{
				236	write_lock_irq(&tasklist_lock);
				237
				238	ptrace_unlink(current);
				239	/* Reparent to init */
				240	REMOVE_LINKS(current);
				241	current->parent = child_reaper;
				242	current->real_parent = child_reaper;
				243	SET_LINKS(current);
				244
				245	/* Set the exit signal to SIGCHLD so we signal init on exit */
				246	current->exit_signal = SIGCHLD;
				247
Ingo Molnar	b0a9499	2006-01-14 13:20:41 -0800	[diff] [blame]	248	if ((current->policy == SCHED_NORMAL \|\|
				249	current->policy == SCHED_BATCH)
				250	&& (task_nice(current) < 0))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	251	set_user_nice(current, 0);
				252	/* cpus_allowed? */
				253	/* rt_priority? */
				254	/* signals? */
				255	security_task_reparent_to_init(current);
				256	memcpy(current->signal->rlim, init_task.signal->rlim,
				257	sizeof(current->signal->rlim));
				258	atomic_inc(&(INIT_USER->__count));
				259	write_unlock_irq(&tasklist_lock);
				260	switch_uid(INIT_USER);
				261	}
				262
				263	void __set_special_pids(pid_t session, pid_t pgrp)
				264	{
Oren Laadan	e19f247	2006-01-08 01:03:58 -0800	[diff] [blame]	265	struct task_struct *curr = current->group_leader;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	266
				267	if (curr->signal->session != session) {
				268	detach_pid(curr, PIDTYPE_SID);
				269	curr->signal->session = session;
				270	attach_pid(curr, PIDTYPE_SID, session);
				271	}
				272	if (process_group(curr) != pgrp) {
				273	detach_pid(curr, PIDTYPE_PGID);
				274	curr->signal->pgrp = pgrp;
				275	attach_pid(curr, PIDTYPE_PGID, pgrp);
				276	}
				277	}
				278
				279	void set_special_pids(pid_t session, pid_t pgrp)
				280	{
				281	write_lock_irq(&tasklist_lock);
				282	__set_special_pids(session, pgrp);
				283	write_unlock_irq(&tasklist_lock);
				284	}
				285
				286	/*
				287	* Let kernel threads use this to say that they
				288	* allow a certain signal (since daemonize() will
				289	* have disabled all of them by default).
				290	*/
				291	int allow_signal(int sig)
				292	{
Jesper Juhl	7ed20e1	2005-05-01 08:59:14 -0700	[diff] [blame]	293	if (!valid_signal(sig) \|\| sig < 1)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	294	return -EINVAL;
				295
				296	spin_lock_irq(&current->sighand->siglock);
				297	sigdelset(&current->blocked, sig);
				298	if (!current->mm) {
				299	/* Kernel threads handle their own signals.
				300	Let the signal code know it'll be handled, so
				301	that they don't get converted to SIGKILL or
				302	just silently dropped */
				303	current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
				304	}
				305	recalc_sigpending();
				306	spin_unlock_irq(&current->sighand->siglock);
				307	return 0;
				308	}
				309
				310	EXPORT_SYMBOL(allow_signal);
				311
				312	int disallow_signal(int sig)
				313	{
Jesper Juhl	7ed20e1	2005-05-01 08:59:14 -0700	[diff] [blame]	314	if (!valid_signal(sig) \|\| sig < 1)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	315	return -EINVAL;
				316
				317	spin_lock_irq(&current->sighand->siglock);
				318	sigaddset(&current->blocked, sig);
				319	recalc_sigpending();
				320	spin_unlock_irq(&current->sighand->siglock);
				321	return 0;
				322	}
				323
				324	EXPORT_SYMBOL(disallow_signal);
				325
				326	/*
				327	* Put all the gunge required to become a kernel thread without
				328	* attached user resources in one place where it belongs.
				329	*/
				330
				331	void daemonize(const char *name, ...)
				332	{
				333	va_list args;
				334	struct fs_struct *fs;
				335	sigset_t blocked;
				336
				337	va_start(args, name);
				338	vsnprintf(current->comm, sizeof(current->comm), name, args);
				339	va_end(args);
				340
				341	/*
				342	* If we were started as result of loading a module, close all of the
				343	* user space pages. We don't need them, and if we didn't close them
				344	* they would be locked into memory.
				345	*/
				346	exit_mm(current);
				347
				348	set_special_pids(1, 1);
Ingo Molnar	70522e1	2006-03-23 03:00:31 -0800	[diff] [blame]	349	mutex_lock(&tty_mutex);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	350	current->signal->tty = NULL;
Ingo Molnar	70522e1	2006-03-23 03:00:31 -0800	[diff] [blame]	351	mutex_unlock(&tty_mutex);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	352
				353	/* Block and flush all signals */
				354	sigfillset(&blocked);
				355	sigprocmask(SIG_BLOCK, &blocked, NULL);
				356	flush_signals(current);
				357
				358	/* Become as one with the init task */
				359
				360	exit_fs(current); /* current->fs->count--; */
				361	fs = init_task.fs;
				362	current->fs = fs;
				363	atomic_inc(&fs->count);
Björn Steinbrink	5914811	2006-02-18 18:12:43 +0100	[diff] [blame]	364	exit_namespace(current);
				365	current->namespace = init_task.namespace;
				366	get_namespace(current->namespace);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	367	exit_files(current);
				368	current->files = init_task.files;
				369	atomic_inc(&current->files->count);
				370
				371	reparent_to_init();
				372	}
				373
				374	EXPORT_SYMBOL(daemonize);
				375
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	376	static void close_files(struct files_struct * files)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	377	{
				378	int i, j;
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	379	struct fdtable *fdt;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	380
				381	j = 0;
Dipankar Sarma	4fb3a53	2005-09-16 19:28:13 -0700	[diff] [blame]	382
				383	/*
				384	* It is safe to dereference the fd table without RCU or
				385	* ->file_lock because this is the last reference to the
				386	* files structure.
				387	*/
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	388	fdt = files_fdtable(files);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	389	for (;;) {
				390	unsigned long set;
				391	i = j * __NFDBITS;
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	392	if (i >= fdt->max_fdset \|\| i >= fdt->max_fds)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	393	break;
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	394	set = fdt->open_fds->fds_bits[j++];
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	395	while (set) {
				396	if (set & 1) {
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	397	struct file * file = xchg(&fdt->fd[i], NULL);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	398	if (file)
				399	filp_close(file, files);
				400	}
				401	i++;
				402	set >>= 1;
				403	}
				404	}
				405	}
				406
				407	struct files_struct get_files_struct(struct task_struct task)
				408	{
				409	struct files_struct *files;
				410
				411	task_lock(task);
				412	files = task->files;
				413	if (files)
				414	atomic_inc(&files->count);
				415	task_unlock(task);
				416
				417	return files;
				418	}
				419
				420	void fastcall put_files_struct(struct files_struct *files)
				421	{
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	422	struct fdtable *fdt;
				423
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	424	if (atomic_dec_and_test(&files->count)) {
				425	close_files(files);
				426	/*
				427	* Free the fd and fdset arrays if we expanded them.
Dipankar Sarma	ab2af1f	2005-09-09 13:04:13 -0700	[diff] [blame]	428	* If the fdtable was embedded, pass files for freeing
				429	* at the end of the RCU grace period. Otherwise,
				430	* you can free files immediately.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	431	*/
Dipankar Sarma	badf166	2005-09-09 13:04:10 -0700	[diff] [blame]	432	fdt = files_fdtable(files);
Dipankar Sarma	ab2af1f	2005-09-09 13:04:13 -0700	[diff] [blame]	433	if (fdt == &files->fdtab)
				434	fdt->free_files = files;
				435	else
				436	kmem_cache_free(files_cachep, files);
				437	free_fdtable(fdt);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	438	}
				439	}
				440
				441	EXPORT_SYMBOL(put_files_struct);
				442
				443	static inline void __exit_files(struct task_struct *tsk)
				444	{
				445	struct files_struct * files = tsk->files;
				446
				447	if (files) {
				448	task_lock(tsk);
				449	tsk->files = NULL;
				450	task_unlock(tsk);
				451	put_files_struct(files);
				452	}
				453	}
				454
				455	void exit_files(struct task_struct *tsk)
				456	{
				457	__exit_files(tsk);
				458	}
				459
				460	static inline void __put_fs_struct(struct fs_struct *fs)
				461	{
				462	/* No need to hold fs->lock if we are killing it */
				463	if (atomic_dec_and_test(&fs->count)) {
				464	dput(fs->root);
				465	mntput(fs->rootmnt);
				466	dput(fs->pwd);
				467	mntput(fs->pwdmnt);
				468	if (fs->altroot) {
				469	dput(fs->altroot);
				470	mntput(fs->altrootmnt);
				471	}
				472	kmem_cache_free(fs_cachep, fs);
				473	}
				474	}
				475
				476	void put_fs_struct(struct fs_struct *fs)
				477	{
				478	__put_fs_struct(fs);
				479	}
				480
				481	static inline void __exit_fs(struct task_struct *tsk)
				482	{
				483	struct fs_struct * fs = tsk->fs;
				484
				485	if (fs) {
				486	task_lock(tsk);
				487	tsk->fs = NULL;
				488	task_unlock(tsk);
				489	__put_fs_struct(fs);
				490	}
				491	}
				492
				493	void exit_fs(struct task_struct *tsk)
				494	{
				495	__exit_fs(tsk);
				496	}
				497
				498	EXPORT_SYMBOL_GPL(exit_fs);
				499
				500	/*
				501	* Turn us into a lazy TLB process if we
				502	* aren't already..
				503	*/
Adrian Bunk	408b664	2005-05-01 08:59:29 -0700	[diff] [blame]	504	static void exit_mm(struct task_struct * tsk)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	505	{
				506	struct mm_struct *mm = tsk->mm;
				507
				508	mm_release(tsk, mm);
				509	if (!mm)
				510	return;
				511	/*
				512	* Serialize with any possible pending coredump.
				513	* We must hold mmap_sem around checking core_waiters
				514	* and clearing tsk->mm. The core-inducing thread
				515	* will increment core_waiters for each thread in the
				516	* group with ->mm != NULL.
				517	*/
				518	down_read(&mm->mmap_sem);
				519	if (mm->core_waiters) {
				520	up_read(&mm->mmap_sem);
				521	down_write(&mm->mmap_sem);
				522	if (!--mm->core_waiters)
				523	complete(mm->core_startup_done);
				524	up_write(&mm->mmap_sem);
				525
				526	wait_for_completion(&mm->core_done);
				527	down_read(&mm->mmap_sem);
				528	}
				529	atomic_inc(&mm->mm_count);
				530	if (mm != tsk->active_mm) BUG();
				531	/* more a memory barrier than a real lock */
				532	task_lock(tsk);
				533	tsk->mm = NULL;
				534	up_read(&mm->mmap_sem);
				535	enter_lazy_tlb(mm, current);
				536	task_unlock(tsk);
				537	mmput(mm);
				538	}
				539
				540	static inline void choose_new_parent(task_t p, task_t reaper, task_t *child_reaper)
				541	{
				542	/*
				543	* Make sure we're not reparenting to ourselves and that
				544	* the parent is not a zombie.
				545	*/
				546	BUG_ON(p == reaper \|\| reaper->exit_state >= EXIT_ZOMBIE);
				547	p->real_parent = reaper;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	548	}
				549
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	550	static void reparent_thread(task_t p, task_t father, int traced)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	551	{
				552	/* We don't want people slaying init. */
				553	if (p->exit_signal != -1)
				554	p->exit_signal = SIGCHLD;
				555
				556	if (p->pdeath_signal)
				557	/* We already hold the tasklist_lock here. */
Oleg Nesterov	b67a1b9	2005-10-30 15:03:44 -0800	[diff] [blame]	558	group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	559
				560	/* Move the child from its dying parent to the new one. */
				561	if (unlikely(traced)) {
				562	/* Preserve ptrace links if someone else is tracing this child. */
				563	list_del_init(&p->ptrace_list);
				564	if (p->parent != p->real_parent)
				565	list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
				566	} else {
				567	/* If this child is being traced, then we're the one tracing it
				568	* anyway, so let go of it.
				569	*/
				570	p->ptrace = 0;
				571	list_del_init(&p->sibling);
				572	p->parent = p->real_parent;
				573	list_add_tail(&p->sibling, &p->parent->children);
				574
				575	/* If we'd notified the old parent about this child's death,
				576	* also notify the new parent.
				577	*/
				578	if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
				579	thread_group_empty(p))
				580	do_notify_parent(p, p->exit_signal);
				581	else if (p->state == TASK_TRACED) {
				582	/*
				583	* If it was at a trace stop, turn it into
				584	* a normal stop since it's no longer being
				585	* traced.
				586	*/
				587	ptrace_untrace(p);
				588	}
				589	}
				590
				591	/*
				592	* process group orphan check
				593	* Case ii: Our child is in a different pgrp
				594	* than we are, and it was the only connection
				595	* outside, so the child pgrp is now orphaned.
				596	*/
				597	if ((process_group(p) != process_group(father)) &&
				598	(p->signal->session == father->signal->session)) {
				599	int pgrp = process_group(p);
				600
				601	if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) {
Oleg Nesterov	b67a1b9	2005-10-30 15:03:44 -0800	[diff] [blame]	602	__kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp);
				603	__kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	604	}
				605	}
				606	}
				607
				608	/*
				609	* When we die, we re-parent all our children.
				610	* Try to give them to another thread in our thread
				611	* group, and if no such member exists, give it to
				612	* the global child reaper process (ie "init")
				613	*/
Arjan van de Ven	858119e	2006-01-14 13:20:43 -0800	[diff] [blame]	614	static void forget_original_parent(struct task_struct * father,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	615	struct list_head *to_release)
				616	{
				617	struct task_struct p, reaper = father;
				618	struct list_head _p, _n;
				619
				620	do {
				621	reaper = next_thread(reaper);
				622	if (reaper == father) {
				623	reaper = child_reaper;
				624	break;
				625	}
				626	} while (reaper->exit_state);
				627
				628	/*
				629	* There are only two places where our children can be:
				630	*
				631	* - in our child list
				632	* - in our ptraced child list
				633	*
				634	* Search them and reparent children.
				635	*/
				636	list_for_each_safe(_p, _n, &father->children) {
				637	int ptrace;
				638	p = list_entry(_p,struct task_struct,sibling);
				639
				640	ptrace = p->ptrace;
				641
				642	/* if father isn't the real parent, then ptrace must be enabled */
				643	BUG_ON(father != p->real_parent && !ptrace);
				644
				645	if (father == p->real_parent) {
				646	/* reparent with a reaper, real father it's us */
				647	choose_new_parent(p, reaper, child_reaper);
				648	reparent_thread(p, father, 0);
				649	} else {
				650	/* reparent ptraced task to its real parent */
				651	__ptrace_unlink (p);
				652	if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
				653	thread_group_empty(p))
				654	do_notify_parent(p, p->exit_signal);
				655	}
				656
				657	/*
				658	* if the ptraced child is a zombie with exit_signal == -1
				659	* we must collect it before we exit, or it will remain
				660	* zombie forever since we prevented it from self-reap itself
				661	* while it was being traced by us, to be able to see it in wait4.
				662	*/
				663	if (unlikely(ptrace && p->exit_state == EXIT_ZOMBIE && p->exit_signal == -1))
				664	list_add(&p->ptrace_list, to_release);
				665	}
				666	list_for_each_safe(_p, _n, &father->ptrace_children) {
				667	p = list_entry(_p,struct task_struct,ptrace_list);
				668	choose_new_parent(p, reaper, child_reaper);
				669	reparent_thread(p, father, 1);
				670	}
				671	}
				672
				673	/*
				674	* Send signals to all our closest relatives so that they know
				675	* to properly mourn us..
				676	*/
				677	static void exit_notify(struct task_struct *tsk)
				678	{
				679	int state;
				680	struct task_struct *t;
				681	struct list_head ptrace_dead, _p, _n;
				682
				683	if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
				684	&& !thread_group_empty(tsk)) {
				685	/*
				686	* This occurs when there was a race between our exit
				687	* syscall and a group signal choosing us as the one to
				688	* wake up. It could be that we are the only thread
				689	* alerted to check for pending signals, but another thread
				690	* should be woken now to take the signal since we will not.
				691	* Now we'll wake all the threads in the group just to make
				692	* sure someone gets all the pending signals.
				693	*/
				694	read_lock(&tasklist_lock);
				695	spin_lock_irq(&tsk->sighand->siglock);
				696	for (t = next_thread(tsk); t != tsk; t = next_thread(t))
				697	if (!signal_pending(t) && !(t->flags & PF_EXITING)) {
				698	recalc_sigpending_tsk(t);
				699	if (signal_pending(t))
				700	signal_wake_up(t, 0);
				701	}
				702	spin_unlock_irq(&tsk->sighand->siglock);
				703	read_unlock(&tasklist_lock);
				704	}
				705
				706	write_lock_irq(&tasklist_lock);
				707
				708	/*
				709	* This does two things:
				710	*
				711	* A. Make init inherit all the child processes
				712	* B. Check to see if any process groups have become orphaned
				713	* as a result of our exiting, and if they have any stopped
				714	* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
				715	*/
				716
				717	INIT_LIST_HEAD(&ptrace_dead);
				718	forget_original_parent(tsk, &ptrace_dead);
				719	BUG_ON(!list_empty(&tsk->children));
				720	BUG_ON(!list_empty(&tsk->ptrace_children));
				721
				722	/*
				723	* Check to see if any process groups have become orphaned
				724	* as a result of our exiting, and if they have any stopped
				725	* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
				726	*
				727	* Case i: Our father is in a different pgrp than we are
				728	* and we were the only connection outside, so our pgrp
				729	* is about to become orphaned.
				730	*/
				731
				732	t = tsk->real_parent;
				733
				734	if ((process_group(t) != process_group(tsk)) &&
				735	(t->signal->session == tsk->signal->session) &&
				736	will_become_orphaned_pgrp(process_group(tsk), tsk) &&
				737	has_stopped_jobs(process_group(tsk))) {
Oleg Nesterov	b67a1b9	2005-10-30 15:03:44 -0800	[diff] [blame]	738	__kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk));
				739	__kill_pg_info(SIGCONT, SEND_SIG_PRIV, process_group(tsk));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	740	}
				741
				742	/* Let father know we died
				743	*
				744	* Thread signals are configurable, but you aren't going to use
				745	* that to send signals to arbitary processes.
				746	* That stops right now.
				747	*
				748	* If the parent exec id doesn't match the exec id we saved
				749	* when we started then we know the parent has changed security
				750	* domain.
				751	*
				752	* If our self_exec id doesn't match our parent_exec_id then
				753	* we have changed execution domain as these two values started
				754	* the same after a fork.
				755	*
				756	*/
				757
				758	if (tsk->exit_signal != SIGCHLD && tsk->exit_signal != -1 &&
				759	( tsk->parent_exec_id != t->self_exec_id \|\|
				760	tsk->self_exec_id != tsk->parent_exec_id)
				761	&& !capable(CAP_KILL))
				762	tsk->exit_signal = SIGCHLD;
				763
				764
				765	/* If something other than our normal parent is ptracing us, then
				766	* send it a SIGCHLD instead of honoring exit_signal. exit_signal
				767	* only has special meaning to our real parent.
				768	*/
				769	if (tsk->exit_signal != -1 && thread_group_empty(tsk)) {
				770	int signal = tsk->parent == tsk->real_parent ? tsk->exit_signal : SIGCHLD;
				771	do_notify_parent(tsk, signal);
				772	} else if (tsk->ptrace) {
				773	do_notify_parent(tsk, SIGCHLD);
				774	}
				775
				776	state = EXIT_ZOMBIE;
				777	if (tsk->exit_signal == -1 &&
				778	(likely(tsk->ptrace == 0) \|\|
				779	unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT)))
				780	state = EXIT_DEAD;
				781	tsk->exit_state = state;
				782
				783	write_unlock_irq(&tasklist_lock);
				784
				785	list_for_each_safe(_p, _n, &ptrace_dead) {
				786	list_del_init(_p);
				787	t = list_entry(_p,struct task_struct,ptrace_list);
				788	release_task(t);
				789	}
				790
				791	/* If the process is dead, release it - nobody will wait for it */
				792	if (state == EXIT_DEAD)
				793	release_task(tsk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	794	}
				795
				796	fastcall NORET_TYPE void do_exit(long code)
				797	{
				798	struct task_struct *tsk = current;
				799	int group_dead;
				800
				801	profile_task_exit(tsk);
				802
Jens Axboe	22e2c50	2005-06-27 10:55:12 +0200	[diff] [blame]	803	WARN_ON(atomic_read(&tsk->fs_excl));
				804
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	805	if (unlikely(in_interrupt()))
				806	panic("Aiee, killing interrupt handler!");
				807	if (unlikely(!tsk->pid))
				808	panic("Attempted to kill the idle task!");
				809	if (unlikely(tsk->pid == 1))
				810	panic("Attempted to kill init!");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	811
				812	if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
				813	current->ptrace_message = code;
				814	ptrace_notify((PTRACE_EVENT_EXIT << 8) \| SIGTRAP);
				815	}
				816
Alexander Nyberg	df164db	2005-06-23 00:09:13 -0700	[diff] [blame]	817	/*
				818	* We're taking recursive faults here in do_exit. Safest is to just
				819	* leave this task alone and wait for reboot.
				820	*/
				821	if (unlikely(tsk->flags & PF_EXITING)) {
				822	printk(KERN_ALERT
				823	"Fixing recursive fault but reboot is needed!\n");
Al Viro	afc847b	2006-02-28 12:51:55 -0500	[diff] [blame]	824	if (tsk->io_context)
				825	exit_io_context();
Alexander Nyberg	df164db	2005-06-23 00:09:13 -0700	[diff] [blame]	826	set_current_state(TASK_UNINTERRUPTIBLE);
				827	schedule();
				828	}
				829
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	830	tsk->flags \|= PF_EXITING;
				831
Linus Torvalds	a362f46	2005-10-27 09:07:33 -0700	[diff] [blame]	832	/*
				833	* Make sure we don't try to process any timer firings
				834	* while we are already exiting.
				835	*/
				836	tsk->it_virt_expires = cputime_zero;
				837	tsk->it_prof_expires = cputime_zero;
				838	tsk->it_sched_expires = 0;
				839
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	840	if (unlikely(in_atomic()))
				841	printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
				842	current->comm, current->pid,
				843	preempt_count());
				844
				845	acct_update_integrals(tsk);
Hugh Dickins	365e9c87	2005-10-29 18:16:18 -0700	[diff] [blame]	846	if (tsk->mm) {
				847	update_hiwater_rss(tsk->mm);
				848	update_hiwater_vm(tsk->mm);
				849	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	850	group_dead = atomic_dec_and_test(&tsk->signal->live);
Andrew Morton	c306895	2005-08-04 16:49:32 -0700	[diff] [blame]	851	if (group_dead) {
Thomas Gleixner	2ff678b	2006-01-09 20:52:34 -0800	[diff] [blame]	852	hrtimer_cancel(&tsk->signal->real_timer);
Roland McGrath	25f407f	2005-10-21 15:03:29 -0700	[diff] [blame]	853	exit_itimers(tsk->signal);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	854	acct_process(code);
Andrew Morton	c306895	2005-08-04 16:49:32 -0700	[diff] [blame]	855	}
Ingo Molnar	0771dfe	2006-03-27 01:16:22 -0800	[diff] [blame^]	856	if (unlikely(tsk->robust_list))
				857	exit_robust_list(tsk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	858	exit_mm(tsk);
				859
				860	exit_sem(tsk);
				861	__exit_files(tsk);
				862	__exit_fs(tsk);
				863	exit_namespace(tsk);
				864	exit_thread();
				865	cpuset_exit(tsk);
				866	exit_keys(tsk);
				867
				868	if (group_dead && tsk->signal->leader)
				869	disassociate_ctty(1);
				870
Al Viro	a1261f54	2005-11-13 16:06:55 -0800	[diff] [blame]	871	module_put(task_thread_info(tsk)->exec_domain->module);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	872	if (tsk->binfmt)
				873	module_put(tsk->binfmt->module);
				874
				875	tsk->exit_code = code;
Matt Helsley	9f46080	2005-11-07 00:59:16 -0800	[diff] [blame]	876	proc_exit_connector(tsk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	877	exit_notify(tsk);
				878	#ifdef CONFIG_NUMA
				879	mpol_free(tsk->mempolicy);
				880	tsk->mempolicy = NULL;
				881	#endif
Ingo Molnar	de5097c	2006-01-09 15:59:21 -0800	[diff] [blame]	882	/*
				883	* If DEBUG_MUTEXES is on, make sure we are holding no locks:
				884	*/
				885	mutex_debug_check_no_locks_held(tsk);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	886
Al Viro	afc847b	2006-02-28 12:51:55 -0500	[diff] [blame]	887	if (tsk->io_context)
				888	exit_io_context();
				889
Coywolf Qi Hunt	7407251	2005-10-30 15:02:47 -0800	[diff] [blame]	890	/* PF_DEAD causes final put_task_struct after we schedule. */
				891	preempt_disable();
				892	BUG_ON(tsk->flags & PF_DEAD);
				893	tsk->flags \|= PF_DEAD;
				894
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	895	schedule();
				896	BUG();
				897	/* Avoid "noreturn function does return". */
				898	for (;;) ;
				899	}
				900
Russ Anderson	012914d	2005-04-23 00:08:00 -0700	[diff] [blame]	901	EXPORT_SYMBOL_GPL(do_exit);
				902
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	903	NORET_TYPE void complete_and_exit(struct completion *comp, long code)
				904	{
				905	if (comp)
				906	complete(comp);
				907
				908	do_exit(code);
				909	}
				910
				911	EXPORT_SYMBOL(complete_and_exit);
				912
				913	asmlinkage long sys_exit(int error_code)
				914	{
				915	do_exit((error_code&0xff)<<8);
				916	}
				917
				918	task_t fastcall next_thread(const task_t p)
				919	{
				920	return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
				921	}
				922
				923	EXPORT_SYMBOL(next_thread);
				924
				925	/*
				926	* Take down every thread in the group. This is called by fatal signals
				927	* as well as by sys_exit_group (below).
				928	*/
				929	NORET_TYPE void
				930	do_group_exit(int exit_code)
				931	{
				932	BUG_ON(exit_code & 0x80); /* core dumps don't get here */
				933
				934	if (current->signal->flags & SIGNAL_GROUP_EXIT)
				935	exit_code = current->signal->group_exit_code;
				936	else if (!thread_group_empty(current)) {
				937	struct signal_struct *const sig = current->signal;
				938	struct sighand_struct *const sighand = current->sighand;
				939	read_lock(&tasklist_lock);
				940	spin_lock_irq(&sighand->siglock);
				941	if (sig->flags & SIGNAL_GROUP_EXIT)
				942	/* Another thread got here before we took the lock. */
				943	exit_code = sig->group_exit_code;
				944	else {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	945	sig->group_exit_code = exit_code;
				946	zap_other_threads(current);
				947	}
				948	spin_unlock_irq(&sighand->siglock);
				949	read_unlock(&tasklist_lock);
				950	}
				951
				952	do_exit(exit_code);
				953	/* NOTREACHED */
				954	}
				955
				956	/*
				957	* this kills every thread in the thread group. Note that any externally
				958	* wait4()-ing process will get the correct exit code - even if this
				959	* thread is not the thread group leader.
				960	*/
				961	asmlinkage void sys_exit_group(int error_code)
				962	{
				963	do_group_exit((error_code & 0xff) << 8);
				964	}
				965
				966	static int eligible_child(pid_t pid, int options, task_t *p)
				967	{
				968	if (pid > 0) {
				969	if (p->pid != pid)
				970	return 0;
				971	} else if (!pid) {
				972	if (process_group(p) != process_group(current))
				973	return 0;
				974	} else if (pid != -1) {
				975	if (process_group(p) != -pid)
				976	return 0;
				977	}
				978
				979	/*
				980	* Do not consider detached threads that are
				981	* not ptraced:
				982	*/
				983	if (p->exit_signal == -1 && !p->ptrace)
				984	return 0;
				985
				986	/* Wait for all children (clone and not) if __WALL is set;
				987	* otherwise, wait for clone children only if __WCLONE is
				988	* set; otherwise, wait for non-clone children only. (Note:
				989	* A "clone" child here is one that reports to its parent
				990	* using a signal other than SIGCHLD.) */
				991	if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
				992	&& !(options & __WALL))
				993	return 0;
				994	/*
				995	* Do not consider thread group leaders that are
				996	* in a non-empty thread group:
				997	*/
				998	if (current->tgid != p->tgid && delay_group_leader(p))
				999	return 2;
				1000
				1001	if (security_task_wait(p))
				1002	return 0;
				1003
				1004	return 1;
				1005	}
				1006
				1007	static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
				1008	int why, int status,
				1009	struct siginfo __user *infop,
				1010	struct rusage __user *rusagep)
				1011	{
				1012	int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
				1013	put_task_struct(p);
				1014	if (!retval)
				1015	retval = put_user(SIGCHLD, &infop->si_signo);
				1016	if (!retval)
				1017	retval = put_user(0, &infop->si_errno);
				1018	if (!retval)
				1019	retval = put_user((short)why, &infop->si_code);
				1020	if (!retval)
				1021	retval = put_user(pid, &infop->si_pid);
				1022	if (!retval)
				1023	retval = put_user(uid, &infop->si_uid);
				1024	if (!retval)
				1025	retval = put_user(status, &infop->si_status);
				1026	if (!retval)
				1027	retval = pid;
				1028	return retval;
				1029	}
				1030
				1031	/*
				1032	* Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold
				1033	* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
				1034	* the lock and this task is uninteresting. If we return nonzero, we have
				1035	* released the lock and the system call should return.
				1036	*/
				1037	static int wait_task_zombie(task_t *p, int noreap,
				1038	struct siginfo __user *infop,
				1039	int __user stat_addr, struct rusage __user ru)
				1040	{
				1041	unsigned long state;
				1042	int retval;
				1043	int status;
				1044
				1045	if (unlikely(noreap)) {
				1046	pid_t pid = p->pid;
				1047	uid_t uid = p->uid;
				1048	int exit_code = p->exit_code;
				1049	int why, status;
				1050
				1051	if (unlikely(p->exit_state != EXIT_ZOMBIE))
				1052	return 0;
				1053	if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
				1054	return 0;
				1055	get_task_struct(p);
				1056	read_unlock(&tasklist_lock);
				1057	if ((exit_code & 0x7f) == 0) {
				1058	why = CLD_EXITED;
				1059	status = exit_code >> 8;
				1060	} else {
				1061	why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
				1062	status = exit_code & 0x7f;
				1063	}
				1064	return wait_noreap_copyout(p, pid, uid, why,
				1065	status, infop, ru);
				1066	}
				1067
				1068	/*
				1069	* Try to move the task's state to DEAD
				1070	* only one thread is allowed to do this:
				1071	*/
				1072	state = xchg(&p->exit_state, EXIT_DEAD);
				1073	if (state != EXIT_ZOMBIE) {
				1074	BUG_ON(state != EXIT_DEAD);
				1075	return 0;
				1076	}
				1077	if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) {
				1078	/*
				1079	* This can only happen in a race with a ptraced thread
				1080	* dying on another processor.
				1081	*/
				1082	return 0;
				1083	}
				1084
				1085	if (likely(p->real_parent == p->parent) && likely(p->signal)) {
Jesper Juhl	3795e16	2006-01-09 20:54:39 -0800	[diff] [blame]	1086	struct signal_struct *psig;
				1087	struct signal_struct *sig;
				1088
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1089	/*
				1090	* The resource counters for the group leader are in its
				1091	* own task_struct. Those for dead threads in the group
				1092	* are in its signal_struct, as are those for the child
				1093	* processes it has previously reaped. All these
				1094	* accumulate in the parent's signal_struct c* fields.
				1095	*
				1096	* We don't bother to take a lock here to protect these
				1097	* p->signal fields, because they are only touched by
				1098	* __exit_signal, which runs with tasklist_lock
				1099	* write-locked anyway, and so is excluded here. We do
				1100	* need to protect the access to p->parent->signal fields,
				1101	* as other threads in the parent group can be right
				1102	* here reaping other children at the same time.
				1103	*/
				1104	spin_lock_irq(&p->parent->sighand->siglock);
Jesper Juhl	3795e16	2006-01-09 20:54:39 -0800	[diff] [blame]	1105	psig = p->parent->signal;
				1106	sig = p->signal;
				1107	psig->cutime =
				1108	cputime_add(psig->cutime,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1109	cputime_add(p->utime,
Jesper Juhl	3795e16	2006-01-09 20:54:39 -0800	[diff] [blame]	1110	cputime_add(sig->utime,
				1111	sig->cutime)));
				1112	psig->cstime =
				1113	cputime_add(psig->cstime,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1114	cputime_add(p->stime,
Jesper Juhl	3795e16	2006-01-09 20:54:39 -0800	[diff] [blame]	1115	cputime_add(sig->stime,
				1116	sig->cstime)));
				1117	psig->cmin_flt +=
				1118	p->min_flt + sig->min_flt + sig->cmin_flt;
				1119	psig->cmaj_flt +=
				1120	p->maj_flt + sig->maj_flt + sig->cmaj_flt;
				1121	psig->cnvcsw +=
				1122	p->nvcsw + sig->nvcsw + sig->cnvcsw;
				1123	psig->cnivcsw +=
				1124	p->nivcsw + sig->nivcsw + sig->cnivcsw;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1125	spin_unlock_irq(&p->parent->sighand->siglock);
				1126	}
				1127
				1128	/*
				1129	* Now we are sure this task is interesting, and no other
				1130	* thread can reap it because we set its state to EXIT_DEAD.
				1131	*/
				1132	read_unlock(&tasklist_lock);
				1133
				1134	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				1135	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
				1136	? p->signal->group_exit_code : p->exit_code;
				1137	if (!retval && stat_addr)
				1138	retval = put_user(status, stat_addr);
				1139	if (!retval && infop)
				1140	retval = put_user(SIGCHLD, &infop->si_signo);
				1141	if (!retval && infop)
				1142	retval = put_user(0, &infop->si_errno);
				1143	if (!retval && infop) {
				1144	int why;
				1145
				1146	if ((status & 0x7f) == 0) {
				1147	why = CLD_EXITED;
				1148	status >>= 8;
				1149	} else {
				1150	why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
				1151	status &= 0x7f;
				1152	}
				1153	retval = put_user((short)why, &infop->si_code);
				1154	if (!retval)
				1155	retval = put_user(status, &infop->si_status);
				1156	}
				1157	if (!retval && infop)
				1158	retval = put_user(p->pid, &infop->si_pid);
				1159	if (!retval && infop)
				1160	retval = put_user(p->uid, &infop->si_uid);
				1161	if (retval) {
				1162	// TODO: is this safe?
				1163	p->exit_state = EXIT_ZOMBIE;
				1164	return retval;
				1165	}
				1166	retval = p->pid;
				1167	if (p->real_parent != p->parent) {
				1168	write_lock_irq(&tasklist_lock);
				1169	/* Double-check with lock held. */
				1170	if (p->real_parent != p->parent) {
				1171	__ptrace_unlink(p);
				1172	// TODO: is this safe?
				1173	p->exit_state = EXIT_ZOMBIE;
				1174	/*
				1175	* If this is not a detached task, notify the parent.
				1176	* If it's still not detached after that, don't release
				1177	* it now.
				1178	*/
				1179	if (p->exit_signal != -1) {
				1180	do_notify_parent(p, p->exit_signal);
				1181	if (p->exit_signal != -1)
				1182	p = NULL;
				1183	}
				1184	}
				1185	write_unlock_irq(&tasklist_lock);
				1186	}
				1187	if (p != NULL)
				1188	release_task(p);
				1189	BUG_ON(!retval);
				1190	return retval;
				1191	}
				1192
				1193	/*
				1194	* Handle sys_wait4 work for one task in state TASK_STOPPED. We hold
				1195	* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
				1196	* the lock and this task is uninteresting. If we return nonzero, we have
				1197	* released the lock and the system call should return.
				1198	*/
				1199	static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap,
				1200	struct siginfo __user *infop,
				1201	int __user stat_addr, struct rusage __user ru)
				1202	{
				1203	int retval, exit_code;
				1204
				1205	if (!p->exit_code)
				1206	return 0;
				1207	if (delayed_group_leader && !(p->ptrace & PT_PTRACED) &&
				1208	p->signal && p->signal->group_stop_count > 0)
				1209	/*
				1210	* A group stop is in progress and this is the group leader.
				1211	* We won't report until all threads have stopped.
				1212	*/
				1213	return 0;
				1214
				1215	/*
				1216	* Now we are pretty sure this task is interesting.
				1217	* Make sure it doesn't get reaped out from under us while we
				1218	* give up the lock and then examine it below. We don't want to
				1219	* keep holding onto the tasklist_lock while we call getrusage and
				1220	* possibly take page faults for user memory.
				1221	*/
				1222	get_task_struct(p);
				1223	read_unlock(&tasklist_lock);
				1224
				1225	if (unlikely(noreap)) {
				1226	pid_t pid = p->pid;
				1227	uid_t uid = p->uid;
				1228	int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
				1229
				1230	exit_code = p->exit_code;
				1231	if (unlikely(!exit_code) \|\|
Linus Torvalds	14bf01b	2005-10-01 11:04:18 -0700	[diff] [blame]	1232	unlikely(p->state & TASK_TRACED))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1233	goto bail_ref;
				1234	return wait_noreap_copyout(p, pid, uid,
				1235	why, (exit_code << 8) \| 0x7f,
				1236	infop, ru);
				1237	}
				1238
				1239	write_lock_irq(&tasklist_lock);
				1240
				1241	/*
				1242	* This uses xchg to be atomic with the thread resuming and setting
				1243	* it. It must also be done with the write lock held to prevent a
				1244	* race with the EXIT_ZOMBIE case.
				1245	*/
				1246	exit_code = xchg(&p->exit_code, 0);
				1247	if (unlikely(p->exit_state)) {
				1248	/*
				1249	* The task resumed and then died. Let the next iteration
				1250	* catch it in EXIT_ZOMBIE. Note that exit_code might
				1251	* already be zero here if it resumed and did _exit(0).
				1252	* The task itself is dead and won't touch exit_code again;
				1253	* other processors in this function are locked out.
				1254	*/
				1255	p->exit_code = exit_code;
				1256	exit_code = 0;
				1257	}
				1258	if (unlikely(exit_code == 0)) {
				1259	/*
				1260	* Another thread in this function got to it first, or it
				1261	* resumed, or it resumed and then died.
				1262	*/
				1263	write_unlock_irq(&tasklist_lock);
				1264	bail_ref:
				1265	put_task_struct(p);
				1266	/*
				1267	* We are returning to the wait loop without having successfully
				1268	* removed the process and having released the lock. We cannot
				1269	* continue, since the "p" task pointer is potentially stale.
				1270	*
				1271	* Return -EAGAIN, and do_wait() will restart the loop from the
				1272	* beginning. Do _not_ re-acquire the lock.
				1273	*/
				1274	return -EAGAIN;
				1275	}
				1276
				1277	/* move to end of parent's list to avoid starvation */
				1278	remove_parent(p);
				1279	add_parent(p, p->parent);
				1280
				1281	write_unlock_irq(&tasklist_lock);
				1282
				1283	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				1284	if (!retval && stat_addr)
				1285	retval = put_user((exit_code << 8) \| 0x7f, stat_addr);
				1286	if (!retval && infop)
				1287	retval = put_user(SIGCHLD, &infop->si_signo);
				1288	if (!retval && infop)
				1289	retval = put_user(0, &infop->si_errno);
				1290	if (!retval && infop)
				1291	retval = put_user((short)((p->ptrace & PT_PTRACED)
				1292	? CLD_TRAPPED : CLD_STOPPED),
				1293	&infop->si_code);
				1294	if (!retval && infop)
				1295	retval = put_user(exit_code, &infop->si_status);
				1296	if (!retval && infop)
				1297	retval = put_user(p->pid, &infop->si_pid);
				1298	if (!retval && infop)
				1299	retval = put_user(p->uid, &infop->si_uid);
				1300	if (!retval)
				1301	retval = p->pid;
				1302	put_task_struct(p);
				1303
				1304	BUG_ON(!retval);
				1305	return retval;
				1306	}
				1307
				1308	/*
				1309	* Handle do_wait work for one task in a live, non-stopped state.
				1310	* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
				1311	* the lock and this task is uninteresting. If we return nonzero, we have
				1312	* released the lock and the system call should return.
				1313	*/
				1314	static int wait_task_continued(task_t *p, int noreap,
				1315	struct siginfo __user *infop,
				1316	int __user stat_addr, struct rusage __user ru)
				1317	{
				1318	int retval;
				1319	pid_t pid;
				1320	uid_t uid;
				1321
				1322	if (unlikely(!p->signal))
				1323	return 0;
				1324
				1325	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
				1326	return 0;
				1327
				1328	spin_lock_irq(&p->sighand->siglock);
				1329	/* Re-check with the lock held. */
				1330	if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
				1331	spin_unlock_irq(&p->sighand->siglock);
				1332	return 0;
				1333	}
				1334	if (!noreap)
				1335	p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
				1336	spin_unlock_irq(&p->sighand->siglock);
				1337
				1338	pid = p->pid;
				1339	uid = p->uid;
				1340	get_task_struct(p);
				1341	read_unlock(&tasklist_lock);
				1342
				1343	if (!infop) {
				1344	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				1345	put_task_struct(p);
				1346	if (!retval && stat_addr)
				1347	retval = put_user(0xffff, stat_addr);
				1348	if (!retval)
				1349	retval = p->pid;
				1350	} else {
				1351	retval = wait_noreap_copyout(p, pid, uid,
				1352	CLD_CONTINUED, SIGCONT,
				1353	infop, ru);
				1354	BUG_ON(retval == 0);
				1355	}
				1356
				1357	return retval;
				1358	}
				1359
				1360
				1361	static inline int my_ptrace_child(struct task_struct *p)
				1362	{
				1363	if (!(p->ptrace & PT_PTRACED))
				1364	return 0;
				1365	if (!(p->ptrace & PT_ATTACHED))
				1366	return 1;
				1367	/*
				1368	* This child was PTRACE_ATTACH'd. We should be seeing it only if
				1369	* we are the attacher. If we are the real parent, this is a race
				1370	* inside ptrace_attach. It is waiting for the tasklist_lock,
				1371	* which we have to switch the parent links, but has already set
				1372	* the flags in p->ptrace.
				1373	*/
				1374	return (p->parent != p->real_parent);
				1375	}
				1376
				1377	static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
				1378	int __user stat_addr, struct rusage __user ru)
				1379	{
				1380	DECLARE_WAITQUEUE(wait, current);
				1381	struct task_struct *tsk;
				1382	int flag, retval;
				1383
				1384	add_wait_queue(&current->signal->wait_chldexit,&wait);
				1385	repeat:
				1386	/*
				1387	* We will set this flag if we see any child that might later
				1388	* match our criteria, even if we are not able to reap it yet.
				1389	*/
				1390	flag = 0;
				1391	current->state = TASK_INTERRUPTIBLE;
				1392	read_lock(&tasklist_lock);
				1393	tsk = current;
				1394	do {
				1395	struct task_struct *p;
				1396	struct list_head *_p;
				1397	int ret;
				1398
				1399	list_for_each(_p,&tsk->children) {
				1400	p = list_entry(_p,struct task_struct,sibling);
				1401
				1402	ret = eligible_child(pid, options, p);
				1403	if (!ret)
				1404	continue;
				1405
				1406	switch (p->state) {
				1407	case TASK_TRACED:
Roland McGrath	7f2a525	2005-10-30 15:02:50 -0800	[diff] [blame]	1408	/*
				1409	* When we hit the race with PTRACE_ATTACH,
				1410	* we will not report this child. But the
				1411	* race means it has not yet been moved to
				1412	* our ptrace_children list, so we need to
				1413	* set the flag here to avoid a spurious ECHILD
				1414	* when the race happens with the only child.
				1415	*/
				1416	flag = 1;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1417	if (!my_ptrace_child(p))
				1418	continue;
				1419	/FALLTHROUGH/
				1420	case TASK_STOPPED:
				1421	/*
				1422	* It's stopped now, so it might later
				1423	* continue, exit, or stop again.
				1424	*/
				1425	flag = 1;
				1426	if (!(options & WUNTRACED) &&
				1427	!my_ptrace_child(p))
				1428	continue;
				1429	retval = wait_task_stopped(p, ret == 2,
				1430	(options & WNOWAIT),
				1431	infop,
				1432	stat_addr, ru);
				1433	if (retval == -EAGAIN)
				1434	goto repeat;
				1435	if (retval != 0) /* He released the lock. */
				1436	goto end;
				1437	break;
				1438	default:
				1439	// case EXIT_DEAD:
				1440	if (p->exit_state == EXIT_DEAD)
				1441	continue;
				1442	// case EXIT_ZOMBIE:
				1443	if (p->exit_state == EXIT_ZOMBIE) {
				1444	/*
				1445	* Eligible but we cannot release
				1446	* it yet:
				1447	*/
				1448	if (ret == 2)
				1449	goto check_continued;
				1450	if (!likely(options & WEXITED))
				1451	continue;
				1452	retval = wait_task_zombie(
				1453	p, (options & WNOWAIT),
				1454	infop, stat_addr, ru);
				1455	/* He released the lock. */
				1456	if (retval != 0)
				1457	goto end;
				1458	break;
				1459	}
				1460	check_continued:
				1461	/*
				1462	* It's running now, so it might later
				1463	* exit, stop, or stop and then continue.
				1464	*/
				1465	flag = 1;
				1466	if (!unlikely(options & WCONTINUED))
				1467	continue;
				1468	retval = wait_task_continued(
				1469	p, (options & WNOWAIT),
				1470	infop, stat_addr, ru);
				1471	if (retval != 0) /* He released the lock. */
				1472	goto end;
				1473	break;
				1474	}
				1475	}
				1476	if (!flag) {
				1477	list_for_each(_p, &tsk->ptrace_children) {
				1478	p = list_entry(_p, struct task_struct,
				1479	ptrace_list);
				1480	if (!eligible_child(pid, options, p))
				1481	continue;
				1482	flag = 1;
				1483	break;
				1484	}
				1485	}
				1486	if (options & __WNOTHREAD)
				1487	break;
				1488	tsk = next_thread(tsk);
				1489	if (tsk->signal != current->signal)
				1490	BUG();
				1491	} while (tsk != current);
				1492
				1493	read_unlock(&tasklist_lock);
				1494	if (flag) {
				1495	retval = 0;
				1496	if (options & WNOHANG)
				1497	goto end;
				1498	retval = -ERESTARTSYS;
				1499	if (signal_pending(current))
				1500	goto end;
				1501	schedule();
				1502	goto repeat;
				1503	}
				1504	retval = -ECHILD;
				1505	end:
				1506	current->state = TASK_RUNNING;
				1507	remove_wait_queue(&current->signal->wait_chldexit,&wait);
				1508	if (infop) {
				1509	if (retval > 0)
				1510	retval = 0;
				1511	else {
				1512	/*
				1513	* For a WNOHANG return, clear out all the fields
				1514	* we would set so the user can easily tell the
				1515	* difference.
				1516	*/
				1517	if (!retval)
				1518	retval = put_user(0, &infop->si_signo);
				1519	if (!retval)
				1520	retval = put_user(0, &infop->si_errno);
				1521	if (!retval)
				1522	retval = put_user(0, &infop->si_code);
				1523	if (!retval)
				1524	retval = put_user(0, &infop->si_pid);
				1525	if (!retval)
				1526	retval = put_user(0, &infop->si_uid);
				1527	if (!retval)
				1528	retval = put_user(0, &infop->si_status);
				1529	}
				1530	}
				1531	return retval;
				1532	}
				1533
				1534	asmlinkage long sys_waitid(int which, pid_t pid,
				1535	struct siginfo __user *infop, int options,
				1536	struct rusage __user *ru)
				1537	{
				1538	long ret;
				1539
				1540	if (options & ~(WNOHANG\|WNOWAIT\|WEXITED\|WSTOPPED\|WCONTINUED))
				1541	return -EINVAL;
				1542	if (!(options & (WEXITED\|WSTOPPED\|WCONTINUED)))
				1543	return -EINVAL;
				1544
				1545	switch (which) {
				1546	case P_ALL:
				1547	pid = -1;
				1548	break;
				1549	case P_PID:
				1550	if (pid <= 0)
				1551	return -EINVAL;
				1552	break;
				1553	case P_PGID:
				1554	if (pid <= 0)
				1555	return -EINVAL;
				1556	pid = -pid;
				1557	break;
				1558	default:
				1559	return -EINVAL;
				1560	}
				1561
				1562	ret = do_wait(pid, options, infop, NULL, ru);
				1563
				1564	/* avoid REGPARM breakage on x86: */
				1565	prevent_tail_call(ret);
				1566	return ret;
				1567	}
				1568
				1569	asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
				1570	int options, struct rusage __user *ru)
				1571	{
				1572	long ret;
				1573
				1574	if (options & ~(WNOHANG\|WUNTRACED\|WCONTINUED\|
				1575	__WNOTHREAD\|__WCLONE\|__WALL))
				1576	return -EINVAL;
				1577	ret = do_wait(pid, options \| WEXITED, NULL, stat_addr, ru);
				1578
				1579	/* avoid REGPARM breakage on x86: */
				1580	prevent_tail_call(ret);
				1581	return ret;
				1582	}
				1583
				1584	#ifdef __ARCH_WANT_SYS_WAITPID
				1585
				1586	/*
				1587	* sys_waitpid() remains for compatibility. waitpid() should be
				1588	* implemented by calling sys_wait4() from libc.a.
				1589	*/
				1590	asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options)
				1591	{
				1592	return sys_wait4(pid, stat_addr, options, NULL);
				1593	}
				1594
				1595	#endif