Blame - mm/msync.c - SHIFTPHONES/mainline/linux

blob: 090f426bca7d538d5f84c9a8c53a677f50a31434 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame^]	1	/*
				2	* linux/mm/msync.c
				3	*
				4	* Copyright (C) 1994-1999 Linus Torvalds
				5	*/
				6
				7	/*
				8	* The msync() system call.
				9	*/
				10	#include <linux/slab.h>
				11	#include <linux/pagemap.h>
				12	#include <linux/mm.h>
				13	#include <linux/mman.h>
				14	#include <linux/hugetlb.h>
				15	#include <linux/syscalls.h>
				16
				17	#include <asm/pgtable.h>
				18	#include <asm/tlbflush.h>
				19
				20	/*
				21	* Called with mm->page_table_lock held to protect against other
				22	* threads/the swapper from ripping pte's out from under us.
				23	*/
				24
				25	static void sync_pte_range(struct vm_area_struct vma, pmd_t pmd,
				26	unsigned long addr, unsigned long end)
				27	{
				28	pte_t *pte;
				29
				30	pte = pte_offset_map(pmd, addr);
				31	do {
				32	unsigned long pfn;
				33	struct page *page;
				34
				35	if (!pte_present(*pte))
				36	continue;
				37	pfn = pte_pfn(*pte);
				38	if (!pfn_valid(pfn))
				39	continue;
				40	page = pfn_to_page(pfn);
				41	if (PageReserved(page))
				42	continue;
				43
				44	if (ptep_clear_flush_dirty(vma, addr, pte) \|\|
				45	page_test_and_clear_dirty(page))
				46	set_page_dirty(page);
				47	} while (pte++, addr += PAGE_SIZE, addr != end);
				48	pte_unmap(pte - 1);
				49	}
				50
				51	static inline void sync_pmd_range(struct vm_area_struct vma, pud_t pud,
				52	unsigned long addr, unsigned long end)
				53	{
				54	pmd_t *pmd;
				55	unsigned long next;
				56
				57	pmd = pmd_offset(pud, addr);
				58	do {
				59	next = pmd_addr_end(addr, end);
				60	if (pmd_none_or_clear_bad(pmd))
				61	continue;
				62	sync_pte_range(vma, pmd, addr, next);
				63	} while (pmd++, addr = next, addr != end);
				64	}
				65
				66	static inline void sync_pud_range(struct vm_area_struct vma, pgd_t pgd,
				67	unsigned long addr, unsigned long end)
				68	{
				69	pud_t *pud;
				70	unsigned long next;
				71
				72	pud = pud_offset(pgd, addr);
				73	do {
				74	next = pud_addr_end(addr, end);
				75	if (pud_none_or_clear_bad(pud))
				76	continue;
				77	sync_pmd_range(vma, pud, addr, next);
				78	} while (pud++, addr = next, addr != end);
				79	}
				80
				81	static void sync_page_range(struct vm_area_struct *vma,
				82	unsigned long addr, unsigned long end)
				83	{
				84	struct mm_struct *mm = vma->vm_mm;
				85	pgd_t *pgd;
				86	unsigned long next;
				87
				88	/* For hugepages we can't go walking the page table normally,
				89	* but that's ok, hugetlbfs is memory based, so we don't need
				90	* to do anything more on an msync() */
				91	if (is_vm_hugetlb_page(vma))
				92	return;
				93
				94	BUG_ON(addr >= end);
				95	pgd = pgd_offset(mm, addr);
				96	flush_cache_range(vma, addr, end);
				97	spin_lock(&mm->page_table_lock);
				98	do {
				99	next = pgd_addr_end(addr, end);
				100	if (pgd_none_or_clear_bad(pgd))
				101	continue;
				102	sync_pud_range(vma, pgd, addr, next);
				103	} while (pgd++, addr = next, addr != end);
				104	spin_unlock(&mm->page_table_lock);
				105	}
				106
				/*
				 * filemap_sync - run sync_page_range() over [addr, end) of @vma.
				 *
				 * With CONFIG_PREEMPT the range is processed in chunks of at most 64KB,
				 * calling cond_resched() after each chunk, so sync_page_range() (which
				 * takes mm->page_table_lock) is never asked to cover more than one chunk
				 * at a time.  Without CONFIG_PREEMPT the whole range is handled in a
				 * single call.
				 */
				#ifdef CONFIG_PREEMPT
				static inline void filemap_sync(struct vm_area_struct *vma,
						unsigned long addr, unsigned long end)
				{
					const size_t chunk = 64 * 1024;	/* bytes */
					unsigned long next;

					do {
						next = addr + chunk;
						/* Clamp to end; "next < addr" catches address wrap-around. */
						if (next > end || next < addr)
							next = end;
						sync_page_range(vma, addr, next);
						cond_resched();
					} while (addr = next, addr != end);
				}
				#else
				static inline void filemap_sync(struct vm_area_struct *vma,
						unsigned long addr, unsigned long end)
				{
					sync_page_range(vma, addr, end);
				}
				#endif
				129
				130	/*
				131	* MS_SYNC syncs the entire file - including mappings.
				132	*
				133	* MS_ASYNC does not start I/O (it used to, up to 2.5.67). Instead, it just
				134	* marks the relevant pages dirty. The application may now run fsync() to
				135	* write out the dirty pages and wait on the writeout and check the result.
				136	* Or the application may run fadvise(FADV_DONTNEED) against the fd to start
				137	* async writeout immediately.
				138	* So my _not_ starting I/O in MS_ASYNC we provide complete flexibility to
				139	* applications.
				140	*/
				141	static int msync_interval(struct vm_area_struct *vma,
				142	unsigned long addr, unsigned long end, int flags)
				143	{
				144	int ret = 0;
				145	struct file *file = vma->vm_file;
				146
				147	if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
				148	return -EBUSY;
				149
				150	if (file && (vma->vm_flags & VM_SHARED)) {
				151	filemap_sync(vma, addr, end);
				152
				153	if (flags & MS_SYNC) {
				154	struct address_space *mapping = file->f_mapping;
				155	int err;
				156
				157	ret = filemap_fdatawrite(mapping);
				158	if (file->f_op && file->f_op->fsync) {
				159	/*
				160	* We don't take i_sem here because mmap_sem
				161	* is already held.
				162	*/
				163	err = file->f_op->fsync(file,file->f_dentry,1);
				164	if (err && !ret)
				165	ret = err;
				166	}
				167	err = filemap_fdatawait(mapping);
				168	if (!ret)
				169	ret = err;
				170	}
				171	}
				172	return ret;
				173	}
				174
				175	asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
				176	{
				177	unsigned long end;
				178	struct vm_area_struct *vma;
				179	int unmapped_error, error = -EINVAL;
				180
				181	if (flags & MS_SYNC)
				182	current->flags \|= PF_SYNCWRITE;
				183
				184	down_read(&current->mm->mmap_sem);
				185	if (flags & ~(MS_ASYNC \| MS_INVALIDATE \| MS_SYNC))
				186	goto out;
				187	if (start & ~PAGE_MASK)
				188	goto out;
				189	if ((flags & MS_ASYNC) && (flags & MS_SYNC))
				190	goto out;
				191	error = -ENOMEM;
				192	len = (len + ~PAGE_MASK) & PAGE_MASK;
				193	end = start + len;
				194	if (end < start)
				195	goto out;
				196	error = 0;
				197	if (end == start)
				198	goto out;
				199	/*
				200	* If the interval [start,end) covers some unmapped address ranges,
				201	* just ignore them, but return -ENOMEM at the end.
				202	*/
				203	vma = find_vma(current->mm, start);
				204	unmapped_error = 0;
				205	for (;;) {
				206	/* Still start < end. */
				207	error = -ENOMEM;
				208	if (!vma)
				209	goto out;
				210	/* Here start < vma->vm_end. */
				211	if (start < vma->vm_start) {
				212	unmapped_error = -ENOMEM;
				213	start = vma->vm_start;
				214	}
				215	/* Here vma->vm_start <= start < vma->vm_end. */
				216	if (end <= vma->vm_end) {
				217	if (start < end) {
				218	error = msync_interval(vma, start, end, flags);
				219	if (error)
				220	goto out;
				221	}
				222	error = unmapped_error;
				223	goto out;
				224	}
				225	/* Here vma->vm_start <= start < vma->vm_end < end. */
				226	error = msync_interval(vma, start, vma->vm_end, flags);
				227	if (error)
				228	goto out;
				229	start = vma->vm_end;
				230	vma = vma->vm_next;
				231	}
				232	out:
				233	up_read(&current->mm->mmap_sem);
				234	current->flags &= ~PF_SYNCWRITE;
				235	return error;
				236	}