Blame - arch/powerpc/mm/pgtable_64.c - SHIFTPHONES/mainline/linux

blob: e0c71854317412e60671b5074fe07bcf91f47b38 [file] [log] [blame]

Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	1	/*
				2	* This file contains ioremap and related functions for 64-bit machines.
				3	*
				4	* Derived from arch/ppc64/mm/init.c
				5	* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
				6	*
				7	* Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
				8	* and Cort Dougan (PReP) (cort@cs.nmt.edu)
				9	* Copyright (C) 1996 Paul Mackerras
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	10	*
				11	* Derived from "arch/i386/mm/init.c"
				12	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
				13	*
				14	* Dave Engebretsen <engebret@us.ibm.com>
				15	* Rework for PPC64 port.
				16	*
				17	* This program is free software; you can redistribute it and/or
				18	* modify it under the terms of the GNU General Public License
				19	* as published by the Free Software Foundation; either version
				20	* 2 of the License, or (at your option) any later version.
				21	*
				22	*/
				23
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	24	#include <linux/signal.h>
				25	#include <linux/sched.h>
				26	#include <linux/kernel.h>
				27	#include <linux/errno.h>
				28	#include <linux/string.h>
Paul Gortmaker	66b15db	2011-05-27 10:46:24 -0400	[diff] [blame]	29	#include <linux/export.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	30	#include <linux/types.h>
				31	#include <linux/mman.h>
				32	#include <linux/mm.h>
				33	#include <linux/swap.h>
				34	#include <linux/stddef.h>
				35	#include <linux/vmalloc.h>
Yinghai Lu	95f72d1	2010-07-12 14:36:09 +1000	[diff] [blame]	36	#include <linux/memblock.h>
Tejun Heo	5a0e3ad	2010-03-24 17:04:11 +0900	[diff] [blame]	37	#include <linux/slab.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	38
				39	#include <asm/pgalloc.h>
				40	#include <asm/page.h>
				41	#include <asm/prom.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	42	#include <asm/io.h>
				43	#include <asm/mmu_context.h>
				44	#include <asm/pgtable.h>
				45	#include <asm/mmu.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	46	#include <asm/smp.h>
				47	#include <asm/machdep.h>
				48	#include <asm/tlb.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	49	#include <asm/processor.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	50	#include <asm/cputable.h>
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	51	#include <asm/sections.h>
Stephen Rothwell	5e203d6	2006-09-25 13:36:31 +1000	[diff] [blame]	52	#include <asm/firmware.h>
Anton Blanchard	68cf0d6	2014-09-17 22:15:35 +1000	[diff] [blame^]	53	#include <asm/dma.h>
David Gibson	800fc3e	2005-11-16 15:43:48 +1100	[diff] [blame]	54
				55	#include "mmu_decl.h"
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	56
Aneesh Kumar K.V	9e81330	2014-08-13 12:32:04 +0530	[diff] [blame]	57	#define CREATE_TRACE_POINTS
				58	#include <trace/events/thp.h>
				59
Aneesh Kumar K.V	78f1dbd	2012-09-10 02:52:57 +0000	[diff] [blame]	60	/* Some sanity checking */
				61	#if TASK_SIZE_USER64 > PGTABLE_RANGE
				62	#error TASK_SIZE_USER64 exceeds pagetable range
				63	#endif
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	64
Aneesh Kumar K.V	78f1dbd	2012-09-10 02:52:57 +0000	[diff] [blame]	65	#ifdef CONFIG_PPC_STD_MMU_64
Aneesh Kumar K.V	af81d78	2013-03-13 03:34:55 +0000	[diff] [blame]	66	#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
Aneesh Kumar K.V	78f1dbd	2012-09-10 02:52:57 +0000	[diff] [blame]	67	#error TASK_SIZE_USER64 exceeds user VSID range
				68	#endif
				69	#endif
				70
/*
 * Lowest address not yet handed out for ioremap.  Starts at IOREMAP_BASE;
 * __ioremap_caller() advances it for every mapping created before
 * mem_init_done and afterwards uses it as the lower bound of the vm area
 * search, and __iounmap() refuses to unmap anything below it.
 */
unsigned long ioremap_bot = IOREMAP_BASE;
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	72
				73	#ifdef CONFIG_PPC_MMU_NOHASH
Scott Wood	7d17622	2014-08-01 22:07:40 -0500	[diff] [blame]	74	static __ref void *early_alloc_pgtable(unsigned long size)
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	75	{
				76	void *pt;
				77
Anton Blanchard	1023973	2014-09-17 22:15:33 +1000	[diff] [blame]	78	pt = __va(memblock_alloc_base(size, size, __pa(MAX_DMA_ADDRESS)));
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	79	memset(pt, 0, size);
				80
				81	return pt;
				82	}
				83	#endif /* CONFIG_PPC_MMU_NOHASH */
				84
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	85	/*
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	86	* map_kernel_page currently only called by __ioremap
				87	* map_kernel_page adds an entry to the ioremap page table
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	88	* and adds an entry to the HPT, possibly bolting it
				89	*/
Benjamin Herrenschmidt	32a7494	2009-07-23 23:15:58 +0000	[diff] [blame]	90	int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	91	{
				92	pgd_t *pgdp;
				93	pud_t *pudp;
				94	pmd_t *pmdp;
				95	pte_t *ptep;
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	96
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	97	if (slab_is_available()) {
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	98	pgdp = pgd_offset_k(ea);
				99	pudp = pud_alloc(&init_mm, pgdp, ea);
				100	if (!pudp)
				101	return -ENOMEM;
				102	pmdp = pmd_alloc(&init_mm, pudp, ea);
				103	if (!pmdp)
				104	return -ENOMEM;
Paul Mackerras	23fd077	2005-10-31 13:37:12 +1100	[diff] [blame]	105	ptep = pte_alloc_kernel(pmdp, ea);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	106	if (!ptep)
				107	return -ENOMEM;
				108	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
				109	__pgprot(flags)));
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	110	} else {
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	111	#ifdef CONFIG_PPC_MMU_NOHASH
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	112	pgdp = pgd_offset_k(ea);
				113	#ifdef PUD_TABLE_SIZE
				114	if (pgd_none(*pgdp)) {
				115	pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
				116	BUG_ON(pudp == NULL);
				117	pgd_populate(&init_mm, pgdp, pudp);
				118	}
				119	#endif /* PUD_TABLE_SIZE */
				120	pudp = pud_offset(pgdp, ea);
				121	if (pud_none(*pudp)) {
				122	pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
				123	BUG_ON(pmdp == NULL);
				124	pud_populate(&init_mm, pudp, pmdp);
				125	}
				126	pmdp = pmd_offset(pudp, ea);
				127	if (!pmd_present(*pmdp)) {
				128	ptep = early_alloc_pgtable(PAGE_SIZE);
				129	BUG_ON(ptep == NULL);
				130	pmd_populate_kernel(&init_mm, pmdp, ptep);
				131	}
				132	ptep = pte_offset_kernel(pmdp, ea);
				133	set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
				134	__pgprot(flags)));
				135	#else /* CONFIG_PPC_MMU_NOHASH */
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	136	/*
				137	* If the mm subsystem is not fully up, we cannot create a
				138	* linux page table entry for this mapping. Simply bolt an
				139	* entry in the hardware page table.
Benjamin Herrenschmidt	3c726f8	2005-11-07 11:06:55 +1100	[diff] [blame]	140	*
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	141	*/
Paul Mackerras	1189be6	2007-10-11 20:37:10 +1000	[diff] [blame]	142	if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
				143	mmu_io_psize, mmu_kernel_ssize)) {
Benjamin Herrenschmidt	77ac166	2005-11-10 11:12:11 +1100	[diff] [blame]	144	printk(KERN_ERR "Failed to do bolted mapping IO "
				145	"memory at %016lx !\n", pa);
				146	return -ENOMEM;
				147	}
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	148	#endif /* !CONFIG_PPC_MMU_NOHASH */
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	149	}
Scott Wood	47ce8af	2013-10-11 19:22:37 -0500	[diff] [blame]	150
				151	#ifdef CONFIG_PPC_BOOK3E_64
				152	/*
				153	* With hardware tablewalk, a sync is needed to ensure that
				154	* subsequent accesses see the PTE we just wrote. Unlike userspace
				155	* mappings, we can't tolerate spurious faults, so make sure
				156	* the new PTE will be seen the first time.
				157	*/
				158	mb();
				159	#else
				160	smp_wmb();
				161	#endif
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	162	return 0;
				163	}
				164
				165
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	166	/**
				167	* __ioremap_at - Low level function to establish the page tables
				168	* for an IO mapping
				169	*/
				170	void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	171	unsigned long flags)
				172	{
				173	unsigned long i;
				174
Benjamin Herrenschmidt	a1f242f	2008-07-23 21:27:08 -0700	[diff] [blame]	175	/* Make sure we have the base flags */
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	176	if ((flags & _PAGE_PRESENT) == 0)
				177	flags \|= pgprot_val(PAGE_KERNEL);
				178
Benjamin Herrenschmidt	a1f242f	2008-07-23 21:27:08 -0700	[diff] [blame]	179	/* Non-cacheable page cannot be coherent */
				180	if (flags & _PAGE_NO_CACHE)
				181	flags &= ~_PAGE_COHERENT;
				182
				183	/* We don't support the 4K PFN hack with ioremap */
				184	if (flags & _PAGE_4K_PFN)
				185	return NULL;
				186
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	187	WARN_ON(pa & ~PAGE_MASK);
				188	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
				189	WARN_ON(size & ~PAGE_MASK);
				190
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	191	for (i = 0; i < size; i += PAGE_SIZE)
Benjamin Herrenschmidt	a245067	2009-07-23 23:15:16 +0000	[diff] [blame]	192	if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	193	return NULL;
				194
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	195	return (void __iomem *)ea;
				196	}
				197
				198	/**
				199	* __iounmap_from - Low level function to tear down the page tables
				200	* for an IO mapping. This is used for mappings that
				201	* are manipulated manually, like partial unmapping of
				202	* PCI IOs or ISA space.
				203	*/
				204	void __iounmap_at(void *ea, unsigned long size)
				205	{
				206	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
				207	WARN_ON(size & ~PAGE_MASK);
				208
				209	unmap_kernel_range((unsigned long)ea, size);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	210	}
				211
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	212	void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
				213	unsigned long flags, void *caller)
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	214	{
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	215	phys_addr_t paligned;
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	216	void __iomem *ret;
				217
				218	/*
				219	* Choose an address to map it to.
				220	* Once the imalloc system is running, we use it.
				221	* Before that, we map using addresses going
				222	* up from ioremap_bot. imalloc will use
				223	* the addresses from ioremap_bot through
				224	* IMALLOC_END
				225	*
				226	*/
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	227	paligned = addr & PAGE_MASK;
				228	size = PAGE_ALIGN(addr + size) - paligned;
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	229
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	230	if ((size == 0) \|\| (paligned == 0))
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	231	return NULL;
				232
				233	if (mem_init_done) {
				234	struct vm_struct *area;
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	235
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	236	area = __get_vm_area_caller(size, VM_IOREMAP,
				237	ioremap_bot, IOREMAP_END,
				238	caller);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	239	if (area == NULL)
				240	return NULL;
Michael Ellerman	7a9d125	2010-11-28 18:26:36 +0000	[diff] [blame]	241
				242	area->phys_addr = paligned;
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	243	ret = __ioremap_at(paligned, area->addr, size, flags);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	244	if (!ret)
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	245	vunmap(area->addr);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	246	} else {
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	247	ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	248	if (ret)
				249	ioremap_bot += size;
				250	}
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	251
				252	if (ret)
				253	ret += addr & ~PAGE_MASK;
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	254	return ret;
				255	}
				256
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	257	void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
				258	unsigned long flags)
				259	{
				260	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
				261	}
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	262
Benjamin Herrenschmidt	68a6435	2006-11-13 09:27:39 +1100	[diff] [blame]	263	void __iomem * ioremap(phys_addr_t addr, unsigned long size)
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	264	{
				265	unsigned long flags = _PAGE_NO_CACHE \| _PAGE_GUARDED;
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	266	void *caller = __builtin_return_address(0);
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	267
				268	if (ppc_md.ioremap)
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	269	return ppc_md.ioremap(addr, size, flags, caller);
				270	return __ioremap_caller(addr, size, flags, caller);
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	271	}
				272
Anton Blanchard	be135f4	2011-05-08 21:41:59 +0000	[diff] [blame]	273	void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
				274	{
				275	unsigned long flags = _PAGE_NO_CACHE;
				276	void *caller = __builtin_return_address(0);
				277
				278	if (ppc_md.ioremap)
				279	return ppc_md.ioremap(addr, size, flags, caller);
				280	return __ioremap_caller(addr, size, flags, caller);
				281	}
				282
Anton Blanchard	40f1ce7	2011-05-08 21:43:47 +0000	[diff] [blame]	283	void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	284	unsigned long flags)
				285	{
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	286	void *caller = __builtin_return_address(0);
				287
Benjamin Herrenschmidt	a1f242f	2008-07-23 21:27:08 -0700	[diff] [blame]	288	/* writeable implies dirty for kernel addresses */
				289	if (flags & _PAGE_RW)
				290	flags \|= _PAGE_DIRTY;
				291
				292	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
				293	flags &= ~(_PAGE_USER \| _PAGE_EXEC);
				294
Benjamin Herrenschmidt	55052ee	2010-04-07 14:39:36 +1000	[diff] [blame]	295	#ifdef _PAGE_BAP_SR
				296	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
				297	* which means that we just cleared supervisor access... oops ;-) This
				298	* restores it
				299	*/
				300	flags \|= _PAGE_BAP_SR;
				301	#endif
				302
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	303	if (ppc_md.ioremap)
Benjamin Herrenschmidt	1cdab55	2009-02-22 16:19:14 +0000	[diff] [blame]	304	return ppc_md.ioremap(addr, size, flags, caller);
				305	return __ioremap_caller(addr, size, flags, caller);
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	306	}
				307
				308
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	309	/*
				310	* Unmap an IO region and remove it from imalloc'd list.
				311	* Access to IO memory should be serialized by driver.
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	312	*/
Benjamin Herrenschmidt	68a6435	2006-11-13 09:27:39 +1100	[diff] [blame]	313	void __iounmap(volatile void __iomem *token)
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	314	{
				315	void *addr;
				316
				317	if (!mem_init_done)
				318	return;
				319
Benjamin Herrenschmidt	3d5134e	2007-06-04 15:15:36 +1000	[diff] [blame]	320	addr = (void *) ((unsigned long __force)
				321	PCI_FIX_ADDR(token) & PAGE_MASK);
				322	if ((unsigned long)addr < ioremap_bot) {
				323	printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
				324	" at 0x%p\n", addr);
				325	return;
				326	}
				327	vunmap(addr);
Paul Mackerras	14cf11a	2005-09-26 16:04:21 +1000	[diff] [blame]	328	}
				329
Benjamin Herrenschmidt	68a6435	2006-11-13 09:27:39 +1100	[diff] [blame]	330	void iounmap(volatile void __iomem *token)
Benjamin Herrenschmidt	4cb3cee	2006-11-11 17:25:10 +1100	[diff] [blame]	331	{
				332	if (ppc_md.iounmap)
				333	ppc_md.iounmap(token);
				334	else
				335	__iounmap(token);
				336	}
				337
/* Export the ioremap/iounmap entry points defined above. */
EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(ioremap_wc);
EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);
Aneesh Kumar K.V	5c1f6ee	2013-04-28 09:37:33 +0000	[diff] [blame]	346
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	347	/*
				348	* For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
				349	* For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
				350	*/
				351	struct page *pmd_page(pmd_t pmd)
				352	{
				353	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
				354	if (pmd_trans_huge(pmd))
				355	return pfn_to_page(pmd_pfn(pmd));
				356	#endif
				357	return virt_to_page(pmd_page_vaddr(pmd));
				358	}
				359
Aneesh Kumar K.V	5c1f6ee	2013-04-28 09:37:33 +0000	[diff] [blame]	360	#ifdef CONFIG_PPC_64K_PAGES
				361	static pte_t get_from_cache(struct mm_struct mm)
				362	{
				363	void pte_frag, ret;
				364
				365	spin_lock(&mm->page_table_lock);
				366	ret = mm->context.pte_frag;
				367	if (ret) {
				368	pte_frag = ret + PTE_FRAG_SIZE;
				369	/*
				370	* If we have taken up all the fragments mark PTE page NULL
				371	*/
				372	if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
				373	pte_frag = NULL;
				374	mm->context.pte_frag = pte_frag;
				375	}
				376	spin_unlock(&mm->page_table_lock);
				377	return (pte_t *)ret;
				378	}
				379
				380	static pte_t __alloc_for_cache(struct mm_struct mm, int kernel)
				381	{
				382	void *ret = NULL;
				383	struct page *page = alloc_page(GFP_KERNEL \| __GFP_NOTRACK \|
				384	__GFP_REPEAT \| __GFP_ZERO);
				385	if (!page)
				386	return NULL;
Kirill A. Shutemov	4f804943	2013-11-14 14:31:38 -0800	[diff] [blame]	387	if (!kernel && !pgtable_page_ctor(page)) {
				388	__free_page(page);
				389	return NULL;
				390	}
Aneesh Kumar K.V	5c1f6ee	2013-04-28 09:37:33 +0000	[diff] [blame]	391
				392	ret = page_address(page);
				393	spin_lock(&mm->page_table_lock);
				394	/*
				395	* If we find pgtable_page set, we return
				396	* the allocated page with single fragement
				397	* count.
				398	*/
				399	if (likely(!mm->context.pte_frag)) {
				400	atomic_set(&page->_count, PTE_FRAG_NR);
				401	mm->context.pte_frag = ret + PTE_FRAG_SIZE;
				402	}
				403	spin_unlock(&mm->page_table_lock);
				404
Aneesh Kumar K.V	5c1f6ee	2013-04-28 09:37:33 +0000	[diff] [blame]	405	return (pte_t *)ret;
				406	}
				407
				408	pte_t page_table_alloc(struct mm_struct mm, unsigned long vmaddr, int kernel)
				409	{
				410	pte_t *pte;
				411
				412	pte = get_from_cache(mm);
				413	if (pte)
				414	return pte;
				415
				416	return __alloc_for_cache(mm, kernel);
				417	}
				418
				419	void page_table_free(struct mm_struct mm, unsigned long table, int kernel)
				420	{
				421	struct page *page = virt_to_page(table);
				422	if (put_page_testzero(page)) {
				423	if (!kernel)
				424	pgtable_page_dtor(page);
				425	free_hot_cold_page(page, 0);
				426	}
				427	}
				428
				429	#ifdef CONFIG_SMP
				430	static void page_table_free_rcu(void *table)
				431	{
				432	struct page *page = virt_to_page(table);
				433	if (put_page_testzero(page)) {
				434	pgtable_page_dtor(page);
				435	free_hot_cold_page(page, 0);
				436	}
				437	}
				438
				439	void pgtable_free_tlb(struct mmu_gather tlb, void table, int shift)
				440	{
				441	unsigned long pgf = (unsigned long)table;
				442
				443	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
				444	pgf \|= shift;
				445	tlb_remove_table(tlb, (void *)pgf);
				446	}
				447
				448	void __tlb_remove_table(void *_table)
				449	{
				450	void table = (void )((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
				451	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
				452
				453	if (!shift)
				454	/* PTE page needs special handling */
				455	page_table_free_rcu(table);
				456	else {
				457	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
				458	kmem_cache_free(PGT_CACHE(shift), table);
				459	}
				460	}
				461	#else
				462	void pgtable_free_tlb(struct mmu_gather tlb, void table, int shift)
				463	{
				464	if (!shift) {
				465	/* PTE page needs special handling */
				466	struct page *page = virt_to_page(table);
				467	if (put_page_testzero(page)) {
				468	pgtable_page_dtor(page);
				469	free_hot_cold_page(page, 0);
				470	}
				471	} else {
				472	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
				473	kmem_cache_free(PGT_CACHE(shift), table);
				474	}
				475	}
				476	#endif
				477	#endif /* CONFIG_PPC_64K_PAGES */
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	478
				479	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
				480
				481	/*
				482	* This is called when relaxing access to a hugepage. It's also called in the page
				483	* fault path when we don't hit any of the major fault cases, ie, a minor
				484	* update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
				485	* handled those two for us, we additionally deal with missing execute
				486	* permission here on some processors
				487	*/
				488	int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
				489	pmd_t *pmdp, pmd_t entry, int dirty)
				490	{
				491	int changed;
				492	#ifdef CONFIG_DEBUG_VM
				493	WARN_ON(!pmd_trans_huge(*pmdp));
				494	assert_spin_locked(&vma->vm_mm->page_table_lock);
				495	#endif
				496	changed = !pmd_same(*(pmdp), entry);
				497	if (changed) {
				498	__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
				499	/*
				500	* Since we are not supporting SW TLB systems, we don't
				501	* have any thing similar to flush_tlb_page_nohash()
				502	*/
				503	}
				504	return changed;
				505	}
				506
				507	unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	508	pmd_t *pmdp, unsigned long clr,
				509	unsigned long set)
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	510	{
				511
				512	unsigned long old, tmp;
				513
				514	#ifdef CONFIG_DEBUG_VM
				515	WARN_ON(!pmd_trans_huge(*pmdp));
				516	assert_spin_locked(&mm->page_table_lock);
				517	#endif
				518
				519	#ifdef PTE_ATOMIC_UPDATES
				520	__asm__ __volatile__(
				521	"1: ldarx %0,0,%3\n\
				522	andi. %1,%0,%6\n\
				523	bne- 1b \n\
				524	andc %1,%0,%4 \n\
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	525	or %1,%1,%7\n\
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	526	stdcx. %1,0,%3 \n\
				527	bne- 1b"
				528	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	529	: "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	530	: "cc" );
				531	#else
				532	old = pmd_val(*pmdp);
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	533	*pmdp = __pmd((old & ~clr) \| set);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	534	#endif
Aneesh Kumar K.V	9e81330	2014-08-13 12:32:04 +0530	[diff] [blame]	535	trace_hugepage_update(addr, old, clr, set);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	536	if (old & _PAGE_HASHPTE)
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	537	hpte_do_hugepage_flush(mm, addr, pmdp, old);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	538	return old;
				539	}
				540
				541	pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
				542	pmd_t *pmdp)
				543	{
				544	pmd_t pmd;
				545
				546	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
				547	if (pmd_trans_huge(*pmdp)) {
				548	pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
				549	} else {
				550	/*
				551	* khugepaged calls this for normal pmd
				552	*/
				553	pmd = *pmdp;
				554	pmd_clear(pmdp);
				555	/*
				556	* Wait for all pending hash_page to finish. This is needed
				557	* in case of subpage collapse. When we collapse normal pages
				558	* to hugepage, we first clear the pmd, then invalidate all
				559	* the PTE entries. The assumption here is that any low level
				560	* page fault will see a none pmd and take the slow path that
				561	* will wait on mmap_sem. But we could very well be in a
				562	* hash_page with local ptep pointer value. Such a hash page
				563	* can result in adding new HPTE entries for normal subpages.
				564	* That means we could be modifying the page content as we
				565	* copy them to a huge page. So wait for parallel hash_page
				566	* to finish before invalidating HPTE entries. We can do this
				567	* by sending an IPI to all the cpus and executing a dummy
				568	* function there.
				569	*/
				570	kick_all_cpus_sync();
				571	/*
				572	* Now invalidate the hpte entries in the range
				573	* covered by pmd. This make sure we take a
				574	* fault and will find the pmd as none, which will
				575	* result in a major fault which takes mmap_sem and
				576	* hence wait for collapse to complete. Without this
				577	* the __collapse_huge_page_copy can result in copying
				578	* the old content.
				579	*/
				580	flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
				581	}
				582	return pmd;
				583	}
				584
				585	int pmdp_test_and_clear_young(struct vm_area_struct *vma,
				586	unsigned long address, pmd_t *pmdp)
				587	{
				588	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
				589	}
				590
				591	/*
				592	* We currently remove entries from the hashtable regardless of whether
				593	* the entry was young or dirty. The generic routines only flush if the
				594	* entry was young or dirty which is not good enough.
				595	*
				596	* We should be more intelligent about this but for the moment we override
				597	* these functions and force a tlb flush unconditionally
				598	*/
				599	int pmdp_clear_flush_young(struct vm_area_struct *vma,
				600	unsigned long address, pmd_t *pmdp)
				601	{
				602	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
				603	}
				604
				605	/*
				606	* We mark the pmd splitting and invalidate all the hpte
				607	* entries for this hugepage.
				608	*/
				609	void pmdp_splitting_flush(struct vm_area_struct *vma,
				610	unsigned long address, pmd_t *pmdp)
				611	{
				612	unsigned long old, tmp;
				613
				614	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
				615
				616	#ifdef CONFIG_DEBUG_VM
				617	WARN_ON(!pmd_trans_huge(*pmdp));
				618	assert_spin_locked(&vma->vm_mm->page_table_lock);
				619	#endif
				620
				621	#ifdef PTE_ATOMIC_UPDATES
				622
				623	__asm__ __volatile__(
				624	"1: ldarx %0,0,%3\n\
				625	andi. %1,%0,%6\n\
				626	bne- 1b \n\
				627	ori %1,%0,%4 \n\
				628	stdcx. %1,0,%3 \n\
				629	bne- 1b"
				630	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
				631	: "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY)
				632	: "cc" );
				633	#else
				634	old = pmd_val(*pmdp);
				635	*pmdp = __pmd(old \| _PAGE_SPLITTING);
				636	#endif
				637	/*
				638	* If we didn't had the splitting flag set, go and flush the
				639	* HPTE entries.
				640	*/
Aneesh Kumar K.V	9e81330	2014-08-13 12:32:04 +0530	[diff] [blame]	641	trace_hugepage_splitting(address, old);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	642	if (!(old & _PAGE_SPLITTING)) {
				643	/* We need to flush the hpte */
				644	if (old & _PAGE_HASHPTE)
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	645	hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	646	}
Aneesh Kumar K.V	346519a	2014-03-15 16:17:58 +0530	[diff] [blame]	647	/*
				648	* This ensures that generic code that rely on IRQ disabling
				649	* to prevent a parallel THP split work as expected.
				650	*/
				651	kick_all_cpus_sync();
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	652	}
				653
				654	/*
				655	* We want to put the pgtable in pmd and use pgtable for tracking
				656	* the base page size hptes
				657	*/
				658	void pgtable_trans_huge_deposit(struct mm_struct mm, pmd_t pmdp,
				659	pgtable_t pgtable)
				660	{
				661	pgtable_t *pgtable_slot;
				662	assert_spin_locked(&mm->page_table_lock);
				663	/*
				664	* we store the pgtable in the second half of PMD
				665	*/
				666	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
				667	*pgtable_slot = pgtable;
				668	/*
				669	* expose the deposited pgtable to other cpus.
				670	* before we set the hugepage PTE at pmd level
				671	* hash fault code looks at the deposted pgtable
				672	* to store hash index values.
				673	*/
				674	smp_wmb();
				675	}
				676
				677	pgtable_t pgtable_trans_huge_withdraw(struct mm_struct mm, pmd_t pmdp)
				678	{
				679	pgtable_t pgtable;
				680	pgtable_t *pgtable_slot;
				681
				682	assert_spin_locked(&mm->page_table_lock);
				683	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
				684	pgtable = *pgtable_slot;
				685	/*
				686	* Once we withdraw, mark the entry NULL.
				687	*/
				688	*pgtable_slot = NULL;
				689	/*
				690	* We store HPTE information in the deposited PTE fragment.
				691	* zero out the content on withdraw.
				692	*/
				693	memset(pgtable, 0, PTE_FRAG_SIZE);
				694	return pgtable;
				695	}
				696
				697	/*
				698	* set a new huge pmd. We should not be called for updating
				699	* an existing pmd entry. That should go via pmd_hugepage_update.
				700	*/
				701	void set_pmd_at(struct mm_struct *mm, unsigned long addr,
				702	pmd_t *pmdp, pmd_t pmd)
				703	{
				704	#ifdef CONFIG_DEBUG_VM
Aneesh Kumar K.V	8937ba4	2013-11-18 14:58:12 +0530	[diff] [blame]	705	WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	706	assert_spin_locked(&mm->page_table_lock);
				707	WARN_ON(!pmd_trans_huge(pmd));
				708	#endif
Aneesh Kumar K.V	9e81330	2014-08-13 12:32:04 +0530	[diff] [blame]	709	trace_hugepage_set_pmd(addr, pmd);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	710	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
				711	}
				712
				713	void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
				714	pmd_t *pmdp)
				715	{
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	716	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	717	}
				718
				719	/*
				720	* A linux hugepage PMD was changed and the corresponding hash table entries
				721	* neesd to be flushed.
				722	*/
				723	void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	724	pmd_t *pmdp, unsigned long old_pmd)
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	725	{
				726	int ssize, i;
				727	unsigned long s_addr;
Aneesh Kumar K.V	1a52728	2013-06-20 14:30:27 +0530	[diff] [blame]	728	int max_hpte_count;
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	729	unsigned int psize, valid;
				730	unsigned char *hpte_slot_array;
				731	unsigned long hidx, vpn, vsid, hash, shift, slot;
				732
				733	/*
				734	* Flush all the hptes mapping this hugepage
				735	*/
				736	s_addr = addr & HPAGE_PMD_MASK;
				737	hpte_slot_array = get_hpte_slot_array(pmdp);
				738	/*
				739	* IF we try to do a HUGE PTE update after a withdraw is done.
				740	* we will find the below NULL. This happens when we do
				741	* split_huge_page_pmd
				742	*/
				743	if (!hpte_slot_array)
				744	return;
				745
Aneesh Kumar K.V	fa1f8ae	2014-08-13 12:31:58 +0530	[diff] [blame]	746	/* get the base page size,vsid and segment size */
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	747	#ifdef CONFIG_DEBUG_VM
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	748	psize = get_slice_psize(mm, s_addr);
Aneesh Kumar K.V	fc04795	2014-08-13 12:32:00 +0530	[diff] [blame]	749	BUG_ON(psize == MMU_PAGE_16M);
				750	#endif
				751	if (old_pmd & _PAGE_COMBO)
				752	psize = MMU_PAGE_4K;
				753	else
				754	psize = MMU_PAGE_64K;
				755
Aneesh Kumar K.V	fa1f8ae	2014-08-13 12:31:58 +0530	[diff] [blame]	756	if (!is_kernel_addr(s_addr)) {
				757	ssize = user_segment_size(s_addr);
				758	vsid = get_vsid(mm->context.id, s_addr, ssize);
				759	WARN_ON(vsid == 0);
				760	} else {
				761	vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
				762	ssize = mmu_kernel_ssize;
				763	}
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	764
Aneesh Kumar K.V	1a52728	2013-06-20 14:30:27 +0530	[diff] [blame]	765	if (ppc_md.hugepage_invalidate)
Aneesh Kumar K.V	fa1f8ae	2014-08-13 12:31:58 +0530	[diff] [blame]	766	return ppc_md.hugepage_invalidate(vsid, s_addr,
				767	hpte_slot_array,
				768	psize, ssize);
Aneesh Kumar K.V	1a52728	2013-06-20 14:30:27 +0530	[diff] [blame]	769	/*
				770	* No bluk hpte removal support, invalidate each entry
				771	*/
				772	shift = mmu_psize_defs[psize].shift;
				773	max_hpte_count = HPAGE_PMD_SIZE >> shift;
				774	for (i = 0; i < max_hpte_count; i++) {
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	775	/*
				776	* 8 bits per each hpte entries
				777	* 000\| [ secondary group (one bit) \| hidx (3 bits) \| valid bit]
				778	*/
				779	valid = hpte_valid(hpte_slot_array, i);
				780	if (!valid)
				781	continue;
				782	hidx = hpte_hash_index(hpte_slot_array, i);
				783
				784	/* get the vpn */
				785	addr = s_addr + (i * (1ul << shift));
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	786	vpn = hpt_vpn(addr, vsid, ssize);
				787	hash = hpt_hash(vpn, shift, ssize);
				788	if (hidx & _PTEIDX_SECONDARY)
				789	hash = ~hash;
				790
				791	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
				792	slot += hidx & _PTEIDX_GROUP_IX;
				793	ppc_md.hpte_invalidate(slot, vpn, psize,
				794	MMU_PAGE_16M, ssize, 0);
				795	}
				796	}
				797
				798	static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
				799	{
				800	pmd_val(pmd) \|= pgprot_val(pgprot);
				801	return pmd;
				802	}
				803
				804	pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
				805	{
				806	pmd_t pmd;
				807	/*
				808	* For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
				809	* set. We use this to check THP page at pmd level.
				810	* leaf pte for huge page, bottom two bits != 00
				811	*/
				812	pmd_val(pmd) = pfn << PTE_RPN_SHIFT;
				813	pmd_val(pmd) \|= _PAGE_THP_HUGE;
				814	pmd = pmd_set_protbits(pmd, pgprot);
				815	return pmd;
				816	}
				817
				818	pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
				819	{
				820	return pfn_pmd(page_to_pfn(page), pgprot);
				821	}
				822
				823	pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
				824	{
				825
				826	pmd_val(pmd) &= _HPAGE_CHG_MASK;
				827	pmd = pmd_set_protbits(pmd, newprot);
				828	return pmd;
				829	}
				830
				831	/*
				832	* This is called at the end of handling a user page fault, when the
				833	* fault has been handled by updating a HUGE PMD entry in the linux page tables.
				834	* We use it to preload an HPTE into the hash table corresponding to
				835	* the updated linux HUGE PMD entry.
				836	*/
				837	void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
				838	pmd_t *pmd)
				839	{
				840	return;
				841	}
				842
				843	pmd_t pmdp_get_and_clear(struct mm_struct *mm,
				844	unsigned long addr, pmd_t *pmdp)
				845	{
				846	pmd_t old_pmd;
				847	pgtable_t pgtable;
				848	unsigned long old;
				849	pgtable_t *pgtable_slot;
				850
Aneesh Kumar K.V	88247e8	2014-02-12 09:13:36 +0530	[diff] [blame]	851	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	852	old_pmd = __pmd(old);
				853	/*
				854	* We have pmd == none and we are holding page_table_lock.
				855	* So we can safely go and clear the pgtable hash
				856	* index info.
				857	*/
				858	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
				859	pgtable = *pgtable_slot;
				860	/*
				861	* Let's zero out old valid and hash index details
				862	* hash fault look at them.
				863	*/
				864	memset(pgtable, 0, PTE_FRAG_SIZE);
				865	return old_pmd;
				866	}
Aneesh Kumar K.V	437d496	2013-06-20 14:30:26 +0530	[diff] [blame]	867
				868	int has_transparent_hugepage(void)
				869	{
				870	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
				871	return 0;
				872	/*
				873	* We support THP only if PMD_SIZE is 16MB.
				874	*/
				875	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
				876	return 0;
				877	/*
				878	* We need to make sure that we support 16MB hugepage in a segement
				879	* with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
				880	* of 64K.
				881	*/
				882	/*
				883	* If we have 64K HPTE, we will be using that by default
				884	*/
				885	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
				886	(mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
				887	return 0;
				888	/*
				889	* Ok we only have 4K HPTE
				890	*/
				891	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
				892	return 0;
				893
				894	return 1;
				895	}
Aneesh Kumar K.V	074c2ea	2013-06-20 14:30:15 +0530	[diff] [blame]	896	#endif /* CONFIG_TRANSPARENT_HUGEPAGE */