/*
 *  This file contains ioremap and related functions for 64-bit machines.
 *
 *  Derived from arch/ppc64/mm/init.c
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 *  Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
 *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
 *    Copyright (C) 1996 Paul Mackerras
 *
 *  Derived from "arch/i386/mm/init.c"
 *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
 *
 *  Dave Engebretsen <engebret@us.ibm.com>
 *      Rework for PPC64 port.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>

#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/dma.h>

#include "mmu_decl.h"

#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>

#ifdef CONFIG_PPC_STD_MMU_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
#endif
#endif

#ifdef CONFIG_PPC_BOOK3S_64
/*
 * partition table and process table for ISA 3.0
 */
struct prtb_entry *process_tb;
struct patb_entry *partition_tb;
/*
 * page table size
 */
unsigned long __pte_index_size;
EXPORT_SYMBOL(__pte_index_size);
unsigned long __pmd_index_size;
EXPORT_SYMBOL(__pmd_index_size);
unsigned long __pud_index_size;
EXPORT_SYMBOL(__pud_index_size);
unsigned long __pgd_index_size;
EXPORT_SYMBOL(__pgd_index_size);
unsigned long __pmd_cache_index;
EXPORT_SYMBOL(__pmd_cache_index);
unsigned long __pte_table_size;
EXPORT_SYMBOL(__pte_table_size);
unsigned long __pmd_table_size;
EXPORT_SYMBOL(__pmd_table_size);
unsigned long __pud_table_size;
EXPORT_SYMBOL(__pud_table_size);
unsigned long __pgd_table_size;
EXPORT_SYMBOL(__pgd_table_size);
unsigned long __pmd_val_bits;
EXPORT_SYMBOL(__pmd_val_bits);
unsigned long __pud_val_bits;
EXPORT_SYMBOL(__pud_val_bits);
unsigned long __pgd_val_bits;
EXPORT_SYMBOL(__pgd_val_bits);
unsigned long __kernel_virt_start;
EXPORT_SYMBOL(__kernel_virt_start);
unsigned long __kernel_virt_size;
EXPORT_SYMBOL(__kernel_virt_size);
unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);
unsigned long __vmalloc_end;
EXPORT_SYMBOL(__vmalloc_end);
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;
EXPORT_SYMBOL(__pte_frag_nr);
unsigned long __pte_frag_size_shift;
EXPORT_SYMBOL(__pte_frag_size_shift);
unsigned long ioremap_bot;
#else /* !CONFIG_PPC_BOOK3S_64 */
unsigned long ioremap_bot = IOREMAP_BASE;
#endif

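/*
 * The page table geometry variables above are not compile-time constants
 * on Book3S 64; they are expected to be filled in by the early MMU setup
 * code at boot, depending on whether the hash or the radix page table
 * layout is in use.
 */
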
/**
 * __ioremap_at - Low level function to establish the page tables
 *                for an IO mapping
 */
void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
			    unsigned long flags)
{
	unsigned long i;

	/* Make sure we have the base flags */
	if ((flags & _PAGE_PRESENT) == 0)
		flags |= pgprot_val(PAGE_KERNEL);

	/* We don't support the 4K PFN hack with ioremap */
	if (flags & H_PAGE_4K_PFN)
		return NULL;

	WARN_ON(pa & ~PAGE_MASK);
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	for (i = 0; i < size; i += PAGE_SIZE)
		if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
			return NULL;

	return (void __iomem *)ea;
}

/**
 * __iounmap_at - Low level function to tear down the page tables
 *                for an IO mapping. This is used for mappings that
 *                are manipulated manually, like partial unmapping of
 *                PCI IOs or ISA space.
 */
void __iounmap_at(void *ea, unsigned long size)
{
	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
	WARN_ON(size & ~PAGE_MASK);

	unmap_kernel_range((unsigned long)ea, size);
}

void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
				unsigned long flags, void *caller)
{
	phys_addr_t paligned;
	void __iomem *ret;

	/*
	 * Choose an address to map it to.
	 * Once the imalloc system is running, we use it.
	 * Before that, we map using addresses going
	 * up from ioremap_bot.  imalloc will use
	 * the addresses from ioremap_bot through
	 * IMALLOC_END
	 */
	paligned = addr & PAGE_MASK;
	size = PAGE_ALIGN(addr + size) - paligned;

	if ((size == 0) || (paligned == 0))
		return NULL;

	if (slab_is_available()) {
		struct vm_struct *area;

		area = __get_vm_area_caller(size, VM_IOREMAP,
					    ioremap_bot, IOREMAP_END,
					    caller);
		if (area == NULL)
			return NULL;

		area->phys_addr = paligned;
		ret = __ioremap_at(paligned, area->addr, size, flags);
		if (!ret)
			vunmap(area->addr);
	} else {
		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
		if (ret)
			ioremap_bot += size;
	}

	if (ret)
		ret += addr & ~PAGE_MASK;
	return ret;
}

void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
			 unsigned long flags)
{
	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}

void __iomem * ioremap(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}

void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
{
	unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
	void *caller = __builtin_return_address(0);

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}

void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
			    unsigned long flags)
{
	void *caller = __builtin_return_address(0);

	/* writeable implies dirty for kernel addresses */
	if (flags & _PAGE_WRITE)
		flags |= _PAGE_DIRTY;

	/* we don't want to let _PAGE_EXEC leak out */
	flags &= ~_PAGE_EXEC;
	/*
	 * Force kernel mapping.
	 */
#if defined(CONFIG_PPC_BOOK3S_64)
	flags |= _PAGE_PRIVILEGED;
#else
	flags &= ~_PAGE_USER;
#endif

#ifdef _PAGE_BAP_SR
	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
	 * which means that we just cleared supervisor access... oops ;-) This
	 * restores it
	 */
	flags |= _PAGE_BAP_SR;
#endif

	if (ppc_md.ioremap)
		return ppc_md.ioremap(addr, size, flags, caller);
	return __ioremap_caller(addr, size, flags, caller);
}

/*
 * Unmap an IO region and remove it from imalloc'd list.
 * Access to IO memory should be serialized by driver.
 */
void __iounmap(volatile void __iomem *token)
{
	void *addr;

	if (!slab_is_available())
		return;

	addr = (void *) ((unsigned long __force)
			 PCI_FIX_ADDR(token) & PAGE_MASK);
	if ((unsigned long)addr < ioremap_bot) {
		printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
		       " at 0x%p\n", addr);
		return;
	}
	vunmap(addr);
}

void iounmap(volatile void __iomem *token)
{
	if (ppc_md.iounmap)
		ppc_md.iounmap(token);
	else
		__iounmap(token);
}

EXPORT_SYMBOL(ioremap);
EXPORT_SYMBOL(ioremap_wc);
EXPORT_SYMBOL(ioremap_prot);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(__ioremap_at);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(__iounmap);
EXPORT_SYMBOL(__iounmap_at);

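/*
 * Illustrative sketch of typical driver-side usage of the ioremap family
 * above: map a device's MMIO window, access a register through the
 * returned __iomem cookie, and tear the mapping down again.  The helper
 * name and the register offset are hypothetical and only serve as an
 * example.
 */
static u32 __maybe_unused example_read_device_reg(phys_addr_t mmio_base)
{
	void __iomem *regs = ioremap(mmio_base, PAGE_SIZE);
	u32 val;

	if (!regs)
		return 0;
	val = in_be32(regs + 0x10);	/* hypothetical register at offset 0x10 */
	iounmap(regs);
	return val;
}
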
#ifndef __PAGETABLE_PUD_FOLDED
/* 4 level page table */
struct page *pgd_page(pgd_t pgd)
{
	if (pgd_huge(pgd))
		return pte_page(pgd_pte(pgd));
	return virt_to_page(pgd_page_vaddr(pgd));
}
#endif

struct page *pud_page(pud_t pud)
{
	if (pud_huge(pud))
		return pte_page(pud_pte(pud));
	return virt_to_page(pud_page_vaddr(pud));
}

/*
 * For a hugepage we have the pfn in the pmd; the low PTE_RPN_SHIFT bits
 * are used for flags.  For a PTE page, we have a PTE_FRAG_SIZE (4K)
 * aligned virtual address.
 */
struct page *pmd_page(pmd_t pmd)
{
	if (pmd_trans_huge(pmd) || pmd_huge(pmd))
		return pte_page(pmd_pte(pmd));
	return virt_to_page(pmd_page_vaddr(pmd));
}

#ifdef CONFIG_PPC_64K_PAGES
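/*
 * With 64K pages a full page is much larger than a single PTE page table,
 * so each backing page is carved into PTE_FRAG_NR fragments of
 * PTE_FRAG_SIZE bytes.  mm->context.pte_frag points at the next unused
 * fragment in the current page, and the backing page's reference count
 * tracks how many fragments are still in use.
 */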
static pte_t *get_from_cache(struct mm_struct *mm)
{
	void *pte_frag, *ret;

	spin_lock(&mm->page_table_lock);
	ret = mm->context.pte_frag;
	if (ret) {
		pte_frag = ret + PTE_FRAG_SIZE;
		/*
		 * If we have taken up all the fragments, mark the PTE page NULL
		 */
		if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
			pte_frag = NULL;
		mm->context.pte_frag = pte_frag;
	}
	spin_unlock(&mm->page_table_lock);
	return (pte_t *)ret;
}

static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
	void *ret = NULL;
	struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
				       __GFP_REPEAT | __GFP_ZERO);
	if (!page)
		return NULL;
	if (!kernel && !pgtable_page_ctor(page)) {
		__free_page(page);
		return NULL;
	}

	ret = page_address(page);
	spin_lock(&mm->page_table_lock);
	/*
	 * If we find mm->context.pte_frag already set, we return the
	 * allocated page with a single fragment count.
	 */
	if (likely(!mm->context.pte_frag)) {
		set_page_count(page, PTE_FRAG_NR);
		mm->context.pte_frag = ret + PTE_FRAG_SIZE;
	}
	spin_unlock(&mm->page_table_lock);

	return (pte_t *)ret;
}

pte_t *pte_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
{
	pte_t *pte;

	pte = get_from_cache(mm);
	if (pte)
		return pte;

	return __alloc_for_cache(mm, kernel);
}
#endif /* CONFIG_PPC_64K_PAGES */

void pte_fragment_free(unsigned long *table, int kernel)
{
	struct page *page = virt_to_page(table);
	if (put_page_testzero(page)) {
		if (!kernel)
			pgtable_page_dtor(page);
		free_hot_cold_page(page, 0);
	}
}

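/*
 * pgtable_free_tlb() defers freeing of a page table until the TLB batch is
 * flushed.  The table pointer and its index-size "shift" are packed into a
 * single word (the shift fits in the low bits of the aligned table
 * address); __tlb_remove_table() unpacks them again, treating shift == 0
 * as a PTE fragment and anything else as a kmem_cache-backed table.
 */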
#ifdef CONFIG_SMP
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	unsigned long pgf = (unsigned long)table;

	BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
	pgf |= shift;
	tlb_remove_table(tlb, (void *)pgf);
}

void __tlb_remove_table(void *_table)
{
	void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
	unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;

	if (!shift)
		/* PTE page needs special handling */
		pte_fragment_free(table, 0);
	else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
#else
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
	if (!shift) {
		/* PTE page needs special handling */
		pte_fragment_free(table, 0);
	} else {
		BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
		kmem_cache_free(PGT_CACHE(shift), table);
	}
}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE

/*
 * This is called when relaxing access to a hugepage. It's also called in
 * the page fault path when we don't hit any of the major fault cases, ie,
 * a minor update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code
 * will have handled those for us; we additionally deal with missing
 * execute permission here on some processors.
 */
int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp, pmd_t entry, int dirty)
{
	int changed;
#ifdef CONFIG_DEBUG_VM
	WARN_ON(!pmd_trans_huge(*pmdp));
	assert_spin_locked(&vma->vm_mm->page_table_lock);
#endif
	changed = !pmd_same(*(pmdp), entry);
	if (changed) {
		__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
		/*
		 * Since we are not supporting SW TLB systems, we don't
		 * have anything similar to flush_tlb_page_nohash()
		 */
	}
	return changed;
}

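/*
 * The inline assembly in pmd_hugepage_update() below is an ldarx/stdcx.
 * retry loop: it spins while H_PAGE_BUSY is set, then atomically clears
 * the 'clr' bits and sets the 'set' bits in the pmd.  The pmd is stored
 * big-endian in memory, hence the cpu_to_be64()/be64_to_cpu() conversions
 * around the update.
 */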
unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
				  pmd_t *pmdp, unsigned long clr,
				  unsigned long set)
{

	__be64 old_be, tmp;
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	WARN_ON(!pmd_trans_huge(*pmdp));
	assert_spin_locked(&mm->page_table_lock);
#endif

	__asm__ __volatile__(
	"1:	ldarx	%0,0,%3\n\
		and.	%1,%0,%6\n\
		bne-	1b \n\
		andc	%1,%0,%4 \n\
		or	%1,%1,%7\n\
		stdcx.	%1,0,%3 \n\
		bne-	1b"
	: "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp)
	: "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp),
	  "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
	: "cc" );

	old = be64_to_cpu(old_be);

	trace_hugepage_update(addr, old, clr, set);
	if (old & H_PAGE_HASHPTE)
		hpte_do_hugepage_flush(mm, addr, pmdp, old);
	return old;
}

pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	pmd_t pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(pmd_trans_huge(*pmdp));

	pmd = *pmdp;
	pmd_clear(pmdp);
	/*
	 * Wait for all pending hash_page to finish. This is needed
	 * in case of subpage collapse. When we collapse normal pages
	 * to hugepage, we first clear the pmd, then invalidate all
	 * the PTE entries. The assumption here is that any low level
	 * page fault will see a none pmd and take the slow path that
	 * will wait on mmap_sem. But we could very well be in a
	 * hash_page with local ptep pointer value. Such a hash page
	 * can result in adding new HPTE entries for normal subpages.
	 * That means we could be modifying the page content as we
	 * copy them to a huge page. So wait for parallel hash_page
	 * to finish before invalidating HPTE entries. We can do this
	 * by sending an IPI to all the cpus and executing a dummy
	 * function there.
	 */
	kick_all_cpus_sync();
	/*
	 * Now invalidate the hpte entries in the range
	 * covered by pmd. This makes sure we take a
	 * fault and will find the pmd as none, which will
	 * result in a major fault which takes mmap_sem and
	 * hence waits for the collapse to complete. Without this
	 * the __collapse_huge_page_copy can result in copying
	 * the old content.
	 */
	flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
	return pmd;
}

/*
 * We currently remove entries from the hashtable regardless of whether
 * the entry was young or dirty.
 *
 * We should be more intelligent about this but for the moment we override
 * these functions and force a tlb flush unconditionally
 */
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
			      unsigned long address, pmd_t *pmdp)
{
	return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
}

/*
 * We want to put the pgtable in pmd and use pgtable for tracking
 * the base page size hptes
 */
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	pgtable_t *pgtable_slot;
	assert_spin_locked(&mm->page_table_lock);
	/*
	 * we store the pgtable in the second half of PMD
	 */
	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
	*pgtable_slot = pgtable;
	/*
	 * expose the deposited pgtable to other cpus before we set
	 * the hugepage PTE at pmd level; the hash fault code looks at
	 * the deposited pgtable to store hash index values.
	 */
	smp_wmb();
}

pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pgtable_t pgtable;
	pgtable_t *pgtable_slot;

	assert_spin_locked(&mm->page_table_lock);
	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
	pgtable = *pgtable_slot;
	/*
	 * Once we withdraw, mark the entry NULL.
	 */
	*pgtable_slot = NULL;
	/*
	 * We store HPTE information in the deposited PTE fragment.
	 * zero out the content on withdraw.
	 */
	memset(pgtable, 0, PTE_FRAG_SIZE);
	return pgtable;
}

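/*
 * Usage sketch (an assumption about how the generic THP code pairs these
 * helpers, not something stated in this file): a pgtable is deposited when
 * a huge pmd is established and withdrawn again when it is split or
 * zapped, roughly:
 *
 *	pgtable_trans_huge_deposit(mm, pmdp, pgtable);
 *	set_pmd_at(mm, haddr, pmdp, pmd);
 *	...
 *	pgtable = pgtable_trans_huge_withdraw(mm, pmdp);
 *	pte_free(mm, pgtable);
 */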
void pmdp_huge_split_prepare(struct vm_area_struct *vma,
			     unsigned long address, pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);

	/*
	 * We can't mark the pmd none here, because that will cause a race
	 * against exit_mmap. We need to keep the pmd marked TRANS HUGE while
	 * we split, but at the same time we want the rest of the ppc64 code
	 * not to insert a hash pte on this, because we will be modifying
	 * the deposited pgtable in the caller of this function. Hence
	 * clear the _PAGE_USER access (by setting _PAGE_PRIVILEGED) so that
	 * fault handling moves to a higher level function which serializes
	 * against the ptl. We need to flush the existing hash pte entries
	 * here even though the translation is still valid, because we will
	 * withdraw the pgtable_t after this.
	 */
	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
}

/*
 * set a new huge pmd. We should not be called for updating
 * an existing pmd entry. That should go via pmd_hugepage_update.
 */
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
		pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
	WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
	assert_spin_locked(&mm->page_table_lock);
	WARN_ON(!pmd_trans_huge(pmd));
#endif
	trace_hugepage_set_pmd(addr, pmd_val(pmd));
	return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}

/*
 * We use this to invalidate a pmdp entry before switching from a
 * hugepte to regular pmd entry.
 */
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
		     pmd_t *pmdp)
{
	pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);

	/*
	 * This ensures that generic code that relies on IRQ disabling
	 * to prevent a parallel THP split works as expected.
	 */
	kick_all_cpus_sync();
}

/*
 * A linux hugepage PMD was changed and the corresponding hash table entries
 * need to be flushed.
 */
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
			    pmd_t *pmdp, unsigned long old_pmd)
{
	int ssize;
	unsigned int psize;
	unsigned long vsid;
	unsigned long flags = 0;
	const struct cpumask *tmp;

	/* get the base page size, vsid and segment size */
#ifdef CONFIG_DEBUG_VM
	psize = get_slice_psize(mm, addr);
	BUG_ON(psize == MMU_PAGE_16M);
#endif
	if (old_pmd & H_PAGE_COMBO)
		psize = MMU_PAGE_4K;
	else
		psize = MMU_PAGE_64K;

	if (!is_kernel_addr(addr)) {
		ssize = user_segment_size(addr);
		vsid = get_vsid(mm->context.id, addr, ssize);
		WARN_ON(vsid == 0);
	} else {
		vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
		ssize = mmu_kernel_ssize;
	}

	tmp = cpumask_of(smp_processor_id());
	if (cpumask_equal(mm_cpumask(mm), tmp))
		flags |= HPTE_LOCAL_UPDATE;

	return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
}

static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
{
	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
}

pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
	unsigned long pmdv;

	pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
	return pmd_set_protbits(__pmd(pmdv), pgprot);
}

pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
{
	return pfn_pmd(page_to_pfn(page), pgprot);
}

pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
	unsigned long pmdv;

	pmdv = pmd_val(pmd);
	pmdv &= _HPAGE_CHG_MASK;
	return pmd_set_protbits(__pmd(pmdv), newprot);
}

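/*
 * Composition sketch (an assumption about how the generic THP code uses
 * the helpers above, not something stated in this file):
 *
 *	pmd_t pmd = pmd_mkhuge(mk_pmd(page, vma->vm_page_prot));
 *	set_pmd_at(mm, haddr, pmdp, pmd);
 *
 * pmd_modify() keeps the pfn and the bits in _HPAGE_CHG_MASK while
 * replacing the remaining protection bits with the new pgprot.
 */
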
/*
 * This is called at the end of handling a user page fault, when the
 * fault has been handled by updating a HUGE PMD entry in the linux page tables.
 * We use it to preload an HPTE into the hash table corresponding to
 * the updated linux HUGE PMD entry.
 */
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
			  pmd_t *pmd)
{
	return;
}

pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pmd_t *pmdp)
{
	pmd_t old_pmd;
	pgtable_t pgtable;
	unsigned long old;
	pgtable_t *pgtable_slot;

	old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
	old_pmd = __pmd(old);
	/*
	 * We have pmd == none and we are holding page_table_lock.
	 * So we can safely go and clear the pgtable hash
	 * index info.
	 */
	pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
	pgtable = *pgtable_slot;
	/*
	 * Let's zero out the old valid and hash index details;
	 * the hash fault code looks at them.
	 */
	memset(pgtable, 0, PTE_FRAG_SIZE);
	/*
	 * Serialize against find_linux_pte_or_hugepte which does lock-less
	 * lookup in page tables with local interrupts disabled. For huge pages
	 * it casts pmd_t to pte_t. Since the format of pte_t is different from
	 * pmd_t we want to prevent transit from a pmd pointing to a page table
	 * to a pmd pointing to a huge page (and back) while interrupts are
	 * disabled. We clear the pmd to possibly replace it with a page table
	 * pointer in different code paths. So make sure we wait for the
	 * parallel find_linux_pte_or_hugepte to finish.
	 */
	kick_all_cpus_sync();
	return old_pmd;
}

int has_transparent_hugepage(void)
{

	if (!mmu_has_feature(MMU_FTR_16M_PAGE))
		return 0;
	/*
	 * We support THP only if PMD_SIZE is 16MB.
	 */
	if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
		return 0;
	/*
	 * We need to make sure that we support 16MB hugepage in a segment
	 * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
	 * of 64K.
	 */
	/*
	 * If we have 64K HPTE, we will be using that by default
	 */
	if (mmu_psize_defs[MMU_PAGE_64K].shift &&
	    (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
		return 0;
	/*
	 * Ok we only have 4K HPTE
	 */
	if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
		return 0;

	return 1;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */