/*
 * IBM System z Huge TLB Page Support for Kernel.
 *
 * Copyright IBM Corp. 2007,2016
 * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
 */

#define KMSG_COMPONENT "hugetlb"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/mm.h>
#include <linux/hugetlb.h>

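/*
 * A huge pte is mapped by a segment table entry (1 MB page, EDAT1) or by
 * a region third table entry (2 GB page, EDAT2), here jointly called
 * "rste". The hardware and software bits sit at different positions in
 * the pte and rste formats, so the encoding has to be converted on the
 * fly; the table below documents the mapping.
 */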
static inline unsigned long __pte_to_rste(pte_t pte)
{
	unsigned long rste;

	/*
	 * Convert encoding		pte bits	pmd / pud bits
	 *				lIR.uswrdy.p	dy..R...I...wr
	 * empty			010.000000.0 -> 00..0...1...00
	 * prot-none, clean, old	111.000000.1 -> 00..1...1...00
	 * prot-none, clean, young	111.000001.1 -> 01..1...1...00
	 * prot-none, dirty, old	111.000010.1 -> 10..1...1...00
	 * prot-none, dirty, young	111.000011.1 -> 11..1...1...00
	 * read-only, clean, old	111.000100.1 -> 00..1...1...01
	 * read-only, clean, young	101.000101.1 -> 01..1...0...01
	 * read-only, dirty, old	111.000110.1 -> 10..1...1...01
	 * read-only, dirty, young	101.000111.1 -> 11..1...0...01
	 * read-write, clean, old	111.001100.1 -> 00..1...1...11
	 * read-write, clean, young	101.001101.1 -> 01..1...0...11
	 * read-write, dirty, old	110.001110.1 -> 10..0...1...11
	 * read-write, dirty, young	100.001111.1 -> 11..0...0...11
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
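	/*
	 * The shift counts below line each pte bit up with its rste
	 * counterpart, e.g. _PAGE_READ shifted right by 4 lands on
	 * _SEGMENT_ENTRY_READ; the region third entry uses the same
	 * bit positions, so one conversion covers both formats.
	 */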
	if (pte_present(pte)) {
		rste = pte_val(pte) & PAGE_MASK;
		rste |= (pte_val(pte) & _PAGE_READ) >> 4;
		rste |= (pte_val(pte) & _PAGE_WRITE) >> 4;
		rste |= (pte_val(pte) & _PAGE_INVALID) >> 5;
		rste |= (pte_val(pte) & _PAGE_PROTECT);
		rste |= (pte_val(pte) & _PAGE_DIRTY) << 10;
		rste |= (pte_val(pte) & _PAGE_YOUNG) << 10;
		rste |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
	} else
		rste = _SEGMENT_ENTRY_INVALID;
	return rste;
}

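/*
 * Reverse of __pte_to_rste(): reconstruct a (large) pte from a segment
 * or region third table entry. The entry type in the rste tells whether
 * this is a pud (2 GB) or a pmd (1 MB) mapping.
 */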
static inline pte_t __rste_to_pte(unsigned long rste)
{
	int present;
	pte_t pte;

	if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		present = pud_present(__pud(rste));
	else
		present = pmd_present(__pmd(rste));

	/*
	 * Convert encoding		pmd / pud bits	pte bits
	 *				dy..R...I...wr	lIR.uswrdy.p
	 * empty			00..0...1...00 -> 010.000000.0
	 * prot-none, clean, old	00..1...1...00 -> 111.000000.1
	 * prot-none, clean, young	01..1...1...00 -> 111.000001.1
	 * prot-none, dirty, old	10..1...1...00 -> 111.000010.1
	 * prot-none, dirty, young	11..1...1...00 -> 111.000011.1
	 * read-only, clean, old	00..1...1...01 -> 111.000100.1
	 * read-only, clean, young	01..1...0...01 -> 101.000101.1
	 * read-only, dirty, old	10..1...1...01 -> 111.000110.1
	 * read-only, dirty, young	11..1...0...01 -> 101.000111.1
	 * read-write, clean, old	00..1...1...11 -> 111.001100.1
	 * read-write, clean, young	01..1...0...11 -> 101.001101.1
	 * read-write, dirty, old	10..0...1...11 -> 110.001110.1
	 * read-write, dirty, young	11..0...0...11 -> 100.001111.1
	 * HW-bits: R read-only, I invalid
	 * SW-bits: p present, y young, d dirty, r read, w write, s special,
	 *	    u unused, l large
	 */
	if (present) {
		pte_val(pte) = rste & _SEGMENT_ENTRY_ORIGIN_LARGE;
		pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
		pte_val(pte) |= (rste & _SEGMENT_ENTRY_READ) << 4;
		pte_val(pte) |= (rste & _SEGMENT_ENTRY_WRITE) << 4;
		pte_val(pte) |= (rste & _SEGMENT_ENTRY_INVALID) << 5;
		pte_val(pte) |= (rste & _SEGMENT_ENTRY_PROTECT);
		pte_val(pte) |= (rste & _SEGMENT_ENTRY_DIRTY) >> 10;
		pte_val(pte) |= (rste & _SEGMENT_ENTRY_YOUNG) >> 10;
		pte_val(pte) |= (rste & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
	} else
		pte_val(pte) = _PAGE_INVALID;
	return pte;
}

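/*
 * Store a huge pte: convert it to rste format and tag it with the table
 * type matching the page table level of *ptep, so that 1 MB mappings end
 * up as large segment entries and 2 GB mappings as large region third
 * entries.
 */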
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
		     pte_t *ptep, pte_t pte)
{
	unsigned long rste = __pte_to_rste(pte);

	/* Set correct table type for 2G hugepages */
	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
	else
		rste |= _SEGMENT_ENTRY_LARGE;
	pte_val(*ptep) = rste;
}

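/*
 * Read a huge pte, converting the rste format back to the common pte
 * layout that generic hugetlb code expects.
 */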
pte_t huge_ptep_get(pte_t *ptep)
{
	return __rste_to_pte(pte_val(*ptep));
}

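/*
 * Clear a huge pte and return its previous contents. The exchange is
 * done with pudp_xchg_direct()/pmdp_xchg_direct(), which also take care
 * of the required TLB flushing.
 */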
pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
			      unsigned long addr, pte_t *ptep)
{
	pte_t pte = huge_ptep_get(ptep);
	pmd_t *pmdp = (pmd_t *) ptep;
	pud_t *pudp = (pud_t *) ptep;

	if ((pte_val(*ptep) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
		pudp_xchg_direct(mm, addr, pudp, __pud(_REGION3_ENTRY_EMPTY));
	else
		pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY));
	return pte;
}

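/*
 * Allocate the page table entry that will hold a huge pte. For a
 * PUD_SIZE (2 GB) page the pud itself acts as the huge pte, for a
 * PMD_SIZE (1 MB) page a pmd is allocated below it.
 */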
pte_t *huge_pte_alloc(struct mm_struct *mm,
		      unsigned long addr, unsigned long sz)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	pudp = pud_alloc(mm, pgdp, addr);
	if (pudp) {
		if (sz == PUD_SIZE)
			return (pte_t *) pudp;
		else if (sz == PMD_SIZE)
			pmdp = pmd_alloc(mm, pudp, addr);
	}
	return (pte_t *) pmdp;
}

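/*
 * Walk the page table for an address and return a pointer to the entry
 * that maps it: the pud for a large (2 GB) mapping, otherwise the pmd.
 */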
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgdp;
	pud_t *pudp;
	pmd_t *pmdp = NULL;

	pgdp = pgd_offset(mm, addr);
	if (pgd_present(*pgdp)) {
		pudp = pud_offset(pgdp, addr);
		if (pud_present(*pudp)) {
			if (pud_large(*pudp))
				return (pte_t *) pudp;
			pmdp = pmd_offset(pudp, addr);
		}
	}
	return (pte_t *) pmdp;
}

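/*
 * A pmd or pud maps a huge page iff its large bit is set, so
 * pmd_huge()/pud_huge() simply test for large entries.
 */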
int pmd_huge(pmd_t pmd)
{
	return pmd_large(pmd);
}

int pud_huge(pud_t pud)
{
	return pud_large(pud);
}

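/*
 * Resolve the struct page for an address inside a 2 GB page mapped by a
 * large pud. Taking a reference (FOLL_GET) is not supported here.
 */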
struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int flags)
{
	if (flags & FOLL_GET)
		return NULL;

	return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}

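/*
 * Parse the "hugepagesz=" kernel command line option and register the
 * requested huge page size: 1 MB (PMD_SIZE) pages require the EDAT1
 * facility, 2 GB (PUD_SIZE) pages require EDAT2.
 */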
static __init int setup_hugepagesz(char *opt)
{
	unsigned long size;
	char *string = opt;

	size = memparse(opt, &opt);
	if (MACHINE_HAS_EDAT1 && size == PMD_SIZE) {
		hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	} else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) {
		hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	} else {
		pr_err("hugepagesz= specifies an unsupported page size %s\n",
		       string);
		return 0;
	}
	return 1;
}
__setup("hugepagesz=", setup_hugepagesz);