/*
 *  Copyright IBM Corp. 2007, 2011
 *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER 1
#define FRAG_MASK 0x0f
#else
#define ALLOC_ORDER 2
#define FRAG_MASK 0x03
#endif


unsigned long *crst_table_alloc(struct mm_struct *mm)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
static void __crst_table_upgrade(void *arg)
{
	struct mm_struct *mm = arg;

	if (current->active_mm == mm) {
		clear_user_asce();
		set_user_asce(mm);
	}
	__tlb_flush_local();
}

int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;
	int flush;

	BUG_ON(limit > (1UL << 53));
	flush = 0;
repeat:
	table = crst_table_alloc(mm);
	if (!table)
		return -ENOMEM;
	spin_lock_bh(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
		flush = 1;
	}
	spin_unlock_bh(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	if (flush)
		on_each_cpu(__crst_table_upgrade, mm, 0);
	return 0;
}
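
/*
 * Illustrative sketch, not part of the original file: how a hypothetical
 * caller (for example the mmap path) might ask for a larger address space.
 * "addr" and "len" are made-up caller variables; the limit value mirrors
 * the constants used in crst_table_upgrade() above.
 *
 *	if (addr + len > mm->context.asce_limit &&
 *	    crst_table_upgrade(mm, 1UL << 53))
 *		return -ENOMEM;
 *
 * Each pass through the repeat loop above adds exactly one table level,
 * going from a 2 GB segment table (1UL << 31) to a region 3 table
 * (1UL << 42) and then to a region 2 table (1UL << 53).
 */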

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (current->active_mm == mm) {
		clear_user_asce();
		__tlb_flush_mm(mm);
	}
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	if (current->active_mm == mm)
		set_user_asce(mm);
}
#endif

#ifdef CONFIG_PGSTE

/**
 * gmap_alloc - allocate a guest address space
 * @mm: pointer to the parent mm_struct
 *
 * Returns a guest address space structure.
 */
struct gmap *gmap_alloc(struct mm_struct *mm)
{
	struct gmap *gmap;
	struct page *page;
	unsigned long *table;

	gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
	if (!gmap)
		goto out;
	INIT_LIST_HEAD(&gmap->crst_list);
	gmap->mm = mm;
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	if (!page)
		goto out_free;
	list_add(&page->lru, &gmap->crst_list);
	table = (unsigned long *) page_to_phys(page);
	crst_table_init(table, _REGION1_ENTRY_EMPTY);
	gmap->table = table;
	gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
		     _ASCE_USER_BITS | __pa(table);
	list_add(&gmap->list, &mm->context.gmap_list);
	return gmap;

out_free:
	kfree(gmap);
out:
	return NULL;
}
EXPORT_SYMBOL_GPL(gmap_alloc);
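
/*
 * Illustrative sketch, not part of the original file: the typical lifetime
 * of a guest address space as a hypothetical KVM-like user might drive it.
 *
 *	struct gmap *gmap;
 *
 *	gmap = gmap_alloc(current->mm);
 *	if (!gmap)
 *		return -ENOMEM;
 *	gmap_enable(gmap);
 *	(run the guest, resolve accesses with gmap_fault(), ...)
 *	gmap_disable(gmap);
 *	gmap_free(gmap);
 */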

static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
{
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	if (*table & _SEGMENT_ENTRY_INVALID)
		return 0;
	page = pfn_to_page(*table >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry(rmap, &mp->mapper, list) {
		if (rmap->entry != table)
			continue;
		list_del(&rmap->list);
		kfree(rmap);
		break;
	}
	*table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
	return 1;
}

static void gmap_flush_tlb(struct gmap *gmap)
{
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();
}

/**
 * gmap_free - free a guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_free(struct gmap *gmap)
{
	struct page *page, *next;
	unsigned long *table;
	int i;


	/* Flush tlb. */
	if (MACHINE_HAS_IDTE)
		__tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
				 _ASCE_TYPE_REGION1);
	else
		__tlb_flush_global();

	/* Free all segment & region tables. */
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
		table = (unsigned long *) page_to_phys(page);
		if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
			/* Remove gmap rmap structures for segment table. */
			for (i = 0; i < PTRS_PER_PMD; i++, table++)
				gmap_unlink_segment(gmap, table);
		__free_pages(page, ALLOC_ORDER);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	list_del(&gmap->list);
	kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);

/**
 * gmap_enable - switch primary space to the guest address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_enable(struct gmap *gmap)
{
	S390_lowcore.gmap = (unsigned long) gmap;
}
EXPORT_SYMBOL_GPL(gmap_enable);

/**
 * gmap_disable - switch back to the standard primary address space
 * @gmap: pointer to the guest address space structure
 */
void gmap_disable(struct gmap *gmap)
{
	S390_lowcore.gmap = 0UL;
}
EXPORT_SYMBOL_GPL(gmap_disable);

/*
 * gmap_alloc_table is assumed to be called with mmap_sem held
 */
static int gmap_alloc_table(struct gmap *gmap,
			    unsigned long *table, unsigned long init)
	__releases(&gmap->mm->page_table_lock)
	__acquires(&gmap->mm->page_table_lock)
{
	struct page *page;
	unsigned long *new;

	/* since we don't free the gmap table until gmap_free we can unlock */
	spin_unlock(&gmap->mm->page_table_lock);
	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
	spin_lock(&gmap->mm->page_table_lock);
	if (!page)
		return -ENOMEM;
	new = (unsigned long *) page_to_phys(page);
	crst_table_init(new, init);
	if (*table & _REGION_ENTRY_INVALID) {
		list_add(&page->lru, &gmap->crst_list);
		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
			(*table & _REGION_ENTRY_TYPE_MASK);
	} else
		__free_pages(page, ALLOC_ORDER);
	return 0;
}

/**
 * gmap_unmap_segment - unmap segment from the guest address space
 * @gmap: pointer to the guest address space structure
 * @to: address in the guest address space
 * @len: length of the memory area to unmap
 *
 * Returns 0 if the unmap succeeded, -EINVAL if not.
 */
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the guest addr space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if (*table & _REGION_ENTRY_INVALID)
			goto out;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Clear segment table entry in guest address space. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = _SEGMENT_ENTRY_INVALID;
	}
out:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;
}
EXPORT_SYMBOL_GPL(gmap_unmap_segment);

/**
 * gmap_map_segment - map a segment to the guest address space
 * @gmap: pointer to the guest address space structure
 * @from: source address in the parent address space
 * @to: target address in the guest address space
 * @len: length of the memory area to map
 *
 * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
 */
int gmap_map_segment(struct gmap *gmap, unsigned long from,
		     unsigned long to, unsigned long len)
{
	unsigned long *table;
	unsigned long off;
	int flush;

	if ((from | to | len) & (PMD_SIZE - 1))
		return -EINVAL;
	if (len == 0 || from + len > TASK_MAX_SIZE ||
	    from + len < from || to + len < to)
		return -EINVAL;

	flush = 0;
	down_read(&gmap->mm->mmap_sem);
	spin_lock(&gmap->mm->page_table_lock);
	for (off = 0; off < len; off += PMD_SIZE) {
		/* Walk the gmap address space page table */
		table = gmap->table + (((to + off) >> 53) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 42) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 31) & 0x7ff);
		if ((*table & _REGION_ENTRY_INVALID) &&
		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
			goto out_unmap;
		table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
		table = table + (((to + off) >> 20) & 0x7ff);

		/* Store 'from' address in an invalid segment table entry. */
		flush |= gmap_unlink_segment(gmap, table);
		*table = (from + off) | (_SEGMENT_ENTRY_INVALID |
					 _SEGMENT_ENTRY_PROTECT);
	}
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	if (flush)
		gmap_flush_tlb(gmap);
	return 0;

out_unmap:
	spin_unlock(&gmap->mm->page_table_lock);
	up_read(&gmap->mm->mmap_sem);
	gmap_unmap_segment(gmap, to, len);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
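
/*
 * Illustrative sketch, not part of the original file: mapping a PMD (1 MB)
 * aligned chunk of the parent address space into the guest and tearing it
 * down again.  The addresses are made up; both "from" and "to" have to be
 * 1 MB aligned, as checked in gmap_map_segment() above.
 *
 *	int rc;
 *
 *	rc = gmap_map_segment(gmap, 0x10000000UL, 0x20000000UL, 0x100000UL);
 *	if (rc)
 *		return rc;
 *	...
 *	gmap_unmap_segment(gmap, 0x20000000UL, 0x100000UL);
 */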

static unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long *table;

	table = gmap->table + ((gaddr >> 53) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((gaddr >> 42) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((gaddr >> 31) & 0x7ff);
	if (unlikely(*table & _REGION_ENTRY_INVALID))
		return ERR_PTR(-EFAULT);
	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
	table = table + ((gaddr >> 20) & 0x7ff);
	return table;
}

/**
 * __gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 * The mmap_sem of the mm that belongs to the address space must be held
 * when this function gets called.
 */
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long *segment_ptr, vmaddr, segment;
	struct gmap_pgtable *mp;
	struct page *page;

	current->thread.gmap_addr = gaddr;
	segment_ptr = gmap_table_walk(gmap, gaddr);
	if (IS_ERR(segment_ptr))
		return PTR_ERR(segment_ptr);
	/* Convert the gmap address to an mm address. */
	segment = *segment_ptr;
	if (!(segment & _SEGMENT_ENTRY_INVALID)) {
		page = pfn_to_page(segment >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		return mp->vmaddr | (gaddr & ~PMD_MASK);
	} else if (segment & _SEGMENT_ENTRY_PROTECT) {
		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
		return vmaddr | (gaddr & ~PMD_MASK);
	}
	return -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);

/**
 * gmap_translate - translate a guest address to a user space address
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: guest address
 *
 * Returns user space address which corresponds to the guest address or
 * -EFAULT if no such mapping exists.
 * This function does not establish potentially missing page table entries.
 */
unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_translate(gmap, gaddr);
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
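
/*
 * Illustrative sketch, not part of the original file: looking up the user
 * space address that backs a guest address.  The return value doubles as
 * an error code, hence the IS_ERR_VALUE() check.
 *
 *	unsigned long uaddr;
 *
 *	uaddr = gmap_translate(gmap, gaddr);
 *	if (IS_ERR_VALUE(uaddr))
 *		return -EFAULT;		(no mapping established yet)
 */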

static int gmap_connect_pgtable(struct gmap *gmap, unsigned long gaddr,
				unsigned long segment,
				unsigned long *segment_ptr)
{
	unsigned long vmaddr;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct mm_struct *mm;
	struct page *page;
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	mm = gmap->mm;
	vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
	vma = find_vma(mm, vmaddr);
	if (!vma || vma->vm_start > vmaddr)
		return -EFAULT;
	/* Walk the parent mm page table */
	pgd = pgd_offset(mm, vmaddr);
	pud = pud_alloc(mm, pgd, vmaddr);
	if (!pud)
		return -ENOMEM;
	pmd = pmd_alloc(mm, pud, vmaddr);
	if (!pmd)
		return -ENOMEM;
	if (!pmd_present(*pmd) &&
	    __pte_alloc(mm, vma, pmd, vmaddr))
		return -ENOMEM;
	/* large pmds cannot yet be handled */
	if (pmd_large(*pmd))
		return -EFAULT;
	/* pmd now points to a valid segment table entry. */
	rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
	if (!rmap)
		return -ENOMEM;
	/* Link gmap segment table entry location to page table. */
	page = pmd_page(*pmd);
	mp = (struct gmap_pgtable *) page->index;
	rmap->gmap = gmap;
	rmap->entry = segment_ptr;
	rmap->vmaddr = gaddr & PMD_MASK;
	spin_lock(&mm->page_table_lock);
	if (*segment_ptr == segment) {
		list_add(&rmap->list, &mp->mapper);
		/* Set gmap segment table entry to page table. */
		*segment_ptr = pmd_val(*pmd) & PAGE_MASK;
		rmap = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	kfree(rmap);
	return 0;
}

static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
{
	struct gmap_rmap *rmap, *next;
	struct gmap_pgtable *mp;
	struct page *page;
	int flush;

	flush = 0;
	spin_lock(&mm->page_table_lock);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
		*rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
					     _SEGMENT_ENTRY_PROTECT);
		list_del(&rmap->list);
		kfree(rmap);
		flush = 1;
	}
	spin_unlock(&mm->page_table_lock);
	if (flush)
		__tlb_flush_global();
}

/*
 * this function is assumed to be called with mmap_sem held
 */
unsigned long __gmap_fault(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long *segment_ptr, segment;
	struct gmap_pgtable *mp;
	struct page *page;
	int rc;

	current->thread.gmap_addr = gaddr;
	segment_ptr = gmap_table_walk(gmap, gaddr);
	if (IS_ERR(segment_ptr))
		return -EFAULT;
	/* Convert the gmap address to an mm address. */
	while (1) {
		segment = *segment_ptr;
		if (!(segment & _SEGMENT_ENTRY_INVALID)) {
			/* Page table is present */
			page = pfn_to_page(segment >> PAGE_SHIFT);
			mp = (struct gmap_pgtable *) page->index;
			return mp->vmaddr | (gaddr & ~PMD_MASK);
		}
		if (!(segment & _SEGMENT_ENTRY_PROTECT))
			/* Nothing mapped in the gmap address space. */
			break;
		rc = gmap_connect_pgtable(gmap, gaddr, segment, segment_ptr);
		if (rc)
			return rc;
	}
	return -EFAULT;
}

unsigned long gmap_fault(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long rc;

	down_read(&gmap->mm->mmap_sem);
	rc = __gmap_fault(gmap, gaddr);
	up_read(&gmap->mm->mmap_sem);

	return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);
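
/*
 * Illustrative sketch, not part of the original file: the usual
 * "translate, otherwise connect the mapping" pattern of a hypothetical
 * guest access helper.
 *
 *	unsigned long uaddr;
 *
 *	uaddr = gmap_fault(gmap, gaddr);
 *	if (IS_ERR_VALUE(uaddr))
 *		return (int) uaddr;
 *	if (copy_from_user(buf, (void __user *) uaddr, len))
 *		return -EFAULT;
 */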

static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
{
	if (!non_swap_entry(entry))
		dec_mm_counter(mm, MM_SWAPENTS);
	else if (is_migration_entry(entry)) {
		struct page *page = migration_entry_to_page(entry);

		if (PageAnon(page))
			dec_mm_counter(mm, MM_ANONPAGES);
		else
			dec_mm_counter(mm, MM_FILEPAGES);
	}
	free_swap_and_cache(entry);
}

/**
 * The mm->mmap_sem lock must be held
 */
static void gmap_zap_unused(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long ptev, pgstev;
	spinlock_t *ptl;
	pgste_t pgste;
	pte_t *ptep, pte;

	ptep = get_locked_pte(mm, vmaddr, &ptl);
	if (unlikely(!ptep))
		return;
	pte = *ptep;
	if (!pte_swap(pte))
		goto out_pte;
	/* Zap unused and logically-zero pages */
	pgste = pgste_get_lock(ptep);
	pgstev = pgste_val(pgste);
	ptev = pte_val(pte);
	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
		gmap_zap_swap_entry(pte_to_swp_entry(pte), mm);
		pte_clear(mm, vmaddr, ptep);
	}
	pgste_set_unlock(ptep, pgste);
out_pte:
	pte_unmap_unlock(*ptep, ptl);
}

/*
 * this function is assumed to be called with mmap_sem held
 */
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
	unsigned long *table, *segment_ptr;
	unsigned long segment, vmaddr, pgstev, ptev;
	struct gmap_pgtable *mp;
	struct page *page;

	segment_ptr = gmap_table_walk(gmap, gaddr);
	if (IS_ERR(segment_ptr))
		return;
	segment = *segment_ptr;
	if (segment & _SEGMENT_ENTRY_INVALID)
		return;
	page = pfn_to_page(segment >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	vmaddr = mp->vmaddr | (gaddr & ~PMD_MASK);
	/* Page table is present */
	table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
	table = table + ((vmaddr >> 12) & 0xff);
	pgstev = table[PTRS_PER_PTE];
	ptev = table[0];
	/* quick check, checked again with locks held */
	if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
	    ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
		gmap_zap_unused(gmap->mm, vmaddr);
}
EXPORT_SYMBOL_GPL(__gmap_zap);

void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{

	unsigned long *table, gaddr, size;
	struct vm_area_struct *vma;
	struct gmap_pgtable *mp;
	struct page *page;

	down_read(&gmap->mm->mmap_sem);
	gaddr = from;
	while (gaddr < to) {
		/* Walk the gmap address space page table */
		table = gmap->table + ((gaddr >> 53) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			gaddr = (gaddr + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((gaddr >> 42) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			gaddr = (gaddr + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((gaddr >> 31) & 0x7ff);
		if (unlikely(*table & _REGION_ENTRY_INVALID)) {
			gaddr = (gaddr + PMD_SIZE) & PMD_MASK;
			continue;
		}
		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
		table = table + ((gaddr >> 20) & 0x7ff);
		if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
			gaddr = (gaddr + PMD_SIZE) & PMD_MASK;
			continue;
		}
		page = pfn_to_page(*table >> PAGE_SHIFT);
		mp = (struct gmap_pgtable *) page->index;
		vma = find_vma(gmap->mm, mp->vmaddr);
		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
		zap_page_range(vma, mp->vmaddr | (gaddr & ~PMD_MASK),
			       size, NULL);
		gaddr = (gaddr + PMD_SIZE) & PMD_MASK;
	}
	up_read(&gmap->mm->mmap_sem);
}
EXPORT_SYMBOL_GPL(gmap_discard);

static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);

/**
 * gmap_register_ipte_notifier - register a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_register_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_add(&nb->list, &gmap_notifier_list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier);

/**
 * gmap_unregister_ipte_notifier - remove a pte invalidation callback
 * @nb: pointer to the gmap notifier block
 */
void gmap_unregister_ipte_notifier(struct gmap_notifier *nb)
{
	spin_lock(&gmap_notifier_lock);
	list_del_init(&nb->list);
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
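
/*
 * Illustrative sketch, not part of the original file: a hypothetical user
 * of the ipte notifier interface.  The callback is invoked from
 * gmap_do_ipte_notify() below, under gmap_notifier_lock.
 *
 *	static void my_pte_notifier(struct gmap *gmap, unsigned long gaddr)
 *	{
 *		(react to the invalidation of gaddr)
 *	}
 *
 *	static struct gmap_notifier my_nb = {
 *		.notifier_call	= my_pte_notifier,
 *	};
 *
 *	gmap_register_ipte_notifier(&my_nb);
 *	...
 *	gmap_unregister_ipte_notifier(&my_nb);
 */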

/**
 * gmap_ipte_notify - mark a range of ptes for invalidation notification
 * @gmap: pointer to guest mapping meta data structure
 * @gaddr: virtual address in the guest address space
 * @len: size of area
 *
 * Returns 0 if for each page in the given range a gmap mapping exists and
 * the invalidation notification could be set. If the gmap mapping is missing
 * for one or more pages -EFAULT is returned. If no memory could be allocated
 * -ENOMEM is returned. This function establishes missing page table entries.
 */
int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
{
	unsigned long addr;
	spinlock_t *ptl;
	pte_t *ptep, entry;
	pgste_t pgste;
	int rc = 0;

	if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
		return -EINVAL;
	down_read(&gmap->mm->mmap_sem);
	while (len) {
		/* Convert gmap address and connect the page tables */
		addr = __gmap_fault(gmap, gaddr);
		if (IS_ERR_VALUE(addr)) {
			rc = addr;
			break;
		}
		/* Get the page mapped */
		if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE)) {
			rc = -EFAULT;
			break;
		}
		/* Walk the process page table, lock and get pte pointer */
		ptep = get_locked_pte(gmap->mm, addr, &ptl);
		if (unlikely(!ptep))
			continue;
		/* Set notification bit in the pgste of the pte */
		entry = *ptep;
		if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) {
			pgste = pgste_get_lock(ptep);
			pgste_val(pgste) |= PGSTE_IN_BIT;
			pgste_set_unlock(ptep, pgste);
			gaddr += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		spin_unlock(ptl);
	}
	up_read(&gmap->mm->mmap_sem);
	return rc;
}
EXPORT_SYMBOL_GPL(gmap_ipte_notify);
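
/*
 * Illustrative sketch, not part of the original file: arming invalidation
 * notification for a single guest page, as a shadow-table user might do.
 *
 *	rc = gmap_ipte_notify(gmap, gaddr & PAGE_MASK, PAGE_SIZE);
 *	if (rc)
 *		return rc;	(-EFAULT or -ENOMEM, see above)
 */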

/**
 * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
 * @mm: pointer to the process mm_struct
 * @addr: virtual address in the process address space
 * @pte: pointer to the page table entry
 *
 * This function is assumed to be called with the page table lock held
 * for the pte to notify.
 */
void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
{
	unsigned long segment_offset;
	struct gmap_notifier *nb;
	struct gmap_pgtable *mp;
	struct gmap_rmap *rmap;
	struct page *page;

	segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
	segment_offset = segment_offset * (4096 / sizeof(pte_t));
	page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	spin_lock(&gmap_notifier_lock);
	list_for_each_entry(rmap, &mp->mapper, list) {
		list_for_each_entry(nb, &gmap_notifier_list, list)
			nb->notifier_call(rmap->gmap,
					  rmap->vmaddr + segment_offset);
	}
	spin_unlock(&gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);

static inline int page_table_with_pgste(struct page *page)
{
	return atomic_read(&page->_mapcount) == 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	struct page *page;
	unsigned long *table;
	struct gmap_pgtable *mp;

	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
	if (!page)
		return NULL;
	mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
	if (!mp) {
		__free_page(page);
		return NULL;
	}
	if (!pgtable_page_ctor(page)) {
		kfree(mp);
		__free_page(page);
		return NULL;
	}
	mp->vmaddr = vmaddr & PMD_MASK;
	INIT_LIST_HEAD(&mp->mapper);
	page->index = (unsigned long) mp;
	atomic_set(&page->_mapcount, 0);
	table = (unsigned long *) page_to_phys(page);
	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
	return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
	struct page *page;
	struct gmap_pgtable *mp;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	mp = (struct gmap_pgtable *) page->index;
	BUG_ON(!list_empty(&mp->mapper));
	pgtable_page_dtor(page);
	atomic_set(&page->_mapcount, -1);
	kfree(mp);
	__free_page(page);
}

static inline unsigned long page_table_reset_pte(struct mm_struct *mm, pmd_t *pmd,
			unsigned long addr, unsigned long end, bool init_skey)
{
	pte_t *start_pte, *pte;
	spinlock_t *ptl;
	pgste_t pgste;

	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	pte = start_pte;
	do {
		pgste = pgste_get_lock(pte);
		pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK;
		if (init_skey) {
			unsigned long address;

			pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT |
					      PGSTE_GR_BIT | PGSTE_GC_BIT);

			/* skip invalid and not writable pages */
			if (pte_val(*pte) & _PAGE_INVALID ||
			    !(pte_val(*pte) & _PAGE_WRITE)) {
				pgste_set_unlock(pte, pgste);
				continue;
			}

			address = pte_val(*pte) & PAGE_MASK;
			page_set_storage_key(address, PAGE_DEFAULT_KEY, 1);
		}
		pgste_set_unlock(pte, pgste);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(start_pte, ptl);

	return addr;
}

static inline unsigned long page_table_reset_pmd(struct mm_struct *mm, pud_t *pud,
			unsigned long addr, unsigned long end, bool init_skey)
{
	unsigned long next;
	pmd_t *pmd;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		next = page_table_reset_pte(mm, pmd, addr, next, init_skey);
	} while (pmd++, addr = next, addr != end);

	return addr;
}

static inline unsigned long page_table_reset_pud(struct mm_struct *mm, pgd_t *pgd,
			unsigned long addr, unsigned long end, bool init_skey)
{
	unsigned long next;
	pud_t *pud;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		next = page_table_reset_pmd(mm, pud, addr, next, init_skey);
	} while (pud++, addr = next, addr != end);

	return addr;
}

void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool init_skey)
{
	unsigned long addr, next;
	pgd_t *pgd;

	down_write(&mm->mmap_sem);
	if (init_skey && mm_use_skey(mm))
		goto out_up;
	addr = start;
	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		next = page_table_reset_pud(mm, pgd, addr, next, init_skey);
	} while (pgd++, addr = next, addr != end);
	if (init_skey)
		current->mm->context.use_skey = 1;
out_up:
	up_write(&mm->mmap_sem);
}
EXPORT_SYMBOL(page_table_reset_pgste);

int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
			  unsigned long key, bool nq)
{
	spinlock_t *ptl;
	pgste_t old, new;
	pte_t *ptep;

	down_read(&mm->mmap_sem);
retry:
	ptep = get_locked_pte(current->mm, addr, &ptl);
	if (unlikely(!ptep)) {
		up_read(&mm->mmap_sem);
		return -EFAULT;
	}
	if (!(pte_val(*ptep) & _PAGE_INVALID) &&
	     (pte_val(*ptep) & _PAGE_PROTECT)) {
		pte_unmap_unlock(*ptep, ptl);
		if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
			up_read(&mm->mmap_sem);
			return -EFAULT;
		}
		goto retry;
	}

	new = old = pgste_get_lock(ptep);
	pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
			    PGSTE_ACC_BITS | PGSTE_FP_BIT);
	pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
	pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
		unsigned long address, bits, skey;

		address = pte_val(*ptep) & PAGE_MASK;
		skey = (unsigned long) page_get_storage_key(address);
		bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
		skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
		/* Set storage key ACC and FP */
		page_set_storage_key(address, skey, !nq);
		/* Merge host changed & referenced into pgste */
		pgste_val(new) |= bits << 52;
	}
	/* changing the guest storage key is considered a change of the page */
	if ((pgste_val(new) ^ pgste_val(old)) &
	    (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT))
		pgste_val(new) |= PGSTE_UC_BIT;

	pgste_set_unlock(ptep, new);
	pte_unmap_unlock(*ptep, ptl);
	up_read(&mm->mmap_sem);
	return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);

#else /* CONFIG_PGSTE */

static inline int page_table_with_pgste(struct page *page)
{
	return 0;
}

static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
						    unsigned long vmaddr)
{
	return NULL;
}

void page_table_reset_pgste(struct mm_struct *mm, unsigned long start,
			    unsigned long end, bool init_skey)
{
}

static inline void page_table_free_pgste(unsigned long *table)
{
}

static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
					   unsigned long *table)
{
}

#endif /* CONFIG_PGSTE */

static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
	unsigned int old, new;

	do {
		old = atomic_read(v);
		new = old ^ bits;
	} while (atomic_cmpxchg(v, old, new) != old);
	return new;
}

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
{
	unsigned long *uninitialized_var(table);
	struct page *uninitialized_var(page);
	unsigned int mask, bit;

	if (mm_has_pgste(mm))
		return page_table_alloc_pgste(mm, vmaddr);
	/* Allocate fragments of a 4K page as 1K/2K page table */
	spin_lock_bh(&mm->context.list_lock);
	mask = FRAG_MASK;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		table = (unsigned long *) page_to_phys(page);
		mask = atomic_read(&page->_mapcount);
		mask = mask | (mask >> 4);
	}
	if ((mask & FRAG_MASK) == FRAG_MASK) {
		spin_unlock_bh(&mm->context.list_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		if (!pgtable_page_ctor(page)) {
			__free_page(page);
			return NULL;
		}
		atomic_set(&page->_mapcount, 1);
		table = (unsigned long *) page_to_phys(page);
		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
		spin_lock_bh(&mm->context.list_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	} else {
		for (bit = 1; mask & bit; bit <<= 1)
			table += PTRS_PER_PTE;
		mask = atomic_xor_bits(&page->_mapcount, bit);
		if ((mask & FRAG_MASK) == FRAG_MASK)
			list_del(&page->lru);
	}
	spin_unlock_bh(&mm->context.list_lock);
	return table;
}
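
/*
 * Illustrative worked example, not part of the original file: the fragment
 * bookkeeping above for the 64 bit case (FRAG_MASK = 0x03, two 2K page
 * tables per 4K page).  The low bits of page->_mapcount carry one
 * allocation bit per fragment:
 *
 *	_mapcount == 0x01: first 2K fragment in use, second one still free
 *	_mapcount == 0x03: page completely used, removed from pgtable_list
 *
 * page_table_free() below clears the bit again with atomic_xor_bits() and
 * only releases the page once no allocation bit is left.
 */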

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned int bit, mask;

	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		return page_table_free_pgste(table);
	}
	/* Free 1K/2K page table fragment of a 4K page */
	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit);
	if (mask & FRAG_MASK)
		list_add(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	if (mask == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

static void __page_table_free_rcu(void *table, unsigned bit)
{
	struct page *page;

	if (bit == FRAG_MASK)
		return page_table_free_pgste(table);
	/* Free 1K/2K page table fragment of a 4K page */
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
		pgtable_page_dtor(page);
		atomic_set(&page->_mapcount, -1);
		__free_page(page);
	}
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
	struct mm_struct *mm;
	struct page *page;
	unsigned int bit, mask;

	mm = tlb->mm;
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	if (page_table_with_pgste(page)) {
		gmap_disconnect_pgtable(mm, table);
		table = (unsigned long *) (__pa(table) | FRAG_MASK);
		tlb_remove_table(tlb, table);
		return;
	}
	bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
	spin_lock_bh(&mm->context.list_lock);
	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
		list_del(&page->lru);
	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
	if (mask & FRAG_MASK)
		list_add_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock_bh(&mm->context.list_lock);
	table = (unsigned long *) (__pa(table) | (bit << 4));
	tlb_remove_table(tlb, table);
}

static void __tlb_remove_table(void *_table)
{
	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
	void *table = (void *)((unsigned long) _table & ~mask);
	unsigned type = (unsigned long) _table & mask;

	if (type)
		__page_table_free_rcu(table, type);
	else
		free_pages((unsigned long) table, ALLOC_ORDER);
}

static void tlb_remove_table_smp_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
	/*
	 * This isn't an RCU grace period and hence the page-tables cannot be
	 * assumed to be actually RCU-freed.
	 *
	 * It is however sufficient for software page-table walkers that rely
	 * on IRQ disabling. See the comment near struct mmu_table_batch.
	 */
	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
	__tlb_remove_table(table);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
	struct mmu_table_batch *batch;
	int i;

	batch = container_of(head, struct mmu_table_batch, rcu);

	for (i = 0; i < batch->nr; i++)
		__tlb_remove_table(batch->tables[i]);

	free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
	struct mmu_table_batch **batch = &tlb->batch;

	if (*batch) {
		call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
		*batch = NULL;
	}
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct mmu_table_batch **batch = &tlb->batch;

	tlb->mm->context.flush_mm = 1;
	if (*batch == NULL) {
		*batch = (struct mmu_table_batch *)
			__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
		if (*batch == NULL) {
			__tlb_flush_mm_lazy(tlb->mm);
			tlb_remove_table_one(table);
			return;
		}
		(*batch)->nr = 0;
	}
	(*batch)->tables[(*batch)->nr++] = table;
	if ((*batch)->nr == MAX_TABLE_BATCH)
		tlb_flush_mmu(tlb);
}
Martin Schwidefsky36409f62011-06-06 14:14:41 +02001262
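/*
 * Huge pmds map no 4K page table and therefore carry no pgstes. Before a
 * process can be switched to pgste mode all transparent huge page
 * mappings are split into normal mappings and THP is disabled for future
 * mappings of this mm.
 */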
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void thp_split_vma(struct vm_area_struct *vma)
{
	unsigned long addr;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
		follow_page(vma, addr, FOLL_SPLIT);
}

static inline void thp_split_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		thp_split_vma(vma);
		vma->vm_flags &= ~VM_HUGEPAGE;
		vma->vm_flags |= VM_NOHUGEPAGE;
	}
	mm->def_flags |= VM_NOHUGEPAGE;
}
#else
static inline void thp_split_mm(struct mm_struct *mm)
{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

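/*
 * Replace every page table in the pmd range that was allocated without
 * pgstes by a freshly allocated pgste table: clear the pmd entry under the
 * pmd lock, copy the ptes over, establish the new table and free the old
 * one via RCU, since lockless walkers may still be looking at it. If the
 * pmd changed in the meantime the new table is discarded and the entry is
 * retried.
 */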
static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
				struct mm_struct *mm, pud_t *pud,
				unsigned long addr, unsigned long end)
{
	unsigned long next, *table, *new;
	struct page *page;
	spinlock_t *ptl;
	pmd_t *pmd;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
again:
		if (pmd_none_or_clear_bad(pmd))
			continue;
		table = (unsigned long *) pmd_deref(*pmd);
		page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
		if (page_table_with_pgste(page))
			continue;
		/* Allocate new page table with pgstes */
		new = page_table_alloc_pgste(mm, addr);
		if (!new)
			return -ENOMEM;

		ptl = pmd_lock(mm, pmd);
		if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
			/* Nuke pmd entry pointing to the "short" page table */
			pmdp_flush_lazy(mm, addr, pmd);
			pmd_clear(pmd);
			/* Copy ptes from old table to new table */
			memcpy(new, table, PAGE_SIZE/2);
			clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
			/* Establish new table */
			pmd_populate(mm, pmd, (pte_t *) new);
			/* Free old table with rcu, there might be a walker! */
			page_table_free_rcu(tlb, table);
			new = NULL;
		}
		spin_unlock(ptl);
		if (new) {
			page_table_free_pgste(new);
			goto again;
		}
	} while (pmd++, addr = next, addr != end);

	return addr;
}

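/*
 * Middle level of the reallocation walk: iterate over the pud entries and
 * let page_table_realloc_pmd() do the work, passing a possible -ENOMEM
 * (encoded in the returned address) back up.
 */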
static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
				   struct mm_struct *mm, pgd_t *pgd,
				   unsigned long addr, unsigned long end)
{
	unsigned long next;
	pud_t *pud;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pud++, addr = next, addr != end);

	return addr;
}

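/*
 * Top level walker used by s390_enable_sie(): returns 0 once all page
 * tables in the range carry pgstes, or the error value produced by the
 * lower levels if an allocation failed.
 */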
static unsigned long page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
					unsigned long addr, unsigned long end)
{
	unsigned long next;
	pgd_t *pgd;

	pgd = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
		if (unlikely(IS_ERR_VALUE(next)))
			return next;
	} while (pgd++, addr = next, addr != end);

	return 0;
}

/*
 * Switch the page tables of the current process to pgste mode, which is
 * required to run KVM guests (SIE) in this address space.
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm = tsk->mm;
	struct mmu_gather tlb;

	/* Do we have pgstes? If yes, we are done. */
	if (mm_has_pgste(tsk->mm))
		return 0;

	down_write(&mm->mmap_sem);
	/* Split thp mappings and disable thp for future mappings */
	thp_split_mm(mm);
	/* Reallocate the page tables with pgstes */
	tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
	if (!page_table_realloc(&tlb, mm, 0, TASK_SIZE))
		mm->context.has_pgste = 1;
	tlb_finish_mmu(&tlb, 0, TASK_SIZE);
	up_write(&mm->mmap_sem);
	return mm->context.has_pgste ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
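
/*
 * Usage sketch (hypothetical caller, not part of this file): a hypervisor
 * backend would switch the current process to pgste mode before creating
 * a virtual machine:
 *
 *	rc = s390_enable_sie();
 *	if (rc)
 *		return rc;
 *
 * The call is idempotent: once mm->context.has_pgste is set, further
 * calls return 0 immediately.
 */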

/*
 * Enable storage key handling from now on and initialize the storage
 * keys with the default key.
 */
void s390_enable_skey(void)
{
	page_table_reset_pgste(current->mm, 0, TASK_SIZE, true);
}
EXPORT_SYMBOL_GPL(s390_enable_skey);

/*
 * Test whether a guest page is dirty and clear its dirty state.
 */
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap)
{
	pte_t *pte;
	spinlock_t *ptl;
	bool dirty = false;

	pte = get_locked_pte(gmap->mm, address, &ptl);
	if (unlikely(!pte))
		return false;

	if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte))
		dirty = true;

	spin_unlock(ptl);
	return dirty;
}
EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty);
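
/*
 * Usage sketch (hypothetical caller, not part of this file): a dirty-log
 * harvester would walk the user addresses that back guest memory and
 * collect the dirty state, e.g.
 *
 *	if (gmap_test_and_clear_dirty(useraddr, gmap))
 *		set_bit(gfn, dirty_bitmap);
 *
 * where useraddr, gfn and dirty_bitmap are placeholders for the caller's
 * own bookkeeping.
 */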

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address,
			   pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	/*
	 * No need to flush the TLB: on s390 the reference bit is kept in
	 * the storage key, never in the TLB.
	 */
	return pmdp_test_and_clear_young(vma, address, pmdp);
}

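/*
 * Update a huge pmd after an access/protection fault: mark the entry
 * young (and dirty if requested), and only if something actually changed
 * invalidate the old entry before the new one is installed.
 */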
int pmdp_set_access_flags(struct vm_area_struct *vma,
			  unsigned long address, pmd_t *pmdp,
			  pmd_t entry, int dirty)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);

	entry = pmd_mkyoung(entry);
	if (dirty)
		entry = pmd_mkdirty(entry);
	if (pmd_same(*pmdp, entry))
		return 0;
	pmdp_invalidate(vma, address, pmdp);
	set_pmd_at(vma->vm_mm, address, pmdp, entry);
	return 1;
}

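/*
 * pmdp_splitting_flush() below only marks the pmd as splitting; the empty
 * IPI handler serializes against gup-fast, which walks the tables with
 * interrupts disabled. No TLB entry is invalidated here.
 */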
static void pmdp_splitting_flush_sync(void *arg)
{
	/* Simply deliver the interrupt */
}

void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	if (!test_and_set_bit(_SEGMENT_ENTRY_SPLIT_BIT,
			      (unsigned long *) pmdp)) {
		/* need to serialize against gup-fast (IRQ disabled) */
		smp_call_function(pmdp_splitting_flush_sync, NULL, 1);
	}
}

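/*
 * Deposit a preallocated page table for a transparent huge pmd. On s390
 * the deposited tables are kept on a list that is threaded through the
 * page tables themselves and anchored at pmd_huge_pte().
 */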
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				pgtable_t pgtable)
{
	struct list_head *lh = (struct list_head *) pgtable;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	if (!pmd_huge_pte(mm, pmdp))
		INIT_LIST_HEAD(lh);
	else
		list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
	pmd_huge_pte(mm, pmdp) = pgtable;
}

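/*
 * Withdraw a deposited page table again and reinitialize the two pte
 * slots that were overlaid by the struct list_head before handing the
 * table back to the caller.
 */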
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	struct list_head *lh;
	pgtable_t pgtable;
	pte_t *ptep;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	/* FIFO */
	pgtable = pmd_huge_pte(mm, pmdp);
	lh = (struct list_head *) pgtable;
	if (list_empty(lh))
		pmd_huge_pte(mm, pmdp) = NULL;
	else {
		pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
		list_del(lh);
	}
	ptep = (pte_t *) pgtable;
	pte_val(*ptep) = _PAGE_INVALID;
	ptep++;
	pte_val(*ptep) = _PAGE_INVALID;
	return pgtable;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */