blob: fd072013f88ce11658adab8b7db4818a6479f50b [file] [log] [blame]
Martin Schwidefsky3610cce2007-10-22 12:52:47 +02001/*
2 * arch/s390/mm/pgtable.c
3 *
4 * Copyright IBM Corp. 2007
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 */
7
8#include <linux/sched.h>
9#include <linux/kernel.h>
10#include <linux/errno.h>
11#include <linux/mm.h>
12#include <linux/swap.h>
13#include <linux/smp.h>
14#include <linux/highmem.h>
15#include <linux/slab.h>
16#include <linux/pagemap.h>
17#include <linux/spinlock.h>
18#include <linux/module.h>
19#include <linux/quicklist.h>
20
21#include <asm/system.h>
22#include <asm/pgtable.h>
23#include <asm/pgalloc.h>
24#include <asm/tlb.h>
25#include <asm/tlbflush.h>
Martin Schwidefsky6252d702008-02-09 18:24:37 +010026#include <asm/mmu_context.h>
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020027
/*
 * Build-time parameters for the table allocators in this file.
 *
 * ALLOC_ORDER     - page order handed to alloc_pages()/free_pages() for
 *                   the tables managed by crst_table_alloc()/_free().
 * TABLES_PER_PAGE - number of fragment bits tracked per page-table page;
 *                   (1UL << TABLES_PER_PAGE) - 1 is the "every fragment
 *                   in use" pattern tested by page_table_alloc().
 * FRAG_MASK       - the bits of page->flags that hold the fragment
 *                   in-use bitmap.
 * SECOND_HALVES   - the fragment bits that disable_noexec() clears
 *                   (the "second halves" of the page tables).
 */
#ifdef CONFIG_64BIT
#define ALLOC_ORDER	2
#define TABLES_PER_PAGE	2
#define FRAG_MASK	3UL
#define SECOND_HALVES	2UL
#else
#define ALLOC_ORDER	1
#define TABLES_PER_PAGE	4
#define FRAG_MASK	15UL
#define SECOND_HALVES	10UL
#endif
39
40unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
41{
42 struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
43
44 if (!page)
45 return NULL;
46 page->index = 0;
47 if (noexec) {
48 struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
49 if (!shadow) {
50 __free_pages(page, ALLOC_ORDER);
51 return NULL;
52 }
53 page->index = page_to_phys(shadow);
54 }
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010055 spin_lock(&mm->page_table_lock);
56 list_add(&page->lru, &mm->context.crst_list);
57 spin_unlock(&mm->page_table_lock);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020058 return (unsigned long *) page_to_phys(page);
59}
60
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010061void crst_table_free(struct mm_struct *mm, unsigned long *table)
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020062{
63 unsigned long *shadow = get_shadow_table(table);
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010064 struct page *page = virt_to_page(table);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020065
Martin Schwidefsky146e4b32008-02-09 18:24:35 +010066 spin_lock(&mm->page_table_lock);
67 list_del(&page->lru);
68 spin_unlock(&mm->page_table_lock);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +020069 if (shadow)
70 free_pages((unsigned long) shadow, ALLOC_ORDER);
71 free_pages((unsigned long) table, ALLOC_ORDER);
72}
73
Martin Schwidefsky6252d702008-02-09 18:24:37 +010074#ifdef CONFIG_64BIT
75int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
76{
77 unsigned long *table, *pgd;
78 unsigned long entry;
79
80 BUG_ON(limit > (1UL << 53));
81repeat:
82 table = crst_table_alloc(mm, mm->context.noexec);
83 if (!table)
84 return -ENOMEM;
85 spin_lock(&mm->page_table_lock);
86 if (mm->context.asce_limit < limit) {
87 pgd = (unsigned long *) mm->pgd;
88 if (mm->context.asce_limit <= (1UL << 31)) {
89 entry = _REGION3_ENTRY_EMPTY;
90 mm->context.asce_limit = 1UL << 42;
91 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
92 _ASCE_USER_BITS |
93 _ASCE_TYPE_REGION3;
94 } else {
95 entry = _REGION2_ENTRY_EMPTY;
96 mm->context.asce_limit = 1UL << 53;
97 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
98 _ASCE_USER_BITS |
99 _ASCE_TYPE_REGION2;
100 }
101 crst_table_init(table, entry);
102 pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
103 mm->pgd = (pgd_t *) table;
104 table = NULL;
105 }
106 spin_unlock(&mm->page_table_lock);
107 if (table)
108 crst_table_free(mm, table);
109 if (mm->context.asce_limit < limit)
110 goto repeat;
111 update_mm(mm, current);
112 return 0;
113}
114
115void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
116{
117 pgd_t *pgd;
118
119 if (mm->context.asce_limit <= limit)
120 return;
121 __tlb_flush_mm(mm);
122 while (mm->context.asce_limit > limit) {
123 pgd = mm->pgd;
124 switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
125 case _REGION_ENTRY_TYPE_R2:
126 mm->context.asce_limit = 1UL << 42;
127 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
128 _ASCE_USER_BITS |
129 _ASCE_TYPE_REGION3;
130 break;
131 case _REGION_ENTRY_TYPE_R3:
132 mm->context.asce_limit = 1UL << 31;
133 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
134 _ASCE_USER_BITS |
135 _ASCE_TYPE_SEGMENT;
136 break;
137 default:
138 BUG();
139 }
140 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
141 crst_table_free(mm, (unsigned long *) pgd);
142 }
143 update_mm(mm, current);
144}
145#endif
146
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200147/*
148 * page table entry allocation/free routines.
149 */
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100150unsigned long *page_table_alloc(struct mm_struct *mm)
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200151{
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100152 struct page *page;
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200153 unsigned long *table;
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100154 unsigned long bits;
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200155
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100156 bits = mm->context.noexec ? 3UL : 1UL;
157 spin_lock(&mm->page_table_lock);
158 page = NULL;
159 if (!list_empty(&mm->context.pgtable_list)) {
160 page = list_first_entry(&mm->context.pgtable_list,
161 struct page, lru);
162 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
163 page = NULL;
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200164 }
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100165 if (!page) {
166 spin_unlock(&mm->page_table_lock);
167 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
168 if (!page)
169 return NULL;
170 pgtable_page_ctor(page);
171 page->flags &= ~FRAG_MASK;
172 table = (unsigned long *) page_to_phys(page);
173 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
174 spin_lock(&mm->page_table_lock);
175 list_add(&page->lru, &mm->context.pgtable_list);
176 }
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200177 table = (unsigned long *) page_to_phys(page);
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100178 while (page->flags & bits) {
179 table += 256;
180 bits <<= 1;
181 }
182 page->flags |= bits;
183 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
184 list_move_tail(&page->lru, &mm->context.pgtable_list);
185 spin_unlock(&mm->page_table_lock);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200186 return table;
187}
188
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100189void page_table_free(struct mm_struct *mm, unsigned long *table)
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200190{
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100191 struct page *page;
192 unsigned long bits;
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200193
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100194 bits = mm->context.noexec ? 3UL : 1UL;
195 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
196 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
197 spin_lock(&mm->page_table_lock);
198 page->flags ^= bits;
199 if (page->flags & FRAG_MASK) {
200 /* Page now has some free pgtable fragments. */
201 list_move(&page->lru, &mm->context.pgtable_list);
202 page = NULL;
203 } else
204 /* All fragments of the 4K page have been freed. */
205 list_del(&page->lru);
206 spin_unlock(&mm->page_table_lock);
207 if (page) {
208 pgtable_page_dtor(page);
209 __free_page(page);
210 }
211}
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200212
Martin Schwidefsky146e4b32008-02-09 18:24:35 +0100213void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
214{
215 struct page *page;
216
217 spin_lock(&mm->page_table_lock);
218 /* Free shadow region and segment tables. */
219 list_for_each_entry(page, &mm->context.crst_list, lru)
220 if (page->index) {
221 free_pages((unsigned long) page->index, ALLOC_ORDER);
222 page->index = 0;
223 }
224 /* "Free" second halves of page tables. */
225 list_for_each_entry(page, &mm->context.pgtable_list, lru)
226 page->flags &= ~SECOND_HALVES;
227 spin_unlock(&mm->page_table_lock);
228 mm->context.noexec = 0;
229 update_mm(mm, tsk);
Martin Schwidefsky3610cce2007-10-22 12:52:47 +0200230}