blob: c7192f314f868eeb828738f04ef19c15f23cf2a1 [file] [log] [blame]
/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */
8
9#include <linux/kernel.h>
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/string.h>
13#include <linux/errno.h>
14#include <linux/mm.h>
15#include <linux/mman.h>
16#include <linux/uaccess.h>
17#include <linux/swap.h>
18#include <linux/smp_lock.h>
19#include <linux/highmem.h>
20#include <linux/pagemap.h>
21#include <linux/seq_file.h>
22
23#include <asm/pgalloc.h>
24#include <asm/pgtable.h>
25#include <asm/tlb.h>
26#include <asm/xen/hypervisor.h>
27#include <asm/xen/hypercall.h>
28
29#include <xen/xen.h>
30#include <xen/privcmd.h>
31#include <xen/interface/xen.h>
32#include <xen/features.h>
33#include <xen/page.h>
34
/*
 * Forward declaration: the definition lives further down in this file and
 * is only compiled when HAVE_ARCH_PRIVCMD_MMAP is not defined. The mmap
 * ioctl handlers call it before that definition is seen.
 */
#ifndef HAVE_ARCH_PRIVCMD_MMAP
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif
38
39struct remap_data {
40 unsigned long mfn;
41 unsigned domid;
42 pgprot_t prot;
43};
44
45static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
46 unsigned long addr, void *data)
47{
48 struct remap_data *rmd = data;
49 pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
50
51 xen_set_domain_pte(ptep, pte, rmd->domid);
52
53 return 0;
54}
55
56int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr,
57 unsigned long mfn, unsigned long size,
58 pgprot_t prot, unsigned domid)
59{
60 struct remap_data rmd;
61 int err;
62
63 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
64
65 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
66
67 rmd.mfn = mfn;
68 rmd.prot = prot;
69 rmd.domid = domid;
70
71 err = apply_to_page_range(vma->vm_mm, addr, size,
72 remap_area_mfn_pte_fn, &rmd);
73
74 return err;
75}
76
77static long privcmd_ioctl_hypercall(void __user *udata)
78{
79 struct privcmd_hypercall hypercall;
80 long ret;
81
82 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
83 return -EFAULT;
84
85 ret = privcmd_call(hypercall.op,
86 hypercall.arg[0], hypercall.arg[1],
87 hypercall.arg[2], hypercall.arg[3],
88 hypercall.arg[4]);
89
90 return ret;
91}
92
93static void free_page_list(struct list_head *pages)
94{
95 struct page *p, *n;
96
97 list_for_each_entry_safe(p, n, pages, lru)
98 __free_page(p);
99
100 INIT_LIST_HEAD(pages);
101}
102
103/*
104 * Given an array of items in userspace, return a list of pages
105 * containing the data. If copying fails, either because of memory
106 * allocation failure or a problem reading user memory, return an
107 * error code; its up to the caller to dispose of any partial list.
108 */
109static int gather_array(struct list_head *pagelist,
110 unsigned nelem, size_t size,
111 void __user *data)
112{
113 unsigned pageidx;
114 void *pagedata;
115 int ret;
116
117 if (size > PAGE_SIZE)
118 return 0;
119
120 pageidx = PAGE_SIZE;
121 pagedata = NULL; /* quiet, gcc */
122 while (nelem--) {
123 if (pageidx > PAGE_SIZE-size) {
124 struct page *page = alloc_page(GFP_KERNEL);
125
126 ret = -ENOMEM;
127 if (page == NULL)
128 goto fail;
129
130 pagedata = page_address(page);
131
132 list_add_tail(&page->lru, pagelist);
133 pageidx = 0;
134 }
135
136 ret = -EFAULT;
137 if (copy_from_user(pagedata + pageidx, data, size))
138 goto fail;
139
140 data += size;
141 pageidx += size;
142 }
143
144 ret = 0;
145
146fail:
147 return ret;
148}
149
150/*
151 * Call function "fn" on each element of the array fragmented
152 * over a list of pages.
153 */
154static int traverse_pages(unsigned nelem, size_t size,
155 struct list_head *pos,
156 int (*fn)(void *data, void *state),
157 void *state)
158{
159 void *pagedata;
160 unsigned pageidx;
161 int ret;
162
163 BUG_ON(size > PAGE_SIZE);
164
165 pageidx = PAGE_SIZE;
166 pagedata = NULL; /* hush, gcc */
167
168 while (nelem--) {
169 if (pageidx > PAGE_SIZE-size) {
170 struct page *page;
171 pos = pos->next;
172 page = list_entry(pos, struct page, lru);
173 pagedata = page_address(page);
174 pageidx = 0;
175 }
176
177 ret = (*fn)(pagedata + pageidx, state);
178 if (ret)
179 break;
180 pageidx += size;
181 }
182
183 return ret;
184}
185
186struct mmap_mfn_state {
187 unsigned long va;
188 struct vm_area_struct *vma;
189 domid_t domain;
190};
191
192static int mmap_mfn_range(void *data, void *state)
193{
194 struct privcmd_mmap_entry *msg = data;
195 struct mmap_mfn_state *st = state;
196 struct vm_area_struct *vma = st->vma;
197 int rc;
198
199 /* Do not allow range to wrap the address space. */
200 if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
201 ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
202 return -EINVAL;
203
204 /* Range chunks must be contiguous in va space. */
205 if ((msg->va != st->va) ||
206 ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
207 return -EINVAL;
208
209 rc = remap_domain_mfn_range(vma,
210 msg->va & PAGE_MASK,
211 msg->mfn,
212 msg->npages << PAGE_SHIFT,
213 vma->vm_page_prot,
214 st->domain);
215 if (rc < 0)
216 return rc;
217
218 st->va += msg->npages << PAGE_SHIFT;
219
220 return 0;
221}
222
223static long privcmd_ioctl_mmap(void __user *udata)
224{
225 struct privcmd_mmap mmapcmd;
226 struct mm_struct *mm = current->mm;
227 struct vm_area_struct *vma;
228 int rc;
229 LIST_HEAD(pagelist);
230 struct mmap_mfn_state state;
231
232 if (!xen_initial_domain())
233 return -EPERM;
234
235 if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
236 return -EFAULT;
237
238 rc = gather_array(&pagelist,
239 mmapcmd.num, sizeof(struct privcmd_mmap_entry),
240 mmapcmd.entry);
241
242 if (rc || list_empty(&pagelist))
243 goto out;
244
245 down_write(&mm->mmap_sem);
246
247 {
248 struct page *page = list_first_entry(&pagelist,
249 struct page, lru);
250 struct privcmd_mmap_entry *msg = page_address(page);
251
252 vma = find_vma(mm, msg->va);
253 rc = -EINVAL;
254
255 if (!vma || (msg->va != vma->vm_start) ||
256 !privcmd_enforce_singleshot_mapping(vma))
257 goto out_up;
258 }
259
260 state.va = vma->vm_start;
261 state.vma = vma;
262 state.domain = mmapcmd.dom;
263
264 rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
265 &pagelist,
266 mmap_mfn_range, &state);
267
268
269out_up:
270 up_write(&mm->mmap_sem);
271
272out:
273 free_page_list(&pagelist);
274
275 return rc;
276}
277
278struct mmap_batch_state {
279 domid_t domain;
280 unsigned long va;
281 struct vm_area_struct *vma;
282 int err;
283
284 xen_pfn_t __user *user;
285};
286
287static int mmap_batch_fn(void *data, void *state)
288{
289 xen_pfn_t *mfnp = data;
290 struct mmap_batch_state *st = state;
291
292 if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK,
293 *mfnp, PAGE_SIZE,
294 st->vma->vm_page_prot, st->domain) < 0) {
295 *mfnp |= 0xf0000000U;
296 st->err++;
297 }
298 st->va += PAGE_SIZE;
299
300 return 0;
301}
302
303static int mmap_return_errors(void *data, void *state)
304{
305 xen_pfn_t *mfnp = data;
306 struct mmap_batch_state *st = state;
307
308 put_user(*mfnp, st->user++);
309
310 return 0;
311}
312
313static long privcmd_ioctl_mmap_batch(void __user *udata)
314{
315 int ret;
316 struct privcmd_mmapbatch m;
317 struct mm_struct *mm = current->mm;
318 struct vm_area_struct *vma;
319 unsigned long nr_pages;
320 LIST_HEAD(pagelist);
321 struct mmap_batch_state state;
322
323 if (!xen_initial_domain())
324 return -EPERM;
325
326 if (copy_from_user(&m, udata, sizeof(m)))
327 return -EFAULT;
328
329 nr_pages = m.num;
330 if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
331 return -EINVAL;
332
333 ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
334 m.arr);
335
336 if (ret || list_empty(&pagelist))
337 goto out;
338
339 down_write(&mm->mmap_sem);
340
341 vma = find_vma(mm, m.addr);
342 ret = -EINVAL;
343 if (!vma ||
344 (m.addr != vma->vm_start) ||
345 ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
346 !privcmd_enforce_singleshot_mapping(vma)) {
347 up_write(&mm->mmap_sem);
348 goto out;
349 }
350
351 state.domain = m.dom;
352 state.vma = vma;
353 state.va = m.addr;
354 state.err = 0;
355
356 ret = traverse_pages(m.num, sizeof(xen_pfn_t),
357 &pagelist, mmap_batch_fn, &state);
358
359 up_write(&mm->mmap_sem);
360
361 if (state.err > 0) {
362 ret = state.err;
363
364 state.user = udata;
365 traverse_pages(m.num, sizeof(xen_pfn_t),
366 &pagelist,
367 mmap_return_errors, &state);
368 }
369
370out:
371 free_page_list(&pagelist);
372
373 return ret;
374}
375
376static long privcmd_ioctl(struct file *file,
377 unsigned int cmd, unsigned long data)
378{
379 int ret = -ENOSYS;
380 void __user *udata = (void __user *) data;
381
382 switch (cmd) {
383 case IOCTL_PRIVCMD_HYPERCALL:
384 ret = privcmd_ioctl_hypercall(udata);
385 break;
386
387 case IOCTL_PRIVCMD_MMAP:
388 ret = privcmd_ioctl_mmap(udata);
389 break;
390
391 case IOCTL_PRIVCMD_MMAPBATCH:
392 ret = privcmd_ioctl_mmap_batch(udata);
393 break;
394
395 default:
396 ret = -EINVAL;
397 break;
398 }
399
400 return ret;
401}
402
403#ifndef HAVE_ARCH_PRIVCMD_MMAP
404static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
405{
406 return VM_FAULT_SIGBUS;
407}
408
409static struct vm_operations_struct privcmd_vm_ops = {
410 .fault = privcmd_fault
411};
412
413static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
414{
415 /* Unsupported for auto-translate guests. */
416 if (xen_feature(XENFEAT_auto_translated_physmap))
417 return -ENOSYS;
418
419 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
420 vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
421 vma->vm_ops = &privcmd_vm_ops;
422 vma->vm_private_data = NULL;
423
424 return 0;
425}
426
427static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
428{
429 return (xchg(&vma->vm_private_data, (void *)1) == NULL);
430}
431#endif
432
433const struct file_operations privcmd_file_ops = {
434 .unlocked_ioctl = privcmd_ioctl,
435 .mmap = privcmd_mmap,
436};