blob: 80526afd3063275a185f0b136e0288aafa5c400c [file] [log] [blame]
/******************************************************************************
 * privcmd.c
 *
 * Interface to privileged domain-0 commands.
 *
 * Copyright (c) 2002-2004, K A Fraser, B Dragovic
 */
8
9#include <linux/kernel.h>
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/string.h>
13#include <linux/errno.h>
14#include <linux/mm.h>
15#include <linux/mman.h>
16#include <linux/uaccess.h>
17#include <linux/swap.h>
18#include <linux/smp_lock.h>
19#include <linux/highmem.h>
20#include <linux/pagemap.h>
21#include <linux/seq_file.h>
22
23#include <asm/pgalloc.h>
24#include <asm/pgtable.h>
25#include <asm/tlb.h>
26#include <asm/xen/hypervisor.h>
27#include <asm/xen/hypercall.h>
28
29#include <xen/xen.h>
30#include <xen/privcmd.h>
31#include <xen/interface/xen.h>
32#include <xen/features.h>
33#include <xen/page.h>
34
#ifndef HAVE_ARCH_PRIVCMD_MMAP
/* Defined near the end of this file; the mmap ioctls call it before that. */
static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
#endif
38
39struct remap_data {
40 unsigned long mfn;
41 unsigned domid;
42 pgprot_t prot;
43};
44
45static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
46 unsigned long addr, void *data)
47{
48 struct remap_data *rmd = data;
49 pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
50
51 xen_set_domain_pte(ptep, pte, rmd->domid);
52
53 return 0;
54}
55
56int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr,
57 unsigned long mfn, unsigned long size,
58 pgprot_t prot, unsigned domid)
59{
60 struct remap_data rmd;
61 int err;
62
63 prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
64
65 vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
66
67 rmd.mfn = mfn;
68 rmd.prot = prot;
69 rmd.domid = domid;
70
71 err = apply_to_page_range(vma->vm_mm, addr, size,
72 remap_area_mfn_pte_fn, &rmd);
73
74 return err;
75}
76
77static long privcmd_ioctl_hypercall(void __user *udata)
78{
79 struct privcmd_hypercall hypercall;
80 long ret;
81
82 if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
83 return -EFAULT;
84
85 ret = privcmd_call(hypercall.op,
86 hypercall.arg[0], hypercall.arg[1],
87 hypercall.arg[2], hypercall.arg[3],
88 hypercall.arg[4]);
89
90 return ret;
91}
92
93static void free_page_list(struct list_head *pages)
94{
95 struct page *p, *n;
96
97 list_for_each_entry_safe(p, n, pages, lru)
98 __free_page(p);
99
100 INIT_LIST_HEAD(pages);
101}
102
103/*
104 * Given an array of items in userspace, return a list of pages
105 * containing the data. If copying fails, either because of memory
106 * allocation failure or a problem reading user memory, return an
107 * error code; its up to the caller to dispose of any partial list.
108 */
109static int gather_array(struct list_head *pagelist,
110 unsigned nelem, size_t size,
111 void __user *data)
112{
113 unsigned pageidx;
114 void *pagedata;
115 int ret;
116
117 if (size > PAGE_SIZE)
118 return 0;
119
120 pageidx = PAGE_SIZE;
121 pagedata = NULL; /* quiet, gcc */
122 while (nelem--) {
123 if (pageidx > PAGE_SIZE-size) {
124 struct page *page = alloc_page(GFP_KERNEL);
125
126 ret = -ENOMEM;
127 if (page == NULL)
128 goto fail;
129
130 pagedata = page_address(page);
131
132 list_add_tail(&page->lru, pagelist);
133 pageidx = 0;
134 }
135
136 ret = -EFAULT;
137 if (copy_from_user(pagedata + pageidx, data, size))
138 goto fail;
139
140 data += size;
141 pageidx += size;
142 }
143
144 ret = 0;
145
146fail:
147 return ret;
148}
149
150/*
151 * Call function "fn" on each element of the array fragmented
152 * over a list of pages.
153 */
154static int traverse_pages(unsigned nelem, size_t size,
155 struct list_head *pos,
156 int (*fn)(void *data, void *state),
157 void *state)
158{
159 void *pagedata;
160 unsigned pageidx;
161 int ret;
162
163 BUG_ON(size > PAGE_SIZE);
164
165 pageidx = PAGE_SIZE;
166 pagedata = NULL; /* hush, gcc */
167
168 while (nelem--) {
169 if (pageidx > PAGE_SIZE-size) {
170 struct page *page;
171 pos = pos->next;
172 page = list_entry(pos, struct page, lru);
173 pagedata = page_address(page);
174 pageidx = 0;
175 }
176
177 ret = (*fn)(pagedata + pageidx, state);
178 if (ret)
179 break;
180 pageidx += size;
181 }
182
183 return ret;
184}
185
186struct mmap_mfn_state {
187 unsigned long va;
188 struct vm_area_struct *vma;
189 domid_t domain;
190};
191
192static int mmap_mfn_range(void *data, void *state)
193{
194 struct privcmd_mmap_entry *msg = data;
195 struct mmap_mfn_state *st = state;
196 struct vm_area_struct *vma = st->vma;
197 int rc;
198
199 /* Do not allow range to wrap the address space. */
200 if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
201 ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
202 return -EINVAL;
203
204 /* Range chunks must be contiguous in va space. */
205 if ((msg->va != st->va) ||
206 ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
207 return -EINVAL;
208
209 rc = remap_domain_mfn_range(vma,
210 msg->va & PAGE_MASK,
211 msg->mfn,
212 msg->npages << PAGE_SHIFT,
213 vma->vm_page_prot,
214 st->domain);
215 if (rc < 0)
216 return rc;
217
218 st->va += msg->npages << PAGE_SHIFT;
219
220 return 0;
221}
222
223static long privcmd_ioctl_mmap(void __user *udata)
224{
225 struct privcmd_mmap mmapcmd;
226 struct mm_struct *mm = current->mm;
227 struct vm_area_struct *vma;
228 int rc;
229 LIST_HEAD(pagelist);
230 struct mmap_mfn_state state;
231
232 if (!xen_initial_domain())
233 return -EPERM;
234
235 if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
236 return -EFAULT;
237
238 rc = gather_array(&pagelist,
239 mmapcmd.num, sizeof(struct privcmd_mmap_entry),
240 mmapcmd.entry);
241
242 if (rc || list_empty(&pagelist))
243 goto out;
244
245 down_write(&mm->mmap_sem);
246
247 {
248 struct page *page = list_first_entry(&pagelist,
249 struct page, lru);
250 struct privcmd_mmap_entry *msg = page_address(page);
251
252 vma = find_vma(mm, msg->va);
253 rc = -EINVAL;
254
255 if (!vma || (msg->va != vma->vm_start) ||
256 !privcmd_enforce_singleshot_mapping(vma))
257 goto out_up;
258 }
259
260 state.va = vma->vm_start;
261 state.vma = vma;
262 state.domain = mmapcmd.dom;
263
264 rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
265 &pagelist,
266 mmap_mfn_range, &state);
267
268
269out_up:
270 up_write(&mm->mmap_sem);
271
272out:
273 free_page_list(&pagelist);
274
275 return rc;
276}
277
278struct mmap_batch_state {
279 domid_t domain;
280 unsigned long va;
281 struct vm_area_struct *vma;
282 int err;
283
284 xen_pfn_t __user *user;
285};
286
287static int mmap_batch_fn(void *data, void *state)
288{
289 xen_pfn_t *mfnp = data;
290 struct mmap_batch_state *st = state;
291
292 if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK,
293 *mfnp, PAGE_SIZE,
294 st->vma->vm_page_prot, st->domain) < 0) {
295 *mfnp |= 0xf0000000U;
296 st->err++;
297 }
298 st->va += PAGE_SIZE;
299
300 return 0;
301}
302
303static int mmap_return_errors(void *data, void *state)
304{
305 xen_pfn_t *mfnp = data;
306 struct mmap_batch_state *st = state;
307
308 put_user(*mfnp, st->user++);
309
310 return 0;
311}
312
Jeremy Fitzhardingef31fdf52009-03-08 04:10:00 -0700313static struct vm_operations_struct privcmd_vm_ops;
314
Jeremy Fitzhardinge1c5de192009-02-09 12:05:49 -0800315static long privcmd_ioctl_mmap_batch(void __user *udata)
316{
317 int ret;
318 struct privcmd_mmapbatch m;
319 struct mm_struct *mm = current->mm;
320 struct vm_area_struct *vma;
321 unsigned long nr_pages;
322 LIST_HEAD(pagelist);
323 struct mmap_batch_state state;
324
325 if (!xen_initial_domain())
326 return -EPERM;
327
328 if (copy_from_user(&m, udata, sizeof(m)))
329 return -EFAULT;
330
331 nr_pages = m.num;
332 if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
333 return -EINVAL;
334
335 ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
336 m.arr);
337
338 if (ret || list_empty(&pagelist))
339 goto out;
340
341 down_write(&mm->mmap_sem);
342
343 vma = find_vma(mm, m.addr);
344 ret = -EINVAL;
345 if (!vma ||
Jeremy Fitzhardingef31fdf52009-03-08 04:10:00 -0700346 vma->vm_ops != &privcmd_vm_ops ||
Jeremy Fitzhardinge1c5de192009-02-09 12:05:49 -0800347 (m.addr != vma->vm_start) ||
348 ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
349 !privcmd_enforce_singleshot_mapping(vma)) {
350 up_write(&mm->mmap_sem);
351 goto out;
352 }
353
354 state.domain = m.dom;
355 state.vma = vma;
356 state.va = m.addr;
357 state.err = 0;
358
359 ret = traverse_pages(m.num, sizeof(xen_pfn_t),
360 &pagelist, mmap_batch_fn, &state);
361
362 up_write(&mm->mmap_sem);
363
364 if (state.err > 0) {
365 ret = state.err;
366
367 state.user = udata;
368 traverse_pages(m.num, sizeof(xen_pfn_t),
369 &pagelist,
370 mmap_return_errors, &state);
371 }
372
373out:
374 free_page_list(&pagelist);
375
376 return ret;
377}
378
379static long privcmd_ioctl(struct file *file,
380 unsigned int cmd, unsigned long data)
381{
382 int ret = -ENOSYS;
383 void __user *udata = (void __user *) data;
384
385 switch (cmd) {
386 case IOCTL_PRIVCMD_HYPERCALL:
387 ret = privcmd_ioctl_hypercall(udata);
388 break;
389
390 case IOCTL_PRIVCMD_MMAP:
391 ret = privcmd_ioctl_mmap(udata);
392 break;
393
394 case IOCTL_PRIVCMD_MMAPBATCH:
395 ret = privcmd_ioctl_mmap_batch(udata);
396 break;
397
398 default:
399 ret = -EINVAL;
400 break;
401 }
402
403 return ret;
404}
405
406#ifndef HAVE_ARCH_PRIVCMD_MMAP
407static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
408{
Jeremy Fitzhardinge441c7412009-03-06 09:56:59 -0800409 printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
410 vma, vma->vm_start, vma->vm_end,
411 vmf->pgoff, vmf->virtual_address);
412
Jeremy Fitzhardinge1c5de192009-02-09 12:05:49 -0800413 return VM_FAULT_SIGBUS;
414}
415
416static struct vm_operations_struct privcmd_vm_ops = {
417 .fault = privcmd_fault
418};
419
420static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
421{
422 /* Unsupported for auto-translate guests. */
423 if (xen_feature(XENFEAT_auto_translated_physmap))
424 return -ENOSYS;
425
426 /* DONTCOPY is essential for Xen as copy_page_range is broken. */
427 vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
428 vma->vm_ops = &privcmd_vm_ops;
429 vma->vm_private_data = NULL;
430
431 return 0;
432}
433
434static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
435{
436 return (xchg(&vma->vm_private_data, (void *)1) == NULL);
437}
438#endif
439
440const struct file_operations privcmd_file_ops = {
441 .unlocked_ioctl = privcmd_ioctl,
442 .mmap = privcmd_mmap,
443};