blob: 5a959220410a9c57277f289c048a3f0727baa163 [file] [log] [blame]
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
16
Carsten Otte313a3dc2007-10-11 19:16:52 +020017#include "kvm.h"
Carsten Otte043405e2007-10-10 17:16:19 +020018#include "x86.h"
Carsten Otte5fb76f92007-10-29 16:08:51 +010019#include "segment_descriptor.h"
Carsten Otte313a3dc2007-10-11 19:16:52 +020020#include "irq.h"
21
22#include <linux/kvm.h>
23#include <linux/fs.h>
24#include <linux/vmalloc.h>
Carsten Otte5fb76f92007-10-29 16:08:51 +010025#include <linux/module.h>
Carsten Otte043405e2007-10-10 17:16:19 +020026
27#include <asm/uaccess.h>
28
Carsten Otte313a3dc2007-10-11 19:16:52 +020029#define MAX_IO_MSRS 256
30
Carsten Otte5fb76f92007-10-29 16:08:51 +010031unsigned long segment_base(u16 selector)
32{
33 struct descriptor_table gdt;
34 struct segment_descriptor *d;
35 unsigned long table_base;
36 unsigned long v;
37
38 if (selector == 0)
39 return 0;
40
41 asm("sgdt %0" : "=m"(gdt));
42 table_base = gdt.base;
43
44 if (selector & 4) { /* from ldt */
45 u16 ldt_selector;
46
47 asm("sldt %0" : "=g"(ldt_selector));
48 table_base = segment_base(ldt_selector);
49 }
50 d = (struct segment_descriptor *)(table_base + (selector & ~7));
51 v = d->base_low | ((unsigned long)d->base_mid << 16) |
52 ((unsigned long)d->base_high << 24);
53#ifdef CONFIG_X86_64
54 if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
55 v |= ((unsigned long) \
56 ((struct segment_descriptor_64 *)d)->base_higher) << 32;
57#endif
58 return v;
59}
60EXPORT_SYMBOL_GPL(segment_base);
61
Carsten Otte043405e2007-10-10 17:16:19 +020062/*
63 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
64 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
65 *
66 * This list is modified at module load time to reflect the
67 * capabilities of the host cpu.
68 */
69static u32 msrs_to_save[] = {
70 MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
71 MSR_K6_STAR,
72#ifdef CONFIG_X86_64
73 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
74#endif
75 MSR_IA32_TIME_STAMP_COUNTER,
76};
77
78static unsigned num_msrs_to_save;
79
80static u32 emulated_msrs[] = {
81 MSR_IA32_MISC_ENABLE,
82};
83
Carsten Otte313a3dc2007-10-11 19:16:52 +020084/*
85 * Adapt set_msr() to msr_io()'s calling convention
86 */
87static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
88{
89 return kvm_set_msr(vcpu, index, *data);
90}
91
92/*
93 * Read or write a bunch of msrs. All parameters are kernel addresses.
94 *
95 * @return number of msrs set successfully.
96 */
97static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
98 struct kvm_msr_entry *entries,
99 int (*do_msr)(struct kvm_vcpu *vcpu,
100 unsigned index, u64 *data))
101{
102 int i;
103
104 vcpu_load(vcpu);
105
106 for (i = 0; i < msrs->nmsrs; ++i)
107 if (do_msr(vcpu, entries[i].index, &entries[i].data))
108 break;
109
110 vcpu_put(vcpu);
111
112 return i;
113}
114
115/*
116 * Read or write a bunch of msrs. Parameters are user addresses.
117 *
118 * @return number of msrs set successfully.
119 */
120static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
121 int (*do_msr)(struct kvm_vcpu *vcpu,
122 unsigned index, u64 *data),
123 int writeback)
124{
125 struct kvm_msrs msrs;
126 struct kvm_msr_entry *entries;
127 int r, n;
128 unsigned size;
129
130 r = -EFAULT;
131 if (copy_from_user(&msrs, user_msrs, sizeof msrs))
132 goto out;
133
134 r = -E2BIG;
135 if (msrs.nmsrs >= MAX_IO_MSRS)
136 goto out;
137
138 r = -ENOMEM;
139 size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
140 entries = vmalloc(size);
141 if (!entries)
142 goto out;
143
144 r = -EFAULT;
145 if (copy_from_user(entries, user_msrs->entries, size))
146 goto out_free;
147
148 r = n = __msr_io(vcpu, &msrs, entries, do_msr);
149 if (r < 0)
150 goto out_free;
151
152 r = -EFAULT;
153 if (writeback && copy_to_user(user_msrs->entries, entries, size))
154 goto out_free;
155
156 r = n;
157
158out_free:
159 vfree(entries);
160out:
161 return r;
162}
163
Carsten Otte043405e2007-10-10 17:16:19 +0200164long kvm_arch_dev_ioctl(struct file *filp,
165 unsigned int ioctl, unsigned long arg)
166{
167 void __user *argp = (void __user *)arg;
168 long r;
169
170 switch (ioctl) {
171 case KVM_GET_MSR_INDEX_LIST: {
172 struct kvm_msr_list __user *user_msr_list = argp;
173 struct kvm_msr_list msr_list;
174 unsigned n;
175
176 r = -EFAULT;
177 if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
178 goto out;
179 n = msr_list.nmsrs;
180 msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
181 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
182 goto out;
183 r = -E2BIG;
184 if (n < num_msrs_to_save)
185 goto out;
186 r = -EFAULT;
187 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
188 num_msrs_to_save * sizeof(u32)))
189 goto out;
190 if (copy_to_user(user_msr_list->indices
191 + num_msrs_to_save * sizeof(u32),
192 &emulated_msrs,
193 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
194 goto out;
195 r = 0;
196 break;
197 }
198 default:
199 r = -EINVAL;
200 }
201out:
202 return r;
203}
204
Carsten Otte313a3dc2007-10-11 19:16:52 +0200205void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
206{
207 kvm_x86_ops->vcpu_load(vcpu, cpu);
208}
209
210void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
211{
212 kvm_x86_ops->vcpu_put(vcpu);
213}
214
215static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
216{
217 u64 efer;
218 int i;
219 struct kvm_cpuid_entry *e, *entry;
220
221 rdmsrl(MSR_EFER, efer);
222 entry = NULL;
223 for (i = 0; i < vcpu->cpuid_nent; ++i) {
224 e = &vcpu->cpuid_entries[i];
225 if (e->function == 0x80000001) {
226 entry = e;
227 break;
228 }
229 }
230 if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
231 entry->edx &= ~(1 << 20);
232 printk(KERN_INFO "kvm: guest NX capability removed\n");
233 }
234}
235
236static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
237 struct kvm_cpuid *cpuid,
238 struct kvm_cpuid_entry __user *entries)
239{
240 int r;
241
242 r = -E2BIG;
243 if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
244 goto out;
245 r = -EFAULT;
246 if (copy_from_user(&vcpu->cpuid_entries, entries,
247 cpuid->nent * sizeof(struct kvm_cpuid_entry)))
248 goto out;
249 vcpu->cpuid_nent = cpuid->nent;
250 cpuid_fix_nx_cap(vcpu);
251 return 0;
252
253out:
254 return r;
255}
256
257static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
258 struct kvm_lapic_state *s)
259{
260 vcpu_load(vcpu);
261 memcpy(s->regs, vcpu->apic->regs, sizeof *s);
262 vcpu_put(vcpu);
263
264 return 0;
265}
266
267static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
268 struct kvm_lapic_state *s)
269{
270 vcpu_load(vcpu);
271 memcpy(vcpu->apic->regs, s->regs, sizeof *s);
272 kvm_apic_post_state_restore(vcpu);
273 vcpu_put(vcpu);
274
275 return 0;
276}
277
278long kvm_arch_vcpu_ioctl(struct file *filp,
279 unsigned int ioctl, unsigned long arg)
280{
281 struct kvm_vcpu *vcpu = filp->private_data;
282 void __user *argp = (void __user *)arg;
283 int r;
284
285 switch (ioctl) {
286 case KVM_GET_LAPIC: {
287 struct kvm_lapic_state lapic;
288
289 memset(&lapic, 0, sizeof lapic);
290 r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
291 if (r)
292 goto out;
293 r = -EFAULT;
294 if (copy_to_user(argp, &lapic, sizeof lapic))
295 goto out;
296 r = 0;
297 break;
298 }
299 case KVM_SET_LAPIC: {
300 struct kvm_lapic_state lapic;
301
302 r = -EFAULT;
303 if (copy_from_user(&lapic, argp, sizeof lapic))
304 goto out;
305 r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);;
306 if (r)
307 goto out;
308 r = 0;
309 break;
310 }
311 case KVM_SET_CPUID: {
312 struct kvm_cpuid __user *cpuid_arg = argp;
313 struct kvm_cpuid cpuid;
314
315 r = -EFAULT;
316 if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
317 goto out;
318 r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
319 if (r)
320 goto out;
321 break;
322 }
323 case KVM_GET_MSRS:
324 r = msr_io(vcpu, argp, kvm_get_msr, 1);
325 break;
326 case KVM_SET_MSRS:
327 r = msr_io(vcpu, argp, do_set_msr, 0);
328 break;
329 default:
330 r = -EINVAL;
331 }
332out:
333 return r;
334}
335
Carsten Otte1fe779f2007-10-29 16:08:35 +0100336static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
337{
338 int ret;
339
340 if (addr > (unsigned int)(-3 * PAGE_SIZE))
341 return -1;
342 ret = kvm_x86_ops->set_tss_addr(kvm, addr);
343 return ret;
344}
345
346static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
347 u32 kvm_nr_mmu_pages)
348{
349 if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
350 return -EINVAL;
351
352 mutex_lock(&kvm->lock);
353
354 kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
355 kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
356
357 mutex_unlock(&kvm->lock);
358 return 0;
359}
360
361static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
362{
363 return kvm->n_alloc_mmu_pages;
364}
365
366/*
367 * Set a new alias region. Aliases map a portion of physical memory into
368 * another portion. This is useful for memory windows, for example the PC
369 * VGA region.
370 */
371static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
372 struct kvm_memory_alias *alias)
373{
374 int r, n;
375 struct kvm_mem_alias *p;
376
377 r = -EINVAL;
378 /* General sanity checks */
379 if (alias->memory_size & (PAGE_SIZE - 1))
380 goto out;
381 if (alias->guest_phys_addr & (PAGE_SIZE - 1))
382 goto out;
383 if (alias->slot >= KVM_ALIAS_SLOTS)
384 goto out;
385 if (alias->guest_phys_addr + alias->memory_size
386 < alias->guest_phys_addr)
387 goto out;
388 if (alias->target_phys_addr + alias->memory_size
389 < alias->target_phys_addr)
390 goto out;
391
392 mutex_lock(&kvm->lock);
393
394 p = &kvm->aliases[alias->slot];
395 p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
396 p->npages = alias->memory_size >> PAGE_SHIFT;
397 p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
398
399 for (n = KVM_ALIAS_SLOTS; n > 0; --n)
400 if (kvm->aliases[n - 1].npages)
401 break;
402 kvm->naliases = n;
403
404 kvm_mmu_zap_all(kvm);
405
406 mutex_unlock(&kvm->lock);
407
408 return 0;
409
410out:
411 return r;
412}
413
414static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
415{
416 int r;
417
418 r = 0;
419 switch (chip->chip_id) {
420 case KVM_IRQCHIP_PIC_MASTER:
421 memcpy(&chip->chip.pic,
422 &pic_irqchip(kvm)->pics[0],
423 sizeof(struct kvm_pic_state));
424 break;
425 case KVM_IRQCHIP_PIC_SLAVE:
426 memcpy(&chip->chip.pic,
427 &pic_irqchip(kvm)->pics[1],
428 sizeof(struct kvm_pic_state));
429 break;
430 case KVM_IRQCHIP_IOAPIC:
431 memcpy(&chip->chip.ioapic,
432 ioapic_irqchip(kvm),
433 sizeof(struct kvm_ioapic_state));
434 break;
435 default:
436 r = -EINVAL;
437 break;
438 }
439 return r;
440}
441
442static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
443{
444 int r;
445
446 r = 0;
447 switch (chip->chip_id) {
448 case KVM_IRQCHIP_PIC_MASTER:
449 memcpy(&pic_irqchip(kvm)->pics[0],
450 &chip->chip.pic,
451 sizeof(struct kvm_pic_state));
452 break;
453 case KVM_IRQCHIP_PIC_SLAVE:
454 memcpy(&pic_irqchip(kvm)->pics[1],
455 &chip->chip.pic,
456 sizeof(struct kvm_pic_state));
457 break;
458 case KVM_IRQCHIP_IOAPIC:
459 memcpy(ioapic_irqchip(kvm),
460 &chip->chip.ioapic,
461 sizeof(struct kvm_ioapic_state));
462 break;
463 default:
464 r = -EINVAL;
465 break;
466 }
467 kvm_pic_update_irq(pic_irqchip(kvm));
468 return r;
469}
470
471long kvm_arch_vm_ioctl(struct file *filp,
472 unsigned int ioctl, unsigned long arg)
473{
474 struct kvm *kvm = filp->private_data;
475 void __user *argp = (void __user *)arg;
476 int r = -EINVAL;
477
478 switch (ioctl) {
479 case KVM_SET_TSS_ADDR:
480 r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
481 if (r < 0)
482 goto out;
483 break;
484 case KVM_SET_MEMORY_REGION: {
485 struct kvm_memory_region kvm_mem;
486 struct kvm_userspace_memory_region kvm_userspace_mem;
487
488 r = -EFAULT;
489 if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
490 goto out;
491 kvm_userspace_mem.slot = kvm_mem.slot;
492 kvm_userspace_mem.flags = kvm_mem.flags;
493 kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
494 kvm_userspace_mem.memory_size = kvm_mem.memory_size;
495 r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
496 if (r)
497 goto out;
498 break;
499 }
500 case KVM_SET_NR_MMU_PAGES:
501 r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
502 if (r)
503 goto out;
504 break;
505 case KVM_GET_NR_MMU_PAGES:
506 r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
507 break;
508 case KVM_SET_MEMORY_ALIAS: {
509 struct kvm_memory_alias alias;
510
511 r = -EFAULT;
512 if (copy_from_user(&alias, argp, sizeof alias))
513 goto out;
514 r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
515 if (r)
516 goto out;
517 break;
518 }
519 case KVM_CREATE_IRQCHIP:
520 r = -ENOMEM;
521 kvm->vpic = kvm_create_pic(kvm);
522 if (kvm->vpic) {
523 r = kvm_ioapic_init(kvm);
524 if (r) {
525 kfree(kvm->vpic);
526 kvm->vpic = NULL;
527 goto out;
528 }
529 } else
530 goto out;
531 break;
532 case KVM_IRQ_LINE: {
533 struct kvm_irq_level irq_event;
534
535 r = -EFAULT;
536 if (copy_from_user(&irq_event, argp, sizeof irq_event))
537 goto out;
538 if (irqchip_in_kernel(kvm)) {
539 mutex_lock(&kvm->lock);
540 if (irq_event.irq < 16)
541 kvm_pic_set_irq(pic_irqchip(kvm),
542 irq_event.irq,
543 irq_event.level);
544 kvm_ioapic_set_irq(kvm->vioapic,
545 irq_event.irq,
546 irq_event.level);
547 mutex_unlock(&kvm->lock);
548 r = 0;
549 }
550 break;
551 }
552 case KVM_GET_IRQCHIP: {
553 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
554 struct kvm_irqchip chip;
555
556 r = -EFAULT;
557 if (copy_from_user(&chip, argp, sizeof chip))
558 goto out;
559 r = -ENXIO;
560 if (!irqchip_in_kernel(kvm))
561 goto out;
562 r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
563 if (r)
564 goto out;
565 r = -EFAULT;
566 if (copy_to_user(argp, &chip, sizeof chip))
567 goto out;
568 r = 0;
569 break;
570 }
571 case KVM_SET_IRQCHIP: {
572 /* 0: PIC master, 1: PIC slave, 2: IOAPIC */
573 struct kvm_irqchip chip;
574
575 r = -EFAULT;
576 if (copy_from_user(&chip, argp, sizeof chip))
577 goto out;
578 r = -ENXIO;
579 if (!irqchip_in_kernel(kvm))
580 goto out;
581 r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
582 if (r)
583 goto out;
584 r = 0;
585 break;
586 }
587 default:
588 ;
589 }
590out:
591 return r;
592}
593
Carsten Otte043405e2007-10-10 17:16:19 +0200594static __init void kvm_init_msr_list(void)
595{
596 u32 dummy[2];
597 unsigned i, j;
598
599 for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
600 if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
601 continue;
602 if (j < i)
603 msrs_to_save[j] = msrs_to_save[i];
604 j++;
605 }
606 num_msrs_to_save = j;
607}
608
609__init void kvm_arch_init(void)
610{
611 kvm_init_msr_list();
612}