/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "kvm.h"
#include "x86.h"
#include "segment_descriptor.h"
#include "irq.h"

#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>

#include <asm/uaccess.h>

#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS						\
	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
#define CR4_RESERVED_BITS						\
	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE	\
			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\
			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\
			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)

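/*
 * Compute the linear base address of the segment referenced by @selector
 * by walking the host GDT (and, for selectors with the TI bit set, the
 * host LDT).
 */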
unsigned long segment_base(u16 selector)
{
	struct descriptor_table gdt;
	struct segment_descriptor *d;
	unsigned long table_base;
	unsigned long v;

	if (selector == 0)
		return 0;

	asm("sgdt %0" : "=m"(gdt));
	table_base = gdt.base;

	if (selector & 4) {		/* from ldt */
		u16 ldt_selector;

		asm("sldt %0" : "=g"(ldt_selector));
		table_base = segment_base(ldt_selector);
	}
	d = (struct segment_descriptor *)(table_base + (selector & ~7));
	v = d->base_low | ((unsigned long)d->base_mid << 16) |
		((unsigned long)d->base_high << 24);
#ifdef CONFIG_X86_64
	if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
		v |= ((unsigned long) \
		      ((struct segment_descriptor_64 *)d)->base_higher) << 32;
#endif
	return v;
}
EXPORT_SYMBOL_GPL(segment_base);

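/*
 * Accessors for the vcpu's APIC base MSR.  Writes go through the in-kernel
 * local APIC when the irqchip is emulated in the kernel, otherwise the
 * cached vcpu->apic_base is updated directly; reads return the cached
 * value in either case.
 */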
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm))
		return vcpu->apic_base;
	else
		return vcpu->apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
{
	/* TODO: reserve bits check */
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_base(vcpu, data);
	else
		vcpu->apic_base = data;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

static void inject_gp(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->inject_gp(vcpu, 0);
}

/*
 * Load the pae pdptrs.  Return true if they are all valid.
 */
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];

	mutex_lock(&vcpu->kvm->lock);
	ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
				  offset * sizeof(u64), sizeof(pdpte));
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
out:
	mutex_unlock(&vcpu->kvm->lock);

	return ret;
}

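/*
 * Emulate a guest write to CR0.  The reserved-bit and mode-transition
 * checks below mirror the #GP conditions a real CPU would raise; on
 * success the new value is handed to the vendor backend and the shadow
 * MMU context is rebuilt.
 */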
void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	if (cr0 & CR0_RESERVED_BITS) {
		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
		       cr0, vcpu->cr0);
		inject_gp(vcpu);
		return;
	}

	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
		printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
		inject_gp(vcpu);
		return;
	}

	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
		printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
		       "and a clear PE flag\n");
		inject_gp(vcpu);
		return;
	}

	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
		if ((vcpu->shadow_efer & EFER_LME)) {
			int cs_db, cs_l;

			if (!is_pae(vcpu)) {
				printk(KERN_DEBUG "set_cr0: #GP, start paging "
				       "in long mode while PAE is disabled\n");
				inject_gp(vcpu);
				return;
			}
			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
			if (cs_l) {
				printk(KERN_DEBUG "set_cr0: #GP, start paging "
				       "in long mode while CS.L == 1\n");
				inject_gp(vcpu);
				return;

			}
		} else
#endif
		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
			printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
			       "reserved bits\n");
			inject_gp(vcpu);
			return;
		}

	}

	kvm_x86_ops->set_cr0(vcpu, cr0);
	vcpu->cr0 = cr0;

	mutex_lock(&vcpu->kvm->lock);
	kvm_mmu_reset_context(vcpu);
	mutex_unlock(&vcpu->kvm->lock);
	return;
}
EXPORT_SYMBOL_GPL(set_cr0);

void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
}
EXPORT_SYMBOL_GPL(lmsw);

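/*
 * Emulate a guest write to CR4: reject reserved bits, disallow clearing
 * PAE in long mode and setting VMXE, revalidate the PDPTRs when needed,
 * then update the backend and reset the MMU context.
 */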
void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	if (cr4 & CR4_RESERVED_BITS) {
		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
		inject_gp(vcpu);
		return;
	}

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE)) {
			printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
			       "in long mode\n");
			inject_gp(vcpu);
			return;
		}
	} else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
		   && !load_pdptrs(vcpu, vcpu->cr3)) {
		printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
		inject_gp(vcpu);
		return;
	}

	if (cr4 & X86_CR4_VMXE) {
		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
		inject_gp(vcpu);
		return;
	}
	kvm_x86_ops->set_cr4(vcpu, cr4);
	vcpu->cr4 = cr4;
	mutex_lock(&vcpu->kvm->lock);
	kvm_mmu_reset_context(vcpu);
	mutex_unlock(&vcpu->kvm->lock);
}
EXPORT_SYMBOL_GPL(set_cr4);

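/*
 * Emulate a guest write to CR3.  Reserved-bit checks depend on the current
 * paging mode, the PDPTRs are reloaded for PAE paging, and the value is
 * only accepted if it maps to guest physical memory.
 */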
void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	if (is_long_mode(vcpu)) {
		if (cr3 & CR3_L_MODE_RESERVED_BITS) {
			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
			inject_gp(vcpu);
			return;
		}
	} else {
		if (is_pae(vcpu)) {
			if (cr3 & CR3_PAE_RESERVED_BITS) {
				printk(KERN_DEBUG
				       "set_cr3: #GP, reserved bits\n");
				inject_gp(vcpu);
				return;
			}
			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
				printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
				       "reserved bits\n");
				inject_gp(vcpu);
				return;
			}
		}
		/*
		 * We don't check reserved bits in nonpae mode, because
		 * this isn't enforced, and VMware depends on this.
		 */
	}

	mutex_lock(&vcpu->kvm->lock);
	/*
	 * Does the new cr3 value map to physical memory? (Note, we
	 * catch an invalid cr3 even in real-mode, because it would
	 * cause trouble later on when we turn on paging anyway.)
	 *
	 * A real CPU would silently accept an invalid cr3 and would
	 * attempt to use it - with largely undefined (and often hard
	 * to debug) behavior on the guest side.
	 */
	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
		inject_gp(vcpu);
	else {
		vcpu->cr3 = cr3;
		vcpu->mmu.new_cr3(vcpu);
	}
	mutex_unlock(&vcpu->kvm->lock);
}
EXPORT_SYMBOL_GPL(set_cr3);

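/*
 * CR8 mirrors the local APIC TPR.  With an in-kernel irqchip the value is
 * forwarded to the emulated local APIC; otherwise it is cached directly
 * in the vcpu.
 */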
void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	if (cr8 & CR8_RESERVED_BITS) {
		printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
		inject_gp(vcpu);
		return;
	}
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_tpr(vcpu, cr8);
	else
		vcpu->cr8 = cr8;
}
EXPORT_SYMBOL_GPL(set_cr8);

unsigned long get_cr8(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm))
		return kvm_lapic_get_cr8(vcpu);
	else
		return vcpu->cr8;
}
EXPORT_SYMBOL_GPL(get_cr8);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu.
 */
static u32 msrs_to_save[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_K6_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TIME_STAMP_COUNTER,
};

static unsigned num_msrs_to_save;

static u32 emulated_msrs[] = {
	MSR_IA32_MISC_ENABLE,
};

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return kvm_set_msr(vcpu, index, *data);
}

/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
		    struct kvm_msr_entry *entries,
		    int (*do_msr)(struct kvm_vcpu *vcpu,
				  unsigned index, u64 *data))
{
	int i;

	vcpu_load(vcpu);

	for (i = 0; i < msrs->nmsrs; ++i)
		if (do_msr(vcpu, entries[i].index, &entries[i].data))
			break;

	vcpu_put(vcpu);

	return i;
}

/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
		  int (*do_msr)(struct kvm_vcpu *vcpu,
				unsigned index, u64 *data),
		  int writeback)
{
	struct kvm_msrs msrs;
	struct kvm_msr_entry *entries;
	int r, n;
	unsigned size;

	r = -EFAULT;
	if (copy_from_user(&msrs, user_msrs, sizeof msrs))
		goto out;

	r = -E2BIG;
	if (msrs.nmsrs >= MAX_IO_MSRS)
		goto out;

	r = -ENOMEM;
	size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
	entries = vmalloc(size);
	if (!entries)
		goto out;

	r = -EFAULT;
	if (copy_from_user(entries, user_msrs->entries, size))
		goto out_free;

	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
	if (r < 0)
		goto out_free;

	r = -EFAULT;
	if (writeback && copy_to_user(user_msrs->entries, entries, size))
		goto out_free;

	r = n;

out_free:
	vfree(entries);
out:
	return r;
}

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	long r;

	switch (ioctl) {
	case KVM_GET_MSR_INDEX_LIST: {
		struct kvm_msr_list __user *user_msr_list = argp;
		struct kvm_msr_list msr_list;
		unsigned n;

		r = -EFAULT;
		if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
			goto out;
		n = msr_list.nmsrs;
		msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
			goto out;
		r = -E2BIG;
		if (n < num_msrs_to_save)
			goto out;
		r = -EFAULT;
		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
				 num_msrs_to_save * sizeof(u32)))
			goto out;
		if (copy_to_user(user_msr_list->indices
				 + num_msrs_to_save * sizeof(u32),
				 &emulated_msrs,
				 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
			goto out;
		r = 0;
		break;
	}
	default:
		r = -EINVAL;
	}
out:
	return r;
}

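/*
 * Arch hooks called when a vcpu is scheduled in or out; both simply
 * forward to the vendor backend.
 */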
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	kvm_x86_ops->vcpu_load(vcpu, cpu);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->vcpu_put(vcpu);
}

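/*
 * If the host has NX disabled in EFER, hide the NX capability from the
 * guest's 0x80000001 cpuid leaf so the guest does not try to use it.
 */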
static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	u64 efer;
	int i;
	struct kvm_cpuid_entry *e, *entry;

	rdmsrl(MSR_EFER, efer);
	entry = NULL;
	for (i = 0; i < vcpu->cpuid_nent; ++i) {
		e = &vcpu->cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
	if (entry && (entry->edx & (1 << 20)) && !(efer & EFER_NX)) {
		entry->edx &= ~(1 << 20);
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}

static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
				    struct kvm_cpuid *cpuid,
				    struct kvm_cpuid_entry __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -EFAULT;
	if (copy_from_user(&vcpu->cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
		goto out;
	vcpu->cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	return 0;

out:
	return r;
}

static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(s->regs, vcpu->apic->regs, sizeof *s);
	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(vcpu->apic->regs, s->regs, sizeof *s);
	kvm_apic_post_state_restore(vcpu);
	vcpu_put(vcpu);

	return 0;
}

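/*
 * x86-specific vcpu ioctls: local APIC state save/restore, guest cpuid
 * setup, and batched MSR reads/writes.
 */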
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;

	switch (ioctl) {
	case KVM_GET_LAPIC: {
		struct kvm_lapic_state lapic;

		memset(&lapic, 0, sizeof lapic);
		r = kvm_vcpu_ioctl_get_lapic(vcpu, &lapic);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &lapic, sizeof lapic))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_LAPIC: {
		struct kvm_lapic_state lapic;

		r = -EFAULT;
		if (copy_from_user(&lapic, argp, sizeof lapic))
			goto out;
		r = kvm_vcpu_ioctl_set_lapic(vcpu, &lapic);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_CPUID: {
		struct kvm_cpuid __user *cpuid_arg = argp;
		struct kvm_cpuid cpuid;

		r = -EFAULT;
		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
			goto out;
		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
		if (r)
			goto out;
		break;
	}
	case KVM_GET_MSRS:
		r = msr_io(vcpu, argp, kvm_get_msr, 1);
		break;
	case KVM_SET_MSRS:
		r = msr_io(vcpu, argp, do_set_msr, 0);
		break;
	default:
		r = -EINVAL;
	}
out:
	return r;
}

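/*
 * Forward the userspace-supplied TSS base address to the vendor backend;
 * the range check leaves room for the three pages it occupies.
 */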
static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
{
	int ret;

	if (addr > (unsigned int)(-3 * PAGE_SIZE))
		return -1;
	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
	return ret;
}

static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
					 u32 kvm_nr_mmu_pages)
{
	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
		return -EINVAL;

	mutex_lock(&kvm->lock);

	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
	kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;

	mutex_unlock(&kvm->lock);
	return 0;
}

static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
	return kvm->n_alloc_mmu_pages;
}

/*
 * Set a new alias region.  Aliases map a portion of physical memory into
 * another portion.  This is useful for memory windows, for example the PC
 * VGA region.
 */
static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
					 struct kvm_memory_alias *alias)
{
	int r, n;
	struct kvm_mem_alias *p;

	r = -EINVAL;
	/* General sanity checks */
	if (alias->memory_size & (PAGE_SIZE - 1))
		goto out;
	if (alias->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	if (alias->slot >= KVM_ALIAS_SLOTS)
		goto out;
	if (alias->guest_phys_addr + alias->memory_size
	    < alias->guest_phys_addr)
		goto out;
	if (alias->target_phys_addr + alias->memory_size
	    < alias->target_phys_addr)
		goto out;

	mutex_lock(&kvm->lock);

	p = &kvm->aliases[alias->slot];
	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
	p->npages = alias->memory_size >> PAGE_SHIFT;
	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;

	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
		if (kvm->aliases[n - 1].npages)
			break;
	kvm->naliases = n;

	kvm_mmu_zap_all(kvm);

	mutex_unlock(&kvm->lock);

	return 0;

out:
	return r;
}

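/*
 * Copy the state of the in-kernel PIC (master or slave) or IOAPIC to and
 * from the userspace-visible kvm_irqchip structure.
 */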
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
	int r;

	r = 0;
	switch (chip->chip_id) {
	case KVM_IRQCHIP_PIC_MASTER:
		memcpy(&chip->chip.pic,
		       &pic_irqchip(kvm)->pics[0],
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_PIC_SLAVE:
		memcpy(&chip->chip.pic,
		       &pic_irqchip(kvm)->pics[1],
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_IOAPIC:
		memcpy(&chip->chip.ioapic,
		       ioapic_irqchip(kvm),
		       sizeof(struct kvm_ioapic_state));
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
	int r;

	r = 0;
	switch (chip->chip_id) {
	case KVM_IRQCHIP_PIC_MASTER:
		memcpy(&pic_irqchip(kvm)->pics[0],
		       &chip->chip.pic,
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_PIC_SLAVE:
		memcpy(&pic_irqchip(kvm)->pics[1],
		       &chip->chip.pic,
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_IOAPIC:
		memcpy(ioapic_irqchip(kvm),
		       &chip->chip.ioapic,
		       sizeof(struct kvm_ioapic_state));
		break;
	default:
		r = -EINVAL;
		break;
	}
	kvm_pic_update_irq(pic_irqchip(kvm));
	return r;
}

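/*
 * x86-specific VM ioctls: memory slots and aliases, MMU page limits, the
 * in-kernel interrupt controllers, and interrupt line injection.
 */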
long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r = -EINVAL;

	switch (ioctl) {
	case KVM_SET_TSS_ADDR:
		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
		if (r < 0)
			goto out;
		break;
	case KVM_SET_MEMORY_REGION: {
		struct kvm_memory_region kvm_mem;
		struct kvm_userspace_memory_region kvm_userspace_mem;

		r = -EFAULT;
		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
			goto out;
		kvm_userspace_mem.slot = kvm_mem.slot;
		kvm_userspace_mem.flags = kvm_mem.flags;
		kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
		if (r)
			goto out;
		break;
	}
	case KVM_SET_NR_MMU_PAGES:
		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
		if (r)
			goto out;
		break;
	case KVM_GET_NR_MMU_PAGES:
		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
		break;
	case KVM_SET_MEMORY_ALIAS: {
		struct kvm_memory_alias alias;

		r = -EFAULT;
		if (copy_from_user(&alias, argp, sizeof alias))
			goto out;
		r = kvm_vm_ioctl_set_memory_alias(kvm, &alias);
		if (r)
			goto out;
		break;
	}
	case KVM_CREATE_IRQCHIP:
		r = -ENOMEM;
		kvm->vpic = kvm_create_pic(kvm);
		if (kvm->vpic) {
			r = kvm_ioapic_init(kvm);
			if (r) {
				kfree(kvm->vpic);
				kvm->vpic = NULL;
				goto out;
			}
		} else
			goto out;
		break;
	case KVM_IRQ_LINE: {
		struct kvm_irq_level irq_event;

		r = -EFAULT;
		if (copy_from_user(&irq_event, argp, sizeof irq_event))
			goto out;
		if (irqchip_in_kernel(kvm)) {
			mutex_lock(&kvm->lock);
			if (irq_event.irq < 16)
				kvm_pic_set_irq(pic_irqchip(kvm),
					irq_event.irq,
					irq_event.level);
			kvm_ioapic_set_irq(kvm->vioapic,
					irq_event.irq,
					irq_event.level);
			mutex_unlock(&kvm->lock);
			r = 0;
		}
		break;
	}
	case KVM_GET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
		struct kvm_irqchip chip;

		r = -EFAULT;
		if (copy_from_user(&chip, argp, sizeof chip))
			goto out;
		r = -ENXIO;
		if (!irqchip_in_kernel(kvm))
			goto out;
		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &chip, sizeof chip))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
		struct kvm_irqchip chip;

		r = -EFAULT;
		if (copy_from_user(&chip, argp, sizeof chip))
			goto out;
		r = -ENXIO;
		if (!irqchip_in_kernel(kvm))
			goto out;
		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
		if (r)
			goto out;
		r = 0;
		break;
	}
	default:
		;
	}
out:
	return r;
}

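/*
 * Probe which of the save-worthy MSRs actually exist on this host and
 * compact msrs_to_save accordingly; run once at module init.
 */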
static __init void kvm_init_msr_list(void)
{
	u32 dummy[2];
	unsigned i, j;

	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
			continue;
		if (j < i)
			msrs_to_save[j] = msrs_to_save[i];
		j++;
	}
	num_msrs_to_save = j;
}

__init void kvm_arch_init(void)
{
	kvm_init_msr_list();
}