/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * derived from drivers/kvm/kvm_main.c
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2008 Qumranet, Inc.
 * Copyright IBM Corporation, 2008
 *
 * Authors:
 *   Avi Kivity   <avi@qumranet.com>
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Amit Shah    <amit.shah@qumranet.com>
 *   Ben-Ami Yassour <benami@il.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include <linux/kvm_host.h>
#include "irq.h"
#include "mmu.h"
#include "i8254.h"
#include "tss.h"
#include "kvm_cache_regs.h"
#include "x86.h"

#include <linux/clocksource.h>
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
#include <linux/highmem.h>
#include <linux/intel-iommu.h>

#include <asm/uaccess.h>
#include <asm/msr.h>
#include <asm/desc.h>

#define MAX_IO_MSRS 256
#define CR0_RESERVED_BITS						\
	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
#define CR4_RESERVED_BITS						\
	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE	\
			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR	\
			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
/* EFER defaults:
 * - enable syscall per default because it is emulated by KVM
 * - enable LME and LMA per default on 64-bit KVM
 */
#ifdef CONFIG_X86_64
static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
#else
static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
#endif

#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
				    struct kvm_cpuid_entry2 __user *entries);

struct kvm_x86_ops *kvm_x86_ops;
EXPORT_SYMBOL_GPL(kvm_x86_ops);

struct kvm_stats_debugfs_item debugfs_entries[] = {
	{ "pf_fixed", VCPU_STAT(pf_fixed) },
	{ "pf_guest", VCPU_STAT(pf_guest) },
	{ "tlb_flush", VCPU_STAT(tlb_flush) },
	{ "invlpg", VCPU_STAT(invlpg) },
	{ "exits", VCPU_STAT(exits) },
	{ "io_exits", VCPU_STAT(io_exits) },
	{ "mmio_exits", VCPU_STAT(mmio_exits) },
	{ "signal_exits", VCPU_STAT(signal_exits) },
	{ "irq_window", VCPU_STAT(irq_window_exits) },
	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
	{ "halt_exits", VCPU_STAT(halt_exits) },
	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
	{ "hypercalls", VCPU_STAT(hypercalls) },
	{ "request_irq", VCPU_STAT(request_irq_exits) },
	{ "request_nmi", VCPU_STAT(request_nmi_exits) },
	{ "irq_exits", VCPU_STAT(irq_exits) },
	{ "host_state_reload", VCPU_STAT(host_state_reload) },
	{ "efer_reload", VCPU_STAT(efer_reload) },
	{ "fpu_reload", VCPU_STAT(fpu_reload) },
	{ "insn_emulation", VCPU_STAT(insn_emulation) },
	{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
	{ "irq_injections", VCPU_STAT(irq_injections) },
	{ "nmi_injections", VCPU_STAT(nmi_injections) },
	{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
	{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
	{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
	{ "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) },
	{ "mmu_flooded", VM_STAT(mmu_flooded) },
	{ "mmu_recycled", VM_STAT(mmu_recycled) },
	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
	{ "mmu_unsync", VM_STAT(mmu_unsync) },
	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
	{ "largepages", VM_STAT(lpages) },
	{ NULL }
};

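/*
 * Return the linear base address of the descriptor that 'selector'
 * refers to: walk the host GDT (read via sgdt), recursing once through
 * the LDT when the selector's table-indicator bit is set, and on
 * x86_64 pick up the extended base of system descriptors.
 */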
unsigned long segment_base(u16 selector)
{
	struct descriptor_table gdt;
	struct desc_struct *d;
	unsigned long table_base;
	unsigned long v;

	if (selector == 0)
		return 0;

	asm("sgdt %0" : "=m"(gdt));
	table_base = gdt.base;

	if (selector & 4) {	/* from ldt */
		u16 ldt_selector;

		asm("sldt %0" : "=g"(ldt_selector));
		table_base = segment_base(ldt_selector);
	}
	d = (struct desc_struct *)(table_base + (selector & ~7));
	v = d->base0 | ((unsigned long)d->base1 << 16) |
		((unsigned long)d->base2 << 24);
#ifdef CONFIG_X86_64
	if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
		v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
#endif
	return v;
}
EXPORT_SYMBOL_GPL(segment_base);

u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
{
	/* vcpu->arch.apic_base holds the base whether or not the
	 * irqchip is in the kernel, so no need to branch on it. */
	return vcpu->arch.apic_base;
}
EXPORT_SYMBOL_GPL(kvm_get_apic_base);

void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
{
	/* TODO: reserve bits check */
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_base(vcpu, data);
	else
		vcpu->arch.apic_base = data;
}
EXPORT_SYMBOL_GPL(kvm_set_apic_base);

void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr)
{
	WARN_ON(vcpu->arch.exception.pending);
	vcpu->arch.exception.pending = true;
	vcpu->arch.exception.has_error_code = false;
	vcpu->arch.exception.nr = nr;
}
EXPORT_SYMBOL_GPL(kvm_queue_exception);

void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
			   u32 error_code)
{
	++vcpu->stat.pf_guest;
	if (vcpu->arch.exception.pending) {
		if (vcpu->arch.exception.nr == PF_VECTOR) {
			printk(KERN_DEBUG "kvm: inject_page_fault:"
					" double fault 0x%lx\n", addr);
			vcpu->arch.exception.nr = DF_VECTOR;
			vcpu->arch.exception.error_code = 0;
		} else if (vcpu->arch.exception.nr == DF_VECTOR) {
			/* triple fault -> shutdown */
			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
		}
		return;
	}
	vcpu->arch.cr2 = addr;
	kvm_queue_exception_e(vcpu, PF_VECTOR, error_code);
}

void kvm_inject_nmi(struct kvm_vcpu *vcpu)
{
	vcpu->arch.nmi_pending = 1;
}
EXPORT_SYMBOL_GPL(kvm_inject_nmi);

void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
{
	WARN_ON(vcpu->arch.exception.pending);
	vcpu->arch.exception.pending = true;
	vcpu->arch.exception.has_error_code = true;
	vcpu->arch.exception.nr = nr;
	vcpu->arch.exception.error_code = error_code;
}
EXPORT_SYMBOL_GPL(kvm_queue_exception_e);

static void __queue_exception(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
				     vcpu->arch.exception.has_error_code,
				     vcpu->arch.exception.error_code);
}

/*
 * Load the pae pdptrs.  Return true if they are all valid.
 */
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
	unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
	int i;
	int ret;
	u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];

	ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
				  offset * sizeof(u64), sizeof(pdpte));
	if (ret < 0) {
		ret = 0;
		goto out;
	}
	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
			ret = 0;
			goto out;
		}
	}
	ret = 1;

	memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
out:

	return ret;
}
EXPORT_SYMBOL_GPL(load_pdptrs);

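/*
 * Compare the guest's current pdptes with the values cached in
 * vcpu->arch.pdptrs; kvm_set_cr3() uses this to decide whether a
 * write that leaves cr3 unchanged can skip the full reload and just
 * sync the roots and flush the TLB.
 */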
static bool pdptrs_changed(struct kvm_vcpu *vcpu)
{
	u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
	bool changed = true;
	int r;

	if (is_long_mode(vcpu) || !is_pae(vcpu))
		return false;

	r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
	if (r < 0)
		goto out;
	changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
out:

	return changed;
}

void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	if (cr0 & CR0_RESERVED_BITS) {
		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
		       cr0, vcpu->arch.cr0);
		kvm_inject_gp(vcpu, 0);
		return;
	}

	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
		printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
		kvm_inject_gp(vcpu, 0);
		return;
	}

	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
		printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
		       "and a clear PE flag\n");
		kvm_inject_gp(vcpu, 0);
		return;
	}

	if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
#ifdef CONFIG_X86_64
		if ((vcpu->arch.shadow_efer & EFER_LME)) {
			int cs_db, cs_l;

			if (!is_pae(vcpu)) {
				printk(KERN_DEBUG "set_cr0: #GP, start paging "
				       "in long mode while PAE is disabled\n");
				kvm_inject_gp(vcpu, 0);
				return;
			}
			kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
			if (cs_l) {
				printk(KERN_DEBUG "set_cr0: #GP, start paging "
				       "in long mode while CS.L == 1\n");
				kvm_inject_gp(vcpu, 0);
				return;

			}
		} else
#endif
		if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
			printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
			       "reserved bits\n");
			kvm_inject_gp(vcpu, 0);
			return;
		}

	}

	kvm_x86_ops->set_cr0(vcpu, cr0);
	vcpu->arch.cr0 = cr0;

	kvm_mmu_reset_context(vcpu);
	return;
}
EXPORT_SYMBOL_GPL(kvm_set_cr0);

void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
{
	kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
	KVMTRACE_1D(LMSW, vcpu,
		    (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
		    handler);
}
EXPORT_SYMBOL_GPL(kvm_lmsw);

void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
	if (cr4 & CR4_RESERVED_BITS) {
		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
		kvm_inject_gp(vcpu, 0);
		return;
	}

	if (is_long_mode(vcpu)) {
		if (!(cr4 & X86_CR4_PAE)) {
			printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while "
			       "in long mode\n");
			kvm_inject_gp(vcpu, 0);
			return;
		}
	} else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & X86_CR4_PAE)
		   && !load_pdptrs(vcpu, vcpu->arch.cr3)) {
		printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
		kvm_inject_gp(vcpu, 0);
		return;
	}

	if (cr4 & X86_CR4_VMXE) {
		printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n");
		kvm_inject_gp(vcpu, 0);
		return;
	}
	kvm_x86_ops->set_cr4(vcpu, cr4);
	vcpu->arch.cr4 = cr4;
	kvm_mmu_reset_context(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_set_cr4);

void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
{
	if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
		kvm_mmu_sync_roots(vcpu);
		kvm_mmu_flush_tlb(vcpu);
		return;
	}

	if (is_long_mode(vcpu)) {
		if (cr3 & CR3_L_MODE_RESERVED_BITS) {
			printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
			kvm_inject_gp(vcpu, 0);
			return;
		}
	} else {
		if (is_pae(vcpu)) {
			if (cr3 & CR3_PAE_RESERVED_BITS) {
				printk(KERN_DEBUG
				       "set_cr3: #GP, reserved bits\n");
				kvm_inject_gp(vcpu, 0);
				return;
			}
			if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) {
				printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
				       "reserved bits\n");
				kvm_inject_gp(vcpu, 0);
				return;
			}
		}
		/*
		 * We don't check reserved bits in nonpae mode, because
		 * this isn't enforced, and VMware depends on this.
		 */
	}

	/*
	 * Does the new cr3 value map to physical memory? (Note, we
	 * catch an invalid cr3 even in real-mode, because it would
	 * cause trouble later on when we turn on paging anyway.)
	 *
	 * A real CPU would silently accept an invalid cr3 and would
	 * attempt to use it - with largely undefined (and often hard
	 * to debug) behavior on the guest side.
	 */
	if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
		kvm_inject_gp(vcpu, 0);
	else {
		vcpu->arch.cr3 = cr3;
		vcpu->arch.mmu.new_cr3(vcpu);
	}
}
EXPORT_SYMBOL_GPL(kvm_set_cr3);

void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
{
	if (cr8 & CR8_RESERVED_BITS) {
		printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
		kvm_inject_gp(vcpu, 0);
		return;
	}
	if (irqchip_in_kernel(vcpu->kvm))
		kvm_lapic_set_tpr(vcpu, cr8);
	else
		vcpu->arch.cr8 = cr8;
}
EXPORT_SYMBOL_GPL(kvm_set_cr8);

unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
{
	if (irqchip_in_kernel(vcpu->kvm))
		return kvm_lapic_get_cr8(vcpu);
	else
		return vcpu->arch.cr8;
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);

/*
 * List of msr numbers which we expose to userspace through KVM_GET_MSRS
 * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
 *
 * This list is modified at module load time to reflect the
 * capabilities of the host cpu.
 */
static u32 msrs_to_save[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	MSR_K6_STAR,
#ifdef CONFIG_X86_64
	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
	MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
	MSR_IA32_PERF_STATUS,
};

static unsigned num_msrs_to_save;

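/*
 * MSRs reported through KVM_GET_MSR_INDEX_LIST in addition to
 * msrs_to_save; these are emulated entirely by kvm and do not depend
 * on capabilities of the host cpu.
 */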
static u32 emulated_msrs[] = {
	MSR_IA32_MISC_ENABLE,
};

static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
{
	if (efer & efer_reserved_bits) {
		printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
		       efer);
		kvm_inject_gp(vcpu, 0);
		return;
	}

	if (is_paging(vcpu)
	    && (vcpu->arch.shadow_efer & EFER_LME) != (efer & EFER_LME)) {
		printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
		kvm_inject_gp(vcpu, 0);
		return;
	}

	kvm_x86_ops->set_efer(vcpu, efer);

	efer &= ~EFER_LMA;
	efer |= vcpu->arch.shadow_efer & EFER_LMA;

	vcpu->arch.shadow_efer = efer;
}

void kvm_enable_efer_bits(u64 mask)
{
	efer_reserved_bits &= ~mask;
}
EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);


/*
 * Writes msr value into the appropriate "register".
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
	return kvm_x86_ops->set_msr(vcpu, msr_index, data);
}

/*
 * Adapt set_msr() to msr_io()'s calling convention
 */
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
	return kvm_set_msr(vcpu, index, *data);
}

static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
	static int version;
	struct pvclock_wall_clock wc;
	struct timespec now, sys, boot;

	if (!wall_clock)
		return;

	version++;

	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));

	/*
	 * The guest calculates current wall clock time by adding
	 * system time (updated by kvm_write_guest_time below) to the
	 * wall clock specified here.  guest system time equals host
	 * system time for us, thus we must fill in host boot time here.
	 */
	now = current_kernel_time();
	ktime_get_ts(&sys);
	boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));

	wc.sec = boot.tv_sec;
	wc.nsec = boot.tv_nsec;
	wc.version = version;

	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));

	version++;
	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}

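/*
 * pvclock exports the TSC-to-nanosecond conversion to the guest as a
 * shift plus a 32-bit fixed-point multiplier: the guest computes,
 * roughly,
 *
 *	ns = ((tsc_delta << tsc_shift) * tsc_to_system_mul) >> 32
 *
 * with a negative tsc_shift meaning a right shift.  kvm_set_time_scale()
 * below halves or doubles the ticks-per-second value until it lies
 * within a factor of two of 1e9, tracking the shift, so that the
 * multiplier computed by div_frac() as "(nsecs << 32) / tps32" fits
 * in 32 bits.
 */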
static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
{
	uint32_t quotient, remainder;

	/* Don't try to replace with do_div(), this one calculates
	 * "(dividend << 32) / divisor" */
	__asm__ ( "divl %4"
		  : "=a" (quotient), "=d" (remainder)
		  : "0" (0), "1" (dividend), "r" (divisor) );
	return quotient;
}

static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
{
	uint64_t nsecs = 1000000000LL;
	int32_t  shift = 0;
	uint64_t tps64;
	uint32_t tps32;

	tps64 = tsc_khz * 1000LL;
	while (tps64 > nsecs*2) {
		tps64 >>= 1;
		shift--;
	}

	tps32 = (uint32_t)tps64;
	while (tps32 <= (uint32_t)nsecs) {
		tps32 <<= 1;
		shift++;
	}

	hv_clock->tsc_shift = shift;
	hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);

	pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
		 __func__, tsc_khz, hv_clock->tsc_shift,
		 hv_clock->tsc_to_system_mul);
}

static void kvm_write_guest_time(struct kvm_vcpu *v)
{
	struct timespec ts;
	unsigned long flags;
	struct kvm_vcpu_arch *vcpu = &v->arch;
	void *shared_kaddr;

	if (!vcpu->time_page)
		return;

	if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
		kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
		vcpu->hv_clock_tsc_khz = tsc_khz;
	}

	/* Keep irq disabled to prevent changes to the clock */
	local_irq_save(flags);
	kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
		    &vcpu->hv_clock.tsc_timestamp);
	ktime_get_ts(&ts);
	local_irq_restore(flags);

	/* With all the info we got, fill in the values */

	vcpu->hv_clock.system_time = ts.tv_nsec +
				     (NSEC_PER_SEC * (u64)ts.tv_sec);
	/*
	 * The interface expects us to write an even number signaling that the
	 * update is finished. Since the guest won't see the intermediate
	 * state, we just increase by 2 at the end.
	 */
	vcpu->hv_clock.version += 2;

	shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);

	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
	       sizeof(vcpu->hv_clock));

	kunmap_atomic(shared_kaddr, KM_USER0);

	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
}

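/*
 * Guest MTRR support: MSRs 0x200-0x2ff cover the variable-range
 * base/mask pairs starting at 0x200 plus the fixed-range and
 * default-type registers listed below.  The two accessors simply
 * store and return the raw values, indexed by (msr - 0x200), in
 * vcpu->arch.mtrr; the settings are not otherwise acted upon here.
 */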
static bool msr_mtrr_valid(unsigned msr)
{
	switch (msr) {
	case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
	case MSR_MTRRfix64K_00000:
	case MSR_MTRRfix16K_80000:
	case MSR_MTRRfix16K_A0000:
	case MSR_MTRRfix4K_C0000:
	case MSR_MTRRfix4K_C8000:
	case MSR_MTRRfix4K_D0000:
	case MSR_MTRRfix4K_D8000:
	case MSR_MTRRfix4K_E0000:
	case MSR_MTRRfix4K_E8000:
	case MSR_MTRRfix4K_F0000:
	case MSR_MTRRfix4K_F8000:
	case MSR_MTRRdefType:
	case MSR_IA32_CR_PAT:
		return true;
	case 0x2f8:
		return true;
	}
	return false;
}

static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	if (!msr_mtrr_valid(msr))
		return 1;

	vcpu->arch.mtrr[msr - 0x200] = data;
	return 0;
}

int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	switch (msr) {
	case MSR_EFER:
		set_efer(vcpu, data);
		break;
	case MSR_IA32_MC0_STATUS:
		pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
			  __func__, data);
		break;
	case MSR_IA32_MCG_STATUS:
		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
			  __func__, data);
		break;
	case MSR_IA32_MCG_CTL:
		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
			  __func__, data);
		break;
	case MSR_IA32_DEBUGCTLMSR:
		if (!data) {
			/* We support the non-activated case already */
			break;
		} else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
			/* Values other than LBR and BTF are vendor-specific,
			   thus reserved and should throw a #GP */
			return 1;
		}
		pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
			  __func__, data);
		break;
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_UCODE_WRITE:
		break;
	case 0x200 ... 0x2ff:
		return set_msr_mtrr(vcpu, msr, data);
	case MSR_IA32_APICBASE:
		kvm_set_apic_base(vcpu, data);
		break;
	case MSR_IA32_MISC_ENABLE:
		vcpu->arch.ia32_misc_enable_msr = data;
		break;
	case MSR_KVM_WALL_CLOCK:
		vcpu->kvm->arch.wall_clock = data;
		kvm_write_wall_clock(vcpu->kvm, data);
		break;
	case MSR_KVM_SYSTEM_TIME: {
		if (vcpu->arch.time_page) {
			kvm_release_page_dirty(vcpu->arch.time_page);
			vcpu->arch.time_page = NULL;
		}

		vcpu->arch.time = data;

		/* we verify if the enable bit is set... */
		if (!(data & 1))
			break;

		/* ...but clean it before doing the actual write */
		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);

		vcpu->arch.time_page =
				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);

		if (is_error_page(vcpu->arch.time_page)) {
			kvm_release_page_clean(vcpu->arch.time_page);
			vcpu->arch.time_page = NULL;
		}

		kvm_write_guest_time(vcpu);
		break;
	}
	default:
		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_set_msr_common);


/*
 * Reads an msr value (of 'msr_index') into 'pdata'.
 * Returns 0 on success, non-0 otherwise.
 * Assumes vcpu_load() was already called.
 */
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
	return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
}

static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	if (!msr_mtrr_valid(msr))
		return 1;

	*pdata = vcpu->arch.mtrr[msr - 0x200];
	return 0;
}

int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	u64 data;

	switch (msr) {
	case 0xc0010010: /* SYSCFG */
	case 0xc0010015: /* HWCR */
	case MSR_IA32_PLATFORM_ID:
	case MSR_IA32_P5_MC_ADDR:
	case MSR_IA32_P5_MC_TYPE:
	case MSR_IA32_MC0_CTL:
	case MSR_IA32_MCG_STATUS:
	case MSR_IA32_MCG_CAP:
	case MSR_IA32_MCG_CTL:
	case MSR_IA32_MC0_MISC:
	case MSR_IA32_MC0_MISC+4:
	case MSR_IA32_MC0_MISC+8:
	case MSR_IA32_MC0_MISC+12:
	case MSR_IA32_MC0_MISC+16:
	case MSR_IA32_MC0_MISC+20:
	case MSR_IA32_UCODE_REV:
	case MSR_IA32_EBL_CR_POWERON:
	case MSR_IA32_DEBUGCTLMSR:
	case MSR_IA32_LASTBRANCHFROMIP:
	case MSR_IA32_LASTBRANCHTOIP:
	case MSR_IA32_LASTINTFROMIP:
	case MSR_IA32_LASTINTTOIP:
		data = 0;
		break;
	case MSR_MTRRcap:
		data = 0x500 | KVM_NR_VAR_MTRR;
		break;
	case 0x200 ... 0x2ff:
		return get_msr_mtrr(vcpu, msr, pdata);
	case 0xcd: /* fsb frequency */
		data = 3;
		break;
	case MSR_IA32_APICBASE:
		data = kvm_get_apic_base(vcpu);
		break;
	case MSR_IA32_MISC_ENABLE:
		data = vcpu->arch.ia32_misc_enable_msr;
		break;
	case MSR_IA32_PERF_STATUS:
		/* TSC increment by tick */
		data = 1000ULL;
		/* CPU multiplier */
		data |= (((uint64_t)4ULL) << 40);
		break;
	case MSR_EFER:
		data = vcpu->arch.shadow_efer;
		break;
	case MSR_KVM_WALL_CLOCK:
		data = vcpu->kvm->arch.wall_clock;
		break;
	case MSR_KVM_SYSTEM_TIME:
		data = vcpu->arch.time;
		break;
	default:
		pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
		return 1;
	}
	*pdata = data;
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_msr_common);

/*
 * Read or write a bunch of msrs. All parameters are kernel addresses.
 *
 * @return number of msrs set successfully.
 */
static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
		    struct kvm_msr_entry *entries,
		    int (*do_msr)(struct kvm_vcpu *vcpu,
				  unsigned index, u64 *data))
{
	int i;

	vcpu_load(vcpu);

	down_read(&vcpu->kvm->slots_lock);
	for (i = 0; i < msrs->nmsrs; ++i)
		if (do_msr(vcpu, entries[i].index, &entries[i].data))
			break;
	up_read(&vcpu->kvm->slots_lock);

	vcpu_put(vcpu);

	return i;
}

/*
 * Read or write a bunch of msrs. Parameters are user addresses.
 *
 * @return number of msrs set successfully.
 */
static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
		  int (*do_msr)(struct kvm_vcpu *vcpu,
				unsigned index, u64 *data),
		  int writeback)
{
	struct kvm_msrs msrs;
	struct kvm_msr_entry *entries;
	int r, n;
	unsigned size;

	r = -EFAULT;
	if (copy_from_user(&msrs, user_msrs, sizeof msrs))
		goto out;

	r = -E2BIG;
	if (msrs.nmsrs >= MAX_IO_MSRS)
		goto out;

	r = -ENOMEM;
	size = sizeof(struct kvm_msr_entry) * msrs.nmsrs;
	entries = vmalloc(size);
	if (!entries)
		goto out;

	r = -EFAULT;
	if (copy_from_user(entries, user_msrs->entries, size))
		goto out_free;

	r = n = __msr_io(vcpu, &msrs, entries, do_msr);
	if (r < 0)
		goto out_free;

	r = -EFAULT;
	if (writeback && copy_to_user(user_msrs->entries, entries, size))
		goto out_free;

	r = n;

out_free:
	vfree(entries);
out:
	return r;
}

int kvm_dev_ioctl_check_extension(long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_IRQCHIP:
	case KVM_CAP_HLT:
	case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
	case KVM_CAP_USER_MEMORY:
	case KVM_CAP_SET_TSS_ADDR:
	case KVM_CAP_EXT_CPUID:
	case KVM_CAP_CLOCKSOURCE:
	case KVM_CAP_PIT:
	case KVM_CAP_NOP_IO_DELAY:
	case KVM_CAP_MP_STATE:
	case KVM_CAP_SYNC_MMU:
		r = 1;
		break;
	case KVM_CAP_COALESCED_MMIO:
		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
		break;
	case KVM_CAP_VAPIC:
		r = !kvm_x86_ops->cpu_has_accelerated_tpr();
		break;
	case KVM_CAP_NR_VCPUS:
		r = KVM_MAX_VCPUS;
		break;
	case KVM_CAP_NR_MEMSLOTS:
		r = KVM_MEMORY_SLOTS;
		break;
	case KVM_CAP_PV_MMU:
		r = !tdp_enabled;
		break;
	case KVM_CAP_IOMMU:
		r = intel_iommu_found();
		break;
	default:
		r = 0;
		break;
	}
	return r;
}

long kvm_arch_dev_ioctl(struct file *filp,
			unsigned int ioctl, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	long r;

	switch (ioctl) {
	case KVM_GET_MSR_INDEX_LIST: {
		struct kvm_msr_list __user *user_msr_list = argp;
		struct kvm_msr_list msr_list;
		unsigned n;

		r = -EFAULT;
		if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
			goto out;
		n = msr_list.nmsrs;
		msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
		if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
			goto out;
		r = -E2BIG;
		if (n < num_msrs_to_save)
			goto out;
		r = -EFAULT;
		if (copy_to_user(user_msr_list->indices, &msrs_to_save,
				 num_msrs_to_save * sizeof(u32)))
			goto out;
		if (copy_to_user(user_msr_list->indices
				 + num_msrs_to_save * sizeof(u32),
				 &emulated_msrs,
				 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
			goto out;
		r = 0;
		break;
	}
	case KVM_GET_SUPPORTED_CPUID: {
		struct kvm_cpuid2 __user *cpuid_arg = argp;
		struct kvm_cpuid2 cpuid;

		r = -EFAULT;
		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
			goto out;
		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
						      cpuid_arg->entries);
		if (r)
			goto out;

		r = -EFAULT;
		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
			goto out;
		r = 0;
		break;
	}
	default:
		r = -EINVAL;
	}
out:
	return r;
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	kvm_x86_ops->vcpu_load(vcpu, cpu);
	kvm_write_guest_time(vcpu);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
	kvm_x86_ops->vcpu_put(vcpu);
	kvm_put_guest_fpu(vcpu);
}

static int is_efer_nx(void)
{
	u64 efer;

	rdmsrl(MSR_EFER, efer);
	return efer & EFER_NX;
}

static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
{
	int i;
	struct kvm_cpuid_entry2 *e, *entry;

	entry = NULL;
	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		e = &vcpu->arch.cpuid_entries[i];
		if (e->function == 0x80000001) {
			entry = e;
			break;
		}
	}
	if (entry && (entry->edx & (1 << 20)) && !is_efer_nx()) {
		entry->edx &= ~(1 << 20);
		printk(KERN_INFO "kvm: guest NX capability removed\n");
	}
}

/* when an old userspace process fills a new kernel module */
static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
				    struct kvm_cpuid *cpuid,
				    struct kvm_cpuid_entry __user *entries)
{
	int r, i;
	struct kvm_cpuid_entry *cpuid_entries;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -ENOMEM;
	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry) * cpuid->nent);
	if (!cpuid_entries)
		goto out;
	r = -EFAULT;
	if (copy_from_user(cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry)))
		goto out_free;
	for (i = 0; i < cpuid->nent; i++) {
		vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
		vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
		vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
		vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
		vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
		vcpu->arch.cpuid_entries[i].index = 0;
		vcpu->arch.cpuid_entries[i].flags = 0;
		vcpu->arch.cpuid_entries[i].padding[0] = 0;
		vcpu->arch.cpuid_entries[i].padding[1] = 0;
		vcpu->arch.cpuid_entries[i].padding[2] = 0;
	}
	vcpu->arch.cpuid_nent = cpuid->nent;
	cpuid_fix_nx_cap(vcpu);
	r = 0;

out_free:
	vfree(cpuid_entries);
out:
	return r;
}

static int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
				     struct kvm_cpuid2 *cpuid,
				     struct kvm_cpuid_entry2 __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
		goto out;
	r = -EFAULT;
	if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
			   cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
		goto out;
	vcpu->arch.cpuid_nent = cpuid->nent;
	return 0;

out:
	return r;
}

static int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
				     struct kvm_cpuid2 *cpuid,
				     struct kvm_cpuid_entry2 __user *entries)
{
	int r;

	r = -E2BIG;
	if (cpuid->nent < vcpu->arch.cpuid_nent)
		goto out;
	r = -EFAULT;
	if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
			 vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
		goto out;
	return 0;

out:
	cpuid->nent = vcpu->arch.cpuid_nent;
	return r;
}

static inline u32 bit(int bitno)
{
	return 1 << (bitno & 31);
}

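/*
 * Helpers for KVM_GET_SUPPORTED_CPUID: do_cpuid_1_ent() snapshots one
 * host cpuid leaf, and do_cpuid_ent() masks its feature words down to
 * what kvm exposes to guests, expanding the multi-entry leaves
 * (functions 2, 4 and 0xb) as it goes.
 */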
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
			   u32 index)
{
	entry->function = function;
	entry->index = index;
	cpuid_count(entry->function, entry->index,
		    &entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
	entry->flags = 0;
}

static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
			 u32 index, int *nent, int maxnent)
{
	const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) |
		bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
		bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
		bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
		bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
		bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) |
		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
		bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) |
		bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) |
		bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP);
	const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) |
		bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) |
		bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) |
		bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) |
		bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) |
		bit(X86_FEATURE_PGE) |
		bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) |
		bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) |
		bit(X86_FEATURE_SYSCALL) |
		/* using "&&" here would yield the int 1 (i.e. the FPU
		 * bit), not the NX bit, so select the mask explicitly */
		(is_efer_nx() ? bit(X86_FEATURE_NX) : 0) |
#ifdef CONFIG_X86_64
		bit(X86_FEATURE_LM) |
#endif
		bit(X86_FEATURE_MMXEXT) |
		bit(X86_FEATURE_3DNOWEXT) |
		bit(X86_FEATURE_3DNOW);
	const u32 kvm_supported_word3_x86_features =
		bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16);
	const u32 kvm_supported_word6_x86_features =
		bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY);

	/* all func 2 cpuid_count() should be called on the same cpu */
	get_cpu();
	do_cpuid_1_ent(entry, function, index);
	++*nent;

	switch (function) {
	case 0:
		entry->eax = min(entry->eax, (u32)0xb);
		break;
	case 1:
		entry->edx &= kvm_supported_word0_x86_features;
		entry->ecx &= kvm_supported_word3_x86_features;
		break;
	/* function 2 entries are STATEFUL. That is, repeated cpuid commands
	 * may return different values. This forces us to get_cpu() before
	 * issuing the first command, and also to emulate this annoying behavior
	 * in kvm_emulate_cpuid() using KVM_CPUID_FLAG_STATE_READ_NEXT */
	case 2: {
		int t, times = entry->eax & 0xff;

		entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
		for (t = 1; t < times && *nent < maxnent; ++t) {
			do_cpuid_1_ent(&entry[t], function, 0);
			entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
			++*nent;
		}
		break;
	}
	/* function 4 and 0xb have additional index. */
	case 4: {
		int i, cache_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until cache_type is zero */
		for (i = 1; *nent < maxnent; ++i) {
			cache_type = entry[i - 1].eax & 0x1f;
			if (!cache_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	case 0xb: {
		int i, level_type;

		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		/* read more entries until level_type is zero */
		for (i = 1; *nent < maxnent; ++i) {
			level_type = entry[i - 1].ecx & 0xff;
			if (!level_type)
				break;
			do_cpuid_1_ent(&entry[i], function, i);
			entry[i].flags |=
			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
			++*nent;
		}
		break;
	}
	case 0x80000000:
		entry->eax = min(entry->eax, 0x8000001a);
		break;
	case 0x80000001:
		entry->edx &= kvm_supported_word1_x86_features;
		entry->ecx &= kvm_supported_word6_x86_features;
		break;
	}
	put_cpu();
}

static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
				    struct kvm_cpuid_entry2 __user *entries)
{
	struct kvm_cpuid_entry2 *cpuid_entries;
	int limit, nent = 0, r = -E2BIG;
	u32 func;

	if (cpuid->nent < 1)
		goto out;
	r = -ENOMEM;
	cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
	if (!cpuid_entries)
		goto out;

	do_cpuid_ent(&cpuid_entries[0], 0, 0, &nent, cpuid->nent);
	limit = cpuid_entries[0].eax;
	for (func = 1; func <= limit && nent < cpuid->nent; ++func)
		do_cpuid_ent(&cpuid_entries[nent], func, 0,
			     &nent, cpuid->nent);
	r = -E2BIG;
	if (nent >= cpuid->nent)
		goto out_free;

	do_cpuid_ent(&cpuid_entries[nent], 0x80000000, 0, &nent, cpuid->nent);
	limit = cpuid_entries[nent - 1].eax;
	for (func = 0x80000001; func <= limit && nent < cpuid->nent; ++func)
		do_cpuid_ent(&cpuid_entries[nent], func, 0,
			     &nent, cpuid->nent);
	r = -EFAULT;
	if (copy_to_user(entries, cpuid_entries,
			 nent * sizeof(struct kvm_cpuid_entry2)))
		goto out_free;
	cpuid->nent = nent;
	r = 0;

out_free:
	vfree(cpuid_entries);
out:
	return r;
}

static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
				    struct kvm_lapic_state *s)
{
	vcpu_load(vcpu);
	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
	kvm_apic_post_state_restore(vcpu);
	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
				    struct kvm_interrupt *irq)
{
	if (irq->irq < 0 || irq->irq >= 256)
		return -EINVAL;
	if (irqchip_in_kernel(vcpu->kvm))
		return -ENXIO;
	vcpu_load(vcpu);

	set_bit(irq->irq, vcpu->arch.irq_pending);
	set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary);

	vcpu_put(vcpu);

	return 0;
}

static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);
	kvm_inject_nmi(vcpu);
	vcpu_put(vcpu);

	return 0;
}

static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
					   struct kvm_tpr_access_ctl *tac)
{
	if (tac->flags)
		return -EINVAL;
	vcpu->arch.tpr_access_reporting = !!tac->enabled;
	return 0;
}

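/*
 * Dispatcher for the x86-specific vcpu ioctls.  The lapic buffer is
 * allocated by the LAPIC cases and freed on the common exit path, so
 * each case can simply 'goto out' on error.
 */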
long kvm_arch_vcpu_ioctl(struct file *filp,
			 unsigned int ioctl, unsigned long arg)
{
	struct kvm_vcpu *vcpu = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r;
	struct kvm_lapic_state *lapic = NULL;

	switch (ioctl) {
	case KVM_GET_LAPIC: {
		lapic = kzalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);

		r = -ENOMEM;
		if (!lapic)
			goto out;
		r = kvm_vcpu_ioctl_get_lapic(vcpu, lapic);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, lapic, sizeof(struct kvm_lapic_state)))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_LAPIC: {
		lapic = kmalloc(sizeof(struct kvm_lapic_state), GFP_KERNEL);
		r = -ENOMEM;
		if (!lapic)
			goto out;
		r = -EFAULT;
		if (copy_from_user(lapic, argp, sizeof(struct kvm_lapic_state)))
			goto out;
		r = kvm_vcpu_ioctl_set_lapic(vcpu, lapic);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_INTERRUPT: {
		struct kvm_interrupt irq;

		r = -EFAULT;
		if (copy_from_user(&irq, argp, sizeof irq))
			goto out;
		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_NMI: {
		r = kvm_vcpu_ioctl_nmi(vcpu);
		if (r)
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_CPUID: {
		struct kvm_cpuid __user *cpuid_arg = argp;
		struct kvm_cpuid cpuid;

		r = -EFAULT;
		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
			goto out;
		r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
		if (r)
			goto out;
		break;
	}
	case KVM_SET_CPUID2: {
		struct kvm_cpuid2 __user *cpuid_arg = argp;
		struct kvm_cpuid2 cpuid;

		r = -EFAULT;
		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
			goto out;
		r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
					      cpuid_arg->entries);
		if (r)
			goto out;
		break;
	}
	case KVM_GET_CPUID2: {
		struct kvm_cpuid2 __user *cpuid_arg = argp;
		struct kvm_cpuid2 cpuid;

		r = -EFAULT;
		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
			goto out;
		r = kvm_vcpu_ioctl_get_cpuid2(vcpu, &cpuid,
					      cpuid_arg->entries);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
			goto out;
		r = 0;
		break;
	}
	case KVM_GET_MSRS:
		r = msr_io(vcpu, argp, kvm_get_msr, 1);
		break;
	case KVM_SET_MSRS:
		r = msr_io(vcpu, argp, do_set_msr, 0);
		break;
	case KVM_TPR_ACCESS_REPORTING: {
		struct kvm_tpr_access_ctl tac;

		r = -EFAULT;
		if (copy_from_user(&tac, argp, sizeof tac))
			goto out;
		r = vcpu_ioctl_tpr_access_reporting(vcpu, &tac);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &tac, sizeof tac))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_VAPIC_ADDR: {
		struct kvm_vapic_addr va;

		r = -EINVAL;
		if (!irqchip_in_kernel(vcpu->kvm))
			goto out;
		r = -EFAULT;
		if (copy_from_user(&va, argp, sizeof va))
			goto out;
		r = 0;
		kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
		break;
	}
	default:
		r = -EINVAL;
	}
out:
	if (lapic)
		kfree(lapic);
	return r;
}

static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
{
	int ret;

	if (addr > (unsigned int)(-3 * PAGE_SIZE))
		return -1;
	ret = kvm_x86_ops->set_tss_addr(kvm, addr);
	return ret;
}

static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
					 u32 kvm_nr_mmu_pages)
{
	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
		return -EINVAL;

	down_write(&kvm->slots_lock);

	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;

	up_write(&kvm->slots_lock);
	return 0;
}

static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
{
	return kvm->arch.n_alloc_mmu_pages;
}

gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
{
	int i;
	struct kvm_mem_alias *alias;

	for (i = 0; i < kvm->arch.naliases; ++i) {
		alias = &kvm->arch.aliases[i];
		if (gfn >= alias->base_gfn
		    && gfn < alias->base_gfn + alias->npages)
			return alias->target_gfn + gfn - alias->base_gfn;
	}
	return gfn;
}

/*
 * Set a new alias region.  Aliases map a portion of physical memory into
 * another portion.  This is useful for memory windows, for example the PC
 * VGA region.
 */
static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
					 struct kvm_memory_alias *alias)
{
	int r, n;
	struct kvm_mem_alias *p;

	r = -EINVAL;
	/* General sanity checks */
	if (alias->memory_size & (PAGE_SIZE - 1))
		goto out;
	if (alias->guest_phys_addr & (PAGE_SIZE - 1))
		goto out;
	if (alias->slot >= KVM_ALIAS_SLOTS)
		goto out;
	if (alias->guest_phys_addr + alias->memory_size
	    < alias->guest_phys_addr)
		goto out;
	if (alias->target_phys_addr + alias->memory_size
	    < alias->target_phys_addr)
		goto out;

	down_write(&kvm->slots_lock);
	spin_lock(&kvm->mmu_lock);

	p = &kvm->arch.aliases[alias->slot];
	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
	p->npages = alias->memory_size >> PAGE_SHIFT;
	p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;

	for (n = KVM_ALIAS_SLOTS; n > 0; --n)
		if (kvm->arch.aliases[n - 1].npages)
			break;
	kvm->arch.naliases = n;

	spin_unlock(&kvm->mmu_lock);
	kvm_mmu_zap_all(kvm);

	up_write(&kvm->slots_lock);

	return 0;

out:
	return r;
}

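/*
 * Copy the register state of the in-kernel PIC master, PIC slave or
 * IOAPIC (selected by chip->chip_id) into the caller's buffer; the
 * setter below does the reverse and then lets the PIC re-evaluate any
 * pending interrupts.
 */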
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
	int r;

	r = 0;
	switch (chip->chip_id) {
	case KVM_IRQCHIP_PIC_MASTER:
		memcpy(&chip->chip.pic,
		       &pic_irqchip(kvm)->pics[0],
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_PIC_SLAVE:
		memcpy(&chip->chip.pic,
		       &pic_irqchip(kvm)->pics[1],
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_IOAPIC:
		memcpy(&chip->chip.ioapic,
		       ioapic_irqchip(kvm),
		       sizeof(struct kvm_ioapic_state));
		break;
	default:
		r = -EINVAL;
		break;
	}
	return r;
}

static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
	int r;

	r = 0;
	switch (chip->chip_id) {
	case KVM_IRQCHIP_PIC_MASTER:
		memcpy(&pic_irqchip(kvm)->pics[0],
		       &chip->chip.pic,
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_PIC_SLAVE:
		memcpy(&pic_irqchip(kvm)->pics[1],
		       &chip->chip.pic,
		       sizeof(struct kvm_pic_state));
		break;
	case KVM_IRQCHIP_IOAPIC:
		memcpy(ioapic_irqchip(kvm),
		       &chip->chip.ioapic,
		       sizeof(struct kvm_ioapic_state));
		break;
	default:
		r = -EINVAL;
		break;
	}
	kvm_pic_update_irq(pic_irqchip(kvm));
	return r;
}

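/*
 * Snapshot and restore the in-kernel PIT state; restoring also reloads
 * channel 0's counter so the timer resumes from the new count.
 */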
static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
{
	int r = 0;

	memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
	return r;
}

static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
{
	int r = 0;

	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
	kvm_pit_load_count(kvm, 0, ps->channels[0].count);
	return r;
}

/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	int n;
	struct kvm_memory_slot *memslot;
	int is_dirty = 0;

	down_write(&kvm->slots_lock);

	r = kvm_get_dirty_log(kvm, log, &is_dirty);
	if (r)
		goto out;

	/* If nothing is dirty, don't bother messing with page tables. */
	if (is_dirty) {
		kvm_mmu_slot_remove_write_access(kvm, log->slot);
		kvm_flush_remote_tlbs(kvm);
		memslot = &kvm->memslots[log->slot];
		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	up_write(&kvm->slots_lock);
	return r;
}

long kvm_arch_vm_ioctl(struct file *filp,
		       unsigned int ioctl, unsigned long arg)
{
	struct kvm *kvm = filp->private_data;
	void __user *argp = (void __user *)arg;
	int r = -EINVAL;
	/*
	 * This union makes it completely explicit to gcc-3.x
	 * that these two variables' stack usage should be
	 * combined, not added together.
	 */
	union {
		struct kvm_pit_state ps;
		struct kvm_memory_alias alias;
	} u;

	switch (ioctl) {
	case KVM_SET_TSS_ADDR:
		r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
		if (r < 0)
			goto out;
		break;
	case KVM_SET_MEMORY_REGION: {
		struct kvm_memory_region kvm_mem;
		struct kvm_userspace_memory_region kvm_userspace_mem;

		r = -EFAULT;
		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
			goto out;
		kvm_userspace_mem.slot = kvm_mem.slot;
		kvm_userspace_mem.flags = kvm_mem.flags;
		kvm_userspace_mem.guest_phys_addr = kvm_mem.guest_phys_addr;
		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
		r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
		if (r)
			goto out;
		break;
	}
	case KVM_SET_NR_MMU_PAGES:
		r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
		if (r)
			goto out;
		break;
	case KVM_GET_NR_MMU_PAGES:
		r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
		break;
	case KVM_SET_MEMORY_ALIAS:
		r = -EFAULT;
		if (copy_from_user(&u.alias, argp, sizeof(struct kvm_memory_alias)))
			goto out;
		r = kvm_vm_ioctl_set_memory_alias(kvm, &u.alias);
		if (r)
			goto out;
		break;
	case KVM_CREATE_IRQCHIP:
		r = -ENOMEM;
		kvm->arch.vpic = kvm_create_pic(kvm);
		if (kvm->arch.vpic) {
			r = kvm_ioapic_init(kvm);
			if (r) {
				kfree(kvm->arch.vpic);
				kvm->arch.vpic = NULL;
				goto out;
			}
		} else
			goto out;
		break;
	case KVM_CREATE_PIT:
		r = -ENOMEM;
		kvm->arch.vpit = kvm_create_pit(kvm);
		if (kvm->arch.vpit)
			r = 0;
		break;
	case KVM_IRQ_LINE: {
		struct kvm_irq_level irq_event;

		r = -EFAULT;
		if (copy_from_user(&irq_event, argp, sizeof irq_event))
			goto out;
		if (irqchip_in_kernel(kvm)) {
			mutex_lock(&kvm->lock);
			kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
				    irq_event.irq, irq_event.level);
			mutex_unlock(&kvm->lock);
			r = 0;
		}
		break;
	}
	case KVM_GET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
		struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);

		r = -ENOMEM;
		if (!chip)
			goto out;
		r = -EFAULT;
		if (copy_from_user(chip, argp, sizeof *chip))
			goto get_irqchip_out;
		r = -ENXIO;
		if (!irqchip_in_kernel(kvm))
			goto get_irqchip_out;
		r = kvm_vm_ioctl_get_irqchip(kvm, chip);
		if (r)
			goto get_irqchip_out;
		r = -EFAULT;
		if (copy_to_user(argp, chip, sizeof *chip))
			goto get_irqchip_out;
		r = 0;
	get_irqchip_out:
		kfree(chip);
		if (r)
			goto out;
		break;
	}
	case KVM_SET_IRQCHIP: {
		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
		struct kvm_irqchip *chip = kmalloc(sizeof(*chip), GFP_KERNEL);

		r = -ENOMEM;
		if (!chip)
			goto out;
		r = -EFAULT;
		if (copy_from_user(chip, argp, sizeof *chip))
			goto set_irqchip_out;
		r = -ENXIO;
		if (!irqchip_in_kernel(kvm))
			goto set_irqchip_out;
		r = kvm_vm_ioctl_set_irqchip(kvm, chip);
		if (r)
			goto set_irqchip_out;
		r = 0;
	set_irqchip_out:
		kfree(chip);
		if (r)
			goto out;
		break;
	}
	case KVM_GET_PIT: {
		r = -EFAULT;
		if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))
			goto out;
		r = -ENXIO;
		if (!kvm->arch.vpit)
			goto out;
		r = kvm_vm_ioctl_get_pit(kvm, &u.ps);
		if (r)
			goto out;
		r = -EFAULT;
		if (copy_to_user(argp, &u.ps, sizeof(struct kvm_pit_state)))
			goto out;
		r = 0;
		break;
	}
	case KVM_SET_PIT: {
		r = -EFAULT;
		if (copy_from_user(&u.ps, argp, sizeof u.ps))
			goto out;
		r = -ENXIO;
		if (!kvm->arch.vpit)
			goto out;
		r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
		if (r)
			goto out;
		r = 0;
		break;
	}
	default:
		;
	}
out:
	return r;
}

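/*
 * Probe each MSR in msrs_to_save with rdmsr_safe() and compact the
 * list in place, so that only MSRs actually present on the host
 * remain.
 */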
static void kvm_init_msr_list(void)
{
	u32 dummy[2];
	unsigned i, j;

	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
			continue;
		if (j < i)
			msrs_to_save[j] = msrs_to_save[i];
		j++;
	}
	num_msrs_to_save = j;
}

/*
 * Only the APIC needs a per-vcpu MMIO device hook, so take a shortcut here.
 */
static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu,
						   gpa_t addr, int len,
						   int is_write)
{
	struct kvm_io_device *dev;

	if (vcpu->arch.apic) {
		dev = &vcpu->arch.apic->dev;
		if (dev->in_range(dev, addr, len, is_write))
			return dev;
	}
	return NULL;
}

static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
						gpa_t addr, int len,
						int is_write)
{
	struct kvm_io_device *dev;

	dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write);
	if (dev == NULL)
		dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len,
					  is_write);
	return dev;
}

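/*
 * Read from guest virtual memory, walking the guest page tables one
 * page at a time so that reads crossing a page boundary are handled.
 */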
int emulator_read_std(unsigned long addr,
		      void *val,
		      unsigned int bytes,
		      struct kvm_vcpu *vcpu)
{
	void *data = val;
	int r = X86EMUL_CONTINUE;

	while (bytes) {
		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
		unsigned offset = addr & (PAGE_SIZE-1);
		unsigned tocopy = min(bytes, (unsigned)PAGE_SIZE - offset);
		int ret;

		if (gpa == UNMAPPED_GVA) {
			r = X86EMUL_PROPAGATE_FAULT;
			goto out;
		}
		ret = kvm_read_guest(vcpu->kvm, gpa, data, tocopy);
		if (ret < 0) {
			r = X86EMUL_UNHANDLEABLE;
			goto out;
		}

		bytes -= tocopy;
		data += tocopy;
		addr += tocopy;
	}
out:
	return r;
}
EXPORT_SYMBOL_GPL(emulator_read_std);

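/*
 * Either complete an MMIO read that userspace has just finished, read
 * ordinary memory directly, or record a pending MMIO request so the
 * caller can exit to userspace.
 */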
static int emulator_read_emulated(unsigned long addr,
				  void *val,
				  unsigned int bytes,
				  struct kvm_vcpu *vcpu)
{
	struct kvm_io_device *mmio_dev;
	gpa_t gpa;

	if (vcpu->mmio_read_completed) {
		memcpy(val, vcpu->mmio_data, bytes);
		vcpu->mmio_read_completed = 0;
		return X86EMUL_CONTINUE;
	}

	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);

	/* For APIC access vmexit */
	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
		goto mmio;

	if (emulator_read_std(addr, val, bytes, vcpu)
			== X86EMUL_CONTINUE)
		return X86EMUL_CONTINUE;
	if (gpa == UNMAPPED_GVA)
		return X86EMUL_PROPAGATE_FAULT;

mmio:
	/*
	 * Is this MMIO handled locally?
	 */
	mutex_lock(&vcpu->kvm->lock);
	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0);
	if (mmio_dev) {
		kvm_iodevice_read(mmio_dev, gpa, bytes, val);
		mutex_unlock(&vcpu->kvm->lock);
		return X86EMUL_CONTINUE;
	}
	mutex_unlock(&vcpu->kvm->lock);

	vcpu->mmio_needed = 1;
	vcpu->mmio_phys_addr = gpa;
	vcpu->mmio_size = bytes;
	vcpu->mmio_is_write = 0;

	return X86EMUL_UNHANDLEABLE;
}

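/*
 * Write guest physical memory directly and notify the shadow MMU via
 * kvm_mmu_pte_write(), in case the write touched a shadowed page table.
 */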
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
			const void *val, int bytes)
{
	int ret;

	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
	if (ret < 0)
		return 0;
	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
	return 1;
}

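/*
 * Emulate a write confined to a single page; writes that hit the APIC
 * page or unbacked memory are routed to the MMIO path instead.
 */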
static int emulator_write_emulated_onepage(unsigned long addr,
					   const void *val,
					   unsigned int bytes,
					   struct kvm_vcpu *vcpu)
{
	struct kvm_io_device *mmio_dev;
	gpa_t gpa;

	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);

	if (gpa == UNMAPPED_GVA) {
		kvm_inject_page_fault(vcpu, addr, 2);
		return X86EMUL_PROPAGATE_FAULT;
	}

	/* For APIC access vmexit */
	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
		goto mmio;

	if (emulator_write_phys(vcpu, gpa, val, bytes))
		return X86EMUL_CONTINUE;

mmio:
	/*
	 * Is this MMIO handled locally?
	 */
	mutex_lock(&vcpu->kvm->lock);
	mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1);
	if (mmio_dev) {
		kvm_iodevice_write(mmio_dev, gpa, bytes, val);
		mutex_unlock(&vcpu->kvm->lock);
		return X86EMUL_CONTINUE;
	}
	mutex_unlock(&vcpu->kvm->lock);

	vcpu->mmio_needed = 1;
	vcpu->mmio_phys_addr = gpa;
	vcpu->mmio_size = bytes;
	vcpu->mmio_is_write = 1;
	memcpy(vcpu->mmio_data, val, bytes);

	return X86EMUL_CONTINUE;
}

int emulator_write_emulated(unsigned long addr,
			    const void *val,
			    unsigned int bytes,
			    struct kvm_vcpu *vcpu)
{
	/* Crossing a page boundary? */
	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
		int rc, now;

		now = -addr & ~PAGE_MASK;
		rc = emulator_write_emulated_onepage(addr, val, now, vcpu);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		addr += now;
		val += now;
		bytes -= now;
	}
	return emulator_write_emulated_onepage(addr, val, bytes, vcpu);
}
EXPORT_SYMBOL_GPL(emulator_write_emulated);

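/*
 * cmpxchg is emulated as a plain (non-atomic) write, except for the
 * 8-byte case on 32-bit hosts, which is performed atomically below.
 */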
static int emulator_cmpxchg_emulated(unsigned long addr,
				     const void *old,
				     const void *new,
				     unsigned int bytes,
				     struct kvm_vcpu *vcpu)
{
	static int reported;

	if (!reported) {
		reported = 1;
		printk(KERN_WARNING "kvm: emulating exchange as write\n");
	}
#ifndef CONFIG_X86_64
	/* guest cmpxchg8b has to be emulated atomically */
	if (bytes == 8) {
		gpa_t gpa;
		struct page *page;
		char *kaddr;
		u64 val;

		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);

		if (gpa == UNMAPPED_GVA ||
		    (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
			goto emul_write;

		if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
			goto emul_write;

		val = *(u64 *)new;

		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);

		kaddr = kmap_atomic(page, KM_USER0);
		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
		kunmap_atomic(kaddr, KM_USER0);
		kvm_release_page_dirty(page);
	}
emul_write:
#endif

	return emulator_write_emulated(addr, new, bytes, vcpu);
}

static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
	return kvm_x86_ops->get_segment_base(vcpu, seg);
}

int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
{
	kvm_mmu_invlpg(vcpu, address);
	return X86EMUL_CONTINUE;
}

int emulate_clts(struct kvm_vcpu *vcpu)
{
	KVMTRACE_0D(CLTS, vcpu, handler);
	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS);
	return X86EMUL_CONTINUE;
}

int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
{
	struct kvm_vcpu *vcpu = ctxt->vcpu;

	switch (dr) {
	case 0 ... 3:
		*dest = kvm_x86_ops->get_dr(vcpu, dr);
		return X86EMUL_CONTINUE;
	default:
		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
		return X86EMUL_UNHANDLEABLE;
	}
}

int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
{
	unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U;
	int exception;

	kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask, &exception);
	if (exception) {
		/* FIXME: better handling */
		return X86EMUL_UNHANDLEABLE;
	}
	return X86EMUL_CONTINUE;
}

void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
{
	u8 opcodes[4];
	unsigned long rip = kvm_rip_read(vcpu);
	unsigned long rip_linear;

	if (!printk_ratelimit())
		return;

	rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);

	emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu);

	printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
	       context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
}
EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);

static struct x86_emulate_ops emulate_ops = {
	.read_std            = emulator_read_std,
	.read_emulated       = emulator_read_emulated,
	.write_emulated      = emulator_write_emulated,
	.cmpxchg_emulated    = emulator_cmpxchg_emulated,
};

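/*
 * Pull the lazily cached registers (RSP and RIP live in the VMCS on
 * VMX) into the register cache and mark every register dirty, so the
 * emulator's direct ->regs accesses are seen and written back later.
 */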
static void cache_all_regs(struct kvm_vcpu *vcpu)
{
	kvm_register_read(vcpu, VCPU_REGS_RAX);
	kvm_register_read(vcpu, VCPU_REGS_RSP);
	kvm_register_read(vcpu, VCPU_REGS_RIP);
	vcpu->arch.regs_dirty = ~0;
}

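/*
 * Decode and run one guest instruction through the x86 emulator.
 * Returns EMULATE_DONE on success, EMULATE_DO_MMIO when userspace must
 * complete an I/O or MMIO transaction, and EMULATE_FAIL otherwise.
 */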
int emulate_instruction(struct kvm_vcpu *vcpu,
			struct kvm_run *run,
			unsigned long cr2,
			u16 error_code,
			int emulation_type)
{
	int r;
	struct decode_cache *c;

	kvm_clear_exception_queue(vcpu);
	vcpu->arch.mmio_fault_cr2 = cr2;
	/*
	 * TODO: fix x86_emulate.c to use guest_read/write_register
	 * instead of direct ->regs accesses; that can save hundreds of
	 * cycles on Intel for instructions that don't read/change RSP,
	 * for example.
	 */
	cache_all_regs(vcpu);

	vcpu->mmio_is_write = 0;
	vcpu->arch.pio.string = 0;

	if (!(emulation_type & EMULTYPE_NO_DECODE)) {
		int cs_db, cs_l;
		kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);

		vcpu->arch.emulate_ctxt.vcpu = vcpu;
		vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
		vcpu->arch.emulate_ctxt.mode =
			(vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM)
			? X86EMUL_MODE_REAL : cs_l
			? X86EMUL_MODE_PROT64 : cs_db
			? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;

		r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);

		/* When asked to emulate an invalid-opcode trap, reject
		 * every instruction other than VMCALL/VMMCALL. */
		c = &vcpu->arch.emulate_ctxt.decode;
		if ((emulation_type & EMULTYPE_TRAP_UD) &&
		    (!(c->twobyte && c->b == 0x01 &&
		      (c->modrm_reg == 0 || c->modrm_reg == 3) &&
		       c->modrm_mod == 3 && c->modrm_rm == 1)))
			return EMULATE_FAIL;

		++vcpu->stat.insn_emulation;
		if (r) {
			++vcpu->stat.insn_emulation_fail;
			if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
				return EMULATE_DONE;
			return EMULATE_FAIL;
		}
	}

	r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops);

	if (vcpu->arch.pio.string)
		return EMULATE_DO_MMIO;

	if ((r || vcpu->mmio_is_write) && run) {
		run->exit_reason = KVM_EXIT_MMIO;
		run->mmio.phys_addr = vcpu->mmio_phys_addr;
		memcpy(run->mmio.data, vcpu->mmio_data, 8);
		run->mmio.len = vcpu->mmio_size;
		run->mmio.is_write = vcpu->mmio_is_write;
	}

	if (r) {
		if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
			return EMULATE_DONE;
		if (!vcpu->mmio_needed) {
			kvm_report_emulation_failure(vcpu, "mmio");
			return EMULATE_FAIL;
		}
		return EMULATE_DO_MMIO;
	}

	kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags);

	if (vcpu->mmio_is_write) {
		vcpu->mmio_needed = 0;
		return EMULATE_DO_MMIO;
	}

	return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(emulate_instruction);

static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vcpu->arch.pio.guest_pages); ++i)
		if (vcpu->arch.pio.guest_pages[i]) {
			kvm_release_page_dirty(vcpu->arch.pio.guest_pages[i]);
			vcpu->arch.pio.guest_pages[i] = NULL;
		}
}

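/*
 * Copy string PIO data between the pio_data page and the pinned guest
 * pages, which are mapped contiguously with vmap() for the copy.
 */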
static int pio_copy_data(struct kvm_vcpu *vcpu)
{
	void *p = vcpu->arch.pio_data;
	void *q;
	unsigned bytes;
	int nr_pages = vcpu->arch.pio.guest_pages[1] ? 2 : 1;

	q = vmap(vcpu->arch.pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
		 PAGE_KERNEL);
	if (!q) {
		free_pio_guest_pages(vcpu);
		return -ENOMEM;
	}
	q += vcpu->arch.pio.guest_page_offset;
	bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
	if (vcpu->arch.pio.in)
		memcpy(q, p, bytes);
	else
		memcpy(p, q, bytes);
	q -= vcpu->arch.pio.guest_page_offset;
	vunmap(q);
	free_pio_guest_pages(vcpu);
	return 0;
}

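/*
 * Finish a port I/O operation: store the result in RAX for a simple
 * IN, or advance RCX/RSI/RDI by the amount transferred for string I/O.
 */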
int complete_pio(struct kvm_vcpu *vcpu)
{
	struct kvm_pio_request *io = &vcpu->arch.pio;
	long delta;
	int r;
	unsigned long val;

	if (!io->string) {
		if (io->in) {
			val = kvm_register_read(vcpu, VCPU_REGS_RAX);
			memcpy(&val, vcpu->arch.pio_data, io->size);
			kvm_register_write(vcpu, VCPU_REGS_RAX, val);
		}
	} else {
		if (io->in) {
			r = pio_copy_data(vcpu);
			if (r)
				return r;
		}

		delta = 1;
		if (io->rep) {
			delta *= io->cur_count;
			/*
			 * The size of the register should really depend on
			 * current address size.
			 */
			val = kvm_register_read(vcpu, VCPU_REGS_RCX);
			val -= delta;
			kvm_register_write(vcpu, VCPU_REGS_RCX, val);
		}
		if (io->down)
			delta = -delta;
		delta *= io->size;
		if (io->in) {
			val = kvm_register_read(vcpu, VCPU_REGS_RDI);
			val += delta;
			kvm_register_write(vcpu, VCPU_REGS_RDI, val);
		} else {
			val = kvm_register_read(vcpu, VCPU_REGS_RSI);
			val += delta;
			kvm_register_write(vcpu, VCPU_REGS_RSI, val);
		}
	}

	io->count -= io->cur_count;
	io->cur_count = 0;

	return 0;
}

static void kernel_pio(struct kvm_io_device *pio_dev,
		       struct kvm_vcpu *vcpu,
		       void *pd)
{
	/* TODO: String I/O for in-kernel devices */

	mutex_lock(&vcpu->kvm->lock);
	if (vcpu->arch.pio.in)
		kvm_iodevice_read(pio_dev, vcpu->arch.pio.port,
				  vcpu->arch.pio.size,
				  pd);
	else
		kvm_iodevice_write(pio_dev, vcpu->arch.pio.port,
				   vcpu->arch.pio.size,
				   pd);
	mutex_unlock(&vcpu->kvm->lock);
}

static void pio_string_write(struct kvm_io_device *pio_dev,
			     struct kvm_vcpu *vcpu)
{
	struct kvm_pio_request *io = &vcpu->arch.pio;
	void *pd = vcpu->arch.pio_data;
	int i;

	mutex_lock(&vcpu->kvm->lock);
	for (i = 0; i < io->cur_count; i++) {
		kvm_iodevice_write(pio_dev, io->port,
				   io->size,
				   pd);
		pd += io->size;
	}
	mutex_unlock(&vcpu->kvm->lock);
}

static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
					       gpa_t addr, int len,
					       int is_write)
{
	return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write);
}

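/*
 * Emulate a single (non-string) IN or OUT.  The request is either
 * satisfied by an in-kernel device (return 1) or left in kvm_run for
 * userspace to complete (return 0).
 */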
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
		    int size, unsigned port)
{
	struct kvm_io_device *pio_dev;
	unsigned long val;

	vcpu->run->exit_reason = KVM_EXIT_IO;
	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
	vcpu->run->io.size = vcpu->arch.pio.size = size;
	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
	vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1;
	vcpu->run->io.port = vcpu->arch.pio.port = port;
	vcpu->arch.pio.in = in;
	vcpu->arch.pio.string = 0;
	vcpu->arch.pio.down = 0;
	vcpu->arch.pio.guest_page_offset = 0;
	vcpu->arch.pio.rep = 0;

	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
			    handler);
	else
		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
			    handler);

	val = kvm_register_read(vcpu, VCPU_REGS_RAX);
	memcpy(vcpu->arch.pio_data, &val, 4);

	kvm_x86_ops->skip_emulated_instruction(vcpu);

	pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in);
	if (pio_dev) {
		kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data);
		complete_pio(vcpu);
		return 1;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio);

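/*
 * Emulate string port I/O.  At most one page-crossing transaction is
 * handled per call; the touched guest pages are pinned so in-kernel
 * devices can access them safely.
 */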
int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
			   int size, unsigned long count, int down,
			   gva_t address, int rep, unsigned port)
{
	unsigned now, in_page;
	int i, ret = 0;
	int nr_pages = 1;
	struct page *page;
	struct kvm_io_device *pio_dev;

	vcpu->run->exit_reason = KVM_EXIT_IO;
	vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
	vcpu->run->io.size = vcpu->arch.pio.size = size;
	vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE;
	vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count;
	vcpu->run->io.port = vcpu->arch.pio.port = port;
	vcpu->arch.pio.in = in;
	vcpu->arch.pio.string = 1;
	vcpu->arch.pio.down = down;
	vcpu->arch.pio.guest_page_offset = offset_in_page(address);
	vcpu->arch.pio.rep = rep;

	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
			    handler);
	else
		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
			    handler);

	if (!count) {
		kvm_x86_ops->skip_emulated_instruction(vcpu);
		return 1;
	}

	if (!down)
		in_page = PAGE_SIZE - offset_in_page(address);
	else
		in_page = offset_in_page(address) + size;
	now = min(count, (unsigned long)in_page / size);
	if (!now) {
		/*
		 * String I/O straddles a page boundary.  Pin two guest pages
		 * so that we satisfy atomicity constraints.  Do just one
		 * transaction to avoid complexity.
		 */
		nr_pages = 2;
		now = 1;
	}
	if (down) {
		/*
		 * String I/O in reverse.  Yuck.  Kill the guest, fix later.
		 */
		pr_unimpl(vcpu, "guest string pio down\n");
		kvm_inject_gp(vcpu, 0);
		return 1;
	}
	vcpu->run->io.count = now;
	vcpu->arch.pio.cur_count = now;

	if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count)
		kvm_x86_ops->skip_emulated_instruction(vcpu);

	for (i = 0; i < nr_pages; ++i) {
		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
		vcpu->arch.pio.guest_pages[i] = page;
		if (!page) {
			kvm_inject_gp(vcpu, 0);
			free_pio_guest_pages(vcpu);
			return 1;
		}
	}

	pio_dev = vcpu_find_pio_dev(vcpu, port,
				    vcpu->arch.pio.cur_count,
				    !vcpu->arch.pio.in);
	if (!vcpu->arch.pio.in) {
		/* string PIO write */
		ret = pio_copy_data(vcpu);
		if (ret >= 0 && pio_dev) {
			pio_string_write(pio_dev, vcpu);
			complete_pio(vcpu);
			if (vcpu->arch.pio.count == 0)
				ret = 1;
		}
	} else if (pio_dev)
		pr_unimpl(vcpu, "no string pio read support yet, "
			  "port %x size %d count %ld\n",
			  port, size, count);

	return ret;
}
EXPORT_SYMBOL_GPL(kvm_emulate_pio_string);

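/*
 * Module-level initialization, called by the vendor module (VMX or
 * SVM) with its kvm_x86_ops: verify hardware support, set up the MMU
 * and the MSR save list, and publish the ops table.
 */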
int kvm_arch_init(void *opaque)
{
	int r;
	struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;

	if (kvm_x86_ops) {
		printk(KERN_ERR "kvm: already loaded the other module\n");
		r = -EEXIST;
		goto out;
	}

	if (!ops->cpu_has_kvm_support()) {
		printk(KERN_ERR "kvm: no hardware support\n");
		r = -EOPNOTSUPP;
		goto out;
	}
	if (ops->disabled_by_bios()) {
		printk(KERN_ERR "kvm: disabled by bios\n");
		r = -EOPNOTSUPP;
		goto out;
	}

	r = kvm_mmu_module_init();
	if (r)
		goto out;

	kvm_init_msr_list();

	kvm_x86_ops = ops;
	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
			PT_DIRTY_MASK, PT64_NX_MASK, 0);
	return 0;

out:
	return r;
}

void kvm_arch_exit(void)
{
	kvm_x86_ops = NULL;
	kvm_mmu_module_exit();
}

int kvm_emulate_halt(struct kvm_vcpu *vcpu)
{
	++vcpu->stat.halt_exits;
	KVMTRACE_0D(HLT, vcpu, handler);
	if (irqchip_in_kernel(vcpu->kvm)) {
		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
		return 1;
	} else {
		vcpu->run->exit_reason = KVM_EXIT_HLT;
		return 0;
	}
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);

static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
			   unsigned long a1)
{
	if (is_long_mode(vcpu))
		return a0;
	else
		return a0 | ((gpa_t)a1 << 32);
}

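/*
 * Hypercall ABI: the number arrives in RAX with up to four arguments
 * in RBX, RCX, RDX and RSI, and the return value is passed back in
 * RAX.  Outside long mode everything is truncated to 32 bits first.
 */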
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
	unsigned long nr, a0, a1, a2, a3, ret;
	int r = 1;

	nr = kvm_register_read(vcpu, VCPU_REGS_RAX);
	a0 = kvm_register_read(vcpu, VCPU_REGS_RBX);
	a1 = kvm_register_read(vcpu, VCPU_REGS_RCX);
	a2 = kvm_register_read(vcpu, VCPU_REGS_RDX);
	a3 = kvm_register_read(vcpu, VCPU_REGS_RSI);

	KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);

	if (!is_long_mode(vcpu)) {
		nr &= 0xFFFFFFFF;
		a0 &= 0xFFFFFFFF;
		a1 &= 0xFFFFFFFF;
		a2 &= 0xFFFFFFFF;
		a3 &= 0xFFFFFFFF;
	}

	switch (nr) {
	case KVM_HC_VAPIC_POLL_IRQ:
		ret = 0;
		break;
	case KVM_HC_MMU_OP:
		r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
		break;
	default:
		ret = -KVM_ENOSYS;
		break;
	}
	kvm_register_write(vcpu, VCPU_REGS_RAX, ret);
	++vcpu->stat.hypercalls;
	return r;
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);

int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
{
	char instruction[3];
	int ret = 0;
	unsigned long rip = kvm_rip_read(vcpu);

	/*
	 * Blow out the MMU so that no other VCPU can have an active
	 * mapping; this ensures that the updated hypercall appears
	 * atomically across all VCPUs.
	 */
	kvm_mmu_zap_all(vcpu->kvm);

	kvm_x86_ops->patch_hypercall(vcpu, instruction);
	if (emulator_write_emulated(rip, instruction, 3, vcpu)
	    != X86EMUL_CONTINUE)
		ret = -EFAULT;

	return ret;
}

static u64 mk_cr_64(u64 curr_cr, u32 new_val)
{
	return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
}

void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
	struct descriptor_table dt = { limit, base };

	kvm_x86_ops->set_gdt(vcpu, &dt);
}

void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
{
	struct descriptor_table dt = { limit, base };

	kvm_x86_ops->set_idt(vcpu, &dt);
}

void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
		   unsigned long *rflags)
{
	kvm_lmsw(vcpu, msw);
	*rflags = kvm_x86_ops->get_rflags(vcpu);
}

unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
{
	unsigned long value;

	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
	switch (cr) {
	case 0:
		value = vcpu->arch.cr0;
		break;
	case 2:
		value = vcpu->arch.cr2;
		break;
	case 3:
		value = vcpu->arch.cr3;
		break;
	case 4:
		value = vcpu->arch.cr4;
		break;
	case 8:
		value = kvm_get_cr8(vcpu);
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
		return 0;
	}
	KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value,
		    (u32)((u64)value >> 32), handler);

	return value;
}

void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
		     unsigned long *rflags)
{
	KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val,
		    (u32)((u64)val >> 32), handler);

	switch (cr) {
	case 0:
		kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
		*rflags = kvm_x86_ops->get_rflags(vcpu);
		break;
	case 2:
		vcpu->arch.cr2 = val;
		break;
	case 3:
		kvm_set_cr3(vcpu, val);
		break;
	case 4:
		kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
		break;
	case 8:
		kvm_set_cr8(vcpu, val & 0xfUL);
		break;
	default:
		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
	}
}

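/*
 * Some CPUID leaves are stateful and return different values on
 * repeated reads; the READ_NEXT flag tracks which duplicate entry of a
 * function should be returned next, cycling back to the first one.
 */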
static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i)
{
	struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i];
	int j, nent = vcpu->arch.cpuid_nent;

	e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT;
	/* when no next entry is found, the current entry[i] is reselected */
	for (j = i + 1; ; j = (j + 1) % nent) {
		struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j];
		if (ej->function == e->function) {
			ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
			return j;
		}
	}
	return 0; /* silence gcc, even though control never reaches here */
}

/* find an entry with matching function, matching index (if needed), and that
 * should be read next (if it's stateful) */
static int is_matching_cpuid_entry(struct kvm_cpuid_entry2 *e,
	u32 function, u32 index)
{
	if (e->function != function)
		return 0;
	if ((e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX) && e->index != index)
		return 0;
	if ((e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC) &&
	    !(e->flags & KVM_CPUID_FLAG_STATE_READ_NEXT))
		return 0;
	return 1;
}

void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
	int i;
	u32 function, index;
	struct kvm_cpuid_entry2 *e, *best;

	function = kvm_register_read(vcpu, VCPU_REGS_RAX);
	index = kvm_register_read(vcpu, VCPU_REGS_RCX);
	kvm_register_write(vcpu, VCPU_REGS_RAX, 0);
	kvm_register_write(vcpu, VCPU_REGS_RBX, 0);
	kvm_register_write(vcpu, VCPU_REGS_RCX, 0);
	kvm_register_write(vcpu, VCPU_REGS_RDX, 0);
	best = NULL;
	for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
		e = &vcpu->arch.cpuid_entries[i];
		if (is_matching_cpuid_entry(e, function, index)) {
			if (e->flags & KVM_CPUID_FLAG_STATEFUL_FUNC)
				move_to_next_stateful_cpuid_entry(vcpu, i);
			best = e;
			break;
		}
		/*
		 * Both basic or both extended?
		 */
		if (((e->function ^ function) & 0x80000000) == 0)
			if (!best || e->function > best->function)
				best = e;
	}
	if (best) {
		kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax);
		kvm_register_write(vcpu, VCPU_REGS_RBX, best->ebx);
		kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx);
		kvm_register_write(vcpu, VCPU_REGS_RDX, best->edx);
	}
	kvm_x86_ops->skip_emulated_instruction(vcpu);
	KVMTRACE_5D(CPUID, vcpu, function,
		    (u32)kvm_register_read(vcpu, VCPU_REGS_RAX),
		    (u32)kvm_register_read(vcpu, VCPU_REGS_RBX),
		    (u32)kvm_register_read(vcpu, VCPU_REGS_RCX),
		    (u32)kvm_register_read(vcpu, VCPU_REGS_RDX), handler);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);

/*
 * Check if userspace requested an interrupt window, and that the
 * interrupt window is open.
 *
 * No need to exit to userspace if we already have an interrupt queued.
 */
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
					struct kvm_run *kvm_run)
{
	return (!vcpu->arch.irq_summary &&
		kvm_run->request_interrupt_window &&
		vcpu->arch.interrupt_window_open &&
		(kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF));
}

/*
 * Check if userspace requested an NMI window, and that the NMI window
 * is open.
 *
 * No need to exit to userspace if we already have an NMI queued.
 */
static int dm_request_for_nmi_injection(struct kvm_vcpu *vcpu,
					struct kvm_run *kvm_run)
{
	return (!vcpu->arch.nmi_pending &&
		kvm_run->request_nmi_window &&
		vcpu->arch.nmi_window_open);
}

static void post_kvm_run_save(struct kvm_vcpu *vcpu,
			      struct kvm_run *kvm_run)
{
	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
	kvm_run->cr8 = kvm_get_cr8(vcpu);
	kvm_run->apic_base = kvm_get_apic_base(vcpu);
	if (irqchip_in_kernel(vcpu->kvm)) {
		kvm_run->ready_for_interrupt_injection = 1;
		kvm_run->ready_for_nmi_injection = 1;
	} else {
		kvm_run->ready_for_interrupt_injection =
			(vcpu->arch.interrupt_window_open &&
			 vcpu->arch.irq_summary == 0);
		kvm_run->ready_for_nmi_injection =
			(vcpu->arch.nmi_window_open &&
			 vcpu->arch.nmi_pending == 0);
	}
}

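/*
 * Pin the guest page backing the virtual APIC state before entering
 * the guest; the exit path releases it and marks it dirty.
 */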
static void vapic_enter(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;
	struct page *page;

	if (!apic || !apic->vapic_addr)
		return;

	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);

	vcpu->arch.apic->vapic_page = page;
}

static void vapic_exit(struct kvm_vcpu *vcpu)
{
	struct kvm_lapic *apic = vcpu->arch.apic;

	if (!apic || !apic->vapic_addr)
		return;

	down_read(&vcpu->kvm->slots_lock);
	kvm_release_page_dirty(apic->vapic_page);
	mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
	up_read(&vcpu->kvm->slots_lock);
}

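/*
 * The body of the inner run loop: service pending requests, inject
 * pending exceptions or interrupts, enter the guest, and hand the exit
 * to the vendor exit handler.  Returns a positive value to keep
 * looping in the kernel, 0 to exit to userspace, or a negative error.
 */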
Marcelo Tosattid7690172008-09-08 15:23:48 -03002892static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002893{
2894 int r;
2895
Marcelo Tosatti2e53d632008-02-20 14:47:24 -05002896 if (vcpu->requests)
2897 if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
2898 kvm_mmu_unload(vcpu);
2899
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002900 r = kvm_mmu_reload(vcpu);
2901 if (unlikely(r))
2902 goto out;
2903
Avi Kivity2f52d582008-01-16 12:49:30 +02002904 if (vcpu->requests) {
2905 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
Marcelo Tosatti2f599712008-05-27 12:10:20 -03002906 __kvm_migrate_timers(vcpu);
Marcelo Tosatti4731d4c2008-09-23 13:18:39 -03002907 if (test_and_clear_bit(KVM_REQ_MMU_SYNC, &vcpu->requests))
2908 kvm_mmu_sync_roots(vcpu);
Marcelo Tosattid4acf7e2008-06-06 16:37:35 -03002909 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
2910 kvm_x86_ops->tlb_flush(vcpu);
Avi Kivityb93463a2007-10-25 16:52:32 +02002911 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
2912 &vcpu->requests)) {
2913 kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
2914 r = 0;
2915 goto out;
2916 }
Joerg Roedel71c4dfa2008-02-26 16:49:16 +01002917 if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
2918 kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
2919 r = 0;
2920 goto out;
2921 }
Avi Kivity2f52d582008-01-16 12:49:30 +02002922 }
Avi Kivityb93463a2007-10-25 16:52:32 +02002923
Marcelo Tosatti06e05642008-06-06 16:37:36 -03002924 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002925 kvm_inject_pending_timer_irqs(vcpu);
2926
2927 preempt_disable();
2928
2929 kvm_x86_ops->prepare_guest_switch(vcpu);
2930 kvm_load_guest_fpu(vcpu);
2931
2932 local_irq_disable();
2933
Marcelo Tosattid7690172008-09-08 15:23:48 -03002934 if (vcpu->requests || need_resched() || signal_pending(current)) {
Avi Kivity6c1428012008-01-15 18:27:32 +02002935 local_irq_enable();
2936 preempt_enable();
2937 r = 1;
2938 goto out;
2939 }
2940
Marcelo Tosatti29415c32008-08-01 20:09:13 -03002941 if (vcpu->guest_debug.enabled)
2942 kvm_x86_ops->guest_debug_pre(vcpu);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002943
Marcelo Tosattie9571ed2008-04-11 15:01:22 -03002944 vcpu->guest_mode = 1;
2945 /*
2946 * Make sure that guest_mode assignment won't happen after
2947 * testing the pending IRQ vector bitmap.
2948 */
2949 smp_wmb();
2950
Zhang Xiantaoad312c72007-12-13 23:50:52 +08002951 if (vcpu->arch.exception.pending)
Avi Kivity298101d2007-11-25 13:41:11 +02002952 __queue_exception(vcpu);
2953 else if (irqchip_in_kernel(vcpu->kvm))
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002954 kvm_x86_ops->inject_pending_irq(vcpu);
Avi Kivityeb9774f2007-11-25 17:45:31 +02002955 else
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002956 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);
2957
Avi Kivityb93463a2007-10-25 16:52:32 +02002958 kvm_lapic_sync_to_vapic(vcpu);
2959
Marcelo Tosatti3200f402008-03-29 20:17:59 -03002960 up_read(&vcpu->kvm->slots_lock);
2961
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002962 kvm_guest_enter();
2963
Feng (Eric) Liu2714d1d2008-04-10 15:31:10 -04002965 KVMTRACE_0D(VMENTRY, vcpu, entryexit);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002966 kvm_x86_ops->run(vcpu, kvm_run);
2967
2968 vcpu->guest_mode = 0;
2969 local_irq_enable();
2970
2971 ++vcpu->stat.exits;
2972
2973 /*
2974 * We must have an instruction between local_irq_enable() and
2975 * kvm_guest_exit(), so the timer interrupt isn't delayed by
2976 * the interrupt shadow. The stat.exits increment will do nicely.
2977 * But we need to prevent reordering, hence this barrier():
2978 */
2979 barrier();
2980
2981 kvm_guest_exit();
2982
2983 preempt_enable();
2984
Marcelo Tosatti3200f402008-03-29 20:17:59 -03002985 down_read(&vcpu->kvm->slots_lock);
2986
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002987 /*
2988 * Profile KVM exit RIPs:
2989 */
2990 if (unlikely(prof_on == KVM_PROFILING)) {
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03002991 unsigned long rip = kvm_rip_read(vcpu);
2992 profile_hit(KVM_PROFILING, (void *)rip);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05002993 }
2994
Zhang Xiantaoad312c72007-12-13 23:50:52 +08002995 if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu))
2996 vcpu->arch.exception.pending = false;
Avi Kivity298101d2007-11-25 13:41:11 +02002997
Avi Kivityb93463a2007-10-25 16:52:32 +02002998 kvm_lapic_sync_from_vapic(vcpu);
2999
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003000 r = kvm_x86_ops->handle_exit(kvm_run, vcpu);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003001out:
Marcelo Tosattid7690172008-09-08 15:23:48 -03003002 return r;
3003}
3004
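Every request consumed above follows the same pattern: a producer sets a bit in vcpu->requests and kicks the vcpu, and the vcpu drains it with test_and_clear_bit() before entry; the second look at vcpu->requests once interrupts are disabled is what lets a late request abort the entry instead of being missed. A minimal userspace analogue of the bit handling, with C11 atomics standing in for the kernel bitops (illustrative only, not the kernel implementation):

#include <stdatomic.h>

static _Atomic unsigned long requests;

static void make_request(int bit)		/* like set_bit() + kick */
{
	atomic_fetch_or(&requests, 1UL << bit);
}

static int check_request(int bit)		/* like test_and_clear_bit() */
{
	unsigned long mask = 1UL << bit;

	return (atomic_fetch_and(&requests, ~mask) & mask) != 0;
}
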
3005static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3006{
3007 int r;
3008
3009 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
Jan Kiszka1b10bf32008-09-30 10:41:06 +02003010 pr_debug("vcpu %d received sipi with vector # %x\n",
3011 vcpu->vcpu_id, vcpu->arch.sipi_vector);
Marcelo Tosattid7690172008-09-08 15:23:48 -03003012 kvm_lapic_reset(vcpu);
Gleb Natapov5f179282008-10-07 15:42:33 +02003013 r = kvm_arch_vcpu_reset(vcpu);
Marcelo Tosattid7690172008-09-08 15:23:48 -03003014 if (r)
3015 return r;
3016 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003017 }
3018
Marcelo Tosattid7690172008-09-08 15:23:48 -03003019 down_read(&vcpu->kvm->slots_lock);
3020 vapic_enter(vcpu);
3021
3022 r = 1;
3023 while (r > 0) {
Gleb Natapovaf2152f2008-09-22 14:28:53 +03003024 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
Marcelo Tosattid7690172008-09-08 15:23:48 -03003025 r = vcpu_enter_guest(vcpu, kvm_run);
3026 else {
3027 up_read(&vcpu->kvm->slots_lock);
3028 kvm_vcpu_block(vcpu);
3029 down_read(&vcpu->kvm->slots_lock);
3030 if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests))
3031 if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
3032 vcpu->arch.mp_state =
3033 KVM_MP_STATE_RUNNABLE;
3034 if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
3035 r = -EINTR;
3036 }
3037
3038 if (r > 0) {
Jan Kiszkac4abb7c2008-09-26 09:30:55 +02003039 if (dm_request_for_nmi_injection(vcpu, kvm_run)) {
3040 r = -EINTR;
3041 kvm_run->exit_reason = KVM_EXIT_NMI;
3042 ++vcpu->stat.request_nmi_exits;
3043 }
Marcelo Tosattid7690172008-09-08 15:23:48 -03003044 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
3045 r = -EINTR;
3046 kvm_run->exit_reason = KVM_EXIT_INTR;
3047 ++vcpu->stat.request_irq_exits;
3048 }
3049 if (signal_pending(current)) {
3050 r = -EINTR;
3051 kvm_run->exit_reason = KVM_EXIT_INTR;
3052 ++vcpu->stat.signal_exits;
3053 }
3054 if (need_resched()) {
3055 up_read(&vcpu->kvm->slots_lock);
3056 kvm_resched(vcpu);
3057 down_read(&vcpu->kvm->slots_lock);
3058 }
3059 }
3060 }
3061
3062 up_read(&vcpu->kvm->slots_lock);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003063 post_kvm_run_save(vcpu, kvm_run);
3064
Avi Kivityb93463a2007-10-25 16:52:32 +02003065 vapic_exit(vcpu);
3066
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003067 return r;
3068}
3069
3070int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3071{
3072 int r;
3073 sigset_t sigsaved;
3074
3075 vcpu_load(vcpu);
3076
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003077 if (vcpu->sigset_active)
3078 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3079
Avi Kivityac9f6dc2008-07-06 15:48:31 +03003080 if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
3081 kvm_vcpu_block(vcpu);
Marcelo Tosattid7690172008-09-08 15:23:48 -03003082 clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
Avi Kivityac9f6dc2008-07-06 15:48:31 +03003083 r = -EAGAIN;
3084 goto out;
3085 }
3086
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003087 /* re-sync apic's tpr */
3088 if (!irqchip_in_kernel(vcpu->kvm))
Avi Kivity2d3ad1f2008-02-24 11:20:43 +02003089 kvm_set_cr8(vcpu, kvm_run->cr8);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003090
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003091 if (vcpu->arch.pio.cur_count) {
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003092 r = complete_pio(vcpu);
3093 if (r)
3094 goto out;
3095 }
3096#ifdef CONFIG_HAS_IOMEM
3097 if (vcpu->mmio_needed) {
3098 memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
3099 vcpu->mmio_read_completed = 1;
3100 vcpu->mmio_needed = 0;
Marcelo Tosatti3200f402008-03-29 20:17:59 -03003101
3102 down_read(&vcpu->kvm->slots_lock);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003103 r = emulate_instruction(vcpu, kvm_run,
Sheng Yang571008d2008-01-02 14:49:22 +08003104 vcpu->arch.mmio_fault_cr2, 0,
3105 EMULTYPE_NO_DECODE);
Marcelo Tosatti3200f402008-03-29 20:17:59 -03003106 up_read(&vcpu->kvm->slots_lock);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003107 if (r == EMULATE_DO_MMIO) {
3108 /*
3109 * Read-modify-write. Back to userspace.
3110 */
3111 r = 0;
3112 goto out;
3113 }
3114 }
3115#endif
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003116 if (kvm_run->exit_reason == KVM_EXIT_HYPERCALL)
3117 kvm_register_write(vcpu, VCPU_REGS_RAX,
3118 kvm_run->hypercall.ret);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003119
3120 r = __vcpu_run(vcpu, kvm_run);
3121
3122out:
3123 if (vcpu->sigset_active)
3124 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3125
3126 vcpu_put(vcpu);
3127 return r;
3128}
3129
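kvm_arch_vcpu_ioctl_run() is the kernel half of the KVM_RUN contract. The userspace half, in a hedged sketch (vcpu_fd and the mmap'ed struct kvm_run are assumed to come from KVM_CREATE_VCPU and KVM_GET_VCPU_MMAP_SIZE; error handling elided):

#include <linux/kvm.h>
#include <sys/ioctl.h>

static void drive_vcpu(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			return;			/* e.g. -EINTR from a signal */
		switch (run->exit_reason) {
		case KVM_EXIT_IO:		/* finished by complete_pio() on re-entry */
		case KVM_EXIT_MMIO:		/* finished by emulate_instruction() */
			/* emulate the access here, then re-enter */
			break;
		case KVM_EXIT_INTR:		/* the signal_pending() path above */
			return;
		default:
			return;
		}
	}
}
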
3130int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3131{
3132 vcpu_load(vcpu);
3133
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003134 regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
3135 regs->rbx = kvm_register_read(vcpu, VCPU_REGS_RBX);
3136 regs->rcx = kvm_register_read(vcpu, VCPU_REGS_RCX);
3137 regs->rdx = kvm_register_read(vcpu, VCPU_REGS_RDX);
3138 regs->rsi = kvm_register_read(vcpu, VCPU_REGS_RSI);
3139 regs->rdi = kvm_register_read(vcpu, VCPU_REGS_RDI);
3140 regs->rsp = kvm_register_read(vcpu, VCPU_REGS_RSP);
3141 regs->rbp = kvm_register_read(vcpu, VCPU_REGS_RBP);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003142#ifdef CONFIG_X86_64
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003143 regs->r8 = kvm_register_read(vcpu, VCPU_REGS_R8);
3144 regs->r9 = kvm_register_read(vcpu, VCPU_REGS_R9);
3145 regs->r10 = kvm_register_read(vcpu, VCPU_REGS_R10);
3146 regs->r11 = kvm_register_read(vcpu, VCPU_REGS_R11);
3147 regs->r12 = kvm_register_read(vcpu, VCPU_REGS_R12);
3148 regs->r13 = kvm_register_read(vcpu, VCPU_REGS_R13);
3149 regs->r14 = kvm_register_read(vcpu, VCPU_REGS_R14);
3150 regs->r15 = kvm_register_read(vcpu, VCPU_REGS_R15);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003151#endif
3152
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003153 regs->rip = kvm_rip_read(vcpu);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003154 regs->rflags = kvm_x86_ops->get_rflags(vcpu);
3155
3156 /*
3157 * Don't leak debug flags in case they were set for guest debugging
3158 */
3159 if (vcpu->guest_debug.enabled && vcpu->guest_debug.singlestep)
3160 regs->rflags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
3161
3162 vcpu_put(vcpu);
3163
3164 return 0;
3165}
3166
3167int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
3168{
3169 vcpu_load(vcpu);
3170
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003171 kvm_register_write(vcpu, VCPU_REGS_RAX, regs->rax);
3172 kvm_register_write(vcpu, VCPU_REGS_RBX, regs->rbx);
3173 kvm_register_write(vcpu, VCPU_REGS_RCX, regs->rcx);
3174 kvm_register_write(vcpu, VCPU_REGS_RDX, regs->rdx);
3175 kvm_register_write(vcpu, VCPU_REGS_RSI, regs->rsi);
3176 kvm_register_write(vcpu, VCPU_REGS_RDI, regs->rdi);
3177 kvm_register_write(vcpu, VCPU_REGS_RSP, regs->rsp);
3178 kvm_register_write(vcpu, VCPU_REGS_RBP, regs->rbp);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003179#ifdef CONFIG_X86_64
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003180 kvm_register_write(vcpu, VCPU_REGS_R8, regs->r8);
3181 kvm_register_write(vcpu, VCPU_REGS_R9, regs->r9);
3182 kvm_register_write(vcpu, VCPU_REGS_R10, regs->r10);
3183 kvm_register_write(vcpu, VCPU_REGS_R11, regs->r11);
3184 kvm_register_write(vcpu, VCPU_REGS_R12, regs->r12);
3185 kvm_register_write(vcpu, VCPU_REGS_R13, regs->r13);
3186 kvm_register_write(vcpu, VCPU_REGS_R14, regs->r14);
3187 kvm_register_write(vcpu, VCPU_REGS_R15, regs->r15);
3188
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003189#endif
3190
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003191 kvm_rip_write(vcpu, regs->rip);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003192 kvm_x86_ops->set_rflags(vcpu, regs->rflags);
3193
Jan Kiszkab4f14ab2008-04-30 17:59:04 +02003195 vcpu->arch.exception.pending = false;
3196
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003197 vcpu_put(vcpu);
3198
3199 return 0;
3200}
3201
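These two handlers move the full GPR file across the user/kernel boundary in one struct kvm_regs. A sketch of a userspace round-trip built on them (hypothetical helper; error handling reduced to -1):

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Advance a stopped vcpu's rip by n bytes, e.g. to skip an instruction. */
static int advance_rip(int vcpu_fd, unsigned n)
{
	struct kvm_regs regs;

	if (ioctl(vcpu_fd, KVM_GET_REGS, &regs) < 0)
		return -1;
	regs.rip += n;
	return ioctl(vcpu_fd, KVM_SET_REGS, &regs);
}
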
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003202void kvm_get_segment(struct kvm_vcpu *vcpu,
3203 struct kvm_segment *var, int seg)
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003204{
Harvey Harrison14af3f32008-02-19 10:25:50 -08003205 kvm_x86_ops->get_segment(vcpu, var, seg);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003206}
3207
3208void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
3209{
3210 struct kvm_segment cs;
3211
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003212 kvm_get_segment(vcpu, &cs, VCPU_SREG_CS);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003213 *db = cs.db;
3214 *l = cs.l;
3215}
3216EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
3217
3218int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
3219 struct kvm_sregs *sregs)
3220{
3221 struct descriptor_table dt;
3222 int pending_vec;
3223
3224 vcpu_load(vcpu);
3225
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003226 kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
3227 kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
3228 kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES);
3229 kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
3230 kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
3231 kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003232
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003233 kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
3234 kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003235
3236 kvm_x86_ops->get_idt(vcpu, &dt);
3237 sregs->idt.limit = dt.limit;
3238 sregs->idt.base = dt.base;
3239 kvm_x86_ops->get_gdt(vcpu, &dt);
3240 sregs->gdt.limit = dt.limit;
3241 sregs->gdt.base = dt.base;
3242
3243 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003244 sregs->cr0 = vcpu->arch.cr0;
3245 sregs->cr2 = vcpu->arch.cr2;
3246 sregs->cr3 = vcpu->arch.cr3;
3247 sregs->cr4 = vcpu->arch.cr4;
Avi Kivity2d3ad1f2008-02-24 11:20:43 +02003248 sregs->cr8 = kvm_get_cr8(vcpu);
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003249 sregs->efer = vcpu->arch.shadow_efer;
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003250 sregs->apic_base = kvm_get_apic_base(vcpu);
3251
3252 if (irqchip_in_kernel(vcpu->kvm)) {
3253 memset(sregs->interrupt_bitmap, 0,
3254 sizeof sregs->interrupt_bitmap);
3255 pending_vec = kvm_x86_ops->get_irq(vcpu);
3256 if (pending_vec >= 0)
3257 set_bit(pending_vec,
3258 (unsigned long *)sregs->interrupt_bitmap);
3259 } else
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003260 memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending,
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003261 sizeof sregs->interrupt_bitmap);
3262
3263 vcpu_put(vcpu);
3264
3265 return 0;
3266}
3267
Marcelo Tosatti62d9f0d2008-04-11 13:24:45 -03003268int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
3269 struct kvm_mp_state *mp_state)
3270{
3271 vcpu_load(vcpu);
3272 mp_state->mp_state = vcpu->arch.mp_state;
3273 vcpu_put(vcpu);
3274 return 0;
3275}
3276
3277int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
3278 struct kvm_mp_state *mp_state)
3279{
3280 vcpu_load(vcpu);
3281 vcpu->arch.mp_state = mp_state->mp_state;
3282 vcpu_put(vcpu);
3283 return 0;
3284}
3285
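The mp_state pair gives userspace visibility into the vcpu lifecycle (roughly: UNINITIALIZED until INIT/SIPI, HALTED after hlt, RUNNABLE otherwise), which matters when saving and restoring SMP guests. A sketch of the query side:

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int vcpu_is_runnable(int vcpu_fd)
{
	struct kvm_mp_state mp;

	if (ioctl(vcpu_fd, KVM_GET_MP_STATE, &mp) < 0)
		return -1;
	return mp.mp_state == KVM_MP_STATE_RUNNABLE;
}
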
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003286static void kvm_set_segment(struct kvm_vcpu *vcpu,
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003287 struct kvm_segment *var, int seg)
3288{
Harvey Harrison14af3f32008-02-19 10:25:50 -08003289 kvm_x86_ops->set_segment(vcpu, var, seg);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003290}
3291
Izik Eidus37817f22008-03-24 23:14:53 +02003292static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
3293 struct kvm_segment *kvm_desct)
3294{
3295 kvm_desct->base = seg_desc->base0;
3296 kvm_desct->base |= seg_desc->base1 << 16;
3297 kvm_desct->base |= seg_desc->base2 << 24;
3298 kvm_desct->limit = seg_desc->limit0;
3299 kvm_desct->limit |= seg_desc->limit << 16;
Marcelo Tosattic93cd3a2008-07-19 19:08:07 -03003300 if (seg_desc->g) {
3301 kvm_desct->limit <<= 12;
3302 kvm_desct->limit |= 0xfff;
3303 }
Izik Eidus37817f22008-03-24 23:14:53 +02003304 kvm_desct->selector = selector;
3305 kvm_desct->type = seg_desc->type;
3306 kvm_desct->present = seg_desc->p;
3307 kvm_desct->dpl = seg_desc->dpl;
3308 kvm_desct->db = seg_desc->d;
3309 kvm_desct->s = seg_desc->s;
3310 kvm_desct->l = seg_desc->l;
3311 kvm_desct->g = seg_desc->g;
3312 kvm_desct->avl = seg_desc->avl;
3313 if (!selector)
3314 kvm_desct->unusable = 1;
3315 else
3316 kvm_desct->unusable = 0;
3317 kvm_desct->padding = 0;
3318}
3319
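A worked example of the unpacking done above, with hypothetical field values: base0 = 0x0000, base1 = 0x40, base2 = 0x00 and limit0 = 0xffff, limit = 0xf with g = 1 give base 0x00400000 and limit (0xfffff << 12) | 0xfff = 0xffffffff, i.e. page granularity turns the 20-bit limit into a 4 GiB one:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t base0 = 0x0000, base1 = 0x40, base2 = 0x00;	/* hypothetical */
	uint32_t limit0 = 0xffff, limit_hi = 0xf, g = 1;
	uint32_t base = base0 | base1 << 16 | base2 << 24;
	uint32_t limit = limit0 | limit_hi << 16;

	if (g)
		limit = limit << 12 | 0xfff;
	printf("base=%#x limit=%#x\n", base, limit);	/* base=0x400000 limit=0xffffffff */
	return 0;
}
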
3320static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu,
3321 u16 selector,
3322 struct descriptor_table *dtable)
3323{
3324 if (selector & 1 << 2) {
3325 struct kvm_segment kvm_seg;
3326
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003327 kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
Izik Eidus37817f22008-03-24 23:14:53 +02003328
3329 if (kvm_seg.unusable)
3330 dtable->limit = 0;
3331 else
3332 dtable->limit = kvm_seg.limit;
3333 dtable->base = kvm_seg.base;
3334	} else
3336 kvm_x86_ops->get_gdt(vcpu, dtable);
3337}
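The selector & 1 << 2 test reads the selector's TI (table indicator) bit: bits 0-1 hold the RPL, bit 2 picks the LDT (1) over the GDT (0), and bits 3-15 index the chosen table, which is why the callers below shift by 3. A small sketch of that layout:

#include <stdint.h>

static inline unsigned sel_rpl(uint16_t sel)    { return sel & 3; }
static inline unsigned sel_in_ldt(uint16_t sel) { return !!(sel & (1 << 2)); }
static inline unsigned sel_index(uint16_t sel)  { return sel >> 3; }
/* e.g. selector 0x2b: RPL 3, LDT, index 5 */
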
3338
3339/* allowed just for 8 bytes segments */
3340static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3341 struct desc_struct *seg_desc)
3342{
Marcelo Tosatti98899aa2008-07-16 19:07:10 -03003343 gpa_t gpa;
Izik Eidus37817f22008-03-24 23:14:53 +02003344 struct descriptor_table dtable;
3345 u16 index = selector >> 3;
3346
3347	get_segment_descriptor_dtable(vcpu, selector, &dtable);
3348
3349 if (dtable.limit < index * 8 + 7) {
3350 kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
3351 return 1;
3352 }
Marcelo Tosatti98899aa2008-07-16 19:07:10 -03003353 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
3354 gpa += index * 8;
3355 return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8);
Izik Eidus37817f22008-03-24 23:14:53 +02003356}
3357
3358/* allowed just for 8-byte segment descriptors */
3359static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3360 struct desc_struct *seg_desc)
3361{
Marcelo Tosatti98899aa2008-07-16 19:07:10 -03003362 gpa_t gpa;
Izik Eidus37817f22008-03-24 23:14:53 +02003363 struct descriptor_table dtable;
3364 u16 index = selector >> 3;
3365
3366	get_segment_descriptor_dtable(vcpu, selector, &dtable);
3367
3368 if (dtable.limit < index * 8 + 7)
3369 return 1;
Marcelo Tosatti98899aa2008-07-16 19:07:10 -03003370 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base);
3371 gpa += index * 8;
3372 return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8);
Izik Eidus37817f22008-03-24 23:14:53 +02003373}
3374
3375static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
3376 struct desc_struct *seg_desc)
3377{
3378 u32 base_addr;
3379
3380 base_addr = seg_desc->base0;
3381 base_addr |= (seg_desc->base1 << 16);
3382 base_addr |= (seg_desc->base2 << 24);
3383
Marcelo Tosatti98899aa2008-07-16 19:07:10 -03003384 return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
Izik Eidus37817f22008-03-24 23:14:53 +02003385}
3386
Izik Eidus37817f22008-03-24 23:14:53 +02003387static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
3388{
3389 struct kvm_segment kvm_seg;
3390
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003391 kvm_get_segment(vcpu, &kvm_seg, seg);
Izik Eidus37817f22008-03-24 23:14:53 +02003392 return kvm_seg.selector;
3393}
3394
3395static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
3396 u16 selector,
3397 struct kvm_segment *kvm_seg)
3398{
3399 struct desc_struct seg_desc;
3400
3401 if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
3402 return 1;
3403 seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
3404 return 0;
3405}
3406
Harvey Harrison2259e3a2008-08-22 13:29:17 -07003407static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
Avi Kivityf4bbd9a2008-08-20 15:51:42 +03003408{
3409 struct kvm_segment segvar = {
3410 .base = selector << 4,
3411 .limit = 0xffff,
3412 .selector = selector,
3413 .type = 3,
3414 .present = 1,
3415 .dpl = 3,
3416 .db = 0,
3417 .s = 1,
3418 .l = 0,
3419 .g = 0,
3420 .avl = 0,
3421 .unusable = 0,
3422 };
3423 kvm_x86_ops->set_segment(vcpu, &segvar, seg);
3424 return 0;
3425}
3426
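Real mode has no descriptor tables: the structure above simply encodes base = selector << 4 with a 64 KiB limit, matching the 8086 addressing rule. A one-line sketch of the arithmetic:

#include <stdint.h>

/* real-mode linear address: (selector << 4) + offset */
static inline uint32_t rm_linear(uint16_t sel, uint16_t off)
{
	return ((uint32_t)sel << 4) + off;	/* 0xb800:0x0000 -> 0xb8000 */
}
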
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003427int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
3428 int type_bits, int seg)
Izik Eidus37817f22008-03-24 23:14:53 +02003429{
3430 struct kvm_segment kvm_seg;
3431
Avi Kivityf4bbd9a2008-08-20 15:51:42 +03003432 if (!(vcpu->arch.cr0 & X86_CR0_PE))
3433 return kvm_load_realmode_segment(vcpu, selector, seg);
Izik Eidus37817f22008-03-24 23:14:53 +02003434 if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
3435 return 1;
3436 kvm_seg.type |= type_bits;
3437
3438 if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
3439 seg != VCPU_SREG_LDTR)
3440 if (!kvm_seg.s)
3441 kvm_seg.unusable = 1;
3442
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003443 kvm_set_segment(vcpu, &kvm_seg, seg);
Izik Eidus37817f22008-03-24 23:14:53 +02003444 return 0;
3445}
3446
3447static void save_state_to_tss32(struct kvm_vcpu *vcpu,
3448 struct tss_segment_32 *tss)
3449{
3450 tss->cr3 = vcpu->arch.cr3;
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003451 tss->eip = kvm_rip_read(vcpu);
Izik Eidus37817f22008-03-24 23:14:53 +02003452 tss->eflags = kvm_x86_ops->get_rflags(vcpu);
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003453 tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
3454 tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
3455 tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX);
3456 tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX);
3457 tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP);
3458 tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP);
3459 tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI);
3460 tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI);
Izik Eidus37817f22008-03-24 23:14:53 +02003461 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3462 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
3463 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
3464 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
3465 tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
3466 tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
3467 tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
3468 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
3469}
3470
3471static int load_state_from_tss32(struct kvm_vcpu *vcpu,
3472 struct tss_segment_32 *tss)
3473{
3474 kvm_set_cr3(vcpu, tss->cr3);
3475
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003476 kvm_rip_write(vcpu, tss->eip);
Izik Eidus37817f22008-03-24 23:14:53 +02003477 kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
3478
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003479 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax);
3480 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx);
3481 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx);
3482 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx);
3483 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp);
3484 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp);
3485 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi);
3486 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi);
Izik Eidus37817f22008-03-24 23:14:53 +02003487
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003488 if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
Izik Eidus37817f22008-03-24 23:14:53 +02003489 return 1;
3490
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003491 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
Izik Eidus37817f22008-03-24 23:14:53 +02003492 return 1;
3493
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003494 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
Izik Eidus37817f22008-03-24 23:14:53 +02003495 return 1;
3496
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003497 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
Izik Eidus37817f22008-03-24 23:14:53 +02003498 return 1;
3499
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003500 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
Izik Eidus37817f22008-03-24 23:14:53 +02003501 return 1;
3502
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003503 if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
Izik Eidus37817f22008-03-24 23:14:53 +02003504 return 1;
3505
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003506 if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
Izik Eidus37817f22008-03-24 23:14:53 +02003507 return 1;
3508 return 0;
3509}
3510
3511static void save_state_to_tss16(struct kvm_vcpu *vcpu,
3512 struct tss_segment_16 *tss)
3513{
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003514 tss->ip = kvm_rip_read(vcpu);
Izik Eidus37817f22008-03-24 23:14:53 +02003515 tss->flag = kvm_x86_ops->get_rflags(vcpu);
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003516 tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX);
3517 tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX);
3518 tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX);
3519 tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX);
3520 tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP);
3521 tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP);
3522 tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI);
3523 tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI);
Izik Eidus37817f22008-03-24 23:14:53 +02003524
3525 tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
3526 tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
3527 tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
3528 tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
3529 tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
3530 tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
3531}
3532
3533static int load_state_from_tss16(struct kvm_vcpu *vcpu,
3534 struct tss_segment_16 *tss)
3535{
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003536 kvm_rip_write(vcpu, tss->ip);
Izik Eidus37817f22008-03-24 23:14:53 +02003537 kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
Marcelo Tosatti5fdbf972008-06-27 14:58:02 -03003538 kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax);
3539 kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx);
3540 kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx);
3541 kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx);
3542 kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp);
3543 kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp);
3544 kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si);
3545 kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di);
Izik Eidus37817f22008-03-24 23:14:53 +02003546
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003547 if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
Izik Eidus37817f22008-03-24 23:14:53 +02003548 return 1;
3549
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003550 if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
Izik Eidus37817f22008-03-24 23:14:53 +02003551 return 1;
3552
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003553 if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
Izik Eidus37817f22008-03-24 23:14:53 +02003554 return 1;
3555
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003556 if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
Izik Eidus37817f22008-03-24 23:14:53 +02003557 return 1;
3558
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003559 if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
Izik Eidus37817f22008-03-24 23:14:53 +02003560 return 1;
3561 return 0;
3562}
3563
Harvey Harrison8b2cf732008-04-27 12:14:13 -07003564static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003565 u32 old_tss_base,
Izik Eidus37817f22008-03-24 23:14:53 +02003566 struct desc_struct *nseg_desc)
3567{
3568 struct tss_segment_16 tss_segment_16;
3569 int ret = 0;
3570
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003571 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
3572 sizeof tss_segment_16))
Izik Eidus37817f22008-03-24 23:14:53 +02003573 goto out;
3574
3575 save_state_to_tss16(vcpu, &tss_segment_16);
Izik Eidus37817f22008-03-24 23:14:53 +02003576
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003577 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16,
3578 sizeof tss_segment_16))
Izik Eidus37817f22008-03-24 23:14:53 +02003579 goto out;
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003580
3581 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
3582 &tss_segment_16, sizeof tss_segment_16))
3583 goto out;
3584
Izik Eidus37817f22008-03-24 23:14:53 +02003585 if (load_state_from_tss16(vcpu, &tss_segment_16))
3586 goto out;
3587
3588 ret = 1;
3589out:
3590 return ret;
3591}
3592
Harvey Harrison8b2cf732008-04-27 12:14:13 -07003593static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003594 u32 old_tss_base,
Izik Eidus37817f22008-03-24 23:14:53 +02003595 struct desc_struct *nseg_desc)
3596{
3597 struct tss_segment_32 tss_segment_32;
3598 int ret = 0;
3599
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003600 if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
3601 sizeof tss_segment_32))
Izik Eidus37817f22008-03-24 23:14:53 +02003602 goto out;
3603
3604 save_state_to_tss32(vcpu, &tss_segment_32);
Izik Eidus37817f22008-03-24 23:14:53 +02003605
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003606 if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32,
3607 sizeof tss_segment_32))
Izik Eidus37817f22008-03-24 23:14:53 +02003608 goto out;
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003609
3610 if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
3611 &tss_segment_32, sizeof tss_segment_32))
3612 goto out;
3613
Izik Eidus37817f22008-03-24 23:14:53 +02003614 if (load_state_from_tss32(vcpu, &tss_segment_32))
3615 goto out;
3616
3617 ret = 1;
3618out:
3619 return ret;
3620}
3621
3622int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
3623{
3624 struct kvm_segment tr_seg;
3625 struct desc_struct cseg_desc;
3626 struct desc_struct nseg_desc;
3627 int ret = 0;
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003628 u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
3629 u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
Izik Eidus37817f22008-03-24 23:14:53 +02003630
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003631 old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
Izik Eidus37817f22008-03-24 23:14:53 +02003632
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003633	/* FIXME: Handle errors. Failure to read either TSS or either
 3634	 * descriptor should generate a page fault.
3635 */
Izik Eidus37817f22008-03-24 23:14:53 +02003636 if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
3637 goto out;
3638
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003639 if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc))
Izik Eidus37817f22008-03-24 23:14:53 +02003640 goto out;
3641
Izik Eidus37817f22008-03-24 23:14:53 +02003642 if (reason != TASK_SWITCH_IRET) {
3643 int cpl;
3644
3645 cpl = kvm_x86_ops->get_cpl(vcpu);
3646 if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
3647 kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
3648 return 1;
3649 }
3650 }
3651
3652 if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
3653 kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
3654 return 1;
3655 }
3656
3657 if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
Izik Eidus3fe913e2008-04-28 18:23:52 +03003658		cseg_desc.type &= ~(1 << 1); /* clear the busy (B) flag */
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003659 save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc);
Izik Eidus37817f22008-03-24 23:14:53 +02003660 }
3661
3662 if (reason == TASK_SWITCH_IRET) {
3663 u32 eflags = kvm_x86_ops->get_rflags(vcpu);
3664 kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
3665 }
3666
3667 kvm_x86_ops->skip_emulated_instruction(vcpu);
Izik Eidus37817f22008-03-24 23:14:53 +02003668
3669 if (nseg_desc.type & 8)
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003670 ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base,
Izik Eidus37817f22008-03-24 23:14:53 +02003671 &nseg_desc);
3672 else
Marcelo Tosatti34198bf82008-07-16 19:07:11 -03003673 ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base,
Izik Eidus37817f22008-03-24 23:14:53 +02003674 &nseg_desc);
3675
3676 if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
3677 u32 eflags = kvm_x86_ops->get_rflags(vcpu);
3678 kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
3679 }
3680
3681 if (reason != TASK_SWITCH_IRET) {
Izik Eidus3fe913e2008-04-28 18:23:52 +03003682 nseg_desc.type |= (1 << 1);
Izik Eidus37817f22008-03-24 23:14:53 +02003683 save_guest_segment_descriptor(vcpu, tss_selector,
3684 &nseg_desc);
3685 }
3686
3687 kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
3688 seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
3689 tr_seg.type = 11;
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003690 kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
Izik Eidus37817f22008-03-24 23:14:53 +02003691out:
Izik Eidus37817f22008-03-24 23:14:53 +02003692 return ret;
3693}
3694EXPORT_SYMBOL_GPL(kvm_task_switch);
3695
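For orientation, the busy-bit and EFLAGS.NT bookkeeping that kvm_task_switch() performs can be summarized by switch reason; this restates the code above and adds nothing new:

/*
 * reason       old TSS busy   new TSS busy   EFLAGS.NT
 * JMP          cleared        set            unchanged
 * CALL/GATE    unchanged      set            set in the new context
 * IRET         cleared        unchanged      cleared in the old context
 */
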
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003696int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
3697 struct kvm_sregs *sregs)
3698{
3699 int mmu_reset_needed = 0;
3700 int i, pending_vec, max_bits;
3701 struct descriptor_table dt;
3702
3703 vcpu_load(vcpu);
3704
3705 dt.limit = sregs->idt.limit;
3706 dt.base = sregs->idt.base;
3707 kvm_x86_ops->set_idt(vcpu, &dt);
3708 dt.limit = sregs->gdt.limit;
3709 dt.base = sregs->gdt.base;
3710 kvm_x86_ops->set_gdt(vcpu, &dt);
3711
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003712 vcpu->arch.cr2 = sregs->cr2;
3713 mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
3714 vcpu->arch.cr3 = sregs->cr3;
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003715
Avi Kivity2d3ad1f2008-02-24 11:20:43 +02003716 kvm_set_cr8(vcpu, sregs->cr8);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003717
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003718 mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003719 kvm_x86_ops->set_efer(vcpu, sregs->efer);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003720 kvm_set_apic_base(vcpu, sregs->apic_base);
3721
3722 kvm_x86_ops->decache_cr4_guest_bits(vcpu);
3723
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003724 mmu_reset_needed |= vcpu->arch.cr0 != sregs->cr0;
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003725 kvm_x86_ops->set_cr0(vcpu, sregs->cr0);
Paul Knowlesd7306162008-02-06 11:02:35 +00003726 vcpu->arch.cr0 = sregs->cr0;
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003727
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003728 mmu_reset_needed |= vcpu->arch.cr4 != sregs->cr4;
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003729 kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
3730 if (!is_long_mode(vcpu) && is_pae(vcpu))
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003731 load_pdptrs(vcpu, vcpu->arch.cr3);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003732
3733 if (mmu_reset_needed)
3734 kvm_mmu_reset_context(vcpu);
3735
3736 if (!irqchip_in_kernel(vcpu->kvm)) {
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003737 memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap,
3738 sizeof vcpu->arch.irq_pending);
3739 vcpu->arch.irq_summary = 0;
3740 for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i)
3741 if (vcpu->arch.irq_pending[i])
3742 __set_bit(i, &vcpu->arch.irq_summary);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003743 } else {
3744 max_bits = (sizeof sregs->interrupt_bitmap) << 3;
3745 pending_vec = find_first_bit(
3746 (const unsigned long *)sregs->interrupt_bitmap,
3747 max_bits);
3748	/* Only a pending external irq is handled here */
3749 if (pending_vec < max_bits) {
3750 kvm_x86_ops->set_irq(vcpu, pending_vec);
3751 pr_debug("Set back pending irq %d\n",
3752 pending_vec);
3753 }
Marcelo Tosattie4825802008-09-24 20:28:34 -03003754 kvm_pic_clear_isr_ack(vcpu->kvm);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003755 }
3756
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003757 kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS);
3758 kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS);
3759 kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES);
3760 kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS);
3761 kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS);
3762 kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003763
Guillaume Thouvenin3e6e0aa2008-05-27 10:18:46 +02003764 kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
3765 kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003766
Marcelo Tosatti9c3e4aa2008-09-10 16:40:55 -03003767 /* Older userspace won't unhalt the vcpu on reset. */
3768 if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
3769 sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
3770 !(vcpu->arch.cr0 & X86_CR0_PE))
3771 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3772
Hollis Blanchardb6c7a5d2007-11-01 14:16:10 -05003773 vcpu_put(vcpu);
3774
3775 return 0;
3776}
3777
3778int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
3779 struct kvm_debug_guest *dbg)
3780{
3781 int r;
3782
3783 vcpu_load(vcpu);
3784
3785 r = kvm_x86_ops->set_guest_debug(vcpu, dbg);
3786
3787 vcpu_put(vcpu);
3788
3789 return r;
3790}
3791
3792/*
Hollis Blanchardd0752062007-10-31 17:24:25 -05003793 * fxsave fpu state. Taken from x86_64/processor.h. To be killed when
3794 * we have asm/x86/processor.h
3795 */
3796struct fxsave {
3797 u16 cwd;
3798 u16 swd;
3799 u16 twd;
3800 u16 fop;
3801 u64 rip;
3802 u64 rdp;
3803 u32 mxcsr;
3804 u32 mxcsr_mask;
3805 u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
3806#ifdef CONFIG_X86_64
3807 u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
3808#else
3809 u32 xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
3810#endif
3811};
3812
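Because this struct mirrors the hardware FXSAVE image, its offsets are dictated by the instruction rather than the compiler: the x87 registers must start at byte 32 and the XMM registers at byte 160. A compile-time sketch of that invariant (C11 _Static_assert, not present in the original source):

#include <stddef.h>

_Static_assert(offsetof(struct fxsave, st_space) == 32,
	       "FXSAVE image: FP registers start at byte 32");
_Static_assert(offsetof(struct fxsave, xmm_space) == 160,
	       "FXSAVE image: XMM registers start at byte 160");
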
Zhang Xiantao8b006792007-11-16 13:05:55 +08003813/*
3814 * Translate a guest virtual address to a guest physical address.
3815 */
3816int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
3817 struct kvm_translation *tr)
3818{
3819 unsigned long vaddr = tr->linear_address;
3820 gpa_t gpa;
3821
3822 vcpu_load(vcpu);
Izik Eidus72dc67a2008-02-10 18:04:15 +02003823 down_read(&vcpu->kvm->slots_lock);
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003824 gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
Izik Eidus72dc67a2008-02-10 18:04:15 +02003825 up_read(&vcpu->kvm->slots_lock);
Zhang Xiantao8b006792007-11-16 13:05:55 +08003826 tr->physical_address = gpa;
3827 tr->valid = gpa != UNMAPPED_GVA;
3828 tr->writeable = 1;
3829 tr->usermode = 0;
Zhang Xiantao8b006792007-11-16 13:05:55 +08003830 vcpu_put(vcpu);
3831
3832 return 0;
3833}
3834
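This handler backs the KVM_TRANSLATE ioctl, which walks the guest page tables on userspace's behalf. A hedged usage sketch:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Returns the gpa for a gva, or ~0ULL on failure / unmapped gva. */
static unsigned long long gva_to_gpa(int vcpu_fd, unsigned long long gva)
{
	struct kvm_translation tr = { .linear_address = gva };

	if (ioctl(vcpu_fd, KVM_TRANSLATE, &tr) < 0 || !tr.valid)
		return ~0ULL;		/* cf. UNMAPPED_GVA above */
	return tr.physical_address;
}
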
Hollis Blanchardd0752062007-10-31 17:24:25 -05003835int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3836{
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003837 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
Hollis Blanchardd0752062007-10-31 17:24:25 -05003838
3839 vcpu_load(vcpu);
3840
3841 memcpy(fpu->fpr, fxsave->st_space, 128);
3842 fpu->fcw = fxsave->cwd;
3843 fpu->fsw = fxsave->swd;
3844 fpu->ftwx = fxsave->twd;
3845 fpu->last_opcode = fxsave->fop;
3846 fpu->last_ip = fxsave->rip;
3847 fpu->last_dp = fxsave->rdp;
3848 memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
3849
3850 vcpu_put(vcpu);
3851
3852 return 0;
3853}
3854
3855int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
3856{
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003857 struct fxsave *fxsave = (struct fxsave *)&vcpu->arch.guest_fx_image;
Hollis Blanchardd0752062007-10-31 17:24:25 -05003858
3859 vcpu_load(vcpu);
3860
3861 memcpy(fxsave->st_space, fpu->fpr, 128);
3862 fxsave->cwd = fpu->fcw;
3863 fxsave->swd = fpu->fsw;
3864 fxsave->twd = fpu->ftwx;
3865 fxsave->fop = fpu->last_opcode;
3866 fxsave->rip = fpu->last_ip;
3867 fxsave->rdp = fpu->last_dp;
3868 memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
3869
3870 vcpu_put(vcpu);
3871
3872 return 0;
3873}
3874
3875void fx_init(struct kvm_vcpu *vcpu)
3876{
3877 unsigned after_mxcsr_mask;
3878
Andrea Arcangelibc1a34f2008-05-01 18:43:33 +02003879 /*
3880	 * Touch the FPU the first time in a non-atomic context: if this is
3881	 * the first FPU instruction, the exception handler will fire before
3882	 * the instruction returns and will have to allocate RAM with
3883	 * GFP_KERNEL.
3884 */
3885 if (!used_math())
Avi Kivityd6e88ae2008-07-10 16:53:33 +03003886 kvm_fx_save(&vcpu->arch.host_fx_image);
Andrea Arcangelibc1a34f2008-05-01 18:43:33 +02003887
Hollis Blanchardd0752062007-10-31 17:24:25 -05003888 /* Initialize guest FPU by resetting ours and saving into guest's */
3889 preempt_disable();
Avi Kivityd6e88ae2008-07-10 16:53:33 +03003890 kvm_fx_save(&vcpu->arch.host_fx_image);
3891 kvm_fx_finit();
3892 kvm_fx_save(&vcpu->arch.guest_fx_image);
3893 kvm_fx_restore(&vcpu->arch.host_fx_image);
Hollis Blanchardd0752062007-10-31 17:24:25 -05003894 preempt_enable();
3895
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003896 vcpu->arch.cr0 |= X86_CR0_ET;
Hollis Blanchardd0752062007-10-31 17:24:25 -05003897 after_mxcsr_mask = offsetof(struct i387_fxsave_struct, st_space);
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003898 vcpu->arch.guest_fx_image.mxcsr = 0x1f80;
3899 memset((void *)&vcpu->arch.guest_fx_image + after_mxcsr_mask,
Hollis Blanchardd0752062007-10-31 17:24:25 -05003900 0, sizeof(struct i387_fxsave_struct) - after_mxcsr_mask);
3901}
3902EXPORT_SYMBOL_GPL(fx_init);
3903
3904void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
3905{
3906 if (!vcpu->fpu_active || vcpu->guest_fpu_loaded)
3907 return;
3908
3909 vcpu->guest_fpu_loaded = 1;
Avi Kivityd6e88ae2008-07-10 16:53:33 +03003910 kvm_fx_save(&vcpu->arch.host_fx_image);
3911 kvm_fx_restore(&vcpu->arch.guest_fx_image);
Hollis Blanchardd0752062007-10-31 17:24:25 -05003912}
3913EXPORT_SYMBOL_GPL(kvm_load_guest_fpu);
3914
3915void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
3916{
3917 if (!vcpu->guest_fpu_loaded)
3918 return;
3919
3920 vcpu->guest_fpu_loaded = 0;
Avi Kivityd6e88ae2008-07-10 16:53:33 +03003921 kvm_fx_save(&vcpu->arch.guest_fx_image);
3922 kvm_fx_restore(&vcpu->arch.host_fx_image);
Avi Kivityf096ed82007-11-18 13:54:33 +02003923 ++vcpu->stat.fpu_reload;
Hollis Blanchardd0752062007-10-31 17:24:25 -05003924}
3925EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08003926
3927void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
3928{
3929 kvm_x86_ops->vcpu_free(vcpu);
3930}
3931
3932struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
3933 unsigned int id)
3934{
Avi Kivity26e52152007-11-20 15:30:24 +02003935 return kvm_x86_ops->vcpu_create(kvm, id);
3936}
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08003937
Avi Kivity26e52152007-11-20 15:30:24 +02003938int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
3939{
3940 int r;
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08003941
3942	/* We do fxsave: the image must be 16-byte aligned. */
Zhang Xiantaoad312c72007-12-13 23:50:52 +08003943 BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF);
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08003944
3945 vcpu_load(vcpu);
3946 r = kvm_arch_vcpu_reset(vcpu);
3947 if (r == 0)
3948 r = kvm_mmu_setup(vcpu);
3949 vcpu_put(vcpu);
3950 if (r < 0)
3951 goto free_vcpu;
3952
Avi Kivity26e52152007-11-20 15:30:24 +02003953 return 0;
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08003954free_vcpu:
3955 kvm_x86_ops->vcpu_free(vcpu);
Avi Kivity26e52152007-11-20 15:30:24 +02003956 return r;
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08003957}
3958
Hollis Blanchardd40ccc62007-11-19 14:04:43 -06003959void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08003960{
3961 vcpu_load(vcpu);
3962 kvm_mmu_unload(vcpu);
3963 vcpu_put(vcpu);
3964
3965 kvm_x86_ops->vcpu_free(vcpu);
3966}
3967
3968int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
3969{
Jan Kiszka448fa4a2008-09-26 09:30:48 +02003970 vcpu->arch.nmi_pending = false;
3971 vcpu->arch.nmi_injected = false;
3972
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08003973 return kvm_x86_ops->vcpu_reset(vcpu);
3974}
3975
3976void kvm_arch_hardware_enable(void *garbage)
3977{
3978 kvm_x86_ops->hardware_enable(garbage);
3979}
3980
3981void kvm_arch_hardware_disable(void *garbage)
3982{
3983 kvm_x86_ops->hardware_disable(garbage);
3984}
3985
3986int kvm_arch_hardware_setup(void)
3987{
3988 return kvm_x86_ops->hardware_setup();
3989}
3990
3991void kvm_arch_hardware_unsetup(void)
3992{
3993 kvm_x86_ops->hardware_unsetup();
3994}
3995
3996void kvm_arch_check_processor_compat(void *rtn)
3997{
3998 kvm_x86_ops->check_processor_compatibility(rtn);
3999}
4000
4001int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
4002{
4003 struct page *page;
4004 struct kvm *kvm;
4005 int r;
4006
4007 BUG_ON(vcpu->kvm == NULL);
4008 kvm = vcpu->kvm;
4009
Zhang Xiantaoad312c72007-12-13 23:50:52 +08004010 vcpu->arch.mmu.root_hpa = INVALID_PAGE;
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08004011 if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
Avi Kivitya4535292008-04-13 17:54:35 +03004012 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08004013 else
Avi Kivitya4535292008-04-13 17:54:35 +03004014 vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08004015
4016 page = alloc_page(GFP_KERNEL | __GFP_ZERO);
4017 if (!page) {
4018 r = -ENOMEM;
4019 goto fail;
4020 }
Zhang Xiantaoad312c72007-12-13 23:50:52 +08004021 vcpu->arch.pio_data = page_address(page);
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08004022
4023 r = kvm_mmu_create(vcpu);
4024 if (r < 0)
4025 goto fail_free_pio_data;
4026
4027 if (irqchip_in_kernel(kvm)) {
4028 r = kvm_create_lapic(vcpu);
4029 if (r < 0)
4030 goto fail_mmu_destroy;
4031 }
4032
4033 return 0;
4034
4035fail_mmu_destroy:
4036 kvm_mmu_destroy(vcpu);
4037fail_free_pio_data:
Zhang Xiantaoad312c72007-12-13 23:50:52 +08004038 free_page((unsigned long)vcpu->arch.pio_data);
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08004039fail:
4040 return r;
4041}
4042
4043void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
4044{
4045 kvm_free_lapic(vcpu);
Marcelo Tosatti3200f402008-03-29 20:17:59 -03004046 down_read(&vcpu->kvm->slots_lock);
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08004047 kvm_mmu_destroy(vcpu);
Marcelo Tosatti3200f402008-03-29 20:17:59 -03004048 up_read(&vcpu->kvm->slots_lock);
Zhang Xiantaoad312c72007-12-13 23:50:52 +08004049 free_page((unsigned long)vcpu->arch.pio_data);
Zhang Xiantaoe9b11c12007-11-14 20:38:21 +08004050}
Zhang Xiantaod19a9cd2007-11-18 18:43:45 +08004051
4052struct kvm *kvm_arch_create_vm(void)
4053{
4054 struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
4055
4056 if (!kvm)
4057 return ERR_PTR(-ENOMEM);
4058
Zhang Xiantaof05e70a2007-12-14 10:01:48 +08004059 INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
Ben-Ami Yassour4d5c5d02008-07-28 19:26:26 +03004060 INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
Zhang Xiantaod19a9cd2007-11-18 18:43:45 +08004061
Sheng Yang5550af42008-10-15 20:15:06 +08004062 /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
4063 set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
4064
Zhang Xiantaod19a9cd2007-11-18 18:43:45 +08004065 return kvm;
4066}
4067
4068static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
4069{
4070 vcpu_load(vcpu);
4071 kvm_mmu_unload(vcpu);
4072 vcpu_put(vcpu);
4073}
4074
4075static void kvm_free_vcpus(struct kvm *kvm)
4076{
4077 unsigned int i;
4078
4079 /*
4080 * Unpin any mmu pages first.
4081 */
4082 for (i = 0; i < KVM_MAX_VCPUS; ++i)
4083 if (kvm->vcpus[i])
4084 kvm_unload_vcpu_mmu(kvm->vcpus[i]);
4085 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
4086 if (kvm->vcpus[i]) {
4087 kvm_arch_vcpu_free(kvm->vcpus[i]);
4088 kvm->vcpus[i] = NULL;
4089 }
4090 }
4091
4092}
4093
4094void kvm_arch_destroy_vm(struct kvm *kvm)
4095{
Ben-Ami Yassour62c476c2008-09-14 03:48:28 +03004096 kvm_iommu_unmap_guest(kvm);
Amit Shahbfadade2008-09-16 18:04:28 +03004097 kvm_free_all_assigned_devices(kvm);
Sheng Yang78376992008-01-28 05:10:22 +08004098 kvm_free_pit(kvm);
Zhang Xiantaod7deeeb02007-12-14 10:17:34 +08004099 kfree(kvm->arch.vpic);
4100 kfree(kvm->arch.vioapic);
Zhang Xiantaod19a9cd2007-11-18 18:43:45 +08004101 kvm_free_vcpus(kvm);
4102 kvm_free_physmem(kvm);
Avi Kivity3d458302008-03-25 11:26:13 +02004103 if (kvm->arch.apic_access_page)
4104 put_page(kvm->arch.apic_access_page);
Sheng Yangb7ebfb02008-04-25 21:44:52 +08004105 if (kvm->arch.ept_identity_pagetable)
4106 put_page(kvm->arch.ept_identity_pagetable);
Zhang Xiantaod19a9cd2007-11-18 18:43:45 +08004107 kfree(kvm);
4108}
Zhang Xiantao0de10342007-11-20 16:25:04 +08004109
4110int kvm_arch_set_memory_region(struct kvm *kvm,
4111 struct kvm_userspace_memory_region *mem,
4112 struct kvm_memory_slot old,
4113 int user_alloc)
4114{
4115 int npages = mem->memory_size >> PAGE_SHIFT;
4116 struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
4117
4118	/* To keep backward compatibility with older userspace,
4119	 * x86 needs to handle the !user_alloc case.
4120 */
4121 if (!user_alloc) {
4122 if (npages && !old.rmap) {
Andrea Arcangeli604b38a2008-07-25 16:32:03 +02004123 unsigned long userspace_addr;
4124
Izik Eidus72dc67a2008-02-10 18:04:15 +02004125 down_write(&current->mm->mmap_sem);
Andrea Arcangeli604b38a2008-07-25 16:32:03 +02004126 userspace_addr = do_mmap(NULL, 0,
4127 npages * PAGE_SIZE,
4128 PROT_READ | PROT_WRITE,
Avi Kivityacee3c02008-08-26 17:22:47 +03004129 MAP_PRIVATE | MAP_ANONYMOUS,
Andrea Arcangeli604b38a2008-07-25 16:32:03 +02004130 0);
Izik Eidus72dc67a2008-02-10 18:04:15 +02004131 up_write(&current->mm->mmap_sem);
Zhang Xiantao0de10342007-11-20 16:25:04 +08004132
Andrea Arcangeli604b38a2008-07-25 16:32:03 +02004133 if (IS_ERR((void *)userspace_addr))
4134 return PTR_ERR((void *)userspace_addr);
4135
4136 /* set userspace_addr atomically for kvm_hva_to_rmapp */
4137 spin_lock(&kvm->mmu_lock);
4138 memslot->userspace_addr = userspace_addr;
4139 spin_unlock(&kvm->mmu_lock);
Zhang Xiantao0de10342007-11-20 16:25:04 +08004140 } else {
4141 if (!old.user_alloc && old.rmap) {
4142 int ret;
4143
Izik Eidus72dc67a2008-02-10 18:04:15 +02004144 down_write(&current->mm->mmap_sem);
Zhang Xiantao0de10342007-11-20 16:25:04 +08004145 ret = do_munmap(current->mm, old.userspace_addr,
4146 old.npages * PAGE_SIZE);
Izik Eidus72dc67a2008-02-10 18:04:15 +02004147 up_write(&current->mm->mmap_sem);
Zhang Xiantao0de10342007-11-20 16:25:04 +08004148 if (ret < 0)
4149 printk(KERN_WARNING
4150 "kvm_vm_ioctl_set_memory_region: "
4151 "failed to munmap memory\n");
4152 }
4153 }
4154 }
4155
Zhang Xiantaof05e70a2007-12-14 10:01:48 +08004156 if (!kvm->arch.n_requested_mmu_pages) {
Zhang Xiantao0de10342007-11-20 16:25:04 +08004157 unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
4158 kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
4159 }
4160
4161 kvm_mmu_slot_remove_write_access(kvm, mem->slot);
4162 kvm_flush_remote_tlbs(kvm);
4163
4164 return 0;
4165}
Zhang Xiantao1d737c82007-12-14 09:35:10 +08004166
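The user_alloc branch above is the normal path taken when userspace registers memory with KVM_SET_USER_MEMORY_REGION; the do_mmap() fallback only services older clients. A sketch of the modern userspace side (slot number and guest physical address are arbitrary here):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static int add_memslot(int vm_fd, unsigned long long size)
{
	void *mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	struct kvm_userspace_memory_region mr = {
		.slot = 0,
		.guest_phys_addr = 0,
		.memory_size = size,
	};

	if (mem == MAP_FAILED)
		return -1;
	mr.userspace_addr = (unsigned long long)(unsigned long)mem;
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &mr);
}
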
Marcelo Tosatti34d4cb82008-07-10 20:49:31 -03004167void kvm_arch_flush_shadow(struct kvm *kvm)
4168{
4169 kvm_mmu_zap_all(kvm);
4170}
4171
Zhang Xiantao1d737c82007-12-14 09:35:10 +08004172int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
4173{
Avi Kivitya4535292008-04-13 17:54:35 +03004174 return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
Jan Kiszka0496fbb2008-09-26 09:30:53 +02004175 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
4176 || vcpu->arch.nmi_pending;
Zhang Xiantao1d737c82007-12-14 09:35:10 +08004177}
Zhang Xiantao57361992007-12-17 14:21:40 +08004178
4179static void vcpu_kick_intr(void *info)
4180{
4181#ifdef DEBUG
4182 struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
4183	printk(KERN_DEBUG "vcpu_kick_intr %p\n", vcpu);
4184#endif
4185}
4186
4187void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
4188{
4189 int ipi_pcpu = vcpu->cpu;
Marcelo Tosattie9571ed2008-04-11 15:01:22 -03004190 int cpu = get_cpu();
Zhang Xiantao57361992007-12-17 14:21:40 +08004191
4192 if (waitqueue_active(&vcpu->wq)) {
4193 wake_up_interruptible(&vcpu->wq);
4194 ++vcpu->stat.halt_wakeup;
4195 }
Marcelo Tosattie9571ed2008-04-11 15:01:22 -03004196 /*
4197 * We may be called synchronously with irqs disabled in guest mode,
4198 * So need not to call smp_call_function_single() in that case.
4199 */
4200 if (vcpu->guest_mode && vcpu->cpu != cpu)
Jens Axboe8691e5a2008-06-06 11:18:06 +02004201 smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0);
Marcelo Tosattie9571ed2008-04-11 15:01:22 -03004202 put_cpu();
Zhang Xiantao57361992007-12-17 14:21:40 +08004203}