blob: 94ec0e30969d1b58dada27eca491ed0eacff827c [file] [log] [blame]
Paul Mackerrasde56a942011-06-29 00:21:34 +00001/*
2 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
3 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
4 *
5 * Authors:
6 * Paul Mackerras <paulus@au1.ibm.com>
7 * Alexander Graf <agraf@suse.de>
8 * Kevin Wolf <mail@kevin-wolf.de>
9 *
10 * Description: KVM functions specific to running on Book 3S
11 * processors in hypervisor mode (specifically POWER7 and later).
12 *
13 * This file is derived from arch/powerpc/kvm/book3s.c,
14 * by Alexander Graf <agraf@suse.de>.
15 *
16 * This program is free software; you can redistribute it and/or modify
17 * it under the terms of the GNU General Public License, version 2, as
18 * published by the Free Software Foundation.
19 */
20
21#include <linux/kvm_host.h>
22#include <linux/err.h>
23#include <linux/slab.h>
24#include <linux/preempt.h>
25#include <linux/sched.h>
26#include <linux/delay.h>
Paul Gortmaker66b15db2011-05-27 10:46:24 -040027#include <linux/export.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000028#include <linux/fs.h>
29#include <linux/anon_inodes.h>
30#include <linux/cpumask.h>
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +000031#include <linux/spinlock.h>
32#include <linux/page-flags.h>
Paul Mackerras2c9097e2012-09-11 13:27:01 +000033#include <linux/srcu.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000034
35#include <asm/reg.h>
36#include <asm/cputable.h>
37#include <asm/cacheflush.h>
38#include <asm/tlbflush.h>
39#include <asm/uaccess.h>
40#include <asm/io.h>
41#include <asm/kvm_ppc.h>
42#include <asm/kvm_book3s.h>
43#include <asm/mmu_context.h>
44#include <asm/lppaca.h>
45#include <asm/processor.h>
Paul Mackerras371fefd2011-06-29 00:23:08 +000046#include <asm/cputhreads.h>
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +000047#include <asm/page.h>
Michael Neulingde1d9242011-11-09 20:39:49 +000048#include <asm/hvcall.h>
David Howellsae3a1972012-03-28 18:30:02 +010049#include <asm/switch_to.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000050#include <linux/gfp.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000051#include <linux/vmalloc.h>
52#include <linux/highmem.h>
Paul Mackerrasc77162d2011-12-12 12:31:00 +000053#include <linux/hugetlb.h>
Paul Mackerrasde56a942011-06-29 00:21:34 +000054
55/* #define EXIT_DEBUG */
56/* #define EXIT_DEBUG_SIMPLE */
57/* #define EXIT_DEBUG_INT */
58
Paul Mackerras19ccb762011-07-23 17:42:46 +100059static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
Paul Mackerras32fad282012-05-04 02:32:53 +000060static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
Paul Mackerras19ccb762011-07-23 17:42:46 +100061
Paul Mackerrasde56a942011-06-29 00:21:34 +000062void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
63{
Paul Mackerras0456ec42012-02-03 00:56:21 +000064 struct kvmppc_vcore *vc = vcpu->arch.vcore;
65
Paul Mackerrasde56a942011-06-29 00:21:34 +000066 local_paca->kvm_hstate.kvm_vcpu = vcpu;
Paul Mackerras0456ec42012-02-03 00:56:21 +000067 local_paca->kvm_hstate.kvm_vcore = vc;
68 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
69 vc->stolen_tb += mftb() - vc->preempt_tb;
Paul Mackerrasde56a942011-06-29 00:21:34 +000070}
71
72void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
73{
Paul Mackerras0456ec42012-02-03 00:56:21 +000074 struct kvmppc_vcore *vc = vcpu->arch.vcore;
75
76 if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
77 vc->preempt_tb = mftb();
Paul Mackerrasde56a942011-06-29 00:21:34 +000078}
79
Paul Mackerrasde56a942011-06-29 00:21:34 +000080void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
81{
82 vcpu->arch.shregs.msr = msr;
Paul Mackerras19ccb762011-07-23 17:42:46 +100083 kvmppc_end_cede(vcpu);
Paul Mackerrasde56a942011-06-29 00:21:34 +000084}
85
86void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr)
87{
88 vcpu->arch.pvr = pvr;
89}
90
91void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
92{
93 int r;
94
95 pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
96 pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
97 vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
98 for (r = 0; r < 16; ++r)
99 pr_err("r%2d = %.16lx r%d = %.16lx\n",
100 r, kvmppc_get_gpr(vcpu, r),
101 r+16, kvmppc_get_gpr(vcpu, r+16));
102 pr_err("ctr = %.16lx lr = %.16lx\n",
103 vcpu->arch.ctr, vcpu->arch.lr);
104 pr_err("srr0 = %.16llx srr1 = %.16llx\n",
105 vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
106 pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
107 vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
108 pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
109 vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
110 pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
111 vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
112 pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
113 pr_err("fault dar = %.16lx dsisr = %.8x\n",
114 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
115 pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
116 for (r = 0; r < vcpu->arch.slb_max; ++r)
117 pr_err(" ESID = %.16llx VSID = %.16llx\n",
118 vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
119 pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +0000120 vcpu->kvm->arch.lpcr, vcpu->kvm->arch.sdr1,
Paul Mackerrasde56a942011-06-29 00:21:34 +0000121 vcpu->arch.last_inst);
122}
123
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000124struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
125{
126 int r;
127 struct kvm_vcpu *v, *ret = NULL;
128
129 mutex_lock(&kvm->lock);
130 kvm_for_each_vcpu(r, v, kvm) {
131 if (v->vcpu_id == id) {
132 ret = v;
133 break;
134 }
135 }
136 mutex_unlock(&kvm->lock);
137 return ret;
138}
139
140static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
141{
142 vpa->shared_proc = 1;
143 vpa->yield_count = 1;
144}
145
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000146/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
147struct reg_vpa {
148 u32 dummy;
149 union {
150 u16 hword;
151 u32 word;
152 } length;
153};
154
155static int vpa_is_registered(struct kvmppc_vpa *vpap)
156{
157 if (vpap->update_pending)
158 return vpap->next_gpa != 0;
159 return vpap->pinned_addr != NULL;
160}
161
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000162static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
163 unsigned long flags,
164 unsigned long vcpuid, unsigned long vpa)
165{
166 struct kvm *kvm = vcpu->kvm;
Paul Mackerras93e60242011-12-12 12:28:55 +0000167 unsigned long len, nb;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000168 void *va;
169 struct kvm_vcpu *tvcpu;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000170 int err;
171 int subfunc;
172 struct kvmppc_vpa *vpap;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000173
174 tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
175 if (!tvcpu)
176 return H_PARAMETER;
177
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000178 subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
179 if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
180 subfunc == H_VPA_REG_SLB) {
181 /* Registering new area - address must be cache-line aligned */
182 if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000183 return H_PARAMETER;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000184
185 /* convert logical addr to kernel addr and read length */
Paul Mackerras93e60242011-12-12 12:28:55 +0000186 va = kvmppc_pin_guest_page(kvm, vpa, &nb);
187 if (va == NULL)
Paul Mackerrasb2b2f162011-12-12 12:28:21 +0000188 return H_PARAMETER;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000189 if (subfunc == H_VPA_REG_VPA)
190 len = ((struct reg_vpa *)va)->length.hword;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000191 else
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000192 len = ((struct reg_vpa *)va)->length.word;
193 kvmppc_unpin_guest_page(kvm, va);
194
195 /* Check length */
196 if (len > nb || len < sizeof(struct reg_vpa))
197 return H_PARAMETER;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000198 } else {
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000199 vpa = 0;
200 len = 0;
201 }
202
203 err = H_PARAMETER;
204 vpap = NULL;
205 spin_lock(&tvcpu->arch.vpa_update_lock);
206
207 switch (subfunc) {
208 case H_VPA_REG_VPA: /* register VPA */
209 if (len < sizeof(struct lppaca))
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000210 break;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000211 vpap = &tvcpu->arch.vpa;
212 err = 0;
213 break;
214
215 case H_VPA_REG_DTL: /* register DTL */
216 if (len < sizeof(struct dtl_entry))
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000217 break;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000218 len -= len % sizeof(struct dtl_entry);
219
220 /* Check that they have previously registered a VPA */
221 err = H_RESOURCE;
222 if (!vpa_is_registered(&tvcpu->arch.vpa))
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000223 break;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000224
225 vpap = &tvcpu->arch.dtl;
226 err = 0;
227 break;
228
229 case H_VPA_REG_SLB: /* register SLB shadow buffer */
230 /* Check that they have previously registered a VPA */
231 err = H_RESOURCE;
232 if (!vpa_is_registered(&tvcpu->arch.vpa))
233 break;
234
235 vpap = &tvcpu->arch.slb_shadow;
236 err = 0;
237 break;
238
239 case H_VPA_DEREG_VPA: /* deregister VPA */
240 /* Check they don't still have a DTL or SLB buf registered */
241 err = H_RESOURCE;
242 if (vpa_is_registered(&tvcpu->arch.dtl) ||
243 vpa_is_registered(&tvcpu->arch.slb_shadow))
244 break;
245
246 vpap = &tvcpu->arch.vpa;
247 err = 0;
248 break;
249
250 case H_VPA_DEREG_DTL: /* deregister DTL */
251 vpap = &tvcpu->arch.dtl;
252 err = 0;
253 break;
254
255 case H_VPA_DEREG_SLB: /* deregister SLB shadow buffer */
256 vpap = &tvcpu->arch.slb_shadow;
257 err = 0;
258 break;
259 }
260
261 if (vpap) {
262 vpap->next_gpa = vpa;
263 vpap->len = len;
264 vpap->update_pending = 1;
265 }
266
267 spin_unlock(&tvcpu->arch.vpa_update_lock);
268
269 return err;
270}
271
Paul Mackerras081f3232012-06-01 20:20:24 +1000272static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000273{
Paul Mackerras081f3232012-06-01 20:20:24 +1000274 struct kvm *kvm = vcpu->kvm;
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000275 void *va;
276 unsigned long nb;
Paul Mackerras081f3232012-06-01 20:20:24 +1000277 unsigned long gpa;
278
279 /*
280 * We need to pin the page pointed to by vpap->next_gpa,
281 * but we can't call kvmppc_pin_guest_page under the lock
282 * as it does get_user_pages() and down_read(). So we
283 * have to drop the lock, pin the page, then get the lock
284 * again and check that a new area didn't get registered
285 * in the meantime.
286 */
287 for (;;) {
288 gpa = vpap->next_gpa;
289 spin_unlock(&vcpu->arch.vpa_update_lock);
290 va = NULL;
291 nb = 0;
292 if (gpa)
293 va = kvmppc_pin_guest_page(kvm, vpap->next_gpa, &nb);
294 spin_lock(&vcpu->arch.vpa_update_lock);
295 if (gpa == vpap->next_gpa)
296 break;
297 /* sigh... unpin that one and try again */
298 if (va)
299 kvmppc_unpin_guest_page(kvm, va);
300 }
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000301
302 vpap->update_pending = 0;
Paul Mackerras081f3232012-06-01 20:20:24 +1000303 if (va && nb < vpap->len) {
304 /*
305 * If it's now too short, it must be that userspace
306 * has changed the mappings underlying guest memory,
307 * so unregister the region.
308 */
309 kvmppc_unpin_guest_page(kvm, va);
310 va = NULL;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000311 }
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000312 if (vpap->pinned_addr)
313 kvmppc_unpin_guest_page(kvm, vpap->pinned_addr);
314 vpap->pinned_addr = va;
315 if (va)
316 vpap->pinned_end = va + vpap->len;
317}
Paul Mackerras93e60242011-12-12 12:28:55 +0000318
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000319static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
320{
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000321 spin_lock(&vcpu->arch.vpa_update_lock);
322 if (vcpu->arch.vpa.update_pending) {
Paul Mackerras081f3232012-06-01 20:20:24 +1000323 kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000324 init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
325 }
326 if (vcpu->arch.dtl.update_pending) {
Paul Mackerras081f3232012-06-01 20:20:24 +1000327 kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000328 vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
329 vcpu->arch.dtl_index = 0;
330 }
331 if (vcpu->arch.slb_shadow.update_pending)
Paul Mackerras081f3232012-06-01 20:20:24 +1000332 kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000333 spin_unlock(&vcpu->arch.vpa_update_lock);
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000334}
335
Paul Mackerras0456ec42012-02-03 00:56:21 +0000336static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
337 struct kvmppc_vcore *vc)
338{
339 struct dtl_entry *dt;
340 struct lppaca *vpa;
341 unsigned long old_stolen;
342
343 dt = vcpu->arch.dtl_ptr;
344 vpa = vcpu->arch.vpa.pinned_addr;
345 old_stolen = vcpu->arch.stolen_logged;
346 vcpu->arch.stolen_logged = vc->stolen_tb;
347 if (!dt || !vpa)
348 return;
349 memset(dt, 0, sizeof(struct dtl_entry));
350 dt->dispatch_reason = 7;
351 dt->processor_id = vc->pcpu + vcpu->arch.ptid;
352 dt->timebase = mftb();
353 dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
354 dt->srr0 = kvmppc_get_pc(vcpu);
355 dt->srr1 = vcpu->arch.shregs.msr;
356 ++dt;
357 if (dt == vcpu->arch.dtl.pinned_end)
358 dt = vcpu->arch.dtl.pinned_addr;
359 vcpu->arch.dtl_ptr = dt;
360 /* order writing *dt vs. writing vpa->dtl_idx */
361 smp_wmb();
362 vpa->dtl_idx = ++vcpu->arch.dtl_index;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000363}
364
365int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
366{
367 unsigned long req = kvmppc_get_gpr(vcpu, 3);
368 unsigned long target, ret = H_SUCCESS;
369 struct kvm_vcpu *tvcpu;
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000370 int idx;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000371
372 switch (req) {
Paul Mackerrasc77162d2011-12-12 12:31:00 +0000373 case H_ENTER:
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000374 idx = srcu_read_lock(&vcpu->kvm->srcu);
Paul Mackerrasc77162d2011-12-12 12:31:00 +0000375 ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
376 kvmppc_get_gpr(vcpu, 5),
377 kvmppc_get_gpr(vcpu, 6),
378 kvmppc_get_gpr(vcpu, 7));
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000379 srcu_read_unlock(&vcpu->kvm->srcu, idx);
Paul Mackerrasc77162d2011-12-12 12:31:00 +0000380 break;
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000381 case H_CEDE:
Paul Mackerrasa8606e22011-06-29 00:22:05 +0000382 break;
383 case H_PROD:
384 target = kvmppc_get_gpr(vcpu, 4);
385 tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
386 if (!tvcpu) {
387 ret = H_PARAMETER;
388 break;
389 }
390 tvcpu->arch.prodded = 1;
391 smp_mb();
392 if (vcpu->arch.ceded) {
393 if (waitqueue_active(&vcpu->wq)) {
394 wake_up_interruptible(&vcpu->wq);
395 vcpu->stat.halt_wakeup++;
396 }
397 }
398 break;
399 case H_CONFER:
400 break;
401 case H_REGISTER_VPA:
402 ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
403 kvmppc_get_gpr(vcpu, 5),
404 kvmppc_get_gpr(vcpu, 6));
405 break;
406 default:
407 return RESUME_HOST;
408 }
409 kvmppc_set_gpr(vcpu, 3, ret);
410 vcpu->arch.hcall_needed = 0;
411 return RESUME_GUEST;
412}
413
Paul Mackerrasde56a942011-06-29 00:21:34 +0000414static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
415 struct task_struct *tsk)
416{
417 int r = RESUME_HOST;
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000418 int srcu_idx;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000419
420 vcpu->stat.sum_exits++;
421
422 run->exit_reason = KVM_EXIT_UNKNOWN;
423 run->ready_for_interrupt_injection = 1;
424 switch (vcpu->arch.trap) {
425 /* We're good on these - the host merely wanted to get our attention */
426 case BOOK3S_INTERRUPT_HV_DECREMENTER:
427 vcpu->stat.dec_exits++;
428 r = RESUME_GUEST;
429 break;
430 case BOOK3S_INTERRUPT_EXTERNAL:
431 vcpu->stat.ext_intr_exits++;
432 r = RESUME_GUEST;
433 break;
434 case BOOK3S_INTERRUPT_PERFMON:
435 r = RESUME_GUEST;
436 break;
437 case BOOK3S_INTERRUPT_PROGRAM:
438 {
439 ulong flags;
440 /*
441 * Normally program interrupts are delivered directly
442 * to the guest by the hardware, but we can get here
443 * as a result of a hypervisor emulation interrupt
444 * (e40) getting turned into a 700 by BML RTAS.
445 */
446 flags = vcpu->arch.shregs.msr & 0x1f0000ull;
447 kvmppc_core_queue_program(vcpu, flags);
448 r = RESUME_GUEST;
449 break;
450 }
451 case BOOK3S_INTERRUPT_SYSCALL:
452 {
453 /* hcall - punt to userspace */
454 int i;
455
456 if (vcpu->arch.shregs.msr & MSR_PR) {
457 /* sc 1 from userspace - reflect to guest syscall */
458 kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_SYSCALL);
459 r = RESUME_GUEST;
460 break;
461 }
462 run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
463 for (i = 0; i < 9; ++i)
464 run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
465 run->exit_reason = KVM_EXIT_PAPR_HCALL;
466 vcpu->arch.hcall_needed = 1;
467 r = RESUME_HOST;
468 break;
469 }
470 /*
Paul Mackerras342d3db2011-12-12 12:38:05 +0000471 * We get these next two if the guest accesses a page which it thinks
472 * it has mapped but which is not actually present, either because
473 * it is for an emulated I/O device or because the corresonding
474 * host page has been paged out. Any other HDSI/HISI interrupts
475 * have been handled already.
Paul Mackerrasde56a942011-06-29 00:21:34 +0000476 */
477 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000478 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
Paul Mackerras697d3892011-12-12 12:36:37 +0000479 r = kvmppc_book3s_hv_page_fault(run, vcpu,
480 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000481 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000482 break;
483 case BOOK3S_INTERRUPT_H_INST_STORAGE:
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000484 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
Paul Mackerras342d3db2011-12-12 12:38:05 +0000485 r = kvmppc_book3s_hv_page_fault(run, vcpu,
486 kvmppc_get_pc(vcpu), 0);
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000487 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000488 break;
489 /*
490 * This occurs if the guest executes an illegal instruction.
491 * We just generate a program interrupt to the guest, since
492 * we don't emulate any guest instructions at this stage.
493 */
494 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
495 kvmppc_core_queue_program(vcpu, 0x80000);
496 r = RESUME_GUEST;
497 break;
498 default:
499 kvmppc_dump_regs(vcpu);
500 printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
501 vcpu->arch.trap, kvmppc_get_pc(vcpu),
502 vcpu->arch.shregs.msr);
503 r = RESUME_HOST;
504 BUG();
505 break;
506 }
507
Paul Mackerrasde56a942011-06-29 00:21:34 +0000508 return r;
509}
510
511int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
512 struct kvm_sregs *sregs)
513{
514 int i;
515
516 sregs->pvr = vcpu->arch.pvr;
517
518 memset(sregs, 0, sizeof(struct kvm_sregs));
519 for (i = 0; i < vcpu->arch.slb_max; i++) {
520 sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
521 sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
522 }
523
524 return 0;
525}
526
527int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
528 struct kvm_sregs *sregs)
529{
530 int i, j;
531
532 kvmppc_set_pvr(vcpu, sregs->pvr);
533
534 j = 0;
535 for (i = 0; i < vcpu->arch.slb_nr; i++) {
536 if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
537 vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
538 vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
539 ++j;
540 }
541 }
542 vcpu->arch.slb_max = j;
543
544 return 0;
545}
546
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000547int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
Paul Mackerras31f34382011-12-12 12:26:50 +0000548{
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000549 int r = 0;
550 long int i;
Paul Mackerras31f34382011-12-12 12:26:50 +0000551
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000552 switch (id) {
Paul Mackerras31f34382011-12-12 12:26:50 +0000553 case KVM_REG_PPC_HIOR:
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000554 *val = get_reg_val(id, 0);
555 break;
556 case KVM_REG_PPC_DABR:
557 *val = get_reg_val(id, vcpu->arch.dabr);
558 break;
559 case KVM_REG_PPC_DSCR:
560 *val = get_reg_val(id, vcpu->arch.dscr);
561 break;
562 case KVM_REG_PPC_PURR:
563 *val = get_reg_val(id, vcpu->arch.purr);
564 break;
565 case KVM_REG_PPC_SPURR:
566 *val = get_reg_val(id, vcpu->arch.spurr);
567 break;
568 case KVM_REG_PPC_AMR:
569 *val = get_reg_val(id, vcpu->arch.amr);
570 break;
571 case KVM_REG_PPC_UAMOR:
572 *val = get_reg_val(id, vcpu->arch.uamor);
573 break;
574 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
575 i = id - KVM_REG_PPC_MMCR0;
576 *val = get_reg_val(id, vcpu->arch.mmcr[i]);
577 break;
578 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
579 i = id - KVM_REG_PPC_PMC1;
580 *val = get_reg_val(id, vcpu->arch.pmc[i]);
Paul Mackerras31f34382011-12-12 12:26:50 +0000581 break;
Paul Mackerrasa8bd19e2012-09-25 20:32:30 +0000582#ifdef CONFIG_VSX
583 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
584 if (cpu_has_feature(CPU_FTR_VSX)) {
585 /* VSX => FP reg i is stored in arch.vsr[2*i] */
586 long int i = id - KVM_REG_PPC_FPR0;
587 *val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
588 } else {
589 /* let generic code handle it */
590 r = -EINVAL;
591 }
592 break;
593 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
594 if (cpu_has_feature(CPU_FTR_VSX)) {
595 long int i = id - KVM_REG_PPC_VSR0;
596 val->vsxval[0] = vcpu->arch.vsr[2 * i];
597 val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
598 } else {
599 r = -ENXIO;
600 }
601 break;
602#endif /* CONFIG_VSX */
Paul Mackerras31f34382011-12-12 12:26:50 +0000603 default:
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000604 r = -EINVAL;
Paul Mackerras31f34382011-12-12 12:26:50 +0000605 break;
606 }
607
608 return r;
609}
610
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000611int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
Paul Mackerras31f34382011-12-12 12:26:50 +0000612{
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000613 int r = 0;
614 long int i;
Paul Mackerras31f34382011-12-12 12:26:50 +0000615
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000616 switch (id) {
Paul Mackerras31f34382011-12-12 12:26:50 +0000617 case KVM_REG_PPC_HIOR:
Paul Mackerras31f34382011-12-12 12:26:50 +0000618 /* Only allow this to be set to zero */
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000619 if (set_reg_val(id, *val))
Paul Mackerras31f34382011-12-12 12:26:50 +0000620 r = -EINVAL;
621 break;
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000622 case KVM_REG_PPC_DABR:
623 vcpu->arch.dabr = set_reg_val(id, *val);
624 break;
625 case KVM_REG_PPC_DSCR:
626 vcpu->arch.dscr = set_reg_val(id, *val);
627 break;
628 case KVM_REG_PPC_PURR:
629 vcpu->arch.purr = set_reg_val(id, *val);
630 break;
631 case KVM_REG_PPC_SPURR:
632 vcpu->arch.spurr = set_reg_val(id, *val);
633 break;
634 case KVM_REG_PPC_AMR:
635 vcpu->arch.amr = set_reg_val(id, *val);
636 break;
637 case KVM_REG_PPC_UAMOR:
638 vcpu->arch.uamor = set_reg_val(id, *val);
639 break;
640 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
641 i = id - KVM_REG_PPC_MMCR0;
642 vcpu->arch.mmcr[i] = set_reg_val(id, *val);
643 break;
644 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
645 i = id - KVM_REG_PPC_PMC1;
646 vcpu->arch.pmc[i] = set_reg_val(id, *val);
647 break;
Paul Mackerrasa8bd19e2012-09-25 20:32:30 +0000648#ifdef CONFIG_VSX
649 case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
650 if (cpu_has_feature(CPU_FTR_VSX)) {
651 /* VSX => FP reg i is stored in arch.vsr[2*i] */
652 long int i = id - KVM_REG_PPC_FPR0;
653 vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
654 } else {
655 /* let generic code handle it */
656 r = -EINVAL;
657 }
658 break;
659 case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
660 if (cpu_has_feature(CPU_FTR_VSX)) {
661 long int i = id - KVM_REG_PPC_VSR0;
662 vcpu->arch.vsr[2 * i] = val->vsxval[0];
663 vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
664 } else {
665 r = -ENXIO;
666 }
667 break;
668#endif /* CONFIG_VSX */
Paul Mackerras31f34382011-12-12 12:26:50 +0000669 default:
Paul Mackerrasa136a8b2012-09-25 20:31:56 +0000670 r = -EINVAL;
Paul Mackerras31f34382011-12-12 12:26:50 +0000671 break;
672 }
673
674 return r;
675}
676
Paul Mackerrasde56a942011-06-29 00:21:34 +0000677int kvmppc_core_check_processor_compat(void)
678{
Paul Mackerras9e368f22011-06-29 00:40:08 +0000679 if (cpu_has_feature(CPU_FTR_HVMODE))
Paul Mackerrasde56a942011-06-29 00:21:34 +0000680 return 0;
681 return -EIO;
682}
683
684struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
685{
686 struct kvm_vcpu *vcpu;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000687 int err = -EINVAL;
688 int core;
689 struct kvmppc_vcore *vcore;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000690
Paul Mackerras371fefd2011-06-29 00:23:08 +0000691 core = id / threads_per_core;
692 if (core >= KVM_MAX_VCORES)
693 goto out;
694
695 err = -ENOMEM;
Sasha Levin6b75e6b2011-12-07 10:24:56 +0200696 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000697 if (!vcpu)
698 goto out;
699
700 err = kvm_vcpu_init(vcpu, kvm, id);
701 if (err)
702 goto free_vcpu;
703
704 vcpu->arch.shared = &vcpu->arch.shregs;
705 vcpu->arch.last_cpu = -1;
706 vcpu->arch.mmcr[0] = MMCR0_FC;
707 vcpu->arch.ctrl = CTRL_RUNLATCH;
708 /* default to host PVR, since we can't spoof it */
709 vcpu->arch.pvr = mfspr(SPRN_PVR);
710 kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000711 spin_lock_init(&vcpu->arch.vpa_update_lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000712
Paul Mackerrasde56a942011-06-29 00:21:34 +0000713 kvmppc_mmu_book3s_hv_init(vcpu);
714
Paul Mackerras371fefd2011-06-29 00:23:08 +0000715 /*
Paul Mackerras19ccb762011-07-23 17:42:46 +1000716 * We consider the vcpu stopped until we see the first run ioctl for it.
Paul Mackerras371fefd2011-06-29 00:23:08 +0000717 */
Paul Mackerras19ccb762011-07-23 17:42:46 +1000718 vcpu->arch.state = KVMPPC_VCPU_STOPPED;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000719
720 init_waitqueue_head(&vcpu->arch.cpu_run);
721
722 mutex_lock(&kvm->lock);
723 vcore = kvm->arch.vcores[core];
724 if (!vcore) {
725 vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
726 if (vcore) {
727 INIT_LIST_HEAD(&vcore->runnable_threads);
728 spin_lock_init(&vcore->lock);
Paul Mackerras19ccb762011-07-23 17:42:46 +1000729 init_waitqueue_head(&vcore->wq);
Paul Mackerras0456ec42012-02-03 00:56:21 +0000730 vcore->preempt_tb = mftb();
Paul Mackerras371fefd2011-06-29 00:23:08 +0000731 }
732 kvm->arch.vcores[core] = vcore;
733 }
734 mutex_unlock(&kvm->lock);
735
736 if (!vcore)
737 goto free_vcpu;
738
739 spin_lock(&vcore->lock);
740 ++vcore->num_threads;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000741 spin_unlock(&vcore->lock);
742 vcpu->arch.vcore = vcore;
Paul Mackerras0456ec42012-02-03 00:56:21 +0000743 vcpu->arch.stolen_logged = vcore->stolen_tb;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000744
Alexander Grafaf8f38b2011-08-10 13:57:08 +0200745 vcpu->arch.cpu_type = KVM_CPU_3S_64;
746 kvmppc_sanity_check(vcpu);
747
Paul Mackerrasde56a942011-06-29 00:21:34 +0000748 return vcpu;
749
750free_vcpu:
Sasha Levin6b75e6b2011-12-07 10:24:56 +0200751 kmem_cache_free(kvm_vcpu_cache, vcpu);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000752out:
753 return ERR_PTR(err);
754}
755
756void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
757{
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000758 spin_lock(&vcpu->arch.vpa_update_lock);
759 if (vcpu->arch.dtl.pinned_addr)
760 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.dtl.pinned_addr);
761 if (vcpu->arch.slb_shadow.pinned_addr)
762 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.slb_shadow.pinned_addr);
763 if (vcpu->arch.vpa.pinned_addr)
764 kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
765 spin_unlock(&vcpu->arch.vpa_update_lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000766 kvm_vcpu_uninit(vcpu);
Sasha Levin6b75e6b2011-12-07 10:24:56 +0200767 kmem_cache_free(kvm_vcpu_cache, vcpu);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000768}
769
Paul Mackerras19ccb762011-07-23 17:42:46 +1000770static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
Paul Mackerrasde56a942011-06-29 00:21:34 +0000771{
Paul Mackerras19ccb762011-07-23 17:42:46 +1000772 unsigned long dec_nsec, now;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000773
Paul Mackerras19ccb762011-07-23 17:42:46 +1000774 now = get_tb();
775 if (now > vcpu->arch.dec_expires) {
776 /* decrementer has already gone negative */
777 kvmppc_core_queue_dec(vcpu);
Scott Wood7e28e60e2011-11-08 18:23:20 -0600778 kvmppc_core_prepare_to_enter(vcpu);
Paul Mackerras19ccb762011-07-23 17:42:46 +1000779 return;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000780 }
Paul Mackerras19ccb762011-07-23 17:42:46 +1000781 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
782 / tb_ticks_per_sec;
783 hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
784 HRTIMER_MODE_REL);
785 vcpu->arch.timer_running = 1;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000786}
787
Paul Mackerras19ccb762011-07-23 17:42:46 +1000788static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
Paul Mackerras371fefd2011-06-29 00:23:08 +0000789{
Paul Mackerras19ccb762011-07-23 17:42:46 +1000790 vcpu->arch.ceded = 0;
791 if (vcpu->arch.timer_running) {
792 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
793 vcpu->arch.timer_running = 0;
794 }
Paul Mackerras371fefd2011-06-29 00:23:08 +0000795}
796
797extern int __kvmppc_vcore_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
798extern void xics_wake_cpu(int cpu);
799
800static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
801 struct kvm_vcpu *vcpu)
802{
Paul Mackerras371fefd2011-06-29 00:23:08 +0000803 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
804 return;
805 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
806 --vc->n_runnable;
Paul Mackerras19ccb762011-07-23 17:42:46 +1000807 ++vc->n_busy;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000808 list_del(&vcpu->arch.run_list);
809}
810
Paul Mackerrasf0888f72012-02-03 00:54:17 +0000811static int kvmppc_grab_hwthread(int cpu)
812{
813 struct paca_struct *tpaca;
814 long timeout = 1000;
815
816 tpaca = &paca[cpu];
817
818 /* Ensure the thread won't go into the kernel if it wakes */
819 tpaca->kvm_hstate.hwthread_req = 1;
820
821 /*
822 * If the thread is already executing in the kernel (e.g. handling
823 * a stray interrupt), wait for it to get back to nap mode.
824 * The smp_mb() is to ensure that our setting of hwthread_req
825 * is visible before we look at hwthread_state, so if this
826 * races with the code at system_reset_pSeries and the thread
827 * misses our setting of hwthread_req, we are sure to see its
828 * setting of hwthread_state, and vice versa.
829 */
830 smp_mb();
831 while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
832 if (--timeout <= 0) {
833 pr_err("KVM: couldn't grab cpu %d\n", cpu);
834 return -EBUSY;
835 }
836 udelay(1);
837 }
838 return 0;
839}
840
841static void kvmppc_release_hwthread(int cpu)
842{
843 struct paca_struct *tpaca;
844
845 tpaca = &paca[cpu];
846 tpaca->kvm_hstate.hwthread_req = 0;
847 tpaca->kvm_hstate.kvm_vcpu = NULL;
848}
849
Paul Mackerras371fefd2011-06-29 00:23:08 +0000850static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
851{
852 int cpu;
853 struct paca_struct *tpaca;
854 struct kvmppc_vcore *vc = vcpu->arch.vcore;
855
Paul Mackerras19ccb762011-07-23 17:42:46 +1000856 if (vcpu->arch.timer_running) {
857 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
858 vcpu->arch.timer_running = 0;
859 }
Paul Mackerras371fefd2011-06-29 00:23:08 +0000860 cpu = vc->pcpu + vcpu->arch.ptid;
861 tpaca = &paca[cpu];
862 tpaca->kvm_hstate.kvm_vcpu = vcpu;
863 tpaca->kvm_hstate.kvm_vcore = vc;
Paul Mackerras19ccb762011-07-23 17:42:46 +1000864 tpaca->kvm_hstate.napping = 0;
865 vcpu->cpu = vc->pcpu;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000866 smp_wmb();
Michael Neuling251da032011-11-10 16:03:20 +0000867#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
Paul Mackerras371fefd2011-06-29 00:23:08 +0000868 if (vcpu->arch.ptid) {
Paul Mackerrasf0888f72012-02-03 00:54:17 +0000869 kvmppc_grab_hwthread(cpu);
Paul Mackerras371fefd2011-06-29 00:23:08 +0000870 xics_wake_cpu(cpu);
871 ++vc->n_woken;
872 }
873#endif
874}
875
876static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
877{
878 int i;
879
880 HMT_low();
881 i = 0;
882 while (vc->nap_count < vc->n_woken) {
883 if (++i >= 1000000) {
884 pr_err("kvmppc_wait_for_nap timeout %d %d\n",
885 vc->nap_count, vc->n_woken);
886 break;
887 }
888 cpu_relax();
889 }
890 HMT_medium();
891}
892
893/*
894 * Check that we are on thread 0 and that any other threads in
895 * this core are off-line.
896 */
897static int on_primary_thread(void)
898{
899 int cpu = smp_processor_id();
900 int thr = cpu_thread_in_core(cpu);
901
902 if (thr)
903 return 0;
904 while (++thr < threads_per_core)
905 if (cpu_online(cpu + thr))
906 return 0;
907 return 1;
908}
909
910/*
911 * Run a set of guest threads on a physical core.
912 * Called with vc->lock held.
913 */
914static int kvmppc_run_core(struct kvmppc_vcore *vc)
915{
Paul Mackerras19ccb762011-07-23 17:42:46 +1000916 struct kvm_vcpu *vcpu, *vcpu0, *vnext;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000917 long ret;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000918 u64 now;
Paul Mackerras081f3232012-06-01 20:20:24 +1000919 int ptid, i, need_vpa_update;
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000920 int srcu_idx;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000921
Paul Mackerras371fefd2011-06-29 00:23:08 +0000922 /* don't start if any threads have a signal pending */
Paul Mackerras081f3232012-06-01 20:20:24 +1000923 need_vpa_update = 0;
924 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
Paul Mackerras371fefd2011-06-29 00:23:08 +0000925 if (signal_pending(vcpu->arch.run_task))
926 return 0;
Paul Mackerras081f3232012-06-01 20:20:24 +1000927 need_vpa_update |= vcpu->arch.vpa.update_pending |
928 vcpu->arch.slb_shadow.update_pending |
929 vcpu->arch.dtl.update_pending;
930 }
931
932 /*
933 * Initialize *vc, in particular vc->vcore_state, so we can
934 * drop the vcore lock if necessary.
935 */
936 vc->n_woken = 0;
937 vc->nap_count = 0;
938 vc->entry_exit_count = 0;
939 vc->vcore_state = VCORE_RUNNING;
940 vc->in_guest = 0;
941 vc->napping_threads = 0;
942
943 /*
944 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
945 * which can't be called with any spinlocks held.
946 */
947 if (need_vpa_update) {
948 spin_unlock(&vc->lock);
949 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
950 kvmppc_update_vpas(vcpu);
951 spin_lock(&vc->lock);
952 }
Paul Mackerrasde56a942011-06-29 00:21:34 +0000953
954 /*
955 * Make sure we are running on thread 0, and that
956 * secondary threads are offline.
957 * XXX we should also block attempts to bring any
958 * secondary threads online.
959 */
Paul Mackerras371fefd2011-06-29 00:23:08 +0000960 if (threads_per_core > 1 && !on_primary_thread()) {
961 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
962 vcpu->arch.ret = -EBUSY;
963 goto out;
Paul Mackerrasde56a942011-06-29 00:21:34 +0000964 }
965
Paul Mackerras19ccb762011-07-23 17:42:46 +1000966 /*
967 * Assign physical thread IDs, first to non-ceded vcpus
968 * and then to ceded ones.
969 */
970 ptid = 0;
971 vcpu0 = NULL;
972 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
973 if (!vcpu->arch.ceded) {
974 if (!ptid)
975 vcpu0 = vcpu;
976 vcpu->arch.ptid = ptid++;
977 }
978 }
979 if (!vcpu0)
980 return 0; /* nothing to run */
981 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
982 if (vcpu->arch.ceded)
983 vcpu->arch.ptid = ptid++;
984
Paul Mackerras0456ec42012-02-03 00:56:21 +0000985 vc->stolen_tb += mftb() - vc->preempt_tb;
Paul Mackerras371fefd2011-06-29 00:23:08 +0000986 vc->pcpu = smp_processor_id();
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000987 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
Paul Mackerras371fefd2011-06-29 00:23:08 +0000988 kvmppc_start_thread(vcpu);
Paul Mackerras0456ec42012-02-03 00:56:21 +0000989 kvmppc_create_dtl_entry(vcpu, vc);
Paul Mackerras2e25aa52012-02-19 17:46:32 +0000990 }
Paul Mackerrasf0888f72012-02-03 00:54:17 +0000991 /* Grab any remaining hw threads so they can't go into the kernel */
992 for (i = ptid; i < threads_per_core; ++i)
993 kvmppc_grab_hwthread(vc->pcpu + i);
Paul Mackerras371fefd2011-06-29 00:23:08 +0000994
995 preempt_disable();
Paul Mackerras19ccb762011-07-23 17:42:46 +1000996 spin_unlock(&vc->lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +0000997
Paul Mackerras19ccb762011-07-23 17:42:46 +1000998 kvm_guest_enter();
Paul Mackerras2c9097e2012-09-11 13:27:01 +0000999
1000 srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
1001
Paul Mackerras19ccb762011-07-23 17:42:46 +10001002 __kvmppc_vcore_entry(NULL, vcpu0);
Paul Mackerrasf0888f72012-02-03 00:54:17 +00001003 for (i = 0; i < threads_per_core; ++i)
1004 kvmppc_release_hwthread(vc->pcpu + i);
Paul Mackerras19ccb762011-07-23 17:42:46 +10001005
Paul Mackerras371fefd2011-06-29 00:23:08 +00001006 spin_lock(&vc->lock);
Paul Mackerras19ccb762011-07-23 17:42:46 +10001007 /* disable sending of IPIs on virtual external irqs */
1008 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
1009 vcpu->cpu = -1;
1010 /* wait for secondary threads to finish writing their state to memory */
Paul Mackerras371fefd2011-06-29 00:23:08 +00001011 if (vc->nap_count < vc->n_woken)
1012 kvmppc_wait_for_nap(vc);
1013 /* prevent other vcpu threads from doing kvmppc_start_thread() now */
Paul Mackerras19ccb762011-07-23 17:42:46 +10001014 vc->vcore_state = VCORE_EXITING;
Paul Mackerras371fefd2011-06-29 00:23:08 +00001015 spin_unlock(&vc->lock);
1016
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001017 srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
1018
Paul Mackerras371fefd2011-06-29 00:23:08 +00001019 /* make sure updates to secondary vcpu structs are visible now */
1020 smp_mb();
Paul Mackerrasde56a942011-06-29 00:21:34 +00001021 kvm_guest_exit();
1022
1023 preempt_enable();
1024 kvm_resched(vcpu);
1025
1026 now = get_tb();
Paul Mackerras371fefd2011-06-29 00:23:08 +00001027 list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
1028 /* cancel pending dec exception if dec is positive */
1029 if (now < vcpu->arch.dec_expires &&
1030 kvmppc_core_pending_dec(vcpu))
1031 kvmppc_core_dequeue_dec(vcpu);
Paul Mackerras19ccb762011-07-23 17:42:46 +10001032
1033 ret = RESUME_GUEST;
1034 if (vcpu->arch.trap)
1035 ret = kvmppc_handle_exit(vcpu->arch.kvm_run, vcpu,
1036 vcpu->arch.run_task);
1037
Paul Mackerras371fefd2011-06-29 00:23:08 +00001038 vcpu->arch.ret = ret;
1039 vcpu->arch.trap = 0;
Paul Mackerras19ccb762011-07-23 17:42:46 +10001040
1041 if (vcpu->arch.ceded) {
1042 if (ret != RESUME_GUEST)
1043 kvmppc_end_cede(vcpu);
1044 else
1045 kvmppc_set_timer(vcpu);
1046 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00001047 }
Paul Mackerrasde56a942011-06-29 00:21:34 +00001048
Paul Mackerras371fefd2011-06-29 00:23:08 +00001049 spin_lock(&vc->lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +00001050 out:
Paul Mackerras19ccb762011-07-23 17:42:46 +10001051 vc->vcore_state = VCORE_INACTIVE;
Paul Mackerras0456ec42012-02-03 00:56:21 +00001052 vc->preempt_tb = mftb();
Paul Mackerras371fefd2011-06-29 00:23:08 +00001053 list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
1054 arch.run_list) {
1055 if (vcpu->arch.ret != RESUME_GUEST) {
1056 kvmppc_remove_runnable(vc, vcpu);
1057 wake_up(&vcpu->arch.cpu_run);
1058 }
1059 }
1060
1061 return 1;
1062}
1063
Paul Mackerras19ccb762011-07-23 17:42:46 +10001064/*
1065 * Wait for some other vcpu thread to execute us, and
1066 * wake us up when we need to handle something in the host.
1067 */
1068static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
Paul Mackerras371fefd2011-06-29 00:23:08 +00001069{
Paul Mackerras371fefd2011-06-29 00:23:08 +00001070 DEFINE_WAIT(wait);
1071
Paul Mackerras19ccb762011-07-23 17:42:46 +10001072 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
1073 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
1074 schedule();
1075 finish_wait(&vcpu->arch.cpu_run, &wait);
1076}
Paul Mackerras371fefd2011-06-29 00:23:08 +00001077
Paul Mackerras19ccb762011-07-23 17:42:46 +10001078/*
1079 * All the vcpus in this vcore are idle, so wait for a decrementer
1080 * or external interrupt to one of the vcpus. vc->lock is held.
1081 */
1082static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
1083{
1084 DEFINE_WAIT(wait);
1085 struct kvm_vcpu *v;
1086 int all_idle = 1;
1087
1088 prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
1089 vc->vcore_state = VCORE_SLEEPING;
1090 spin_unlock(&vc->lock);
1091 list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
1092 if (!v->arch.ceded || v->arch.pending_exceptions) {
1093 all_idle = 0;
1094 break;
1095 }
1096 }
1097 if (all_idle)
1098 schedule();
1099 finish_wait(&vc->wq, &wait);
1100 spin_lock(&vc->lock);
1101 vc->vcore_state = VCORE_INACTIVE;
1102}
1103
1104static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1105{
1106 int n_ceded;
1107 int prev_state;
1108 struct kvmppc_vcore *vc;
1109 struct kvm_vcpu *v, *vn;
Paul Mackerras9e368f22011-06-29 00:40:08 +00001110
Paul Mackerras371fefd2011-06-29 00:23:08 +00001111 kvm_run->exit_reason = 0;
1112 vcpu->arch.ret = RESUME_GUEST;
1113 vcpu->arch.trap = 0;
1114
Paul Mackerras371fefd2011-06-29 00:23:08 +00001115 /*
1116 * Synchronize with other threads in this virtual core
1117 */
1118 vc = vcpu->arch.vcore;
1119 spin_lock(&vc->lock);
Paul Mackerras19ccb762011-07-23 17:42:46 +10001120 vcpu->arch.ceded = 0;
Paul Mackerras371fefd2011-06-29 00:23:08 +00001121 vcpu->arch.run_task = current;
1122 vcpu->arch.kvm_run = kvm_run;
Paul Mackerras19ccb762011-07-23 17:42:46 +10001123 prev_state = vcpu->arch.state;
1124 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
Paul Mackerras371fefd2011-06-29 00:23:08 +00001125 list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
1126 ++vc->n_runnable;
1127
Paul Mackerras19ccb762011-07-23 17:42:46 +10001128 /*
1129 * This happens the first time this is called for a vcpu.
1130 * If the vcore is already running, we may be able to start
1131 * this thread straight away and have it join in.
1132 */
1133 if (prev_state == KVMPPC_VCPU_STOPPED) {
1134 if (vc->vcore_state == VCORE_RUNNING &&
1135 VCORE_EXIT_COUNT(vc) == 0) {
1136 vcpu->arch.ptid = vc->n_runnable - 1;
Paul Mackerras371fefd2011-06-29 00:23:08 +00001137 kvmppc_start_thread(vcpu);
Paul Mackerras19ccb762011-07-23 17:42:46 +10001138 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00001139
Paul Mackerras19ccb762011-07-23 17:42:46 +10001140 } else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
1141 --vc->n_busy;
1142
1143 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
1144 !signal_pending(current)) {
1145 if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
1146 spin_unlock(&vc->lock);
1147 kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
1148 spin_lock(&vc->lock);
1149 continue;
1150 }
Paul Mackerras0456ec42012-02-03 00:56:21 +00001151 vc->runner = vcpu;
Paul Mackerras19ccb762011-07-23 17:42:46 +10001152 n_ceded = 0;
1153 list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
1154 n_ceded += v->arch.ceded;
1155 if (n_ceded == vc->n_runnable)
1156 kvmppc_vcore_blocked(vc);
1157 else
1158 kvmppc_run_core(vc);
1159
1160 list_for_each_entry_safe(v, vn, &vc->runnable_threads,
1161 arch.run_list) {
Scott Wood7e28e60e2011-11-08 18:23:20 -06001162 kvmppc_core_prepare_to_enter(v);
Paul Mackerras19ccb762011-07-23 17:42:46 +10001163 if (signal_pending(v->arch.run_task)) {
1164 kvmppc_remove_runnable(vc, v);
1165 v->stat.signal_exits++;
1166 v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
1167 v->arch.ret = -EINTR;
1168 wake_up(&v->arch.cpu_run);
1169 }
1170 }
Paul Mackerras0456ec42012-02-03 00:56:21 +00001171 vc->runner = NULL;
Paul Mackerras371fefd2011-06-29 00:23:08 +00001172 }
1173
Paul Mackerras19ccb762011-07-23 17:42:46 +10001174 if (signal_pending(current)) {
1175 if (vc->vcore_state == VCORE_RUNNING ||
1176 vc->vcore_state == VCORE_EXITING) {
1177 spin_unlock(&vc->lock);
1178 kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
1179 spin_lock(&vc->lock);
1180 }
1181 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
1182 kvmppc_remove_runnable(vc, vcpu);
1183 vcpu->stat.signal_exits++;
1184 kvm_run->exit_reason = KVM_EXIT_INTR;
1185 vcpu->arch.ret = -EINTR;
1186 }
1187 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00001188
Paul Mackerras19ccb762011-07-23 17:42:46 +10001189 spin_unlock(&vc->lock);
Paul Mackerras371fefd2011-06-29 00:23:08 +00001190 return vcpu->arch.ret;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001191}
1192
Paul Mackerrasa8606e22011-06-29 00:22:05 +00001193int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
1194{
1195 int r;
1196
Alexander Grafaf8f38b2011-08-10 13:57:08 +02001197 if (!vcpu->arch.sane) {
1198 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
1199 return -EINVAL;
1200 }
1201
Scott Wood25051b5a2011-11-08 18:23:23 -06001202 kvmppc_core_prepare_to_enter(vcpu);
1203
Paul Mackerras19ccb762011-07-23 17:42:46 +10001204 /* No need to go into the guest when all we'll do is come back out */
1205 if (signal_pending(current)) {
1206 run->exit_reason = KVM_EXIT_INTR;
1207 return -EINTR;
1208 }
1209
Paul Mackerras32fad282012-05-04 02:32:53 +00001210 atomic_inc(&vcpu->kvm->arch.vcpus_running);
1211 /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
1212 smp_mb();
1213
1214 /* On the first time here, set up HTAB and VRMA or RMA */
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001215 if (!vcpu->kvm->arch.rma_setup_done) {
Paul Mackerras32fad282012-05-04 02:32:53 +00001216 r = kvmppc_hv_setup_htab_rma(vcpu);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001217 if (r)
Paul Mackerras32fad282012-05-04 02:32:53 +00001218 goto out;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001219 }
Paul Mackerras19ccb762011-07-23 17:42:46 +10001220
1221 flush_fp_to_thread(current);
1222 flush_altivec_to_thread(current);
1223 flush_vsx_to_thread(current);
1224 vcpu->arch.wqp = &vcpu->arch.vcore->wq;
Paul Mackerras342d3db2011-12-12 12:38:05 +00001225 vcpu->arch.pgdir = current->mm->pgd;
Paul Mackerras19ccb762011-07-23 17:42:46 +10001226
Paul Mackerrasa8606e22011-06-29 00:22:05 +00001227 do {
1228 r = kvmppc_run_vcpu(run, vcpu);
1229
1230 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
1231 !(vcpu->arch.shregs.msr & MSR_PR)) {
1232 r = kvmppc_pseries_do_hcall(vcpu);
Scott Wood7e28e60e2011-11-08 18:23:20 -06001233 kvmppc_core_prepare_to_enter(vcpu);
Paul Mackerrasa8606e22011-06-29 00:22:05 +00001234 }
1235 } while (r == RESUME_GUEST);
Paul Mackerras32fad282012-05-04 02:32:53 +00001236
1237 out:
1238 atomic_dec(&vcpu->kvm->arch.vcpus_running);
Paul Mackerrasa8606e22011-06-29 00:22:05 +00001239 return r;
1240}
1241
David Gibson54738c02011-06-29 00:22:41 +00001242
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001243/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Paul Mackerras9e368f22011-06-29 00:40:08 +00001244 Assumes POWER7 or PPC970. */
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001245static inline int lpcr_rmls(unsigned long rma_size)
1246{
1247 switch (rma_size) {
1248 case 32ul << 20: /* 32 MB */
Paul Mackerras9e368f22011-06-29 00:40:08 +00001249 if (cpu_has_feature(CPU_FTR_ARCH_206))
1250 return 8; /* only supported on POWER7 */
1251 return -1;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001252 case 64ul << 20: /* 64 MB */
1253 return 3;
1254 case 128ul << 20: /* 128 MB */
1255 return 7;
1256 case 256ul << 20: /* 256 MB */
1257 return 4;
1258 case 1ul << 30: /* 1 GB */
1259 return 2;
1260 case 16ul << 30: /* 16 GB */
1261 return 1;
1262 case 256ul << 30: /* 256 GB */
1263 return 0;
1264 default:
1265 return -1;
1266 }
1267}
1268
1269static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1270{
Alexander Grafb4e70612012-01-16 16:50:10 +01001271 struct kvmppc_linear_info *ri = vma->vm_file->private_data;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001272 struct page *page;
1273
1274 if (vmf->pgoff >= ri->npages)
1275 return VM_FAULT_SIGBUS;
1276
1277 page = pfn_to_page(ri->base_pfn + vmf->pgoff);
1278 get_page(page);
1279 vmf->page = page;
1280 return 0;
1281}
1282
1283static const struct vm_operations_struct kvm_rma_vm_ops = {
1284 .fault = kvm_rma_fault,
1285};
1286
1287static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
1288{
1289 vma->vm_flags |= VM_RESERVED;
1290 vma->vm_ops = &kvm_rma_vm_ops;
1291 return 0;
1292}
1293
1294static int kvm_rma_release(struct inode *inode, struct file *filp)
1295{
Alexander Grafb4e70612012-01-16 16:50:10 +01001296 struct kvmppc_linear_info *ri = filp->private_data;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001297
1298 kvm_release_rma(ri);
1299 return 0;
1300}
1301
1302static struct file_operations kvm_rma_fops = {
1303 .mmap = kvm_rma_mmap,
1304 .release = kvm_rma_release,
1305};
1306
1307long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
1308{
Alexander Grafb4e70612012-01-16 16:50:10 +01001309 struct kvmppc_linear_info *ri;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001310 long fd;
1311
1312 ri = kvm_alloc_rma();
1313 if (!ri)
1314 return -ENOMEM;
1315
1316 fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR);
1317 if (fd < 0)
1318 kvm_release_rma(ri);
1319
1320 ret->rma_size = ri->npages << PAGE_SHIFT;
1321 return fd;
1322}
1323
Benjamin Herrenschmidt5b747162012-04-26 19:43:42 +00001324static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
1325 int linux_psize)
1326{
1327 struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
1328
1329 if (!def->shift)
1330 return;
1331 (*sps)->page_shift = def->shift;
1332 (*sps)->slb_enc = def->sllp;
1333 (*sps)->enc[0].page_shift = def->shift;
1334 (*sps)->enc[0].pte_enc = def->penc;
1335 (*sps)++;
1336}
1337
1338int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
1339{
1340 struct kvm_ppc_one_seg_page_size *sps;
1341
1342 info->flags = KVM_PPC_PAGE_SIZES_REAL;
1343 if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
1344 info->flags |= KVM_PPC_1T_SEGMENTS;
1345 info->slb_size = mmu_slb_size;
1346
1347 /* We only support these sizes for now, and no muti-size segments */
1348 sps = &info->sps[0];
1349 kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
1350 kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
1351 kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
1352
1353 return 0;
1354}
1355
Paul Mackerras82ed3612011-12-15 02:03:22 +00001356/*
1357 * Get (and clear) the dirty memory log for a memory slot.
1358 */
1359int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
1360{
1361 struct kvm_memory_slot *memslot;
1362 int r;
1363 unsigned long n;
1364
1365 mutex_lock(&kvm->slots_lock);
1366
1367 r = -EINVAL;
1368 if (log->slot >= KVM_MEMORY_SLOTS)
1369 goto out;
1370
1371 memslot = id_to_memslot(kvm->memslots, log->slot);
1372 r = -ENOENT;
1373 if (!memslot->dirty_bitmap)
1374 goto out;
1375
1376 n = kvm_dirty_bitmap_bytes(memslot);
1377 memset(memslot->dirty_bitmap, 0, n);
1378
Paul Mackerrasdfe49db2012-09-11 13:28:18 +00001379 r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
Paul Mackerras82ed3612011-12-15 02:03:22 +00001380 if (r)
1381 goto out;
1382
1383 r = -EFAULT;
1384 if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
1385 goto out;
1386
1387 r = 0;
1388out:
1389 mutex_unlock(&kvm->slots_lock);
1390 return r;
1391}
1392
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001393static unsigned long slb_pgsize_encoding(unsigned long psize)
1394{
1395 unsigned long senc = 0;
1396
1397 if (psize > 0x1000) {
1398 senc = SLB_VSID_L;
1399 if (psize == 0x10000)
1400 senc |= SLB_VSID_LP_01;
1401 }
1402 return senc;
1403}
1404
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00001405static void unpin_slot(struct kvm_memory_slot *memslot)
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001406{
1407 unsigned long *physp;
1408 unsigned long j, npages, pfn;
1409 struct page *page;
1410
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00001411 physp = memslot->arch.slot_phys;
1412 npages = memslot->npages;
1413 if (!physp)
1414 return;
1415 for (j = 0; j < npages; j++) {
1416 if (!(physp[j] & KVMPPC_GOT_PAGE))
1417 continue;
1418 pfn = physp[j] >> PAGE_SHIFT;
1419 page = pfn_to_page(pfn);
1420 SetPageDirty(page);
1421 put_page(page);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001422 }
1423}
1424
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00001425void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
1426 struct kvm_memory_slot *dont)
1427{
1428 if (!dont || free->arch.rmap != dont->arch.rmap) {
1429 vfree(free->arch.rmap);
1430 free->arch.rmap = NULL;
1431 }
1432 if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
1433 unpin_slot(free);
1434 vfree(free->arch.slot_phys);
1435 free->arch.slot_phys = NULL;
1436 }
1437}
1438
1439int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
1440 unsigned long npages)
1441{
1442 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
1443 if (!slot->arch.rmap)
1444 return -ENOMEM;
1445 slot->arch.slot_phys = NULL;
1446
1447 return 0;
1448}
1449
1450int kvmppc_core_prepare_memory_region(struct kvm *kvm,
1451 struct kvm_memory_slot *memslot,
1452 struct kvm_userspace_memory_region *mem)
1453{
1454 unsigned long *phys;
1455
1456 /* Allocate a slot_phys array if needed */
1457 phys = memslot->arch.slot_phys;
1458 if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
1459 phys = vzalloc(memslot->npages * sizeof(unsigned long));
1460 if (!phys)
1461 return -ENOMEM;
1462 memslot->arch.slot_phys = phys;
1463 }
1464
1465 return 0;
1466}
1467
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001468void kvmppc_core_commit_memory_region(struct kvm *kvm,
Paul Mackerrasdfe49db2012-09-11 13:28:18 +00001469 struct kvm_userspace_memory_region *mem,
1470 struct kvm_memory_slot old)
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001471{
Paul Mackerrasdfe49db2012-09-11 13:28:18 +00001472 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
1473 struct kvm_memory_slot *memslot;
1474
1475 if (npages && old.npages) {
1476 /*
1477 * If modifying a memslot, reset all the rmap dirty bits.
1478 * If this is a new memslot, we don't need to do anything
1479 * since the rmap array starts out as all zeroes,
1480 * i.e. no pages are dirty.
1481 */
1482 memslot = id_to_memslot(kvm->memslots, mem->slot);
1483 kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
1484 }
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001485}
1486
Paul Mackerras32fad282012-05-04 02:32:53 +00001487static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001488{
1489 int err = 0;
1490 struct kvm *kvm = vcpu->kvm;
Alexander Grafb4e70612012-01-16 16:50:10 +01001491 struct kvmppc_linear_info *ri = NULL;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001492 unsigned long hva;
1493 struct kvm_memory_slot *memslot;
1494 struct vm_area_struct *vma;
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001495 unsigned long lpcr, senc;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001496 unsigned long psize, porder;
1497 unsigned long rma_size;
1498 unsigned long rmls;
1499 unsigned long *physp;
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001500 unsigned long i, npages;
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001501 int srcu_idx;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001502
1503 mutex_lock(&kvm->lock);
1504 if (kvm->arch.rma_setup_done)
1505 goto out; /* another vcpu beat us to it */
1506
Paul Mackerras32fad282012-05-04 02:32:53 +00001507 /* Allocate hashed page table (if not done already) and reset it */
1508 if (!kvm->arch.hpt_virt) {
1509 err = kvmppc_alloc_hpt(kvm, NULL);
1510 if (err) {
1511 pr_err("KVM: Couldn't alloc HPT\n");
1512 goto out;
1513 }
1514 }
1515
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001516 /* Look up the memslot for guest physical address 0 */
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001517 srcu_idx = srcu_read_lock(&kvm->srcu);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001518 memslot = gfn_to_memslot(kvm, 0);
1519
1520 /* We must have some memory at 0 by now */
1521 err = -EINVAL;
1522 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001523 goto out_srcu;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001524
1525 /* Look up the VMA for the start of this memory slot */
1526 hva = memslot->userspace_addr;
1527 down_read(&current->mm->mmap_sem);
1528 vma = find_vma(current->mm, hva);
1529 if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
1530 goto up_out;
1531
1532 psize = vma_kernel_pagesize(vma);
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001533 porder = __ilog2(psize);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001534
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001535 /* Is this one of our preallocated RMAs? */
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001536 if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
1537 hva == vma->vm_start)
1538 ri = vma->vm_file->private_data;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001539
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001540 up_read(&current->mm->mmap_sem);
1541
1542 if (!ri) {
1543 /* On POWER7, use VRMA; on PPC970, give up */
1544 err = -EPERM;
1545 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1546 pr_err("KVM: CPU requires an RMO\n");
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001547 goto out_srcu;
Paul Mackerras9e368f22011-06-29 00:40:08 +00001548 }
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001549
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001550 /* We can handle 4k, 64k or 16M pages in the VRMA */
1551 err = -EINVAL;
1552 if (!(psize == 0x1000 || psize == 0x10000 ||
1553 psize == 0x1000000))
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001554 goto out_srcu;
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001555
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001556 /* Update VRMASD field in the LPCR */
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001557 senc = slb_pgsize_encoding(psize);
Paul Mackerras697d3892011-12-12 12:36:37 +00001558 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
1559 (VRMA_VSID << SLB_VSID_SHIFT_1T);
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001560 lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
1561 lpcr |= senc << (LPCR_VRMASD_SH - 4);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001562 kvm->arch.lpcr = lpcr;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001563
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001564 /* Create HPTEs in the hash page table for the VRMA */
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001565 kvmppc_map_vrma(vcpu, memslot, porder);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001566
1567 } else {
1568 /* Set up to use an RMO region */
1569 rma_size = ri->npages;
1570 if (rma_size > memslot->npages)
1571 rma_size = memslot->npages;
1572 rma_size <<= PAGE_SHIFT;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001573 rmls = lpcr_rmls(rma_size);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001574 err = -EINVAL;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001575 if (rmls < 0) {
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001576 pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001577 goto out_srcu;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001578 }
1579 atomic_inc(&ri->use_count);
1580 kvm->arch.rma = ri;
Paul Mackerras9e368f22011-06-29 00:40:08 +00001581
1582 /* Update LPCR and RMOR */
1583 lpcr = kvm->arch.lpcr;
1584 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1585 /* PPC970; insert RMLS value (split field) in HID4 */
1586 lpcr &= ~((1ul << HID4_RMLS0_SH) |
1587 (3ul << HID4_RMLS2_SH));
1588 lpcr |= ((rmls >> 2) << HID4_RMLS0_SH) |
1589 ((rmls & 3) << HID4_RMLS2_SH);
1590 /* RMOR is also in HID4 */
1591 lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
1592 << HID4_RMOR_SH;
1593 } else {
1594 /* POWER7 */
1595 lpcr &= ~(LPCR_VPM0 | LPCR_VRMA_L);
1596 lpcr |= rmls << LPCR_RMLS_SH;
1597 kvm->arch.rmor = kvm->arch.rma->base_pfn << PAGE_SHIFT;
1598 }
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001599 kvm->arch.lpcr = lpcr;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001600 pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001601 ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001602
1603 /* Initialize phys addrs of pages in RMO */
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00001604 npages = ri->npages;
1605 porder = __ilog2(npages);
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00001606 physp = memslot->arch.slot_phys;
1607 if (physp) {
1608 if (npages > memslot->npages)
1609 npages = memslot->npages;
1610 spin_lock(&kvm->arch.slot_phys_lock);
1611 for (i = 0; i < npages; ++i)
1612 physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
1613 porder;
1614 spin_unlock(&kvm->arch.slot_phys_lock);
1615 }
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001616 }
1617
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001618 /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
1619 smp_wmb();
1620 kvm->arch.rma_setup_done = 1;
1621 err = 0;
Paul Mackerras2c9097e2012-09-11 13:27:01 +00001622 out_srcu:
1623 srcu_read_unlock(&kvm->srcu, srcu_idx);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001624 out:
1625 mutex_unlock(&kvm->lock);
1626 return err;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001627
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001628 up_out:
1629 up_read(&current->mm->mmap_sem);
1630 goto out;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001631}
1632
1633int kvmppc_core_init_vm(struct kvm *kvm)
1634{
Paul Mackerras32fad282012-05-04 02:32:53 +00001635 unsigned long lpcr, lpid;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001636
Paul Mackerras32fad282012-05-04 02:32:53 +00001637 /* Allocate the guest's logical partition ID */
1638
1639 lpid = kvmppc_alloc_lpid();
1640 if (lpid < 0)
1641 return -ENOMEM;
1642 kvm->arch.lpid = lpid;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001643
David Gibson54738c02011-06-29 00:22:41 +00001644 INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001645
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001646 kvm->arch.rma = NULL;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001647
Paul Mackerras9e368f22011-06-29 00:40:08 +00001648 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001649
Paul Mackerras9e368f22011-06-29 00:40:08 +00001650 if (cpu_has_feature(CPU_FTR_ARCH_201)) {
1651 /* PPC970; HID4 is effectively the LPCR */
Paul Mackerras9e368f22011-06-29 00:40:08 +00001652 kvm->arch.host_lpid = 0;
1653 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
1654 lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
1655 lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
1656 ((lpid & 0xf) << HID4_LPID5_SH);
1657 } else {
1658 /* POWER7; init LPCR for virtual RMA mode */
1659 kvm->arch.host_lpid = mfspr(SPRN_LPID);
1660 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
1661 lpcr &= LPCR_PECE | LPCR_LPES;
1662 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
Paul Mackerras697d3892011-12-12 12:36:37 +00001663 LPCR_VPM0 | LPCR_VPM1;
1664 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
1665 (VRMA_VSID << SLB_VSID_SHIFT_1T);
Paul Mackerras9e368f22011-06-29 00:40:08 +00001666 }
1667 kvm->arch.lpcr = lpcr;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001668
Paul Mackerras342d3db2011-12-12 12:38:05 +00001669 kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00001670 spin_lock_init(&kvm->arch.slot_phys_lock);
David Gibson54738c02011-06-29 00:22:41 +00001671 return 0;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001672}
1673
1674void kvmppc_core_destroy_vm(struct kvm *kvm)
1675{
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00001676 if (kvm->arch.rma) {
1677 kvm_release_rma(kvm->arch.rma);
1678 kvm->arch.rma = NULL;
1679 }
1680
Paul Mackerrasde56a942011-06-29 00:21:34 +00001681 kvmppc_free_hpt(kvm);
David Gibson54738c02011-06-29 00:22:41 +00001682 WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
Paul Mackerrasde56a942011-06-29 00:21:34 +00001683}
1684
1685/* These are stubs for now */
1686void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end)
1687{
1688}
1689
1690/* We don't need to emulate any privileged instructions or dcbz */
1691int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
1692 unsigned int inst, int *advance)
1693{
1694 return EMULATE_FAIL;
1695}
1696
Alexander Graf54771e62012-05-04 14:55:12 +02001697int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
Paul Mackerrasde56a942011-06-29 00:21:34 +00001698{
1699 return EMULATE_FAIL;
1700}
1701
Alexander Graf54771e62012-05-04 14:55:12 +02001702int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
Paul Mackerrasde56a942011-06-29 00:21:34 +00001703{
1704 return EMULATE_FAIL;
1705}
1706
1707static int kvmppc_book3s_hv_init(void)
1708{
1709 int r;
1710
1711 r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
1712
1713 if (r)
1714 return r;
1715
1716 r = kvmppc_mmu_hv_init();
1717
1718 return r;
1719}
1720
/* Module exit point: unregister from the generic KVM core. */
static void kvmppc_book3s_hv_exit(void)
{
	kvm_exit();
}
1725
/* Hook the init and exit routines above into module load/unload. */
module_init(kvmppc_book3s_hv_init);
module_exit(kvmppc_book3s_hv_exit);