/*
 * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
 * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
 *
 * Authors:
 *    Paul Mackerras <paulus@au1.ibm.com>
 *    Alexander Graf <agraf@suse.de>
 *    Kevin Wolf <mail@kevin-wolf.de>
 *
 * Description: KVM functions specific to running on Book 3S
 * processors in hypervisor mode (specifically POWER7 and later).
 *
 * This file is derived from arch/powerpc/kvm/book3s.c,
 * by Alexander Graf <agraf@suse.de>.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */

#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
#include <linux/sched/signal.h>
#include <linux/sched/stat.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>
#include <linux/miscdevice.h>
#include <linux/debugfs.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/hugetlb.h>
#include <linux/kvm_irqfd.h>
#include <linux/irqbypass.h>
#include <linux/module.h>
#include <linux/compiler.h>
#include <linux/of.h>

#include <asm/reg.h>
#include <asm/ppc-opcode.h>
#include <asm/disassemble.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <asm/smp.h>
#include <asm/dbell.h>
#include <asm/hmi.h>
#include <asm/pnv-pci.h>
#include <asm/mmu.h>
#include <asm/opal.h>
#include <asm/xics.h>
#include <asm/xive.h>

#include "book3s.h"

#define CREATE_TRACE_POINTS
#include "trace_hv.h"

/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */

/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)
/* Used to indicate that a guest passthrough interrupt needs to be handled */
#define RESUME_PASSTHROUGH	(RESUME_GUEST | RESUME_FLAG_ARCH2)

/* Used as a "null" value for timebase values */
#define TB_NIL	(~(u64)0)

static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);

static int dynamic_mt_modes = 6;
module_param(dynamic_mt_modes, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
static int target_smt_mode;
module_param(target_smt_mode, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");

#ifdef CONFIG_KVM_XICS
static struct kernel_param_ops module_param_ops = {
	.set = param_set_int,
	.get = param_get_int,
};

module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass,
							S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");

module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
							S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif

static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
static void kvmppc_setup_partition_table(struct kvm *kvm);

static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
		int *ip)
{
	int i = *ip;
	struct kvm_vcpu *vcpu;

	while (++i < MAX_SMT_THREADS) {
		vcpu = READ_ONCE(vc->runnable_threads[i]);
		if (vcpu) {
			*ip = i;
			return vcpu;
		}
	}
	return NULL;
}

/* Used to traverse the list of runnable threads for a given vcore */
#define for_each_runnable_thread(i, vcpu, vc) \
	for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
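
/*
 * Typical usage (sketch, with do_something() as a placeholder):
 *	for_each_runnable_thread(i, vcpu, vc)
 *		do_something(vc, vcpu);
 * The iterator starts at -1 so that next_runnable_thread() scans from
 * entry 0 of vc->runnable_threads.
 */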

static bool kvmppc_ipi_thread(int cpu)
{
	unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);

	/* On POWER9 we can use msgsnd to IPI any cpu */
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		msg |= get_hard_smp_processor_id(cpu);
		smp_mb();
		__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
		return true;
	}

	/* On POWER8 for IPIs to threads in the same core, use msgsnd */
	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
		preempt_disable();
		if (cpu_first_thread_sibling(cpu) ==
		    cpu_first_thread_sibling(smp_processor_id())) {
			msg |= cpu_thread_in_core(cpu);
			smp_mb();
			__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
			preempt_enable();
			return true;
		}
		preempt_enable();
	}

#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
	if (cpu >= 0 && cpu < nr_cpu_ids) {
		if (paca[cpu].kvm_hstate.xics_phys) {
			xics_wake_cpu(cpu);
			return true;
		}
		opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
		return true;
	}
#endif

	return false;
}

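/*
 * Kick a vcpu out of the guest: wake its wait queue if it is sleeping,
 * otherwise try a directed IPI to the physical thread it is running on
 * (kvmppc_ipi_thread() above), and fall back to an ordinary reschedule IPI.
 */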
static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
{
	int cpu;
	struct swait_queue_head *wqp;

	wqp = kvm_arch_vcpu_wq(vcpu);
	if (swq_has_sleeper(wqp)) {
		swake_up(wqp);
		++vcpu->stat.halt_wakeup;
	}

	cpu = READ_ONCE(vcpu->arch.thread_cpu);
	if (cpu >= 0 && kvmppc_ipi_thread(cpu))
		return;

	/* CPU points to the first thread of the core */
	cpu = vcpu->cpu;
	if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
		smp_send_reschedule(cpu);
}

/*
 * We use the vcpu_load/put functions to measure stolen time.
 * Stolen time is counted as time when either the vcpu is able to
 * run as part of a virtual core, but the task running the vcore
 * is preempted or sleeping, or when the vcpu needs something done
 * in the kernel by the task running the vcpu, but that task is
 * preempted or sleeping.  Those two things have to be counted
 * separately, since one of the vcpu tasks will take on the job
 * of running the core, and the other vcpu tasks in the vcore will
 * sleep waiting for it to do that, but that sleep shouldn't count
 * as stolen time.
 *
 * Hence we accumulate stolen time when the vcpu can run as part of
 * a vcore using vc->stolen_tb, and the stolen time when the vcpu
 * needs its task to do other things in the kernel (for example,
 * service a page fault) in busy_stolen.  We don't accumulate
 * stolen time for a vcore when it is inactive, or for a vcpu
 * when it is in state RUNNING or NOTREADY.  NOTREADY is a bit of
 * a misnomer; it means that the vcpu task is not executing in
 * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
 * the kernel.  We don't have any way of dividing up that time
 * between time that the vcpu is genuinely stopped, time that
 * the task is actively working on behalf of the vcpu, and time
 * that the task is preempted, so we don't count any of it as
 * stolen.
 *
 * Updates to busy_stolen are protected by arch.tbacct_lock;
 * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
 * lock.  The stolen times are measured in units of timebase ticks.
 * (Note that the != TB_NIL checks below are purely defensive;
 * they should never fail.)
 */

static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc)
{
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	vc->preempt_tb = mftb();
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}

static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc)
{
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	if (vc->preempt_tb != TB_NIL) {
		vc->stolen_tb += mftb() - vc->preempt_tb;
		vc->preempt_tb = TB_NIL;
	}
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}

static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	unsigned long flags;

	/*
	 * We can test vc->runner without taking the vcore lock,
	 * because only this task ever sets vc->runner to this
	 * vcpu, and once it is set to this vcpu, only this task
	 * ever sets it to NULL.
	 */
	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
		kvmppc_core_end_stolen(vc);

	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
	    vcpu->arch.busy_preempt != TB_NIL) {
		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
		vcpu->arch.busy_preempt = TB_NIL;
	}
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}

static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
{
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	unsigned long flags;

	if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
		kvmppc_core_start_stolen(vc);

	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
		vcpu->arch.busy_preempt = mftb();
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}

static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
{
	/*
	 * Check for illegal transactional state bit combination
	 * and if we find it, force the TS field to a safe state.
	 */
	if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
		msr &= ~MSR_TS_MASK;
	vcpu->arch.shregs.msr = msr;
	kvmppc_end_cede(vcpu);
}

static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
	vcpu->arch.pvr = pvr;
}

/* Dummy value used in computing PCR value below */
#define PCR_ARCH_300	(PCR_ARCH_207 << 1)

static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
	unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;

	/* We can (emulate) our own architecture version and anything older */
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		host_pcr_bit = PCR_ARCH_300;
	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
		host_pcr_bit = PCR_ARCH_207;
	else if (cpu_has_feature(CPU_FTR_ARCH_206))
		host_pcr_bit = PCR_ARCH_206;
	else
		host_pcr_bit = PCR_ARCH_205;

	/* Determine lowest PCR bit needed to run guest in given PVR level */
	guest_pcr_bit = host_pcr_bit;
	if (arch_compat) {
		switch (arch_compat) {
		case PVR_ARCH_205:
			guest_pcr_bit = PCR_ARCH_205;
			break;
		case PVR_ARCH_206:
		case PVR_ARCH_206p:
			guest_pcr_bit = PCR_ARCH_206;
			break;
		case PVR_ARCH_207:
			guest_pcr_bit = PCR_ARCH_207;
			break;
		case PVR_ARCH_300:
			guest_pcr_bit = PCR_ARCH_300;
			break;
		default:
			return -EINVAL;
		}
	}

	/* Check requested PCR bits don't exceed our capabilities */
	if (guest_pcr_bit > host_pcr_bit)
		return -EINVAL;

	spin_lock(&vc->lock);
	vc->arch_compat = arch_compat;
	/* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
	vc->pcr = host_pcr_bit - guest_pcr_bit;
	spin_unlock(&vc->lock);

	return 0;
}

static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
	int r;

	pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
	pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
	       vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
	for (r = 0; r < 16; ++r)
		pr_err("r%2d = %.16lx r%d = %.16lx\n",
		       r, kvmppc_get_gpr(vcpu, r),
		       r+16, kvmppc_get_gpr(vcpu, r+16));
	pr_err("ctr = %.16lx lr = %.16lx\n",
	       vcpu->arch.ctr, vcpu->arch.lr);
	pr_err("srr0 = %.16llx srr1 = %.16llx\n",
	       vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
	pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
	       vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
	pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
	       vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
	pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
	       vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
	pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
	pr_err("fault dar = %.16lx dsisr = %.8x\n",
	       vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
	pr_err("SLB (%d entries):\n", vcpu->arch.slb_max);
	for (r = 0; r < vcpu->arch.slb_max; ++r)
		pr_err("  ESID = %.16llx VSID = %.16llx\n",
		       vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
	pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
	       vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
	       vcpu->arch.last_inst);
}

static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
	struct kvm_vcpu *ret;

	mutex_lock(&kvm->lock);
	ret = kvm_get_vcpu_by_id(kvm, id);
	mutex_unlock(&kvm->lock);
	return ret;
}

static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
{
	vpa->__old_status |= LPPACA_OLD_SHARED_PROC;
	vpa->yield_count = cpu_to_be32(1);
}

static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
		   unsigned long addr, unsigned long len)
{
	/* check address is cacheline aligned */
	if (addr & (L1_CACHE_BYTES - 1))
		return -EINVAL;
	spin_lock(&vcpu->arch.vpa_update_lock);
	if (v->next_gpa != addr || v->len != len) {
		v->next_gpa = addr;
		v->len = addr ? len : 0;
		v->update_pending = 1;
	}
	spin_unlock(&vcpu->arch.vpa_update_lock);
	return 0;
}

/* Length for a per-processor buffer is passed in at offset 4 in the buffer */
struct reg_vpa {
	u32 dummy;
	union {
		__be16 hword;
		__be32 word;
	} length;
};

static int vpa_is_registered(struct kvmppc_vpa *vpap)
{
	if (vpap->update_pending)
		return vpap->next_gpa != 0;
	return vpap->pinned_addr != NULL;
}

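/*
 * Handle the H_REGISTER_VPA hypercall: depending on the subfunction
 * encoded in flags, register or deregister a VPA, dispatch trace log
 * (DTL) buffer or SLB shadow buffer for the target vcpu.  The actual
 * pinning of the new area is deferred to kvmppc_update_vpas().
 */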
static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
				       unsigned long flags,
				       unsigned long vcpuid, unsigned long vpa)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long len, nb;
	void *va;
	struct kvm_vcpu *tvcpu;
	int err;
	int subfunc;
	struct kvmppc_vpa *vpap;

	tvcpu = kvmppc_find_vcpu(kvm, vcpuid);
	if (!tvcpu)
		return H_PARAMETER;

	subfunc = (flags >> H_VPA_FUNC_SHIFT) & H_VPA_FUNC_MASK;
	if (subfunc == H_VPA_REG_VPA || subfunc == H_VPA_REG_DTL ||
	    subfunc == H_VPA_REG_SLB) {
		/* Registering new area - address must be cache-line aligned */
		if ((vpa & (L1_CACHE_BYTES - 1)) || !vpa)
			return H_PARAMETER;

		/* convert logical addr to kernel addr and read length */
		va = kvmppc_pin_guest_page(kvm, vpa, &nb);
		if (va == NULL)
			return H_PARAMETER;
		if (subfunc == H_VPA_REG_VPA)
			len = be16_to_cpu(((struct reg_vpa *)va)->length.hword);
		else
			len = be32_to_cpu(((struct reg_vpa *)va)->length.word);
		kvmppc_unpin_guest_page(kvm, va, vpa, false);

		/* Check length */
		if (len > nb || len < sizeof(struct reg_vpa))
			return H_PARAMETER;
	} else {
		vpa = 0;
		len = 0;
	}

	err = H_PARAMETER;
	vpap = NULL;
	spin_lock(&tvcpu->arch.vpa_update_lock);

	switch (subfunc) {
	case H_VPA_REG_VPA:		/* register VPA */
		/*
		 * The size of our lppaca is 1kB because of the way we align
		 * it for the guest to avoid crossing a 4kB boundary. We only
		 * use 640 bytes of the structure though, so we should accept
		 * clients that set a size of 640.
		 */
		if (len < 640)
			break;
		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_REG_DTL:		/* register DTL */
		if (len < sizeof(struct dtl_entry))
			break;
		len -= len % sizeof(struct dtl_entry);

		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_REG_SLB:		/* register SLB shadow buffer */
		/* Check that they have previously registered a VPA */
		err = H_RESOURCE;
		if (!vpa_is_registered(&tvcpu->arch.vpa))
			break;

		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;

	case H_VPA_DEREG_VPA:		/* deregister VPA */
		/* Check they don't still have a DTL or SLB buf registered */
		err = H_RESOURCE;
		if (vpa_is_registered(&tvcpu->arch.dtl) ||
		    vpa_is_registered(&tvcpu->arch.slb_shadow))
			break;

		vpap = &tvcpu->arch.vpa;
		err = 0;
		break;

	case H_VPA_DEREG_DTL:		/* deregister DTL */
		vpap = &tvcpu->arch.dtl;
		err = 0;
		break;

	case H_VPA_DEREG_SLB:		/* deregister SLB shadow buffer */
		vpap = &tvcpu->arch.slb_shadow;
		err = 0;
		break;
	}

	if (vpap) {
		vpap->next_gpa = vpa;
		vpap->len = len;
		vpap->update_pending = 1;
	}

	spin_unlock(&tvcpu->arch.vpa_update_lock);

	return err;
}

static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
{
	struct kvm *kvm = vcpu->kvm;
	void *va;
	unsigned long nb;
	unsigned long gpa;

	/*
	 * We need to pin the page pointed to by vpap->next_gpa,
	 * but we can't call kvmppc_pin_guest_page under the lock
	 * as it does get_user_pages() and down_read().  So we
	 * have to drop the lock, pin the page, then get the lock
	 * again and check that a new area didn't get registered
	 * in the meantime.
	 */
	for (;;) {
		gpa = vpap->next_gpa;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		va = NULL;
		nb = 0;
		if (gpa)
			va = kvmppc_pin_guest_page(kvm, gpa, &nb);
		spin_lock(&vcpu->arch.vpa_update_lock);
		if (gpa == vpap->next_gpa)
			break;
		/* sigh... unpin that one and try again */
		if (va)
			kvmppc_unpin_guest_page(kvm, va, gpa, false);
	}

	vpap->update_pending = 0;
	if (va && nb < vpap->len) {
		/*
		 * If it's now too short, it must be that userspace
		 * has changed the mappings underlying guest memory,
		 * so unregister the region.
		 */
		kvmppc_unpin_guest_page(kvm, va, gpa, false);
		va = NULL;
	}
	if (vpap->pinned_addr)
		kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
					vpap->dirty);
	vpap->gpa = gpa;
	vpap->pinned_addr = va;
	vpap->dirty = false;
	if (va)
		vpap->pinned_end = va + vpap->len;
}

static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.vpa.update_pending ||
	      vcpu->arch.slb_shadow.update_pending ||
	      vcpu->arch.dtl.update_pending))
		return;

	spin_lock(&vcpu->arch.vpa_update_lock);
	if (vcpu->arch.vpa.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
		if (vcpu->arch.vpa.pinned_addr)
			init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
	}
	if (vcpu->arch.dtl.update_pending) {
		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
		vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
		vcpu->arch.dtl_index = 0;
	}
	if (vcpu->arch.slb_shadow.update_pending)
		kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
	spin_unlock(&vcpu->arch.vpa_update_lock);
}

/*
 * Return the accumulated stolen time for the vcore up until `now'.
 * The caller should hold the vcore lock.
 */
static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
{
	u64 p;
	unsigned long flags;

	spin_lock_irqsave(&vc->stoltb_lock, flags);
	p = vc->stolen_tb;
	if (vc->vcore_state != VCORE_INACTIVE &&
	    vc->preempt_tb != TB_NIL)
		p += now - vc->preempt_tb;
	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
	return p;
}

static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
				    struct kvmppc_vcore *vc)
{
	struct dtl_entry *dt;
	struct lppaca *vpa;
	unsigned long stolen;
	unsigned long core_stolen;
	u64 now;
	unsigned long flags;

	dt = vcpu->arch.dtl_ptr;
	vpa = vcpu->arch.vpa.pinned_addr;
	now = mftb();
	core_stolen = vcore_stolen_time(vc, now);
	stolen = core_stolen - vcpu->arch.stolen_logged;
	vcpu->arch.stolen_logged = core_stolen;
	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
	stolen += vcpu->arch.busy_stolen;
	vcpu->arch.busy_stolen = 0;
	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
	if (!dt || !vpa)
		return;
	memset(dt, 0, sizeof(struct dtl_entry));
	dt->dispatch_reason = 7;
	dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
	dt->timebase = cpu_to_be64(now + vc->tb_offset);
	dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
	dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
	dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
	++dt;
	if (dt == vcpu->arch.dtl.pinned_end)
		dt = vcpu->arch.dtl.pinned_addr;
	vcpu->arch.dtl_ptr = dt;
	/* order writing *dt vs. writing vpa->dtl_idx */
	smp_wmb();
	vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
	vcpu->arch.dtl.dirty = true;
}

/* See if there is a doorbell interrupt pending for a vcpu */
static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
{
	int thr;
	struct kvmppc_vcore *vc;

	if (vcpu->arch.doorbell_request)
		return true;
	/*
	 * Ensure that the read of vcore->dpdes comes after the read
	 * of vcpu->doorbell_request.  This barrier matches the
	 * lwsync in book3s_hv_rmhandlers.S just before the
	 * fast_guest_return label.
	 */
	smp_rmb();
	vc = vcpu->arch.vcore;
	thr = vcpu->vcpu_id - vc->first_vcpuid;
	return !!(vc->dpdes & (1 << thr));
}

static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
{
	if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
		return true;
	if ((!vcpu->arch.vcore->arch_compat) &&
	    cpu_has_feature(CPU_FTR_ARCH_207S))
		return true;
	return false;
}

static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
			     unsigned long resource, unsigned long value1,
			     unsigned long value2)
{
	switch (resource) {
	case H_SET_MODE_RESOURCE_SET_CIABR:
		if (!kvmppc_power8_compatible(vcpu))
			return H_P2;
		if (value2)
			return H_P4;
		if (mflags)
			return H_UNSUPPORTED_FLAG_START;
		/* Guests can't breakpoint the hypervisor */
		if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER)
			return H_P3;
		vcpu->arch.ciabr = value1;
		return H_SUCCESS;
	case H_SET_MODE_RESOURCE_SET_DAWR:
		if (!kvmppc_power8_compatible(vcpu))
			return H_P2;
		if (mflags)
			return H_UNSUPPORTED_FLAG_START;
		if (value2 & DABRX_HYP)
			return H_P4;
		vcpu->arch.dawr = value1;
		vcpu->arch.dawrx = value2;
		return H_SUCCESS;
	default:
		return H_TOO_HARD;
	}
}

static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
{
	struct kvmppc_vcore *vcore = target->arch.vcore;

	/*
	 * We expect to have been called by the real mode handler
	 * (kvmppc_rm_h_confer()) which would have directly returned
	 * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
	 * have useful work to do and should not confer) so we don't
	 * recheck that here.
	 */

	spin_lock(&vcore->lock);
	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
	    vcore->vcore_state != VCORE_INACTIVE &&
	    vcore->runner)
		target = vcore->runner;
	spin_unlock(&vcore->lock);

	return kvm_vcpu_yield_to(target);
}

static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
{
	int yield_count = 0;
	struct lppaca *lppaca;

	spin_lock(&vcpu->arch.vpa_update_lock);
	lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
	if (lppaca)
		yield_count = be32_to_cpu(lppaca->yield_count);
	spin_unlock(&vcpu->arch.vpa_update_lock);
	return yield_count;
}

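/*
 * Handle hypercalls that are processed in the host kernel in virtual mode
 * (as opposed to the real-mode handlers).  Returns RESUME_GUEST if the
 * hcall was handled here, or RESUME_HOST to let userspace deal with it.
 */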
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
	unsigned long req = kvmppc_get_gpr(vcpu, 3);
	unsigned long target, ret = H_SUCCESS;
	int yield_count;
	struct kvm_vcpu *tvcpu;
	int idx, rc;

	if (req <= MAX_HCALL_OPCODE &&
	    !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls))
		return RESUME_HOST;

	switch (req) {
	case H_CEDE:
		break;
	case H_PROD:
		target = kvmppc_get_gpr(vcpu, 4);
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		tvcpu->arch.prodded = 1;
		smp_mb();
		if (tvcpu->arch.ceded)
			kvmppc_fast_vcpu_kick_hv(tvcpu);
		break;
	case H_CONFER:
		target = kvmppc_get_gpr(vcpu, 4);
		if (target == -1)
			break;
		tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
		if (!tvcpu) {
			ret = H_PARAMETER;
			break;
		}
		yield_count = kvmppc_get_gpr(vcpu, 5);
		if (kvmppc_get_yield_count(tvcpu) != yield_count)
			break;
		kvm_arch_vcpu_yield_to(tvcpu);
		break;
	case H_REGISTER_VPA:
		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		break;
	case H_RTAS:
		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
			return RESUME_HOST;

		idx = srcu_read_lock(&vcpu->kvm->srcu);
		rc = kvmppc_rtas_hcall(vcpu);
		srcu_read_unlock(&vcpu->kvm->srcu, idx);

		if (rc == -ENOENT)
			return RESUME_HOST;
		else if (rc == 0)
			break;

		/* Send the error out to userspace via KVM_RUN */
		return rc;
	case H_LOGICAL_CI_LOAD:
		ret = kvmppc_h_logical_ci_load(vcpu);
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_LOGICAL_CI_STORE:
		ret = kvmppc_h_logical_ci_store(vcpu);
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_SET_MODE:
		ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6),
					kvmppc_get_gpr(vcpu, 7));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_XIRR:
	case H_CPPR:
	case H_EOI:
	case H_IPI:
	case H_IPOLL:
	case H_XIRR_X:
		if (kvmppc_xics_enabled(vcpu)) {
			if (xive_enabled()) {
				ret = H_NOT_AVAILABLE;
				return RESUME_GUEST;
			}
			ret = kvmppc_xics_hcall(vcpu, req);
			break;
		}
		return RESUME_HOST;
	case H_PUT_TCE:
		ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_PUT_TCE_INDIRECT:
		ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6),
					kvmppc_get_gpr(vcpu, 7));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	case H_STUFF_TCE:
		ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
					kvmppc_get_gpr(vcpu, 5),
					kvmppc_get_gpr(vcpu, 6),
					kvmppc_get_gpr(vcpu, 7));
		if (ret == H_TOO_HARD)
			return RESUME_HOST;
		break;
	default:
		return RESUME_HOST;
	}
	kvmppc_set_gpr(vcpu, 3, ret);
	vcpu->arch.hcall_needed = 0;
	return RESUME_GUEST;
}

static int kvmppc_hcall_impl_hv(unsigned long cmd)
{
	switch (cmd) {
	case H_CEDE:
	case H_PROD:
	case H_CONFER:
	case H_REGISTER_VPA:
	case H_SET_MODE:
	case H_LOGICAL_CI_LOAD:
	case H_LOGICAL_CI_STORE:
#ifdef CONFIG_KVM_XICS
	case H_XIRR:
	case H_CPPR:
	case H_EOI:
	case H_IPI:
	case H_IPOLL:
	case H_XIRR_X:
#endif
		return 1;
	}

	/* See if it's in the real-mode table */
	return kvmppc_hcall_impl_hv_realmode(cmd);
}

static int kvmppc_emulate_debug_inst(struct kvm_run *run,
					struct kvm_vcpu *vcpu)
{
	u32 last_inst;

	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
					EMULATE_DONE) {
		/*
		 * Fetch failed, so return to guest and
		 * try executing it again.
		 */
		return RESUME_GUEST;
	}

	if (last_inst == KVMPPC_INST_SW_BREAKPOINT) {
		run->exit_reason = KVM_EXIT_DEBUG;
		run->debug.arch.address = kvmppc_get_pc(vcpu);
		return RESUME_HOST;
	} else {
		kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
		return RESUME_GUEST;
	}
}

static void do_nothing(void *x)
{
}

static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
{
	int thr, cpu, pcpu, nthreads;
	struct kvm_vcpu *v;
	unsigned long dpdes;

	nthreads = vcpu->kvm->arch.emul_smt_mode;
	dpdes = 0;
	cpu = vcpu->vcpu_id & ~(nthreads - 1);
	for (thr = 0; thr < nthreads; ++thr, ++cpu) {
		v = kvmppc_find_vcpu(vcpu->kvm, cpu);
		if (!v)
			continue;
		/*
		 * If the vcpu is currently running on a physical cpu thread,
		 * interrupt it in order to pull it out of the guest briefly,
		 * which will update its vcore->dpdes value.
		 */
		pcpu = READ_ONCE(v->cpu);
		if (pcpu >= 0)
			smp_call_function_single(pcpu, do_nothing, NULL, 1);
		if (kvmppc_doorbell_pending(v))
			dpdes |= 1 << thr;
	}
	return dpdes;
}

/*
 * On POWER9, emulate doorbell-related instructions in order to
 * give the guest the illusion of running on a multi-threaded core.
 * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
 * and mfspr DPDES.
 */
static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
{
	u32 inst, rb, thr;
	unsigned long arg;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu *tvcpu;

	if (!cpu_has_feature(CPU_FTR_ARCH_300))
		return EMULATE_FAIL;
	if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst) != EMULATE_DONE)
		return RESUME_GUEST;
	if (get_op(inst) != 31)
		return EMULATE_FAIL;
	rb = get_rb(inst);
	thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
	switch (get_xop(inst)) {
	case OP_31_XOP_MSGSNDP:
		arg = kvmppc_get_gpr(vcpu, rb);
		if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
			break;
		arg &= 0x3f;
		if (arg >= kvm->arch.emul_smt_mode)
			break;
		tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
		if (!tvcpu)
			break;
		if (!tvcpu->arch.doorbell_request) {
			tvcpu->arch.doorbell_request = 1;
			kvmppc_fast_vcpu_kick_hv(tvcpu);
		}
		break;
	case OP_31_XOP_MSGCLRP:
		arg = kvmppc_get_gpr(vcpu, rb);
		if (((arg >> 27) & 0xf) != PPC_DBELL_SERVER)
			break;
		vcpu->arch.vcore->dpdes = 0;
		vcpu->arch.doorbell_request = 0;
		break;
	case OP_31_XOP_MFSPR:
		switch (get_sprn(inst)) {
		case SPRN_TIR:
			arg = thr;
			break;
		case SPRN_DPDES:
			arg = kvmppc_read_dpdes(vcpu);
			break;
		default:
			return EMULATE_FAIL;
		}
		kvmppc_set_gpr(vcpu, get_rt(inst), arg);
		break;
	default:
		return EMULATE_FAIL;
	}
	kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
	return RESUME_GUEST;
}

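/*
 * Handle an exit from the guest: decode vcpu->arch.trap and decide
 * whether to resume the guest, handle the event in the host, or exit
 * to userspace via the kvm_run structure.
 */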
static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
				 struct task_struct *tsk)
{
	int r = RESUME_HOST;

	vcpu->stat.sum_exits++;

	/*
	 * This can happen if an interrupt occurs in the last stages
	 * of guest entry or the first stages of guest exit (i.e. after
	 * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
	 * and before setting it to KVM_GUEST_MODE_HOST_HV).
	 * That can happen due to a bug, or due to a machine check
	 * occurring at just the wrong time.
	 */
	if (vcpu->arch.shregs.msr & MSR_HV) {
		printk(KERN_EMERG "KVM trap in HV mode!\n");
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		kvmppc_dump_regs(vcpu);
		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		return RESUME_HOST;
	}
	run->exit_reason = KVM_EXIT_UNKNOWN;
	run->ready_for_interrupt_injection = 1;
	switch (vcpu->arch.trap) {
	/* We're good on these - the host merely wanted to get our attention */
	case BOOK3S_INTERRUPT_HV_DECREMENTER:
		vcpu->stat.dec_exits++;
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_EXTERNAL:
	case BOOK3S_INTERRUPT_H_DOORBELL:
	case BOOK3S_INTERRUPT_H_VIRT:
		vcpu->stat.ext_intr_exits++;
		r = RESUME_GUEST;
		break;
	/* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
	case BOOK3S_INTERRUPT_HMI:
	case BOOK3S_INTERRUPT_PERFMON:
		r = RESUME_GUEST;
		break;
	case BOOK3S_INTERRUPT_MACHINE_CHECK:
		/* Exit to guest with KVM_EXIT_NMI as exit reason */
		run->exit_reason = KVM_EXIT_NMI;
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		/* Clear out the old NMI status from run->flags */
		run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
		/* Now set the NMI status */
		if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
			run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
		else
			run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;

		r = RESUME_HOST;
		/* Print the MCE event to host console. */
		machine_check_print_event_info(&vcpu->arch.mce_evt, false);
		break;
	case BOOK3S_INTERRUPT_PROGRAM:
	{
		ulong flags;
		/*
		 * Normally program interrupts are delivered directly
		 * to the guest by the hardware, but we can get here
		 * as a result of a hypervisor emulation interrupt
		 * (e40) getting turned into a 700 by BML RTAS.
		 */
		flags = vcpu->arch.shregs.msr & 0x1f0000ull;
		kvmppc_core_queue_program(vcpu, flags);
		r = RESUME_GUEST;
		break;
	}
	case BOOK3S_INTERRUPT_SYSCALL:
	{
		/* hcall - punt to userspace */
		int i;

		/* hypercall with MSR_PR has already been handled in rmode,
		 * and never reaches here.
		 */

		run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
		for (i = 0; i < 9; ++i)
			run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
		run->exit_reason = KVM_EXIT_PAPR_HCALL;
		vcpu->arch.hcall_needed = 1;
		r = RESUME_HOST;
		break;
	}
	/*
	 * We get these next two if the guest accesses a page which it thinks
	 * it has mapped but which is not actually present, either because
	 * it is for an emulated I/O device or because the corresponding
	 * host page has been paged out.  Any other HDSI/HISI interrupts
	 * have been handled already.
	 */
	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
		r = RESUME_PAGE_FAULT;
		break;
	case BOOK3S_INTERRUPT_H_INST_STORAGE:
		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
		vcpu->arch.fault_dsisr = 0;
		r = RESUME_PAGE_FAULT;
		break;
	/*
	 * This occurs if the guest executes an illegal instruction.
	 * If the guest debug is disabled, generate a program interrupt
	 * to the guest. If guest debug is enabled, we need to check
	 * whether the instruction is a software breakpoint instruction.
	 * Accordingly return to Guest or Host.
	 */
	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
		if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
			vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
				swab32(vcpu->arch.emul_inst) :
				vcpu->arch.emul_inst;
		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
			r = kvmppc_emulate_debug_inst(run, vcpu);
		} else {
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			r = RESUME_GUEST;
		}
		break;
	/*
	 * This occurs if the guest (kernel or userspace), does something that
	 * is prohibited by HFSCR.
	 * On POWER9, this could be a doorbell instruction that we need
	 * to emulate.
	 * Otherwise, we just generate a program interrupt to the guest.
	 */
	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
		r = EMULATE_FAIL;
		if ((vcpu->arch.hfscr >> 56) == FSCR_MSGP_LG)
			r = kvmppc_emulate_doorbell_instr(vcpu);
		if (r == EMULATE_FAIL) {
			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
			r = RESUME_GUEST;
		}
		break;
	case BOOK3S_INTERRUPT_HV_RM_HARD:
		r = RESUME_PASSTHROUGH;
		break;
	default:
		kvmppc_dump_regs(vcpu);
		printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
			vcpu->arch.trap, kvmppc_get_pc(vcpu),
			vcpu->arch.shregs.msr);
		run->hw.hardware_exit_reason = vcpu->arch.trap;
		r = RESUME_HOST;
		break;
	}

	return r;
}

static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	int i;

	memset(sregs, 0, sizeof(struct kvm_sregs));
	sregs->pvr = vcpu->arch.pvr;
	for (i = 0; i < vcpu->arch.slb_max; i++) {
		sregs->u.s.ppc64.slb[i].slbe = vcpu->arch.slb[i].orige;
		sregs->u.s.ppc64.slb[i].slbv = vcpu->arch.slb[i].origv;
	}

	return 0;
}

static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
					    struct kvm_sregs *sregs)
{
	int i, j;

	/* Only accept the same PVR as the host's, since we can't spoof it */
	if (sregs->pvr != vcpu->arch.pvr)
		return -EINVAL;

	j = 0;
	for (i = 0; i < vcpu->arch.slb_nr; i++) {
		if (sregs->u.s.ppc64.slb[i].slbe & SLB_ESID_V) {
			vcpu->arch.slb[j].orige = sregs->u.s.ppc64.slb[i].slbe;
			vcpu->arch.slb[j].origv = sregs->u.s.ppc64.slb[i].slbv;
			++j;
		}
	}
	vcpu->arch.slb_max = j;

	return 0;
}

static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
		bool preserve_top32)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvmppc_vcore *vc = vcpu->arch.vcore;
	u64 mask;

	mutex_lock(&kvm->lock);
	spin_lock(&vc->lock);
	/*
	 * If ILE (interrupt little-endian) has changed, update the
	 * MSR_LE bit in the intr_msr for each vcpu in this vcore.
	 */
	if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
		struct kvm_vcpu *vcpu;
		int i;

		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (vcpu->arch.vcore != vc)
				continue;
			if (new_lpcr & LPCR_ILE)
				vcpu->arch.intr_msr |= MSR_LE;
			else
				vcpu->arch.intr_msr &= ~MSR_LE;
		}
	}

	/*
	 * Userspace can only modify DPFD (default prefetch depth),
	 * ILE (interrupt little-endian) and TC (translation control).
	 * On POWER8 and POWER9 userspace can also modify AIL (alt. interrupt loc.).
	 */
	mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
	if (cpu_has_feature(CPU_FTR_ARCH_207S))
		mask |= LPCR_AIL;
	/*
	 * On POWER9, allow userspace to enable large decrementer for the
	 * guest, whether or not the host has it enabled.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_300))
		mask |= LPCR_LD;

	/* Broken 32-bit version of LPCR must not clear top bits */
	if (preserve_top32)
		mask &= 0xFFFFFFFF;
	vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
	spin_unlock(&vc->lock);
	mutex_unlock(&kvm->lock);
}

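/*
 * Read a single guest register, identified by a KVM_REG_PPC_* id, for
 * the KVM_GET_ONE_REG ioctl.
 */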
static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
				 union kvmppc_one_reg *val)
{
	int r = 0;
	long int i;

	switch (id) {
	case KVM_REG_PPC_DEBUG_INST:
		*val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
		break;
	case KVM_REG_PPC_HIOR:
		*val = get_reg_val(id, 0);
		break;
	case KVM_REG_PPC_DABR:
		*val = get_reg_val(id, vcpu->arch.dabr);
		break;
	case KVM_REG_PPC_DABRX:
		*val = get_reg_val(id, vcpu->arch.dabrx);
		break;
	case KVM_REG_PPC_DSCR:
		*val = get_reg_val(id, vcpu->arch.dscr);
		break;
	case KVM_REG_PPC_PURR:
		*val = get_reg_val(id, vcpu->arch.purr);
		break;
	case KVM_REG_PPC_SPURR:
		*val = get_reg_val(id, vcpu->arch.spurr);
		break;
	case KVM_REG_PPC_AMR:
		*val = get_reg_val(id, vcpu->arch.amr);
		break;
	case KVM_REG_PPC_UAMOR:
		*val = get_reg_val(id, vcpu->arch.uamor);
		break;
	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
		i = id - KVM_REG_PPC_MMCR0;
		*val = get_reg_val(id, vcpu->arch.mmcr[i]);
		break;
	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
		i = id - KVM_REG_PPC_PMC1;
		*val = get_reg_val(id, vcpu->arch.pmc[i]);
		break;
	case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
		i = id - KVM_REG_PPC_SPMC1;
		*val = get_reg_val(id, vcpu->arch.spmc[i]);
		break;
	case KVM_REG_PPC_SIAR:
		*val = get_reg_val(id, vcpu->arch.siar);
		break;
	case KVM_REG_PPC_SDAR:
		*val = get_reg_val(id, vcpu->arch.sdar);
		break;
	case KVM_REG_PPC_SIER:
		*val = get_reg_val(id, vcpu->arch.sier);
		break;
	case KVM_REG_PPC_IAMR:
		*val = get_reg_val(id, vcpu->arch.iamr);
		break;
	case KVM_REG_PPC_PSPB:
		*val = get_reg_val(id, vcpu->arch.pspb);
		break;
	case KVM_REG_PPC_DPDES:
		*val = get_reg_val(id, vcpu->arch.vcore->dpdes);
		break;
	case KVM_REG_PPC_VTB:
		*val = get_reg_val(id, vcpu->arch.vcore->vtb);
		break;
	case KVM_REG_PPC_DAWR:
		*val = get_reg_val(id, vcpu->arch.dawr);
		break;
	case KVM_REG_PPC_DAWRX:
		*val = get_reg_val(id, vcpu->arch.dawrx);
		break;
	case KVM_REG_PPC_CIABR:
		*val = get_reg_val(id, vcpu->arch.ciabr);
		break;
	case KVM_REG_PPC_CSIGR:
		*val = get_reg_val(id, vcpu->arch.csigr);
		break;
	case KVM_REG_PPC_TACR:
		*val = get_reg_val(id, vcpu->arch.tacr);
		break;
	case KVM_REG_PPC_TCSCR:
		*val = get_reg_val(id, vcpu->arch.tcscr);
		break;
	case KVM_REG_PPC_PID:
		*val = get_reg_val(id, vcpu->arch.pid);
		break;
	case KVM_REG_PPC_ACOP:
		*val = get_reg_val(id, vcpu->arch.acop);
		break;
	case KVM_REG_PPC_WORT:
		*val = get_reg_val(id, vcpu->arch.wort);
		break;
	case KVM_REG_PPC_TIDR:
		*val = get_reg_val(id, vcpu->arch.tid);
		break;
	case KVM_REG_PPC_PSSCR:
		*val = get_reg_val(id, vcpu->arch.psscr);
		break;
	case KVM_REG_PPC_VPA_ADDR:
		spin_lock(&vcpu->arch.vpa_update_lock);
		*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_SLB:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
		val->vpaval.length = vcpu->arch.slb_shadow.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_VPA_DTL:
		spin_lock(&vcpu->arch.vpa_update_lock);
		val->vpaval.addr = vcpu->arch.dtl.next_gpa;
		val->vpaval.length = vcpu->arch.dtl.len;
		spin_unlock(&vcpu->arch.vpa_update_lock);
		break;
	case KVM_REG_PPC_TB_OFFSET:
		*val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
		break;
	case KVM_REG_PPC_LPCR:
	case KVM_REG_PPC_LPCR_64:
		*val = get_reg_val(id, vcpu->arch.vcore->lpcr);
		break;
	case KVM_REG_PPC_PPR:
		*val = get_reg_val(id, vcpu->arch.ppr);
		break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
	case KVM_REG_PPC_TFHAR:
		*val = get_reg_val(id, vcpu->arch.tfhar);
		break;
	case KVM_REG_PPC_TFIAR:
		*val = get_reg_val(id, vcpu->arch.tfiar);
		break;
	case KVM_REG_PPC_TEXASR:
		*val = get_reg_val(id, vcpu->arch.texasr);
		break;
	case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
		i = id - KVM_REG_PPC_TM_GPR0;
		*val = get_reg_val(id, vcpu->arch.gpr_tm[i]);
		break;
	case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
	{
		int j;
		i = id - KVM_REG_PPC_TM_VSR0;
		if (i < 32)
			for (j = 0; j < TS_FPRWIDTH; j++)
				val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
		else {
			if (cpu_has_feature(CPU_FTR_ALTIVEC))
				val->vval = vcpu->arch.vr_tm.vr[i-32];
			else
				r = -ENXIO;
		}
		break;
	}
	case KVM_REG_PPC_TM_CR:
		*val = get_reg_val(id, vcpu->arch.cr_tm);
		break;
	case KVM_REG_PPC_TM_XER:
		*val = get_reg_val(id, vcpu->arch.xer_tm);
		break;
	case KVM_REG_PPC_TM_LR:
		*val = get_reg_val(id, vcpu->arch.lr_tm);
1463 break;
1464 case KVM_REG_PPC_TM_CTR:
1465 *val = get_reg_val(id, vcpu->arch.ctr_tm);
1466 break;
1467 case KVM_REG_PPC_TM_FPSCR:
1468 *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
1469 break;
1470 case KVM_REG_PPC_TM_AMR:
1471 *val = get_reg_val(id, vcpu->arch.amr_tm);
1472 break;
1473 case KVM_REG_PPC_TM_PPR:
1474 *val = get_reg_val(id, vcpu->arch.ppr_tm);
1475 break;
1476 case KVM_REG_PPC_TM_VRSAVE:
1477 *val = get_reg_val(id, vcpu->arch.vrsave_tm);
1478 break;
1479 case KVM_REG_PPC_TM_VSCR:
1480 if (cpu_has_feature(CPU_FTR_ALTIVEC))
1481 *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
1482 else
1483 r = -ENXIO;
1484 break;
1485 case KVM_REG_PPC_TM_DSCR:
1486 *val = get_reg_val(id, vcpu->arch.dscr_tm);
1487 break;
1488 case KVM_REG_PPC_TM_TAR:
1489 *val = get_reg_val(id, vcpu->arch.tar_tm);
1490 break;
1491#endif
Paul Mackerras388cc6e2013-09-21 14:35:02 +10001492 case KVM_REG_PPC_ARCH_COMPAT:
1493 *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
1494 break;
Paul Mackerras31f34382011-12-12 12:26:50 +00001495 default:
Paul Mackerrasa136a8b2012-09-25 20:31:56 +00001496 r = -EINVAL;
Paul Mackerras31f34382011-12-12 12:26:50 +00001497 break;
1498 }
1499
1500 return r;
1501}
1502
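/*
 * Write a vcpu register identified by a ONE_REG id (the HV backend
 * used for the KVM_SET_ONE_REG ioctl), validating values where needed.
 */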
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05301503static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
1504 union kvmppc_one_reg *val)
Paul Mackerras31f34382011-12-12 12:26:50 +00001505{
Paul Mackerrasa136a8b2012-09-25 20:31:56 +00001506 int r = 0;
1507 long int i;
Paul Mackerras55b665b2012-09-25 20:33:06 +00001508 unsigned long addr, len;
Paul Mackerras31f34382011-12-12 12:26:50 +00001509
Paul Mackerrasa136a8b2012-09-25 20:31:56 +00001510 switch (id) {
Paul Mackerras31f34382011-12-12 12:26:50 +00001511 case KVM_REG_PPC_HIOR:
Paul Mackerras31f34382011-12-12 12:26:50 +00001512 /* Only allow this to be set to zero */
Paul Mackerrasa136a8b2012-09-25 20:31:56 +00001513 if (set_reg_val(id, *val))
Paul Mackerras31f34382011-12-12 12:26:50 +00001514 r = -EINVAL;
1515 break;
Paul Mackerrasa136a8b2012-09-25 20:31:56 +00001516 case KVM_REG_PPC_DABR:
1517 vcpu->arch.dabr = set_reg_val(id, *val);
1518 break;
Paul Mackerras8563bf52014-01-08 21:25:29 +11001519 case KVM_REG_PPC_DABRX:
1520 vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
1521 break;
Paul Mackerrasa136a8b2012-09-25 20:31:56 +00001522 case KVM_REG_PPC_DSCR:
1523 vcpu->arch.dscr = set_reg_val(id, *val);
1524 break;
1525 case KVM_REG_PPC_PURR:
1526 vcpu->arch.purr = set_reg_val(id, *val);
1527 break;
1528 case KVM_REG_PPC_SPURR:
1529 vcpu->arch.spurr = set_reg_val(id, *val);
1530 break;
1531 case KVM_REG_PPC_AMR:
1532 vcpu->arch.amr = set_reg_val(id, *val);
1533 break;
1534 case KVM_REG_PPC_UAMOR:
1535 vcpu->arch.uamor = set_reg_val(id, *val);
1536 break;
Michael Neulingb005255e2014-01-08 21:25:21 +11001537 case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
Paul Mackerrasa136a8b2012-09-25 20:31:56 +00001538 i = id - KVM_REG_PPC_MMCR0;
1539 vcpu->arch.mmcr[i] = set_reg_val(id, *val);
1540 break;
1541 case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
1542 i = id - KVM_REG_PPC_PMC1;
1543 vcpu->arch.pmc[i] = set_reg_val(id, *val);
1544 break;
Michael Neulingb005255e2014-01-08 21:25:21 +11001545 case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
1546 i = id - KVM_REG_PPC_SPMC1;
1547 vcpu->arch.spmc[i] = set_reg_val(id, *val);
1548 break;
Paul Mackerras14941782013-09-06 13:11:18 +10001549 case KVM_REG_PPC_SIAR:
1550 vcpu->arch.siar = set_reg_val(id, *val);
1551 break;
1552 case KVM_REG_PPC_SDAR:
1553 vcpu->arch.sdar = set_reg_val(id, *val);
1554 break;
Michael Neulingb005255e2014-01-08 21:25:21 +11001555 case KVM_REG_PPC_SIER:
1556 vcpu->arch.sier = set_reg_val(id, *val);
Paul Mackerrasa8bd19e2012-09-25 20:32:30 +00001557 break;
Michael Neulingb005255e2014-01-08 21:25:21 +11001558 case KVM_REG_PPC_IAMR:
1559 vcpu->arch.iamr = set_reg_val(id, *val);
Paul Mackerrasa8bd19e2012-09-25 20:32:30 +00001560 break;
Michael Neulingb005255e2014-01-08 21:25:21 +11001561 case KVM_REG_PPC_PSPB:
1562 vcpu->arch.pspb = set_reg_val(id, *val);
1563 break;
Michael Neulingb005255e2014-01-08 21:25:21 +11001564 case KVM_REG_PPC_DPDES:
1565 vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
1566 break;
Paul Mackerras88b02cf92016-09-15 13:42:52 +10001567 case KVM_REG_PPC_VTB:
1568 vcpu->arch.vcore->vtb = set_reg_val(id, *val);
1569 break;
Michael Neulingb005255e2014-01-08 21:25:21 +11001570 case KVM_REG_PPC_DAWR:
1571 vcpu->arch.dawr = set_reg_val(id, *val);
1572 break;
1573 case KVM_REG_PPC_DAWRX:
1574 vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
1575 break;
1576 case KVM_REG_PPC_CIABR:
1577 vcpu->arch.ciabr = set_reg_val(id, *val);
1578 /* Don't allow setting breakpoints in hypervisor code */
1579 if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
1580 vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */
1581 break;
Michael Neulingb005255e2014-01-08 21:25:21 +11001582 case KVM_REG_PPC_CSIGR:
1583 vcpu->arch.csigr = set_reg_val(id, *val);
1584 break;
1585 case KVM_REG_PPC_TACR:
1586 vcpu->arch.tacr = set_reg_val(id, *val);
1587 break;
1588 case KVM_REG_PPC_TCSCR:
1589 vcpu->arch.tcscr = set_reg_val(id, *val);
1590 break;
1591 case KVM_REG_PPC_PID:
1592 vcpu->arch.pid = set_reg_val(id, *val);
1593 break;
1594 case KVM_REG_PPC_ACOP:
1595 vcpu->arch.acop = set_reg_val(id, *val);
1596 break;
1597 case KVM_REG_PPC_WORT:
1598 vcpu->arch.wort = set_reg_val(id, *val);
1599 break;
Paul Mackerrase9cf1e02016-11-18 13:11:42 +11001600 case KVM_REG_PPC_TIDR:
1601 vcpu->arch.tid = set_reg_val(id, *val);
1602 break;
1603 case KVM_REG_PPC_PSSCR:
1604 vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
1605 break;
Paul Mackerras55b665b2012-09-25 20:33:06 +00001606 case KVM_REG_PPC_VPA_ADDR:
1607 addr = set_reg_val(id, *val);
1608 r = -EINVAL;
1609 if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
1610 vcpu->arch.dtl.next_gpa))
1611 break;
1612 r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
1613 break;
1614 case KVM_REG_PPC_VPA_SLB:
1615 addr = val->vpaval.addr;
1616 len = val->vpaval.length;
1617 r = -EINVAL;
1618 if (addr && !vcpu->arch.vpa.next_gpa)
1619 break;
1620 r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
1621 break;
1622 case KVM_REG_PPC_VPA_DTL:
1623 addr = val->vpaval.addr;
1624 len = val->vpaval.length;
1625 r = -EINVAL;
Paul Mackerras9f8c8c72012-10-15 01:18:37 +00001626 if (addr && (len < sizeof(struct dtl_entry) ||
1627 !vcpu->arch.vpa.next_gpa))
Paul Mackerras55b665b2012-09-25 20:33:06 +00001628 break;
1629 len -= len % sizeof(struct dtl_entry);
1630 r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
1631 break;
Paul Mackerras93b0f4d2013-09-06 13:17:46 +10001632 case KVM_REG_PPC_TB_OFFSET:
Paul Mackerras3d3efb62017-06-06 14:35:30 +10001633 /*
1634 * POWER9 DD1 has an erratum where writing TBU40 causes
1635 * the timebase to lose ticks. So we don't let the
1636 * timebase offset be changed on P9 DD1. (It is
1637 * initialized to zero.)
1638 */
1639 if (cpu_has_feature(CPU_FTR_POWER9_DD1))
1640 break;
Paul Mackerras93b0f4d2013-09-06 13:17:46 +10001641 /* round up to multiple of 2^24 */
1642 vcpu->arch.vcore->tb_offset =
1643 ALIGN(set_reg_val(id, *val), 1UL << 24);
1644 break;
Paul Mackerrasa0144e22013-09-20 14:52:38 +10001645 case KVM_REG_PPC_LPCR:
Alexey Kardashevskiya0840242014-07-19 17:59:34 +10001646 kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
1647 break;
1648 case KVM_REG_PPC_LPCR_64:
1649 kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false);
Paul Mackerrasa0144e22013-09-20 14:52:38 +10001650 break;
Paul Mackerras4b8473c2013-09-20 14:52:39 +10001651 case KVM_REG_PPC_PPR:
1652 vcpu->arch.ppr = set_reg_val(id, *val);
1653 break;
Michael Neulinga7d80d02014-03-25 10:47:03 +11001654#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
1655 case KVM_REG_PPC_TFHAR:
1656 vcpu->arch.tfhar = set_reg_val(id, *val);
1657 break;
1658 case KVM_REG_PPC_TFIAR:
1659 vcpu->arch.tfiar = set_reg_val(id, *val);
1660 break;
1661 case KVM_REG_PPC_TEXASR:
1662 vcpu->arch.texasr = set_reg_val(id, *val);
1663 break;
1664 case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
1665 i = id - KVM_REG_PPC_TM_GPR0;
1666 vcpu->arch.gpr_tm[i] = set_reg_val(id, *val);
1667 break;
1668 case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
1669 {
1670 int j;
1671 i = id - KVM_REG_PPC_TM_VSR0;
1672 if (i < 32)
1673 for (j = 0; j < TS_FPRWIDTH; j++)
1674 vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
1675 else
1676 if (cpu_has_feature(CPU_FTR_ALTIVEC))
1677 vcpu->arch.vr_tm.vr[i-32] = val->vval;
1678 else
1679 r = -ENXIO;
1680 break;
1681 }
1682 case KVM_REG_PPC_TM_CR:
1683 vcpu->arch.cr_tm = set_reg_val(id, *val);
1684 break;
Paul Mackerras0d808df2016-11-07 15:09:58 +11001685 case KVM_REG_PPC_TM_XER:
1686 vcpu->arch.xer_tm = set_reg_val(id, *val);
1687 break;
Michael Neulinga7d80d02014-03-25 10:47:03 +11001688 case KVM_REG_PPC_TM_LR:
1689 vcpu->arch.lr_tm = set_reg_val(id, *val);
1690 break;
1691 case KVM_REG_PPC_TM_CTR:
1692 vcpu->arch.ctr_tm = set_reg_val(id, *val);
1693 break;
1694 case KVM_REG_PPC_TM_FPSCR:
1695 vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
1696 break;
1697 case KVM_REG_PPC_TM_AMR:
1698 vcpu->arch.amr_tm = set_reg_val(id, *val);
1699 break;
1700 case KVM_REG_PPC_TM_PPR:
1701 vcpu->arch.ppr_tm = set_reg_val(id, *val);
1702 break;
1703 case KVM_REG_PPC_TM_VRSAVE:
1704 vcpu->arch.vrsave_tm = set_reg_val(id, *val);
1705 break;
1706 case KVM_REG_PPC_TM_VSCR:
1707 if (cpu_has_feature(CPU_FTR_ALTIVEC))
 1708 vcpu->arch.vr_tm.vscr.u[3] = set_reg_val(id, *val);
1709 else
 1710 r = -ENXIO;
1711 break;
1712 case KVM_REG_PPC_TM_DSCR:
1713 vcpu->arch.dscr_tm = set_reg_val(id, *val);
1714 break;
1715 case KVM_REG_PPC_TM_TAR:
1716 vcpu->arch.tar_tm = set_reg_val(id, *val);
1717 break;
1718#endif
Paul Mackerras388cc6e2013-09-21 14:35:02 +10001719 case KVM_REG_PPC_ARCH_COMPAT:
1720 r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
1721 break;
Paul Mackerras31f34382011-12-12 12:26:50 +00001722 default:
Paul Mackerrasa136a8b2012-09-25 20:31:56 +00001723 r = -EINVAL;
Paul Mackerras31f34382011-12-12 12:26:50 +00001724 break;
1725 }
1726
1727 return r;
1728}
1729
Paul Mackerras45c940b2016-11-18 17:43:30 +11001730/*
1731 * On POWER9, threads are independent and can be in different partitions.
1732 * Therefore we consider each thread to be a subcore.
 1733 * There is, unfortunately, a restriction that all threads have to be
 1734 * in the same MMU mode (radix or HPT), but since we only support
 1735 * HPT guests on an HPT host so far, that isn't an impediment yet.
1736 */
1737static int threads_per_vcore(void)
1738{
1739 if (cpu_has_feature(CPU_FTR_ARCH_300))
1740 return 1;
1741 return threads_per_subcore;
1742}
1743
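/*
 * Allocate and initialize a virtual core structure for the given
 * core number of this VM.
 */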
Stewart Smithde9bdd12014-07-18 14:18:42 +10001744static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
1745{
1746 struct kvmppc_vcore *vcore;
1747
1748 vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL);
1749
1750 if (vcore == NULL)
1751 return NULL;
1752
Stewart Smithde9bdd12014-07-18 14:18:42 +10001753 spin_lock_init(&vcore->lock);
Paul Mackerras2711e242014-12-04 16:43:28 +11001754 spin_lock_init(&vcore->stoltb_lock);
Marcelo Tosatti85773702016-02-19 09:46:39 +01001755 init_swait_queue_head(&vcore->wq);
Stewart Smithde9bdd12014-07-18 14:18:42 +10001756 vcore->preempt_tb = TB_NIL;
1757 vcore->lpcr = kvm->arch.lpcr;
Paul Mackerras3c313522017-02-06 13:24:41 +11001758 vcore->first_vcpuid = core * kvm->arch.smt_mode;
Stewart Smithde9bdd12014-07-18 14:18:42 +10001759 vcore->kvm = kvm;
Paul Mackerrasec257162015-06-24 21:18:03 +10001760 INIT_LIST_HEAD(&vcore->preempt_list);
Stewart Smithde9bdd12014-07-18 14:18:42 +10001761
1762 return vcore;
1763}
1764
Paul Mackerrasb6c295d2015-03-28 14:21:02 +11001765#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
1766static struct debugfs_timings_element {
1767 const char *name;
1768 size_t offset;
1769} timings[] = {
1770 {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)},
1771 {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)},
1772 {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)},
1773 {"guest", offsetof(struct kvm_vcpu, arch.guest_time)},
1774 {"cede", offsetof(struct kvm_vcpu, arch.cede_time)},
1775};
1776
Thomas Meyer4bb817ed2017-09-03 14:19:31 +02001777#define N_TIMINGS (ARRAY_SIZE(timings))
Paul Mackerrasb6c295d2015-03-28 14:21:02 +11001778
1779struct debugfs_timings_state {
1780 struct kvm_vcpu *vcpu;
1781 unsigned int buflen;
1782 char buf[N_TIMINGS * 100];
1783};
1784
1785static int debugfs_timings_open(struct inode *inode, struct file *file)
1786{
1787 struct kvm_vcpu *vcpu = inode->i_private;
1788 struct debugfs_timings_state *p;
1789
1790 p = kzalloc(sizeof(*p), GFP_KERNEL);
1791 if (!p)
1792 return -ENOMEM;
1793
1794 kvm_get_kvm(vcpu->kvm);
1795 p->vcpu = vcpu;
1796 file->private_data = p;
1797
1798 return nonseekable_open(inode, file);
1799}
1800
1801static int debugfs_timings_release(struct inode *inode, struct file *file)
1802{
1803 struct debugfs_timings_state *p = file->private_data;
1804
1805 kvm_put_kvm(p->vcpu->kvm);
1806 kfree(p);
1807 return 0;
1808}
1809
1810static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
1811 size_t len, loff_t *ppos)
1812{
1813 struct debugfs_timings_state *p = file->private_data;
1814 struct kvm_vcpu *vcpu = p->vcpu;
1815 char *s, *buf_end;
1816 struct kvmhv_tb_accumulator tb;
1817 u64 count;
1818 loff_t pos;
1819 ssize_t n;
1820 int i, loops;
1821 bool ok;
1822
1823 if (!p->buflen) {
1824 s = p->buf;
1825 buf_end = s + sizeof(p->buf);
1826 for (i = 0; i < N_TIMINGS; ++i) {
1827 struct kvmhv_tb_accumulator *acc;
1828
1829 acc = (struct kvmhv_tb_accumulator *)
1830 ((unsigned long)vcpu + timings[i].offset);
1831 ok = false;
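 /*
 * Snapshot the accumulator seqlock-style: retry until we read it
 * with an even, unchanged seqcount on both sides of the copy.
 */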
1832 for (loops = 0; loops < 1000; ++loops) {
1833 count = acc->seqcount;
1834 if (!(count & 1)) {
1835 smp_rmb();
1836 tb = *acc;
1837 smp_rmb();
1838 if (count == acc->seqcount) {
1839 ok = true;
1840 break;
1841 }
1842 }
1843 udelay(1);
1844 }
1845 if (!ok)
1846 snprintf(s, buf_end - s, "%s: stuck\n",
1847 timings[i].name);
1848 else
1849 snprintf(s, buf_end - s,
1850 "%s: %llu %llu %llu %llu\n",
1851 timings[i].name, count / 2,
1852 tb_to_ns(tb.tb_total),
1853 tb_to_ns(tb.tb_min),
1854 tb_to_ns(tb.tb_max));
1855 s += strlen(s);
1856 }
1857 p->buflen = s - p->buf;
1858 }
1859
1860 pos = *ppos;
1861 if (pos >= p->buflen)
1862 return 0;
1863 if (len > p->buflen - pos)
1864 len = p->buflen - pos;
1865 n = copy_to_user(buf, p->buf + pos, len);
1866 if (n) {
1867 if (n == len)
1868 return -EFAULT;
1869 len -= n;
1870 }
1871 *ppos = pos + len;
1872 return len;
1873}
1874
1875static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
1876 size_t len, loff_t *ppos)
1877{
1878 return -EACCES;
1879}
1880
1881static const struct file_operations debugfs_timings_ops = {
1882 .owner = THIS_MODULE,
1883 .open = debugfs_timings_open,
1884 .release = debugfs_timings_release,
1885 .read = debugfs_timings_read,
1886 .write = debugfs_timings_write,
1887 .llseek = generic_file_llseek,
1888};
1889
1890/* Create a debugfs directory for the vcpu */
1891static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
1892{
1893 char buf[16];
1894 struct kvm *kvm = vcpu->kvm;
1895
1896 snprintf(buf, sizeof(buf), "vcpu%u", id);
1897 if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
1898 return;
1899 vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
1900 if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
1901 return;
1902 vcpu->arch.debugfs_timings =
1903 debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
1904 vcpu, &debugfs_timings_ops);
1905}
1906
1907#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
1908static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
1909{
1910}
1911#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
1912
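/*
 * Create and initialize a vcpu for the HV backend: allocate the
 * structure, set up default register and HFSCR values, and attach
 * the vcpu to its virtual core.
 */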
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05301913static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
1914 unsigned int id)
Paul Mackerrasde56a942011-06-29 00:21:34 +00001915{
1916 struct kvm_vcpu *vcpu;
Paul Mackerras3c313522017-02-06 13:24:41 +11001917 int err;
Paul Mackerras371fefd2011-06-29 00:23:08 +00001918 int core;
1919 struct kvmppc_vcore *vcore;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001920
Paul Mackerras371fefd2011-06-29 00:23:08 +00001921 err = -ENOMEM;
Sasha Levin6b75e6b2011-12-07 10:24:56 +02001922 vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
Paul Mackerrasde56a942011-06-29 00:21:34 +00001923 if (!vcpu)
1924 goto out;
1925
1926 err = kvm_vcpu_init(vcpu, kvm, id);
1927 if (err)
1928 goto free_vcpu;
1929
1930 vcpu->arch.shared = &vcpu->arch.shregs;
Alexander Graf5deb8e72014-04-24 13:46:24 +02001931#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
1932 /*
1933 * The shared struct is never shared on HV,
1934 * so we can always use host endianness
1935 */
1936#ifdef __BIG_ENDIAN__
1937 vcpu->arch.shared_big_endian = true;
1938#else
1939 vcpu->arch.shared_big_endian = false;
1940#endif
1941#endif
Paul Mackerrasde56a942011-06-29 00:21:34 +00001942 vcpu->arch.mmcr[0] = MMCR0_FC;
1943 vcpu->arch.ctrl = CTRL_RUNLATCH;
1944 /* default to host PVR, since we can't spoof it */
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05301945 kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
Paul Mackerras2e25aa52012-02-19 17:46:32 +00001946 spin_lock_init(&vcpu->arch.vpa_update_lock);
Paul Mackerrasc7b67672012-10-15 01:18:07 +00001947 spin_lock_init(&vcpu->arch.tbacct_lock);
1948 vcpu->arch.busy_preempt = TB_NIL;
Anton Blanchardd6829162014-01-08 21:25:30 +11001949 vcpu->arch.intr_msr = MSR_SF | MSR_ME;
Paul Mackerrasde56a942011-06-29 00:21:34 +00001950
Paul Mackerras769377f2017-02-15 14:30:17 +11001951 /*
1952 * Set the default HFSCR for the guest from the host value.
1953 * This value is only used on POWER9.
1954 * On POWER9 DD1, TM doesn't work, so we make sure to
1955 * prevent the guest from using it.
Paul Mackerras57900692017-05-16 16:41:20 +10001956 * On POWER9, we want to virtualize the doorbell facility, so we
 1957 * clear HFSCR_MSGP, which causes the doorbell instructions to trap.
Paul Mackerras769377f2017-02-15 14:30:17 +11001958 */
1959 vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
1960 if (!cpu_has_feature(CPU_FTR_TM))
1961 vcpu->arch.hfscr &= ~HFSCR_TM;
Paul Mackerras57900692017-05-16 16:41:20 +10001962 if (cpu_has_feature(CPU_FTR_ARCH_300))
1963 vcpu->arch.hfscr &= ~HFSCR_MSGP;
Paul Mackerras769377f2017-02-15 14:30:17 +11001964
Paul Mackerrasde56a942011-06-29 00:21:34 +00001965 kvmppc_mmu_book3s_hv_init(vcpu);
1966
Paul Mackerras8455d792012-10-15 01:17:42 +00001967 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
Paul Mackerras371fefd2011-06-29 00:23:08 +00001968
1969 init_waitqueue_head(&vcpu->arch.cpu_run);
1970
1971 mutex_lock(&kvm->lock);
Paul Mackerras3c313522017-02-06 13:24:41 +11001972 vcore = NULL;
1973 err = -EINVAL;
1974 core = id / kvm->arch.smt_mode;
1975 if (core < KVM_MAX_VCORES) {
1976 vcore = kvm->arch.vcores[core];
1977 if (!vcore) {
1978 err = -ENOMEM;
1979 vcore = kvmppc_vcore_create(kvm, core);
1980 kvm->arch.vcores[core] = vcore;
1981 kvm->arch.online_vcores++;
1982 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00001983 }
1984 mutex_unlock(&kvm->lock);
1985
1986 if (!vcore)
1987 goto free_vcpu;
1988
1989 spin_lock(&vcore->lock);
1990 ++vcore->num_threads;
Paul Mackerras371fefd2011-06-29 00:23:08 +00001991 spin_unlock(&vcore->lock);
1992 vcpu->arch.vcore = vcore;
Paul Mackerrase0b7ec02014-01-08 21:25:20 +11001993 vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
Paul Mackerrasec257162015-06-24 21:18:03 +10001994 vcpu->arch.thread_cpu = -1;
Paul Mackerrasa29ebea2017-01-30 21:21:50 +11001995 vcpu->arch.prev_cpu = -1;
Paul Mackerras371fefd2011-06-29 00:23:08 +00001996
Alexander Grafaf8f38b2011-08-10 13:57:08 +02001997 vcpu->arch.cpu_type = KVM_CPU_3S_64;
1998 kvmppc_sanity_check(vcpu);
1999
Paul Mackerrasb6c295d2015-03-28 14:21:02 +11002000 debugfs_vcpu_init(vcpu, id);
2001
Paul Mackerrasde56a942011-06-29 00:21:34 +00002002 return vcpu;
2003
2004free_vcpu:
Sasha Levin6b75e6b2011-12-07 10:24:56 +02002005 kmem_cache_free(kvm_vcpu_cache, vcpu);
Paul Mackerrasde56a942011-06-29 00:21:34 +00002006out:
2007 return ERR_PTR(err);
2008}
2009
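/*
 * Set the virtual SMT mode (number of vcpus per virtual core) for a VM.
 * This is only allowed before any virtual cores have been created.
 */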
Paul Mackerras3c313522017-02-06 13:24:41 +11002010static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
2011 unsigned long flags)
2012{
2013 int err;
Paul Mackerras57900692017-05-16 16:41:20 +10002014 int esmt = 0;
Paul Mackerras3c313522017-02-06 13:24:41 +11002015
2016 if (flags)
2017 return -EINVAL;
2018 if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
2019 return -EINVAL;
2020 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
2021 /*
2022 * On POWER8 (or POWER7), the threading mode is "strict",
2023 * so we pack smt_mode vcpus per vcore.
2024 */
2025 if (smt_mode > threads_per_subcore)
2026 return -EINVAL;
2027 } else {
2028 /*
2029 * On POWER9, the threading mode is "loose",
2030 * so each vcpu gets its own vcore.
2031 */
Paul Mackerras57900692017-05-16 16:41:20 +10002032 esmt = smt_mode;
Paul Mackerras3c313522017-02-06 13:24:41 +11002033 smt_mode = 1;
2034 }
2035 mutex_lock(&kvm->lock);
2036 err = -EBUSY;
2037 if (!kvm->arch.online_vcores) {
2038 kvm->arch.smt_mode = smt_mode;
Paul Mackerras57900692017-05-16 16:41:20 +10002039 kvm->arch.emul_smt_mode = esmt;
Paul Mackerras3c313522017-02-06 13:24:41 +11002040 err = 0;
2041 }
2042 mutex_unlock(&kvm->lock);
2043
2044 return err;
2045}
2046
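/* Unpin the guest page backing a VPA, marking it dirty if it was modified. */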
Paul Mackerrasc35635e2013-04-18 19:51:04 +00002047static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
2048{
2049 if (vpa->pinned_addr)
2050 kvmppc_unpin_guest_page(kvm, vpa->pinned_addr, vpa->gpa,
2051 vpa->dirty);
2052}
2053
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05302054static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
Paul Mackerrasde56a942011-06-29 00:21:34 +00002055{
Paul Mackerras2e25aa52012-02-19 17:46:32 +00002056 spin_lock(&vcpu->arch.vpa_update_lock);
Paul Mackerrasc35635e2013-04-18 19:51:04 +00002057 unpin_vpa(vcpu->kvm, &vcpu->arch.dtl);
2058 unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
2059 unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
Paul Mackerras2e25aa52012-02-19 17:46:32 +00002060 spin_unlock(&vcpu->arch.vpa_update_lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +00002061 kvm_vcpu_uninit(vcpu);
Sasha Levin6b75e6b2011-12-07 10:24:56 +02002062 kmem_cache_free(kvm_vcpu_cache, vcpu);
Paul Mackerrasde56a942011-06-29 00:21:34 +00002063}
2064
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05302065static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
2066{
2067 /* Indicate we want to get back into the guest */
2068 return 1;
2069}
2070
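/*
 * Arm the decrementer hrtimer for the vcpu, or queue a decrementer
 * exception immediately if the deadline has already passed.
 */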
Paul Mackerras19ccb762011-07-23 17:42:46 +10002071static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
Paul Mackerrasde56a942011-06-29 00:21:34 +00002072{
Paul Mackerras19ccb762011-07-23 17:42:46 +10002073 unsigned long dec_nsec, now;
Paul Mackerras371fefd2011-06-29 00:23:08 +00002074
Paul Mackerras19ccb762011-07-23 17:42:46 +10002075 now = get_tb();
2076 if (now > vcpu->arch.dec_expires) {
2077 /* decrementer has already gone negative */
2078 kvmppc_core_queue_dec(vcpu);
Scott Wood7e28e60e2011-11-08 18:23:20 -06002079 kvmppc_core_prepare_to_enter(vcpu);
Paul Mackerras19ccb762011-07-23 17:42:46 +10002080 return;
Paul Mackerras371fefd2011-06-29 00:23:08 +00002081 }
Paul Mackerras19ccb762011-07-23 17:42:46 +10002082 dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
2083 / tb_ticks_per_sec;
Thomas Gleixner8b0e1952016-12-25 12:30:41 +01002084 hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
Paul Mackerras19ccb762011-07-23 17:42:46 +10002085 vcpu->arch.timer_running = 1;
Paul Mackerras371fefd2011-06-29 00:23:08 +00002086}
2087
Paul Mackerras19ccb762011-07-23 17:42:46 +10002088static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
Paul Mackerras371fefd2011-06-29 00:23:08 +00002089{
Paul Mackerras19ccb762011-07-23 17:42:46 +10002090 vcpu->arch.ceded = 0;
2091 if (vcpu->arch.timer_running) {
2092 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
2093 vcpu->arch.timer_running = 0;
2094 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00002095}
2096
Paul Mackerras8b24e692017-06-26 15:45:51 +10002097extern int __kvmppc_vcore_entry(void);
Paul Mackerras371fefd2011-06-29 00:23:08 +00002098
2099static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
2100 struct kvm_vcpu *vcpu)
2101{
Paul Mackerrasc7b67672012-10-15 01:18:07 +00002102 u64 now;
2103
Paul Mackerras371fefd2011-06-29 00:23:08 +00002104 if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
2105 return;
Paul Mackerrasbf3d32e2013-11-16 17:46:04 +11002106 spin_lock_irq(&vcpu->arch.tbacct_lock);
Paul Mackerrasc7b67672012-10-15 01:18:07 +00002107 now = mftb();
2108 vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
2109 vcpu->arch.stolen_logged;
2110 vcpu->arch.busy_preempt = now;
2111 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
Paul Mackerrasbf3d32e2013-11-16 17:46:04 +11002112 spin_unlock_irq(&vcpu->arch.tbacct_lock);
Paul Mackerras371fefd2011-06-29 00:23:08 +00002113 --vc->n_runnable;
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10002114 WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
Paul Mackerras371fefd2011-06-29 00:23:08 +00002115}
2116
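/*
 * Claim a secondary hardware thread for KVM use: tell it not to go
 * into the kernel and wait for it to reach nap mode.
 */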
Paul Mackerrasf0888f72012-02-03 00:54:17 +00002117static int kvmppc_grab_hwthread(int cpu)
2118{
2119 struct paca_struct *tpaca;
Paul Mackerrasb754c732014-09-02 16:14:42 +10002120 long timeout = 10000;
Paul Mackerrasf0888f72012-02-03 00:54:17 +00002121
2122 tpaca = &paca[cpu];
2123
2124 /* Ensure the thread won't go into the kernel if it wakes */
Paul Mackerras7b444c62012-10-15 01:16:14 +00002125 tpaca->kvm_hstate.kvm_vcpu = NULL;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002126 tpaca->kvm_hstate.kvm_vcore = NULL;
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002127 tpaca->kvm_hstate.napping = 0;
2128 smp_wmb();
2129 tpaca->kvm_hstate.hwthread_req = 1;
Paul Mackerrasf0888f72012-02-03 00:54:17 +00002130
2131 /*
2132 * If the thread is already executing in the kernel (e.g. handling
2133 * a stray interrupt), wait for it to get back to nap mode.
2134 * The smp_mb() is to ensure that our setting of hwthread_req
2135 * is visible before we look at hwthread_state, so if this
2136 * races with the code at system_reset_pSeries and the thread
2137 * misses our setting of hwthread_req, we are sure to see its
2138 * setting of hwthread_state, and vice versa.
2139 */
2140 smp_mb();
2141 while (tpaca->kvm_hstate.hwthread_state == KVM_HWTHREAD_IN_KERNEL) {
2142 if (--timeout <= 0) {
2143 pr_err("KVM: couldn't grab cpu %d\n", cpu);
2144 return -EBUSY;
2145 }
2146 udelay(1);
2147 }
2148 return 0;
2149}
2150
2151static void kvmppc_release_hwthread(int cpu)
2152{
2153 struct paca_struct *tpaca;
2154
2155 tpaca = &paca[cpu];
Paul Mackerras31a4d442017-10-19 15:14:20 +11002156 tpaca->kvm_hstate.hwthread_req = 0;
Paul Mackerrasf0888f72012-02-03 00:54:17 +00002157 tpaca->kvm_hstate.kvm_vcpu = NULL;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002158 tpaca->kvm_hstate.kvm_vcore = NULL;
2159 tpaca->kvm_hstate.kvm_split_mode = NULL;
Paul Mackerrasf0888f72012-02-03 00:54:17 +00002160}
2161
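/*
 * Mark the core that @cpu belongs to as needing a TLB flush before
 * the next guest entry, and kick any of its threads that are
 * currently running in the guest.
 */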
Paul Mackerrasa29ebea2017-01-30 21:21:50 +11002162static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
2163{
2164 int i;
2165
2166 cpu = cpu_first_thread_sibling(cpu);
2167 cpumask_set_cpu(cpu, &kvm->arch.need_tlb_flush);
2168 /*
2169 * Make sure setting of bit in need_tlb_flush precedes
2170 * testing of cpu_in_guest bits. The matching barrier on
2171 * the other side is the first smp_mb() in kvmppc_run_core().
2172 */
2173 smp_mb();
2174 for (i = 0; i < threads_per_core; ++i)
2175 if (cpumask_test_cpu(cpu + i, &kvm->arch.cpu_in_guest))
2176 smp_call_function_single(cpu + i, do_nothing, NULL, 1);
2177}
2178
Paul Mackerras8b24e692017-06-26 15:45:51 +10002179static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
2180{
2181 struct kvm *kvm = vcpu->kvm;
2182
2183 /*
2184 * With radix, the guest can do TLB invalidations itself,
2185 * and it could choose to use the local form (tlbiel) if
2186 * it is invalidating a translation that has only ever been
2187 * used on one vcpu. However, that doesn't mean it has
2188 * only ever been used on one physical cpu, since vcpus
2189 * can move around between pcpus. To cope with this, when
2190 * a vcpu moves from one pcpu to another, we need to tell
2191 * any vcpus running on the same core as this vcpu previously
 2192 * any vcpus running on the core where this vcpu previously
2193 * so we use a single bit in .need_tlb_flush for all 4 threads.
2194 */
2195 if (vcpu->arch.prev_cpu != pcpu) {
2196 if (vcpu->arch.prev_cpu >= 0 &&
2197 cpu_first_thread_sibling(vcpu->arch.prev_cpu) !=
2198 cpu_first_thread_sibling(pcpu))
2199 radix_flush_cpu(kvm, vcpu->arch.prev_cpu, vcpu);
2200 vcpu->arch.prev_cpu = pcpu;
2201 }
2202}
2203
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002204static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
Paul Mackerras371fefd2011-06-29 00:23:08 +00002205{
2206 int cpu;
2207 struct paca_struct *tpaca;
Paul Mackerrasa29ebea2017-01-30 21:21:50 +11002208 struct kvm *kvm = vc->kvm;
Paul Mackerras371fefd2011-06-29 00:23:08 +00002209
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002210 cpu = vc->pcpu;
2211 if (vcpu) {
2212 if (vcpu->arch.timer_running) {
2213 hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
2214 vcpu->arch.timer_running = 0;
2215 }
2216 cpu += vcpu->arch.ptid;
Paul Mackerras898b25b2017-06-22 15:08:42 +10002217 vcpu->cpu = vc->pcpu;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002218 vcpu->arch.thread_cpu = cpu;
Paul Mackerrasa29ebea2017-01-30 21:21:50 +11002219 cpumask_set_cpu(cpu, &kvm->arch.cpu_in_guest);
Paul Mackerras19ccb762011-07-23 17:42:46 +10002220 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00002221 tpaca = &paca[cpu];
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002222 tpaca->kvm_hstate.kvm_vcpu = vcpu;
Paul Mackerras898b25b2017-06-22 15:08:42 +10002223 tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
Paul Mackerrasec257162015-06-24 21:18:03 +10002224 /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
Paul Mackerras19ccb762011-07-23 17:42:46 +10002225 smp_wmb();
Paul Mackerras898b25b2017-06-22 15:08:42 +10002226 tpaca->kvm_hstate.kvm_vcore = vc;
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002227 if (cpu != smp_processor_id())
Paul Mackerras66feed62015-03-28 14:21:12 +11002228 kvmppc_ipi_thread(cpu);
Paul Mackerras371fefd2011-06-29 00:23:08 +00002229}
2230
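/*
 * Wait for the secondary hardware threads of this core to finish
 * running guest code, i.e. for their PACA vcore pointers to go NULL.
 */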
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002231static void kvmppc_wait_for_nap(void)
Paul Mackerras371fefd2011-06-29 00:23:08 +00002232{
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002233 int cpu = smp_processor_id();
2234 int i, loops;
Paul Mackerras45c940b2016-11-18 17:43:30 +11002235 int n_threads = threads_per_vcore();
Paul Mackerras371fefd2011-06-29 00:23:08 +00002236
Paul Mackerras45c940b2016-11-18 17:43:30 +11002237 if (n_threads <= 1)
2238 return;
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002239 for (loops = 0; loops < 1000000; ++loops) {
2240 /*
2241 * Check if all threads are finished.
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002242 * We set the vcore pointer when starting a thread
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002243 * and the thread clears it when finished, so we look
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002244 * for any threads that still have a non-NULL vcore ptr.
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002245 */
Paul Mackerras45c940b2016-11-18 17:43:30 +11002246 for (i = 1; i < n_threads; ++i)
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002247 if (paca[cpu + i].kvm_hstate.kvm_vcore)
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002248 break;
Paul Mackerras45c940b2016-11-18 17:43:30 +11002249 if (i == n_threads) {
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002250 HMT_medium();
2251 return;
Paul Mackerras371fefd2011-06-29 00:23:08 +00002252 }
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002253 HMT_low();
Paul Mackerras371fefd2011-06-29 00:23:08 +00002254 }
2255 HMT_medium();
Paul Mackerras45c940b2016-11-18 17:43:30 +11002256 for (i = 1; i < n_threads; ++i)
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002257 if (paca[cpu + i].kvm_hstate.kvm_vcore)
Paul Mackerras5d5b99c2015-03-28 14:21:06 +11002258 pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
Paul Mackerras371fefd2011-06-29 00:23:08 +00002259}
2260
2261/*
2262 * Check that we are on thread 0 and that any other threads in
Paul Mackerras7b444c62012-10-15 01:16:14 +00002263 * this core are off-line. Then grab the threads so they can't
2264 * enter the kernel.
Paul Mackerras371fefd2011-06-29 00:23:08 +00002265 */
2266static int on_primary_thread(void)
2267{
2268 int cpu = smp_processor_id();
Michael Ellerman3102f782014-05-23 18:15:29 +10002269 int thr;
Paul Mackerras371fefd2011-06-29 00:23:08 +00002270
Michael Ellerman3102f782014-05-23 18:15:29 +10002271 /* Are we on a primary subcore? */
2272 if (cpu_thread_in_subcore(cpu))
Paul Mackerras371fefd2011-06-29 00:23:08 +00002273 return 0;
Michael Ellerman3102f782014-05-23 18:15:29 +10002274
2275 thr = 0;
2276 while (++thr < threads_per_subcore)
Paul Mackerras371fefd2011-06-29 00:23:08 +00002277 if (cpu_online(cpu + thr))
2278 return 0;
Paul Mackerras7b444c62012-10-15 01:16:14 +00002279
2280 /* Grab all hw threads so they can't go into the kernel */
Michael Ellerman3102f782014-05-23 18:15:29 +10002281 for (thr = 1; thr < threads_per_subcore; ++thr) {
Paul Mackerras7b444c62012-10-15 01:16:14 +00002282 if (kvmppc_grab_hwthread(cpu + thr)) {
2283 /* Couldn't grab one; let the others go */
2284 do {
2285 kvmppc_release_hwthread(cpu + thr);
2286 } while (--thr > 0);
2287 return 0;
2288 }
2289 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00002290 return 1;
2291}
2292
Paul Mackerrasec257162015-06-24 21:18:03 +10002293/*
2294 * A list of virtual cores for each physical CPU.
2295 * These are vcores that could run but their runner VCPU tasks are
2296 * (or may be) preempted.
2297 */
2298struct preempted_vcore_list {
2299 struct list_head list;
2300 spinlock_t lock;
2301};
2302
2303static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
2304
2305static void init_vcore_lists(void)
2306{
2307 int cpu;
2308
2309 for_each_possible_cpu(cpu) {
2310 struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
2311 spin_lock_init(&lp->lock);
2312 INIT_LIST_HEAD(&lp->list);
2313 }
2314}
2315
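/*
 * Mark a vcore as preempted, start accumulating stolen time, and, if
 * it doesn't occupy a full physical core, add it to this CPU's list
 * of piggyback candidates.
 */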
2316static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
2317{
2318 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
2319
2320 vc->vcore_state = VCORE_PREEMPT;
2321 vc->pcpu = smp_processor_id();
Paul Mackerras45c940b2016-11-18 17:43:30 +11002322 if (vc->num_threads < threads_per_vcore()) {
Paul Mackerrasec257162015-06-24 21:18:03 +10002323 spin_lock(&lp->lock);
2324 list_add_tail(&vc->preempt_list, &lp->list);
2325 spin_unlock(&lp->lock);
2326 }
2327
2328 /* Start accumulating stolen time */
2329 kvmppc_core_start_stolen(vc);
2330}
2331
2332static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
2333{
Paul Mackerras402813f2015-07-16 17:11:13 +10002334 struct preempted_vcore_list *lp;
Paul Mackerrasec257162015-06-24 21:18:03 +10002335
2336 kvmppc_core_end_stolen(vc);
2337 if (!list_empty(&vc->preempt_list)) {
Paul Mackerras402813f2015-07-16 17:11:13 +10002338 lp = &per_cpu(preempted_vcores, vc->pcpu);
Paul Mackerrasec257162015-06-24 21:18:03 +10002339 spin_lock(&lp->lock);
2340 list_del_init(&vc->preempt_list);
2341 spin_unlock(&lp->lock);
2342 }
2343 vc->vcore_state = VCORE_INACTIVE;
2344}
2345
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002346/*
2347 * This stores information about the virtual cores currently
2348 * assigned to a physical core.
2349 */
Paul Mackerrasec257162015-06-24 21:18:03 +10002350struct core_info {
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002351 int n_subcores;
2352 int max_subcore_threads;
Paul Mackerrasec257162015-06-24 21:18:03 +10002353 int total_threads;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002354 int subcore_threads[MAX_SUBCORES];
Paul Mackerras898b25b2017-06-22 15:08:42 +10002355 struct kvmppc_vcore *vc[MAX_SUBCORES];
Paul Mackerrasec257162015-06-24 21:18:03 +10002356};
2357
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002358/*
2359 * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
2360 * respectively in 2-way micro-threading (split-core) mode.
2361 */
2362static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
2363
Paul Mackerrasec257162015-06-24 21:18:03 +10002364static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
2365{
2366 memset(cip, 0, sizeof(*cip));
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002367 cip->n_subcores = 1;
2368 cip->max_subcore_threads = vc->num_threads;
Paul Mackerrasec257162015-06-24 21:18:03 +10002369 cip->total_threads = vc->num_threads;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002370 cip->subcore_threads[0] = vc->num_threads;
Paul Mackerras898b25b2017-06-22 15:08:42 +10002371 cip->vc[0] = vc;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002372}
2373
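/*
 * Check whether the physical core can be split into n_subcores
 * subcores of up to n_threads threads each, given the allowed
 * dynamic micro-threading modes.
 */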
2374static bool subcore_config_ok(int n_subcores, int n_threads)
2375{
2376 /* Can only dynamically split if unsplit to begin with */
2377 if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
2378 return false;
2379 if (n_subcores > MAX_SUBCORES)
2380 return false;
2381 if (n_subcores > 1) {
2382 if (!(dynamic_mt_modes & 2))
2383 n_subcores = 4;
2384 if (n_subcores > 2 && !(dynamic_mt_modes & 4))
2385 return false;
2386 }
2387
2388 return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
Paul Mackerrasec257162015-06-24 21:18:03 +10002389}
2390
Paul Mackerras898b25b2017-06-22 15:08:42 +10002391static void init_vcore_to_run(struct kvmppc_vcore *vc)
Paul Mackerrasec257162015-06-24 21:18:03 +10002392{
Paul Mackerrasec257162015-06-24 21:18:03 +10002393 vc->entry_exit_map = 0;
2394 vc->in_guest = 0;
2395 vc->napping_threads = 0;
2396 vc->conferring_threads = 0;
2397}
2398
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002399static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
2400{
2401 int n_threads = vc->num_threads;
2402 int sub;
2403
2404 if (!cpu_has_feature(CPU_FTR_ARCH_207S))
2405 return false;
2406
2407 if (n_threads < cip->max_subcore_threads)
2408 n_threads = cip->max_subcore_threads;
Paul Mackerrasb0090312016-09-15 16:27:41 +10002409 if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002410 return false;
Paul Mackerrasb0090312016-09-15 16:27:41 +10002411 cip->max_subcore_threads = n_threads;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002412
2413 sub = cip->n_subcores;
2414 ++cip->n_subcores;
2415 cip->total_threads += vc->num_threads;
2416 cip->subcore_threads[sub] = vc->num_threads;
Paul Mackerras898b25b2017-06-22 15:08:42 +10002417 cip->vc[sub] = vc;
2418 init_vcore_to_run(vc);
2419 list_del_init(&vc->preempt_list);
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002420
2421 return true;
2422}
2423
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002424/*
2425 * Work out whether it is possible to piggyback the execution of
2426 * vcore *pvc onto the execution of the other vcores described in *cip.
2427 */
2428static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
2429 int target_threads)
2430{
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002431 if (cip->total_threads + pvc->num_threads > target_threads)
2432 return false;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002433
Paul Mackerrasb0090312016-09-15 16:27:41 +10002434 return can_dynamic_split(pvc, cip);
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002435}
2436
Paul Mackerrasd911f0b2015-03-28 14:21:03 +11002437static void prepare_threads(struct kvmppc_vcore *vc)
2438{
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10002439 int i;
2440 struct kvm_vcpu *vcpu;
Paul Mackerrasd911f0b2015-03-28 14:21:03 +11002441
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10002442 for_each_runnable_thread(i, vcpu, vc) {
Paul Mackerrasd911f0b2015-03-28 14:21:03 +11002443 if (signal_pending(vcpu->arch.run_task))
2444 vcpu->arch.ret = -EINTR;
2445 else if (vcpu->arch.vpa.update_pending ||
2446 vcpu->arch.slb_shadow.update_pending ||
2447 vcpu->arch.dtl.update_pending)
2448 vcpu->arch.ret = RESUME_GUEST;
2449 else
2450 continue;
2451 kvmppc_remove_runnable(vc, vcpu);
2452 wake_up(&vcpu->arch.cpu_run);
2453 }
2454}
2455
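/*
 * Scan this CPU's list of preempted vcores and pull in any that can
 * share the physical core with the vcore we are about to run, up to
 * target_threads threads in total.
 */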
Paul Mackerrasec257162015-06-24 21:18:03 +10002456static void collect_piggybacks(struct core_info *cip, int target_threads)
Paul Mackerras25fedfc2015-03-28 14:21:05 +11002457{
Paul Mackerrasec257162015-06-24 21:18:03 +10002458 struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
2459 struct kvmppc_vcore *pvc, *vcnext;
2460
2461 spin_lock(&lp->lock);
2462 list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
2463 if (!spin_trylock(&pvc->lock))
2464 continue;
2465 prepare_threads(pvc);
2466 if (!pvc->n_runnable) {
2467 list_del_init(&pvc->preempt_list);
2468 if (pvc->runner == NULL) {
2469 pvc->vcore_state = VCORE_INACTIVE;
2470 kvmppc_core_end_stolen(pvc);
2471 }
2472 spin_unlock(&pvc->lock);
2473 continue;
2474 }
2475 if (!can_piggyback(pvc, cip, target_threads)) {
2476 spin_unlock(&pvc->lock);
2477 continue;
2478 }
2479 kvmppc_core_end_stolen(pvc);
2480 pvc->vcore_state = VCORE_PIGGYBACK;
2481 if (cip->total_threads >= target_threads)
2482 break;
2483 }
2484 spin_unlock(&lp->lock);
2485}
2486
Paul Mackerras8b24e692017-06-26 15:45:51 +10002487static bool recheck_signals(struct core_info *cip)
2488{
2489 int sub, i;
2490 struct kvm_vcpu *vcpu;
2491
2492 for (sub = 0; sub < cip->n_subcores; ++sub)
2493 for_each_runnable_thread(i, vcpu, cip->vc[sub])
2494 if (signal_pending(vcpu->arch.run_task))
2495 return true;
2496 return false;
2497}
2498
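/*
 * Process all runnable vcpus of a vcore after a guest exit: cancel
 * stale decrementer exceptions, run the exit handler for any trap,
 * and either keep each vcpu runnable or wake its run task.
 */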
Paul Mackerrasec257162015-06-24 21:18:03 +10002499static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
2500{
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10002501 int still_running = 0, i;
Paul Mackerras25fedfc2015-03-28 14:21:05 +11002502 u64 now;
2503 long ret;
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10002504 struct kvm_vcpu *vcpu;
Paul Mackerras25fedfc2015-03-28 14:21:05 +11002505
Paul Mackerrasec257162015-06-24 21:18:03 +10002506 spin_lock(&vc->lock);
Paul Mackerras25fedfc2015-03-28 14:21:05 +11002507 now = get_tb();
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10002508 for_each_runnable_thread(i, vcpu, vc) {
Paul Mackerras25fedfc2015-03-28 14:21:05 +11002509 /* cancel pending dec exception if dec is positive */
2510 if (now < vcpu->arch.dec_expires &&
2511 kvmppc_core_pending_dec(vcpu))
2512 kvmppc_core_dequeue_dec(vcpu);
2513
2514 trace_kvm_guest_exit(vcpu);
2515
2516 ret = RESUME_GUEST;
2517 if (vcpu->arch.trap)
2518 ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
2519 vcpu->arch.run_task);
2520
2521 vcpu->arch.ret = ret;
2522 vcpu->arch.trap = 0;
2523
Paul Mackerrasec257162015-06-24 21:18:03 +10002524 if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
2525 if (vcpu->arch.pending_exceptions)
2526 kvmppc_core_prepare_to_enter(vcpu);
2527 if (vcpu->arch.ceded)
Paul Mackerras25fedfc2015-03-28 14:21:05 +11002528 kvmppc_set_timer(vcpu);
Paul Mackerrasec257162015-06-24 21:18:03 +10002529 else
2530 ++still_running;
2531 } else {
Paul Mackerras25fedfc2015-03-28 14:21:05 +11002532 kvmppc_remove_runnable(vc, vcpu);
2533 wake_up(&vcpu->arch.cpu_run);
2534 }
2535 }
Paul Mackerrasec257162015-06-24 21:18:03 +10002536 if (!is_master) {
Paul Mackerras563a1e92015-07-16 17:11:14 +10002537 if (still_running > 0) {
Paul Mackerrasec257162015-06-24 21:18:03 +10002538 kvmppc_vcore_preempt(vc);
Paul Mackerras563a1e92015-07-16 17:11:14 +10002539 } else if (vc->runner) {
2540 vc->vcore_state = VCORE_PREEMPT;
2541 kvmppc_core_start_stolen(vc);
2542 } else {
2543 vc->vcore_state = VCORE_INACTIVE;
2544 }
Paul Mackerrasec257162015-06-24 21:18:03 +10002545 if (vc->n_runnable > 0 && vc->runner == NULL) {
2546 /* make sure there's a candidate runner awake */
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10002547 i = -1;
2548 vcpu = next_runnable_thread(vc, &i);
Paul Mackerrasec257162015-06-24 21:18:03 +10002549 wake_up(&vcpu->arch.cpu_run);
2550 }
2551 }
2552 spin_unlock(&vc->lock);
Paul Mackerras25fedfc2015-03-28 14:21:05 +11002553}
2554
Paul Mackerras371fefd2011-06-29 00:23:08 +00002555/*
Suresh Warrierb8e6a872015-12-17 14:59:07 -06002556 * Clear core from the list of active host cores as we are about to
2557 * enter the guest. Only do this if it is the primary thread of the
2558 * core (not if a subcore) that is entering the guest.
2559 */
Anna-Maria Gleixner3f7cd912016-11-27 00:13:45 +01002560static inline int kvmppc_clear_host_core(unsigned int cpu)
Suresh Warrierb8e6a872015-12-17 14:59:07 -06002561{
2562 int core;
2563
2564 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
Anna-Maria Gleixner3f7cd912016-11-27 00:13:45 +01002565 return 0;
Suresh Warrierb8e6a872015-12-17 14:59:07 -06002566 /*
 2567 * The memory barrier can be omitted here as we will do a smp_wmb()
 2568 * later in kvmppc_start_thread and we need to ensure that state is
 2569 * visible to other CPUs only after we enter the guest.
2570 */
2571 core = cpu >> threads_shift;
2572 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
Anna-Maria Gleixner3f7cd912016-11-27 00:13:45 +01002573 return 0;
Suresh Warrierb8e6a872015-12-17 14:59:07 -06002574}
2575
2576/*
2577 * Advertise this core as an active host core since we exited the guest
2578 * Only need to do this if it is the primary thread of the core that is
2579 * exiting.
2580 */
Anna-Maria Gleixner3f7cd912016-11-27 00:13:45 +01002581static inline int kvmppc_set_host_core(unsigned int cpu)
Suresh Warrierb8e6a872015-12-17 14:59:07 -06002582{
2583 int core;
2584
2585 if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
Anna-Maria Gleixner3f7cd912016-11-27 00:13:45 +01002586 return 0;
Suresh Warrierb8e6a872015-12-17 14:59:07 -06002587
 2588 * The memory barrier can be omitted here because we do a spin_unlock
 2589 * immediately after this, which provides the memory barrier.
2590 * immediately after this which provides the memory barrier.
2591 */
2592 core = cpu >> threads_shift;
2593 kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
Anna-Maria Gleixner3f7cd912016-11-27 00:13:45 +01002594 return 0;
Suresh Warrierb8e6a872015-12-17 14:59:07 -06002595}
2596
Paul Mackerras8b24e692017-06-26 15:45:51 +10002597static void set_irq_happened(int trap)
2598{
2599 switch (trap) {
2600 case BOOK3S_INTERRUPT_EXTERNAL:
2601 local_paca->irq_happened |= PACA_IRQ_EE;
2602 break;
2603 case BOOK3S_INTERRUPT_H_DOORBELL:
2604 local_paca->irq_happened |= PACA_IRQ_DBELL;
2605 break;
2606 case BOOK3S_INTERRUPT_HMI:
2607 local_paca->irq_happened |= PACA_IRQ_HMI;
2608 break;
2609 }
2610}
2611
Suresh Warrierb8e6a872015-12-17 14:59:07 -06002612/*
Paul Mackerras371fefd2011-06-29 00:23:08 +00002613 * Run a set of guest threads on a physical core.
2614 * Called with vc->lock held.
2615 */
Paul Mackerras66feed62015-03-28 14:21:12 +11002616static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
Paul Mackerras371fefd2011-06-29 00:23:08 +00002617{
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10002618 struct kvm_vcpu *vcpu;
Paul Mackerrasd911f0b2015-03-28 14:21:03 +11002619 int i;
Paul Mackerras2c9097e2012-09-11 13:27:01 +00002620 int srcu_idx;
Paul Mackerrasec257162015-06-24 21:18:03 +10002621 struct core_info core_info;
Paul Mackerras898b25b2017-06-22 15:08:42 +10002622 struct kvmppc_vcore *pvc;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002623 struct kvm_split_mode split_info, *sip;
2624 int split, subcore_size, active;
2625 int sub;
2626 bool thr0_done;
2627 unsigned long cmd_bit, stat_bit;
Paul Mackerrasec257162015-06-24 21:18:03 +10002628 int pcpu, thr;
2629 int target_threads;
Paul Mackerras45c940b2016-11-18 17:43:30 +11002630 int controlled_threads;
Paul Mackerras8b24e692017-06-26 15:45:51 +10002631 int trap;
Paul Mackerras081f3232012-06-01 20:20:24 +10002632
2633 /*
Paul Mackerrasd911f0b2015-03-28 14:21:03 +11002634 * Remove from the list any threads that have a signal pending
2635 * or need a VPA update done
2636 */
2637 prepare_threads(vc);
2638
2639 /* if the runner is no longer runnable, let the caller pick a new one */
2640 if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
2641 return;
2642
2643 /*
2644 * Initialize *vc.
Paul Mackerras081f3232012-06-01 20:20:24 +10002645 */
Paul Mackerras898b25b2017-06-22 15:08:42 +10002646 init_vcore_to_run(vc);
Paul Mackerras2711e242014-12-04 16:43:28 +11002647 vc->preempt_tb = TB_NIL;
Paul Mackerras081f3232012-06-01 20:20:24 +10002648
2649 /*
Paul Mackerras45c940b2016-11-18 17:43:30 +11002650 * Number of threads that we will be controlling: the same as
2651 * the number of threads per subcore, except on POWER9,
2652 * where it's 1 because the threads are (mostly) independent.
2653 */
2654 controlled_threads = threads_per_vcore();
2655
2656 /*
Michael Ellerman3102f782014-05-23 18:15:29 +10002657 * Make sure we are running on primary threads, and that secondary
2658 * threads are offline. Also check if the number of threads in this
 2659 * guest is greater than the current system threads per guest.
Paul Mackerras7b444c62012-10-15 01:16:14 +00002660 */
Paul Mackerras45c940b2016-11-18 17:43:30 +11002661 if ((controlled_threads > 1) &&
Michael Ellerman3102f782014-05-23 18:15:29 +10002662 ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10002663 for_each_runnable_thread(i, vcpu, vc) {
Paul Mackerras7b444c62012-10-15 01:16:14 +00002664 vcpu->arch.ret = -EBUSY;
Paul Mackerras25fedfc2015-03-28 14:21:05 +11002665 kvmppc_remove_runnable(vc, vcpu);
2666 wake_up(&vcpu->arch.cpu_run);
2667 }
Paul Mackerras7b444c62012-10-15 01:16:14 +00002668 goto out;
2669 }
2670
Paul Mackerrasec257162015-06-24 21:18:03 +10002671 /*
2672 * See if we could run any other vcores on the physical core
2673 * along with this one.
2674 */
2675 init_core_info(&core_info, vc);
2676 pcpu = smp_processor_id();
Paul Mackerras45c940b2016-11-18 17:43:30 +11002677 target_threads = controlled_threads;
Paul Mackerrasec257162015-06-24 21:18:03 +10002678 if (target_smt_mode && target_smt_mode < target_threads)
2679 target_threads = target_smt_mode;
2680 if (vc->num_threads < target_threads)
2681 collect_piggybacks(&core_info, target_threads);
Michael Ellerman3102f782014-05-23 18:15:29 +10002682
Paul Mackerras8b24e692017-06-26 15:45:51 +10002683 /*
2684 * On radix, arrange for TLB flushing if necessary.
2685 * This has to be done before disabling interrupts since
2686 * it uses smp_call_function().
2687 */
2688 pcpu = smp_processor_id();
2689 if (kvm_is_radix(vc->kvm)) {
2690 for (sub = 0; sub < core_info.n_subcores; ++sub)
2691 for_each_runnable_thread(i, vcpu, core_info.vc[sub])
2692 kvmppc_prepare_radix_vcpu(vcpu, pcpu);
2693 }
2694
2695 /*
2696 * Hard-disable interrupts, and check resched flag and signals.
2697 * If we need to reschedule or deliver a signal, clean up
2698 * and return without going into the guest(s).
2699 */
2700 local_irq_disable();
2701 hard_irq_disable();
2702 if (lazy_irq_pending() || need_resched() ||
2703 recheck_signals(&core_info)) {
2704 local_irq_enable();
2705 vc->vcore_state = VCORE_INACTIVE;
2706 /* Unlock all except the primary vcore */
2707 for (sub = 1; sub < core_info.n_subcores; ++sub) {
2708 pvc = core_info.vc[sub];
2709 /* Put back on to the preempted vcores list */
2710 kvmppc_vcore_preempt(pvc);
2711 spin_unlock(&pvc->lock);
2712 }
2713 for (i = 0; i < controlled_threads; ++i)
2714 kvmppc_release_hwthread(pcpu + i);
2715 return;
2716 }
2717
2718 kvmppc_clear_host_core(pcpu);
2719
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002720 /* Decide on micro-threading (split-core) mode */
2721 subcore_size = threads_per_subcore;
2722 cmd_bit = stat_bit = 0;
2723 split = core_info.n_subcores;
2724 sip = NULL;
2725 if (split > 1) {
2726 /* threads_per_subcore must be MAX_SMT_THREADS (8) here */
2727 if (split == 2 && (dynamic_mt_modes & 2)) {
2728 cmd_bit = HID0_POWER8_1TO2LPAR;
2729 stat_bit = HID0_POWER8_2LPARMODE;
2730 } else {
2731 split = 4;
2732 cmd_bit = HID0_POWER8_1TO4LPAR;
2733 stat_bit = HID0_POWER8_4LPARMODE;
Paul Mackerrasec257162015-06-24 21:18:03 +10002734 }
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002735 subcore_size = MAX_SMT_THREADS / split;
2736 sip = &split_info;
2737 memset(&split_info, 0, sizeof(split_info));
2738 split_info.rpr = mfspr(SPRN_RPR);
2739 split_info.pmmar = mfspr(SPRN_PMMAR);
2740 split_info.ldbar = mfspr(SPRN_LDBAR);
2741 split_info.subcore_size = subcore_size;
2742 for (sub = 0; sub < core_info.n_subcores; ++sub)
Paul Mackerras898b25b2017-06-22 15:08:42 +10002743 split_info.vc[sub] = core_info.vc[sub];
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002744 /* order writes to split_info before kvm_split_mode pointer */
2745 smp_wmb();
2746 }
Paul Mackerras45c940b2016-11-18 17:43:30 +11002747 for (thr = 0; thr < controlled_threads; ++thr)
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002748 paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
2749
2750 /* Initiate micro-threading (split-core) if required */
2751 if (cmd_bit) {
2752 unsigned long hid0 = mfspr(SPRN_HID0);
2753
2754 hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
2755 mb();
2756 mtspr(SPRN_HID0, hid0);
2757 isync();
2758 for (;;) {
2759 hid0 = mfspr(SPRN_HID0);
2760 if (hid0 & stat_bit)
2761 break;
2762 cpu_relax();
2763 }
Paul Mackerras2e25aa52012-02-19 17:46:32 +00002764 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00002765
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002766 /* Start all the threads */
2767 active = 0;
2768 for (sub = 0; sub < core_info.n_subcores; ++sub) {
2769 thr = subcore_thread_map[sub];
2770 thr0_done = false;
2771 active |= 1 << thr;
Paul Mackerras898b25b2017-06-22 15:08:42 +10002772 pvc = core_info.vc[sub];
2773 pvc->pcpu = pcpu + thr;
2774 for_each_runnable_thread(i, vcpu, pvc) {
2775 kvmppc_start_thread(vcpu, pvc);
2776 kvmppc_create_dtl_entry(vcpu, pvc);
2777 trace_kvm_guest_enter(vcpu);
2778 if (!vcpu->arch.ptid)
2779 thr0_done = true;
2780 active |= 1 << (thr + vcpu->arch.ptid);
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002781 }
Paul Mackerras898b25b2017-06-22 15:08:42 +10002782 /*
2783 * We need to start the first thread of each subcore
2784 * even if it doesn't have a vcpu.
2785 */
2786 if (!thr0_done)
2787 kvmppc_start_thread(NULL, pvc);
2788 thr += pvc->num_threads;
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002789 }
Gautham R. Shenoy7f235322015-09-02 21:48:58 +05302790
2791 /*
2792 * Ensure that split_info.do_nap is set after setting
2793 * the vcore pointer in the PACA of the secondaries.
2794 */
2795 smp_mb();
2796 if (cmd_bit)
2797 split_info.do_nap = 1; /* ask secondaries to nap when done */
2798
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002799 /*
2800 * When doing micro-threading, poke the inactive threads as well.
2801 * This gets them to the nap instruction after kvm_do_nap,
2802 * which reduces the time taken to unsplit later.
2803 */
2804 if (split > 1)
2805 for (thr = 1; thr < threads_per_subcore; ++thr)
2806 if (!(active & (1 << thr)))
2807 kvmppc_ipi_thread(pcpu + thr);
Paul Mackerrase0b7ec02014-01-08 21:25:20 +11002808
Paul Mackerras2f12f032012-10-15 01:17:17 +00002809 vc->vcore_state = VCORE_RUNNING;
Paul Mackerras371fefd2011-06-29 00:23:08 +00002810 preempt_disable();
Suresh E. Warrier3c78f782014-12-03 18:48:10 -06002811
2812 trace_kvmppc_run_core(vc, 0);
2813
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002814 for (sub = 0; sub < core_info.n_subcores; ++sub)
Paul Mackerras898b25b2017-06-22 15:08:42 +10002815 spin_unlock(&core_info.vc[sub]->lock);
Paul Mackerrasde56a942011-06-29 00:21:34 +00002816
Paul Mackerras8b24e692017-06-26 15:45:51 +10002817 /*
2818 * Interrupts will be enabled once we get into the guest,
2819 * so tell lockdep that we're about to enable interrupts.
2820 */
2821 trace_hardirqs_on();
Paul Mackerrasde56a942011-06-29 00:21:34 +00002822
Paolo Bonzini6edaa532016-06-15 15:18:26 +02002823 guest_enter();
Paul Mackerras2c9097e2012-09-11 13:27:01 +00002824
Paul Mackerrase0b7ec02014-01-08 21:25:20 +11002825 srcu_idx = srcu_read_lock(&vc->kvm->srcu);
Paul Mackerras2c9097e2012-09-11 13:27:01 +00002826
Paul Mackerras8b24e692017-06-26 15:45:51 +10002827 trap = __kvmppc_vcore_entry();
Paul Mackerras19ccb762011-07-23 17:42:46 +10002828
Paul Mackerrasec257162015-06-24 21:18:03 +10002829 srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
2830
Paul Mackerras8b24e692017-06-26 15:45:51 +10002831 guest_exit();
2832
2833 trace_hardirqs_off();
2834 set_irq_happened(trap);
2835
Paul Mackerrasec257162015-06-24 21:18:03 +10002836 spin_lock(&vc->lock);
Paul Mackerras371fefd2011-06-29 00:23:08 +00002837 /* prevent other vcpu threads from doing kvmppc_start_thread() now */
Paul Mackerras19ccb762011-07-23 17:42:46 +10002838 vc->vcore_state = VCORE_EXITING;
Paul Mackerras371fefd2011-06-29 00:23:08 +00002839
Paul Mackerras371fefd2011-06-29 00:23:08 +00002840 /* wait for secondary threads to finish writing their state to memory */
Paul Mackerrase0b7ec02014-01-08 21:25:20 +11002841 kvmppc_wait_for_nap();
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002842
2843 /* Return to whole-core mode if we split the core earlier */
2844 if (split > 1) {
2845 unsigned long hid0 = mfspr(SPRN_HID0);
2846 unsigned long loops = 0;
2847
2848 hid0 &= ~HID0_POWER8_DYNLPARDIS;
2849 stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
2850 mb();
2851 mtspr(SPRN_HID0, hid0);
2852 isync();
2853 for (;;) {
2854 hid0 = mfspr(SPRN_HID0);
2855 if (!(hid0 & stat_bit))
2856 break;
2857 cpu_relax();
2858 ++loops;
2859 }
2860 split_info.do_nap = 0;
2861 }
2862
Paul Mackerras8b24e692017-06-26 15:45:51 +10002863 kvmppc_set_host_core(pcpu);
2864
2865 local_irq_enable();
2866
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002867 /* Let secondaries go back to the offline loop */
Paul Mackerras45c940b2016-11-18 17:43:30 +11002868 for (i = 0; i < controlled_threads; ++i) {
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002869 kvmppc_release_hwthread(pcpu + i);
2870 if (sip && sip->napped[i])
2871 kvmppc_ipi_thread(pcpu + i);
Paul Mackerrasa29ebea2017-01-30 21:21:50 +11002872 cpumask_clear_cpu(pcpu + i, &vc->kvm->arch.cpu_in_guest);
Paul Mackerrasb4deba52015-07-02 20:38:16 +10002873 }
2874
Paul Mackerras371fefd2011-06-29 00:23:08 +00002875 spin_unlock(&vc->lock);
Paul Mackerras2c9097e2012-09-11 13:27:01 +00002876
Paul Mackerras371fefd2011-06-29 00:23:08 +00002877 /* make sure updates to secondary vcpu structs are visible now */
2878 smp_mb();
Paul Mackerrasde56a942011-06-29 00:21:34 +00002879
Paul Mackerras898b25b2017-06-22 15:08:42 +10002880 for (sub = 0; sub < core_info.n_subcores; ++sub) {
2881 pvc = core_info.vc[sub];
2882 post_guest_process(pvc, pvc == vc);
2883 }
Paul Mackerrasde56a942011-06-29 00:21:34 +00002884
Paul Mackerras913d3ff9a2012-10-15 01:16:48 +00002885 spin_lock(&vc->lock);
Paul Mackerrasec257162015-06-24 21:18:03 +10002886 preempt_enable();
Paul Mackerrasde56a942011-06-29 00:21:34 +00002887
Paul Mackerrasde56a942011-06-29 00:21:34 +00002888 out:
Paul Mackerras19ccb762011-07-23 17:42:46 +10002889 vc->vcore_state = VCORE_INACTIVE;
Suresh E. Warrier3c78f782014-12-03 18:48:10 -06002890 trace_kvmppc_run_core(vc, 1);
Paul Mackerras371fefd2011-06-29 00:23:08 +00002891}
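/*
 * A rough sketch of the sequence in kvmppc_run_core() above, derived only
 * from the code in this function (not an authoritative description):
 *   1. collect_piggybacks() may pull preempted vcores onto this core.
 *   2. On radix, per-vcpu TLB flushing is arranged before IRQs are disabled.
 *   3. Interrupts are hard-disabled; pending work causes an early bail-out.
 *   4. Split-core (micro-threading) mode is entered via HID0 if needed.
 *   5. Threads are started per subcore, the guest is entered, the core is
 *      unsplit again, and each subcore's vcore is post-processed.
 */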
2892
Paul Mackerras19ccb762011-07-23 17:42:46 +10002893/*
2894 * Wait for some other vcpu thread to execute us, and
2895 * wake us up when we need to handle something in the host.
2896 */
Paul Mackerrasec257162015-06-24 21:18:03 +10002897static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
2898 struct kvm_vcpu *vcpu, int wait_state)
Paul Mackerras371fefd2011-06-29 00:23:08 +00002899{
Paul Mackerras371fefd2011-06-29 00:23:08 +00002900 DEFINE_WAIT(wait);
2901
Paul Mackerras19ccb762011-07-23 17:42:46 +10002902 prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
Paul Mackerrasec257162015-06-24 21:18:03 +10002903 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
2904 spin_unlock(&vc->lock);
Paul Mackerras19ccb762011-07-23 17:42:46 +10002905 schedule();
Paul Mackerrasec257162015-06-24 21:18:03 +10002906 spin_lock(&vc->lock);
2907 }
Paul Mackerras19ccb762011-07-23 17:42:46 +10002908 finish_wait(&vcpu->arch.cpu_run, &wait);
2909}
Paul Mackerras371fefd2011-06-29 00:23:08 +00002910
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10002911static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
2912{
2913 /* 10us base */
2914 if (vc->halt_poll_ns == 0 && halt_poll_ns_grow)
2915 vc->halt_poll_ns = 10000;
2916 else
2917 vc->halt_poll_ns *= halt_poll_ns_grow;
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10002918}
2919
2920static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
2921{
2922 if (halt_poll_ns_shrink == 0)
2923 vc->halt_poll_ns = 0;
2924 else
2925 vc->halt_poll_ns /= halt_poll_ns_shrink;
2926}
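/*
 * Worked example of the two helpers above (the value 2 is illustrative,
 * not necessarily the module default): with halt_poll_ns_grow = 2 a
 * vcore's halt_poll_ns goes 0 -> 10000 ns (10us base) -> 20000 -> 40000
 * on successive calls to grow_halt_poll_ns(); with halt_poll_ns_shrink = 2
 * each shrink_halt_poll_ns() halves it, and halt_poll_ns_shrink == 0 makes
 * a shrink reset it straight to 0.
 */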
2927
Paul Mackerrasee3308a2017-06-20 15:46:12 +10002928#ifdef CONFIG_KVM_XICS
2929static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
2930{
2931 if (!xive_enabled())
2932 return false;
2933 return vcpu->arch.xive_saved_state.pipr <
2934 vcpu->arch.xive_saved_state.cppr;
2935}
2936#else
2937static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
2938{
2939 return false;
2940}
2941#endif /* CONFIG_KVM_XICS */
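/*
 * Note on xive_interrupt_pending() above: in the XIVE model a lower
 * priority number is more favoured, so a saved PIPR below the saved CPPR
 * is read here as "an interrupt is pending that this vcpu would accept".
 * This is only an interpretation of the saved state checked above.
 */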
2942
Paul Mackerras1da4e2f2017-05-19 16:26:16 +10002943static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
2944{
2945 if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
Paul Mackerrasee3308a2017-06-20 15:46:12 +10002946 kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
Paul Mackerras1da4e2f2017-05-19 16:26:16 +10002947 return true;
2948
2949 return false;
2950}
2951
Suraj Jitindar Singh908a0932016-10-14 11:53:23 +11002952/*
2953 * Check to see if any of the runnable vcpus on the vcore have pending
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10002954 * exceptions or are no longer ceded
2955 */
2956static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
2957{
2958 struct kvm_vcpu *vcpu;
2959 int i;
2960
2961 for_each_runnable_thread(i, vcpu, vc) {
Paul Mackerras1da4e2f2017-05-19 16:26:16 +10002962 if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10002963 return 1;
2964 }
2965
2966 return 0;
2967}
2968
Paul Mackerras19ccb762011-07-23 17:42:46 +10002969/*
2970 * All the vcpus in this vcore are idle, so wait for a decrementer
2971 * or external interrupt to one of the vcpus. vc->lock is held.
2972 */
2973static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
2974{
Suraj Jitindar Singh2a27f512016-08-02 14:03:23 +10002975 ktime_t cur, start_poll, start_wait;
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10002976 int do_sleep = 1;
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10002977 u64 block_ns;
Marcelo Tosatti85773702016-02-19 09:46:39 +01002978 DECLARE_SWAITQUEUE(wait);
Suresh E. Warrier1bc5d592014-11-03 15:52:00 +11002979
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10002980 /* Poll for pending exceptions and ceded state */
Suraj Jitindar Singh2a27f512016-08-02 14:03:23 +10002981 cur = start_poll = ktime_get();
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10002982 if (vc->halt_poll_ns) {
Suraj Jitindar Singh2a27f512016-08-02 14:03:23 +10002983 ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
2984 ++vc->runner->stat.halt_attempted_poll;
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10002985
2986 vc->vcore_state = VCORE_POLLING;
2987 spin_unlock(&vc->lock);
2988
2989 do {
2990 if (kvmppc_vcore_check_block(vc)) {
2991 do_sleep = 0;
2992 break;
2993 }
2994 cur = ktime_get();
2995 } while (single_task_running() && ktime_before(cur, stop));
2996
2997 spin_lock(&vc->lock);
2998 vc->vcore_state = VCORE_INACTIVE;
2999
Suraj Jitindar Singh2a27f512016-08-02 14:03:23 +10003000 if (!do_sleep) {
3001 ++vc->runner->stat.halt_successful_poll;
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003002 goto out;
Suraj Jitindar Singh2a27f512016-08-02 14:03:23 +10003003 }
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003004 }
3005
Marcelo Tosatti85773702016-02-19 09:46:39 +01003006 prepare_to_swait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
Suresh E. Warrier1bc5d592014-11-03 15:52:00 +11003007
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003008 if (kvmppc_vcore_check_block(vc)) {
Marcelo Tosatti85773702016-02-19 09:46:39 +01003009 finish_swait(&vc->wq, &wait);
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003010 do_sleep = 0;
Suraj Jitindar Singh2a27f512016-08-02 14:03:23 +10003011 /* If we polled, count this as a successful poll */
3012 if (vc->halt_poll_ns)
3013 ++vc->runner->stat.halt_successful_poll;
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003014 goto out;
Suresh E. Warrier1bc5d592014-11-03 15:52:00 +11003015 }
3016
Suraj Jitindar Singh2a27f512016-08-02 14:03:23 +10003017 start_wait = ktime_get();
3018
Paul Mackerras19ccb762011-07-23 17:42:46 +10003019 vc->vcore_state = VCORE_SLEEPING;
Suresh E. Warrier3c78f782014-12-03 18:48:10 -06003020 trace_kvmppc_vcore_blocked(vc, 0);
Paul Mackerras19ccb762011-07-23 17:42:46 +10003021 spin_unlock(&vc->lock);
Paul Mackerras913d3ff9a2012-10-15 01:16:48 +00003022 schedule();
Marcelo Tosatti85773702016-02-19 09:46:39 +01003023 finish_swait(&vc->wq, &wait);
Paul Mackerras19ccb762011-07-23 17:42:46 +10003024 spin_lock(&vc->lock);
3025 vc->vcore_state = VCORE_INACTIVE;
Suresh E. Warrier3c78f782014-12-03 18:48:10 -06003026 trace_kvmppc_vcore_blocked(vc, 1);
Suraj Jitindar Singh2a27f512016-08-02 14:03:23 +10003027 ++vc->runner->stat.halt_successful_wait;
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003028
3029 cur = ktime_get();
3030
3031out:
Suraj Jitindar Singh2a27f512016-08-02 14:03:23 +10003032 block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
3033
3034 /* Attribute wait time */
3035 if (do_sleep) {
3036 vc->runner->stat.halt_wait_ns +=
3037 ktime_to_ns(cur) - ktime_to_ns(start_wait);
3038 /* Attribute failed poll time */
3039 if (vc->halt_poll_ns)
3040 vc->runner->stat.halt_poll_fail_ns +=
3041 ktime_to_ns(start_wait) -
3042 ktime_to_ns(start_poll);
3043 } else {
3044 /* Attribute successful poll time */
3045 if (vc->halt_poll_ns)
3046 vc->runner->stat.halt_poll_success_ns +=
3047 ktime_to_ns(cur) -
3048 ktime_to_ns(start_poll);
3049 }
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003050
3051 /* Adjust poll time */
Suraj Jitindar Singh307d93e42016-10-14 11:53:20 +11003052 if (halt_poll_ns) {
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003053 if (block_ns <= vc->halt_poll_ns)
3054 ;
3055 /* We slept and blocked for longer than the max halt time */
Suraj Jitindar Singh307d93e42016-10-14 11:53:20 +11003056 else if (vc->halt_poll_ns && block_ns > halt_poll_ns)
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003057 shrink_halt_poll_ns(vc);
3058 /* We slept and our poll time is too small */
Suraj Jitindar Singh307d93e42016-10-14 11:53:20 +11003059 else if (vc->halt_poll_ns < halt_poll_ns &&
3060 block_ns < halt_poll_ns)
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003061 grow_halt_poll_ns(vc);
Suraj Jitindar Singhe03f3922016-10-14 11:53:21 +11003062 if (vc->halt_poll_ns > halt_poll_ns)
3063 vc->halt_poll_ns = halt_poll_ns;
Suraj Jitindar Singh0cda69d2016-08-02 14:03:21 +10003064 } else
3065 vc->halt_poll_ns = 0;
3066
3067 trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
Paul Mackerras19ccb762011-07-23 17:42:46 +10003068}
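/*
 * Sketch of the poll-adjustment policy at the end of kvmppc_vcore_blocked()
 * above (assuming the global halt_poll_ns limit is non-zero): nothing
 * changes if the block fit within the vcore's current window; the window
 * shrinks if polling was enabled and the block exceeded the global limit;
 * it grows if both the window and the block time were below the global
 * limit; and it is always clamped to the global halt_poll_ns maximum.
 * A global limit of 0 disables polling entirely.
 */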
3069
3070static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
3071{
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10003072 int n_ceded, i;
Paul Mackerras19ccb762011-07-23 17:42:46 +10003073 struct kvmppc_vcore *vc;
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10003074 struct kvm_vcpu *v;
Paul Mackerras9e368f22011-06-29 00:40:08 +00003075
Suresh E. Warrier3c78f782014-12-03 18:48:10 -06003076 trace_kvmppc_run_vcpu_enter(vcpu);
3077
Paul Mackerras371fefd2011-06-29 00:23:08 +00003078 kvm_run->exit_reason = 0;
3079 vcpu->arch.ret = RESUME_GUEST;
3080 vcpu->arch.trap = 0;
Paul Mackerras2f12f032012-10-15 01:17:17 +00003081 kvmppc_update_vpas(vcpu);
Paul Mackerras371fefd2011-06-29 00:23:08 +00003082
Paul Mackerras371fefd2011-06-29 00:23:08 +00003083 /*
3084 * Synchronize with other threads in this virtual core
3085 */
3086 vc = vcpu->arch.vcore;
3087 spin_lock(&vc->lock);
Paul Mackerras19ccb762011-07-23 17:42:46 +10003088 vcpu->arch.ceded = 0;
Paul Mackerras371fefd2011-06-29 00:23:08 +00003089 vcpu->arch.run_task = current;
3090 vcpu->arch.kvm_run = kvm_run;
Paul Mackerrasc7b67672012-10-15 01:18:07 +00003091 vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
Paul Mackerras19ccb762011-07-23 17:42:46 +10003092 vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
Paul Mackerrasc7b67672012-10-15 01:18:07 +00003093 vcpu->arch.busy_preempt = TB_NIL;
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10003094 WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
Paul Mackerras371fefd2011-06-29 00:23:08 +00003095 ++vc->n_runnable;
3096
Paul Mackerras19ccb762011-07-23 17:42:46 +10003097 /*
3098 * This happens the first time this is called for a vcpu.
3099 * If the vcore is already running, we may be able to start
3100 * this thread straight away and have it join in.
3101 */
Paul Mackerras8455d792012-10-15 01:17:42 +00003102 if (!signal_pending(current)) {
Paul Mackerrasec257162015-06-24 21:18:03 +10003103 if (vc->vcore_state == VCORE_PIGGYBACK) {
Paul Mackerras898b25b2017-06-22 15:08:42 +10003104 if (spin_trylock(&vc->lock)) {
3105 if (vc->vcore_state == VCORE_RUNNING &&
3106 !VCORE_IS_EXITING(vc)) {
Paul Mackerrasec257162015-06-24 21:18:03 +10003107 kvmppc_create_dtl_entry(vcpu, vc);
Paul Mackerrasb4deba52015-07-02 20:38:16 +10003108 kvmppc_start_thread(vcpu, vc);
Paul Mackerrasec257162015-06-24 21:18:03 +10003109 trace_kvm_guest_enter(vcpu);
3110 }
Paul Mackerras898b25b2017-06-22 15:08:42 +10003111 spin_unlock(&vc->lock);
Paul Mackerrasec257162015-06-24 21:18:03 +10003112 }
3113 } else if (vc->vcore_state == VCORE_RUNNING &&
3114 !VCORE_IS_EXITING(vc)) {
Paul Mackerras2f12f032012-10-15 01:17:17 +00003115 kvmppc_create_dtl_entry(vcpu, vc);
Paul Mackerrasb4deba52015-07-02 20:38:16 +10003116 kvmppc_start_thread(vcpu, vc);
Suresh E. Warrier3c78f782014-12-03 18:48:10 -06003117 trace_kvm_guest_enter(vcpu);
Paul Mackerras8455d792012-10-15 01:17:42 +00003118 } else if (vc->vcore_state == VCORE_SLEEPING) {
Marcelo Tosatti85773702016-02-19 09:46:39 +01003119 swake_up(&vc->wq);
Paul Mackerras19ccb762011-07-23 17:42:46 +10003120 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00003121
Paul Mackerras8455d792012-10-15 01:17:42 +00003122 }
Paul Mackerras19ccb762011-07-23 17:42:46 +10003123
3124 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
3125 !signal_pending(current)) {
Paul Mackerrasec257162015-06-24 21:18:03 +10003126 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
3127 kvmppc_vcore_end_preempt(vc);
3128
Paul Mackerras8455d792012-10-15 01:17:42 +00003129 if (vc->vcore_state != VCORE_INACTIVE) {
Paul Mackerrasec257162015-06-24 21:18:03 +10003130 kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
Paul Mackerras19ccb762011-07-23 17:42:46 +10003131 continue;
3132 }
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10003133 for_each_runnable_thread(i, v, vc) {
Scott Wood7e28e60e2011-11-08 18:23:20 -06003134 kvmppc_core_prepare_to_enter(v);
Paul Mackerras19ccb762011-07-23 17:42:46 +10003135 if (signal_pending(v->arch.run_task)) {
3136 kvmppc_remove_runnable(vc, v);
3137 v->stat.signal_exits++;
3138 v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
3139 v->arch.ret = -EINTR;
3140 wake_up(&v->arch.cpu_run);
3141 }
3142 }
Paul Mackerras8455d792012-10-15 01:17:42 +00003143 if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
3144 break;
Paul Mackerras8455d792012-10-15 01:17:42 +00003145 n_ceded = 0;
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10003146 for_each_runnable_thread(i, v, vc) {
Paul Mackerras1da4e2f2017-05-19 16:26:16 +10003147 if (!kvmppc_vcpu_woken(v))
Paul Mackerras8455d792012-10-15 01:17:42 +00003148 n_ceded += v->arch.ceded;
Paul Mackerras4619ac82013-04-17 20:31:41 +00003149 else
3150 v->arch.ceded = 0;
3151 }
Paul Mackerras25fedfc2015-03-28 14:21:05 +11003152 vc->runner = vcpu;
3153 if (n_ceded == vc->n_runnable) {
Paul Mackerras8455d792012-10-15 01:17:42 +00003154 kvmppc_vcore_blocked(vc);
Konstantin Khlebnikovc56dadf2015-07-15 12:52:03 +03003155 } else if (need_resched()) {
Paul Mackerrasec257162015-06-24 21:18:03 +10003156 kvmppc_vcore_preempt(vc);
Paul Mackerras25fedfc2015-03-28 14:21:05 +11003157 /* Let something else run */
3158 cond_resched_lock(&vc->lock);
Paul Mackerrasec257162015-06-24 21:18:03 +10003159 if (vc->vcore_state == VCORE_PREEMPT)
3160 kvmppc_vcore_end_preempt(vc);
Paul Mackerras25fedfc2015-03-28 14:21:05 +11003161 } else {
Paul Mackerras8455d792012-10-15 01:17:42 +00003162 kvmppc_run_core(vc);
Paul Mackerras25fedfc2015-03-28 14:21:05 +11003163 }
Paul Mackerras0456ec42012-02-03 00:56:21 +00003164 vc->runner = NULL;
Paul Mackerras371fefd2011-06-29 00:23:08 +00003165 }
3166
Paul Mackerras8455d792012-10-15 01:17:42 +00003167 while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
3168 (vc->vcore_state == VCORE_RUNNING ||
Paul Mackerras5fc3e642015-09-18 13:13:44 +10003169 vc->vcore_state == VCORE_EXITING ||
3170 vc->vcore_state == VCORE_PIGGYBACK))
Paul Mackerrasec257162015-06-24 21:18:03 +10003171 kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
Paul Mackerras8455d792012-10-15 01:17:42 +00003172
Paul Mackerras5fc3e642015-09-18 13:13:44 +10003173 if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
3174 kvmppc_vcore_end_preempt(vc);
3175
Paul Mackerras8455d792012-10-15 01:17:42 +00003176 if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
3177 kvmppc_remove_runnable(vc, vcpu);
3178 vcpu->stat.signal_exits++;
3179 kvm_run->exit_reason = KVM_EXIT_INTR;
3180 vcpu->arch.ret = -EINTR;
3181 }
3182
3183 if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
3184 /* Wake up some vcpu to run the core */
Suraj Jitindar Singh7b5f8272016-08-02 14:03:20 +10003185 i = -1;
3186 v = next_runnable_thread(vc, &i);
Paul Mackerras8455d792012-10-15 01:17:42 +00003187 wake_up(&v->arch.cpu_run);
Paul Mackerras19ccb762011-07-23 17:42:46 +10003188 }
Paul Mackerras371fefd2011-06-29 00:23:08 +00003189
Suresh E. Warrier3c78f782014-12-03 18:48:10 -06003190 trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
Paul Mackerras19ccb762011-07-23 17:42:46 +10003191 spin_unlock(&vc->lock);
Paul Mackerras371fefd2011-06-29 00:23:08 +00003192 return vcpu->arch.ret;
Paul Mackerrasde56a942011-06-29 00:21:34 +00003193}
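/*
 * A sketch of the protocol above: each task calling kvmppc_run_vcpu() adds
 * its vcpu to vc->runnable_threads and either joins a vcore that is already
 * running, or loops as the "runner", on each pass blocking the vcore (all
 * runnable vcpus ceded), preempting it, or calling kvmppc_run_core().
 * Tasks that are not the runner sleep in kvmppc_wait_for_exec() until their
 * vcpu has been run or a signal arrives.
 */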
3194
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303195static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
Paul Mackerrasa8606e22011-06-29 00:22:05 +00003196{
3197 int r;
Paul Mackerras913d3ff9a2012-10-15 01:16:48 +00003198 int srcu_idx;
Paul Mackerrasca8efa12017-06-06 16:47:22 +10003199 unsigned long ebb_regs[3] = {}; /* shut up GCC */
Paul Mackerras4c3bb4c2017-06-15 15:43:17 +10003200 unsigned long user_tar = 0;
3201 unsigned int user_vrsave;
Paul Mackerras1b151ce2017-09-13 15:53:48 +10003202 struct kvm *kvm;
Paul Mackerrasa8606e22011-06-29 00:22:05 +00003203
Alexander Grafaf8f38b2011-08-10 13:57:08 +02003204 if (!vcpu->arch.sane) {
3205 run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
3206 return -EINVAL;
3207 }
3208
Paul Mackerras46a704f2017-06-15 16:10:27 +10003209 /*
3210 * Don't allow entry with a suspended transaction, because
3211 * the guest entry/exit code will lose it.
3212 * If the guest has TM enabled, save away their TM-related SPRs
3213 * (they will get restored by the TM unavailable interrupt).
3214 */
3215#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
3216 if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
3217 (current->thread.regs->msr & MSR_TM)) {
3218 if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
3219 run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3220 run->fail_entry.hardware_entry_failure_reason = 0;
3221 return -EINVAL;
3222 }
Paul Mackerrase4705712017-07-21 13:57:14 +10003223 /* Enable TM so we can read the TM SPRs */
3224 mtmsr(mfmsr() | MSR_TM);
Paul Mackerras46a704f2017-06-15 16:10:27 +10003225 current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
3226 current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
3227 current->thread.tm_texasr = mfspr(SPRN_TEXASR);
3228 current->thread.regs->msr &= ~MSR_TM;
3229 }
3230#endif
3231
Scott Wood25051b5a2011-11-08 18:23:23 -06003232 kvmppc_core_prepare_to_enter(vcpu);
3233
Paul Mackerras19ccb762011-07-23 17:42:46 +10003234 /* No need to go into the guest when all we'll do is come back out */
3235 if (signal_pending(current)) {
3236 run->exit_reason = KVM_EXIT_INTR;
3237 return -EINTR;
3238 }
3239
Paul Mackerras1b151ce2017-09-13 15:53:48 +10003240 kvm = vcpu->kvm;
3241 atomic_inc(&kvm->arch.vcpus_running);
3242 /* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
Paul Mackerras32fad282012-05-04 02:32:53 +00003243 smp_mb();
3244
Paul Mackerras1b151ce2017-09-13 15:53:48 +10003245 /* On the first time here, set up MMU if necessary */
3246 if (!vcpu->kvm->arch.mmu_ready) {
3247 mutex_lock(&kvm->lock);
3248 r = 0;
3249 if (!kvm->arch.mmu_ready) {
3250 if (!kvm_is_radix(vcpu->kvm))
3251 r = kvmppc_hv_setup_htab_rma(vcpu);
3252 if (!r) {
3253 if (cpu_has_feature(CPU_FTR_ARCH_300))
3254 kvmppc_setup_partition_table(kvm);
3255 kvm->arch.mmu_ready = 1;
3256 }
3257 }
3258 mutex_unlock(&kvm->lock);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003259 if (r)
Paul Mackerras32fad282012-05-04 02:32:53 +00003260 goto out;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003261 }
Paul Mackerras19ccb762011-07-23 17:42:46 +10003262
Anton Blanchard579e6332015-10-29 11:44:09 +11003263 flush_all_to_thread(current);
3264
Paul Mackerras4c3bb4c2017-06-15 15:43:17 +10003265 /* Save userspace EBB and other register values */
Paul Mackerrasca8efa12017-06-06 16:47:22 +10003266 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
3267 ebb_regs[0] = mfspr(SPRN_EBBHR);
3268 ebb_regs[1] = mfspr(SPRN_EBBRR);
3269 ebb_regs[2] = mfspr(SPRN_BESCR);
Paul Mackerras4c3bb4c2017-06-15 15:43:17 +10003270 user_tar = mfspr(SPRN_TAR);
Paul Mackerrasca8efa12017-06-06 16:47:22 +10003271 }
Paul Mackerras4c3bb4c2017-06-15 15:43:17 +10003272 user_vrsave = mfspr(SPRN_VRSAVE);
Paul Mackerrasca8efa12017-06-06 16:47:22 +10003273
Paul Mackerras19ccb762011-07-23 17:42:46 +10003274 vcpu->arch.wqp = &vcpu->arch.vcore->wq;
Paul Mackerras342d3db2011-12-12 12:38:05 +00003275 vcpu->arch.pgdir = current->mm->pgd;
Paul Mackerrasc7b67672012-10-15 01:18:07 +00003276 vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
Paul Mackerras19ccb762011-07-23 17:42:46 +10003277
Paul Mackerrasa8606e22011-06-29 00:22:05 +00003278 do {
3279 r = kvmppc_run_vcpu(run, vcpu);
3280
3281 if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
3282 !(vcpu->arch.shregs.msr & MSR_PR)) {
Suresh E. Warrier3c78f782014-12-03 18:48:10 -06003283 trace_kvm_hcall_enter(vcpu);
Paul Mackerrasa8606e22011-06-29 00:22:05 +00003284 r = kvmppc_pseries_do_hcall(vcpu);
Suresh E. Warrier3c78f782014-12-03 18:48:10 -06003285 trace_kvm_hcall_exit(vcpu, r);
Scott Wood7e28e60e2011-11-08 18:23:20 -06003286 kvmppc_core_prepare_to_enter(vcpu);
Paul Mackerras913d3ff9a2012-10-15 01:16:48 +00003287 } else if (r == RESUME_PAGE_FAULT) {
3288 srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3289 r = kvmppc_book3s_hv_page_fault(run, vcpu,
3290 vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
3291 srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10003292 } else if (r == RESUME_PASSTHROUGH) {
3293 if (WARN_ON(xive_enabled()))
3294 r = H_SUCCESS;
3295 else
3296 r = kvmppc_xics_rm_complete(vcpu, 0);
3297 }
Greg Kurze59d24e2014-02-06 17:36:56 +01003298 } while (is_kvmppc_resume_guest(r));
Paul Mackerras32fad282012-05-04 02:32:53 +00003299
Paul Mackerras4c3bb4c2017-06-15 15:43:17 +10003300 /* Restore userspace EBB and other register values */
Paul Mackerrasca8efa12017-06-06 16:47:22 +10003301 if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
3302 mtspr(SPRN_EBBHR, ebb_regs[0]);
3303 mtspr(SPRN_EBBRR, ebb_regs[1]);
3304 mtspr(SPRN_BESCR, ebb_regs[2]);
Paul Mackerras4c3bb4c2017-06-15 15:43:17 +10003305 mtspr(SPRN_TAR, user_tar);
3306 mtspr(SPRN_FSCR, current->thread.fscr);
Paul Mackerrasca8efa12017-06-06 16:47:22 +10003307 }
Paul Mackerras4c3bb4c2017-06-15 15:43:17 +10003308 mtspr(SPRN_VRSAVE, user_vrsave);
Paul Mackerrasca8efa12017-06-06 16:47:22 +10003309
Paul Mackerras32fad282012-05-04 02:32:53 +00003310 out:
Paul Mackerrasc7b67672012-10-15 01:18:07 +00003311 vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
Paul Mackerras32fad282012-05-04 02:32:53 +00003312 atomic_dec(&vcpu->kvm->arch.vcpus_running);
Paul Mackerrasa8606e22011-06-29 00:22:05 +00003313 return r;
3314}
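/*
 * Note on the save/restore above: the userspace EBB registers (EBBHR,
 * EBBRR, BESCR), TAR and VRSAVE are captured before the run loop and put
 * back afterwards, so that whatever values the guest leaves in them do not
 * leak into the userspace thread's context.
 */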
3315
Benjamin Herrenschmidt5b747162012-04-26 19:43:42 +00003316static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
Paul Mackerras8dc6cca2017-09-11 15:29:45 +10003317 int shift, int sllp)
Benjamin Herrenschmidt5b747162012-04-26 19:43:42 +00003318{
Paul Mackerras8dc6cca2017-09-11 15:29:45 +10003319 (*sps)->page_shift = shift;
3320 (*sps)->slb_enc = sllp;
3321 (*sps)->enc[0].page_shift = shift;
3322 (*sps)->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
Aneesh Kumar K.V1f365bb2014-05-06 23:31:36 +05303323 /*
Paul Mackerras8dc6cca2017-09-11 15:29:45 +10003324 * Add 16MB MPSS support (may get filtered out by userspace)
Aneesh Kumar K.V1f365bb2014-05-06 23:31:36 +05303325 */
Paul Mackerras8dc6cca2017-09-11 15:29:45 +10003326 if (shift != 24) {
3327 int penc = kvmppc_pgsize_lp_encoding(shift, 24);
3328 if (penc != -1) {
3329 (*sps)->enc[1].page_shift = 24;
3330 (*sps)->enc[1].pte_enc = penc;
3331 }
Aneesh Kumar K.V1f365bb2014-05-06 23:31:36 +05303332 }
Benjamin Herrenschmidt5b747162012-04-26 19:43:42 +00003333 (*sps)++;
3334}
3335
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303336static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
3337 struct kvm_ppc_smmu_info *info)
Benjamin Herrenschmidt5b747162012-04-26 19:43:42 +00003338{
3339 struct kvm_ppc_one_seg_page_size *sps;
3340
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003341 /*
3342 * Since we don't yet support HPT guests on a radix host,
3343 * return an error if the host uses radix.
3344 */
3345 if (radix_enabled())
3346 return -EINVAL;
3347
Paul Mackerrase3bfed12017-08-25 19:53:39 +10003348 /*
3349 * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
3350 * POWER7 doesn't support keys for instruction accesses,
3351 * POWER8 and POWER9 do.
3352 */
3353 info->data_keys = 32;
3354 info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
3355
Paul Mackerras8dc6cca2017-09-11 15:29:45 +10003356 /* POWER7, 8 and 9 all have 1T segments and 32-entry SLB */
3357 info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
3358 info->slb_size = 32;
Benjamin Herrenschmidt5b747162012-04-26 19:43:42 +00003359
3360	/* We only support these sizes for now, and no multi-size segments */
3361 sps = &info->sps[0];
Paul Mackerras8dc6cca2017-09-11 15:29:45 +10003362 kvmppc_add_seg_page_size(&sps, 12, 0);
3363 kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
3364 kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
Benjamin Herrenschmidt5b747162012-04-26 19:43:42 +00003365
3366 return 0;
3367}
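/*
 * For reference, the three kvmppc_add_seg_page_size() calls above end up
 * advertising roughly the following (exact PTE encodings come from
 * kvmppc_pgsize_lp_encoding()):
 *   sps[0]: 4K  base page (shift 12, SLB LP bits 0), plus a 16M MPSS entry
 *   sps[1]: 64K base page (shift 16, SLB_VSID_L | SLB_VSID_LP_01), plus 16M MPSS
 *   sps[2]: 16M base page (shift 24, SLB_VSID_L), 16M only
 * where an MPSS entry whose encoding is unsupported (penc == -1) is omitted.
 */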
3368
Paul Mackerras82ed3612011-12-15 02:03:22 +00003369/*
3370 * Get (and clear) the dirty memory log for a memory slot.
3371 */
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303372static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
3373 struct kvm_dirty_log *log)
Paul Mackerras82ed3612011-12-15 02:03:22 +00003374{
Paolo Bonzini9f6b8022015-05-17 16:20:07 +02003375 struct kvm_memslots *slots;
Paul Mackerras82ed3612011-12-15 02:03:22 +00003376 struct kvm_memory_slot *memslot;
Paul Mackerras8f7b79b2017-01-30 21:21:48 +11003377 int i, r;
Paul Mackerras82ed3612011-12-15 02:03:22 +00003378 unsigned long n;
Paul Mackerrase641a312017-10-26 16:39:19 +11003379 unsigned long *buf, *p;
Paul Mackerras8f7b79b2017-01-30 21:21:48 +11003380 struct kvm_vcpu *vcpu;
Paul Mackerras82ed3612011-12-15 02:03:22 +00003381
3382 mutex_lock(&kvm->slots_lock);
3383
3384 r = -EINVAL;
Alex Williamsonbbacc0c2012-12-10 10:33:09 -07003385 if (log->slot >= KVM_USER_MEM_SLOTS)
Paul Mackerras82ed3612011-12-15 02:03:22 +00003386 goto out;
3387
Paolo Bonzini9f6b8022015-05-17 16:20:07 +02003388 slots = kvm_memslots(kvm);
3389 memslot = id_to_memslot(slots, log->slot);
Paul Mackerras82ed3612011-12-15 02:03:22 +00003390 r = -ENOENT;
3391 if (!memslot->dirty_bitmap)
3392 goto out;
3393
Paul Mackerras8f7b79b2017-01-30 21:21:48 +11003394 /*
Paul Mackerrase641a312017-10-26 16:39:19 +11003395 * Use second half of bitmap area because both HPT and radix
3396 * accumulate bits in the first half.
Paul Mackerras8f7b79b2017-01-30 21:21:48 +11003397 */
Paul Mackerras82ed3612011-12-15 02:03:22 +00003398 n = kvm_dirty_bitmap_bytes(memslot);
Paul Mackerras8f7b79b2017-01-30 21:21:48 +11003399 buf = memslot->dirty_bitmap + n / sizeof(long);
3400 memset(buf, 0, n);
Paul Mackerras82ed3612011-12-15 02:03:22 +00003401
Paul Mackerras8f7b79b2017-01-30 21:21:48 +11003402 if (kvm_is_radix(kvm))
3403 r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
3404 else
3405 r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
Paul Mackerras82ed3612011-12-15 02:03:22 +00003406 if (r)
3407 goto out;
3408
Paul Mackerrase641a312017-10-26 16:39:19 +11003409 /*
3410 * We accumulate dirty bits in the first half of the
3411 * memslot's dirty_bitmap area, for when pages are paged
3412 * out or modified by the host directly. Pick up these
3413 * bits and add them to the map.
3414 */
3415 p = memslot->dirty_bitmap;
3416 for (i = 0; i < n / sizeof(long); ++i)
3417 buf[i] |= xchg(&p[i], 0);
3418
Paul Mackerras8f7b79b2017-01-30 21:21:48 +11003419 /* Harvest dirty bits from VPA and DTL updates */
3420 /* Note: we never modify the SLB shadow buffer areas */
3421 kvm_for_each_vcpu(i, vcpu, kvm) {
3422 spin_lock(&vcpu->arch.vpa_update_lock);
3423 kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
3424 kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
3425 spin_unlock(&vcpu->arch.vpa_update_lock);
3426 }
3427
Paul Mackerras82ed3612011-12-15 02:03:22 +00003428 r = -EFAULT;
Paul Mackerras8f7b79b2017-01-30 21:21:48 +11003429 if (copy_to_user(log->dirty_bitmap, buf, n))
Paul Mackerras82ed3612011-12-15 02:03:22 +00003430 goto out;
3431
3432 r = 0;
3433out:
3434 mutex_unlock(&kvm->slots_lock);
3435 return r;
3436}
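/*
 * Illustrative userspace counterpart (a sketch, not part of this file):
 * the handler above is reached via the KVM_GET_DIRTY_LOG ioctl on the VM
 * file descriptor, roughly:
 *
 *	struct kvm_dirty_log log = {
 *		.slot = slot_id,		// memslot to query
 *		.dirty_bitmap = bitmap_buf,	// one bit per guest page in the slot
 *	};
 *	if (ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log) < 0)
 *		perror("KVM_GET_DIRTY_LOG");
 *
 * where slot_id, bitmap_buf and vm_fd stand for whatever the VMM set up.
 */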
3437
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303438static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
3439 struct kvm_memory_slot *dont)
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00003440{
3441 if (!dont || free->arch.rmap != dont->arch.rmap) {
3442 vfree(free->arch.rmap);
3443 free->arch.rmap = NULL;
3444 }
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00003445}
3446
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303447static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
3448 unsigned long npages)
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00003449{
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003450 /*
3451 * For now, if radix_enabled() then we only support radix guests,
3452 * and in that case we don't need the rmap array.
3453 */
3454 if (radix_enabled()) {
3455 slot->arch.rmap = NULL;
3456 return 0;
3457 }
3458
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00003459 slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
3460 if (!slot->arch.rmap)
3461 return -ENOMEM;
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00003462
3463 return 0;
3464}
3465
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303466static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
3467 struct kvm_memory_slot *memslot,
Paolo Bonzini09170a42015-05-18 13:59:39 +02003468 const struct kvm_userspace_memory_region *mem)
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00003469{
Paul Mackerrasa66b48c2012-09-11 13:27:46 +00003470 return 0;
3471}
3472
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303473static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
Paolo Bonzini09170a42015-05-18 13:59:39 +02003474 const struct kvm_userspace_memory_region *mem,
Paolo Bonzinif36f3f22015-05-18 13:20:23 +02003475 const struct kvm_memory_slot *old,
3476 const struct kvm_memory_slot *new)
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003477{
Paul Mackerrasdfe49db2012-09-11 13:28:18 +00003478 unsigned long npages = mem->memory_size >> PAGE_SHIFT;
Paul Mackerrasdfe49db2012-09-11 13:28:18 +00003479
Yongji Xiea56ee9f2016-11-04 13:55:12 +08003480 /*
3481 * If we are making a new memslot, it might make
3482 * some address that was previously cached as emulated
3483 * MMIO be no longer emulated MMIO, so invalidate
3484 * all the caches of emulated MMIO translations.
3485 */
3486 if (npages)
3487 atomic64_inc(&kvm->arch.mmio_update);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003488}
3489
Paul Mackerrasa0144e22013-09-20 14:52:38 +10003490/*
3491 * Update LPCR values in kvm->arch and in vcores.
3492 * Caller must hold kvm->lock.
3493 */
3494void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
3495{
3496 long int i;
3497 u32 cores_done = 0;
3498
3499 if ((kvm->arch.lpcr & mask) == lpcr)
3500 return;
3501
3502 kvm->arch.lpcr = (kvm->arch.lpcr & ~mask) | lpcr;
3503
3504 for (i = 0; i < KVM_MAX_VCORES; ++i) {
3505 struct kvmppc_vcore *vc = kvm->arch.vcores[i];
3506 if (!vc)
3507 continue;
3508 spin_lock(&vc->lock);
3509 vc->lpcr = (vc->lpcr & ~mask) | lpcr;
3510 spin_unlock(&vc->lock);
3511 if (++cores_done >= kvm->arch.online_vcores)
3512 break;
3513 }
3514}
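/*
 * Usage note: the mask argument selects which LPCR bits the caller owns;
 * only those bits are replaced. For example, the VRMASD update later in
 * this file does kvmppc_update_lpcr(kvm, senc << (LPCR_VRMASD_SH - 4),
 * LPCR_VRMASD) and leaves every other LPCR field untouched.
 */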
3515
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303516static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
3517{
3518 return;
3519}
3520
Paul Mackerras7a840842016-11-16 22:25:20 +11003521static void kvmppc_setup_partition_table(struct kvm *kvm)
3522{
3523 unsigned long dw0, dw1;
3524
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003525 if (!kvm_is_radix(kvm)) {
3526 /* PS field - page size for VRMA */
3527 dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
3528 ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
3529 /* HTABSIZE and HTABORG fields */
3530 dw0 |= kvm->arch.sdr1;
Paul Mackerras7a840842016-11-16 22:25:20 +11003531
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003532 /* Second dword as set by userspace */
3533 dw1 = kvm->arch.process_table;
3534 } else {
3535 dw0 = PATB_HR | radix__get_tree_size() |
3536 __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
3537 dw1 = PATB_GR | kvm->arch.process_table;
3538 }
Paul Mackerras7a840842016-11-16 22:25:20 +11003539
3540 mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
3541}
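/*
 * Layout reminder for the two doublewords written above (as used by this
 * code, not a full ISA description): for an HPT guest dw0 carries the VRMA
 * page-size bits plus the HTABORG/HTABSIZE value from kvm->arch.sdr1; for
 * a radix guest it carries PATB_HR, the radix tree size and the guest PGD.
 * In both cases dw1 is the guest's process table (with PATB_GR for radix).
 */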
3542
Paul Mackerras1b151ce2017-09-13 15:53:48 +10003543/*
3544 * Set up HPT (hashed page table) and RMA (real-mode area).
3545 * Must be called with kvm->lock held.
3546 */
Paul Mackerras32fad282012-05-04 02:32:53 +00003547static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003548{
3549 int err = 0;
3550 struct kvm *kvm = vcpu->kvm;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003551 unsigned long hva;
3552 struct kvm_memory_slot *memslot;
3553 struct vm_area_struct *vma;
Paul Mackerrasa0144e22013-09-20 14:52:38 +10003554 unsigned long lpcr = 0, senc;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003555 unsigned long psize, porder;
Paul Mackerras2c9097e2012-09-11 13:27:01 +00003556 int srcu_idx;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003557
Paul Mackerras32fad282012-05-04 02:32:53 +00003558 /* Allocate hashed page table (if not done already) and reset it */
David Gibson3f9d4f52016-12-20 16:49:00 +11003559 if (!kvm->arch.hpt.virt) {
David Gibsonaae07772016-12-20 16:49:02 +11003560 int order = KVM_DEFAULT_HPT_ORDER;
3561 struct kvm_hpt_info info;
3562
3563 err = kvmppc_allocate_hpt(&info, order);
3564 /* If we get here, it means userspace didn't specify a
3565 * size explicitly. So, try successively smaller
3566 * sizes if the default failed. */
3567 while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER)
3568 err = kvmppc_allocate_hpt(&info, order);
3569
3570 if (err < 0) {
Paul Mackerras32fad282012-05-04 02:32:53 +00003571 pr_err("KVM: Couldn't alloc HPT\n");
3572 goto out;
3573 }
David Gibsonaae07772016-12-20 16:49:02 +11003574
3575 kvmppc_set_hpt(kvm, &info);
Paul Mackerras32fad282012-05-04 02:32:53 +00003576 }
3577
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003578 /* Look up the memslot for guest physical address 0 */
Paul Mackerras2c9097e2012-09-11 13:27:01 +00003579 srcu_idx = srcu_read_lock(&kvm->srcu);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003580 memslot = gfn_to_memslot(kvm, 0);
3581
3582 /* We must have some memory at 0 by now */
3583 err = -EINVAL;
3584 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
Paul Mackerras2c9097e2012-09-11 13:27:01 +00003585 goto out_srcu;
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003586
3587 /* Look up the VMA for the start of this memory slot */
3588 hva = memslot->userspace_addr;
3589 down_read(&current->mm->mmap_sem);
3590 vma = find_vma(current->mm, hva);
3591 if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
3592 goto up_out;
3593
3594 psize = vma_kernel_pagesize(vma);
Paul Mackerrasda9d1d72011-12-12 12:31:41 +00003595 porder = __ilog2(psize);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00003596
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003597 up_read(&current->mm->mmap_sem);
3598
Paul Mackerrasc17b98c2014-12-03 13:30:38 +11003599 /* We can handle 4k, 64k or 16M pages in the VRMA */
3600 err = -EINVAL;
3601 if (!(psize == 0x1000 || psize == 0x10000 ||
3602 psize == 0x1000000))
3603 goto out_srcu;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00003604
Paul Mackerrasc17b98c2014-12-03 13:30:38 +11003605 senc = slb_pgsize_encoding(psize);
3606 kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
3607 (VRMA_VSID << SLB_VSID_SHIFT_1T);
Paul Mackerrasc17b98c2014-12-03 13:30:38 +11003608 /* Create HPTEs in the hash page table for the VRMA */
3609 kvmppc_map_vrma(vcpu, memslot, porder);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00003610
Paul Mackerras7a840842016-11-16 22:25:20 +11003611 /* Update VRMASD field in the LPCR */
3612 if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
3613 /* the -4 is to account for senc values starting at 0x10 */
3614 lpcr = senc << (LPCR_VRMASD_SH - 4);
3615 kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
Paul Mackerras7a840842016-11-16 22:25:20 +11003616 }
Paul Mackerrasa0144e22013-09-20 14:52:38 +10003617
Paul Mackerras1b151ce2017-09-13 15:53:48 +10003618 /* Order updates to kvm->arch.lpcr etc. vs. mmu_ready */
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003619 smp_wmb();
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003620 err = 0;
Paul Mackerras2c9097e2012-09-11 13:27:01 +00003621 out_srcu:
3622 srcu_read_unlock(&kvm->srcu, srcu_idx);
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003623 out:
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003624 return err;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00003625
Paul Mackerrasc77162d2011-12-12 12:31:00 +00003626 up_out:
3627 up_read(&current->mm->mmap_sem);
Lai Jiangshan505d6422013-03-16 00:50:49 +08003628 goto out_srcu;
Paul Mackerrasde56a942011-06-29 00:21:34 +00003629}
3630
Suresh Warrier79b6c242015-12-17 14:59:06 -06003631#ifdef CONFIG_KVM_XICS
3632/*
3633 * Allocate a per-core structure for managing state about which cores are
3634 * running in the host versus the guest and for exchanging data between
3635	 * real mode KVM and CPUs running in the host.
3636 * This is only done for the first VM.
3637 * The allocated structure stays even if all VMs have stopped.
3638 * It is only freed when the kvm-hv module is unloaded.
3639 * It's OK for this routine to fail, we just don't support host
3640 * core operations like redirecting H_IPI wakeups.
3641 */
3642void kvmppc_alloc_host_rm_ops(void)
3643{
3644 struct kvmppc_host_rm_ops *ops;
3645 unsigned long l_ops;
3646 int cpu, core;
3647 int size;
3648
3649	/* Not the first time here? */
3650 if (kvmppc_host_rm_ops_hv != NULL)
3651 return;
3652
3653 ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
3654 if (!ops)
3655 return;
3656
3657 size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
3658 ops->rm_core = kzalloc(size, GFP_KERNEL);
3659
3660 if (!ops->rm_core) {
3661 kfree(ops);
3662 return;
3663 }
3664
Sebastian Andrzej Siewior419af25f2017-05-24 10:15:21 +02003665 cpus_read_lock();
Suresh Warrier6f3bb802015-12-17 14:59:08 -06003666
Suresh Warrier79b6c242015-12-17 14:59:06 -06003667 for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
3668 if (!cpu_online(cpu))
3669 continue;
3670
3671 core = cpu >> threads_shift;
3672 ops->rm_core[core].rm_state.in_host = 1;
3673 }
3674
Suresh Warrier0c2a6602015-12-17 14:59:09 -06003675 ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
3676
Suresh Warrier79b6c242015-12-17 14:59:06 -06003677 /*
3678 * Make the contents of the kvmppc_host_rm_ops structure visible
3679 * to other CPUs before we assign it to the global variable.
3680 * Do an atomic assignment (no locks used here), but if someone
3681 * beats us to it, just free our copy and return.
3682 */
3683 smp_wmb();
3684 l_ops = (unsigned long) ops;
3685
3686 if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
Sebastian Andrzej Siewior419af25f2017-05-24 10:15:21 +02003687 cpus_read_unlock();
Suresh Warrier79b6c242015-12-17 14:59:06 -06003688 kfree(ops->rm_core);
3689 kfree(ops);
Suresh Warrier6f3bb802015-12-17 14:59:08 -06003690 return;
Suresh Warrier79b6c242015-12-17 14:59:06 -06003691 }
Suresh Warrier6f3bb802015-12-17 14:59:08 -06003692
Sebastian Andrzej Siewior419af25f2017-05-24 10:15:21 +02003693 cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
3694 "ppc/kvm_book3s:prepare",
3695 kvmppc_set_host_core,
3696 kvmppc_clear_host_core);
3697 cpus_read_unlock();
Suresh Warrier79b6c242015-12-17 14:59:06 -06003698}
3699
3700void kvmppc_free_host_rm_ops(void)
3701{
3702 if (kvmppc_host_rm_ops_hv) {
Anna-Maria Gleixner3f7cd912016-11-27 00:13:45 +01003703 cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE);
Suresh Warrier79b6c242015-12-17 14:59:06 -06003704 kfree(kvmppc_host_rm_ops_hv->rm_core);
3705 kfree(kvmppc_host_rm_ops_hv);
3706 kvmppc_host_rm_ops_hv = NULL;
3707 }
3708}
3709#endif
3710
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303711static int kvmppc_core_init_vm_hv(struct kvm *kvm)
Paul Mackerrasde56a942011-06-29 00:21:34 +00003712{
Paul Mackerras32fad282012-05-04 02:32:53 +00003713 unsigned long lpcr, lpid;
Paul Mackerrase23a8082015-03-28 14:21:01 +11003714 char buf[32];
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003715 int ret;
Paul Mackerrasde56a942011-06-29 00:21:34 +00003716
Paul Mackerras32fad282012-05-04 02:32:53 +00003717 /* Allocate the guest's logical partition ID */
3718
3719 lpid = kvmppc_alloc_lpid();
Chen Gang5d226ae2013-07-22 14:32:35 +08003720 if ((long)lpid < 0)
Paul Mackerras32fad282012-05-04 02:32:53 +00003721 return -ENOMEM;
3722 kvm->arch.lpid = lpid;
Paul Mackerrasde56a942011-06-29 00:21:34 +00003723
Suresh Warrier79b6c242015-12-17 14:59:06 -06003724 kvmppc_alloc_host_rm_ops();
3725
Paul Mackerras1b400ba2012-11-21 23:28:08 +00003726 /*
3727 * Since we don't flush the TLB when tearing down a VM,
3728 * and this lpid might have previously been used,
3729 * make sure we flush on each core before running the new VM.
Paul Mackerras7c5b06c2016-11-18 08:28:51 +11003730 * On POWER9, the tlbie in mmu_partition_table_set_entry()
3731 * does this flush for us.
Paul Mackerras1b400ba2012-11-21 23:28:08 +00003732 */
Paul Mackerras7c5b06c2016-11-18 08:28:51 +11003733 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3734 cpumask_setall(&kvm->arch.need_tlb_flush);
Paul Mackerras1b400ba2012-11-21 23:28:08 +00003735
Paul Mackerras699a0ea2014-06-02 11:02:59 +10003736 /* Start out with the default set of hcalls enabled */
3737 memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
3738 sizeof(kvm->arch.enabled_hcalls));
3739
Paul Mackerras7a840842016-11-16 22:25:20 +11003740 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3741 kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00003742
Paul Mackerrasc17b98c2014-12-03 13:30:38 +11003743 /* Init LPCR for virtual RMA mode */
3744 kvm->arch.host_lpid = mfspr(SPRN_LPID);
3745 kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
3746 lpcr &= LPCR_PECE | LPCR_LPES;
3747 lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
3748 LPCR_VPM0 | LPCR_VPM1;
3749 kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
3750 (VRMA_VSID << SLB_VSID_SHIFT_1T);
3751 /* On POWER8 turn on online bit to enable PURR/SPURR */
3752 if (cpu_has_feature(CPU_FTR_ARCH_207S))
3753 lpcr |= LPCR_ONL;
Paul Mackerras84f71392016-11-22 14:30:14 +11003754 /*
3755 * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
3756 * Set HVICE bit to enable hypervisor virtualization interrupts.
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10003757	 * Set HEIC to prevent OS interrupts from going to the hypervisor
3758	 * (this should be unnecessary, but better safe than sorry in case
3759	 * we re-enable EE in HV mode with this LPCR still set).
Paul Mackerras84f71392016-11-22 14:30:14 +11003760 */
3761 if (cpu_has_feature(CPU_FTR_ARCH_300)) {
Paul Mackerras7a840842016-11-16 22:25:20 +11003762 lpcr &= ~LPCR_VPM0;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10003763 lpcr |= LPCR_HVICE | LPCR_HEIC;
3764
3765 /*
3766 * If xive is enabled, we route 0x500 interrupts directly
3767 * to the guest.
3768 */
3769 if (xive_enabled())
3770 lpcr |= LPCR_LPES;
Paul Mackerras84f71392016-11-22 14:30:14 +11003771 }
3772
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003773 /*
3774 * For now, if the host uses radix, the guest must be radix.
3775 */
3776 if (radix_enabled()) {
3777 kvm->arch.radix = 1;
Paul Mackerras1b151ce2017-09-13 15:53:48 +10003778 kvm->arch.mmu_ready = 1;
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003779 lpcr &= ~LPCR_VPM1;
3780 lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
3781 ret = kvmppc_init_vm_radix(kvm);
3782 if (ret) {
3783 kvmppc_free_lpid(kvm->arch.lpid);
3784 return ret;
3785 }
3786 kvmppc_setup_partition_table(kvm);
3787 }
3788
Paul Mackerras9e368f22011-06-29 00:40:08 +00003789 kvm->arch.lpcr = lpcr;
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00003790
David Gibson5e985962016-12-20 16:49:05 +11003791 /* Initialization for future HPT resizes */
3792 kvm->arch.resize_hpt = NULL;
3793
Paul Mackerras512691d2012-10-15 01:15:41 +00003794 /*
Paul Mackerras7c5b06c2016-11-18 08:28:51 +11003795 * Work out how many sets the TLB has, for the use of
3796 * the TLB invalidation loop in book3s_hv_rmhandlers.S.
3797 */
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003798 if (kvm_is_radix(kvm))
3799 kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */
3800 else if (cpu_has_feature(CPU_FTR_ARCH_300))
Paul Mackerras7c5b06c2016-11-18 08:28:51 +11003801 kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */
3802 else if (cpu_has_feature(CPU_FTR_ARCH_207S))
3803 kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */
3804 else
3805 kvm->arch.tlb_sets = POWER7_TLB_SETS; /* 128 */
3806
3807 /*
Michael Ellerman441c19c2014-05-23 18:15:25 +10003808 * Track that we now have a HV mode VM active. This blocks secondary
3809 * CPU threads from coming online.
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003810 * On POWER9, we only need to do this for HPT guests on a radix
3811 * host, which is not yet supported.
Paul Mackerras512691d2012-10-15 01:15:41 +00003812 */
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003813 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3814 kvm_hv_vm_activated();
Paul Mackerras512691d2012-10-15 01:15:41 +00003815
Paul Mackerrase23a8082015-03-28 14:21:01 +11003816 /*
Paul Mackerras3c313522017-02-06 13:24:41 +11003817 * Initialize smt_mode depending on processor.
3818 * POWER8 and earlier have to use "strict" threading, where
3819 * all vCPUs in a vcore have to run on the same (sub)core,
3820 * whereas on POWER9 the threads can each run a different
3821 * guest.
3822 */
3823 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3824 kvm->arch.smt_mode = threads_per_subcore;
3825 else
3826 kvm->arch.smt_mode = 1;
Paul Mackerras57900692017-05-16 16:41:20 +10003827 kvm->arch.emul_smt_mode = 1;
Paul Mackerras3c313522017-02-06 13:24:41 +11003828
3829 /*
Paul Mackerrase23a8082015-03-28 14:21:01 +11003830 * Create a debugfs directory for the VM
3831 */
3832 snprintf(buf, sizeof(buf), "vm%d", current->pid);
3833 kvm->arch.debugfs_dir = debugfs_create_dir(buf, kvm_debugfs_dir);
3834 if (!IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
3835 kvmppc_mmu_debugfs_init(kvm);
3836
David Gibson54738c02011-06-29 00:22:41 +00003837 return 0;
Paul Mackerrasde56a942011-06-29 00:21:34 +00003838}
3839
Paul Mackerrasf1378b12013-09-27 15:33:43 +05303840static void kvmppc_free_vcores(struct kvm *kvm)
3841{
3842 long int i;
3843
Paul Mackerras23316312015-10-21 16:03:14 +11003844 for (i = 0; i < KVM_MAX_VCORES; ++i)
Paul Mackerrasf1378b12013-09-27 15:33:43 +05303845 kfree(kvm->arch.vcores[i]);
3846 kvm->arch.online_vcores = 0;
3847}
3848
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303849static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
Paul Mackerrasde56a942011-06-29 00:21:34 +00003850{
Paul Mackerrase23a8082015-03-28 14:21:01 +11003851 debugfs_remove_recursive(kvm->arch.debugfs_dir);
3852
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003853 if (!cpu_has_feature(CPU_FTR_ARCH_300))
3854 kvm_hv_vm_deactivated();
Paul Mackerras512691d2012-10-15 01:15:41 +00003855
Paul Mackerrasf1378b12013-09-27 15:33:43 +05303856 kvmppc_free_vcores(kvm);
Paul Mackerrasaa04b4c2011-06-29 00:25:44 +00003857
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11003858 kvmppc_free_lpid(kvm->arch.lpid);
3859
Paul Mackerras5a319352017-01-30 21:21:46 +11003860 if (kvm_is_radix(kvm))
3861 kvmppc_free_radix(kvm);
3862 else
David Gibsonaae07772016-12-20 16:49:02 +11003863 kvmppc_free_hpt(&kvm->arch.hpt);
Suresh Warrierc57875f2016-08-19 15:35:50 +10003864
3865 kvmppc_free_pimap(kvm);
Paul Mackerrasde56a942011-06-29 00:21:34 +00003866}
3867
3868/* We don't need to emulate any privileged instructions or dcbz */
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303869static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
3870 unsigned int inst, int *advance)
Paul Mackerrasde56a942011-06-29 00:21:34 +00003871{
3872 return EMULATE_FAIL;
3873}
3874
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303875static int kvmppc_core_emulate_mtspr_hv(struct kvm_vcpu *vcpu, int sprn,
3876 ulong spr_val)
Paul Mackerrasde56a942011-06-29 00:21:34 +00003877{
3878 return EMULATE_FAIL;
3879}
3880
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303881static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
3882 ulong *spr_val)
Paul Mackerrasde56a942011-06-29 00:21:34 +00003883{
3884 return EMULATE_FAIL;
3885}
3886
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303887static int kvmppc_core_check_processor_compat_hv(void)
3888{
Paul Mackerrasc17b98c2014-12-03 13:30:38 +11003889 if (!cpu_has_feature(CPU_FTR_HVMODE) ||
3890 !cpu_has_feature(CPU_FTR_ARCH_206))
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303891 return -EIO;
Aneesh Kumar K.V50de5962016-04-29 23:25:43 +10003892
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05303893 return 0;
3894}
3895
Suresh Warrier8daaafc2016-08-19 15:35:48 +10003896#ifdef CONFIG_KVM_XICS
3897
3898void kvmppc_free_pimap(struct kvm *kvm)
3899{
3900 kfree(kvm->arch.pimap);
3901}
3902
Suresh Warrierc57875f2016-08-19 15:35:50 +10003903static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
Suresh Warrier8daaafc2016-08-19 15:35:48 +10003904{
3905 return kzalloc(sizeof(struct kvmppc_passthru_irqmap), GFP_KERNEL);
3906}
Suresh Warrierc57875f2016-08-19 15:35:50 +10003907
3908static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3909{
3910 struct irq_desc *desc;
3911 struct kvmppc_irq_map *irq_map;
3912 struct kvmppc_passthru_irqmap *pimap;
3913 struct irq_chip *chip;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10003914 int i, rc = 0;
Suresh Warrierc57875f2016-08-19 15:35:50 +10003915
Suresh Warrier644abbb2016-08-19 15:35:54 +10003916 if (!kvm_irq_bypass)
3917 return 1;
3918
Suresh Warrierc57875f2016-08-19 15:35:50 +10003919 desc = irq_to_desc(host_irq);
3920 if (!desc)
3921 return -EIO;
3922
3923 mutex_lock(&kvm->lock);
3924
3925 pimap = kvm->arch.pimap;
3926 if (pimap == NULL) {
3927 /* First call, allocate structure to hold IRQ map */
3928 pimap = kvmppc_alloc_pimap();
3929 if (pimap == NULL) {
3930 mutex_unlock(&kvm->lock);
3931 return -ENOMEM;
3932 }
3933 kvm->arch.pimap = pimap;
3934 }
3935
3936 /*
3937 * For now, we only support interrupts for which the EOI operation
3938	 * is an OPAL call followed by a write to XIRR (since that's what our
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10003939	 * real-mode EOI code does), or a XIVE interrupt.
Suresh Warrierc57875f2016-08-19 15:35:50 +10003940 */
3941 chip = irq_data_get_irq_chip(&desc->irq_data);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10003942 if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
Suresh Warrierc57875f2016-08-19 15:35:50 +10003943 pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
3944 host_irq, guest_gsi);
3945 mutex_unlock(&kvm->lock);
3946 return -ENOENT;
3947 }
3948
3949 /*
3950 * See if we already have an entry for this guest IRQ number.
3951 * If it's mapped to a hardware IRQ number, that's an error,
3952 * otherwise re-use this entry.
3953 */
3954 for (i = 0; i < pimap->n_mapped; i++) {
3955 if (guest_gsi == pimap->mapped[i].v_hwirq) {
3956 if (pimap->mapped[i].r_hwirq) {
3957 mutex_unlock(&kvm->lock);
3958 return -EINVAL;
3959 }
3960 break;
3961 }
3962 }
3963
3964 if (i == KVMPPC_PIRQ_MAPPED) {
3965 mutex_unlock(&kvm->lock);
3966 return -EAGAIN; /* table is full */
3967 }
3968
3969 irq_map = &pimap->mapped[i];
3970
3971 irq_map->v_hwirq = guest_gsi;
Suresh Warrierc57875f2016-08-19 15:35:50 +10003972 irq_map->desc = desc;
3973
Suresh Warriere3c13e52016-08-19 15:35:51 +10003974 /*
3975 * Order the above two stores before the next to serialize with
3976 * the KVM real mode handler.
3977 */
3978 smp_wmb();
3979 irq_map->r_hwirq = desc->irq_data.hwirq;
3980
Suresh Warrierc57875f2016-08-19 15:35:50 +10003981 if (i == pimap->n_mapped)
3982 pimap->n_mapped++;
3983
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10003984 if (xive_enabled())
3985 rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
3986 else
3987 kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
3988 if (rc)
3989 irq_map->r_hwirq = 0;
Paul Mackerras5d375192016-08-19 15:35:56 +10003990
Suresh Warrierc57875f2016-08-19 15:35:50 +10003991 mutex_unlock(&kvm->lock);
3992
3993 return 0;
3994}
3995
3996static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
3997{
3998 struct irq_desc *desc;
3999 struct kvmppc_passthru_irqmap *pimap;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10004000 int i, rc = 0;
Suresh Warrierc57875f2016-08-19 15:35:50 +10004001
Suresh Warrier644abbb2016-08-19 15:35:54 +10004002 if (!kvm_irq_bypass)
4003 return 0;
4004
Suresh Warrierc57875f2016-08-19 15:35:50 +10004005 desc = irq_to_desc(host_irq);
4006 if (!desc)
4007 return -EIO;
4008
4009 mutex_lock(&kvm->lock);
Markus Elfringa1c52e12017-01-20 11:00:08 +01004010 if (!kvm->arch.pimap)
4011 goto unlock;
Suresh Warrierc57875f2016-08-19 15:35:50 +10004012
Suresh Warrierc57875f2016-08-19 15:35:50 +10004013 pimap = kvm->arch.pimap;
4014
4015 for (i = 0; i < pimap->n_mapped; i++) {
4016 if (guest_gsi == pimap->mapped[i].v_hwirq)
4017 break;
4018 }
4019
4020 if (i == pimap->n_mapped) {
4021 mutex_unlock(&kvm->lock);
4022 return -ENODEV;
4023 }
4024
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10004025 if (xive_enabled())
4026 rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
4027 else
4028 kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
Paul Mackerras5d375192016-08-19 15:35:56 +10004029
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10004030	/* invalidate the entry (what to do on error from the above?) */
Suresh Warrierc57875f2016-08-19 15:35:50 +10004031 pimap->mapped[i].r_hwirq = 0;
4032
4033 /*
4034 * We don't free this structure even when the count goes to
4035 * zero. The structure is freed when we destroy the VM.
4036 */
Markus Elfringa1c52e12017-01-20 11:00:08 +01004037 unlock:
Suresh Warrierc57875f2016-08-19 15:35:50 +10004038 mutex_unlock(&kvm->lock);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10004039 return rc;
Suresh Warrierc57875f2016-08-19 15:35:50 +10004040}
4041
4042static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
4043 struct irq_bypass_producer *prod)
4044{
4045 int ret = 0;
4046 struct kvm_kernel_irqfd *irqfd =
4047 container_of(cons, struct kvm_kernel_irqfd, consumer);
4048
4049 irqfd->producer = prod;
4050
4051 ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
4052 if (ret)
4053 pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
4054 prod->irq, irqfd->gsi, ret);
4055
4056 return ret;
4057}
4058
4059static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
4060 struct irq_bypass_producer *prod)
4061{
4062 int ret;
4063 struct kvm_kernel_irqfd *irqfd =
4064 container_of(cons, struct kvm_kernel_irqfd, consumer);
4065
4066 irqfd->producer = NULL;
4067
4068 /*
 4069	 * When the producer of an irqfd consumer is unregistered, we change
 4070	 * back to the default external interrupt handling mode - KVM real
 4071	 * mode will switch the interrupt back to the host.
4072 */
4073 ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
4074 if (ret)
4075 pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
4076 prod->irq, irqfd->gsi, ret);
4077}
Suresh Warrier8daaafc2016-08-19 15:35:48 +10004078#endif
4079
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304080static long kvm_arch_vm_ioctl_hv(struct file *filp,
4081 unsigned int ioctl, unsigned long arg)
4082{
4083 struct kvm *kvm __maybe_unused = filp->private_data;
4084 void __user *argp = (void __user *)arg;
4085 long r;
4086
4087 switch (ioctl) {
4088
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304089 case KVM_PPC_ALLOCATE_HTAB: {
4090 u32 htab_order;
4091
4092 r = -EFAULT;
4093 if (get_user(htab_order, (u32 __user *)argp))
4094 break;
David Gibsonf98a8bf2016-12-20 16:49:03 +11004095 r = kvmppc_alloc_reset_hpt(kvm, htab_order);
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304096 if (r)
4097 break;
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304098 r = 0;
4099 break;
4100 }
4101
4102 case KVM_PPC_GET_HTAB_FD: {
4103 struct kvm_get_htab_fd ghf;
4104
4105 r = -EFAULT;
4106 if (copy_from_user(&ghf, argp, sizeof(ghf)))
4107 break;
4108 r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
4109 break;
4110 }
4111
David Gibson5e985962016-12-20 16:49:05 +11004112 case KVM_PPC_RESIZE_HPT_PREPARE: {
4113 struct kvm_ppc_resize_hpt rhpt;
4114
4115 r = -EFAULT;
4116 if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
4117 break;
4118
4119 r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt);
4120 break;
4121 }
4122
4123 case KVM_PPC_RESIZE_HPT_COMMIT: {
4124 struct kvm_ppc_resize_hpt rhpt;
4125
4126 r = -EFAULT;
4127 if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
4128 break;
4129
4130 r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt);
4131 break;
4132 }
4133
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304134 default:
4135 r = -ENOTTY;
4136 }
4137
4138 return r;
4139}
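
/*
 * Hedged userspace-side sketch (not part of this file): how a VMM
 * might drive the HPT resize ioctls handled in kvm_arch_vm_ioctl_hv()
 * above.  "vmfd" is assumed to be a KVM VM file descriptor obtained
 * via KVM_CREATE_VM; the helper name is hypothetical and error
 * handling is simplified.
 */
#if 0	/* illustration only, not built */
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

static int example_resize_hpt(int vmfd, __u32 new_order)
{
	struct kvm_ppc_resize_hpt rhpt = {
		.flags = 0,
		.shift = new_order,	/* log2 of the desired HPT size */
	};
	int ret;

	/* Ask KVM to prepare a new HPT; > 0 means "still working, retry later" */
	do {
		ret = ioctl(vmfd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
		if (ret > 0)
			usleep(ret * 1000);	/* ret is an estimate in ms */
	} while (ret > 0);
	if (ret < 0)
		return ret;

	/* Switch the guest over to the prepared HPT */
	return ioctl(vmfd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}
#endif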
4140
Paul Mackerras699a0ea2014-06-02 11:02:59 +10004141/*
4142 * List of hcall numbers to enable by default.
4143 * For compatibility with old userspace, we enable by default
4144 * all hcalls that were implemented before the hcall-enabling
4145 * facility was added. Note this list should not include H_RTAS.
4146 */
4147static unsigned int default_hcall_list[] = {
4148 H_REMOVE,
4149 H_ENTER,
4150 H_READ,
4151 H_PROTECT,
4152 H_BULK_REMOVE,
4153 H_GET_TCE,
4154 H_PUT_TCE,
4155 H_SET_DABR,
4156 H_SET_XDABR,
4157 H_CEDE,
4158 H_PROD,
4159 H_CONFER,
4160 H_REGISTER_VPA,
4161#ifdef CONFIG_KVM_XICS
4162 H_EOI,
4163 H_CPPR,
4164 H_IPI,
4165 H_IPOLL,
4166 H_XIRR,
4167 H_XIRR_X,
4168#endif
4169 0
4170};
4171
4172static void init_default_hcalls(void)
4173{
4174 int i;
Paul Mackerrasae2113a2014-06-02 11:03:00 +10004175 unsigned int hcall;
Paul Mackerras699a0ea2014-06-02 11:02:59 +10004176
Paul Mackerrasae2113a2014-06-02 11:03:00 +10004177 for (i = 0; default_hcall_list[i]; ++i) {
4178 hcall = default_hcall_list[i];
4179 WARN_ON(!kvmppc_hcall_impl_hv(hcall));
4180 __set_bit(hcall / 4, default_enabled_hcalls);
4181 }
Paul Mackerras699a0ea2014-06-02 11:02:59 +10004182}
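
/*
 * Hedged userspace-side sketch (not part of this file): hcalls outside
 * the default list above (sPAPR hcall numbers are multiples of 4,
 * hence the "hcall / 4" bitmap indexing) can be enabled or disabled
 * per VM with KVM_CAP_PPC_ENABLE_HCALL.  "vmfd" and the helper name
 * are illustrative; error handling is omitted.
 */
#if 0	/* illustration only, not built */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_enable_hcall(int vmfd, unsigned long hcall, int enable)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_PPC_ENABLE_HCALL,
		/* args[0] = hcall number, args[1] = 1 to enable, 0 to disable */
		.args = { hcall, enable },
	};

	return ioctl(vmfd, KVM_ENABLE_CAP, &cap);
}
#endif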
4183
Paul Mackerrasc9270132017-01-30 21:21:41 +11004184static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
4185{
Paul Mackerras468808b2017-01-30 21:21:42 +11004186 unsigned long lpcr;
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11004187 int radix;
Paul Mackerras468808b2017-01-30 21:21:42 +11004188
4189 /* If not on a POWER9, reject it */
4190 if (!cpu_has_feature(CPU_FTR_ARCH_300))
4191 return -ENODEV;
4192
 4193	/* If any unknown flags are set, reject it */
4194 if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
4195 return -EINVAL;
4196
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11004197 /* We can't change a guest to/from radix yet */
4198 radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
4199 if (radix != kvm_is_radix(kvm))
Paul Mackerras468808b2017-01-30 21:21:42 +11004200 return -EINVAL;
4201
4202 /* GR (guest radix) bit in process_table field must match */
Paul Mackerras8cf4ecc2017-01-30 21:21:53 +11004203 if (!!(cfg->process_table & PATB_GR) != radix)
Paul Mackerras468808b2017-01-30 21:21:42 +11004204 return -EINVAL;
4205
4206 /* Process table size field must be reasonable, i.e. <= 24 */
4207 if ((cfg->process_table & PRTS_MASK) > 24)
4208 return -EINVAL;
4209
Paul Mackerrascf5f6f32017-09-11 16:05:30 +10004210 mutex_lock(&kvm->lock);
Paul Mackerras468808b2017-01-30 21:21:42 +11004211 kvm->arch.process_table = cfg->process_table;
4212 kvmppc_setup_partition_table(kvm);
4213
4214 lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
4215 kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
Paul Mackerrascf5f6f32017-09-11 16:05:30 +10004216 mutex_unlock(&kvm->lock);
Paul Mackerras468808b2017-01-30 21:21:42 +11004217
4218 return 0;
Paul Mackerrasc9270132017-01-30 21:21:41 +11004219}
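
/*
 * Hedged userspace-side sketch (not part of this file): invoking
 * KVM_PPC_CONFIGURE_V3_MMU, which lands in kvmhv_configure_mmu()
 * above.  "proc_table" is assumed to already be formatted as the
 * guest's process table entry, with the GR bit set when the guest is
 * radix so that the PATB_GR check above passes; names are illustrative.
 */
#if 0	/* illustration only, not built */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int example_configure_mmu(int vmfd, int radix, __u64 proc_table)
{
	struct kvm_ppc_mmuv3_cfg cfg = {
		/* KVM_PPC_MMUV3_GTSE could also be or'ed in if the guest uses it */
		.flags = radix ? KVM_PPC_MMUV3_RADIX : 0,
		.process_table = proc_table,
	};

	return ioctl(vmfd, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
}
#endif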
4220
Aneesh Kumar K.Vcbbc58d2013-10-07 22:18:01 +05304221static struct kvmppc_ops kvm_ops_hv = {
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304222 .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
4223 .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
4224 .get_one_reg = kvmppc_get_one_reg_hv,
4225 .set_one_reg = kvmppc_set_one_reg_hv,
4226 .vcpu_load = kvmppc_core_vcpu_load_hv,
4227 .vcpu_put = kvmppc_core_vcpu_put_hv,
4228 .set_msr = kvmppc_set_msr_hv,
4229 .vcpu_run = kvmppc_vcpu_run_hv,
4230 .vcpu_create = kvmppc_core_vcpu_create_hv,
4231 .vcpu_free = kvmppc_core_vcpu_free_hv,
4232 .check_requests = kvmppc_core_check_requests_hv,
4233 .get_dirty_log = kvm_vm_ioctl_get_dirty_log_hv,
4234 .flush_memslot = kvmppc_core_flush_memslot_hv,
4235 .prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
4236 .commit_memory_region = kvmppc_core_commit_memory_region_hv,
4237 .unmap_hva = kvm_unmap_hva_hv,
4238 .unmap_hva_range = kvm_unmap_hva_range_hv,
4239 .age_hva = kvm_age_hva_hv,
4240 .test_age_hva = kvm_test_age_hva_hv,
4241 .set_spte_hva = kvm_set_spte_hva_hv,
4242 .mmu_destroy = kvmppc_mmu_destroy_hv,
4243 .free_memslot = kvmppc_core_free_memslot_hv,
4244 .create_memslot = kvmppc_core_create_memslot_hv,
4245 .init_vm = kvmppc_core_init_vm_hv,
4246 .destroy_vm = kvmppc_core_destroy_vm_hv,
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304247 .get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
4248 .emulate_op = kvmppc_core_emulate_op_hv,
4249 .emulate_mtspr = kvmppc_core_emulate_mtspr_hv,
4250 .emulate_mfspr = kvmppc_core_emulate_mfspr_hv,
4251 .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
4252 .arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
Paul Mackerrasae2113a2014-06-02 11:03:00 +10004253 .hcall_implemented = kvmppc_hcall_impl_hv,
Suresh Warrierc57875f2016-08-19 15:35:50 +10004254#ifdef CONFIG_KVM_XICS
4255 .irq_bypass_add_producer = kvmppc_irq_bypass_add_producer_hv,
4256 .irq_bypass_del_producer = kvmppc_irq_bypass_del_producer_hv,
4257#endif
Paul Mackerrasc9270132017-01-30 21:21:41 +11004258 .configure_mmu = kvmhv_configure_mmu,
4259 .get_rmmu_info = kvmhv_get_rmmu_info,
Paul Mackerras3c313522017-02-06 13:24:41 +11004260 .set_smt_mode = kvmhv_set_smt_mode,
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304261};
4262
Mahesh Salgaonkarfd7bacb2016-05-15 09:44:26 +05304263static int kvm_init_subcore_bitmap(void)
4264{
4265 int i, j;
4266 int nr_cores = cpu_nr_cores();
4267 struct sibling_subcore_state *sibling_subcore_state;
4268
4269 for (i = 0; i < nr_cores; i++) {
4270 int first_cpu = i * threads_per_core;
4271 int node = cpu_to_node(first_cpu);
4272
4273 /* Ignore if it is already allocated. */
4274 if (paca[first_cpu].sibling_subcore_state)
4275 continue;
4276
 4277		sibling_subcore_state =
 4278			kzalloc_node(sizeof(struct sibling_subcore_state),
 4279					GFP_KERNEL, node);
 4280		if (!sibling_subcore_state)
 4281			return -ENOMEM;
4285
4286 for (j = 0; j < threads_per_core; j++) {
4287 int cpu = first_cpu + j;
4288
4289 paca[cpu].sibling_subcore_state = sibling_subcore_state;
4290 }
4291 }
4292 return 0;
4293}
4294
Paul Mackerras5a319352017-01-30 21:21:46 +11004295static int kvmppc_radix_possible(void)
4296{
4297 return cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled();
4298}
4299
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304300static int kvmppc_book3s_init_hv(void)
Paul Mackerrasde56a942011-06-29 00:21:34 +00004301{
4302 int r;
Aneesh Kumar K.Vcbbc58d2013-10-07 22:18:01 +05304303 /*
4304 * FIXME!! Do we need to check on all cpus ?
4305 */
4306 r = kvmppc_core_check_processor_compat_hv();
4307 if (r < 0)
Paul Mackerras739e2422014-03-25 10:47:05 +11004308 return -ENODEV;
Paul Mackerrasde56a942011-06-29 00:21:34 +00004309
Mahesh Salgaonkarfd7bacb2016-05-15 09:44:26 +05304310 r = kvm_init_subcore_bitmap();
4311 if (r)
4312 return r;
4313
Paul Mackerrasf7257582016-11-18 09:02:08 +11004314 /*
4315 * We need a way of accessing the XICS interrupt controller,
4316 * either directly, via paca[cpu].kvm_hstate.xics_phys, or
4317 * indirectly, via OPAL.
4318 */
4319#ifdef CONFIG_SMP
Paul Mackerrasfb7dcf72017-04-28 08:23:16 +10004320 if (!xive_enabled() && !local_paca->kvm_hstate.xics_phys) {
Paul Mackerrasf7257582016-11-18 09:02:08 +11004321 struct device_node *np;
4322
4323 np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
4324 if (!np) {
4325 pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
4326 return -ENODEV;
4327 }
4328 }
4329#endif
4330
Aneesh Kumar K.Vcbbc58d2013-10-07 22:18:01 +05304331 kvm_ops_hv.owner = THIS_MODULE;
4332 kvmppc_hv_ops = &kvm_ops_hv;
Paul Mackerrasde56a942011-06-29 00:21:34 +00004333
Paul Mackerras699a0ea2014-06-02 11:02:59 +10004334 init_default_hcalls();
4335
Paul Mackerrasec257162015-06-24 21:18:03 +10004336 init_vcore_lists();
4337
Aneesh Kumar K.Vcbbc58d2013-10-07 22:18:01 +05304338 r = kvmppc_mmu_hv_init();
Paul Mackerras5a319352017-01-30 21:21:46 +11004339 if (r)
4340 return r;
4341
4342 if (kvmppc_radix_possible())
4343 r = kvmppc_radix_init();
Paul Mackerrasde56a942011-06-29 00:21:34 +00004344 return r;
4345}
4346
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304347static void kvmppc_book3s_exit_hv(void)
Paul Mackerrasde56a942011-06-29 00:21:34 +00004348{
Suresh Warrier79b6c242015-12-17 14:59:06 -06004349 kvmppc_free_host_rm_ops();
Paul Mackerras5a319352017-01-30 21:21:46 +11004350 if (kvmppc_radix_possible())
4351 kvmppc_radix_exit();
Aneesh Kumar K.Vcbbc58d2013-10-07 22:18:01 +05304352 kvmppc_hv_ops = NULL;
Paul Mackerrasde56a942011-06-29 00:21:34 +00004353}
4354
Aneesh Kumar K.V3a167bea2013-10-07 22:17:53 +05304355module_init(kvmppc_book3s_init_hv);
4356module_exit(kvmppc_book3s_exit_hv);
Aneesh Kumar K.V2ba9f0d2013-10-07 22:17:59 +05304357MODULE_LICENSE("GPL");
Alexander Graf398a76c2013-12-09 13:53:42 +01004358MODULE_ALIAS_MISCDEV(KVM_MINOR);
4359MODULE_ALIAS("devname:kvm");