blob: 9d373f8963ee98a9977f4a796f200c6343b5d67b [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2018
 * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
 *	   Paul Mackerras <paulus@ozlabs.org>
 *
 * Description: KVM functions specific to running nested KVM-HV guests
 * on Book3S processors (specifically POWER9 and later).
 */
10
11#include <linux/kernel.h>
12#include <linux/kvm_host.h>
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +110013#include <linux/llist.h>
Mike Rapoport65fddcf2020-06-08 21:32:42 -070014#include <linux/pgtable.h>
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +110015
16#include <asm/kvm_ppc.h>
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +110017#include <asm/kvm_book3s.h>
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +110018#include <asm/mmu.h>
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +110019#include <asm/pgalloc.h>
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +110020#include <asm/pte-walk.h>
21#include <asm/reg.h>
Bharata B Rao81468082021-06-21 14:20:03 +053022#include <asm/plpar_wrappers.h>
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +110023
/*
 * Partition table allocated in kvmhv_nested_init() and registered with the
 * parent hypervisor; NULL when no table has been set up.
 */
static struct patb_entry *pseries_partition_tb;

/* Forward declarations for helpers that are used before they are defined. */
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +110028
Paul Mackerras360cae32018-10-08 16:31:04 +110029void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
30{
31 struct kvmppc_vcore *vc = vcpu->arch.vcore;
32
Jordan Niethe13c7bb32019-09-17 10:46:05 +100033 hr->pcr = vc->pcr | PCR_MASK;
Paul Mackerras360cae32018-10-08 16:31:04 +110034 hr->dpdes = vc->dpdes;
35 hr->hfscr = vcpu->arch.hfscr;
36 hr->tb_offset = vc->tb_offset;
Ravi Bangoria122954ed72020-12-16 16:12:17 +053037 hr->dawr0 = vcpu->arch.dawr0;
38 hr->dawrx0 = vcpu->arch.dawrx0;
Paul Mackerras360cae32018-10-08 16:31:04 +110039 hr->ciabr = vcpu->arch.ciabr;
40 hr->purr = vcpu->arch.purr;
41 hr->spurr = vcpu->arch.spurr;
42 hr->ic = vcpu->arch.ic;
43 hr->vtb = vc->vtb;
44 hr->srr0 = vcpu->arch.shregs.srr0;
45 hr->srr1 = vcpu->arch.shregs.srr1;
46 hr->sprg[0] = vcpu->arch.shregs.sprg0;
47 hr->sprg[1] = vcpu->arch.shregs.sprg1;
48 hr->sprg[2] = vcpu->arch.shregs.sprg2;
49 hr->sprg[3] = vcpu->arch.shregs.sprg3;
50 hr->pidr = vcpu->arch.pid;
51 hr->cfar = vcpu->arch.cfar;
52 hr->ppr = vcpu->arch.ppr;
Ravi Bangoriabd1de1a2020-12-16 16:12:18 +053053 hr->dawr1 = vcpu->arch.dawr1;
54 hr->dawrx1 = vcpu->arch.dawrx1;
Paul Mackerras360cae32018-10-08 16:31:04 +110055}
56
Nathan Chancellor51696f32021-06-21 11:24:40 -070057/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
58static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
Suraj Jitindar Singh10b50222018-10-08 16:31:15 +110059{
60 unsigned long *addr = (unsigned long *) regs;
61
62 for (; addr < ((unsigned long *) (regs + 1)); addr++)
63 *addr = swab64(*addr);
64}
65
66static void byteswap_hv_regs(struct hv_guest_state *hr)
67{
68 hr->version = swab64(hr->version);
69 hr->lpid = swab32(hr->lpid);
70 hr->vcpu_token = swab32(hr->vcpu_token);
71 hr->lpcr = swab64(hr->lpcr);
Jordan Niethe13c7bb32019-09-17 10:46:05 +100072 hr->pcr = swab64(hr->pcr) | PCR_MASK;
Suraj Jitindar Singh10b50222018-10-08 16:31:15 +110073 hr->amor = swab64(hr->amor);
74 hr->dpdes = swab64(hr->dpdes);
75 hr->hfscr = swab64(hr->hfscr);
76 hr->tb_offset = swab64(hr->tb_offset);
77 hr->dawr0 = swab64(hr->dawr0);
78 hr->dawrx0 = swab64(hr->dawrx0);
79 hr->ciabr = swab64(hr->ciabr);
80 hr->hdec_expiry = swab64(hr->hdec_expiry);
81 hr->purr = swab64(hr->purr);
82 hr->spurr = swab64(hr->spurr);
83 hr->ic = swab64(hr->ic);
84 hr->vtb = swab64(hr->vtb);
85 hr->hdar = swab64(hr->hdar);
86 hr->hdsisr = swab64(hr->hdsisr);
87 hr->heir = swab64(hr->heir);
88 hr->asdr = swab64(hr->asdr);
89 hr->srr0 = swab64(hr->srr0);
90 hr->srr1 = swab64(hr->srr1);
91 hr->sprg[0] = swab64(hr->sprg[0]);
92 hr->sprg[1] = swab64(hr->sprg[1]);
93 hr->sprg[2] = swab64(hr->sprg[2]);
94 hr->sprg[3] = swab64(hr->sprg[3]);
95 hr->pidr = swab64(hr->pidr);
96 hr->cfar = swab64(hr->cfar);
97 hr->ppr = swab64(hr->ppr);
Ravi Bangoriabd1de1a2020-12-16 16:12:18 +053098 hr->dawr1 = swab64(hr->dawr1);
99 hr->dawrx1 = swab64(hr->dawrx1);
Suraj Jitindar Singh10b50222018-10-08 16:31:15 +1100100}
101
Fabiano Rosasf2e29db2021-08-12 02:00:42 +1000102static void save_hv_return_state(struct kvm_vcpu *vcpu,
Paul Mackerras360cae32018-10-08 16:31:04 +1100103 struct hv_guest_state *hr)
104{
105 struct kvmppc_vcore *vc = vcpu->arch.vcore;
106
107 hr->dpdes = vc->dpdes;
Paul Mackerras360cae32018-10-08 16:31:04 +1100108 hr->purr = vcpu->arch.purr;
109 hr->spurr = vcpu->arch.spurr;
110 hr->ic = vcpu->arch.ic;
111 hr->vtb = vc->vtb;
112 hr->srr0 = vcpu->arch.shregs.srr0;
113 hr->srr1 = vcpu->arch.shregs.srr1;
114 hr->sprg[0] = vcpu->arch.shregs.sprg0;
115 hr->sprg[1] = vcpu->arch.shregs.sprg1;
116 hr->sprg[2] = vcpu->arch.shregs.sprg2;
117 hr->sprg[3] = vcpu->arch.shregs.sprg3;
118 hr->pidr = vcpu->arch.pid;
119 hr->cfar = vcpu->arch.cfar;
120 hr->ppr = vcpu->arch.ppr;
Fabiano Rosasf2e29db2021-08-12 02:00:42 +1000121 switch (vcpu->arch.trap) {
Paul Mackerras360cae32018-10-08 16:31:04 +1100122 case BOOK3S_INTERRUPT_H_DATA_STORAGE:
123 hr->hdar = vcpu->arch.fault_dar;
124 hr->hdsisr = vcpu->arch.fault_dsisr;
125 hr->asdr = vcpu->arch.fault_gpa;
126 break;
127 case BOOK3S_INTERRUPT_H_INST_STORAGE:
128 hr->asdr = vcpu->arch.fault_gpa;
129 break;
Fabiano Rosas7487cab2021-08-12 02:00:39 +1000130 case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
131 hr->hfscr = ((~HFSCR_INTR_CAUSE & hr->hfscr) |
132 (HFSCR_INTR_CAUSE & vcpu->arch.hfscr));
133 break;
Paul Mackerras360cae32018-10-08 16:31:04 +1100134 case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
135 hr->heir = vcpu->arch.emul_inst;
136 break;
137 }
138}
139
Fabiano Rosas7487cab2021-08-12 02:00:39 +1000140static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
Paul Mackerras360cae32018-10-08 16:31:04 +1100141{
142 struct kvmppc_vcore *vc = vcpu->arch.vcore;
143
Jordan Niethe13c7bb32019-09-17 10:46:05 +1000144 vc->pcr = hr->pcr | PCR_MASK;
Paul Mackerras360cae32018-10-08 16:31:04 +1100145 vc->dpdes = hr->dpdes;
146 vcpu->arch.hfscr = hr->hfscr;
Ravi Bangoria122954ed72020-12-16 16:12:17 +0530147 vcpu->arch.dawr0 = hr->dawr0;
148 vcpu->arch.dawrx0 = hr->dawrx0;
Paul Mackerras360cae32018-10-08 16:31:04 +1100149 vcpu->arch.ciabr = hr->ciabr;
150 vcpu->arch.purr = hr->purr;
151 vcpu->arch.spurr = hr->spurr;
152 vcpu->arch.ic = hr->ic;
153 vc->vtb = hr->vtb;
154 vcpu->arch.shregs.srr0 = hr->srr0;
155 vcpu->arch.shregs.srr1 = hr->srr1;
156 vcpu->arch.shregs.sprg0 = hr->sprg[0];
157 vcpu->arch.shregs.sprg1 = hr->sprg[1];
158 vcpu->arch.shregs.sprg2 = hr->sprg[2];
159 vcpu->arch.shregs.sprg3 = hr->sprg[3];
160 vcpu->arch.pid = hr->pidr;
161 vcpu->arch.cfar = hr->cfar;
162 vcpu->arch.ppr = hr->ppr;
Ravi Bangoriabd1de1a2020-12-16 16:12:18 +0530163 vcpu->arch.dawr1 = hr->dawr1;
164 vcpu->arch.dawrx1 = hr->dawrx1;
Paul Mackerras360cae32018-10-08 16:31:04 +1100165}
166
167void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
168 struct hv_guest_state *hr)
169{
170 struct kvmppc_vcore *vc = vcpu->arch.vcore;
171
172 vc->dpdes = hr->dpdes;
173 vcpu->arch.hfscr = hr->hfscr;
174 vcpu->arch.purr = hr->purr;
175 vcpu->arch.spurr = hr->spurr;
176 vcpu->arch.ic = hr->ic;
177 vc->vtb = hr->vtb;
178 vcpu->arch.fault_dar = hr->hdar;
179 vcpu->arch.fault_dsisr = hr->hdsisr;
180 vcpu->arch.fault_gpa = hr->asdr;
181 vcpu->arch.emul_inst = hr->heir;
182 vcpu->arch.shregs.srr0 = hr->srr0;
183 vcpu->arch.shregs.srr1 = hr->srr1;
184 vcpu->arch.shregs.sprg0 = hr->sprg[0];
185 vcpu->arch.shregs.sprg1 = hr->sprg[1];
186 vcpu->arch.shregs.sprg2 = hr->sprg[2];
187 vcpu->arch.shregs.sprg3 = hr->sprg[3];
188 vcpu->arch.pid = hr->pidr;
189 vcpu->arch.cfar = hr->cfar;
190 vcpu->arch.ppr = hr->ppr;
191}
192
Suraj Jitindar Singh873db2c2018-12-14 16:29:08 +1100193static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
194{
195 /* No need to reflect the page fault to L1, we've handled it */
196 vcpu->arch.trap = 0;
197
198 /*
199 * Since the L2 gprs have already been written back into L1 memory when
200 * we complete the mmio, store the L1 memory location of the L2 gpr
201 * being loaded into by the mmio so that the loaded value can be
202 * written there in kvmppc_complete_mmio_load()
203 */
204 if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR)
205 && (vcpu->mmio_is_write == 0)) {
206 vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
207 offsetof(struct pt_regs,
208 gpr[vcpu->arch.io_gpr]);
209 vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
210 }
211}
212
Ravi Bangoriaafe75042020-12-16 16:12:16 +0530213static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
214 struct hv_guest_state *l2_hv,
215 struct pt_regs *l2_regs,
216 u64 hv_ptr, u64 regs_ptr)
217{
218 int size;
219
220 if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version,
221 sizeof(l2_hv->version)))
222 return -1;
223
224 if (kvmppc_need_byteswap(vcpu))
225 l2_hv->version = swab64(l2_hv->version);
226
227 size = hv_guest_state_size(l2_hv->version);
228 if (size < 0)
229 return -1;
230
231 return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) ||
232 kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
233 sizeof(struct pt_regs));
234}
235
236static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
237 struct hv_guest_state *l2_hv,
238 struct pt_regs *l2_regs,
239 u64 hv_ptr, u64 regs_ptr)
240{
241 int size;
242
243 size = hv_guest_state_size(l2_hv->version);
244 if (size < 0)
245 return -1;
246
247 return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) ||
248 kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
249 sizeof(struct pt_regs));
250}
251
Fabiano Rosas7487cab2021-08-12 02:00:39 +1000252static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
253 const struct hv_guest_state *l2_hv,
254 const struct hv_guest_state *l1_hv, u64 *lpcr)
255{
256 struct kvmppc_vcore *vc = vcpu->arch.vcore;
257 u64 mask;
258
259 restore_hv_regs(vcpu, l2_hv);
260
261 /*
262 * Don't let L1 change LPCR bits for the L2 except these:
263 */
264 mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
265 LPCR_LPES | LPCR_MER;
266
267 /*
268 * Additional filtering is required depending on hardware
269 * and configuration.
270 */
271 *lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm,
272 (vc->lpcr & ~mask) | (*lpcr & mask));
273
274 /*
Nicholas Piggin8b210a82021-08-12 02:00:40 +1000275 * Don't let L1 enable features for L2 which we don't allow for L1,
Fabiano Rosas7487cab2021-08-12 02:00:39 +1000276 * but preserve the interrupt cause field.
277 */
Nicholas Piggin8b210a82021-08-12 02:00:40 +1000278 vcpu->arch.hfscr = l2_hv->hfscr & (HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted);
Fabiano Rosas7487cab2021-08-12 02:00:39 +1000279
280 /* Don't let data address watchpoint match in hypervisor state */
281 vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
282 vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP;
283
284 /* Don't let completed instruction address breakpt match in HV state */
285 if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
286 vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV;
287}
288
Paul Mackerras360cae32018-10-08 16:31:04 +1100289long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
290{
291 long int err, r;
292 struct kvm_nested_guest *l2;
293 struct pt_regs l2_regs, saved_l1_regs;
Ravi Bangoriaafe75042020-12-16 16:12:16 +0530294 struct hv_guest_state l2_hv = {0}, saved_l1_hv;
Paul Mackerras360cae32018-10-08 16:31:04 +1100295 struct kvmppc_vcore *vc = vcpu->arch.vcore;
296 u64 hv_ptr, regs_ptr;
Fabiano Rosas7487cab2021-08-12 02:00:39 +1000297 u64 hdec_exp, lpcr;
Paul Mackerras360cae32018-10-08 16:31:04 +1100298 s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
Paul Mackerras360cae32018-10-08 16:31:04 +1100299
300 if (vcpu->kvm->arch.l1_ptcr == 0)
301 return H_NOT_AVAILABLE;
302
Nicholas Piggind9c57d32021-07-08 21:26:22 +1000303 if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
304 return H_BAD_MODE;
305
Paul Mackerras360cae32018-10-08 16:31:04 +1100306 /* copy parameters in */
307 hv_ptr = kvmppc_get_gpr(vcpu, 4);
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000308 regs_ptr = kvmppc_get_gpr(vcpu, 5);
309 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
Ravi Bangoriaafe75042020-12-16 16:12:16 +0530310 err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
311 hv_ptr, regs_ptr);
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000312 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
Paul Mackerras360cae32018-10-08 16:31:04 +1100313 if (err)
314 return H_PARAMETER;
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000315
Suraj Jitindar Singh10b50222018-10-08 16:31:15 +1100316 if (kvmppc_need_byteswap(vcpu))
317 byteswap_hv_regs(&l2_hv);
Ravi Bangoriaafe75042020-12-16 16:12:16 +0530318 if (l2_hv.version > HV_GUEST_STATE_VERSION)
Paul Mackerras360cae32018-10-08 16:31:04 +1100319 return H_P2;
320
Suraj Jitindar Singh10b50222018-10-08 16:31:15 +1100321 if (kvmppc_need_byteswap(vcpu))
322 byteswap_pt_regs(&l2_regs);
Suraj Jitindar Singh9d0b0482018-10-08 16:31:11 +1100323 if (l2_hv.vcpu_token >= NR_CPUS)
324 return H_PARAMETER;
325
Nicholas Piggind9c57d32021-07-08 21:26:22 +1000326 /*
327 * L1 must have set up a suspended state to enter the L2 in a
328 * transactional state, and only in that case. These have to be
329 * filtered out here to prevent causing a TM Bad Thing in the
330 * host HRFID. We could synthesize a TM Bad Thing back to the L1
331 * here but there doesn't seem like much point.
332 */
333 if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
334 if (!MSR_TM_ACTIVE(l2_regs.msr))
335 return H_BAD_MODE;
336 } else {
337 if (l2_regs.msr & MSR_TS_MASK)
338 return H_BAD_MODE;
339 if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
340 return H_BAD_MODE;
341 }
342
Paul Mackerras360cae32018-10-08 16:31:04 +1100343 /* translate lpid */
344 l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
345 if (!l2)
346 return H_PARAMETER;
347 if (!l2->l1_gr_to_hr) {
348 mutex_lock(&l2->tlb_lock);
349 kvmhv_update_ptbl_cache(l2);
350 mutex_unlock(&l2->tlb_lock);
351 }
352
353 /* save l1 values of things */
354 vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
355 saved_l1_regs = vcpu->arch.regs;
356 kvmhv_save_hv_regs(vcpu, &saved_l1_hv);
357
358 /* convert TB values/offsets to host (L0) values */
359 hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
360 vc->tb_offset += l2_hv.tb_offset;
Nicholas Piggin3c1a43222021-11-23 19:52:00 +1000361 vcpu->arch.dec_expires += l2_hv.tb_offset;
Paul Mackerras360cae32018-10-08 16:31:04 +1100362
363 /* set L1 state to L2 state */
364 vcpu->arch.nested = l2;
365 vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
Nicholas Piggin22f7ff02022-01-22 20:55:30 +1000366 vcpu->arch.nested_hfscr = l2_hv.hfscr;
Paul Mackerras360cae32018-10-08 16:31:04 +1100367 vcpu->arch.regs = l2_regs;
Nicholas Piggin946cf442021-04-12 11:48:44 +1000368
Nicholas Piggin732f21a2021-04-12 11:48:45 +1000369 /* Guest must always run with ME enabled, HV disabled. */
370 vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
Nicholas Piggina19b70a2021-04-12 11:48:35 +1000371
Fabiano Rosas7487cab2021-08-12 02:00:39 +1000372 lpcr = l2_hv.lpcr;
373 load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr);
Paul Mackerras360cae32018-10-08 16:31:04 +1100374
375 vcpu->arch.ret = RESUME_GUEST;
376 vcpu->arch.trap = 0;
377 do {
Fabiano Rosas7487cab2021-08-12 02:00:39 +1000378 r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
Paul Mackerras360cae32018-10-08 16:31:04 +1100379 } while (is_kvmppc_resume_guest(r));
380
381 /* save L2 state for return */
382 l2_regs = vcpu->arch.regs;
383 l2_regs.msr = vcpu->arch.shregs.msr;
384 delta_purr = vcpu->arch.purr - l2_hv.purr;
385 delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
386 delta_ic = vcpu->arch.ic - l2_hv.ic;
387 delta_vtb = vc->vtb - l2_hv.vtb;
Fabiano Rosasf2e29db2021-08-12 02:00:42 +1000388 save_hv_return_state(vcpu, &l2_hv);
Paul Mackerras360cae32018-10-08 16:31:04 +1100389
390 /* restore L1 state */
391 vcpu->arch.nested = NULL;
392 vcpu->arch.regs = saved_l1_regs;
393 vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
394 /* set L1 MSR TS field according to L2 transaction state */
395 if (l2_regs.msr & MSR_TS_MASK)
396 vcpu->arch.shregs.msr |= MSR_TS_S;
397 vc->tb_offset = saved_l1_hv.tb_offset;
Nicholas Piggin3c1a43222021-11-23 19:52:00 +1000398 /* XXX: is this always the same delta as saved_l1_hv.tb_offset? */
399 vcpu->arch.dec_expires -= l2_hv.tb_offset;
Paul Mackerras360cae32018-10-08 16:31:04 +1100400 restore_hv_regs(vcpu, &saved_l1_hv);
401 vcpu->arch.purr += delta_purr;
402 vcpu->arch.spurr += delta_spurr;
403 vcpu->arch.ic += delta_ic;
404 vc->vtb += delta_vtb;
405
406 kvmhv_put_nested(l2);
407
408 /* copy l2_hv_state and regs back to guest */
Suraj Jitindar Singh10b50222018-10-08 16:31:15 +1100409 if (kvmppc_need_byteswap(vcpu)) {
410 byteswap_hv_regs(&l2_hv);
411 byteswap_pt_regs(&l2_regs);
412 }
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000413 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
Ravi Bangoriaafe75042020-12-16 16:12:16 +0530414 err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
415 hv_ptr, regs_ptr);
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000416 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
Paul Mackerras360cae32018-10-08 16:31:04 +1100417 if (err)
418 return H_AUTHORITY;
419
420 if (r == -EINTR)
421 return H_INTERRUPT;
422
Suraj Jitindar Singh873db2c2018-12-14 16:29:08 +1100423 if (vcpu->mmio_needed) {
424 kvmhv_nested_mmio_needed(vcpu, regs_ptr);
425 return H_TOO_HARD;
426 }
427
Paul Mackerras360cae32018-10-08 16:31:04 +1100428 return vcpu->arch.trap;
429}
430
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100431long kvmhv_nested_init(void)
432{
433 long int ptb_order;
434 unsigned long ptcr;
435 long rc;
436
437 if (!kvmhv_on_pseries())
438 return 0;
439 if (!radix_enabled())
440 return -ENODEV;
441
442 /* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
443 ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
444 if (ptb_order < 8)
445 ptb_order = 8;
446 pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
447 GFP_KERNEL);
448 if (!pseries_partition_tb) {
449 pr_err("kvm-hv: failed to allocated nested partition table\n");
450 return -ENOMEM;
451 }
452
453 ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
454 rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
455 if (rc != H_SUCCESS) {
456 pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
457 rc);
458 kfree(pseries_partition_tb);
459 pseries_partition_tb = NULL;
460 return -ENODEV;
461 }
462
463 return 0;
464}
465
466void kvmhv_nested_exit(void)
467{
468 /*
469 * N.B. the kvmhv_on_pseries() test is there because it enables
470 * the compiler to remove the call to plpar_hcall_norets()
471 * when CONFIG_PPC_PSERIES=n.
472 */
473 if (kvmhv_on_pseries() && pseries_partition_tb) {
474 plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
475 kfree(pseries_partition_tb);
476 pseries_partition_tb = NULL;
477 }
478}
479
Paul Mackerras690ed4c2018-10-08 16:31:10 +1100480static void kvmhv_flush_lpid(unsigned int lpid)
481{
482 long rc;
483
484 if (!kvmhv_on_pseries()) {
Nicholas Piggin99161de2019-09-03 01:29:27 +1000485 radix__flush_all_lpid(lpid);
Paul Mackerras690ed4c2018-10-08 16:31:10 +1100486 return;
487 }
488
Bharata B Rao81468082021-06-21 14:20:03 +0530489 if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
490 rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
491 lpid, TLBIEL_INVAL_SET_LPID);
492 else
493 rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
494 H_RPTI_TYPE_NESTED |
495 H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
496 H_RPTI_TYPE_PAT,
497 H_RPTI_PAGE_ALL, 0, -1UL);
Paul Mackerras690ed4c2018-10-08 16:31:10 +1100498 if (rc)
499 pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
500}
501
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100502void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
503{
Paul Mackerras690ed4c2018-10-08 16:31:10 +1100504 if (!kvmhv_on_pseries()) {
Nicholas Pigginfd13dae2019-09-03 01:29:28 +1000505 mmu_partition_table_set_entry(lpid, dw0, dw1, true);
Paul Mackerras690ed4c2018-10-08 16:31:10 +1100506 return;
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100507 }
Paul Mackerras690ed4c2018-10-08 16:31:10 +1100508
509 pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
510 pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
511 /* L0 will do the necessary barriers */
512 kvmhv_flush_lpid(lpid);
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100513}
514
515static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
516{
517 unsigned long dw0;
518
519 dw0 = PATB_HR | radix__get_tree_size() |
520 __pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
521 kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
522}
523
524void kvmhv_vm_nested_init(struct kvm *kvm)
525{
526 kvm->arch.max_nested_lpid = -1;
527}
528
529/*
530 * Handle the H_SET_PARTITION_TABLE hcall.
531 * r4 = guest real address of partition table + log_2(size) - 12
532 * (formatted as for the PTCR).
533 */
534long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
535{
536 struct kvm *kvm = vcpu->kvm;
537 unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
538 int srcu_idx;
539 long ret = H_SUCCESS;
540
541 srcu_idx = srcu_read_lock(&kvm->srcu);
542 /*
543 * Limit the partition table to 4096 entries (because that's what
544 * hardware supports), and check the base address.
545 */
546 if ((ptcr & PRTS_MASK) > 12 - 8 ||
547 !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
548 ret = H_PARAMETER;
549 srcu_read_unlock(&kvm->srcu, srcu_idx);
550 if (ret == H_SUCCESS)
551 kvm->arch.l1_ptcr = ptcr;
552 return ret;
553}
554
555/*
Suraj Jitindar Singh6ff887b2018-12-14 16:29:09 +1100556 * Handle the H_COPY_TOFROM_GUEST hcall.
557 * r4 = L1 lpid of nested guest
558 * r5 = pid
559 * r6 = eaddr to access
560 * r7 = to buffer (L1 gpa)
561 * r8 = from buffer (L1 gpa)
562 * r9 = n bytes to copy
563 */
564long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
565{
566 struct kvm_nested_guest *gp;
567 int l1_lpid = kvmppc_get_gpr(vcpu, 4);
568 int pid = kvmppc_get_gpr(vcpu, 5);
569 gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
570 gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
571 gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
572 void *buf;
573 unsigned long n = kvmppc_get_gpr(vcpu, 9);
574 bool is_load = !!gp_to;
575 long rc;
576
577 if (gp_to && gp_from) /* One must be NULL to determine the direction */
578 return H_PARAMETER;
579
580 if (eaddr & (0xFFFUL << 52))
581 return H_PARAMETER;
582
Alexey Kardashevskiy79202092021-09-01 18:45:50 +1000583 buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
Suraj Jitindar Singh6ff887b2018-12-14 16:29:09 +1100584 if (!buf)
585 return H_NO_MEM;
586
587 gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
588 if (!gp) {
589 rc = H_PARAMETER;
590 goto out_free;
591 }
592
593 mutex_lock(&gp->tlb_lock);
594
595 if (is_load) {
596 /* Load from the nested guest into our buffer */
597 rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
598 eaddr, buf, NULL, n);
599 if (rc)
600 goto not_found;
601
602 /* Write what was loaded into our buffer back to the L1 guest */
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000603 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
Suraj Jitindar Singh6ff887b2018-12-14 16:29:09 +1100604 rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000605 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
Suraj Jitindar Singh6ff887b2018-12-14 16:29:09 +1100606 if (rc)
607 goto not_found;
608 } else {
609 /* Load the data to be stored from the L1 guest into our buf */
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000610 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
Suraj Jitindar Singh6ff887b2018-12-14 16:29:09 +1100611 rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000612 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
Suraj Jitindar Singh6ff887b2018-12-14 16:29:09 +1100613 if (rc)
614 goto not_found;
615
616 /* Store from our buffer into the nested guest */
617 rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
618 eaddr, NULL, buf, n);
619 if (rc)
620 goto not_found;
621 }
622
623out_unlock:
624 mutex_unlock(&gp->tlb_lock);
625 kvmhv_put_nested(gp);
626out_free:
627 kfree(buf);
628 return rc;
629not_found:
630 rc = H_NOT_FOUND;
631 goto out_unlock;
632}
633
634/*
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100635 * Reload the partition table entry for a guest.
636 * Caller must hold gp->tlb_lock.
637 */
638static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
639{
640 int ret;
641 struct patb_entry ptbl_entry;
642 unsigned long ptbl_addr;
643 struct kvm *kvm = gp->l1_host;
644
645 ret = -EFAULT;
646 ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000647 if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) {
648 int srcu_idx = srcu_read_lock(&kvm->srcu);
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100649 ret = kvm_read_guest(kvm, ptbl_addr,
650 &ptbl_entry, sizeof(ptbl_entry));
Alexey Kardashevskiy1508c222020-06-09 12:12:29 +1000651 srcu_read_unlock(&kvm->srcu, srcu_idx);
652 }
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100653 if (ret) {
654 gp->l1_gr_to_hr = 0;
655 gp->process_table = 0;
656 } else {
657 gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
658 gp->process_table = be64_to_cpu(ptbl_entry.patb1);
659 }
660 kvmhv_set_nested_ptbl(gp);
661}
662
Wang Wenshengcf59eb12020-09-21 11:22:11 +0000663static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100664{
665 struct kvm_nested_guest *gp;
666 long shadow_lpid;
667
668 gp = kzalloc(sizeof(*gp), GFP_KERNEL);
669 if (!gp)
670 return NULL;
671 gp->l1_host = kvm;
672 gp->l1_lpid = lpid;
673 mutex_init(&gp->tlb_lock);
674 gp->shadow_pgtable = pgd_alloc(kvm->mm);
675 if (!gp->shadow_pgtable)
676 goto out_free;
677 shadow_lpid = kvmppc_alloc_lpid();
678 if (shadow_lpid < 0)
679 goto out_free2;
680 gp->shadow_lpid = shadow_lpid;
Suraj Jitindar Singhd232afe2018-12-14 16:29:04 +1100681 gp->radix = 1;
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100682
Suraj Jitindar Singh9d0b0482018-10-08 16:31:11 +1100683 memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu));
684
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100685 return gp;
686
687 out_free2:
688 pgd_free(kvm->mm, gp->shadow_pgtable);
689 out_free:
690 kfree(gp);
691 return NULL;
692}
693
694/*
695 * Free up any resources allocated for a nested guest.
696 */
697static void kvmhv_release_nested(struct kvm_nested_guest *gp)
698{
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +1100699 struct kvm *kvm = gp->l1_host;
700
701 if (gp->shadow_pgtable) {
702 /*
703 * No vcpu is using this struct and no call to
704 * kvmhv_get_nested can find this struct,
705 * so we don't need to hold kvm->mmu_lock.
706 */
707 kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
708 gp->shadow_lpid);
709 pgd_free(kvm->mm, gp->shadow_pgtable);
710 }
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100711 kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
712 kvmppc_free_lpid(gp->shadow_lpid);
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100713 kfree(gp);
714}
715
716static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
717{
718 struct kvm *kvm = gp->l1_host;
719 int lpid = gp->l1_lpid;
720 long ref;
721
722 spin_lock(&kvm->mmu_lock);
723 if (gp == kvm->arch.nested_guests[lpid]) {
724 kvm->arch.nested_guests[lpid] = NULL;
725 if (lpid == kvm->arch.max_nested_lpid) {
726 while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
727 ;
728 kvm->arch.max_nested_lpid = lpid;
729 }
730 --gp->refcnt;
731 }
732 ref = gp->refcnt;
733 spin_unlock(&kvm->mmu_lock);
734 if (ref == 0)
735 kvmhv_release_nested(gp);
736}
737
738/*
739 * Free up all nested resources allocated for this guest.
740 * This is called with no vcpus of the guest running, when
741 * switching the guest to HPT mode or when destroying the
742 * guest.
743 */
744void kvmhv_release_all_nested(struct kvm *kvm)
745{
746 int i;
747 struct kvm_nested_guest *gp;
748 struct kvm_nested_guest *freelist = NULL;
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +1100749 struct kvm_memory_slot *memslot;
Maciej S. Szmigieroa54d8062021-12-06 20:54:30 +0100750 int srcu_idx, bkt;
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100751
752 spin_lock(&kvm->mmu_lock);
753 for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
754 gp = kvm->arch.nested_guests[i];
755 if (!gp)
756 continue;
757 kvm->arch.nested_guests[i] = NULL;
758 if (--gp->refcnt == 0) {
759 gp->next = freelist;
760 freelist = gp;
761 }
762 }
763 kvm->arch.max_nested_lpid = -1;
764 spin_unlock(&kvm->mmu_lock);
765 while ((gp = freelist) != NULL) {
766 freelist = gp->next;
767 kvmhv_release_nested(gp);
768 }
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +1100769
770 srcu_idx = srcu_read_lock(&kvm->srcu);
Maciej S. Szmigieroa54d8062021-12-06 20:54:30 +0100771 kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +1100772 kvmhv_free_memslot_nest_rmap(memslot);
773 srcu_read_unlock(&kvm->srcu, srcu_idx);
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100774}
775
776/* caller must hold gp->tlb_lock */
Suraj Jitindar Singhe3b6b462018-10-08 16:31:09 +1100777static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100778{
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +1100779 struct kvm *kvm = gp->l1_host;
780
781 spin_lock(&kvm->mmu_lock);
782 kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
783 spin_unlock(&kvm->mmu_lock);
Paul Mackerras690ed4c2018-10-08 16:31:10 +1100784 kvmhv_flush_lpid(gp->shadow_lpid);
Paul Mackerras8e3f5fc2018-10-08 16:31:03 +1100785 kvmhv_update_ptbl_cache(gp);
786 if (gp->l1_gr_to_hr == 0)
787 kvmhv_remove_nested(gp);
788}
789
790struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
791 bool create)
792{
793 struct kvm_nested_guest *gp, *newgp;
794
795 if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
796 l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
797 return NULL;
798
799 spin_lock(&kvm->mmu_lock);
800 gp = kvm->arch.nested_guests[l1_lpid];
801 if (gp)
802 ++gp->refcnt;
803 spin_unlock(&kvm->mmu_lock);
804
805 if (gp || !create)
806 return gp;
807
808 newgp = kvmhv_alloc_nested(kvm, l1_lpid);
809 if (!newgp)
810 return NULL;
811 spin_lock(&kvm->mmu_lock);
812 if (kvm->arch.nested_guests[l1_lpid]) {
813 /* someone else beat us to it */
814 gp = kvm->arch.nested_guests[l1_lpid];
815 } else {
816 kvm->arch.nested_guests[l1_lpid] = newgp;
817 ++newgp->refcnt;
818 gp = newgp;
819 newgp = NULL;
820 if (l1_lpid > kvm->arch.max_nested_lpid)
821 kvm->arch.max_nested_lpid = l1_lpid;
822 }
823 ++gp->refcnt;
824 spin_unlock(&kvm->mmu_lock);
825
826 if (newgp)
827 kvmhv_release_nested(newgp);
828
829 return gp;
830}
831
832void kvmhv_put_nested(struct kvm_nested_guest *gp)
833{
834 struct kvm *kvm = gp->l1_host;
835 long ref;
836
837 spin_lock(&kvm->mmu_lock);
838 ref = --gp->refcnt;
839 spin_unlock(&kvm->mmu_lock);
840 if (ref == 0)
841 kvmhv_release_nested(gp);
842}
Paul Mackerras360cae32018-10-08 16:31:04 +1100843
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +1100844static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid)
845{
846 if (lpid > kvm->arch.max_nested_lpid)
847 return NULL;
848 return kvm->arch.nested_guests[lpid];
849}
850
Aneesh Kumar K.V6cdf3032020-05-05 12:47:18 +0530851pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
852 unsigned long ea, unsigned *hshift)
Aneesh Kumar K.Vdc891842020-05-05 12:47:17 +0530853{
854 struct kvm_nested_guest *gp;
855 pte_t *pte;
856
857 gp = kvmhv_find_nested(kvm, lpid);
858 if (!gp)
859 return NULL;
860
861 VM_WARN(!spin_is_locked(&kvm->mmu_lock),
862 "%s called with kvm mmu_lock not held \n", __func__);
863 pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift);
864
865 return pte;
866}
867
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +1100868static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
869{
870 return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK |
871 RMAP_NESTED_GPA_MASK));
872}
873
874void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
875 struct rmap_nested **n_rmap)
876{
877 struct llist_node *entry = ((struct llist_head *) rmapp)->first;
878 struct rmap_nested *cursor;
879 u64 rmap, new_rmap = (*n_rmap)->rmap;
880
881 /* Are there any existing entries? */
882 if (!(*rmapp)) {
883 /* No -> use the rmap as a single entry */
884 *rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
885 return;
886 }
887
888 /* Do any entries match what we're trying to insert? */
889 for_each_nest_rmap_safe(cursor, entry, &rmap) {
890 if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
891 return;
892 }
893
894 /* Do we need to create a list or just add the new entry? */
895 rmap = *rmapp;
896 if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
897 *rmapp = 0UL;
898 llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
899 if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
900 (*n_rmap)->list.next = (struct llist_node *) rmap;
901
902 /* Set NULL so not freed by caller */
903 *n_rmap = NULL;
904}
905
Suraj Jitindar Singh90165d32018-12-21 14:28:42 +1100906static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
907 unsigned long clr, unsigned long set,
908 unsigned long hpa, unsigned long mask)
909{
Suraj Jitindar Singh90165d32018-12-21 14:28:42 +1100910 unsigned long gpa;
911 unsigned int shift, lpid;
912 pte_t *ptep;
913
914 gpa = n_rmap & RMAP_NESTED_GPA_MASK;
915 lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
Suraj Jitindar Singh90165d32018-12-21 14:28:42 +1100916
917 /* Find the pte */
Aneesh Kumar K.Vdc891842020-05-05 12:47:17 +0530918 ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
Suraj Jitindar Singh90165d32018-12-21 14:28:42 +1100919 /*
920 * If the pte is present and the pfn is still the same, update the pte.
921 * If the pfn has changed then this is a stale rmap entry, the nested
922 * gpa actually points somewhere else now, and there is nothing to do.
923 * XXX A future optimisation would be to remove the rmap entry here.
924 */
925 if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) {
926 __radix_pte_update(ptep, clr, set);
927 kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
928 }
929}
930
931/*
932 * For a given list of rmap entries, update the rc bits in all ptes in shadow
933 * page tables for nested guests which are referenced by the rmap list.
934 */
935void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
936 unsigned long clr, unsigned long set,
937 unsigned long hpa, unsigned long nbytes)
938{
939 struct llist_node *entry = ((struct llist_head *) rmapp)->first;
940 struct rmap_nested *cursor;
941 unsigned long rmap, mask;
942
943 if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
944 return;
945
946 mask = PTE_RPN_MASK & ~(nbytes - 1);
947 hpa &= mask;
948
949 for_each_nest_rmap_safe(cursor, entry, &rmap)
950 kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask);
951}
952
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +1100953static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
954 unsigned long hpa, unsigned long mask)
955{
956 struct kvm_nested_guest *gp;
957 unsigned long gpa;
958 unsigned int shift, lpid;
959 pte_t *ptep;
960
961 gpa = n_rmap & RMAP_NESTED_GPA_MASK;
962 lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
963 gp = kvmhv_find_nested(kvm, lpid);
964 if (!gp)
965 return;
966
967 /* Find and invalidate the pte */
Aneesh Kumar K.Vdc891842020-05-05 12:47:17 +0530968 ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift);
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +1100969 /* Don't spuriously invalidate ptes if the pfn has changed */
970 if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa))
971 kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
972}
973
974static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
975 unsigned long hpa, unsigned long mask)
976{
977 struct llist_node *entry = llist_del_all((struct llist_head *) rmapp);
978 struct rmap_nested *cursor;
979 unsigned long rmap;
980
981 for_each_nest_rmap_safe(cursor, entry, &rmap) {
982 kvmhv_remove_nest_rmap(kvm, rmap, hpa, mask);
983 kfree(cursor);
984 }
985}
986
987/* called with kvm->mmu_lock held */
988void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
Paul Mackerrasc43c3a82018-12-12 15:16:48 +1100989 const struct kvm_memory_slot *memslot,
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +1100990 unsigned long gpa, unsigned long hpa,
991 unsigned long nbytes)
992{
993 unsigned long gfn, end_gfn;
994 unsigned long addr_mask;
995
996 if (!memslot)
997 return;
998 gfn = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
999 end_gfn = gfn + (nbytes >> PAGE_SHIFT);
1000
1001 addr_mask = PTE_RPN_MASK & ~(nbytes - 1);
1002 hpa &= addr_mask;
1003
1004 for (; gfn < end_gfn; gfn++) {
1005 unsigned long *rmap = &memslot->arch.rmap[gfn];
1006 kvmhv_remove_nest_rmap_list(kvm, rmap, hpa, addr_mask);
1007 }
1008}
1009
1010static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
1011{
1012 unsigned long page;
1013
1014 for (page = 0; page < free->npages; page++) {
1015 unsigned long rmap, *rmapp = &free->arch.rmap[page];
1016 struct rmap_nested *cursor;
1017 struct llist_node *entry;
1018
1019 entry = llist_del_all((struct llist_head *) rmapp);
1020 for_each_nest_rmap_safe(cursor, entry, &rmap)
1021 kfree(cursor);
1022 }
1023}
1024
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001025static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
1026 struct kvm_nested_guest *gp,
1027 long gpa, int *shift_ret)
Paul Mackerras360cae32018-10-08 16:31:04 +11001028{
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001029 struct kvm *kvm = vcpu->kvm;
1030 bool ret = false;
1031 pte_t *ptep;
1032 int shift;
1033
1034 spin_lock(&kvm->mmu_lock);
Aneesh Kumar K.Vdc891842020-05-05 12:47:17 +05301035 ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift);
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001036 if (!shift)
1037 shift = PAGE_SHIFT;
1038 if (ptep && pte_present(*ptep)) {
1039 kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
1040 ret = true;
1041 }
1042 spin_unlock(&kvm->mmu_lock);
1043
1044 if (shift_ret)
1045 *shift_ret = shift;
1046 return ret;
1047}
1048
/* Extract the 2-bit field at bits 18-19 of the instruction word (RIC). */
static inline int get_ric(unsigned int instr)
{
	const unsigned int field = instr >> 18;

	return field & 0x3;
}
1053
/* Extract bit 17 of the instruction word (PRS). */
static inline int get_prs(unsigned int instr)
{
	const unsigned int field = instr >> 17;

	return field & 0x1;
}
1058
/* Extract bit 16 of the instruction word (R). */
static inline int get_r(unsigned int instr)
{
	const unsigned int field = instr >> 16;

	return field & 0x1;
}
1063
/* Return the low 32 bits of the register value, as an int (LPID). */
static inline int get_lpid(unsigned long r_val)
{
	const unsigned long low_word = r_val & 0xffffffff;

	return low_word;
}
1068
/* Extract the 2-bit field at bits 10-11 of the register value (IS). */
static inline int get_is(unsigned long r_val)
{
	const unsigned long field = r_val >> 10;

	return field & 0x3;
}
1073
/* Extract the 3-bit field at bits 5-7 of the register value (AP). */
static inline int get_ap(unsigned long r_val)
{
	const unsigned long field = r_val >> 5;

	return field & 0x7;
}
1078
/* Return the register value with its low 12 bits shifted out (EPN). */
static inline long get_epn(unsigned long r_val)
{
	const unsigned long page_number = r_val >> 12;

	return page_number;
}
1083
1084static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
1085 int ap, long epn)
1086{
1087 struct kvm *kvm = vcpu->kvm;
1088 struct kvm_nested_guest *gp;
1089 long npages;
1090 int shift, shadow_shift;
1091 unsigned long addr;
1092
1093 shift = ap_to_shift(ap);
1094 addr = epn << 12;
1095 if (shift < 0)
1096 /* Invalid ap encoding */
1097 return -EINVAL;
1098
1099 addr &= ~((1UL << shift) - 1);
1100 npages = 1UL << (shift - PAGE_SHIFT);
1101
1102 gp = kvmhv_get_nested(kvm, lpid, false);
1103 if (!gp) /* No such guest -> nothing to do */
1104 return 0;
1105 mutex_lock(&gp->tlb_lock);
1106
1107 /* There may be more than one host page backing this single guest pte */
1108 do {
1109 kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);
1110
1111 npages -= 1UL << (shadow_shift - PAGE_SHIFT);
1112 addr += 1UL << shadow_shift;
1113 } while (npages > 0);
1114
1115 mutex_unlock(&gp->tlb_lock);
1116 kvmhv_put_nested(gp);
1117 return 0;
1118}
1119
1120static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
1121 struct kvm_nested_guest *gp, int ric)
1122{
1123 struct kvm *kvm = vcpu->kvm;
1124
1125 mutex_lock(&gp->tlb_lock);
1126 switch (ric) {
1127 case 0:
1128 /* Invalidate TLB */
1129 spin_lock(&kvm->mmu_lock);
1130 kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
1131 gp->shadow_lpid);
Paul Mackerras690ed4c2018-10-08 16:31:10 +11001132 kvmhv_flush_lpid(gp->shadow_lpid);
Suraj Jitindar Singhe3b6b462018-10-08 16:31:09 +11001133 spin_unlock(&kvm->mmu_lock);
1134 break;
1135 case 1:
1136 /*
1137 * Invalidate PWC
1138 * We don't cache this -> nothing to do
1139 */
1140 break;
1141 case 2:
1142 /* Invalidate TLB, PWC and caching of partition table entries */
1143 kvmhv_flush_nested(gp);
1144 break;
1145 default:
1146 break;
1147 }
1148 mutex_unlock(&gp->tlb_lock);
1149}
1150
1151static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
1152{
1153 struct kvm *kvm = vcpu->kvm;
1154 struct kvm_nested_guest *gp;
1155 int i;
1156
1157 spin_lock(&kvm->mmu_lock);
1158 for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
1159 gp = kvm->arch.nested_guests[i];
1160 if (gp) {
1161 spin_unlock(&kvm->mmu_lock);
1162 kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
1163 spin_lock(&kvm->mmu_lock);
1164 }
1165 }
1166 spin_unlock(&kvm->mmu_lock);
1167}
1168
1169static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
1170 unsigned long rsval, unsigned long rbval)
1171{
1172 struct kvm *kvm = vcpu->kvm;
1173 struct kvm_nested_guest *gp;
1174 int r, ric, prs, is, ap;
1175 int lpid;
1176 long epn;
1177 int ret = 0;
1178
1179 ric = get_ric(instr);
1180 prs = get_prs(instr);
1181 r = get_r(instr);
1182 lpid = get_lpid(rsval);
1183 is = get_is(rbval);
1184
1185 /*
1186 * These cases are invalid and are not handled:
1187 * r != 1 -> Only radix supported
1188 * prs == 1 -> Not HV privileged
1189 * ric == 3 -> No cluster bombs for radix
1190 * is == 1 -> Partition scoped translations not associated with pid
1191 * (!is) && (ric == 1 || ric == 2) -> Not supported by ISA
1192 */
1193 if ((!r) || (prs) || (ric == 3) || (is == 1) ||
1194 ((!is) && (ric == 1 || ric == 2)))
1195 return -EINVAL;
1196
1197 switch (is) {
1198 case 0:
1199 /*
1200 * We know ric == 0
1201 * Invalidate TLB for a given target address
1202 */
1203 epn = get_epn(rbval);
1204 ap = get_ap(rbval);
1205 ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap, epn);
1206 break;
1207 case 2:
1208 /* Invalidate matching LPID */
1209 gp = kvmhv_get_nested(kvm, lpid, false);
1210 if (gp) {
1211 kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
1212 kvmhv_put_nested(gp);
1213 }
1214 break;
1215 case 3:
1216 /* Invalidate ALL LPIDs */
1217 kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
1218 break;
1219 default:
1220 ret = -EINVAL;
1221 break;
1222 }
1223
1224 return ret;
1225}
1226
1227/*
1228 * This handles the H_TLB_INVALIDATE hcall.
1229 * Parameters are (r4) tlbie instruction code, (r5) rS contents,
1230 * (r6) rB contents.
1231 */
1232long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
1233{
1234 int ret;
1235
1236 ret = kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
1237 kvmppc_get_gpr(vcpu, 5), kvmppc_get_gpr(vcpu, 6));
1238 if (ret)
1239 return H_PARAMETER;
1240 return H_SUCCESS;
1241}
1242
Bharata B Rao53324b52021-06-21 14:20:01 +05301243static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
1244 unsigned long lpid, unsigned long ric)
1245{
1246 struct kvm *kvm = vcpu->kvm;
1247 struct kvm_nested_guest *gp;
1248
1249 gp = kvmhv_get_nested(kvm, lpid, false);
1250 if (gp) {
1251 kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
1252 kvmhv_put_nested(gp);
1253 }
1254 return H_SUCCESS;
1255}
1256
1257/*
1258 * Number of pages above which we invalidate the entire LPID rather than
1259 * flush individual pages.
1260 */
1261static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
1262
1263static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
1264 unsigned long lpid,
1265 unsigned long pg_sizes,
1266 unsigned long start,
1267 unsigned long end)
1268{
1269 int ret = H_P4;
1270 unsigned long addr, nr_pages;
1271 struct mmu_psize_def *def;
1272 unsigned long psize, ap, page_size;
1273 bool flush_lpid;
1274
1275 for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
1276 def = &mmu_psize_defs[psize];
1277 if (!(pg_sizes & def->h_rpt_pgsize))
1278 continue;
1279
1280 nr_pages = (end - start) >> def->shift;
1281 flush_lpid = nr_pages > tlb_range_flush_page_ceiling;
1282 if (flush_lpid)
1283 return do_tlb_invalidate_nested_all(vcpu, lpid,
1284 RIC_FLUSH_TLB);
1285 addr = start;
1286 ap = mmu_get_ap(psize);
1287 page_size = 1UL << def->shift;
1288 do {
1289 ret = kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
1290 get_epn(addr));
1291 if (ret)
1292 return H_P4;
1293 addr += page_size;
1294 } while (addr < end);
1295 }
1296 return ret;
1297}
1298
1299/*
1300 * Performs partition-scoped invalidations for nested guests
1301 * as part of H_RPT_INVALIDATE hcall.
1302 */
1303long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
1304 unsigned long type, unsigned long pg_sizes,
1305 unsigned long start, unsigned long end)
1306{
1307 /*
1308 * If L2 lpid isn't valid, we need to return H_PARAMETER.
1309 *
1310 * However, nested KVM issues a L2 lpid flush call when creating
1311 * partition table entries for L2. This happens even before the
1312 * corresponding shadow lpid is created in HV which happens in
1313 * H_ENTER_NESTED call. Since we can't differentiate this case from
1314 * the invalid case, we ignore such flush requests and return success.
1315 */
1316 if (!kvmhv_find_nested(vcpu->kvm, lpid))
1317 return H_SUCCESS;
1318
1319 /*
1320 * A flush all request can be handled by a full lpid flush only.
1321 */
1322 if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
1323 return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);
1324
1325 /*
1326 * We don't need to handle a PWC flush like process table here,
1327 * because intermediate partition scoped table in nested guest doesn't
1328 * really have PWC. Only level we have PWC is in L0 and for nested
1329 * invalidate at L0 we always do kvm_flush_lpid() which does
1330 * radix__flush_all_lpid(). For range invalidate at any level, we
1331 * are not removing the higher level page tables and hence there is
1332 * no PWC invalidate needed.
1333 *
1334 * if (type & H_RPTI_TYPE_PWC) {
1335 * ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
1336 * if (ret)
1337 * return H_P4;
1338 * }
1339 */
1340
1341 if (start == 0 && end == -1)
1342 return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
1343
1344 if (type & H_RPTI_TYPE_TLB)
1345 return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes,
1346 start, end);
1347 return H_SUCCESS;
1348}
1349
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001350/* Used to convert a nested guest real address to a L1 guest real address */
1351static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
1352 struct kvm_nested_guest *gp,
1353 unsigned long n_gpa, unsigned long dsisr,
1354 struct kvmppc_pte *gpte_p)
1355{
1356 u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
1357 int ret;
1358
1359 ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
1360 &fault_addr);
1361
1362 if (ret) {
1363 /* We didn't find a pte */
1364 if (ret == -EINVAL) {
1365 /* Unsupported mmu config */
1366 flags |= DSISR_UNSUPP_MMU;
1367 } else if (ret == -ENOENT) {
1368 /* No translation found */
1369 flags |= DSISR_NOHPTE;
1370 } else if (ret == -EFAULT) {
1371 /* Couldn't access L1 real address */
1372 flags |= DSISR_PRTABLE_FAULT;
1373 vcpu->arch.fault_gpa = fault_addr;
1374 } else {
1375 /* Unknown error */
1376 return ret;
1377 }
1378 goto forward_to_l1;
1379 } else {
1380 /* We found a pte -> check permissions */
1381 if (dsisr & DSISR_ISSTORE) {
1382 /* Can we write? */
1383 if (!gpte_p->may_write) {
1384 flags |= DSISR_PROTFAULT;
1385 goto forward_to_l1;
1386 }
1387 } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
1388 /* Can we execute? */
1389 if (!gpte_p->may_execute) {
Jordan Nietheb6915052020-05-06 13:40:42 +10001390 flags |= SRR1_ISI_N_G_OR_CIP;
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001391 goto forward_to_l1;
1392 }
1393 } else {
1394 /* Can we read? */
1395 if (!gpte_p->may_read && !gpte_p->may_write) {
1396 flags |= DSISR_PROTFAULT;
1397 goto forward_to_l1;
1398 }
1399 }
1400 }
1401
1402 return 0;
1403
1404forward_to_l1:
1405 vcpu->arch.fault_dsisr = flags;
1406 if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
Nicholas Piggin9ee64712019-10-02 16:00:21 +10001407 vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001408 vcpu->arch.shregs.msr |= flags;
1409 }
Paul Mackerras360cae32018-10-08 16:31:04 +11001410 return RESUME_HOST;
1411}
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001412
1413static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
1414 struct kvm_nested_guest *gp,
1415 unsigned long n_gpa,
1416 struct kvmppc_pte gpte,
1417 unsigned long dsisr)
1418{
1419 struct kvm *kvm = vcpu->kvm;
1420 bool writing = !!(dsisr & DSISR_ISSTORE);
1421 u64 pgflags;
Suraj Jitindar Singhbec6e032018-12-21 14:28:39 +11001422 long ret;
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001423
1424 /* Are the rc bits set in the L1 partition scoped pte? */
1425 pgflags = _PAGE_ACCESSED;
1426 if (writing)
1427 pgflags |= _PAGE_DIRTY;
1428 if (pgflags & ~gpte.rc)
1429 return RESUME_HOST;
1430
1431 spin_lock(&kvm->mmu_lock);
1432 /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */
Aneesh Kumar K.V6cdf3032020-05-05 12:47:18 +05301433 ret = kvmppc_hv_handle_set_rc(kvm, false, writing,
1434 gpte.raddr, kvm->arch.lpid);
Suraj Jitindar Singhbec6e032018-12-21 14:28:39 +11001435 if (!ret) {
1436 ret = -EINVAL;
1437 goto out_unlock;
1438 }
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001439
1440 /* Set the rc bit in the pte of the shadow_pgtable for the nest guest */
Aneesh Kumar K.V6cdf3032020-05-05 12:47:18 +05301441 ret = kvmppc_hv_handle_set_rc(kvm, true, writing,
Alexey Kardashevskiye881bfa2020-06-11 13:05:59 +10001442 n_gpa, gp->l1_lpid);
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001443 if (!ret)
Suraj Jitindar Singhbec6e032018-12-21 14:28:39 +11001444 ret = -EINVAL;
1445 else
1446 ret = 0;
1447
1448out_unlock:
1449 spin_unlock(&kvm->mmu_lock);
1450 return ret;
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001451}
1452
1453static inline int kvmppc_radix_level_to_shift(int level)
1454{
1455 switch (level) {
1456 case 2:
1457 return PUD_SHIFT;
1458 case 1:
1459 return PMD_SHIFT;
1460 default:
1461 return PAGE_SHIFT;
1462 }
1463}
1464
1465static inline int kvmppc_radix_shift_to_level(int shift)
1466{
1467 if (shift == PUD_SHIFT)
1468 return 2;
1469 if (shift == PMD_SHIFT)
1470 return 1;
1471 if (shift == PAGE_SHIFT)
1472 return 0;
1473 WARN_ON_ONCE(1);
1474 return 0;
1475}
1476
1477/* called with gp->tlb_lock held */
Tianjia Zhang8c99d342020-04-27 12:35:11 +08001478static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001479 struct kvm_nested_guest *gp)
1480{
1481 struct kvm *kvm = vcpu->kvm;
1482 struct kvm_memory_slot *memslot;
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +11001483 struct rmap_nested *n_rmap;
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001484 struct kvmppc_pte gpte;
1485 pte_t pte, *pte_p;
1486 unsigned long mmu_seq;
1487 unsigned long dsisr = vcpu->arch.fault_dsisr;
1488 unsigned long ea = vcpu->arch.fault_dar;
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +11001489 unsigned long *rmapp;
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001490 unsigned long n_gpa, gpa, gfn, perm = 0UL;
1491 unsigned int shift, l1_shift, level;
1492 bool writing = !!(dsisr & DSISR_ISSTORE);
1493 bool kvm_ro = false;
1494 long int ret;
1495
1496 if (!gp->l1_gr_to_hr) {
1497 kvmhv_update_ptbl_cache(gp);
1498 if (!gp->l1_gr_to_hr)
1499 return RESUME_HOST;
1500 }
1501
1502 /* Convert the nested guest real address into a L1 guest real address */
1503
1504 n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL;
1505 if (!(dsisr & DSISR_PRTABLE_FAULT))
1506 n_gpa |= ea & 0xFFF;
1507 ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
1508
1509 /*
1510 * If the hardware found a translation but we don't now have a usable
1511 * translation in the l1 partition-scoped tree, remove the shadow pte
1512 * and let the guest retry.
1513 */
1514 if (ret == RESUME_HOST &&
1515 (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
1516 DSISR_BAD_COPYPASTE)))
1517 goto inval;
1518 if (ret)
1519 return ret;
1520
1521 /* Failed to set the reference/change bits */
1522 if (dsisr & DSISR_SET_RC) {
1523 ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
1524 if (ret == RESUME_HOST)
1525 return ret;
1526 if (ret)
1527 goto inval;
1528 dsisr &= ~DSISR_SET_RC;
1529 if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
1530 DSISR_PROTFAULT)))
1531 return RESUME_GUEST;
1532 }
1533
1534 /*
1535 * We took an HISI or HDSI while we were running a nested guest which
1536 * means we have no partition scoped translation for that. This means
1537 * we need to insert a pte for the mapping into our shadow_pgtable.
1538 */
1539
1540 l1_shift = gpte.page_shift;
1541 if (l1_shift < PAGE_SHIFT) {
1542 /* We don't support l1 using a page size smaller than our own */
1543 pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
1544 l1_shift, PAGE_SHIFT);
1545 return -EINVAL;
1546 }
1547 gpa = gpte.raddr;
1548 gfn = gpa >> PAGE_SHIFT;
1549
1550 /* 1. Get the corresponding host memslot */
1551
1552 memslot = gfn_to_memslot(kvm, gfn);
1553 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
1554 if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
1555 /* unusual error -> reflect to the guest as a DSI */
1556 kvmppc_core_queue_data_storage(vcpu, ea, dsisr);
1557 return RESUME_GUEST;
1558 }
Suraj Jitindar Singh873db2c2018-12-14 16:29:08 +11001559
1560 /* passthrough of emulated MMIO case */
Tianjia Zhang8c99d342020-04-27 12:35:11 +08001561 return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001562 }
1563 if (memslot->flags & KVM_MEM_READONLY) {
1564 if (writing) {
1565 /* Give the guest a DSI */
1566 kvmppc_core_queue_data_storage(vcpu, ea,
1567 DSISR_ISSTORE | DSISR_PROTFAULT);
1568 return RESUME_GUEST;
1569 }
1570 kvm_ro = true;
1571 }
1572
1573 /* 2. Find the host pte for this L1 guest real address */
1574
1575 /* Used to check for invalidations in progress */
1576 mmu_seq = kvm->mmu_notifier_seq;
1577 smp_rmb();
1578
1579 /* See if can find translation in our partition scoped tables for L1 */
1580 pte = __pte(0);
1581 spin_lock(&kvm->mmu_lock);
Aneesh Kumar K.V4b994122020-05-05 12:47:16 +05301582 pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001583 if (!shift)
1584 shift = PAGE_SHIFT;
1585 if (pte_p)
1586 pte = *pte_p;
1587 spin_unlock(&kvm->mmu_lock);
1588
1589 if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
1590 /* No suitable pte found -> try to insert a mapping */
1591 ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
1592 writing, kvm_ro, &pte, &level);
1593 if (ret == -EAGAIN)
1594 return RESUME_GUEST;
1595 else if (ret)
1596 return ret;
1597 shift = kvmppc_radix_level_to_shift(level);
1598 }
Suraj Jitindar Singh8400f872018-12-21 14:28:40 +11001599 /* Align gfn to the start of the page */
1600 gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001601
1602 /* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
1603
1604 /* The permissions is the combination of the host and l1 guest ptes */
1605 perm |= gpte.may_read ? 0UL : _PAGE_READ;
1606 perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
1607 perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
Suraj Jitindar Singh8b23eee2018-12-21 14:28:41 +11001608 /* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
1609 perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
1610 perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001611 pte = __pte(pte_val(pte) & ~perm);
1612
1613 /* What size pte can we insert? */
1614 if (shift > l1_shift) {
1615 u64 mask;
1616 unsigned int actual_shift = PAGE_SHIFT;
1617 if (PMD_SHIFT < l1_shift)
1618 actual_shift = PMD_SHIFT;
1619 mask = (1UL << shift) - (1UL << actual_shift);
1620 pte = __pte(pte_val(pte) | (gpa & mask));
1621 shift = actual_shift;
1622 }
1623 level = kvmppc_radix_shift_to_level(shift);
1624 n_gpa &= ~((1UL << shift) - 1);
1625
1626 /* 4. Insert the pte into our shadow_pgtable */
1627
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +11001628 n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
1629 if (!n_rmap)
1630 return RESUME_GUEST; /* Let the guest try again */
1631 n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
1632 (((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
1633 rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001634 ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
Suraj Jitindar Singh8cf531e2018-10-08 16:31:08 +11001635 mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
Chen Zhou32e594f2020-04-01 21:09:03 +08001636 kfree(n_rmap);
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001637 if (ret == -EAGAIN)
1638 ret = RESUME_GUEST; /* Let the guest try again */
1639
1640 return ret;
1641
1642 inval:
1643 kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL);
1644 return RESUME_GUEST;
1645}
1646
Tianjia Zhang8c99d342020-04-27 12:35:11 +08001647long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001648{
1649 struct kvm_nested_guest *gp = vcpu->arch.nested;
1650 long int ret;
1651
1652 mutex_lock(&gp->tlb_lock);
Tianjia Zhang8c99d342020-04-27 12:35:11 +08001653 ret = __kvmhv_nested_page_fault(vcpu, gp);
Suraj Jitindar Singhfd10be22018-10-08 16:31:07 +11001654 mutex_unlock(&gp->tlb_lock);
1655 return ret;
1656}
Paul Mackerras83a05512018-10-08 16:31:17 +11001657
1658int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
1659{
1660 int ret = -1;
1661
1662 spin_lock(&kvm->mmu_lock);
1663 while (++lpid <= kvm->arch.max_nested_lpid) {
1664 if (kvm->arch.nested_guests[lpid]) {
1665 ret = lpid;
1666 break;
1667 }
1668 }
1669 spin_unlock(&kvm->mmu_lock);
1670 return ret;
1671}