// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2018
 * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
 *	   Paul Mackerras <paulus@ozlabs.org>
 *
 * Description: KVM functions specific to running nested KVM-HV guests
 * on Book3S processors (specifically POWER9 and later).
 */

#include <linux/kernel.h>
#include <linux/kvm_host.h>

#include <asm/kvm_ppc.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>

static struct patb_entry *pseries_partition_tb;

static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);

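/*
 * Allocate and register a shadow partition table when running as a
 * nested (L1) hypervisor under a pseries (L0) host.  The table is
 * handed to the L0 hypervisor via H_SET_PARTITION_TABLE; failure of
 * that hcall means the parent hypervisor does not support nesting.
 */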
long kvmhv_nested_init(void)
{
	long int ptb_order;
	unsigned long ptcr;
	long rc;

	if (!kvmhv_on_pseries())
		return 0;
	if (!radix_enabled())
		return -ENODEV;

	/* find log base 2 of KVMPPC_NR_LPIDS, rounding up */
	ptb_order = __ilog2(KVMPPC_NR_LPIDS - 1) + 1;
	if (ptb_order < 8)
		ptb_order = 8;
	pseries_partition_tb = kmalloc(sizeof(struct patb_entry) << ptb_order,
				       GFP_KERNEL);
	if (!pseries_partition_tb) {
		pr_err("kvm-hv: failed to allocate nested partition table\n");
		return -ENOMEM;
	}

	ptcr = __pa(pseries_partition_tb) | (ptb_order - 8);
	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
	if (rc != H_SUCCESS) {
		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
		       rc);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
		return -ENODEV;
	}

	return 0;
}

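/*
 * Deregister (by setting a null PTCR value) and free the shadow
 * partition table set up by kvmhv_nested_init().
 */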
void kvmhv_nested_exit(void)
{
	/*
	 * N.B. the kvmhv_on_pseries() test is there because it enables
	 * the compiler to remove the call to plpar_hcall_norets()
	 * when CONFIG_PPC_PSERIES=n.
	 */
	if (kvmhv_on_pseries() && pseries_partition_tb) {
		plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
		kfree(pseries_partition_tb);
		pseries_partition_tb = NULL;
	}
}

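/*
 * Set partition table entry @lpid to (@dw0, @dw1): directly in the
 * hardware table when we are the hypervisor, or in the shadow table
 * read by the L0 hypervisor when we are ourselves a pseries guest.
 */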
void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1)
{
	if (cpu_has_feature(CPU_FTR_HVMODE)) {
		mmu_partition_table_set_entry(lpid, dw0, dw1);
	} else {
		pseries_partition_tb[lpid].patb0 = cpu_to_be64(dw0);
		pseries_partition_tb[lpid].patb1 = cpu_to_be64(dw1);
	}
}

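/*
 * Point the shadow LPID's partition table entry at the shadow page
 * table we maintain for the nested guest, and at the guest's own
 * process table.
 */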
static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
{
	unsigned long dw0;

	dw0 = PATB_HR | radix__get_tree_size() |
		__pa(gp->shadow_pgtable) | RADIX_PGD_INDEX_SIZE;
	kvmhv_set_ptbl_entry(gp->shadow_lpid, dw0, gp->process_table);
}

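/* Called at VM creation, before any nested guests exist. */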
void kvmhv_vm_nested_init(struct kvm *kvm)
{
	kvm->arch.max_nested_lpid = -1;
}

/*
 * Handle the H_SET_PARTITION_TABLE hcall.
 * r4 = guest real address of partition table + log_2(size) - 12
 * (formatted as for the PTCR).
 */
long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
	int srcu_idx;
	long ret = H_SUCCESS;

	srcu_idx = srcu_read_lock(&kvm->srcu);
	/*
	 * Limit the partition table to 4096 entries (because that's what
	 * hardware supports), and check the base address.
	 */
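	/*
	 * Each patb_entry is 16 bytes, so 4096 entries make a 64kB
	 * table; the PRTS field encodes log2(size) - 12, hence the
	 * 12 - 8 = 4 limit below.
	 */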
	if ((ptcr & PRTS_MASK) > 12 - 8 ||
	    !kvm_is_visible_gfn(vcpu->kvm, (ptcr & PRTB_MASK) >> PAGE_SHIFT))
		ret = H_PARAMETER;
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	if (ret == H_SUCCESS)
		kvm->arch.l1_ptcr = ptcr;
	return ret;
}

/*
 * Reload the partition table entry for a guest.
 * Caller must hold gp->tlb_lock.
 */
static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
{
	int ret;
	struct patb_entry ptbl_entry;
	unsigned long ptbl_addr;
	struct kvm *kvm = gp->l1_host;

	ret = -EFAULT;
	ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
	if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8)))
		ret = kvm_read_guest(kvm, ptbl_addr,
				     &ptbl_entry, sizeof(ptbl_entry));
	if (ret) {
		gp->l1_gr_to_hr = 0;
		gp->process_table = 0;
	} else {
		gp->l1_gr_to_hr = be64_to_cpu(ptbl_entry.patb0);
		gp->process_table = be64_to_cpu(ptbl_entry.patb1);
	}
	kvmhv_set_nested_ptbl(gp);
}

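/*
 * Allocate a nested guest structure for L1 guest LPID @lpid, together
 * with a shadow page table and a real (shadow) LPID for it to run
 * under.  Returns NULL on allocation failure.
 */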
struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
{
	struct kvm_nested_guest *gp;
	long shadow_lpid;

	gp = kzalloc(sizeof(*gp), GFP_KERNEL);
	if (!gp)
		return NULL;
	gp->l1_host = kvm;
	gp->l1_lpid = lpid;
	mutex_init(&gp->tlb_lock);
	gp->shadow_pgtable = pgd_alloc(kvm->mm);
	if (!gp->shadow_pgtable)
		goto out_free;
	shadow_lpid = kvmppc_alloc_lpid();
	if (shadow_lpid < 0)
		goto out_free2;
	gp->shadow_lpid = shadow_lpid;

	return gp;

out_free2:
	pgd_free(kvm->mm, gp->shadow_pgtable);
out_free:
	kfree(gp);
	return NULL;
}

/*
 * Free up any resources allocated for a nested guest.
 */
static void kvmhv_release_nested(struct kvm_nested_guest *gp)
{
	kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
	kvmppc_free_lpid(gp->shadow_lpid);
	if (gp->shadow_pgtable)
		pgd_free(gp->l1_host->mm, gp->shadow_pgtable);
	kfree(gp);
}

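/*
 * Drop the reference that the nested_guests[] table holds on @gp and
 * release it if that was the last reference.  Also shrinks
 * max_nested_lpid if we removed the highest-numbered entry.
 */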
static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	int lpid = gp->l1_lpid;
	long ref;

	spin_lock(&kvm->mmu_lock);
	if (gp == kvm->arch.nested_guests[lpid]) {
		kvm->arch.nested_guests[lpid] = NULL;
		if (lpid == kvm->arch.max_nested_lpid) {
			while (--lpid >= 0 && !kvm->arch.nested_guests[lpid])
				;
			kvm->arch.max_nested_lpid = lpid;
		}
		--gp->refcnt;
	}
	ref = gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}

/*
 * Free up all nested resources allocated for this guest.
 * This is called with no vcpus of the guest running, when
 * switching the guest to HPT mode or when destroying the
 * guest.
 */
void kvmhv_release_all_nested(struct kvm *kvm)
{
	int i;
	struct kvm_nested_guest *gp;
	struct kvm_nested_guest *freelist = NULL;

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i <= kvm->arch.max_nested_lpid; i++) {
		gp = kvm->arch.nested_guests[i];
		if (!gp)
			continue;
		kvm->arch.nested_guests[i] = NULL;
		if (--gp->refcnt == 0) {
			gp->next = freelist;
			freelist = gp;
		}
	}
	kvm->arch.max_nested_lpid = -1;
	spin_unlock(&kvm->mmu_lock);
	while ((gp = freelist) != NULL) {
		freelist = gp->next;
		kvmhv_release_nested(gp);
	}
}

/* caller must hold gp->tlb_lock */
void kvmhv_flush_nested(struct kvm_nested_guest *gp)
{
	kvmhv_update_ptbl_cache(gp);
	if (gp->l1_gr_to_hr == 0)
		kvmhv_remove_nested(gp);
}

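/*
 * Look up the nested guest for L1 LPID @l1_lpid and take a reference
 * to it; if @create is true, create it when it doesn't already exist.
 * The reference taken here is dropped by kvmhv_put_nested().  Returns
 * NULL if @l1_lpid is beyond the nested_guests[] array or past the
 * end of the L1 partition table, or if allocation fails.
 */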
struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
					  bool create)
{
	struct kvm_nested_guest *gp, *newgp;

	if (l1_lpid >= KVM_MAX_NESTED_GUESTS ||
	    l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
		return NULL;

	spin_lock(&kvm->mmu_lock);
	gp = kvm->arch.nested_guests[l1_lpid];
	if (gp)
		++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (gp || !create)
		return gp;

	newgp = kvmhv_alloc_nested(kvm, l1_lpid);
	if (!newgp)
		return NULL;
	spin_lock(&kvm->mmu_lock);
	if (kvm->arch.nested_guests[l1_lpid]) {
		/* someone else beat us to it */
		gp = kvm->arch.nested_guests[l1_lpid];
	} else {
		kvm->arch.nested_guests[l1_lpid] = newgp;
		++newgp->refcnt;
		gp = newgp;
		newgp = NULL;
		if (l1_lpid > kvm->arch.max_nested_lpid)
			kvm->arch.max_nested_lpid = l1_lpid;
	}
	++gp->refcnt;
	spin_unlock(&kvm->mmu_lock);

	if (newgp)
		kvmhv_release_nested(newgp);

	return gp;
}

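/*
 * Drop a reference taken by kvmhv_get_nested(), releasing the guest
 * structure when the last reference goes away.
 */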
void kvmhv_put_nested(struct kvm_nested_guest *gp)
{
	struct kvm *kvm = gp->l1_host;
	long ref;

	spin_lock(&kvm->mmu_lock);
	ref = --gp->refcnt;
	spin_unlock(&kvm->mmu_lock);
	if (ref == 0)
		kvmhv_release_nested(gp);
}