// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/hashtable.h>
#include <linux/amd-iommu.h>
#include <linux/kvm_host.h>

#include <asm/irq_remapping.h>

#include "trace.h"
#include "lapic.h"
#include "x86.h"
#include "irq.h"
#include "svm.h"

/* AVIC GATAG is encoded using VM and VCPU IDs */
#define AVIC_VCPU_ID_BITS		8
#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)

#define AVIC_VM_ID_BITS			24
#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)

#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
					 (y & AVIC_VCPU_ID_MASK))
#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
#define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
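
/*
 * For example, AVIC_GATAG(0x2a, 5) encodes to 0x2a05, from which
 * AVIC_GATAG_TO_VMID() recovers vm_id 0x2a and AVIC_GATAG_TO_VCPUID()
 * recovers vcpu_id 0x5.
 */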

/* Note:
 * This hash table is used to map a VM_ID to a struct kvm_svm when
 * handling an AMD IOMMU GALOG notification to schedule in a
 * particular vCPU.
 */
#define SVM_VM_DATA_HASH_BITS	8
static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);

/*
 * This is a wrapper of struct amd_iommu_ir_data.
 */
struct amd_svm_iommu_ir {
	struct list_head node;	/* Used by SVM for per-vcpu ir_list */
	void *data;		/* Storing pointer to struct amd_ir_data */
};

/* Note:
 * This function is called from the IOMMU driver to notify
 * SVM to schedule in a particular vCPU of a particular VM.
 */
int avic_ga_log_notifier(u32 ga_tag)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm;
	struct kvm_vcpu *vcpu = NULL;
	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);

	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
	trace_kvm_avic_ga_log(vm_id, vcpu_id);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
		if (kvm_svm->avic_vm_id != vm_id)
			continue;
		vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
		break;
	}
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	/* Note:
	 * At this point, the IOMMU should have already set the pending
	 * bit in the vAPIC backing page. So, we just need to schedule
	 * in the vcpu.
	 */
	if (vcpu)
		kvm_vcpu_wake_up(vcpu);

	return 0;
}

void avic_vm_destroy(struct kvm *kvm)
{
	unsigned long flags;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);

	if (!enable_apicv)
		return;

	if (kvm_svm->avic_logical_id_table_page)
		__free_page(kvm_svm->avic_logical_id_table_page);
	if (kvm_svm->avic_physical_id_table_page)
		__free_page(kvm_svm->avic_physical_id_table_page);

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
	hash_del(&kvm_svm->hnode);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
}

int avic_vm_init(struct kvm *kvm)
{
	unsigned long flags;
	int err = -ENOMEM;
	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
	struct kvm_svm *k2;
	struct page *p_page;
	struct page *l_page;
	u32 vm_id;

	if (!enable_apicv)
		return 0;

	/* Allocating physical APIC ID table (4KB) */
	p_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!p_page)
		goto free_avic;

	kvm_svm->avic_physical_id_table_page = p_page;

	/* Allocating logical APIC ID table (4KB) */
	l_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!l_page)
		goto free_avic;

	kvm_svm->avic_logical_id_table_page = l_page;

	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
 again:
	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
	if (vm_id == 0) { /* id is 1-based, zero is not okay */
		next_vm_id_wrapped = 1;
		goto again;
	}
	/* Is it still in use? Only possible if wrapped at least once */
	if (next_vm_id_wrapped) {
		hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
			if (k2->avic_vm_id == vm_id)
				goto again;
		}
	}
	kvm_svm->avic_vm_id = vm_id;
	hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);

	return 0;

free_avic:
	avic_vm_destroy(kvm);
	return err;
}

void avic_init_vmcb(struct vcpu_svm *svm)
{
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));

	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
	vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
	vmcb->control.avic_vapic_bar = APIC_DEFAULT_PHYS_BASE & VMCB_AVIC_APIC_BAR_MASK;

	if (kvm_apicv_activated(svm->vcpu.kvm))
		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
	else
		vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
}

static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
				       unsigned int index)
{
	u64 *avic_physical_id_table;
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);

	if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
		return NULL;

	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);

	return &avic_physical_id_table[index];
}

/*
 * Note:
 * AVIC hardware walks the nested page table to check permissions,
 * but does not use the SPA address specified in the leaf page
 * table entry since it uses the address in the AVIC_BACKING_PAGE
 * pointer field of the VMCB. Therefore, we set up the
 * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
 */
static int avic_alloc_access_page(struct kvm *kvm)
{
	void __user *ret;
	int r = 0;

	mutex_lock(&kvm->slots_lock);

	if (kvm->arch.apic_access_memslot_enabled)
		goto out;

	ret = __x86_set_memory_region(kvm,
				      APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
				      APIC_DEFAULT_PHYS_BASE,
				      PAGE_SIZE);
	if (IS_ERR(ret)) {
		r = PTR_ERR(ret);
		goto out;
	}

	kvm->arch.apic_access_memslot_enabled = true;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}

static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
	u64 *entry, new_entry;
	int id = vcpu->vcpu_id;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
		return -EINVAL;

	if (!vcpu->arch.apic->regs)
		return -EINVAL;

	if (kvm_apicv_activated(vcpu->kvm)) {
		int ret;

		ret = avic_alloc_access_page(vcpu->kvm);
		if (ret)
			return ret;
	}

	svm->avic_backing_page = virt_to_page(vcpu->arch.apic->regs);

	/* Setting AVIC backing page address in the phy APIC ID table */
	entry = avic_get_physical_id_entry(vcpu, id);
	if (!entry)
		return -EINVAL;

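	/*
	 * Each 64-bit entry holds the backing page's physical address and
	 * the valid bit; the host physical APIC ID and IsRunning bit are
	 * filled in later by avic_vcpu_load().
	 */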
	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
			       AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) |
			       AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
	WRITE_ONCE(*entry, new_entry);

	svm->avic_physical_id_cache = entry;

	return 0;
}

static void avic_ring_doorbell(struct kvm_vcpu *vcpu)
{
	/*
	 * Note, the vCPU could get migrated to a different pCPU at any point,
	 * which could result in signalling the wrong/previous pCPU. But if
	 * that happens the vCPU is guaranteed to do a VMRUN (after being
	 * migrated) and thus will process pending interrupts, i.e. a doorbell
	 * is not needed (and the spurious one is harmless).
	 */
	int cpu = READ_ONCE(vcpu->cpu);

	if (cpu != get_cpu())
		wrmsrl(MSR_AMD64_SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu));
	put_cpu();
}

static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
				   u32 icrl, u32 icrh)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;

	/*
	 * Wake any target vCPUs that are blocking, i.e. waiting for a wake
	 * event. There's no need to signal doorbells, as hardware has handled
	 * vCPUs that were in guest at the time of the IPI, and vCPUs that have
	 * since entered the guest will have processed pending IRQs at VMRUN.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
					GET_APIC_DEST_FIELD(icrh),
					icrl & APIC_DEST_MASK))
			kvm_vcpu_wake_up(vcpu);
	}
}

int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
	u32 icrl = svm->vmcb->control.exit_info_1;
	u32 id = svm->vmcb->control.exit_info_2 >> 32;
	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
	struct kvm_lapic *apic = vcpu->arch.apic;

	trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);

	switch (id) {
	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
		/*
		 * AVIC hardware handles the generation of
		 * IPIs when the specified Message Type is Fixed
		 * (also known as fixed delivery mode) and
		 * the Trigger Mode is edge-triggered. The hardware
		 * also supports self and broadcast delivery modes
		 * specified via the Destination Shorthand (DSH)
		 * field of the ICRL. Logical and physical APIC ID
		 * formats are supported. All other IPI types cause
		 * a #VMEXIT, which needs to be emulated.
		 */
		kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
		kvm_lapic_reg_write(apic, APIC_ICR, icrl);
		break;
	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
		/*
		 * At this point, we expect that the AVIC HW has already
		 * set the appropriate IRR bits on the valid target
		 * vcpus. So, we just need to kick the appropriate vcpu.
		 */
		avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh);
		break;
	case AVIC_IPI_FAILURE_INVALID_TARGET:
		break;
	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
		WARN_ONCE(1, "Invalid backing page\n");
		break;
	default:
		pr_err("Unknown IPI interception\n");
	}

	return 1;
}

static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
	int index;
	u32 *logical_apic_id_table;
	int dlid = GET_APIC_LOGICAL_ID(ldr);

	if (!dlid)
		return NULL;

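	/*
	 * In flat mode the logical ID is a bitmap of up to eight APICs,
	 * one table entry per bit. In cluster mode the upper nibble
	 * selects the cluster and the lower nibble is a bitmap within
	 * that cluster, four entries per cluster; e.g. dlid 0x24 maps
	 * to cluster 2, bit 2, i.e. table index 10.
	 */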
	if (flat) { /* flat */
		index = ffs(dlid) - 1;
		if (index > 7)
			return NULL;
	} else { /* cluster */
		int cluster = (dlid & 0xf0) >> 4;
		int apic = ffs(dlid & 0x0f) - 1;

		if ((apic < 0) || (apic > 7) ||
		    (cluster >= 0xf))
			return NULL;
		index = (cluster << 2) + apic;
	}

	logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);

	return &logical_apic_id_table[index];
}

static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
{
	bool flat;
	u32 *entry, new_entry;

	flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
	entry = avic_get_logical_id_entry(vcpu, ldr, flat);
	if (!entry)
		return -EINVAL;

	new_entry = READ_ONCE(*entry);
	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
	new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
	new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
	WRITE_ONCE(*entry, new_entry);

	return 0;
}

static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool flat = svm->dfr_reg == APIC_DFR_FLAT;
	u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);

	if (entry)
		clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}

static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
{
	int ret = 0;
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
	u32 id = kvm_xapic_id(vcpu->arch.apic);

	if (ldr == svm->ldr_reg)
		return 0;

	avic_invalidate_logical_id_entry(vcpu);

	if (ldr)
		ret = avic_ldr_write(vcpu, id, ldr);

	if (!ret)
		svm->ldr_reg = ldr;

	return ret;
}

static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
{
	u64 *old, *new;
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 id = kvm_xapic_id(vcpu->arch.apic);

	if (vcpu->vcpu_id == id)
		return 0;

	old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
	new = avic_get_physical_id_entry(vcpu, id);
	if (!new || !old)
		return 1;

	/* We need to move physical_id_entry to new offset */
	*new = *old;
	*old = 0ULL;
	to_svm(vcpu)->avic_physical_id_cache = new;

	/*
	 * Also update the guest physical APIC ID in the logical
	 * APIC ID table entry if the LDR has already been set up.
	 */
	if (svm->ldr_reg)
		avic_handle_ldr_update(vcpu);

	return 0;
}

static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);

	if (svm->dfr_reg == dfr)
		return;

	avic_invalidate_logical_id_entry(vcpu);
	svm->dfr_reg = dfr;
}

static int avic_unaccel_trap_write(struct vcpu_svm *svm)
{
	struct kvm_lapic *apic = svm->vcpu.arch.apic;
	u32 offset = svm->vmcb->control.exit_info_1 &
		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;

	switch (offset) {
	case APIC_ID:
		if (avic_handle_apic_id_update(&svm->vcpu))
			return 0;
		break;
	case APIC_LDR:
		if (avic_handle_ldr_update(&svm->vcpu))
			return 0;
		break;
	case APIC_DFR:
		avic_handle_dfr_update(&svm->vcpu);
		break;
	default:
		break;
	}

	kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));

	return 1;
}

static bool is_avic_unaccelerated_access_trap(u32 offset)
{
	bool ret = false;

	switch (offset) {
	case APIC_ID:
	case APIC_EOI:
	case APIC_RRR:
	case APIC_LDR:
	case APIC_DFR:
	case APIC_SPIV:
	case APIC_ESR:
	case APIC_ICR:
	case APIC_LVTT:
	case APIC_LVTTHMR:
	case APIC_LVTPC:
	case APIC_LVT0:
	case APIC_LVT1:
	case APIC_LVTERR:
	case APIC_TMICT:
	case APIC_TDCR:
		ret = true;
		break;
	default:
		break;
	}
	return ret;
}

int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret = 0;
	u32 offset = svm->vmcb->control.exit_info_1 &
		     AVIC_UNACCEL_ACCESS_OFFSET_MASK;
	u32 vector = svm->vmcb->control.exit_info_2 &
		     AVIC_UNACCEL_ACCESS_VECTOR_MASK;
	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
		     AVIC_UNACCEL_ACCESS_WRITE_MASK;
	bool trap = is_avic_unaccelerated_access_trap(offset);

	trace_kvm_avic_unaccelerated_access(vcpu->vcpu_id, offset,
					    trap, write, vector);
	if (trap) {
		/* Handling Trap */
		WARN_ONCE(!write, "svm: Handling trap read.\n");
		ret = avic_unaccel_trap_write(svm);
	} else {
		/* Handling Fault */
		ret = kvm_emulate_instruction(vcpu, 0);
	}

	return ret;
}

int avic_init_vcpu(struct vcpu_svm *svm)
{
	int ret;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm))
		return 0;

	ret = avic_init_backing_page(vcpu);
	if (ret)
		return ret;

	INIT_LIST_HEAD(&svm->ir_list);
	spin_lock_init(&svm->ir_list_lock);
	svm->dfr_reg = APIC_DFR_FLAT;

	return ret;
}

void avic_post_state_restore(struct kvm_vcpu *vcpu)
{
	if (avic_handle_apic_id_update(vcpu) != 0)
		return;
	avic_handle_dfr_update(vcpu);
	avic_handle_ldr_update(vcpu);
}

void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
	return;
}

void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
{
}

void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
{
}

static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		if (activate)
			ret = amd_iommu_activate_guest_mode(ir->data);
		else
			ret = amd_iommu_deactivate_guest_mode(ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}

void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb01.ptr;
	bool activated = kvm_vcpu_apicv_active(vcpu);

	if (!enable_apicv)
		return;

	if (activated) {
		/**
		 * During AVIC temporary deactivation, the guest could update
		 * the APIC ID, DFR and LDR registers, which would not be
		 * trapped by avic_unaccelerated_access_interception(). In this
		 * case, we need to check and update the AVIC logical APIC ID
		 * table accordingly before re-activating.
		 */
		avic_post_state_restore(vcpu);
		vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
	} else {
		vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
	}
	vmcb_mark_dirty(vmcb, VMCB_AVIC);

	if (activated)
		avic_vcpu_load(vcpu, vcpu->cpu);
	else
		avic_vcpu_put(vcpu);

	svm_set_pi_irte_mode(vcpu, activated);
}

void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
{
	return;
}

int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
{
	if (!vcpu->arch.apicv_active)
		return -1;

	kvm_lapic_set_irr(vec, vcpu->arch.apic);

	/*
	 * Pairs with the smp_mb_*() after setting vcpu->guest_mode in
	 * vcpu_enter_guest() to ensure the write to the vIRR is ordered before
	 * the read of guest_mode, which guarantees that either VMRUN will see
	 * and process the new vIRR entry, or that the below code will signal
	 * the doorbell if the vCPU is already running in the guest.
	 */
	smp_mb__after_atomic();

	/*
	 * Signal the doorbell to tell hardware to inject the IRQ if the vCPU
	 * is in the guest. If the vCPU is not in the guest, hardware will
	 * automatically process AVIC interrupts at VMRUN.
	 */
	if (vcpu->mode == IN_GUEST_MODE) {
		/*
		 * Signal the doorbell to tell hardware to inject the IRQ. If
		 * the vCPU exits the guest before the doorbell chimes, hardware
		 * will automatically process AVIC interrupts at the next VMRUN.
		 */
		avic_ring_doorbell(vcpu);
	} else {
		/*
		 * Wake the vCPU if it was blocking. KVM will then detect the
		 * pending IRQ when checking if the vCPU has a wake event.
		 */
		kvm_vcpu_wake_up(vcpu);
	}

	return 0;
}

bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
{
	return false;
}

static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	unsigned long flags;
	struct amd_svm_iommu_ir *cur;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_for_each_entry(cur, &svm->ir_list, node) {
		if (cur->data != pi->ir_data)
			continue;
		list_del(&cur->node);
		kfree(cur);
		break;
	}
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}

static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;

	/**
	 * In some cases, the existing irte is updated and re-set,
	 * so we need to check here if it's already been added
	 * to the ir_list.
	 */
	if (pi->ir_data && (pi->prev_ga_tag != 0)) {
		struct kvm *kvm = svm->vcpu.kvm;
		u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
		struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
		struct vcpu_svm *prev_svm;

		if (!prev_vcpu) {
			ret = -EINVAL;
			goto out;
		}

		prev_svm = to_svm(prev_vcpu);
		svm_ir_list_del(prev_svm, pi);
	}

	/**
	 * Allocating a new amd_svm_iommu_ir, which will be added
	 * to the per-vcpu ir_list.
	 */
	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
	if (!ir) {
		ret = -ENOMEM;
		goto out;
	}
	ir->data = pi->ir_data;

	spin_lock_irqsave(&svm->ir_list_lock, flags);
	list_add(&ir->node, &svm->ir_list);
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
	return ret;
}

/*
 * Note:
 * The HW cannot support posting multicast/broadcast
 * interrupts to a vCPU. So, we still use legacy interrupt
 * remapping for these kinds of interrupts.
 *
 * For lowest-priority interrupts, we only support
 * those with a single CPU as the destination, e.g. the user
 * configures the interrupts via /proc/irq or uses
 * irqbalance to make the interrupts single-CPU.
 */
static int
get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
		 struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
{
	struct kvm_lapic_irq irq;
	struct kvm_vcpu *vcpu = NULL;

	kvm_set_msi_irq(kvm, e, &irq);

	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
	    !kvm_irq_is_postable(&irq)) {
		pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
			 __func__, irq.vector);
		return -1;
	}

	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
		 irq.vector);
	*svm = to_svm(vcpu);
	vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
	vcpu_info->vector = irq.vector;

	return 0;
}

/*
 * svm_update_pi_irte - set IRTE for Posted-Interrupts
 *
 * @kvm: kvm
 * @host_irq: host irq of the interrupt
 * @guest_irq: gsi of the interrupt
 * @set: set or unset PI
 * returns 0 on success, < 0 on failure
 */
int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
		       uint32_t guest_irq, bool set)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_irq_routing_table *irq_rt;
	int idx, ret = -EINVAL;

	if (!kvm_arch_has_assigned_device(kvm) ||
	    !irq_remapping_cap(IRQ_POSTING_CAP))
		return 0;

	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
		 __func__, host_irq, guest_irq, set);

	idx = srcu_read_lock(&kvm->irq_srcu);
	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
	WARN_ON(guest_irq >= irq_rt->nr_rt_entries);

	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
		struct vcpu_data vcpu_info;
		struct vcpu_svm *svm = NULL;

		if (e->type != KVM_IRQ_ROUTING_MSI)
			continue;

		/**
		 * Here, we set up with legacy mode in the following cases:
		 * 1. When the interrupt cannot be targeted to a specific vcpu.
		 * 2. Unsetting posted interrupt.
		 * 3. APIC virtualization is disabled for the vcpu.
		 * 4. IRQ has an incompatible delivery mode (SMI, INIT, etc)
		 */
		if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
		    kvm_vcpu_apicv_active(&svm->vcpu)) {
			struct amd_iommu_pi_data pi;

			/* Try to enable guest_mode in IRTE */
			pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
					    AVIC_HPA_MASK);
			pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
					       svm->vcpu.vcpu_id);
			pi.is_guest_mode = true;
			pi.vcpu_data = &vcpu_info;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/**
			 * Here, we have successfully set up vcpu affinity in
			 * IOMMU guest mode. Now, we need to store the posted
			 * interrupt information in a per-vcpu ir_list so that
			 * we can reference it directly when we update the vcpu
			 * scheduling information in the IOMMU irte.
			 */
			if (!ret && pi.is_guest_mode)
				svm_ir_list_add(svm, &pi);
		} else {
			/* Use legacy mode in IRTE */
			struct amd_iommu_pi_data pi;

			/**
			 * Here, pi is used to:
			 * - Tell IOMMU to use legacy mode for this interrupt.
			 * - Retrieve ga_tag of prior interrupt remapping data.
			 */
			pi.prev_ga_tag = 0;
			pi.is_guest_mode = false;
			ret = irq_set_vcpu_affinity(host_irq, &pi);

			/**
			 * Check if the posted interrupt was previously
			 * set up with guest_mode by checking if the ga_tag
			 * was cached. If so, we need to clean up the per-vcpu
			 * ir_list.
			 */
			if (!ret && pi.prev_ga_tag) {
				int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
				struct kvm_vcpu *vcpu;

				vcpu = kvm_get_vcpu_by_id(kvm, id);
				if (vcpu)
					svm_ir_list_del(to_svm(vcpu), &pi);
			}
		}

		if (!ret && svm) {
			trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
						 e->gsi, vcpu_info.vector,
						 vcpu_info.pi_desc_addr, set);
		}

		if (ret < 0) {
			pr_err("%s: failed to update PI IRTE\n", __func__);
			goto out;
		}
	}

	ret = 0;
out:
	srcu_read_unlock(&kvm->irq_srcu, idx);
	return ret;
}

bool svm_check_apicv_inhibit_reasons(ulong bit)
{
	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
			  BIT(APICV_INHIBIT_REASON_ABSENT) |
			  BIT(APICV_INHIBIT_REASON_HYPERV) |
			  BIT(APICV_INHIBIT_REASON_NESTED) |
			  BIT(APICV_INHIBIT_REASON_IRQWIN) |
			  BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
			  BIT(APICV_INHIBIT_REASON_X2APIC) |
			  BIT(APICV_INHIBIT_REASON_BLOCKIRQ);

	return supported & BIT(bit);
}

static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
	int ret = 0;
	unsigned long flags;
	struct amd_svm_iommu_ir *ir;
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!kvm_arch_has_assigned_device(vcpu->kvm))
		return 0;

	/*
	 * Here, we go through the per-vcpu ir_list to update all existing
	 * interrupt remapping table entries targeting this vcpu.
	 */
	spin_lock_irqsave(&svm->ir_list_lock, flags);

	if (list_empty(&svm->ir_list))
		goto out;

	list_for_each_entry(ir, &svm->ir_list, node) {
		ret = amd_iommu_update_ga(cpu, r, ir->data);
		if (ret)
			break;
	}
out:
	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
	return ret;
}

void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
	u64 entry;
	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
	int h_physical_id = kvm_cpu_get_apicid(cpu);
	struct vcpu_svm *svm = to_svm(vcpu);

	lockdep_assert_preemption_disabled();

	/*
	 * Since the host physical APIC id is 8 bits,
	 * we can support host APIC IDs up to 255.
	 */
	if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
		return;

	/*
	 * No need to update anything if the vCPU is blocking, i.e. if the vCPU
	 * is being scheduled in after being preempted. The CPU entries in the
	 * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'.
	 * If the vCPU was migrated, its new CPU value will be stuffed when the
	 * vCPU unblocks.
	 */
	if (kvm_vcpu_is_blocking(vcpu))
		return;

	entry = READ_ONCE(*(svm->avic_physical_id_cache));
	WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
	entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
	entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;

	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}

void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
	u64 entry;
	struct vcpu_svm *svm = to_svm(vcpu);

	lockdep_assert_preemption_disabled();

	entry = READ_ONCE(*(svm->avic_physical_id_cache));

	/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
	if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
		return;

	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);

	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}

void avic_vcpu_blocking(struct kvm_vcpu *vcpu)
{
	if (!kvm_vcpu_apicv_active(vcpu))
		return;

	preempt_disable();

	/*
	 * Unload the AVIC when the vCPU is about to block, _before_
	 * the vCPU actually blocks.
	 *
	 * Any IRQs that arrive before IsRunning=0 will not cause an
	 * incomplete IPI vmexit on the source, therefore vIRR will also
	 * be checked by kvm_vcpu_check_block() before blocking. The
	 * memory barrier implicit in set_current_state orders writing
	 * IsRunning=0 before reading the vIRR. The processor needs a
	 * matching memory barrier on interrupt delivery between writing
	 * IRR and reading IsRunning; the lack of such a barrier might be
	 * the cause of errata #1235.
	 */
	avic_vcpu_put(vcpu);

	preempt_enable();
}

void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
	int cpu;

	if (!kvm_vcpu_apicv_active(vcpu))
		return;

	cpu = get_cpu();
	WARN_ON(cpu != vcpu->cpu);

	avic_vcpu_load(vcpu, cpu);

	put_cpu();
}