Blame - arch/x86/kvm/svm/avic.c - SHIFTPHONES/mainline/linux

blob: 3e55674098be08c7f983f2d1810d2908961da1b2 [file] [log] [blame]

Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0-only
				2	/*
				3	* Kernel-based Virtual Machine driver for Linux
				4	*
				5	* AMD SVM support
				6	*
				7	* Copyright (C) 2006 Qumranet, Inc.
				8	* Copyright 2010 Red Hat, Inc. and/or its affiliates.
				9	*
				10	* Authors:
				11	* Yaniv Kamay <yaniv@qumranet.com>
				12	* Avi Kivity <avi@qumranet.com>
				13	*/
				14
				15	#define pr_fmt(fmt) "SVM: " fmt
				16
				17	#include <linux/kvm_types.h>
				18	#include <linux/hashtable.h>
				19	#include <linux/amd-iommu.h>
				20	#include <linux/kvm_host.h>
				21
				22	#include <asm/irq_remapping.h>
				23
				24	#include "trace.h"
				25	#include "lapic.h"
				26	#include "x86.h"
				27	#include "irq.h"
				28	#include "svm.h"
				29
				30	/* enable / disable AVIC */
				31	int avic;
				32	#ifdef CONFIG_X86_LOCAL_APIC
				33	module_param(avic, int, S_IRUGO);
				34	#endif
				35
				36	#define SVM_AVIC_DOORBELL 0xc001011b
				37
				38	#define AVIC_HPA_MASK ~((0xFFFULL << 52) \| 0xFFF)
				39
				40	/*
				41	* 0xff is broadcast, so the max index allowed for physical APIC ID
				42	* table is 0xfe. APIC IDs above 0xff are reserved.
				43	*/
				44	#define AVIC_MAX_PHYSICAL_ID_COUNT 255
				45
				46	#define AVIC_UNACCEL_ACCESS_WRITE_MASK 1
				47	#define AVIC_UNACCEL_ACCESS_OFFSET_MASK 0xFF0
				48	#define AVIC_UNACCEL_ACCESS_VECTOR_MASK 0xFFFFFFFF
				49
				50	/* AVIC GATAG is encoded using VM and VCPU IDs */
				51	#define AVIC_VCPU_ID_BITS 8
				52	#define AVIC_VCPU_ID_MASK ((1 << AVIC_VCPU_ID_BITS) - 1)
				53
				54	#define AVIC_VM_ID_BITS 24
				55	#define AVIC_VM_ID_NR (1 << AVIC_VM_ID_BITS)
				56	#define AVIC_VM_ID_MASK ((1 << AVIC_VM_ID_BITS) - 1)
				57
				58	#define AVIC_GATAG(x, y) (((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) \| \
				59	(y & AVIC_VCPU_ID_MASK))
				60	#define AVIC_GATAG_TO_VMID(x) ((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
				61	#define AVIC_GATAG_TO_VCPUID(x) (x & AVIC_VCPU_ID_MASK)
				62
				63	/* Note:
				64	* This hash table is used to map VM_ID to a struct kvm_svm,
				65	* when handling AMD IOMMU GALOG notification to schedule in
				66	* a particular vCPU.
				67	*/
				68	#define SVM_VM_DATA_HASH_BITS 8
				69	static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
				70	static u32 next_vm_id = 0;
				71	static bool next_vm_id_wrapped = 0;
				72	static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
				73
				74	/*
				75	* This is a wrapper of struct amd_iommu_ir_data.
				76	*/
				77	struct amd_svm_iommu_ir {
				78	struct list_head node; /* Used by SVM for per-vcpu ir_list */
				79	void data; / Storing pointer to struct amd_ir_data */
				80	};
				81
				82	enum avic_ipi_failure_cause {
				83	AVIC_IPI_FAILURE_INVALID_INT_TYPE,
				84	AVIC_IPI_FAILURE_TARGET_NOT_RUNNING,
				85	AVIC_IPI_FAILURE_INVALID_TARGET,
				86	AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
				87	};
				88
				89	/* Note:
				90	* This function is called from IOMMU driver to notify
				91	* SVM to schedule in a particular vCPU of a particular VM.
				92	*/
				93	int avic_ga_log_notifier(u32 ga_tag)
				94	{
				95	unsigned long flags;
				96	struct kvm_svm *kvm_svm;
				97	struct kvm_vcpu *vcpu = NULL;
				98	u32 vm_id = AVIC_GATAG_TO_VMID(ga_tag);
				99	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(ga_tag);
				100
				101	pr_debug("SVM: %s: vm_id=%#x, vcpu_id=%#x\n", __func__, vm_id, vcpu_id);
				102	trace_kvm_avic_ga_log(vm_id, vcpu_id);
				103
				104	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
				105	hash_for_each_possible(svm_vm_data_hash, kvm_svm, hnode, vm_id) {
				106	if (kvm_svm->avic_vm_id != vm_id)
				107	continue;
				108	vcpu = kvm_get_vcpu_by_id(&kvm_svm->kvm, vcpu_id);
				109	break;
				110	}
				111	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
				112
				113	/* Note:
				114	* At this point, the IOMMU should have already set the pending
				115	* bit in the vAPIC backing page. So, we just need to schedule
				116	* in the vcpu.
				117	*/
				118	if (vcpu)
				119	kvm_vcpu_wake_up(vcpu);
				120
				121	return 0;
				122	}
				123
				124	void avic_vm_destroy(struct kvm *kvm)
				125	{
				126	unsigned long flags;
				127	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
				128
				129	if (!avic)
				130	return;
				131
				132	if (kvm_svm->avic_logical_id_table_page)
				133	__free_page(kvm_svm->avic_logical_id_table_page);
				134	if (kvm_svm->avic_physical_id_table_page)
				135	__free_page(kvm_svm->avic_physical_id_table_page);
				136
				137	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
				138	hash_del(&kvm_svm->hnode);
				139	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
				140	}
				141
				142	int avic_vm_init(struct kvm *kvm)
				143	{
				144	unsigned long flags;
				145	int err = -ENOMEM;
				146	struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
				147	struct kvm_svm *k2;
				148	struct page *p_page;
				149	struct page *l_page;
				150	u32 vm_id;
				151
				152	if (!avic)
				153	return 0;
				154
				155	/* Allocating physical APIC ID table (4KB) */
Haiwei Li	ae5a2a3	2020-09-16 16:36:21 +0800	[diff] [blame]	156	p_page = alloc_page(GFP_KERNEL_ACCOUNT \| __GFP_ZERO);
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	157	if (!p_page)
				158	goto free_avic;
				159
				160	kvm_svm->avic_physical_id_table_page = p_page;
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	161
				162	/* Allocating logical APIC ID table (4KB) */
Haiwei Li	ae5a2a3	2020-09-16 16:36:21 +0800	[diff] [blame]	163	l_page = alloc_page(GFP_KERNEL_ACCOUNT \| __GFP_ZERO);
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	164	if (!l_page)
				165	goto free_avic;
				166
				167	kvm_svm->avic_logical_id_table_page = l_page;
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	168
				169	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
				170	again:
				171	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
				172	if (vm_id == 0) { /* id is 1-based, zero is not okay */
				173	next_vm_id_wrapped = 1;
				174	goto again;
				175	}
				176	/* Is it still in use? Only possible if wrapped at least once */
				177	if (next_vm_id_wrapped) {
				178	hash_for_each_possible(svm_vm_data_hash, k2, hnode, vm_id) {
				179	if (k2->avic_vm_id == vm_id)
				180	goto again;
				181	}
				182	}
				183	kvm_svm->avic_vm_id = vm_id;
				184	hash_add(svm_vm_data_hash, &kvm_svm->hnode, kvm_svm->avic_vm_id);
				185	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
				186
				187	return 0;
				188
				189	free_avic:
				190	avic_vm_destroy(kvm);
				191	return err;
				192	}
				193
				194	void avic_init_vmcb(struct vcpu_svm *svm)
				195	{
				196	struct vmcb *vmcb = svm->vmcb;
				197	struct kvm_svm *kvm_svm = to_kvm_svm(svm->vcpu.kvm);
				198	phys_addr_t bpa = __sme_set(page_to_phys(svm->avic_backing_page));
				199	phys_addr_t lpa = __sme_set(page_to_phys(kvm_svm->avic_logical_id_table_page));
				200	phys_addr_t ppa = __sme_set(page_to_phys(kvm_svm->avic_physical_id_table_page));
				201
				202	vmcb->control.avic_backing_page = bpa & AVIC_HPA_MASK;
				203	vmcb->control.avic_logical_id = lpa & AVIC_HPA_MASK;
				204	vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
				205	vmcb->control.avic_physical_id \|= AVIC_MAX_PHYSICAL_ID_COUNT;
				206	if (kvm_apicv_activated(svm->vcpu.kvm))
				207	vmcb->control.int_ctl \|= AVIC_ENABLE_MASK;
				208	else
				209	vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
				210	}
				211
				212	static u64 avic_get_physical_id_entry(struct kvm_vcpu vcpu,
				213	unsigned int index)
				214	{
				215	u64 *avic_physical_id_table;
				216	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
				217
				218	if (index >= AVIC_MAX_PHYSICAL_ID_COUNT)
				219	return NULL;
				220
				221	avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
				222
				223	return &avic_physical_id_table[index];
				224	}
				225
				226	/**
				227	* Note:
				228	* AVIC hardware walks the nested page table to check permissions,
				229	* but does not use the SPA address specified in the leaf page
				230	* table entry since it uses address in the AVIC_BACKING_PAGE pointer
				231	* field of the VMCB. Therefore, we set up the
				232	* APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
				233	*/
				234	static int avic_update_access_page(struct kvm *kvm, bool activate)
				235	{
Peter Xu	ff5a983	2020-09-30 21:20:33 -0400	[diff] [blame]	236	void __user *ret;
				237	int r = 0;
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	238
				239	mutex_lock(&kvm->slots_lock);
				240	/*
				241	* During kvm_destroy_vm(), kvm_pit_set_reinject() could trigger
				242	* APICv mode change, which update APIC_ACCESS_PAGE_PRIVATE_MEMSLOT
				243	* memory region. So, we need to ensure that kvm->mm == current->mm.
				244	*/
				245	if ((kvm->arch.apic_access_page_done == activate) \|\|
				246	(kvm->mm != current->mm))
				247	goto out;
				248
				249	ret = __x86_set_memory_region(kvm,
				250	APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
				251	APIC_DEFAULT_PHYS_BASE,
				252	activate ? PAGE_SIZE : 0);
Peter Xu	ff5a983	2020-09-30 21:20:33 -0400	[diff] [blame]	253	if (IS_ERR(ret)) {
				254	r = PTR_ERR(ret);
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	255	goto out;
Peter Xu	ff5a983	2020-09-30 21:20:33 -0400	[diff] [blame]	256	}
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	257
				258	kvm->arch.apic_access_page_done = activate;
				259	out:
				260	mutex_unlock(&kvm->slots_lock);
Peter Xu	ff5a983	2020-09-30 21:20:33 -0400	[diff] [blame]	261	return r;
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	262	}
				263
				264	static int avic_init_backing_page(struct kvm_vcpu *vcpu)
				265	{
				266	u64 *entry, new_entry;
				267	int id = vcpu->vcpu_id;
				268	struct vcpu_svm *svm = to_svm(vcpu);
				269
				270	if (id >= AVIC_MAX_PHYSICAL_ID_COUNT)
				271	return -EINVAL;
				272
				273	if (!svm->vcpu.arch.apic->regs)
				274	return -EINVAL;
				275
				276	if (kvm_apicv_activated(vcpu->kvm)) {
				277	int ret;
				278
				279	ret = avic_update_access_page(vcpu->kvm, true);
				280	if (ret)
				281	return ret;
				282	}
				283
				284	svm->avic_backing_page = virt_to_page(svm->vcpu.arch.apic->regs);
				285
				286	/* Setting AVIC backing page address in the phy APIC ID table */
				287	entry = avic_get_physical_id_entry(vcpu, id);
				288	if (!entry)
				289	return -EINVAL;
				290
				291	new_entry = __sme_set((page_to_phys(svm->avic_backing_page) &
				292	AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK) \|
				293	AVIC_PHYSICAL_ID_ENTRY_VALID_MASK);
				294	WRITE_ONCE(*entry, new_entry);
				295
				296	svm->avic_physical_id_cache = entry;
				297
				298	return 0;
				299	}
				300
Sean Christopherson	e6c804a	2021-02-04 16:57:42 -0800	[diff] [blame]	301	static void avic_kick_target_vcpus(struct kvm kvm, struct kvm_lapic source,
				302	u32 icrl, u32 icrh)
				303	{
				304	struct kvm_vcpu *vcpu;
				305	int i;
				306
				307	kvm_for_each_vcpu(i, vcpu, kvm) {
				308	bool m = kvm_apic_match_dest(vcpu, source,
				309	icrl & APIC_SHORT_MASK,
				310	GET_APIC_DEST_FIELD(icrh),
				311	icrl & APIC_DEST_MASK);
				312
				313	if (m && !avic_vcpu_is_running(vcpu))
				314	kvm_vcpu_wake_up(vcpu);
				315	}
				316	}
				317
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	318	int avic_incomplete_ipi_interception(struct vcpu_svm *svm)
				319	{
				320	u32 icrh = svm->vmcb->control.exit_info_1 >> 32;
				321	u32 icrl = svm->vmcb->control.exit_info_1;
				322	u32 id = svm->vmcb->control.exit_info_2 >> 32;
				323	u32 index = svm->vmcb->control.exit_info_2 & 0xFF;
				324	struct kvm_lapic *apic = svm->vcpu.arch.apic;
				325
				326	trace_kvm_avic_incomplete_ipi(svm->vcpu.vcpu_id, icrh, icrl, id, index);
				327
				328	switch (id) {
				329	case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
				330	/*
				331	* AVIC hardware handles the generation of
				332	* IPIs when the specified Message Type is Fixed
				333	* (also known as fixed delivery mode) and
				334	* the Trigger Mode is edge-triggered. The hardware
				335	* also supports self and broadcast delivery modes
				336	* specified via the Destination Shorthand(DSH)
				337	* field of the ICRL. Logical and physical APIC ID
				338	* formats are supported. All other IPI types cause
				339	* a #VMEXIT, which needs to emulated.
				340	*/
				341	kvm_lapic_reg_write(apic, APIC_ICR2, icrh);
				342	kvm_lapic_reg_write(apic, APIC_ICR, icrl);
				343	break;
Sean Christopherson	e6c804a	2021-02-04 16:57:42 -0800	[diff] [blame]	344	case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING:
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	345	/*
				346	* At this point, we expect that the AVIC HW has already
				347	* set the appropriate IRR bits on the valid target
				348	* vcpus. So, we just need to kick the appropriate vcpu.
				349	*/
Sean Christopherson	e6c804a	2021-02-04 16:57:42 -0800	[diff] [blame]	350	avic_kick_target_vcpus(svm->vcpu.kvm, apic, icrl, icrh);
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	351	break;
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	352	case AVIC_IPI_FAILURE_INVALID_TARGET:
				353	WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n",
				354	index, svm->vcpu.vcpu_id, icrh, icrl);
				355	break;
				356	case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
				357	WARN_ONCE(1, "Invalid backing page\n");
				358	break;
				359	default:
				360	pr_err("Unknown IPI interception\n");
				361	}
				362
				363	return 1;
				364	}
				365
				366	static u32 avic_get_logical_id_entry(struct kvm_vcpu vcpu, u32 ldr, bool flat)
				367	{
				368	struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
				369	int index;
				370	u32 *logical_apic_id_table;
				371	int dlid = GET_APIC_LOGICAL_ID(ldr);
				372
				373	if (!dlid)
				374	return NULL;
				375
				376	if (flat) { /* flat */
				377	index = ffs(dlid) - 1;
				378	if (index > 7)
				379	return NULL;
				380	} else { /* cluster */
				381	int cluster = (dlid & 0xf0) >> 4;
				382	int apic = ffs(dlid & 0x0f) - 1;
				383
				384	if ((apic < 0) \|\| (apic > 7) \|\|
				385	(cluster >= 0xf))
				386	return NULL;
				387	index = (cluster << 2) + apic;
				388	}
				389
				390	logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
				391
				392	return &logical_apic_id_table[index];
				393	}
				394
				395	static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
				396	{
				397	bool flat;
				398	u32 *entry, new_entry;
				399
				400	flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
				401	entry = avic_get_logical_id_entry(vcpu, ldr, flat);
				402	if (!entry)
				403	return -EINVAL;
				404
				405	new_entry = READ_ONCE(*entry);
				406	new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
				407	new_entry \|= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
				408	new_entry \|= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
				409	WRITE_ONCE(*entry, new_entry);
				410
				411	return 0;
				412	}
				413
				414	static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
				415	{
				416	struct vcpu_svm *svm = to_svm(vcpu);
				417	bool flat = svm->dfr_reg == APIC_DFR_FLAT;
				418	u32 *entry = avic_get_logical_id_entry(vcpu, svm->ldr_reg, flat);
				419
				420	if (entry)
				421	clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
				422	}
				423
				424	static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
				425	{
				426	int ret = 0;
				427	struct vcpu_svm *svm = to_svm(vcpu);
				428	u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
				429	u32 id = kvm_xapic_id(vcpu->arch.apic);
				430
				431	if (ldr == svm->ldr_reg)
				432	return 0;
				433
				434	avic_invalidate_logical_id_entry(vcpu);
				435
				436	if (ldr)
				437	ret = avic_ldr_write(vcpu, id, ldr);
				438
				439	if (!ret)
				440	svm->ldr_reg = ldr;
				441
				442	return ret;
				443	}
				444
				445	static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
				446	{
				447	u64 old, new;
				448	struct vcpu_svm *svm = to_svm(vcpu);
				449	u32 id = kvm_xapic_id(vcpu->arch.apic);
				450
				451	if (vcpu->vcpu_id == id)
				452	return 0;
				453
				454	old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
				455	new = avic_get_physical_id_entry(vcpu, id);
				456	if (!new \|\| !old)
				457	return 1;
				458
				459	/* We need to move physical_id_entry to new offset */
				460	new = old;
				461	*old = 0ULL;
				462	to_svm(vcpu)->avic_physical_id_cache = new;
				463
				464	/*
				465	* Also update the guest physical APIC ID in the logical
				466	* APIC ID table entry if already setup the LDR.
				467	*/
				468	if (svm->ldr_reg)
				469	avic_handle_ldr_update(vcpu);
				470
				471	return 0;
				472	}
				473
				474	static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
				475	{
				476	struct vcpu_svm *svm = to_svm(vcpu);
				477	u32 dfr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR);
				478
				479	if (svm->dfr_reg == dfr)
				480	return;
				481
				482	avic_invalidate_logical_id_entry(vcpu);
				483	svm->dfr_reg = dfr;
				484	}
				485
				486	static int avic_unaccel_trap_write(struct vcpu_svm *svm)
				487	{
				488	struct kvm_lapic *apic = svm->vcpu.arch.apic;
				489	u32 offset = svm->vmcb->control.exit_info_1 &
				490	AVIC_UNACCEL_ACCESS_OFFSET_MASK;
				491
				492	switch (offset) {
				493	case APIC_ID:
				494	if (avic_handle_apic_id_update(&svm->vcpu))
				495	return 0;
				496	break;
				497	case APIC_LDR:
				498	if (avic_handle_ldr_update(&svm->vcpu))
				499	return 0;
				500	break;
				501	case APIC_DFR:
				502	avic_handle_dfr_update(&svm->vcpu);
				503	break;
				504	default:
				505	break;
				506	}
				507
				508	kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset));
				509
				510	return 1;
				511	}
				512
				513	static bool is_avic_unaccelerated_access_trap(u32 offset)
				514	{
				515	bool ret = false;
				516
				517	switch (offset) {
				518	case APIC_ID:
				519	case APIC_EOI:
				520	case APIC_RRR:
				521	case APIC_LDR:
				522	case APIC_DFR:
				523	case APIC_SPIV:
				524	case APIC_ESR:
				525	case APIC_ICR:
				526	case APIC_LVTT:
				527	case APIC_LVTTHMR:
				528	case APIC_LVTPC:
				529	case APIC_LVT0:
				530	case APIC_LVT1:
				531	case APIC_LVTERR:
				532	case APIC_TMICT:
				533	case APIC_TDCR:
				534	ret = true;
				535	break;
				536	default:
				537	break;
				538	}
				539	return ret;
				540	}
				541
				542	int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
				543	{
				544	int ret = 0;
				545	u32 offset = svm->vmcb->control.exit_info_1 &
				546	AVIC_UNACCEL_ACCESS_OFFSET_MASK;
				547	u32 vector = svm->vmcb->control.exit_info_2 &
				548	AVIC_UNACCEL_ACCESS_VECTOR_MASK;
				549	bool write = (svm->vmcb->control.exit_info_1 >> 32) &
				550	AVIC_UNACCEL_ACCESS_WRITE_MASK;
				551	bool trap = is_avic_unaccelerated_access_trap(offset);
				552
				553	trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset,
				554	trap, write, vector);
				555	if (trap) {
				556	/* Handling Trap */
				557	WARN_ONCE(!write, "svm: Handling trap read.\n");
				558	ret = avic_unaccel_trap_write(svm);
				559	} else {
				560	/* Handling Fault */
				561	ret = kvm_emulate_instruction(&svm->vcpu, 0);
				562	}
				563
				564	return ret;
				565	}
				566
				567	int avic_init_vcpu(struct vcpu_svm *svm)
				568	{
				569	int ret;
				570	struct kvm_vcpu *vcpu = &svm->vcpu;
				571
				572	if (!avic \|\| !irqchip_in_kernel(vcpu->kvm))
				573	return 0;
				574
				575	ret = avic_init_backing_page(&svm->vcpu);
				576	if (ret)
				577	return ret;
				578
				579	INIT_LIST_HEAD(&svm->ir_list);
				580	spin_lock_init(&svm->ir_list_lock);
				581	svm->dfr_reg = APIC_DFR_FLAT;
				582
				583	return ret;
				584	}
				585
				586	void avic_post_state_restore(struct kvm_vcpu *vcpu)
				587	{
				588	if (avic_handle_apic_id_update(vcpu) != 0)
				589	return;
				590	avic_handle_dfr_update(vcpu);
				591	avic_handle_ldr_update(vcpu);
				592	}
				593
				594	void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate)
				595	{
				596	if (!avic \|\| !lapic_in_kernel(vcpu))
				597	return;
				598
				599	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
				600	kvm_request_apicv_update(vcpu->kvm, activate,
				601	APICV_INHIBIT_REASON_IRQWIN);
				602	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
				603	}
				604
				605	void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
				606	{
				607	return;
				608	}
				609
				610	void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
				611	{
				612	}
				613
				614	void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
				615	{
				616	}
				617
				618	static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
				619	{
				620	int ret = 0;
				621	unsigned long flags;
				622	struct amd_svm_iommu_ir *ir;
				623	struct vcpu_svm *svm = to_svm(vcpu);
				624
				625	if (!kvm_arch_has_assigned_device(vcpu->kvm))
				626	return 0;
				627
				628	/*
				629	* Here, we go through the per-vcpu ir_list to update all existing
				630	* interrupt remapping table entry targeting this vcpu.
				631	*/
				632	spin_lock_irqsave(&svm->ir_list_lock, flags);
				633
				634	if (list_empty(&svm->ir_list))
				635	goto out;
				636
				637	list_for_each_entry(ir, &svm->ir_list, node) {
				638	if (activate)
				639	ret = amd_iommu_activate_guest_mode(ir->data);
				640	else
				641	ret = amd_iommu_deactivate_guest_mode(ir->data);
				642	if (ret)
				643	break;
				644	}
				645	out:
				646	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
				647	return ret;
				648	}
				649
				650	void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
				651	{
				652	struct vcpu_svm *svm = to_svm(vcpu);
				653	struct vmcb *vmcb = svm->vmcb;
				654	bool activated = kvm_vcpu_apicv_active(vcpu);
				655
				656	if (!avic)
				657	return;
				658
				659	if (activated) {
				660	/**
				661	* During AVIC temporary deactivation, guest could update
				662	* APIC ID, DFR and LDR registers, which would not be trapped
				663	* by avic_unaccelerated_access_interception(). In this case,
				664	* we need to check and update the AVIC logical APIC ID table
				665	* accordingly before re-activating.
				666	*/
				667	avic_post_state_restore(vcpu);
				668	vmcb->control.int_ctl \|= AVIC_ENABLE_MASK;
				669	} else {
				670	vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
				671	}
Joerg Roedel	06e7852	2020-06-25 10:03:23 +0200	[diff] [blame]	672	vmcb_mark_dirty(vmcb, VMCB_AVIC);
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	673
				674	svm_set_pi_irte_mode(vcpu, activated);
				675	}
				676
				677	void svm_load_eoi_exitmap(struct kvm_vcpu vcpu, u64 eoi_exit_bitmap)
				678	{
				679	return;
				680	}
				681
				682	int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
				683	{
				684	if (!vcpu->arch.apicv_active)
				685	return -1;
				686
				687	kvm_lapic_set_irr(vec, vcpu->arch.apic);
				688	smp_mb__after_atomic();
				689
				690	if (avic_vcpu_is_running(vcpu)) {
				691	int cpuid = vcpu->cpu;
				692
				693	if (cpuid != get_cpu())
				694	wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
				695	put_cpu();
				696	} else
				697	kvm_vcpu_wake_up(vcpu);
				698
				699	return 0;
				700	}
				701
				702	bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
				703	{
				704	return false;
				705	}
				706
				707	static void svm_ir_list_del(struct vcpu_svm svm, struct amd_iommu_pi_data pi)
				708	{
				709	unsigned long flags;
				710	struct amd_svm_iommu_ir *cur;
				711
				712	spin_lock_irqsave(&svm->ir_list_lock, flags);
				713	list_for_each_entry(cur, &svm->ir_list, node) {
				714	if (cur->data != pi->ir_data)
				715	continue;
				716	list_del(&cur->node);
				717	kfree(cur);
				718	break;
				719	}
				720	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
				721	}
				722
				723	static int svm_ir_list_add(struct vcpu_svm svm, struct amd_iommu_pi_data pi)
				724	{
				725	int ret = 0;
				726	unsigned long flags;
				727	struct amd_svm_iommu_ir *ir;
				728
				729	/**
Ingo Molnar	163b099	2021-03-21 22:28:53 +0100	[diff] [blame^]	730	* In some cases, the existing irte is updated and re-set,
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	731	* so we need to check here if it's already been * added
				732	* to the ir_list.
				733	*/
				734	if (pi->ir_data && (pi->prev_ga_tag != 0)) {
				735	struct kvm *kvm = svm->vcpu.kvm;
				736	u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
				737	struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
				738	struct vcpu_svm *prev_svm;
				739
				740	if (!prev_vcpu) {
				741	ret = -EINVAL;
				742	goto out;
				743	}
				744
				745	prev_svm = to_svm(prev_vcpu);
				746	svm_ir_list_del(prev_svm, pi);
				747	}
				748
				749	/**
				750	* Allocating new amd_iommu_pi_data, which will get
				751	* add to the per-vcpu ir_list.
				752	*/
				753	ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL_ACCOUNT);
				754	if (!ir) {
				755	ret = -ENOMEM;
				756	goto out;
				757	}
				758	ir->data = pi->ir_data;
				759
				760	spin_lock_irqsave(&svm->ir_list_lock, flags);
				761	list_add(&ir->node, &svm->ir_list);
				762	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
				763	out:
				764	return ret;
				765	}
				766
				767	/**
				768	* Note:
				769	* The HW cannot support posting multicast/broadcast
				770	* interrupts to a vCPU. So, we still use legacy interrupt
				771	* remapping for these kind of interrupts.
				772	*
				773	* For lowest-priority interrupts, we only support
				774	* those with single CPU as the destination, e.g. user
				775	* configures the interrupts via /proc/irq or uses
				776	* irqbalance to make the interrupts single-CPU.
				777	*/
				778	static int
				779	get_pi_vcpu_info(struct kvm kvm, struct kvm_kernel_irq_routing_entry e,
				780	struct vcpu_data vcpu_info, struct vcpu_svm *svm)
				781	{
				782	struct kvm_lapic_irq irq;
				783	struct kvm_vcpu *vcpu = NULL;
				784
				785	kvm_set_msi_irq(kvm, e, &irq);
				786
				787	if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) \|\|
				788	!kvm_irq_is_postable(&irq)) {
				789	pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
				790	__func__, irq.vector);
				791	return -1;
				792	}
				793
				794	pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
				795	irq.vector);
				796	*svm = to_svm(vcpu);
				797	vcpu_info->pi_desc_addr = __sme_set(page_to_phys((*svm)->avic_backing_page));
				798	vcpu_info->vector = irq.vector;
				799
				800	return 0;
				801	}
				802
				803	/*
				804	* svm_update_pi_irte - set IRTE for Posted-Interrupts
				805	*
				806	* @kvm: kvm
				807	* @host_irq: host irq of the interrupt
				808	* @guest_irq: gsi of the interrupt
				809	* @set: set or unset PI
				810	* returns 0 on success, < 0 on failure
				811	*/
				812	int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
				813	uint32_t guest_irq, bool set)
				814	{
				815	struct kvm_kernel_irq_routing_entry *e;
				816	struct kvm_irq_routing_table *irq_rt;
				817	int idx, ret = -EINVAL;
				818
				819	if (!kvm_arch_has_assigned_device(kvm) \|\|
				820	!irq_remapping_cap(IRQ_POSTING_CAP))
				821	return 0;
				822
				823	pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
				824	__func__, host_irq, guest_irq, set);
				825
				826	idx = srcu_read_lock(&kvm->irq_srcu);
				827	irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
				828	WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
				829
				830	hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
				831	struct vcpu_data vcpu_info;
				832	struct vcpu_svm *svm = NULL;
				833
				834	if (e->type != KVM_IRQ_ROUTING_MSI)
				835	continue;
				836
				837	/**
				838	* Here, we setup with legacy mode in the following cases:
				839	* 1. When cannot target interrupt to a specific vcpu.
				840	* 2. Unsetting posted interrupt.
Ingo Molnar	d9f6e12	2021-03-18 15:28:01 +0100	[diff] [blame]	841	* 3. APIC virtualization is disabled for the vcpu.
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	842	* 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
				843	*/
				844	if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
				845	kvm_vcpu_apicv_active(&svm->vcpu)) {
				846	struct amd_iommu_pi_data pi;
				847
				848	/* Try to enable guest_mode in IRTE */
				849	pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
				850	AVIC_HPA_MASK);
				851	pi.ga_tag = AVIC_GATAG(to_kvm_svm(kvm)->avic_vm_id,
				852	svm->vcpu.vcpu_id);
				853	pi.is_guest_mode = true;
				854	pi.vcpu_data = &vcpu_info;
				855	ret = irq_set_vcpu_affinity(host_irq, &pi);
				856
				857	/**
				858	* Here, we successfully setting up vcpu affinity in
				859	* IOMMU guest mode. Now, we need to store the posted
				860	* interrupt information in a per-vcpu ir_list so that
				861	* we can reference to them directly when we update vcpu
				862	* scheduling information in IOMMU irte.
				863	*/
				864	if (!ret && pi.is_guest_mode)
				865	svm_ir_list_add(svm, &pi);
				866	} else {
				867	/* Use legacy mode in IRTE */
				868	struct amd_iommu_pi_data pi;
				869
				870	/**
				871	* Here, pi is used to:
				872	* - Tell IOMMU to use legacy mode for this interrupt.
				873	* - Retrieve ga_tag of prior interrupt remapping data.
				874	*/
Suravee Suthikulpanit	f6426ab	2020-10-03 23:27:07 +0000	[diff] [blame]	875	pi.prev_ga_tag = 0;
Joerg Roedel	ef0f649	2020-03-31 12:17:38 -0400	[diff] [blame]	876	pi.is_guest_mode = false;
				877	ret = irq_set_vcpu_affinity(host_irq, &pi);
				878
				879	/**
				880	* Check if the posted interrupt was previously
				881	* setup with the guest_mode by checking if the ga_tag
				882	* was cached. If so, we need to clean up the per-vcpu
				883	* ir_list.
				884	*/
				885	if (!ret && pi.prev_ga_tag) {
				886	int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
				887	struct kvm_vcpu *vcpu;
				888
				889	vcpu = kvm_get_vcpu_by_id(kvm, id);
				890	if (vcpu)
				891	svm_ir_list_del(to_svm(vcpu), &pi);
				892	}
				893	}
				894
				895	if (!ret && svm) {
				896	trace_kvm_pi_irte_update(host_irq, svm->vcpu.vcpu_id,
				897	e->gsi, vcpu_info.vector,
				898	vcpu_info.pi_desc_addr, set);
				899	}
				900
				901	if (ret < 0) {
				902	pr_err("%s: failed to update PI IRTE\n", __func__);
				903	goto out;
				904	}
				905	}
				906
				907	ret = 0;
				908	out:
				909	srcu_read_unlock(&kvm->irq_srcu, idx);
				910	return ret;
				911	}
				912
				913	bool svm_check_apicv_inhibit_reasons(ulong bit)
				914	{
				915	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) \|
				916	BIT(APICV_INHIBIT_REASON_HYPERV) \|
				917	BIT(APICV_INHIBIT_REASON_NESTED) \|
				918	BIT(APICV_INHIBIT_REASON_IRQWIN) \|
				919	BIT(APICV_INHIBIT_REASON_PIT_REINJ) \|
				920	BIT(APICV_INHIBIT_REASON_X2APIC);
				921
				922	return supported & BIT(bit);
				923	}
				924
				925	void svm_pre_update_apicv_exec_ctrl(struct kvm *kvm, bool activate)
				926	{
				927	avic_update_access_page(kvm, activate);
				928	}
				929
				930	static inline int
				931	avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
				932	{
				933	int ret = 0;
				934	unsigned long flags;
				935	struct amd_svm_iommu_ir *ir;
				936	struct vcpu_svm *svm = to_svm(vcpu);
				937
				938	if (!kvm_arch_has_assigned_device(vcpu->kvm))
				939	return 0;
				940
				941	/*
				942	* Here, we go through the per-vcpu ir_list to update all existing
				943	* interrupt remapping table entry targeting this vcpu.
				944	*/
				945	spin_lock_irqsave(&svm->ir_list_lock, flags);
				946
				947	if (list_empty(&svm->ir_list))
				948	goto out;
				949
				950	list_for_each_entry(ir, &svm->ir_list, node) {
				951	ret = amd_iommu_update_ga(cpu, r, ir->data);
				952	if (ret)
				953	break;
				954	}
				955	out:
				956	spin_unlock_irqrestore(&svm->ir_list_lock, flags);
				957	return ret;
				958	}
				959
				960	void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
				961	{
				962	u64 entry;
				963	/* ID = 0xff (broadcast), ID > 0xff (reserved) */
				964	int h_physical_id = kvm_cpu_get_apicid(cpu);
				965	struct vcpu_svm *svm = to_svm(vcpu);
				966
				967	if (!kvm_vcpu_apicv_active(vcpu))
				968	return;
				969
				970	/*
				971	* Since the host physical APIC id is 8 bits,
				972	* we can support host APIC ID upto 255.
				973	*/
				974	if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
				975	return;
				976
				977	entry = READ_ONCE(*(svm->avic_physical_id_cache));
				978	WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
				979
				980	entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
				981	entry \|= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
				982
				983	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
				984	if (svm->avic_is_running)
				985	entry \|= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
				986
				987	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
				988	avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
				989	svm->avic_is_running);
				990	}
				991
				992	void avic_vcpu_put(struct kvm_vcpu *vcpu)
				993	{
				994	u64 entry;
				995	struct vcpu_svm *svm = to_svm(vcpu);
				996
				997	if (!kvm_vcpu_apicv_active(vcpu))
				998	return;
				999
				1000	entry = READ_ONCE(*(svm->avic_physical_id_cache));
				1001	if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
				1002	avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
				1003
				1004	entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
				1005	WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
				1006	}
				1007
				1008	/**
				1009	* This function is called during VCPU halt/unhalt.
				1010	*/
				1011	static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
				1012	{
				1013	struct vcpu_svm *svm = to_svm(vcpu);
				1014
				1015	svm->avic_is_running = is_run;
				1016	if (is_run)
				1017	avic_vcpu_load(vcpu, vcpu->cpu);
				1018	else
				1019	avic_vcpu_put(vcpu);
				1020	}
				1021
				1022	void svm_vcpu_blocking(struct kvm_vcpu *vcpu)
				1023	{
				1024	avic_set_running(vcpu, false);
				1025	}
				1026
				1027	void svm_vcpu_unblocking(struct kvm_vcpu *vcpu)
				1028	{
				1029	if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu))
				1030	kvm_vcpu_update_apicv(vcpu);
				1031	avic_set_running(vcpu, true);
				1032	}