// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * KVM Xen emulation
 */

#include "x86.h"
#include "xen.h"
#include "hyperv.h"

#include <linux/kvm_host.h>
#include <linux/sched/stat.h>

#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/event_channel.h>

#include "trace.h"

DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);

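/*
 * Set up (or tear down, when gfn == GPA_INVALID) the pfn cache for the
 * guest's shared_info page and seed its wallclock fields. The wallclock
 * offsets differ between the 32-bit (compat) and 64-bit shared_info
 * layouts, so the latched long_mode setting selects which offsets are
 * written.
 */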
static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
{
	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
	gpa_t gpa = gfn_to_gpa(gfn);
	int wc_ofs, sec_hi_ofs;
	int ret = 0;
	int idx = srcu_read_lock(&kvm->srcu);

	if (gfn == GPA_INVALID) {
		kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
		goto out;
	}

	ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, false, true, gpa,
					PAGE_SIZE, false);
	if (ret)
		goto out;

	/* Paranoia checks on the 32-bit struct layout */
	BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
	BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);

	/* 32-bit location by default */
	wc_ofs = offsetof(struct compat_shared_info, wc);
	sec_hi_ofs = offsetof(struct compat_shared_info, arch.wc_sec_hi);

#ifdef CONFIG_X86_64
	/* Paranoia checks on the 64-bit struct layout */
	BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
	BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);

	if (kvm->arch.xen.long_mode) {
		wc_ofs = offsetof(struct shared_info, wc);
		sec_hi_ofs = offsetof(struct shared_info, wc_sec_hi);
	}
#endif

	kvm_write_wall_clock(kvm, gpa + wc_ofs, sec_hi_ofs - wc_ofs);
	kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);

out:
	srcu_read_unlock(&kvm->srcu, idx);
	return ret;
}

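/*
 * Accumulate the time spent in the previous runstate and switch to the new
 * one. While the vCPU was RUNSTATE_running, any additional scheduler delay
 * (current->sched_info.run_delay) is accounted as RUNSTATE_runnable "steal"
 * time rather than as running time.
 */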
static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
{
	struct kvm_vcpu_xen *vx = &v->arch.xen;
	u64 now = get_kvmclock_ns(v->kvm);
	u64 delta_ns = now - vx->runstate_entry_time;
	u64 run_delay = current->sched_info.run_delay;

	if (unlikely(!vx->runstate_entry_time))
		vx->current_runstate = RUNSTATE_offline;

	/*
	 * Time waiting for the scheduler isn't "stolen" if the
	 * vCPU wasn't running anyway.
	 */
	if (vx->current_runstate == RUNSTATE_running) {
		u64 steal_ns = run_delay - vx->last_steal;

		delta_ns -= steal_ns;

		vx->runstate_times[RUNSTATE_runnable] += steal_ns;
	}
	vx->last_steal = run_delay;

	vx->runstate_times[vx->current_runstate] += delta_ns;
	vx->current_runstate = state;
	vx->runstate_entry_time = now;
}

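/*
 * Update the runstate area shared with the guest. The XEN_RUNSTATE_UPDATE
 * bit is set in state_entry_time before the state and times are rewritten
 * and cleared again afterwards, with smp_wmb() between the steps, so a
 * guest reader can detect and retry around a torn update.
 */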
void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
{
	struct kvm_vcpu_xen *vx = &v->arch.xen;
	uint64_t state_entry_time;
	unsigned int offset;

	kvm_xen_update_runstate(v, state);

	if (!vx->runstate_set)
		return;

	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);

	offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
#ifdef CONFIG_X86_64
	/*
	 * The only difference is alignment of uint64_t in 32-bit.
	 * So the first field 'state' is accessed directly using
	 * offsetof() (where its offset happens to be zero), while the
	 * remaining fields which are all uint64_t, start at 'offset'
	 * which we tweak here by adding 4.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
		     offsetof(struct compat_vcpu_runstate_info, time) + 4);

	if (v->kvm->arch.xen.long_mode)
		offset = offsetof(struct vcpu_runstate_info, state_entry_time);
#endif
	/*
	 * First write the updated state_entry_time at the appropriate
	 * location determined by 'offset'.
	 */
	state_entry_time = vx->runstate_entry_time;
	state_entry_time |= XEN_RUNSTATE_UPDATE;

	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
		     sizeof(state_entry_time));
	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
		     sizeof(state_entry_time));

	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &state_entry_time, offset,
					  sizeof(state_entry_time)))
		return;
	smp_wmb();

	/*
	 * Next, write the new runstate. This is in the *same* place
	 * for 32-bit and 64-bit guests, asserted here for paranoia.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
		     offsetof(struct compat_vcpu_runstate_info, state));
	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
		     sizeof(vx->current_runstate));
	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
		     sizeof(vx->current_runstate));

	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &vx->current_runstate,
					  offsetof(struct vcpu_runstate_info, state),
					  sizeof(vx->current_runstate)))
		return;

	/*
	 * Write the actual runstate times immediately after the
	 * runstate_entry_time.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
		     sizeof_field(struct compat_vcpu_runstate_info, time));
	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
		     sizeof(vx->runstate_times));

	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &vx->runstate_times[0],
					  offset + sizeof(u64),
					  sizeof(vx->runstate_times)))
		return;

	smp_wmb();

	/*
	 * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
	 * runstate_entry_time field.
	 */
	state_entry_time &= ~XEN_RUNSTATE_UPDATE;
	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &state_entry_time, offset,
					  sizeof(state_entry_time)))
		return;
}

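/*
 * Returns 1 if the vCPU's vcpu_info->evtchn_upcall_pending byte is set (or
 * cannot safely be read right now), 0 otherwise. If the in-kernel shadow of
 * evtchn_pending_sel has bits set, they are first propagated to the guest's
 * vcpu_info and evtchn_upcall_pending is raised.
 */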
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
	unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
	bool atomic = in_atomic() || !task_is_running(current);
	int err;
	u8 rc = 0;

	/*
	 * If the global upcall vector (HVMIRQ_callback_vector) is set and
	 * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
	 */
	struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
	struct kvm_memslots *slots = kvm_memslots(v->kvm);
	bool ghc_valid = slots->generation == ghc->generation &&
		!kvm_is_error_hva(ghc->hva) && ghc->memslot;

	unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);

	/* No need for compat handling here */
	BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
		     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof_field(struct vcpu_info, evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));

	/*
	 * For efficiency, this mirrors the checks for using the valid
	 * cache in kvm_read_guest_offset_cached(), but just uses
	 * __get_user() instead, and falls back to the slow path.
	 */
	if (!evtchn_pending_sel && ghc_valid) {
		/* Fast path */
		pagefault_disable();
		err = __get_user(rc, (u8 __user *)ghc->hva + offset);
		pagefault_enable();
		if (!err)
			return rc;
	}

	/* Slow path */

	/*
	 * This function gets called from kvm_vcpu_block() after setting the
	 * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
	 * from a HLT. So we really mustn't sleep. If the page ended up absent
	 * at that point, just return 1 in order to trigger an immediate wake,
	 * and we'll end up getting called again from a context where we *can*
	 * fault in the page and wait for it.
	 */
	if (atomic)
		return 1;

	if (!ghc_valid) {
		err = kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len);
		if (err || !ghc->memslot) {
			/*
			 * If this failed, userspace has screwed up the
			 * vcpu_info mapping. No interrupts for you.
			 */
			return 0;
		}
	}

	/*
	 * Now we have a valid (protected by srcu) userspace HVA in
	 * ghc->hva which points to the struct vcpu_info. If there
	 * are any bits in the in-kernel evtchn_pending_sel then
	 * we need to write those to the guest vcpu_info and set
	 * its evtchn_upcall_pending flag. If there aren't any bits
	 * to add, we only want to *check* evtchn_upcall_pending.
	 */
	if (evtchn_pending_sel) {
		bool long_mode = v->kvm->arch.xen.long_mode;

		if (!user_access_begin((void __user *)ghc->hva, sizeof(struct vcpu_info)))
			return 0;

		if (IS_ENABLED(CONFIG_64BIT) && long_mode) {
			struct vcpu_info __user *vi = (void __user *)ghc->hva;

			/* Attempt to set the evtchn_pending_sel bits in the
			 * guest, and if that succeeds then clear the same
			 * bits in the in-kernel version. */
			asm volatile("1:\t" LOCK_PREFIX "orq %0, %1\n"
				     "\tnotq %0\n"
				     "\t" LOCK_PREFIX "andq %0, %2\n"
				     "2:\n"
				     "\t.section .fixup,\"ax\"\n"
				     "3:\tjmp\t2b\n"
				     "\t.previous\n"
				     _ASM_EXTABLE_UA(1b, 3b)
				     : "=r" (evtchn_pending_sel),
				       "+m" (vi->evtchn_pending_sel),
				       "+m" (v->arch.xen.evtchn_pending_sel)
				     : "0" (evtchn_pending_sel));
		} else {
			struct compat_vcpu_info __user *vi = (void __user *)ghc->hva;
			u32 evtchn_pending_sel32 = evtchn_pending_sel;

			/* Attempt to set the evtchn_pending_sel bits in the
			 * guest, and if that succeeds then clear the same
			 * bits in the in-kernel version. */
			asm volatile("1:\t" LOCK_PREFIX "orl %0, %1\n"
				     "\tnotl %0\n"
				     "\t" LOCK_PREFIX "andl %0, %2\n"
				     "2:\n"
				     "\t.section .fixup,\"ax\"\n"
				     "3:\tjmp\t2b\n"
				     "\t.previous\n"
				     _ASM_EXTABLE_UA(1b, 3b)
				     : "=r" (evtchn_pending_sel32),
				       "+m" (vi->evtchn_pending_sel),
				       "+m" (v->arch.xen.evtchn_pending_sel)
				     : "0" (evtchn_pending_sel32));
		}
		rc = 1;
		unsafe_put_user(rc, (u8 __user *)ghc->hva + offset, err);

 err:
		user_access_end();

		mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
	} else {
		__get_user(rc, (u8 __user *)ghc->hva + offset);
	}

	return rc;
}

int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&kvm->lock);

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
			r = -EINVAL;
		} else {
			kvm->arch.xen.long_mode = !!data->u.long_mode;
			r = 0;
		}
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
		r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
		break;

	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		if (data->u.vector && data->u.vector < 0x10)
			r = -EINVAL;
		else {
			kvm->arch.xen.upcall_vector = data->u.vector;
			r = 0;
		}
		break;

	default:
		break;
	}

	mutex_unlock(&kvm->lock);
	return r;
}

int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&kvm->lock);

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		data->u.long_mode = kvm->arch.xen.long_mode;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
		if (kvm->arch.xen.shinfo_cache.active)
			data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
		else
			data->u.shared_info.gfn = GPA_INVALID;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		data->u.vector = kvm->arch.xen.upcall_vector;
		r = 0;
		break;

	default:
		break;
	}

	mutex_unlock(&kvm->lock);
	return r;
}

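/*
 * Per-vCPU attributes: the guest physical addresses of the vcpu_info,
 * pvclock time info and runstate areas are cached via gfn_to_hva caches,
 * and the runstate accounting can be seeded or adjusted by userspace
 * (e.g. when restoring a migrated vCPU).
 */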
int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int idx, r = -ENOENT;

	mutex_lock(&vcpu->kvm->lock);
	idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
		/* No compat necessary here. */
		BUILD_BUG_ON(sizeof(struct vcpu_info) !=
			     sizeof(struct compat_vcpu_info));
		BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
			     offsetof(struct compat_vcpu_info, time));

		if (data->u.gpa == GPA_INVALID) {
			vcpu->arch.xen.vcpu_info_set = false;
			r = 0;
			break;
		}

		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
					      &vcpu->arch.xen.vcpu_info_cache,
					      data->u.gpa,
					      sizeof(struct vcpu_info));
		if (!r) {
			vcpu->arch.xen.vcpu_info_set = true;
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (data->u.gpa == GPA_INVALID) {
			vcpu->arch.xen.vcpu_time_info_set = false;
			r = 0;
			break;
		}

		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
					      &vcpu->arch.xen.vcpu_time_info_cache,
					      data->u.gpa,
					      sizeof(struct pvclock_vcpu_time_info));
		if (!r) {
			vcpu->arch.xen.vcpu_time_info_set = true;
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.gpa == GPA_INVALID) {
			vcpu->arch.xen.runstate_set = false;
			r = 0;
			break;
		}

		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
					      &vcpu->arch.xen.runstate_cache,
					      data->u.gpa,
					      sizeof(struct vcpu_runstate_info));
		if (!r)
			vcpu->arch.xen.runstate_set = true;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}

		kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}
		if (get_kvmclock_ns(vcpu->kvm) <
		    data->u.runstate.state_entry_time) {
			r = -EINVAL;
			break;
		}

		vcpu->arch.xen.current_runstate = data->u.runstate.state;
		vcpu->arch.xen.runstate_entry_time =
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] =
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
			data->u.runstate.time_offline;
		vcpu->arch.xen.last_steal = current->sched_info.run_delay;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline &&
		    data->u.runstate.state != (u64)-1) {
			r = -EINVAL;
			break;
		}
		/* The adjustment must add up */
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}

		if (get_kvmclock_ns(vcpu->kvm) <
		    (vcpu->arch.xen.runstate_entry_time +
		     data->u.runstate.state_entry_time)) {
			r = -EINVAL;
			break;
		}

		vcpu->arch.xen.runstate_entry_time +=
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
			data->u.runstate.time_offline;

		if (data->u.runstate.state <= RUNSTATE_offline)
			kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		r = 0;
		break;

	default:
		break;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	mutex_unlock(&vcpu->kvm->lock);
	return r;
}

int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&vcpu->kvm->lock);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
		if (vcpu->arch.xen.vcpu_info_set)
			data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
		else
			data->u.gpa = GPA_INVALID;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (vcpu->arch.xen.vcpu_time_info_set)
			data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
		else
			data->u.gpa = GPA_INVALID;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (vcpu->arch.xen.runstate_set) {
			data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
			r = 0;
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		data->u.runstate.state_entry_time =
			vcpu->arch.xen.runstate_entry_time;
		data->u.runstate.time_running =
			vcpu->arch.xen.runstate_times[RUNSTATE_running];
		data->u.runstate.time_runnable =
			vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
		data->u.runstate.time_blocked =
			vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
		data->u.runstate.time_offline =
			vcpu->arch.xen.runstate_times[RUNSTATE_offline];
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		r = -EINVAL;
		break;

	default:
		break;
	}

	mutex_unlock(&vcpu->kvm->lock);
	return r;
}

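/*
 * Populate the guest's hypercall page (written via the MSR configured in
 * kvm->arch.xen_hvm_config.msr). With KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL the
 * page is filled with 32-byte stubs, one per hypercall number: each loads its
 * index into %eax, executes VMCALL/VMMCALL and returns. Otherwise the page
 * contents are copied from the userspace-provided blob.
 */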
int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	bool lm = is_long_mode(vcpu);

	/* Latch long_mode for shared_info pages etc. */
	vcpu->kvm->arch.xen.long_mode = lm;

	/*
	 * If Xen hypercall intercept is enabled, fill the hypercall
	 * page with VMCALL/VMMCALL instructions since that's what
	 * we catch. Else the VMM has provided the hypercall pages
	 * with instructions of its own choosing, so use those.
	 */
	if (kvm_xen_hypercall_enabled(kvm)) {
		u8 instructions[32];
		int i;

		if (page_num)
			return 1;

		/* mov imm32, %eax */
		instructions[0] = 0xb8;

		/* vmcall / vmmcall */
		kvm_x86_ops.patch_hypercall(vcpu, instructions + 5);

		/* ret */
		instructions[8] = 0xc3;

		/* int3 to pad */
		memset(instructions + 9, 0xcc, sizeof(instructions) - 9);

		for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
			*(u32 *)&instructions[1] = i;
			if (kvm_vcpu_write_guest(vcpu,
						 page_addr + (i * sizeof(instructions)),
						 instructions, sizeof(instructions)))
				return 1;
		}
	} else {
		/*
		 * Note, truncation is a non-issue as 'lm' is guaranteed to be
		 * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
		 */
		hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
				     : kvm->arch.xen_hvm_config.blob_addr_32;
		u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
				  : kvm->arch.xen_hvm_config.blob_size_32;
		u8 *page;

		if (page_num >= blob_size)
			return 1;

		blob_addr += page_num * PAGE_SIZE;

		page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
			kfree(page);
			return 1;
		}

		/* Free the memdup_user() copy on the success path too. */
		kfree(page);
	}
	return 0;
}

int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
{
	if (xhc->flags & ~KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)
		return -EINVAL;

	/*
	 * With hypercall interception the kernel generates its own
	 * hypercall page so it must not be provided.
	 */
	if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
	    (xhc->blob_addr_32 || xhc->blob_addr_64 ||
	     xhc->blob_size_32 || xhc->blob_size_64))
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
		static_branch_inc(&kvm_xen_enabled.key);
	else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);

	memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));

	mutex_unlock(&kvm->lock);
	return 0;
}

void kvm_xen_init_vm(struct kvm *kvm)
{
}

void kvm_xen_destroy_vm(struct kvm *kvm)
{
	kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);

	if (kvm->arch.xen_hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);
}

static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_rax_write(vcpu, result);
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;

	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
		return 1;

	return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
}

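/*
 * Forward a Xen hypercall to userspace as a KVM_EXIT_XEN exit. The argument
 * registers follow the Xen ABI: rdi/rsi/rdx/r10/r8/r9 for 64-bit guests,
 * ebx/ecx/edx/esi/edi/ebp for 32-bit guests. Completion (writing the result
 * back to %rax and skipping the instruction) happens when userspace re-enters
 * the vCPU.
 */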
int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
{
	bool longmode;
	u64 input, params[6];

	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);

	/* Hyper-V hypercalls get bit 31 set in EAX */
	if ((input & 0x80000000) &&
	    kvm_hv_hypercall_enabled(vcpu))
		return kvm_hv_hypercall(vcpu);

	longmode = is_64_bit_hypercall(vcpu);
	if (!longmode) {
		params[0] = (u32)kvm_rbx_read(vcpu);
		params[1] = (u32)kvm_rcx_read(vcpu);
		params[2] = (u32)kvm_rdx_read(vcpu);
		params[3] = (u32)kvm_rsi_read(vcpu);
		params[4] = (u32)kvm_rdi_read(vcpu);
		params[5] = (u32)kvm_rbp_read(vcpu);
	}
#ifdef CONFIG_X86_64
	else {
		params[0] = (u64)kvm_rdi_read(vcpu);
		params[1] = (u64)kvm_rsi_read(vcpu);
		params[2] = (u64)kvm_rdx_read(vcpu);
		params[3] = (u64)kvm_r10_read(vcpu);
		params[4] = (u64)kvm_r8_read(vcpu);
		params[5] = (u64)kvm_r9_read(vcpu);
	}
#endif
	trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
				params[3], params[4], params[5]);

	vcpu->run->exit_reason = KVM_EXIT_XEN;
	vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
	vcpu->run->xen.u.hcall.longmode = longmode;
	vcpu->run->xen.u.hcall.cpl = kvm_x86_ops.get_cpl(vcpu);
	vcpu->run->xen.u.hcall.input = input;
	vcpu->run->xen.u.hcall.params[0] = params[0];
	vcpu->run->xen.u.hcall.params[1] = params[1];
	vcpu->run->xen.u.hcall.params[2] = params[2];
	vcpu->run->xen.u.hcall.params[3] = params[3];
	vcpu->run->xen.u.hcall.params[4] = params[4];
	vcpu->run->xen.u.hcall.params[5] = params[5];
	vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
	vcpu->arch.complete_userspace_io =
		kvm_xen_hypercall_complete_userspace;

	return 0;
}

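/*
 * With 2-level event channels the number of ports is bounded by the size of
 * the evtchn_pending/evtchn_mask arrays in shared_info: 4096 ports for
 * 64-bit guests and 1024 for 32-bit (compat) guests.
 */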
static inline int max_evtchn_port(struct kvm *kvm)
{
	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
		return EVTCHN_2L_NR_CHANNELS;
	else
		return COMPAT_EVTCHN_2L_NR_CHANNELS;
}

/*
 * This follows the kvm_set_irq() API, so it returns:
 * < 0   Interrupt was ignored (masked or not delivered for other reasons)
 * = 0   Interrupt was coalesced (previous irq is still pending)
 * > 0   Number of CPUs interrupt was delivered to
 */
int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
			    struct kvm *kvm)
{
	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
	struct kvm_vcpu *vcpu;
	unsigned long *pending_bits, *mask_bits;
	unsigned long flags;
	int port_word_bit;
	bool kick_vcpu = false;
	int idx;
	int rc;

	vcpu = kvm_get_vcpu_by_id(kvm, e->xen_evtchn.vcpu);
	if (!vcpu)
		return -1;

	if (!vcpu->arch.xen.vcpu_info_set)
		return -1;

	if (e->xen_evtchn.port >= max_evtchn_port(kvm))
		return -1;

	rc = -EWOULDBLOCK;
	read_lock_irqsave(&gpc->lock, flags);

	idx = srcu_read_lock(&kvm->srcu);
	if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
		goto out_rcu;

	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
		struct shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
		mask_bits = (unsigned long *)&shinfo->evtchn_mask;
		port_word_bit = e->xen_evtchn.port / 64;
	} else {
		struct compat_shared_info *shinfo = gpc->khva;
		pending_bits = (unsigned long *)&shinfo->evtchn_pending;
		mask_bits = (unsigned long *)&shinfo->evtchn_mask;
		port_word_bit = e->xen_evtchn.port / 32;
	}

	/*
	 * If this port wasn't already set, and if it isn't masked, then
	 * we try to set the corresponding bit in the in-kernel shadow of
	 * evtchn_pending_sel for the target vCPU. And if *that* wasn't
	 * already set, then we kick the vCPU in question to write to the
	 * *real* evtchn_pending_sel in its own guest vcpu_info struct.
	 */
	if (test_and_set_bit(e->xen_evtchn.port, pending_bits)) {
		rc = 0; /* It was already raised */
	} else if (test_bit(e->xen_evtchn.port, mask_bits)) {
		rc = -1; /* Masked */
	} else {
		rc = 1; /* Delivered. But was the vCPU waking already? */
		if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
			kick_vcpu = true;
	}

 out_rcu:
	srcu_read_unlock(&kvm->srcu, idx);
	read_unlock_irqrestore(&gpc->lock, flags);

	if (kick_vcpu) {
		kvm_make_request(KVM_REQ_EVENT, vcpu);
		kvm_vcpu_kick(vcpu);
	}

	return rc;
}

/* This is the version called from kvm_set_irq() as the .set function */
static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
			 int irq_source_id, int level, bool line_status)
{
	bool mm_borrowed = false;
	int rc;

	if (!level)
		return -1;

	rc = kvm_xen_set_evtchn_fast(e, kvm);
	if (rc != -EWOULDBLOCK)
		return rc;

	if (current->mm != kvm->mm) {
		/*
		 * If not on a thread which already belongs to this KVM,
		 * we'd better be in the irqfd workqueue.
		 */
		if (WARN_ON_ONCE(current->mm))
			return -EINVAL;

		kthread_use_mm(kvm->mm);
		mm_borrowed = true;
	}

	/*
	 * For the irqfd workqueue, using the main kvm->lock mutex is
	 * fine since this function is invoked from kvm_set_irq() with
	 * no other lock held, no srcu. In the future, if it is called
	 * directly from a vCPU thread (e.g. on hypercall for an IPI),
	 * it may need to switch to using a leaf-node mutex for
	 * serializing the shared_info mapping.
	 */
	mutex_lock(&kvm->lock);

	/*
	 * It is theoretically possible for the page to be unmapped
	 * and the MMU notifier to invalidate the shared_info before
	 * we even get to use it. In that case, this looks like an
	 * infinite loop. It was tempting to do it via the userspace
	 * HVA instead... but that just *hides* the fact that it's
	 * an infinite loop, because if a fault occurs and it waits
	 * for the page to come back, it can *still* immediately
	 * fault and have to wait again, repeatedly.
	 *
	 * Conversely, the page could also have been reinstated by
	 * another thread before we even obtain the mutex above, so
	 * check again *first* before remapping it.
	 */
	do {
		struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
		int idx;

		rc = kvm_xen_set_evtchn_fast(e, kvm);
		if (rc != -EWOULDBLOCK)
			break;

		idx = srcu_read_lock(&kvm->srcu);
		rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa,
						  PAGE_SIZE, false);
		srcu_read_unlock(&kvm->srcu, idx);
	} while (!rc);

	mutex_unlock(&kvm->lock);

	if (mm_borrowed)
		kthread_unuse_mm(kvm->mm);

	return rc;
}

int kvm_xen_setup_evtchn(struct kvm *kvm,
			 struct kvm_kernel_irq_routing_entry *e,
			 const struct kvm_irq_routing_entry *ue)
{
	if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
		return -EINVAL;

	/* We only support 2 level event channels for now */
	if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
		return -EINVAL;

	e->xen_evtchn.port = ue->u.xen_evtchn.port;
	e->xen_evtchn.vcpu = ue->u.xen_evtchn.vcpu;
	e->xen_evtchn.priority = ue->u.xen_evtchn.priority;
	e->set = evtchn_set_fn;

	return 0;
}