Blame - arch/x86/kvm/xen.c - SHIFTPHONES/mainline/linux

blob: bad57535fad0825608c91e9cdd590c9fc0040a42 [file] [log] [blame]

Joao Martins	23200b7	2018-06-13 09:55:44 -0400	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/*
				3	* Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
				4	* Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
				5	*
				6	* KVM Xen emulation
				7	*/
				8
				9	#include "x86.h"
				10	#include "xen.h"
Joao Martins	79033be	2018-06-13 09:55:44 -0400	[diff] [blame]	11	#include "hyperv.h"
Joao Martins	23200b7	2018-06-13 09:55:44 -0400	[diff] [blame]	12
				13	#include <linux/kvm_host.h>
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	14	#include <linux/sched/stat.h>
Joao Martins	23200b7	2018-06-13 09:55:44 -0400	[diff] [blame]	15
				16	#include <trace/events/kvm.h>
Joao Martins	13ffb97	2018-06-15 21:17:14 -0400	[diff] [blame]	17	#include <xen/interface/xen.h>
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	18	#include <xen/interface/vcpu.h>
David Woodhouse	14243b3	2021-12-10 16:36:23 +0000	[diff] [blame]	19	#include <xen/interface/event_channel.h>
Joao Martins	23200b7	2018-06-13 09:55:44 -0400	[diff] [blame]	20
				21	#include "trace.h"
				22
David Woodhouse	7d6bbeb	2021-02-02 15:48:05 +0000	[diff] [blame]	23	DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);
				24
Joao Martins	13ffb97	2018-06-15 21:17:14 -0400	[diff] [blame]	25	static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
				26	{
David Woodhouse	1cfc9c4	2021-12-10 16:36:22 +0000	[diff] [blame]	27	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
David Woodhouse	5574976	2021-12-10 16:36:24 +0000	[diff] [blame]	28	struct pvclock_wall_clock *wc;
Joao Martins	629b534	2018-06-28 15:06:43 -0400	[diff] [blame]	29	gpa_t gpa = gfn_to_gpa(gfn);
David Woodhouse	5574976	2021-12-10 16:36:24 +0000	[diff] [blame]	30	u32 *wc_sec_hi;
				31	u32 wc_version;
				32	u64 wall_nsec;
Paolo Bonzini	319afe6	2021-08-04 12:48:41 -0400	[diff] [blame]	33	int ret = 0;
Joao Martins	13ffb97	2018-06-15 21:17:14 -0400	[diff] [blame]	34	int idx = srcu_read_lock(&kvm->srcu);
				35
David Woodhouse	1cfc9c4	2021-12-10 16:36:22 +0000	[diff] [blame]	36	if (gfn == GPA_INVALID) {
				37	kvm_gfn_to_pfn_cache_destroy(kvm, gpc);
Joao Martins	629b534	2018-06-28 15:06:43 -0400	[diff] [blame]	38	goto out;
Paolo Bonzini	319afe6	2021-08-04 12:48:41 -0400	[diff] [blame]	39	}
David Woodhouse	1cfc9c4	2021-12-10 16:36:22 +0000	[diff] [blame]	40
David Woodhouse	5574976	2021-12-10 16:36:24 +0000	[diff] [blame]	41	do {
				42	ret = kvm_gfn_to_pfn_cache_init(kvm, gpc, NULL, false, true,
				43	gpa, PAGE_SIZE, false);
				44	if (ret)
				45	goto out;
				46
				47	/*
				48	* This code mirrors kvm_write_wall_clock() except that it writes
				49	* directly through the pfn cache and doesn't mark the page dirty.
				50	*/
				51	wall_nsec = ktime_get_real_ns() - get_kvmclock_ns(kvm);
				52
				53	/* It could be invalid again already, so we need to check */
				54	read_lock_irq(&gpc->lock);
				55
				56	if (gpc->valid)
				57	break;
				58
				59	read_unlock_irq(&gpc->lock);
				60	} while (1);
Joao Martins	629b534	2018-06-28 15:06:43 -0400	[diff] [blame]	61
				62	/* Paranoia checks on the 32-bit struct layout */
				63	BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
				64	BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
				65	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
				66
Joao Martins	629b534	2018-06-28 15:06:43 -0400	[diff] [blame]	67	#ifdef CONFIG_X86_64
				68	/* Paranoia checks on the 64-bit struct layout */
				69	BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
				70	BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);
				71
David Woodhouse	5574976	2021-12-10 16:36:24 +0000	[diff] [blame]	72	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
				73	struct shared_info *shinfo = gpc->khva;
Joao Martins	629b534	2018-06-28 15:06:43 -0400	[diff] [blame]	74
David Woodhouse	5574976	2021-12-10 16:36:24 +0000	[diff] [blame]	75	wc_sec_hi = &shinfo->wc_sec_hi;
				76	wc = &shinfo->wc;
				77	} else
				78	#endif
				79	{
				80	struct compat_shared_info *shinfo = gpc->khva;
				81
				82	wc_sec_hi = &shinfo->arch.wc_sec_hi;
				83	wc = &shinfo->wc;
				84	}
				85
				86	/* Increment and ensure an odd value */
				87	wc_version = wc->version = (wc->version + 1) \| 1;
				88	smp_wmb();
				89
				90	wc->nsec = do_div(wall_nsec, 1000000000);
				91	wc->sec = (u32)wall_nsec;
				92	*wc_sec_hi = wall_nsec >> 32;
				93	smp_wmb();
				94
				95	wc->version = wc_version + 1;
				96	read_unlock_irq(&gpc->lock);
				97
Joao Martins	629b534	2018-06-28 15:06:43 -0400	[diff] [blame]	98	kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);
				99
				100	out:
Joao Martins	13ffb97	2018-06-15 21:17:14 -0400	[diff] [blame]	101	srcu_read_unlock(&kvm->srcu, idx);
				102	return ret;
				103	}
				104
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	105	static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
				106	{
				107	struct kvm_vcpu_xen *vx = &v->arch.xen;
				108	u64 now = get_kvmclock_ns(v->kvm);
				109	u64 delta_ns = now - vx->runstate_entry_time;
				110	u64 run_delay = current->sched_info.run_delay;
				111
				112	if (unlikely(!vx->runstate_entry_time))
				113	vx->current_runstate = RUNSTATE_offline;
				114
				115	/*
				116	* Time waiting for the scheduler isn't "stolen" if the
				117	* vCPU wasn't running anyway.
				118	*/
				119	if (vx->current_runstate == RUNSTATE_running) {
				120	u64 steal_ns = run_delay - vx->last_steal;
				121
				122	delta_ns -= steal_ns;
				123
				124	vx->runstate_times[RUNSTATE_runnable] += steal_ns;
				125	}
				126	vx->last_steal = run_delay;
				127
				128	vx->runstate_times[vx->current_runstate] += delta_ns;
				129	vx->current_runstate = state;
				130	vx->runstate_entry_time = now;
				131	}
				132
				133	void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
				134	{
				135	struct kvm_vcpu_xen *vx = &v->arch.xen;
				136	uint64_t state_entry_time;
				137	unsigned int offset;
				138
				139	kvm_xen_update_runstate(v, state);
				140
				141	if (!vx->runstate_set)
				142	return;
				143
				144	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
				145
				146	offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
				147	#ifdef CONFIG_X86_64
				148	/*
				149	* The only difference is alignment of uint64_t in 32-bit.
				150	* So the first field 'state' is accessed directly using
				151	* offsetof() (where its offset happens to be zero), while the
				152	* remaining fields which are all uint64_t, start at 'offset'
				153	* which we tweak here by adding 4.
				154	*/
				155	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
				156	offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
				157	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
				158	offsetof(struct compat_vcpu_runstate_info, time) + 4);
				159
				160	if (v->kvm->arch.xen.long_mode)
				161	offset = offsetof(struct vcpu_runstate_info, state_entry_time);
				162	#endif
				163	/*
				164	* First write the updated state_entry_time at the appropriate
				165	* location determined by 'offset'.
				166	*/
				167	state_entry_time = vx->runstate_entry_time;
				168	state_entry_time \|= XEN_RUNSTATE_UPDATE;
				169
David Woodhouse	6a83475	2021-11-15 16:50:23 +0000	[diff] [blame]	170	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	171	sizeof(state_entry_time));
David Woodhouse	6a83475	2021-11-15 16:50:23 +0000	[diff] [blame]	172	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	173	sizeof(state_entry_time));
				174
				175	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
				176	&state_entry_time, offset,
				177	sizeof(state_entry_time)))
				178	return;
				179	smp_wmb();
				180
				181	/*
				182	* Next, write the new runstate. This is in the same place
				183	* for 32-bit and 64-bit guests, asserted here for paranoia.
				184	*/
				185	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
				186	offsetof(struct compat_vcpu_runstate_info, state));
David Woodhouse	6a83475	2021-11-15 16:50:23 +0000	[diff] [blame]	187	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	188	sizeof(vx->current_runstate));
David Woodhouse	6a83475	2021-11-15 16:50:23 +0000	[diff] [blame]	189	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	190	sizeof(vx->current_runstate));
				191
				192	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
				193	&vx->current_runstate,
				194	offsetof(struct vcpu_runstate_info, state),
				195	sizeof(vx->current_runstate)))
				196	return;
				197
				198	/*
				199	* Write the actual runstate times immediately after the
				200	* runstate_entry_time.
				201	*/
				202	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
				203	offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
				204	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
				205	offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
David Woodhouse	6a83475	2021-11-15 16:50:23 +0000	[diff] [blame]	206	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
				207	sizeof_field(struct compat_vcpu_runstate_info, time));
				208	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	209	sizeof(vx->runstate_times));
				210
				211	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
				212	&vx->runstate_times[0],
				213	offset + sizeof(u64),
				214	sizeof(vx->runstate_times)))
				215	return;
				216
				217	smp_wmb();
				218
				219	/*
				220	* Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
				221	* runstate_entry_time field.
				222	*/
				223
				224	state_entry_time &= ~XEN_RUNSTATE_UPDATE;
				225	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
				226	&state_entry_time, offset,
				227	sizeof(state_entry_time)))
				228	return;
				229	}
				230
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	231	int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
				232	{
David Woodhouse	14243b3	2021-12-10 16:36:23 +0000	[diff] [blame]	233	unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
				234	bool atomic = in_atomic() \|\| !task_is_running(current);
David Woodhouse	0985dba	2021-10-23 20:47:19 +0100	[diff] [blame]	235	int err;
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	236	u8 rc = 0;
				237
				238	/*
				239	* If the global upcall vector (HVMIRQ_callback_vector) is set and
				240	* the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
				241	*/
				242	struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
				243	struct kvm_memslots *slots = kvm_memslots(v->kvm);
David Woodhouse	14243b3	2021-12-10 16:36:23 +0000	[diff] [blame]	244	bool ghc_valid = slots->generation == ghc->generation &&
				245	!kvm_is_error_hva(ghc->hva) && ghc->memslot;
				246
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	247	unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
				248
				249	/* No need for compat handling here */
				250	BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
				251	offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
				252	BUILD_BUG_ON(sizeof(rc) !=
David Woodhouse	6a83475	2021-11-15 16:50:23 +0000	[diff] [blame]	253	sizeof_field(struct vcpu_info, evtchn_upcall_pending));
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	254	BUILD_BUG_ON(sizeof(rc) !=
David Woodhouse	6a83475	2021-11-15 16:50:23 +0000	[diff] [blame]	255	sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	256
				257	/*
				258	* For efficiency, this mirrors the checks for using the valid
				259	* cache in kvm_read_guest_offset_cached(), but just uses
				260	* __get_user() instead. And falls back to the slow path.
				261	*/
David Woodhouse	14243b3	2021-12-10 16:36:23 +0000	[diff] [blame]	262	if (!evtchn_pending_sel && ghc_valid) {
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	263	/* Fast path */
David Woodhouse	0985dba	2021-10-23 20:47:19 +0100	[diff] [blame]	264	pagefault_disable();
				265	err = __get_user(rc, (u8 __user *)ghc->hva + offset);
				266	pagefault_enable();
				267	if (!err)
				268	return rc;
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	269	}
				270
David Woodhouse	0985dba	2021-10-23 20:47:19 +0100	[diff] [blame]	271	/* Slow path */
				272
				273	/*
				274	* This function gets called from kvm_vcpu_block() after setting the
				275	* task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
				276	* from a HLT. So we really mustn't sleep. If the page ended up absent
				277	* at that point, just return 1 in order to trigger an immediate wake,
				278	* and we'll end up getting called again from a context where we can
				279	* fault in the page and wait for it.
				280	*/
David Woodhouse	14243b3	2021-12-10 16:36:23 +0000	[diff] [blame]	281	if (atomic)
David Woodhouse	0985dba	2021-10-23 20:47:19 +0100	[diff] [blame]	282	return 1;
				283
David Woodhouse	14243b3	2021-12-10 16:36:23 +0000	[diff] [blame]	284	if (!ghc_valid) {
				285	err = kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len);
				286	if (err \|\| !ghc->memslot) {
				287	/*
				288	* If this failed, userspace has screwed up the
				289	* vcpu_info mapping. No interrupts for you.
				290	*/
				291	return 0;
				292	}
				293	}
				294
				295	/*
				296	* Now we have a valid (protected by srcu) userspace HVA in
				297	* ghc->hva which points to the struct vcpu_info. If there
				298	* are any bits in the in-kernel evtchn_pending_sel then
				299	* we need to write those to the guest vcpu_info and set
				300	* its evtchn_upcall_pending flag. If there aren't any bits
				301	* to add, we only want to check evtchn_upcall_pending.
				302	*/
				303	if (evtchn_pending_sel) {
				304	bool long_mode = v->kvm->arch.xen.long_mode;
				305
				306	if (!user_access_begin((void __user *)ghc->hva, sizeof(struct vcpu_info)))
				307	return 0;
				308
				309	if (IS_ENABLED(CONFIG_64BIT) && long_mode) {
				310	struct vcpu_info __user vi = (void __user )ghc->hva;
				311
				312	/* Attempt to set the evtchn_pending_sel bits in the
				313	* guest, and if that succeeds then clear the same
				314	* bits in the in-kernel version. */
				315	asm volatile("1:\t" LOCK_PREFIX "orq %0, %1\n"
				316	"\tnotq %0\n"
				317	"\t" LOCK_PREFIX "andq %0, %2\n"
				318	"2:\n"
Peter Zijlstra	adb759e	2022-01-23 13:42:19 +0100	[diff] [blame]	319	_ASM_EXTABLE_UA(1b, 2b)
David Woodhouse	14243b3	2021-12-10 16:36:23 +0000	[diff] [blame]	320	: "=r" (evtchn_pending_sel),
				321	"+m" (vi->evtchn_pending_sel),
				322	"+m" (v->arch.xen.evtchn_pending_sel)
				323	: "0" (evtchn_pending_sel));
				324	} else {
				325	struct compat_vcpu_info __user vi = (void __user )ghc->hva;
				326	u32 evtchn_pending_sel32 = evtchn_pending_sel;
				327
				328	/* Attempt to set the evtchn_pending_sel bits in the
				329	* guest, and if that succeeds then clear the same
				330	* bits in the in-kernel version. */
				331	asm volatile("1:\t" LOCK_PREFIX "orl %0, %1\n"
				332	"\tnotl %0\n"
				333	"\t" LOCK_PREFIX "andl %0, %2\n"
				334	"2:\n"
Peter Zijlstra	adb759e	2022-01-23 13:42:19 +0100	[diff] [blame]	335	_ASM_EXTABLE_UA(1b, 2b)
David Woodhouse	14243b3	2021-12-10 16:36:23 +0000	[diff] [blame]	336	: "=r" (evtchn_pending_sel32),
				337	"+m" (vi->evtchn_pending_sel),
				338	"+m" (v->arch.xen.evtchn_pending_sel)
				339	: "0" (evtchn_pending_sel32));
				340	}
				341	rc = 1;
				342	unsafe_put_user(rc, (u8 __user *)ghc->hva + offset, err);
				343
				344	err:
				345	user_access_end();
				346
				347	mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
				348	} else {
				349	__get_user(rc, (u8 __user *)ghc->hva + offset);
				350	}
David Woodhouse	0985dba	2021-10-23 20:47:19 +0100	[diff] [blame]	351
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	352	return rc;
				353	}
				354
Joao Martins	a76b964	2020-12-03 15:52:25 +0000	[diff] [blame]	355	int kvm_xen_hvm_set_attr(struct kvm kvm, struct kvm_xen_hvm_attr data)
				356	{
				357	int r = -ENOENT;
				358
Joao Martins	13ffb97	2018-06-15 21:17:14 -0400	[diff] [blame]	359	mutex_lock(&kvm->lock);
				360
Joao Martins	a76b964	2020-12-03 15:52:25 +0000	[diff] [blame]	361	switch (data->type) {
David Woodhouse	a3833b8	2020-12-03 16:20:32 +0000	[diff] [blame]	362	case KVM_XEN_ATTR_TYPE_LONG_MODE:
Joao Martins	13ffb97	2018-06-15 21:17:14 -0400	[diff] [blame]	363	if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
				364	r = -EINVAL;
				365	} else {
				366	kvm->arch.xen.long_mode = !!data->u.long_mode;
				367	r = 0;
				368	}
David Woodhouse	a3833b8	2020-12-03 16:20:32 +0000	[diff] [blame]	369	break;
Joao Martins	13ffb97	2018-06-15 21:17:14 -0400	[diff] [blame]	370
				371	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
				372	r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
				373	break;
				374
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	375	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	376	if (data->u.vector && data->u.vector < 0x10)
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	377	r = -EINVAL;
				378	else {
				379	kvm->arch.xen.upcall_vector = data->u.vector;
				380	r = 0;
				381	}
				382	break;
				383
Joao Martins	a76b964	2020-12-03 15:52:25 +0000	[diff] [blame]	384	default:
				385	break;
				386	}
				387
				388	mutex_unlock(&kvm->lock);
				389	return r;
				390	}
				391
				392	int kvm_xen_hvm_get_attr(struct kvm kvm, struct kvm_xen_hvm_attr data)
				393	{
				394	int r = -ENOENT;
				395
				396	mutex_lock(&kvm->lock);
				397
				398	switch (data->type) {
David Woodhouse	a3833b8	2020-12-03 16:20:32 +0000	[diff] [blame]	399	case KVM_XEN_ATTR_TYPE_LONG_MODE:
				400	data->u.long_mode = kvm->arch.xen.long_mode;
				401	r = 0;
				402	break;
Joao Martins	13ffb97	2018-06-15 21:17:14 -0400	[diff] [blame]	403
				404	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
David Woodhouse	1cfc9c4	2021-12-10 16:36:22 +0000	[diff] [blame]	405	if (kvm->arch.xen.shinfo_cache.active)
				406	data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
				407	else
				408	data->u.shared_info.gfn = GPA_INVALID;
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	409	r = 0;
Joao Martins	13ffb97	2018-06-15 21:17:14 -0400	[diff] [blame]	410	break;
				411
David Woodhouse	40da8cc	2020-12-09 20:08:30 +0000	[diff] [blame]	412	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
				413	data->u.vector = kvm->arch.xen.upcall_vector;
				414	r = 0;
				415	break;
				416
Joao Martins	a76b964	2020-12-03 15:52:25 +0000	[diff] [blame]	417	default:
				418	break;
				419	}
				420
				421	mutex_unlock(&kvm->lock);
				422	return r;
				423	}
				424
David Woodhouse	3e32461	2021-02-02 16:53:25 +0000	[diff] [blame]	425	int kvm_xen_vcpu_set_attr(struct kvm_vcpu vcpu, struct kvm_xen_vcpu_attr data)
				426	{
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	427	int idx, r = -ENOENT;
David Woodhouse	3e32461	2021-02-02 16:53:25 +0000	[diff] [blame]	428
				429	mutex_lock(&vcpu->kvm->lock);
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	430	idx = srcu_read_lock(&vcpu->kvm->srcu);
David Woodhouse	3e32461	2021-02-02 16:53:25 +0000	[diff] [blame]	431
				432	switch (data->type) {
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	433	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
				434	/* No compat necessary here. */
				435	BUILD_BUG_ON(sizeof(struct vcpu_info) !=
				436	sizeof(struct compat_vcpu_info));
David Woodhouse	7d7c5f7	2021-03-01 12:53:08 +0000	[diff] [blame]	437	BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
				438	offsetof(struct compat_vcpu_info, time));
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	439
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	440	if (data->u.gpa == GPA_INVALID) {
				441	vcpu->arch.xen.vcpu_info_set = false;
David Woodhouse	7d7c5f7	2021-03-01 12:53:08 +0000	[diff] [blame]	442	r = 0;
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	443	break;
				444	}
				445
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	446	r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
				447	&vcpu->arch.xen.vcpu_info_cache,
				448	data->u.gpa,
				449	sizeof(struct vcpu_info));
Joao Martins	aa096aa	2019-02-01 13:01:45 -0500	[diff] [blame]	450	if (!r) {
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	451	vcpu->arch.xen.vcpu_info_set = true;
Joao Martins	aa096aa	2019-02-01 13:01:45 -0500	[diff] [blame]	452	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
				453	}
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	454	break;
				455
Joao Martins	f2340cd	2018-07-23 11:20:57 -0400	[diff] [blame]	456	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	457	if (data->u.gpa == GPA_INVALID) {
				458	vcpu->arch.xen.vcpu_time_info_set = false;
David Woodhouse	7d7c5f7	2021-03-01 12:53:08 +0000	[diff] [blame]	459	r = 0;
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	460	break;
				461	}
				462
Joao Martins	f2340cd	2018-07-23 11:20:57 -0400	[diff] [blame]	463	r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
				464	&vcpu->arch.xen.vcpu_time_info_cache,
				465	data->u.gpa,
				466	sizeof(struct pvclock_vcpu_time_info));
				467	if (!r) {
				468	vcpu->arch.xen.vcpu_time_info_set = true;
				469	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
				470	}
				471	break;
				472
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	473	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
				474	if (!sched_info_on()) {
				475	r = -EOPNOTSUPP;
				476	break;
				477	}
				478	if (data->u.gpa == GPA_INVALID) {
				479	vcpu->arch.xen.runstate_set = false;
				480	r = 0;
				481	break;
				482	}
				483
				484	r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
				485	&vcpu->arch.xen.runstate_cache,
				486	data->u.gpa,
				487	sizeof(struct vcpu_runstate_info));
				488	if (!r) {
				489	vcpu->arch.xen.runstate_set = true;
				490	}
				491	break;
				492
				493	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
				494	if (!sched_info_on()) {
				495	r = -EOPNOTSUPP;
				496	break;
				497	}
				498	if (data->u.runstate.state > RUNSTATE_offline) {
				499	r = -EINVAL;
				500	break;
				501	}
				502
				503	kvm_xen_update_runstate(vcpu, data->u.runstate.state);
				504	r = 0;
				505	break;
				506
				507	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
				508	if (!sched_info_on()) {
				509	r = -EOPNOTSUPP;
				510	break;
				511	}
				512	if (data->u.runstate.state > RUNSTATE_offline) {
				513	r = -EINVAL;
				514	break;
				515	}
				516	if (data->u.runstate.state_entry_time !=
				517	(data->u.runstate.time_running +
				518	data->u.runstate.time_runnable +
				519	data->u.runstate.time_blocked +
				520	data->u.runstate.time_offline)) {
				521	r = -EINVAL;
				522	break;
				523	}
				524	if (get_kvmclock_ns(vcpu->kvm) <
				525	data->u.runstate.state_entry_time) {
				526	r = -EINVAL;
				527	break;
				528	}
				529
				530	vcpu->arch.xen.current_runstate = data->u.runstate.state;
				531	vcpu->arch.xen.runstate_entry_time =
				532	data->u.runstate.state_entry_time;
				533	vcpu->arch.xen.runstate_times[RUNSTATE_running] =
				534	data->u.runstate.time_running;
				535	vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
				536	data->u.runstate.time_runnable;
				537	vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
				538	data->u.runstate.time_blocked;
				539	vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
				540	data->u.runstate.time_offline;
				541	vcpu->arch.xen.last_steal = current->sched_info.run_delay;
				542	r = 0;
				543	break;
				544
				545	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
				546	if (!sched_info_on()) {
				547	r = -EOPNOTSUPP;
				548	break;
				549	}
				550	if (data->u.runstate.state > RUNSTATE_offline &&
				551	data->u.runstate.state != (u64)-1) {
				552	r = -EINVAL;
				553	break;
				554	}
				555	/* The adjustment must add up */
				556	if (data->u.runstate.state_entry_time !=
				557	(data->u.runstate.time_running +
				558	data->u.runstate.time_runnable +
				559	data->u.runstate.time_blocked +
				560	data->u.runstate.time_offline)) {
				561	r = -EINVAL;
				562	break;
				563	}
				564
				565	if (get_kvmclock_ns(vcpu->kvm) <
				566	(vcpu->arch.xen.runstate_entry_time +
				567	data->u.runstate.state_entry_time)) {
				568	r = -EINVAL;
				569	break;
				570	}
				571
				572	vcpu->arch.xen.runstate_entry_time +=
				573	data->u.runstate.state_entry_time;
				574	vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
				575	data->u.runstate.time_running;
				576	vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
				577	data->u.runstate.time_runnable;
				578	vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
				579	data->u.runstate.time_blocked;
				580	vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
				581	data->u.runstate.time_offline;
				582
				583	if (data->u.runstate.state <= RUNSTATE_offline)
				584	kvm_xen_update_runstate(vcpu, data->u.runstate.state);
				585	r = 0;
				586	break;
				587
David Woodhouse	3e32461	2021-02-02 16:53:25 +0000	[diff] [blame]	588	default:
				589	break;
				590	}
				591
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	592	srcu_read_unlock(&vcpu->kvm->srcu, idx);
David Woodhouse	3e32461	2021-02-02 16:53:25 +0000	[diff] [blame]	593	mutex_unlock(&vcpu->kvm->lock);
				594	return r;
				595	}
				596
				597	int kvm_xen_vcpu_get_attr(struct kvm_vcpu vcpu, struct kvm_xen_vcpu_attr data)
				598	{
				599	int r = -ENOENT;
				600
				601	mutex_lock(&vcpu->kvm->lock);
				602
				603	switch (data->type) {
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	604	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	605	if (vcpu->arch.xen.vcpu_info_set)
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	606	data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	607	else
				608	data->u.gpa = GPA_INVALID;
				609	r = 0;
Joao Martins	73e69a8	2018-06-29 10:52:52 -0400	[diff] [blame]	610	break;
				611
Joao Martins	f2340cd	2018-07-23 11:20:57 -0400	[diff] [blame]	612	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	613	if (vcpu->arch.xen.vcpu_time_info_set)
Joao Martins	f2340cd	2018-07-23 11:20:57 -0400	[diff] [blame]	614	data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
David Woodhouse	0c165b3	2021-02-08 23:23:25 +0000	[diff] [blame]	615	else
				616	data->u.gpa = GPA_INVALID;
				617	r = 0;
Joao Martins	f2340cd	2018-07-23 11:20:57 -0400	[diff] [blame]	618	break;
				619
David Woodhouse	30b5c85	2021-03-01 12:53:09 +0000	[diff] [blame]	620	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
				621	if (!sched_info_on()) {
				622	r = -EOPNOTSUPP;
				623	break;
				624	}
				625	if (vcpu->arch.xen.runstate_set) {
				626	data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
				627	r = 0;
				628	}
				629	break;
				630
				631	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
				632	if (!sched_info_on()) {
				633	r = -EOPNOTSUPP;
				634	break;
				635	}
				636	data->u.runstate.state = vcpu->arch.xen.current_runstate;
				637	r = 0;
				638	break;
				639
				640	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
				641	if (!sched_info_on()) {
				642	r = -EOPNOTSUPP;
				643	break;
				644	}
				645	data->u.runstate.state = vcpu->arch.xen.current_runstate;
				646	data->u.runstate.state_entry_time =
				647	vcpu->arch.xen.runstate_entry_time;
				648	data->u.runstate.time_running =
				649	vcpu->arch.xen.runstate_times[RUNSTATE_running];
				650	data->u.runstate.time_runnable =
				651	vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
				652	data->u.runstate.time_blocked =
				653	vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
				654	data->u.runstate.time_offline =
				655	vcpu->arch.xen.runstate_times[RUNSTATE_offline];
				656	r = 0;
				657	break;
				658
				659	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
				660	r = -EINVAL;
				661	break;
				662
David Woodhouse	3e32461	2021-02-02 16:53:25 +0000	[diff] [blame]	663	default:
				664	break;
				665	}
				666
				667	mutex_unlock(&vcpu->kvm->lock);
				668	return r;
				669	}
				670
Joao Martins	23200b7	2018-06-13 09:55:44 -0400	[diff] [blame]	671	int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
				672	{
				673	struct kvm *kvm = vcpu->kvm;
				674	u32 page_num = data & ~PAGE_MASK;
				675	u64 page_addr = data & PAGE_MASK;
David Woodhouse	a3833b8	2020-12-03 16:20:32 +0000	[diff] [blame]	676	bool lm = is_long_mode(vcpu);
				677
				678	/* Latch long_mode for shared_info pages etc. */
				679	vcpu->kvm->arch.xen.long_mode = lm;
Joao Martins	23200b7	2018-06-13 09:55:44 -0400	[diff] [blame]	680
				681	/*
				682	* If Xen hypercall intercept is enabled, fill the hypercall
				683	* page with VMCALL/VMMCALL instructions since that's what
				684	* we catch. Else the VMM has provided the hypercall pages
				685	* with instructions of its own choosing, so use those.
				686	*/
				687	if (kvm_xen_hypercall_enabled(kvm)) {
				688	u8 instructions[32];
				689	int i;
				690
				691	if (page_num)
				692	return 1;
				693
				694	/* mov imm32, %eax */
				695	instructions[0] = 0xb8;
				696
				697	/* vmcall / vmmcall */
				698	kvm_x86_ops.patch_hypercall(vcpu, instructions + 5);
				699
				700	/* ret */
				701	instructions[8] = 0xc3;
				702
				703	/* int3 to pad */
				704	memset(instructions + 9, 0xcc, sizeof(instructions) - 9);
				705
				706	for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
				707	(u32 )&instructions[1] = i;
				708	if (kvm_vcpu_write_guest(vcpu,
				709	page_addr + (i * sizeof(instructions)),
				710	instructions, sizeof(instructions)))
				711	return 1;
				712	}
				713	} else {
Sean Christopherson	448841f	2021-02-08 12:15:02 -0800	[diff] [blame]	714	/*
				715	* Note, truncation is a non-issue as 'lm' is guaranteed to be
				716	* false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
				717	*/
				718	hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
				719	: kvm->arch.xen_hvm_config.blob_addr_32;
Joao Martins	23200b7	2018-06-13 09:55:44 -0400	[diff] [blame]	720	u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
				721	: kvm->arch.xen_hvm_config.blob_size_32;
				722	u8 *page;
				723
				724	if (page_num >= blob_size)
				725	return 1;
				726
				727	blob_addr += page_num * PAGE_SIZE;
				728
				729	page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE);
				730	if (IS_ERR(page))
				731	return PTR_ERR(page);
				732
				733	if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
				734	kfree(page);
				735	return 1;
				736	}
				737	}
				738	return 0;
				739	}
				740
David Woodhouse	78e9878	2021-02-02 13:19:35 +0000	[diff] [blame]	741	int kvm_xen_hvm_config(struct kvm kvm, struct kvm_xen_hvm_config xhc)
				742	{
				743	if (xhc->flags & ~KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)
				744	return -EINVAL;
				745
				746	/*
				747	* With hypercall interception the kernel generates its own
				748	* hypercall page so it must not be provided.
				749	*/
				750	if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
				751	(xhc->blob_addr_32 \|\| xhc->blob_addr_64 \|\|
				752	xhc->blob_size_32 \|\| xhc->blob_size_64))
				753	return -EINVAL;
				754
David Woodhouse	7d6bbeb	2021-02-02 15:48:05 +0000	[diff] [blame]	755	mutex_lock(&kvm->lock);
				756
				757	if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
				758	static_branch_inc(&kvm_xen_enabled.key);
				759	else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
				760	static_branch_slow_dec_deferred(&kvm_xen_enabled);
				761
David Woodhouse	78e9878	2021-02-02 13:19:35 +0000	[diff] [blame]	762	memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));
David Woodhouse	7d6bbeb	2021-02-02 15:48:05 +0000	[diff] [blame]	763
				764	mutex_unlock(&kvm->lock);
David Woodhouse	78e9878	2021-02-02 13:19:35 +0000	[diff] [blame]	765	return 0;
				766	}
				767
/* VM-creation hook for Xen emulation; intentionally has nothing to do. */
void kvm_xen_init_vm(struct kvm *kvm)
{
}
				771
David Woodhouse	7d6bbeb	2021-02-02 15:48:05 +0000	[diff] [blame]	772	void kvm_xen_destroy_vm(struct kvm *kvm)
				773	{
David Woodhouse	1cfc9c4	2021-12-10 16:36:22 +0000	[diff] [blame]	774	kvm_gfn_to_pfn_cache_destroy(kvm, &kvm->arch.xen.shinfo_cache);
				775
David Woodhouse	7d6bbeb	2021-02-02 15:48:05 +0000	[diff] [blame]	776	if (kvm->arch.xen_hvm_config.msr)
				777	static_branch_slow_dec_deferred(&kvm_xen_enabled);
				778	}
				779
Joao Martins	23200b7	2018-06-13 09:55:44 -0400	[diff] [blame]	780	static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
				781	{
				782	kvm_rax_write(vcpu, result);
				783	return kvm_skip_emulated_instruction(vcpu);
				784	}
				785
				786	static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
				787	{
				788	struct kvm_run *run = vcpu->run;
				789
				790	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
				791	return 1;
				792
				793	return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
				794	}
				795
				796	int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
				797	{
				798	bool longmode;
				799	u64 input, params[6];
				800
				801	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);
				802
Joao Martins	79033be	2018-06-13 09:55:44 -0400	[diff] [blame]	803	/* Hyper-V hypercalls get bit 31 set in EAX */
				804	if ((input & 0x80000000) &&
Vitaly Kuznetsov	8f01455	2021-01-26 14:48:14 +0100	[diff] [blame]	805	kvm_hv_hypercall_enabled(vcpu))
Joao Martins	79033be	2018-06-13 09:55:44 -0400	[diff] [blame]	806	return kvm_hv_hypercall(vcpu);
				807
Tom Lendacky	b5aead0	2021-05-24 12:48:57 -0500	[diff] [blame]	808	longmode = is_64_bit_hypercall(vcpu);
Joao Martins	23200b7	2018-06-13 09:55:44 -0400	[diff] [blame]	809	if (!longmode) {
				810	params[0] = (u32)kvm_rbx_read(vcpu);
				811	params[1] = (u32)kvm_rcx_read(vcpu);
				812	params[2] = (u32)kvm_rdx_read(vcpu);
				813	params[3] = (u32)kvm_rsi_read(vcpu);
				814	params[4] = (u32)kvm_rdi_read(vcpu);
				815	params[5] = (u32)kvm_rbp_read(vcpu);
				816	}
				817	#ifdef CONFIG_X86_64
				818	else {
				819	params[0] = (u64)kvm_rdi_read(vcpu);
				820	params[1] = (u64)kvm_rsi_read(vcpu);
				821	params[2] = (u64)kvm_rdx_read(vcpu);
				822	params[3] = (u64)kvm_r10_read(vcpu);
				823	params[4] = (u64)kvm_r8_read(vcpu);
				824	params[5] = (u64)kvm_r9_read(vcpu);
				825	}
				826	#endif
				827	trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
				828	params[3], params[4], params[5]);
				829
				830	vcpu->run->exit_reason = KVM_EXIT_XEN;
				831	vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
				832	vcpu->run->xen.u.hcall.longmode = longmode;
				833	vcpu->run->xen.u.hcall.cpl = kvm_x86_ops.get_cpl(vcpu);
				834	vcpu->run->xen.u.hcall.input = input;
				835	vcpu->run->xen.u.hcall.params[0] = params[0];
				836	vcpu->run->xen.u.hcall.params[1] = params[1];
				837	vcpu->run->xen.u.hcall.params[2] = params[2];
				838	vcpu->run->xen.u.hcall.params[3] = params[3];
				839	vcpu->run->xen.u.hcall.params[4] = params[4];
				840	vcpu->run->xen.u.hcall.params[5] = params[5];
				841	vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
				842	vcpu->arch.complete_userspace_io =
				843	kvm_xen_hypercall_complete_userspace;
				844
				845	return 0;
				846	}
David Woodhouse	14243b3	2021-12-10 16:36:23 +0000	[diff] [blame]	847
				848	static inline int max_evtchn_port(struct kvm *kvm)
				849	{
				850	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode)
				851	return EVTCHN_2L_NR_CHANNELS;
				852	else
				853	return COMPAT_EVTCHN_2L_NR_CHANNELS;
				854	}
				855
				856	/*
				857	* This follows the kvm_set_irq() API, so it returns:
				858	* < 0 Interrupt was ignored (masked or not delivered for other reasons)
				859	* = 0 Interrupt was coalesced (previous irq is still pending)
				860	* > 0 Number of CPUs interrupt was delivered to
				861	*/
				862	int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
				863	struct kvm *kvm)
				864	{
				865	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
				866	struct kvm_vcpu *vcpu;
				867	unsigned long pending_bits, mask_bits;
				868	unsigned long flags;
				869	int port_word_bit;
				870	bool kick_vcpu = false;
				871	int idx;
				872	int rc;
				873
				874	vcpu = kvm_get_vcpu_by_id(kvm, e->xen_evtchn.vcpu);
				875	if (!vcpu)
				876	return -1;
				877
				878	if (!vcpu->arch.xen.vcpu_info_set)
				879	return -1;
				880
				881	if (e->xen_evtchn.port >= max_evtchn_port(kvm))
				882	return -1;
				883
				884	rc = -EWOULDBLOCK;
				885	read_lock_irqsave(&gpc->lock, flags);
				886
				887	idx = srcu_read_lock(&kvm->srcu);
				888	if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
				889	goto out_rcu;
				890
				891	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
				892	struct shared_info *shinfo = gpc->khva;
				893	pending_bits = (unsigned long *)&shinfo->evtchn_pending;
				894	mask_bits = (unsigned long *)&shinfo->evtchn_mask;
				895	port_word_bit = e->xen_evtchn.port / 64;
				896	} else {
				897	struct compat_shared_info *shinfo = gpc->khva;
				898	pending_bits = (unsigned long *)&shinfo->evtchn_pending;
				899	mask_bits = (unsigned long *)&shinfo->evtchn_mask;
				900	port_word_bit = e->xen_evtchn.port / 32;
				901	}
				902
				903	/*
				904	* If this port wasn't already set, and if it isn't masked, then
				905	* we try to set the corresponding bit in the in-kernel shadow of
				906	* evtchn_pending_sel for the target vCPU. And if that wasn't
				907	* already set, then we kick the vCPU in question to write to the
				908	* real evtchn_pending_sel in its own guest vcpu_info struct.
				909	*/
				910	if (test_and_set_bit(e->xen_evtchn.port, pending_bits)) {
				911	rc = 0; /* It was already raised */
				912	} else if (test_bit(e->xen_evtchn.port, mask_bits)) {
				913	rc = -1; /* Masked */
				914	} else {
				915	rc = 1; /* Delivered. But was the vCPU waking already? */
				916	if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
				917	kick_vcpu = true;
				918	}
				919
				920	out_rcu:
				921	srcu_read_unlock(&kvm->srcu, idx);
				922	read_unlock_irqrestore(&gpc->lock, flags);
				923
				924	if (kick_vcpu) {
				925	kvm_make_request(KVM_REQ_EVENT, vcpu);
				926	kvm_vcpu_kick(vcpu);
				927	}
				928
				929	return rc;
				930	}
				931
				932	/* This is the version called from kvm_set_irq() as the .set function */
				933	static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry e, struct kvm kvm,
				934	int irq_source_id, int level, bool line_status)
				935	{
				936	bool mm_borrowed = false;
				937	int rc;
				938
				939	if (!level)
				940	return -1;
				941
				942	rc = kvm_xen_set_evtchn_fast(e, kvm);
				943	if (rc != -EWOULDBLOCK)
				944	return rc;
				945
				946	if (current->mm != kvm->mm) {
				947	/*
				948	* If not on a thread which already belongs to this KVM,
				949	* we'd better be in the irqfd workqueue.
				950	*/
				951	if (WARN_ON_ONCE(current->mm))
				952	return -EINVAL;
				953
				954	kthread_use_mm(kvm->mm);
				955	mm_borrowed = true;
				956	}
				957
				958	/*
				959	* For the irqfd workqueue, using the main kvm->lock mutex is
				960	* fine since this function is invoked from kvm_set_irq() with
				961	* no other lock held, no srcu. In future if it will be called
				962	* directly from a vCPU thread (e.g. on hypercall for an IPI)
				963	* then it may need to switch to using a leaf-node mutex for
				964	* serializing the shared_info mapping.
				965	*/
				966	mutex_lock(&kvm->lock);
				967
				968	/*
				969	* It is theoretically possible for the page to be unmapped
				970	* and the MMU notifier to invalidate the shared_info before
				971	* we even get to use it. In that case, this looks like an
				972	* infinite loop. It was tempting to do it via the userspace
				973	* HVA instead... but that just hides the fact that it's
				974	* an infinite loop, because if a fault occurs and it waits
				975	* for the page to come back, it can still immediately
				976	* fault and have to wait again, repeatedly.
				977	*
				978	* Conversely, the page could also have been reinstated by
				979	* another thread before we even obtain the mutex above, so
				980	* check again first before remapping it.
				981	*/
				982	do {
				983	struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache;
				984	int idx;
				985
				986	rc = kvm_xen_set_evtchn_fast(e, kvm);
				987	if (rc != -EWOULDBLOCK)
				988	break;
				989
				990	idx = srcu_read_lock(&kvm->srcu);
				991	rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa,
				992	PAGE_SIZE, false);
				993	srcu_read_unlock(&kvm->srcu, idx);
				994	} while(!rc);
				995
				996	mutex_unlock(&kvm->lock);
				997
				998	if (mm_borrowed)
				999	kthread_unuse_mm(kvm->mm);
				1000
				1001	return rc;
				1002	}
				1003
				1004	int kvm_xen_setup_evtchn(struct kvm *kvm,
				1005	struct kvm_kernel_irq_routing_entry *e,
				1006	const struct kvm_irq_routing_entry *ue)
				1007
				1008	{
				1009	if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm))
				1010	return -EINVAL;
				1011
				1012	/* We only support 2 level event channels for now */
				1013	if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL)
				1014	return -EINVAL;
				1015
				1016	e->xen_evtchn.port = ue->u.xen_evtchn.port;
				1017	e->xen_evtchn.vcpu = ue->u.xen_evtchn.vcpu;
				1018	e->xen_evtchn.priority = ue->u.xen_evtchn.priority;
				1019	e->set = evtchn_set_fn;
				1020
				1021	return 0;
				1022	}