Blame - drivers/iommu/intel-svm.c - SHIFTPHONES/mainline/linux

blob: 45f6e581cd56f63026f50ffed1c2e815f76deff8 [file] [log] [blame]

David Woodhouse	8a94ade	2015-03-24 14:54:56 +0000	[diff] [blame]	1	/*
				2	* Copyright © 2015 Intel Corporation.
				3	*
				4	* This program is free software; you can redistribute it and/or modify it
				5	* under the terms and conditions of the GNU General Public License,
				6	* version 2, as published by the Free Software Foundation.
				7	*
				8	* This program is distributed in the hope it will be useful, but WITHOUT
				9	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				10	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
				11	* more details.
				12	*
				13	* Authors: David Woodhouse <dwmw2@infradead.org>
				14	*/
				15
				16	#include <linux/intel-iommu.h>
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	17	#include <linux/mmu_notifier.h>
				18	#include <linux/sched.h>
Ingo Molnar	6e84f31	2017-02-08 18:51:29 +0100	[diff] [blame]	19	#include <linux/sched/mm.h>
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	20	#include <linux/slab.h>
				21	#include <linux/intel-svm.h>
				22	#include <linux/rculist.h>
				23	#include <linux/pci.h>
				24	#include <linux/pci-ats.h>
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	25	#include <linux/dmar.h>
				26	#include <linux/interrupt.h>
Ashok Raj	9d8c3af	2017-08-08 13:29:27 -0700	[diff] [blame]	27	#include <asm/page.h>
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	28
Sohil Mehta	2f13eb7	2017-12-20 11:59:27 -0800	[diff] [blame]	29	#define PASID_ENTRY_P BIT_ULL(0)
				30	#define PASID_ENTRY_FLPM_5LP BIT_ULL(9)
				31	#define PASID_ENTRY_SRE BIT_ULL(11)
				32
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	33	static irqreturn_t prq_event_thread(int irq, void *d);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	34
				35	struct pasid_entry {
				36	u64 val;
				37	};
David Woodhouse	8a94ade	2015-03-24 14:54:56 +0000	[diff] [blame]	38
David Woodhouse	907fea3	2015-10-13 14:11:13 +0100	[diff] [blame]	39	struct pasid_state_entry {
				40	u64 val;
				41	};
				42
David Woodhouse	8a94ade	2015-03-24 14:54:56 +0000	[diff] [blame]	43	int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
				44	{
				45	struct page *pages;
				46	int order;
				47
Sohil Mehta	59103ca	2017-12-20 11:59:25 -0800	[diff] [blame]	48	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
				49	!cap_fl1gp_support(iommu->cap))
				50	return -EINVAL;
				51
Sohil Mehta	f1ac10c	2017-12-20 11:59:26 -0800	[diff] [blame]	52	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
				53	!cap_5lp_support(iommu->cap))
				54	return -EINVAL;
				55
David Woodhouse	9101704	2016-09-12 10:49:11 +0800	[diff] [blame]	56	/* Start at 2 because it's defined as 2^(1+PSS) */
				57	iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
David Woodhouse	8a94ade	2015-03-24 14:54:56 +0000	[diff] [blame]	58
David Woodhouse	9101704	2016-09-12 10:49:11 +0800	[diff] [blame]	59	/* Eventually I'm promised we will get a multi-level PASID table
				60	* and it won't have to be physically contiguous. Until then,
				61	* limit the size because 8MiB contiguous allocations can be hard
				62	* to come by. The limit of 0x20000, which is 1MiB for each of
				63	* the PASID and PASID-state tables, is somewhat arbitrary. */
				64	if (iommu->pasid_max > 0x20000)
				65	iommu->pasid_max = 0x20000;
				66
				67	order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
David Woodhouse	8a94ade	2015-03-24 14:54:56 +0000	[diff] [blame]	68	pages = alloc_pages(GFP_KERNEL \| __GFP_ZERO, order);
				69	if (!pages) {
				70	pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
				71	iommu->name);
				72	return -ENOMEM;
				73	}
				74	iommu->pasid_table = page_address(pages);
				75	pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
				76
				77	if (ecap_dis(iommu->ecap)) {
David Woodhouse	9101704	2016-09-12 10:49:11 +0800	[diff] [blame]	78	/* Just making it explicit... */
				79	BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
David Woodhouse	8a94ade	2015-03-24 14:54:56 +0000	[diff] [blame]	80	pages = alloc_pages(GFP_KERNEL \| __GFP_ZERO, order);
				81	if (pages)
				82	iommu->pasid_state_table = page_address(pages);
				83	else
				84	pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
				85	iommu->name);
				86	}
				87
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	88	idr_init(&iommu->pasid_idr);
				89
David Woodhouse	8a94ade	2015-03-24 14:54:56 +0000	[diff] [blame]	90	return 0;
				91	}
				92
				93	int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
				94	{
David Woodhouse	9101704	2016-09-12 10:49:11 +0800	[diff] [blame]	95	int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
David Woodhouse	8a94ade	2015-03-24 14:54:56 +0000	[diff] [blame]	96
				97	if (iommu->pasid_table) {
				98	free_pages((unsigned long)iommu->pasid_table, order);
				99	iommu->pasid_table = NULL;
				100	}
				101	if (iommu->pasid_state_table) {
				102	free_pages((unsigned long)iommu->pasid_state_table, order);
				103	iommu->pasid_state_table = NULL;
				104	}
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	105	idr_destroy(&iommu->pasid_idr);
David Woodhouse	8a94ade	2015-03-24 14:54:56 +0000	[diff] [blame]	106	return 0;
				107	}
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	108
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	109	#define PRQ_ORDER 0
				110
				111	int intel_svm_enable_prq(struct intel_iommu *iommu)
				112	{
				113	struct page *pages;
				114	int irq, ret;
				115
				116	pages = alloc_pages(GFP_KERNEL \| __GFP_ZERO, PRQ_ORDER);
				117	if (!pages) {
				118	pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
				119	iommu->name);
				120	return -ENOMEM;
				121	}
				122	iommu->prq = page_address(pages);
				123
				124	irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
				125	if (irq <= 0) {
				126	pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
				127	iommu->name);
				128	ret = -EINVAL;
				129	err:
				130	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
				131	iommu->prq = NULL;
				132	return ret;
				133	}
				134	iommu->pr_irq = irq;
				135
				136	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
				137
				138	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				139	iommu->prq_name, iommu);
				140	if (ret) {
				141	pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
				142	iommu->name);
				143	dmar_free_hwirq(irq);
Jerry Snitselaar	72d5481	2017-12-20 09:48:56 -0700	[diff] [blame]	144	iommu->pr_irq = 0;
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	145	goto err;
				146	}
				147	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
				148	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
				149	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) \| PRQ_ORDER);
				150
				151	return 0;
				152	}
				153
				154	int intel_svm_finish_prq(struct intel_iommu *iommu)
				155	{
				156	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
				157	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
				158	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
				159
Jerry Snitselaar	72d5481	2017-12-20 09:48:56 -0700	[diff] [blame]	160	if (iommu->pr_irq) {
				161	free_irq(iommu->pr_irq, iommu);
				162	dmar_free_hwirq(iommu->pr_irq);
				163	iommu->pr_irq = 0;
				164	}
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	165
				166	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
				167	iommu->prq = NULL;
				168
				169	return 0;
				170	}
				171
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	172	static void intel_flush_svm_range_dev (struct intel_svm svm, struct intel_svm_dev sdev,
David Woodhouse	5d52f48	2015-10-20 15:52:13 +0100	[diff] [blame]	173	unsigned long address, unsigned long pages, int ih, int gl)
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	174	{
				175	struct qi_desc desc;
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	176
David Woodhouse	5d52f48	2015-10-20 15:52:13 +0100	[diff] [blame]	177	if (pages == -1) {
David Woodhouse	e034992	2015-10-16 19:36:53 +0100	[diff] [blame]	178	/* For global kernel pages we have to flush them in all PASIDs
				179	* because that's the only option the hardware gives us. Despite
				180	* the fact that they are actually only accessible through one. */
				181	if (gl)
				182	desc.low = QI_EIOTLB_PASID(svm->pasid) \| QI_EIOTLB_DID(sdev->did) \|
				183	QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) \| QI_EIOTLB_TYPE;
				184	else
				185	desc.low = QI_EIOTLB_PASID(svm->pasid) \| QI_EIOTLB_DID(sdev->did) \|
				186	QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) \| QI_EIOTLB_TYPE;
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	187	desc.high = 0;
				188	} else {
David Woodhouse	5d52f48	2015-10-20 15:52:13 +0100	[diff] [blame]	189	int mask = ilog2(__roundup_pow_of_two(pages));
				190
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	191	desc.low = QI_EIOTLB_PASID(svm->pasid) \| QI_EIOTLB_DID(sdev->did) \|
				192	QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) \| QI_EIOTLB_TYPE;
David Woodhouse	e034992	2015-10-16 19:36:53 +0100	[diff] [blame]	193	desc.high = QI_EIOTLB_ADDR(address) \| QI_EIOTLB_GL(gl) \|
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	194	QI_EIOTLB_IH(ih) \| QI_EIOTLB_AM(mask);
				195	}
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	196	qi_submit_sync(&desc, svm->iommu);
				197
				198	if (sdev->dev_iotlb) {
				199	desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) \| QI_DEV_EIOTLB_SID(sdev->sid) \|
				200	QI_DEV_EIOTLB_QDEP(sdev->qdep) \| QI_DEIOTLB_TYPE;
David Woodhouse	5d52f48	2015-10-20 15:52:13 +0100	[diff] [blame]	201	if (pages == -1) {
				202	desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) \| QI_DEV_EIOTLB_SIZE;
				203	} else if (pages > 1) {
				204	/* The least significant zero bit indicates the size. So,
				205	* for example, an "address" value of 0x12345f000 will
				206	* flush from 0x123440000 to 0x12347ffff (256KiB). */
				207	unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
Ingo Molnar	ed7158b	2018-02-22 10:54:55 +0100	[diff] [blame]	208	unsigned long mask = __rounddown_pow_of_two(address ^ last);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	209
David Woodhouse	5d52f48	2015-10-20 15:52:13 +0100	[diff] [blame]	210	desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) \| (mask - 1)) \| QI_DEV_EIOTLB_SIZE;
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	211	} else {
				212	desc.high = QI_DEV_EIOTLB_ADDR(address);
				213	}
				214	qi_submit_sync(&desc, svm->iommu);
				215	}
				216	}
				217
				218	static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
David Woodhouse	5d52f48	2015-10-20 15:52:13 +0100	[diff] [blame]	219	unsigned long pages, int ih, int gl)
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	220	{
				221	struct intel_svm_dev *sdev;
				222
David Woodhouse	907fea3	2015-10-13 14:11:13 +0100	[diff] [blame]	223	/* Try deferred invalidate if available */
				224	if (svm->iommu->pasid_state_table &&
				225	!cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
				226	return;
				227
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	228	rcu_read_lock();
				229	list_for_each_entry_rcu(sdev, &svm->devs, list)
David Woodhouse	e034992	2015-10-16 19:36:53 +0100	[diff] [blame]	230	intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	231	rcu_read_unlock();
				232	}
				233
				234	static void intel_change_pte(struct mmu_notifier mn, struct mm_struct mm,
				235	unsigned long address, pte_t pte)
				236	{
				237	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
				238
David Woodhouse	e034992	2015-10-16 19:36:53 +0100	[diff] [blame]	239	intel_flush_svm_range(svm, address, 1, 1, 0);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	240	}
				241
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	242	/* Pages have been freed at this point */
				243	static void intel_invalidate_range(struct mmu_notifier *mn,
				244	struct mm_struct *mm,
				245	unsigned long start, unsigned long end)
				246	{
				247	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
				248
				249	intel_flush_svm_range(svm, start,
David Woodhouse	e034992	2015-10-16 19:36:53 +0100	[diff] [blame]	250	(end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	251	}
				252
				253
David Woodhouse	5a10ba2	2015-10-24 21:06:39 +0200	[diff] [blame]	254	static void intel_flush_pasid_dev(struct intel_svm svm, struct intel_svm_dev sdev, int pasid)
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	255	{
				256	struct qi_desc desc;
				257
				258	desc.high = 0;
David Woodhouse	5a10ba2	2015-10-24 21:06:39 +0200	[diff] [blame]	259	desc.low = QI_PC_TYPE \| QI_PC_DID(sdev->did) \| QI_PC_PASID_SEL \| QI_PC_PASID(pasid);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	260
				261	qi_submit_sync(&desc, svm->iommu);
				262	}
				263
				264	static void intel_mm_release(struct mmu_notifier mn, struct mm_struct mm)
				265	{
				266	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
David Woodhouse	e57e58b	2016-01-12 19:18:06 +0000	[diff] [blame]	267	struct intel_svm_dev *sdev;
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	268
David Woodhouse	e57e58b	2016-01-12 19:18:06 +0000	[diff] [blame]	269	/* This might end up being called from exit_mmap(), before the page
				270	* tables are cleared. And __mmu_notifier_release() will delete us from
				271	* the list of notifiers so that our invalidate_range() callback doesn't
				272	* get called when the page tables are cleared. So we need to protect
				273	* against hardware accessing those page tables.
				274	*
				275	* We do it by clearing the entry in the PASID table and then flushing
				276	* the IOTLB and the PASID table caches. This might upset hardware;
				277	* perhaps we'll want to point the PASID to a dummy PGD (like the zero
				278	* page) so that we end up taking a fault that the hardware really
				279	* has to handle gracefully without affecting other processes.
				280	*/
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	281	svm->iommu->pasid_table[svm->pasid].val = 0;
David Woodhouse	e57e58b	2016-01-12 19:18:06 +0000	[diff] [blame]	282	wmb();
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	283
David Woodhouse	e57e58b	2016-01-12 19:18:06 +0000	[diff] [blame]	284	rcu_read_lock();
				285	list_for_each_entry_rcu(sdev, &svm->devs, list) {
				286	intel_flush_pasid_dev(svm, sdev, svm->pasid);
				287	intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
				288	}
				289	rcu_read_unlock();
				290
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	291	}
				292
				293	static const struct mmu_notifier_ops intel_mmuops = {
David Rientjes	5ff7091	2018-01-31 16:18:32 -0800	[diff] [blame]	294	.flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	295	.release = intel_mm_release,
				296	.change_pte = intel_change_pte,
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	297	.invalidate_range = intel_invalidate_range,
				298	};
				299
				300	static DEFINE_MUTEX(pasid_mutex);
				301
David Woodhouse	0204a49	2015-10-13 17:18:10 +0100	[diff] [blame]	302	int intel_svm_bind_mm(struct device dev, int pasid, int flags, struct svm_dev_ops *ops)
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	303	{
				304	struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
				305	struct intel_svm_dev *sdev;
				306	struct intel_svm *svm = NULL;
David Woodhouse	5cec753	2015-10-15 15:52:15 +0100	[diff] [blame]	307	struct mm_struct *mm = NULL;
Sohil Mehta	2f13eb7	2017-12-20 11:59:27 -0800	[diff] [blame]	308	u64 pasid_entry_val;
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	309	int pasid_max;
				310	int ret;
				311
Lu Baolu	2e2e35d	2017-11-03 10:51:32 -0600	[diff] [blame]	312	if (WARN_ON(!iommu \|\| !iommu->pasid_table))
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	313	return -EINVAL;
				314
				315	if (dev_is_pci(dev)) {
				316	pasid_max = pci_max_pasids(to_pci_dev(dev));
				317	if (pasid_max < 0)
				318	return -EINVAL;
				319	} else
				320	pasid_max = 1 << 20;
				321
Lu Baolu	bb37f7d	2018-05-04 13:08:19 +0800	[diff] [blame^]	322	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
David Woodhouse	5cec753	2015-10-15 15:52:15 +0100	[diff] [blame]	323	if (!ecap_srs(iommu->ecap))
				324	return -EINVAL;
				325	} else if (pasid) {
				326	mm = get_task_mm(current);
				327	BUG_ON(!mm);
				328	}
				329
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	330	mutex_lock(&pasid_mutex);
David Woodhouse	569e4f7	2015-10-15 13:59:14 +0100	[diff] [blame]	331	if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	332	int i;
				333
				334	idr_for_each_entry(&iommu->pasid_idr, svm, i) {
David Woodhouse	5cec753	2015-10-15 15:52:15 +0100	[diff] [blame]	335	if (svm->mm != mm \|\|
David Woodhouse	569e4f7	2015-10-15 13:59:14 +0100	[diff] [blame]	336	(svm->flags & SVM_FLAG_PRIVATE_PASID))
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	337	continue;
				338
				339	if (svm->pasid >= pasid_max) {
				340	dev_warn(dev,
				341	"Limited PASID width. Cannot use existing PASID %d\n",
				342	svm->pasid);
				343	ret = -ENOSPC;
				344	goto out;
				345	}
				346
				347	list_for_each_entry(sdev, &svm->devs, list) {
				348	if (dev == sdev->dev) {
David Woodhouse	0204a49	2015-10-13 17:18:10 +0100	[diff] [blame]	349	if (sdev->ops != ops) {
				350	ret = -EBUSY;
				351	goto out;
				352	}
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	353	sdev->users++;
				354	goto success;
				355	}
				356	}
				357
				358	break;
				359	}
				360	}
				361
				362	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
				363	if (!sdev) {
				364	ret = -ENOMEM;
				365	goto out;
				366	}
				367	sdev->dev = dev;
				368
				369	ret = intel_iommu_enable_pasid(iommu, sdev);
				370	if (ret \|\| !pasid) {
				371	/* If they don't actually want to assign a PASID, this is
				372	* just an enabling check/preparation. */
				373	kfree(sdev);
				374	goto out;
				375	}
				376	/* Finish the setup now we know we're keeping it */
				377	sdev->users = 1;
David Woodhouse	0204a49	2015-10-13 17:18:10 +0100	[diff] [blame]	378	sdev->ops = ops;
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	379	init_rcu_head(&sdev->rcu);
				380
				381	if (!svm) {
				382	svm = kzalloc(sizeof(*svm), GFP_KERNEL);
				383	if (!svm) {
				384	ret = -ENOMEM;
				385	kfree(sdev);
				386	goto out;
				387	}
				388	svm->iommu = iommu;
				389
David Woodhouse	9101704	2016-09-12 10:49:11 +0800	[diff] [blame]	390	if (pasid_max > iommu->pasid_max)
				391	pasid_max = iommu->pasid_max;
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	392
David Woodhouse	5a10ba2	2015-10-24 21:06:39 +0200	[diff] [blame]	393	/* Do not use PASID 0 in caching mode (virtualised IOMMU) */
				394	ret = idr_alloc(&iommu->pasid_idr, svm,
				395	!!cap_caching_mode(iommu->cap),
				396	pasid_max - 1, GFP_KERNEL);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	397	if (ret < 0) {
				398	kfree(svm);
Lu Baolu	bbe4b3a	2018-02-24 13:42:27 +0800	[diff] [blame]	399	kfree(sdev);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	400	goto out;
				401	}
				402	svm->pasid = ret;
				403	svm->notifier.ops = &intel_mmuops;
David Woodhouse	5cec753	2015-10-15 15:52:15 +0100	[diff] [blame]	404	svm->mm = mm;
David Woodhouse	569e4f7	2015-10-15 13:59:14 +0100	[diff] [blame]	405	svm->flags = flags;
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	406	INIT_LIST_HEAD_RCU(&svm->devs);
				407	ret = -ENOMEM;
David Woodhouse	5cec753	2015-10-15 15:52:15 +0100	[diff] [blame]	408	if (mm) {
				409	ret = mmu_notifier_register(&svm->notifier, mm);
				410	if (ret) {
				411	idr_remove(&svm->iommu->pasid_idr, svm->pasid);
				412	kfree(svm);
				413	kfree(sdev);
				414	goto out;
				415	}
Sohil Mehta	2f13eb7	2017-12-20 11:59:27 -0800	[diff] [blame]	416	pasid_entry_val = (u64)__pa(mm->pgd) \| PASID_ENTRY_P;
David Woodhouse	5cec753	2015-10-15 15:52:15 +0100	[diff] [blame]	417	} else
Sohil Mehta	2f13eb7	2017-12-20 11:59:27 -0800	[diff] [blame]	418	pasid_entry_val = (u64)__pa(init_mm.pgd) \|
				419	PASID_ENTRY_P \| PASID_ENTRY_SRE;
				420	if (cpu_feature_enabled(X86_FEATURE_LA57))
				421	pasid_entry_val \|= PASID_ENTRY_FLPM_5LP;
				422
				423	iommu->pasid_table[svm->pasid].val = pasid_entry_val;
				424
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	425	wmb();
Lu Baolu	9714010	2018-03-16 12:31:36 +0800	[diff] [blame]	426
				427	/*
				428	* Flush PASID cache when a PASID table entry becomes
				429	* present.
				430	*/
David Woodhouse	5a10ba2	2015-10-24 21:06:39 +0200	[diff] [blame]	431	if (cap_caching_mode(iommu->cap))
Lu Baolu	9714010	2018-03-16 12:31:36 +0800	[diff] [blame]	432	intel_flush_pasid_dev(svm, sdev, svm->pasid);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	433	}
				434	list_add_rcu(&sdev->list, &svm->devs);
				435
				436	success:
				437	*pasid = svm->pasid;
				438	ret = 0;
				439	out:
				440	mutex_unlock(&pasid_mutex);
David Woodhouse	5cec753	2015-10-15 15:52:15 +0100	[diff] [blame]	441	if (mm)
				442	mmput(mm);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	443	return ret;
				444	}
				445	EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
				446
				447	int intel_svm_unbind_mm(struct device *dev, int pasid)
				448	{
				449	struct intel_svm_dev *sdev;
				450	struct intel_iommu *iommu;
				451	struct intel_svm *svm;
				452	int ret = -EINVAL;
				453
				454	mutex_lock(&pasid_mutex);
				455	iommu = intel_svm_device_to_iommu(dev);
				456	if (!iommu \|\| !iommu->pasid_table)
				457	goto out;
				458
				459	svm = idr_find(&iommu->pasid_idr, pasid);
				460	if (!svm)
				461	goto out;
				462
				463	list_for_each_entry(sdev, &svm->devs, list) {
				464	if (dev == sdev->dev) {
				465	ret = 0;
				466	sdev->users--;
				467	if (!sdev->users) {
				468	list_del_rcu(&sdev->list);
				469	/* Flush the PASID cache and IOTLB for this device.
				470	* Note that we do depend on the hardware not using
				471	* the PASID any more. Just as we depend on other
				472	* devices never using PASIDs that they have no right
				473	* to use. We have a shared PASID table, because it's
				474	* large and has to be physically contiguous. So it's
				475	* hard to be as defensive as we might like. */
David Woodhouse	5a10ba2	2015-10-24 21:06:39 +0200	[diff] [blame]	476	intel_flush_pasid_dev(svm, sdev, svm->pasid);
David Woodhouse	e034992	2015-10-16 19:36:53 +0100	[diff] [blame]	477	intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	478	kfree_rcu(sdev, rcu);
				479
				480	if (list_empty(&svm->devs)) {
Lu Baolu	4fa064b	2017-11-03 10:51:34 -0600	[diff] [blame]	481	svm->iommu->pasid_table[svm->pasid].val = 0;
				482	wmb();
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	483
				484	idr_remove(&svm->iommu->pasid_idr, svm->pasid);
David Woodhouse	5cec753	2015-10-15 15:52:15 +0100	[diff] [blame]	485	if (svm->mm)
David Woodhouse	e57e58b	2016-01-12 19:18:06 +0000	[diff] [blame]	486	mmu_notifier_unregister(&svm->notifier, svm->mm);
				487
David Woodhouse	2f26e0a	2015-09-09 11:40:47 +0100	[diff] [blame]	488	/* We mandate that no page faults may be outstanding
				489	* for the PASID when intel_svm_unbind_mm() is called.
				490	* If that is not obeyed, subtle errors will happen.
				491	* Let's make them less subtle... */
				492	memset(svm, 0x6b, sizeof(*svm));
				493	kfree(svm);
				494	}
				495	}
				496	break;
				497	}
				498	}
				499	out:
				500	mutex_unlock(&pasid_mutex);
				501
				502	return ret;
				503	}
				504	EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	505
CQ Tang	15060ab	2017-05-10 11:39:03 -0700	[diff] [blame]	506	int intel_svm_is_pasid_valid(struct device *dev, int pasid)
				507	{
				508	struct intel_iommu *iommu;
				509	struct intel_svm *svm;
				510	int ret = -EINVAL;
				511
				512	mutex_lock(&pasid_mutex);
				513	iommu = intel_svm_device_to_iommu(dev);
				514	if (!iommu \|\| !iommu->pasid_table)
				515	goto out;
				516
				517	svm = idr_find(&iommu->pasid_idr, pasid);
				518	if (!svm)
				519	goto out;
				520
				521	/* init_mm is used in this case */
				522	if (!svm->mm)
				523	ret = 1;
				524	else if (atomic_read(&svm->mm->mm_users) > 0)
				525	ret = 1;
				526	else
				527	ret = 0;
				528
				529	out:
				530	mutex_unlock(&pasid_mutex);
				531
				532	return ret;
				533	}
				534	EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
				535
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	536	/* Page request queue descriptor */
				537	struct page_req_dsc {
				538	u64 srr:1;
				539	u64 bof:1;
				540	u64 pasid_present:1;
				541	u64 lpig:1;
				542	u64 pasid:20;
				543	u64 bus:8;
				544	u64 private:23;
				545	u64 prg_index:9;
				546	u64 rd_req:1;
				547	u64 wr_req:1;
				548	u64 exe_req:1;
				549	u64 priv_req:1;
				550	u64 devfn:8;
				551	u64 addr:52;
				552	};
				553
				554	#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)
Joerg Roedel	7f8312a	2015-11-17 16:11:39 +0100	[diff] [blame]	555
				556	static bool access_error(struct vm_area_struct vma, struct page_req_dsc req)
				557	{
				558	unsigned long requested = 0;
				559
				560	if (req->exe_req)
				561	requested \|= VM_EXEC;
				562
				563	if (req->rd_req)
				564	requested \|= VM_READ;
				565
				566	if (req->wr_req)
				567	requested \|= VM_WRITE;
				568
				569	return (requested & ~vma->vm_flags) != 0;
				570	}
				571
Ashok Raj	9d8c3af	2017-08-08 13:29:27 -0700	[diff] [blame]	572	static bool is_canonical_address(u64 addr)
				573	{
				574	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
				575	long saddr = (long) addr;
				576
				577	return (((saddr << shift) >> shift) == saddr);
				578	}
				579
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	580	static irqreturn_t prq_event_thread(int irq, void *d)
				581	{
				582	struct intel_iommu *iommu = d;
				583	struct intel_svm *svm = NULL;
				584	int head, tail, handled = 0;
				585
David Woodhouse	4692400	2016-02-15 12:42:38 +0000	[diff] [blame]	586	/* Clear PPR bit before reading head/tail registers, to
				587	* ensure that we get a new interrupt if needed. */
				588	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
				589
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	590	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
				591	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
				592	while (head != tail) {
David Woodhouse	0204a49	2015-10-13 17:18:10 +0100	[diff] [blame]	593	struct intel_svm_dev *sdev;
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	594	struct vm_area_struct *vma;
				595	struct page_req_dsc *req;
				596	struct qi_desc resp;
				597	int ret, result;
				598	u64 address;
				599
				600	handled = 1;
				601
				602	req = &iommu->prq[head / sizeof(*req)];
				603
				604	result = QI_RESP_FAILURE;
David Woodhouse	7f92a2e	2015-10-16 17:22:31 +0100	[diff] [blame]	605	address = (u64)req->addr << VTD_PAGE_SHIFT;
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	606	if (!req->pasid_present) {
				607	pr_err("%s: Page request without PASID: %08llx %08llx\n",
				608	iommu->name, ((unsigned long long *)req)[0],
				609	((unsigned long long *)req)[1]);
				610	goto bad_req;
				611	}
				612
				613	if (!svm \|\| svm->pasid != req->pasid) {
				614	rcu_read_lock();
				615	svm = idr_find(&iommu->pasid_idr, req->pasid);
				616	/* It can't go away, because the driver is not permitted
				617	* to unbind the mm while any page faults are outstanding.
				618	* So we only need RCU to protect the internal idr code. */
				619	rcu_read_unlock();
				620
				621	if (!svm) {
				622	pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
				623	iommu->name, req->pasid, ((unsigned long long *)req)[0],
				624	((unsigned long long *)req)[1]);
David Woodhouse	26322ab	2015-10-15 21:12:56 +0100	[diff] [blame]	625	goto no_pasid;
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	626	}
				627	}
				628
				629	result = QI_RESP_INVALID;
David Woodhouse	5cec753	2015-10-15 15:52:15 +0100	[diff] [blame]	630	/* Since we're using init_mm.pgd directly, we should never take
				631	* any faults on kernel addresses. */
				632	if (!svm->mm)
				633	goto bad_req;
David Woodhouse	e57e58b	2016-01-12 19:18:06 +0000	[diff] [blame]	634	/* If the mm is already defunct, don't handle faults. */
Vegard Nossum	388f793	2017-02-27 14:30:13 -0800	[diff] [blame]	635	if (!mmget_not_zero(svm->mm))
David Woodhouse	e57e58b	2016-01-12 19:18:06 +0000	[diff] [blame]	636	goto bad_req;
Ashok Raj	9d8c3af	2017-08-08 13:29:27 -0700	[diff] [blame]	637
				638	/* If address is not canonical, return invalid response */
				639	if (!is_canonical_address(address))
				640	goto bad_req;
				641
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	642	down_read(&svm->mm->mmap_sem);
				643	vma = find_extend_vma(svm->mm, address);
				644	if (!vma \|\| address < vma->vm_start)
				645	goto invalid;
				646
Joerg Roedel	7f8312a	2015-11-17 16:11:39 +0100	[diff] [blame]	647	if (access_error(vma, req))
				648	goto invalid;
				649
Kirill A. Shutemov	dcddffd	2016-07-26 15:25:18 -0700	[diff] [blame]	650	ret = handle_mm_fault(vma, address,
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	651	req->wr_req ? FAULT_FLAG_WRITE : 0);
				652	if (ret & VM_FAULT_ERROR)
				653	goto invalid;
				654
				655	result = QI_RESP_SUCCESS;
				656	invalid:
				657	up_read(&svm->mm->mmap_sem);
David Woodhouse	e57e58b	2016-01-12 19:18:06 +0000	[diff] [blame]	658	mmput(svm->mm);
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	659	bad_req:
				660	/* Accounting for major/minor faults? */
David Woodhouse	0204a49	2015-10-13 17:18:10 +0100	[diff] [blame]	661	rcu_read_lock();
				662	list_for_each_entry_rcu(sdev, &svm->devs, list) {
Dan Carpenter	3c7c2f3	2015-10-17 08:18:47 +0300	[diff] [blame]	663	if (sdev->sid == PCI_DEVID(req->bus, req->devfn))
David Woodhouse	0204a49	2015-10-13 17:18:10 +0100	[diff] [blame]	664	break;
				665	}
				666	/* Other devices can go away, but the drivers are not permitted
				667	* to unbind while any page faults might be in flight. So it's
				668	* OK to drop the 'lock' here now we have it. */
				669	rcu_read_unlock();
				670
				671	if (WARN_ON(&sdev->list == &svm->devs))
				672	sdev = NULL;
				673
				674	if (sdev && sdev->ops && sdev->ops->fault_cb) {
				675	int rwxp = (req->rd_req << 3) \| (req->wr_req << 2) \|
David Woodhouse	0bdec95	2015-10-28 15:14:09 +0900	[diff] [blame]	676	(req->exe_req << 1) \| (req->priv_req);
David Woodhouse	0204a49	2015-10-13 17:18:10 +0100	[diff] [blame]	677	sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result);
				678	}
David Woodhouse	26322ab	2015-10-15 21:12:56 +0100	[diff] [blame]	679	/* We get here in the error case where the PASID lookup failed,
				680	and these can be NULL. Do not use them below this point! */
				681	sdev = NULL;
				682	svm = NULL;
				683	no_pasid:
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	684	if (req->lpig) {
				685	/* Page Group Response */
				686	resp.low = QI_PGRP_PASID(req->pasid) \|
				687	QI_PGRP_DID((req->bus << 8) \| req->devfn) \|
				688	QI_PGRP_PASID_P(req->pasid_present) \|
				689	QI_PGRP_RESP_TYPE;
				690	resp.high = QI_PGRP_IDX(req->prg_index) \|
				691	QI_PGRP_PRIV(req->private) \| QI_PGRP_RESP_CODE(result);
				692
David Woodhouse	26322ab	2015-10-15 21:12:56 +0100	[diff] [blame]	693	qi_submit_sync(&resp, iommu);
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	694	} else if (req->srr) {
				695	/* Page Stream Response */
				696	resp.low = QI_PSTRM_IDX(req->prg_index) \|
				697	QI_PSTRM_PRIV(req->private) \| QI_PSTRM_BUS(req->bus) \|
				698	QI_PSTRM_PASID(req->pasid) \| QI_PSTRM_RESP_TYPE;
				699	resp.high = QI_PSTRM_ADDR(address) \| QI_PSTRM_DEVFN(req->devfn) \|
				700	QI_PSTRM_RESP_CODE(result);
				701
David Woodhouse	26322ab	2015-10-15 21:12:56 +0100	[diff] [blame]	702	qi_submit_sync(&resp, iommu);
David Woodhouse	a222a7f	2015-10-07 23:35:18 +0100	[diff] [blame]	703	}
				704
				705	head = (head + sizeof(*req)) & PRQ_RING_MASK;
				706	}
				707
				708	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
				709
				710	return IRQ_RETVAL(handled);
				711	}