// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "pasid.h"

static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);

#define PRQ_ORDER 0

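/*
 * Set up the page request queue (PRQ) for this IOMMU: allocate the queue
 * itself (PRQ_ORDER 0, i.e. a single page), request a threaded IRQ for page
 * request events and program the queue head, tail and address registers.
 */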
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct page *pages;
	int irq, ret;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
	err:
		free_pages((unsigned long)iommu->prq, PRQ_ORDER);
		iommu->prq = NULL;
		return ret;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		dmar_free_hwirq(irq);
		iommu->pr_irq = 0;
		goto err;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	init_completion(&iommu->prq_complete);

	return 0;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	if (iommu->pr_irq) {
		free_irq(iommu->pr_irq, iommu);
		dmar_free_hwirq(iommu->pr_irq);
		iommu->pr_irq = 0;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

static inline bool intel_svm_capable(struct intel_iommu *iommu)
{
	return iommu->flags & VTD_FLAG_SVM_CAPABLE;
}

void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}

	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

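/*
 * Queue a PASID-based IOTLB invalidation for @svm on @sdev covering @pages
 * pages starting at @address (the whole address space if @pages == -1),
 * followed by a device-TLB invalidation when the device has ATS enabled.
 */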
static void __flush_svm_range_dev(struct intel_svm *svm,
				  struct intel_svm_dev *sdev,
				  unsigned long address,
				  unsigned long pages, int ih)
{
	struct qi_desc desc;

	if (pages == -1) {
		desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
			QI_EIOTLB_DID(sdev->did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
		desc.qw1 = 0;
	} else {
		int mask = ilog2(__roundup_pow_of_two(pages));

		desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
				QI_EIOTLB_DID(sdev->did) |
				QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
				QI_EIOTLB_TYPE;
		desc.qw1 = QI_EIOTLB_ADDR(address) |
				QI_EIOTLB_IH(ih) |
				QI_EIOTLB_AM(mask);
	}
	desc.qw2 = 0;
	desc.qw3 = 0;
	qi_submit_sync(sdev->iommu, &desc, 1, 0);

	if (sdev->dev_iotlb) {
		desc.qw0 = QI_DEV_EIOTLB_PASID(svm->pasid) |
				QI_DEV_EIOTLB_SID(sdev->sid) |
				QI_DEV_EIOTLB_QDEP(sdev->qdep) |
				QI_DEIOTLB_TYPE;
		if (pages == -1) {
			desc.qw1 = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) |
					QI_DEV_EIOTLB_SIZE;
		} else if (pages > 1) {
			/* The least significant zero bit indicates the size. So,
			 * for example, an "address" value of 0x12345f000 will
			 * flush from 0x123440000 to 0x12347ffff (256KiB). */
			unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
			unsigned long mask = __rounddown_pow_of_two(address ^ last);

			desc.qw1 = QI_DEV_EIOTLB_ADDR((address & ~mask) |
					(mask - 1)) | QI_DEV_EIOTLB_SIZE;
		} else {
			desc.qw1 = QI_DEV_EIOTLB_ADDR(address);
		}
		desc.qw2 = 0;
		desc.qw3 = 0;
		qi_submit_sync(sdev->iommu, &desc, 1, 0);
	}
}

static void intel_flush_svm_range_dev(struct intel_svm *svm,
				      struct intel_svm_dev *sdev,
				      unsigned long address,
				      unsigned long pages, int ih)
{
	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
	unsigned long start = ALIGN_DOWN(address, align);
	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

	while (start < end) {
		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
		start += align;
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				  unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *sdev;

	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared. And __mmu_notifier_release() will delete us from
	 * the list of notifiers so that our invalidate_range() callback doesn't
	 * get called when the page tables are cleared. So we need to protect
	 * against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
					    svm->pasid, true);
	rcu_read_unlock();
}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);
static LIST_HEAD(global_svm_list);

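/* Walk the devices bound to @svm and run the loop body only for @d. */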
#define for_each_svm_dev(sdev, svm, d)			\
	list_for_each_entry((sdev), &(svm)->devs, list)	\
		if ((d) != (sdev)->dev) {} else

static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
			     struct intel_svm **rsvm,
			     struct intel_svm_dev **rsdev)
{
	struct intel_svm_dev *d, *sdev = NULL;
	struct intel_svm *svm;

	/* The caller should hold the pasid_mutex lock */
	if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
		return -EINVAL;

	if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
		return -EINVAL;

	svm = ioasid_find(NULL, pasid, NULL);
	if (IS_ERR(svm))
		return PTR_ERR(svm);

	if (!svm)
		goto out;

	/*
	 * If we found the svm for this PASID, there must be at least one
	 * device bound to it.
	 */
	if (WARN_ON(list_empty(&svm->devs)))
		return -EINVAL;

	rcu_read_lock();
	list_for_each_entry_rcu(d, &svm->devs, list) {
		if (d->dev == dev) {
			sdev = d;
			break;
		}
	}
	rcu_read_unlock();

out:
	*rsvm = svm;
	*rsdev = sdev;

	return 0;
}

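/*
 * Bind a guest PASID to @dev: install the guest-provided first-level page
 * table (data->gpgd) into the host PASID entry in nested mode, on top of
 * the second-level tables of @domain. Used by the guest SVA path.
 */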
int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
			  struct iommu_gpasid_bind_data *data)
{
	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
	struct intel_svm_dev *sdev = NULL;
	struct dmar_domain *dmar_domain;
	struct device_domain_info *info;
	struct intel_svm *svm = NULL;
	unsigned long iflags;
	int ret = 0;

	if (WARN_ON(!iommu) || !data)
		return -EINVAL;

	if (data->format != IOMMU_PASID_FORMAT_INTEL_VTD)
		return -EINVAL;

	/* IOMMU core ensures argsz is more than the start of the union */
	if (data->argsz < offsetofend(struct iommu_gpasid_bind_data, vendor.vtd))
		return -EINVAL;

	/* Make sure no undefined flags are used in vendor data */
	if (data->vendor.vtd.flags & ~(IOMMU_SVA_VTD_GPASID_LAST - 1))
		return -EINVAL;

	if (!dev_is_pci(dev))
		return -ENOTSUPP;

	/* VT-d supports devices with full 20 bit PASIDs only */
	if (pci_max_pasids(to_pci_dev(dev)) != PASID_MAX)
		return -EINVAL;

	/*
	 * We only check the host PASID range; we have no way to check the
	 * guest PASID range.
	 */
	if (data->hpasid <= 0 || data->hpasid >= PASID_MAX)
		return -EINVAL;

	info = get_domain_info(dev);
	if (!info)
		return -EINVAL;

	dmar_domain = to_dmar_domain(domain);

	mutex_lock(&pasid_mutex);
	ret = pasid_to_svm_sdev(dev, data->hpasid, &svm, &sdev);
	if (ret)
		goto out;

	if (sdev) {
		/*
		 * Do not allow multiple bindings of the same device-PASID since
		 * there is only one SL page table per PASID. We may revisit
		 * this once sharing a PGD across domains is supported.
		 */
		dev_warn_ratelimited(dev, "Already bound with PASID %u\n",
				     svm->pasid);
		ret = -EBUSY;
		goto out;
	}

	if (!svm) {
		/* We come here when the PASID has never been bound to a device. */
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm) {
			ret = -ENOMEM;
			goto out;
		}
		/* REVISIT: upper layer/VFIO can track the host process that
		 * binds the PASID. ioasid_set = mm might be sufficient for vfio
		 * to check pasid VMM ownership. We can drop the following line
		 * once VFIO and IOASID set check is in place.
		 */
		svm->mm = get_task_mm(current);
		svm->pasid = data->hpasid;
		if (data->flags & IOMMU_SVA_GPASID_VAL) {
			svm->gpasid = data->gpasid;
			svm->flags |= SVM_FLAG_GUEST_PASID;
		}
		ioasid_set_data(data->hpasid, svm);
		INIT_LIST_HEAD_RCU(&svm->devs);
		mmput(svm->mm);
	}
	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto out;
	}
	sdev->dev = dev;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	sdev->iommu = iommu;

	/* Only count users if device has aux domains */
	if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
		sdev->users = 1;

	/* Set up device context entry for PASID if not enabled already */
	ret = intel_iommu_enable_pasid(iommu, sdev->dev);
	if (ret) {
		dev_err_ratelimited(dev, "Failed to enable PASID capability\n");
		kfree(sdev);
		goto out;
	}

	/*
	 * The PASID table is per device for better security. Therefore, for
	 * each bind of a new device even with an existing PASID, we need to
	 * call the nested mode setup function here.
	 */
	spin_lock_irqsave(&iommu->lock, iflags);
	ret = intel_pasid_setup_nested(iommu, dev,
				       (pgd_t *)(uintptr_t)data->gpgd,
				       data->hpasid, &data->vendor.vtd, dmar_domain,
				       data->addr_width);
	spin_unlock_irqrestore(&iommu->lock, iflags);
	if (ret) {
		dev_err_ratelimited(dev, "Failed to set up PASID %llu in nested mode, Err %d\n",
				    data->hpasid, ret);
		/*
		 * The PASID entry should be in cleared state if nested mode
		 * set up failed. So we only need to clear the IOASID tracking
		 * data such that the free call will succeed.
		 */
		kfree(sdev);
		goto out;
	}

	svm->flags |= SVM_FLAG_GUEST_MODE;

	init_rcu_head(&sdev->rcu);
	list_add_rcu(&sdev->list, &svm->devs);
 out:
	if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
		ioasid_set_data(data->hpasid, NULL);
		kfree(svm);
	}

	mutex_unlock(&pasid_mutex);
	return ret;
}

int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
{
	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	int ret;

	if (WARN_ON(!iommu))
		return -EINVAL;

	mutex_lock(&pasid_mutex);
	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
	if (ret)
		goto out;

	if (sdev) {
		if (iommu_dev_feature_enabled(dev, IOMMU_DEV_FEAT_AUX))
			sdev->users--;
		if (!sdev->users) {
			list_del_rcu(&sdev->list);
			intel_pasid_tear_down_entry(iommu, dev,
						    svm->pasid, false);
			intel_svm_drain_prq(dev, svm->pasid);
			kfree_rcu(sdev, rcu);

			if (list_empty(&svm->devs)) {
				/*
				 * We do not free the IOASID here because the
				 * IOMMU driver did not allocate it. Unlike
				 * native SVM, the IOASID for guest use was
				 * allocated prior to the bind call. In any
				 * case, if the free call comes before the
				 * unbind, the IOMMU driver will get notified
				 * and perform cleanup.
				 */
				ioasid_set_data(pasid, NULL);
				kfree(svm);
			}
		}
	}
out:
	mutex_unlock(&pasid_mutex);
	return ret;
}

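/*
 * Publish a new PASID value for @mm: store it in mm->pasid and refresh the
 * PASID MSR on every CPU currently running this mm's tasks. Also used with
 * PASID_DISABLED to clear the mm's PASID on the final unbind.
 */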
static void _load_pasid(void *unused)
{
	update_pasid();
}

static void load_pasid(struct mm_struct *mm, u32 pasid)
{
	mutex_lock(&mm->context.lock);

	/* Synchronize with READ_ONCE in update_pasid(). */
	smp_store_release(&mm->pasid, pasid);

	/* Update PASID MSR on all CPUs running the mm's tasks. */
	on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true);

	mutex_unlock(&mm->context.lock);
}

/* Caller must hold pasid_mutex, mm reference */
static int
intel_svm_bind_mm(struct device *dev, unsigned int flags,
		  struct svm_dev_ops *ops,
		  struct mm_struct *mm, struct intel_svm_dev **sd)
{
	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
	struct device_domain_info *info;
	struct intel_svm_dev *sdev;
	struct intel_svm *svm = NULL;
	unsigned long iflags;
	int pasid_max;
	int ret;

	if (!iommu || dmar_disabled)
		return -EINVAL;

	if (!intel_svm_capable(iommu))
		return -ENOTSUPP;

	if (dev_is_pci(dev)) {
		pasid_max = pci_max_pasids(to_pci_dev(dev));
		if (pasid_max < 0)
			return -EINVAL;
	} else
		pasid_max = 1 << 20;

	/* Binding a supervisor PASID should have mm = NULL */
	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap) || mm) {
			pr_err("Supervisor PASID with user provided mm.\n");
			return -EINVAL;
		}
	}

	if (!(flags & SVM_FLAG_PRIVATE_PASID)) {
		struct intel_svm *t;

		list_for_each_entry(t, &global_svm_list, list) {
			if (t->mm != mm || (t->flags & SVM_FLAG_PRIVATE_PASID))
				continue;

			svm = t;
			if (svm->pasid >= pasid_max) {
				dev_warn(dev,
					 "Limited PASID width. Cannot use existing PASID %d\n",
					 svm->pasid);
				ret = -ENOSPC;
				goto out;
			}

			/* Find the matching device in svm list */
			for_each_svm_dev(sdev, svm, dev) {
				if (sdev->ops != ops) {
					ret = -EBUSY;
					goto out;
				}
				sdev->users++;
				goto success;
			}

			break;
		}
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto out;
	}
	sdev->dev = dev;
	sdev->iommu = iommu;

	ret = intel_iommu_enable_pasid(iommu, dev);
	if (ret) {
		kfree(sdev);
		goto out;
	}

	info = get_domain_info(dev);
	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	if (info->ats_enabled) {
		sdev->dev_iotlb = 1;
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Finish the setup now we know we're keeping it */
	sdev->users = 1;
	sdev->ops = ops;
	init_rcu_head(&sdev->rcu);

	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm) {
			ret = -ENOMEM;
			kfree(sdev);
			goto out;
		}

		if (pasid_max > intel_pasid_max_id)
			pasid_max = intel_pasid_max_id;

		/* Do not use PASID 0, reserved for RID to PASID */
		svm->pasid = ioasid_alloc(NULL, PASID_MIN,
					  pasid_max - 1, svm);
		if (svm->pasid == INVALID_IOASID) {
			kfree(svm);
			kfree(sdev);
			ret = -ENOSPC;
			goto out;
		}
		svm->notifier.ops = &intel_mmuops;
		svm->mm = mm;
		svm->flags = flags;
		INIT_LIST_HEAD_RCU(&svm->devs);
		INIT_LIST_HEAD(&svm->list);
		ret = -ENOMEM;
		if (mm) {
			ret = mmu_notifier_register(&svm->notifier, mm);
			if (ret) {
				ioasid_put(svm->pasid);
				kfree(svm);
				kfree(sdev);
				goto out;
			}
		}

		spin_lock_irqsave(&iommu->lock, iflags);
		ret = intel_pasid_setup_first_level(iommu, dev,
				mm ? mm->pgd : init_mm.pgd,
				svm->pasid, FLPT_DEFAULT_DID,
				(mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
				(cpu_feature_enabled(X86_FEATURE_LA57) ?
				 PASID_FLAG_FL5LP : 0));
		spin_unlock_irqrestore(&iommu->lock, iflags);
		if (ret) {
			if (mm)
				mmu_notifier_unregister(&svm->notifier, mm);
			ioasid_put(svm->pasid);
			kfree(svm);
			kfree(sdev);
			goto out;
		}

		list_add_tail(&svm->list, &global_svm_list);
		if (mm) {
			/* The newly allocated pasid is loaded to the mm. */
			load_pasid(mm, svm->pasid);
		}
	} else {
		/*
		 * Binding a new device with an existing PASID, need to set up
		 * the PASID entry.
		 */
		spin_lock_irqsave(&iommu->lock, iflags);
		ret = intel_pasid_setup_first_level(iommu, dev,
				mm ? mm->pgd : init_mm.pgd,
				svm->pasid, FLPT_DEFAULT_DID,
				(mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
				(cpu_feature_enabled(X86_FEATURE_LA57) ?
				 PASID_FLAG_FL5LP : 0));
		spin_unlock_irqrestore(&iommu->lock, iflags);
		if (ret) {
			kfree(sdev);
			goto out;
		}
	}
	list_add_rcu(&sdev->list, &svm->devs);
success:
	sdev->pasid = svm->pasid;
	sdev->sva.dev = dev;
	if (sd)
		*sd = sdev;
	ret = 0;
out:
	return ret;
}

/* Caller must hold pasid_mutex */
static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_iommu *iommu;
	struct intel_svm *svm;
	int ret = -EINVAL;

	iommu = device_to_iommu(dev, NULL, NULL);
	if (!iommu)
		goto out;

	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
	if (ret)
		goto out;

	if (sdev) {
		sdev->users--;
		if (!sdev->users) {
			list_del_rcu(&sdev->list);
			/* Flush the PASID cache and IOTLB for this device.
			 * Note that we do depend on the hardware *not* using
			 * the PASID any more. Just as we depend on other
			 * devices never using PASIDs that they have no right
			 * to use. We have a *shared* PASID table, because it's
			 * large and has to be physically contiguous. So it's
			 * hard to be as defensive as we might like. */
			intel_pasid_tear_down_entry(iommu, dev,
						    svm->pasid, false);
			intel_svm_drain_prq(dev, svm->pasid);
			kfree_rcu(sdev, rcu);

			if (list_empty(&svm->devs)) {
				ioasid_put(svm->pasid);
				if (svm->mm) {
					mmu_notifier_unregister(&svm->notifier, svm->mm);
					/* Clear the mm's pasid. */
					load_pasid(svm->mm, PASID_DISABLED);
				}
				list_del(&svm->list);
				/* We mandate that no page faults may be outstanding
				 * for the PASID when intel_svm_unbind_mm() is called.
				 * If that is not obeyed, subtle errors will happen.
				 * Let's make them less subtle... */
				memset(svm, 0x6b, sizeof(*svm));
				kfree(svm);
			}
		}
	}
out:
	return ret;
}

/* Page request queue descriptor */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

#define PRQ_RING_MASK	((0x1000 << PRQ_ORDER) - 0x20)

static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
{
	unsigned long requested = 0;

	if (req->exe_req)
		requested |= VM_EXEC;

	if (req->rd_req)
		requested |= VM_READ;

	if (req->wr_req)
		requested |= VM_WRITE;

	return (requested & ~vma->vm_flags) != 0;
}

static bool is_canonical_address(u64 addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long saddr = (long) addr;

	return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_svm_drain_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then follow the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
static void intel_svm_drain_prq(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct qi_desc desc[3];
	struct pci_dev *pdev;
	int head, tail;
	u16 sid, did;
	int qdep;

	info = get_domain_info(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return;

	if (!info->pri_enabled)
		return;

	iommu = info->iommu;
	domain = info->domain;
	pdev = to_pci_dev(dev);
	sid = PCI_DEVID(info->bus, info->devfn);
	did = domain->iommu_did[iommu->seq_id];
	qdep = pci_ats_queue_depth(pdev);

	/*
	 * Check and wait until all pending page requests in the queue are
	 * handled by the prq handling thread.
	 */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct page_req_dsc *req;

		req = &iommu->prq[head / sizeof(*req)];
		if (!req->pasid_present || req->pasid != pasid) {
			head = (head + sizeof(*req)) & PRQ_RING_MASK;
			continue;
		}

		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}

	/*
	 * Perform steps described in VT-d spec CH7.10 to drain page
	 * requests and responses in hardware.
	 */
	memset(desc, 0, sizeof(desc));
	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_FENCE |
			QI_IWD_TYPE;
	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
			QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
			QI_DEV_EIOTLB_SID(sid) |
			QI_DEV_EIOTLB_QDEP(qdep) |
			QI_DEIOTLB_TYPE |
			QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
	reinit_completion(&iommu->prq_complete);
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		wait_for_completion(&iommu->prq_complete);
		goto qi_retry;
	}
}

static int prq_to_iommu_prot(struct page_req_dsc *req)
{
	int prot = 0;

	if (req->rd_req)
		prot |= IOMMU_FAULT_PERM_READ;
	if (req->wr_req)
		prot |= IOMMU_FAULT_PERM_WRITE;
	if (req->exe_req)
		prot |= IOMMU_FAULT_PERM_EXEC;
	if (req->pm_req)
		prot |= IOMMU_FAULT_PERM_PRIV;

	return prot;
}

static int
intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
{
	struct iommu_fault_event event;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	/* Fill in event data for device specific processing */
	memset(&event, 0, sizeof(struct iommu_fault_event));
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = desc->addr;
	event.fault.prm.pasid = desc->pasid;
	event.fault.prm.grpid = desc->prg_index;
	event.fault.prm.perm = prq_to_iommu_prot(desc);

	if (desc->lpig)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (desc->pasid_present) {
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
	}
	if (desc->priv_data_present) {
		/*
		 * Set the last page in group bit if private data is present;
		 * a page response is required just as it is for LPIG.
		 * iommu_report_device_fault() doesn't understand this vendor
		 * specific requirement thus we set last_page as a workaround.
		 */
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
		memcpy(event.fault.prm.private_data, desc->priv_data,
		       sizeof(desc->priv_data));
	}

	return iommu_report_device_fault(dev, &event);
}

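/*
 * Threaded IRQ handler for the page request queue: walk the descriptors
 * between the head and tail registers, look up the intel_svm/intel_svm_dev
 * for each request, service recoverable faults via handle_mm_fault() (or
 * forward them with intel_svm_prq_report() for guest-mode PASIDs), and send
 * a page group response whenever the request requires one.
 */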
static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_iommu *iommu = d;
	struct intel_svm *svm = NULL;
	int head, tail, handled = 0;

	/* Clear PPR bit before reading head/tail registers, to
	 * ensure that we get a new interrupt if needed. */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct vm_area_struct *vma;
		struct page_req_dsc *req;
		struct qi_desc resp;
		int result;
		vm_fault_t ret;
		u64 address;

		handled = 1;

		req = &iommu->prq[head / sizeof(*req)];

		result = QI_RESP_FAILURE;
		address = (u64)req->addr << VTD_PAGE_SHIFT;
		if (!req->pasid_present) {
			pr_err("%s: Page request without PASID: %08llx %08llx\n",
			       iommu->name, ((unsigned long long *)req)[0],
			       ((unsigned long long *)req)[1]);
			goto no_pasid;
		}

		if (!svm || svm->pasid != req->pasid) {
			rcu_read_lock();
			svm = ioasid_find(NULL, req->pasid, NULL);
			/* It *can't* go away, because the driver is not permitted
			 * to unbind the mm while any page faults are outstanding.
			 * So we only need RCU to protect the internal ioasid lookup. */
			rcu_read_unlock();
			if (IS_ERR_OR_NULL(svm)) {
				pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
				       iommu->name, req->pasid, ((unsigned long long *)req)[0],
				       ((unsigned long long *)req)[1]);
				goto no_pasid;
			}
		}

		if (!sdev || sdev->sid != req->rid) {
			struct intel_svm_dev *t;

			sdev = NULL;
			rcu_read_lock();
			list_for_each_entry_rcu(t, &svm->devs, list) {
				if (t->sid == req->rid) {
					sdev = t;
					break;
				}
			}
			rcu_read_unlock();
		}

		result = QI_RESP_INVALID;
		/* Since we're using init_mm.pgd directly, we should never take
		 * any faults on kernel addresses. */
		if (!svm->mm)
			goto bad_req;

		/* If address is not canonical, return invalid response */
		if (!is_canonical_address(address))
			goto bad_req;

		/*
		 * If the prq is to be handled outside the iommu driver via a
		 * receiver of the fault notifiers, we skip the page response here.
		 */
		if (svm->flags & SVM_FLAG_GUEST_MODE) {
			if (sdev && !intel_svm_prq_report(sdev->dev, req))
				goto prq_advance;
			else
				goto bad_req;
		}

		/* If the mm is already defunct, don't handle faults. */
		if (!mmget_not_zero(svm->mm))
			goto bad_req;

		mmap_read_lock(svm->mm);
		vma = find_extend_vma(svm->mm, address);
		if (!vma || address < vma->vm_start)
			goto invalid;

		if (access_error(vma, req))
			goto invalid;

		ret = handle_mm_fault(vma, address,
				      req->wr_req ? FAULT_FLAG_WRITE : 0,
				      NULL);
		if (ret & VM_FAULT_ERROR)
			goto invalid;

		result = QI_RESP_SUCCESS;
invalid:
		mmap_read_unlock(svm->mm);
		mmput(svm->mm);
bad_req:
		WARN_ON(!sdev);
		if (sdev && sdev->ops && sdev->ops->fault_cb) {
			int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
				(req->exe_req << 1) | (req->pm_req);
			sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr,
					    req->priv_data, rwxp, result);
		}
		/* We get here in the error case where the PASID lookup failed,
		   and these can be NULL. Do not use them below this point! */
		sdev = NULL;
		svm = NULL;
no_pasid:
		if (req->lpig || req->priv_data_present) {
			/*
			 * Per VT-d spec. v3.0 ch7.7, system software must
			 * respond with page group response if private data
			 * is present (PDP) or last page in group (LPIG) bit
			 * is set. This is an additional VT-d feature beyond
			 * PCI ATS spec.
			 */
			resp.qw0 = QI_PGRP_PASID(req->pasid) |
				QI_PGRP_DID(req->rid) |
				QI_PGRP_PASID_P(req->pasid_present) |
				QI_PGRP_PDP(req->priv_data_present) |
				QI_PGRP_RESP_CODE(result) |
				QI_PGRP_RESP_TYPE;
			resp.qw1 = QI_PGRP_IDX(req->prg_index) |
				QI_PGRP_LPIG(req->lpig);
			resp.qw2 = 0;
			resp.qw3 = 0;

			if (req->priv_data_present)
				memcpy(&resp.qw2, req->priv_data,
				       sizeof(req->priv_data));
			qi_submit_sync(iommu, &resp, 1, 0);
		}
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	/*
	 * Clear the page request overflow bit and wake up all threads that
	 * are waiting for the completion of this handling.
	 */
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO)
		writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);

	if (!completion_done(&iommu->prq_complete))
		complete(&iommu->prq_complete);

	return IRQ_RETVAL(handled);
}

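/*
 * iommu_sva API entry points: thin wrappers that take pasid_mutex and call
 * into intel_svm_bind_mm()/intel_svm_unbind_mm() above.
 */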
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
struct iommu_sva *
intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
	struct iommu_sva *sva = ERR_PTR(-EINVAL);
	struct intel_svm_dev *sdev = NULL;
	unsigned int flags = 0;
	int ret;

	/*
	 * TODO: Consolidate with generic iommu-sva bind after it is merged.
	 * It will require shared SVM data structures, i.e. combine io_mm
	 * and intel_svm etc.
	 */
	if (drvdata)
		flags = *(unsigned int *)drvdata;
	mutex_lock(&pasid_mutex);
	ret = intel_svm_bind_mm(dev, flags, NULL, mm, &sdev);
	if (ret)
		sva = ERR_PTR(ret);
	else if (sdev)
		sva = &sdev->sva;
	else
		WARN(!sdev, "SVM bind succeeded with no sdev!\n");

	mutex_unlock(&pasid_mutex);

	return sva;
}

void intel_svm_unbind(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev;

	mutex_lock(&pasid_mutex);
	sdev = to_intel_svm_dev(sva);
	intel_svm_unbind_mm(sdev->dev, sdev->pasid);
	mutex_unlock(&pasid_mutex);
}

u32 intel_svm_get_pasid(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev;
	u32 pasid;

	mutex_lock(&pasid_mutex);
	sdev = to_intel_svm_dev(sva);
	pasid = sdev->pasid;
	mutex_unlock(&pasid_mutex);

	return pasid;
}

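/*
 * Complete a previously reported page request: after validating the response
 * from the fault consumer, send a page group response descriptor to hardware
 * when the original request carried private data or had the last-page-in-group
 * bit set.
 */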
int intel_svm_page_response(struct device *dev,
			    struct iommu_fault_event *evt,
			    struct iommu_page_response *msg)
{
	struct iommu_fault_page_request *prm;
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm = NULL;
	struct intel_iommu *iommu;
	bool private_present;
	bool pasid_present;
	bool last_page;
	u8 bus, devfn;
	int ret = 0;
	u16 sid;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!msg || !evt)
		return -EINVAL;

	mutex_lock(&pasid_mutex);

	prm = &evt->fault.prm;
	sid = PCI_DEVID(bus, devfn);
	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

	if (!pasid_present) {
		ret = -EINVAL;
		goto out;
	}

	if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
		ret = -EINVAL;
		goto out;
	}

	ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
	if (ret || !sdev) {
		ret = -ENODEV;
		goto out;
	}

	/*
	 * For responses from userspace, we need to make sure that the
	 * pasid has been bound to its mm.
	 */
	if (svm->flags & SVM_FLAG_GUEST_MODE) {
		struct mm_struct *mm;

		mm = get_task_mm(current);
		if (!mm) {
			ret = -EINVAL;
			goto out;
		}

		if (mm != svm->mm) {
			ret = -ENODEV;
			mmput(mm);
			goto out;
		}

		mmput(mm);
	}

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must respond
	 * with page group response if private data is present (PDP)
	 * or last page in group (LPIG) bit is set. This is an
	 * additional VT-d requirement beyond PCI ATS spec.
	 */
	if (last_page || private_present) {
		struct qi_desc desc;

		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
				QI_PGRP_PASID_P(pasid_present) |
				QI_PGRP_PDP(private_present) |
				QI_PGRP_RESP_CODE(msg->code) |
				QI_PGRP_RESP_TYPE;
		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
		desc.qw2 = 0;
		desc.qw3 = 0;
		if (private_present)
			memcpy(&desc.qw2, prm->private_data,
			       sizeof(prm->private_data));

		qi_submit_sync(iommu, &desc, 1, 0);
	}
out:
	mutex_unlock(&pasid_mutex);
	return ret;
}