blob: b43dc1edcef065085bdd41c855b0043780eea211 [file] [log] [blame]
David Woodhouse8a94ade2015-03-24 14:54:56 +00001/*
2 * Copyright © 2015 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>
14 */
15
16#include <linux/intel-iommu.h>
David Woodhouse2f26e0a2015-09-09 11:40:47 +010017#include <linux/mmu_notifier.h>
18#include <linux/sched.h>
Ingo Molnar6e84f312017-02-08 18:51:29 +010019#include <linux/sched/mm.h>
David Woodhouse2f26e0a2015-09-09 11:40:47 +010020#include <linux/slab.h>
21#include <linux/intel-svm.h>
22#include <linux/rculist.h>
23#include <linux/pci.h>
24#include <linux/pci-ats.h>
David Woodhousea222a7f2015-10-07 23:35:18 +010025#include <linux/dmar.h>
26#include <linux/interrupt.h>
Ashok Raj9d8c3af2017-08-08 13:29:27 -070027#include <asm/page.h>
David Woodhousea222a7f2015-10-07 23:35:18 +010028
29static irqreturn_t prq_event_thread(int irq, void *d);
David Woodhouse2f26e0a2015-09-09 11:40:47 +010030
31struct pasid_entry {
32 u64 val;
33};
David Woodhouse8a94ade2015-03-24 14:54:56 +000034
David Woodhouse907fea32015-10-13 14:11:13 +010035struct pasid_state_entry {
36 u64 val;
37};
38
David Woodhouse8a94ade2015-03-24 14:54:56 +000039int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
40{
41 struct page *pages;
42 int order;
43
Sohil Mehta59103ca2017-12-20 11:59:25 -080044 if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
45 !cap_fl1gp_support(iommu->cap))
46 return -EINVAL;
47
Sohil Mehtaf1ac10c2017-12-20 11:59:26 -080048 if (cpu_feature_enabled(X86_FEATURE_LA57) &&
49 !cap_5lp_support(iommu->cap))
50 return -EINVAL;
51
David Woodhouse91017042016-09-12 10:49:11 +080052 /* Start at 2 because it's defined as 2^(1+PSS) */
53 iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
David Woodhouse8a94ade2015-03-24 14:54:56 +000054
David Woodhouse91017042016-09-12 10:49:11 +080055 /* Eventually I'm promised we will get a multi-level PASID table
56 * and it won't have to be physically contiguous. Until then,
57 * limit the size because 8MiB contiguous allocations can be hard
58 * to come by. The limit of 0x20000, which is 1MiB for each of
59 * the PASID and PASID-state tables, is somewhat arbitrary. */
60 if (iommu->pasid_max > 0x20000)
61 iommu->pasid_max = 0x20000;
62
63 order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
David Woodhouse8a94ade2015-03-24 14:54:56 +000064 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
65 if (!pages) {
66 pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
67 iommu->name);
68 return -ENOMEM;
69 }
70 iommu->pasid_table = page_address(pages);
71 pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
72
73 if (ecap_dis(iommu->ecap)) {
David Woodhouse91017042016-09-12 10:49:11 +080074 /* Just making it explicit... */
75 BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
David Woodhouse8a94ade2015-03-24 14:54:56 +000076 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
77 if (pages)
78 iommu->pasid_state_table = page_address(pages);
79 else
80 pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
81 iommu->name);
82 }
83
David Woodhouse2f26e0a2015-09-09 11:40:47 +010084 idr_init(&iommu->pasid_idr);
85
David Woodhouse8a94ade2015-03-24 14:54:56 +000086 return 0;
87}
88
89int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
90{
David Woodhouse91017042016-09-12 10:49:11 +080091 int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
David Woodhouse8a94ade2015-03-24 14:54:56 +000092
93 if (iommu->pasid_table) {
94 free_pages((unsigned long)iommu->pasid_table, order);
95 iommu->pasid_table = NULL;
96 }
97 if (iommu->pasid_state_table) {
98 free_pages((unsigned long)iommu->pasid_state_table, order);
99 iommu->pasid_state_table = NULL;
100 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100101 idr_destroy(&iommu->pasid_idr);
David Woodhouse8a94ade2015-03-24 14:54:56 +0000102 return 0;
103}
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100104
/*
 * Page allocation order of the page request queue: the queue occupies
 * (1 << PRQ_ORDER) pages. The same value is OR-ed into the queue address
 * register when the queue is enabled.
 */
#define PRQ_ORDER 0
106
107int intel_svm_enable_prq(struct intel_iommu *iommu)
108{
109 struct page *pages;
110 int irq, ret;
111
112 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
113 if (!pages) {
114 pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
115 iommu->name);
116 return -ENOMEM;
117 }
118 iommu->prq = page_address(pages);
119
120 irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
121 if (irq <= 0) {
122 pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
123 iommu->name);
124 ret = -EINVAL;
125 err:
126 free_pages((unsigned long)iommu->prq, PRQ_ORDER);
127 iommu->prq = NULL;
128 return ret;
129 }
130 iommu->pr_irq = irq;
131
132 snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
133
134 ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
135 iommu->prq_name, iommu);
136 if (ret) {
137 pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
138 iommu->name);
139 dmar_free_hwirq(irq);
Jerry Snitselaar72d54812017-12-20 09:48:56 -0700140 iommu->pr_irq = 0;
David Woodhousea222a7f2015-10-07 23:35:18 +0100141 goto err;
142 }
143 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
144 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
145 dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
146
147 return 0;
148}
149
150int intel_svm_finish_prq(struct intel_iommu *iommu)
151{
152 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
153 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
154 dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
155
Jerry Snitselaar72d54812017-12-20 09:48:56 -0700156 if (iommu->pr_irq) {
157 free_irq(iommu->pr_irq, iommu);
158 dmar_free_hwirq(iommu->pr_irq);
159 iommu->pr_irq = 0;
160 }
David Woodhousea222a7f2015-10-07 23:35:18 +0100161
162 free_pages((unsigned long)iommu->prq, PRQ_ORDER);
163 iommu->prq = NULL;
164
165 return 0;
166}
167
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100168static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
David Woodhouse5d52f482015-10-20 15:52:13 +0100169 unsigned long address, unsigned long pages, int ih, int gl)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100170{
171 struct qi_desc desc;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100172
David Woodhouse5d52f482015-10-20 15:52:13 +0100173 if (pages == -1) {
David Woodhousee0349922015-10-16 19:36:53 +0100174 /* For global kernel pages we have to flush them in *all* PASIDs
175 * because that's the only option the hardware gives us. Despite
176 * the fact that they are actually only accessible through one. */
177 if (gl)
178 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
179 QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE;
180 else
181 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
182 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100183 desc.high = 0;
184 } else {
David Woodhouse5d52f482015-10-20 15:52:13 +0100185 int mask = ilog2(__roundup_pow_of_two(pages));
186
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100187 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
188 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
David Woodhousee0349922015-10-16 19:36:53 +0100189 desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) |
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100190 QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
191 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100192 qi_submit_sync(&desc, svm->iommu);
193
194 if (sdev->dev_iotlb) {
195 desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
196 QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
David Woodhouse5d52f482015-10-20 15:52:13 +0100197 if (pages == -1) {
198 desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE;
199 } else if (pages > 1) {
200 /* The least significant zero bit indicates the size. So,
201 * for example, an "address" value of 0x12345f000 will
202 * flush from 0x123440000 to 0x12347ffff (256KiB). */
203 unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
204 unsigned long mask = __rounddown_pow_of_two(address ^ last);;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100205
David Woodhouse5d52f482015-10-20 15:52:13 +0100206 desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100207 } else {
208 desc.high = QI_DEV_EIOTLB_ADDR(address);
209 }
210 qi_submit_sync(&desc, svm->iommu);
211 }
212}
213
214static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
David Woodhouse5d52f482015-10-20 15:52:13 +0100215 unsigned long pages, int ih, int gl)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100216{
217 struct intel_svm_dev *sdev;
218
David Woodhouse907fea32015-10-13 14:11:13 +0100219 /* Try deferred invalidate if available */
220 if (svm->iommu->pasid_state_table &&
221 !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
222 return;
223
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100224 rcu_read_lock();
225 list_for_each_entry_rcu(sdev, &svm->devs, list)
David Woodhousee0349922015-10-16 19:36:53 +0100226 intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100227 rcu_read_unlock();
228}
229
230static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
231 unsigned long address, pte_t pte)
232{
233 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
234
David Woodhousee0349922015-10-16 19:36:53 +0100235 intel_flush_svm_range(svm, address, 1, 1, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100236}
237
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100238/* Pages have been freed at this point */
239static void intel_invalidate_range(struct mmu_notifier *mn,
240 struct mm_struct *mm,
241 unsigned long start, unsigned long end)
242{
243 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
244
245 intel_flush_svm_range(svm, start,
David Woodhousee0349922015-10-16 19:36:53 +0100246 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100247}
248
249
David Woodhouse5a10ba22015-10-24 21:06:39 +0200250static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100251{
252 struct qi_desc desc;
253
254 desc.high = 0;
David Woodhouse5a10ba22015-10-24 21:06:39 +0200255 desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100256
257 qi_submit_sync(&desc, svm->iommu);
258}
259
260static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
261{
262 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
David Woodhousee57e58b2016-01-12 19:18:06 +0000263 struct intel_svm_dev *sdev;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100264
David Woodhousee57e58b2016-01-12 19:18:06 +0000265 /* This might end up being called from exit_mmap(), *before* the page
266 * tables are cleared. And __mmu_notifier_release() will delete us from
267 * the list of notifiers so that our invalidate_range() callback doesn't
268 * get called when the page tables are cleared. So we need to protect
269 * against hardware accessing those page tables.
270 *
271 * We do it by clearing the entry in the PASID table and then flushing
272 * the IOTLB and the PASID table caches. This might upset hardware;
273 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
274 * page) so that we end up taking a fault that the hardware really
275 * *has* to handle gracefully without affecting other processes.
276 */
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100277 svm->iommu->pasid_table[svm->pasid].val = 0;
David Woodhousee57e58b2016-01-12 19:18:06 +0000278 wmb();
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100279
David Woodhousee57e58b2016-01-12 19:18:06 +0000280 rcu_read_lock();
281 list_for_each_entry_rcu(sdev, &svm->devs, list) {
282 intel_flush_pasid_dev(svm, sdev, svm->pasid);
283 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
284 }
285 rcu_read_unlock();
286
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100287}
288
289static const struct mmu_notifier_ops intel_mmuops = {
290 .release = intel_mm_release,
291 .change_pte = intel_change_pte,
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100292 .invalidate_range = intel_invalidate_range,
293};
294
/*
 * Held by the bind, unbind and PASID-validity entry points in this file
 * while they look up or modify the per-IOMMU PASID IDR and the device
 * lists hanging off each intel_svm.
 */
static DEFINE_MUTEX(pasid_mutex);
296
David Woodhouse0204a492015-10-13 17:18:10 +0100297int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100298{
299 struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
300 struct intel_svm_dev *sdev;
301 struct intel_svm *svm = NULL;
David Woodhouse5cec7532015-10-15 15:52:15 +0100302 struct mm_struct *mm = NULL;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100303 int pasid_max;
304 int ret;
305
Lu Baolu2e2e35d2017-11-03 10:51:32 -0600306 if (WARN_ON(!iommu || !iommu->pasid_table))
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100307 return -EINVAL;
308
309 if (dev_is_pci(dev)) {
310 pasid_max = pci_max_pasids(to_pci_dev(dev));
311 if (pasid_max < 0)
312 return -EINVAL;
313 } else
314 pasid_max = 1 << 20;
315
David Woodhouse5cec7532015-10-15 15:52:15 +0100316 if ((flags & SVM_FLAG_SUPERVISOR_MODE)) {
317 if (!ecap_srs(iommu->ecap))
318 return -EINVAL;
319 } else if (pasid) {
320 mm = get_task_mm(current);
321 BUG_ON(!mm);
322 }
323
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100324 mutex_lock(&pasid_mutex);
David Woodhouse569e4f72015-10-15 13:59:14 +0100325 if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100326 int i;
327
328 idr_for_each_entry(&iommu->pasid_idr, svm, i) {
David Woodhouse5cec7532015-10-15 15:52:15 +0100329 if (svm->mm != mm ||
David Woodhouse569e4f72015-10-15 13:59:14 +0100330 (svm->flags & SVM_FLAG_PRIVATE_PASID))
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100331 continue;
332
333 if (svm->pasid >= pasid_max) {
334 dev_warn(dev,
335 "Limited PASID width. Cannot use existing PASID %d\n",
336 svm->pasid);
337 ret = -ENOSPC;
338 goto out;
339 }
340
341 list_for_each_entry(sdev, &svm->devs, list) {
342 if (dev == sdev->dev) {
David Woodhouse0204a492015-10-13 17:18:10 +0100343 if (sdev->ops != ops) {
344 ret = -EBUSY;
345 goto out;
346 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100347 sdev->users++;
348 goto success;
349 }
350 }
351
352 break;
353 }
354 }
355
356 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
357 if (!sdev) {
358 ret = -ENOMEM;
359 goto out;
360 }
361 sdev->dev = dev;
362
363 ret = intel_iommu_enable_pasid(iommu, sdev);
364 if (ret || !pasid) {
365 /* If they don't actually want to assign a PASID, this is
366 * just an enabling check/preparation. */
367 kfree(sdev);
368 goto out;
369 }
370 /* Finish the setup now we know we're keeping it */
371 sdev->users = 1;
David Woodhouse0204a492015-10-13 17:18:10 +0100372 sdev->ops = ops;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100373 init_rcu_head(&sdev->rcu);
374
375 if (!svm) {
376 svm = kzalloc(sizeof(*svm), GFP_KERNEL);
377 if (!svm) {
378 ret = -ENOMEM;
379 kfree(sdev);
380 goto out;
381 }
382 svm->iommu = iommu;
383
David Woodhouse91017042016-09-12 10:49:11 +0800384 if (pasid_max > iommu->pasid_max)
385 pasid_max = iommu->pasid_max;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100386
David Woodhouse5a10ba22015-10-24 21:06:39 +0200387 /* Do not use PASID 0 in caching mode (virtualised IOMMU) */
388 ret = idr_alloc(&iommu->pasid_idr, svm,
389 !!cap_caching_mode(iommu->cap),
390 pasid_max - 1, GFP_KERNEL);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100391 if (ret < 0) {
392 kfree(svm);
393 goto out;
394 }
395 svm->pasid = ret;
396 svm->notifier.ops = &intel_mmuops;
David Woodhouse5cec7532015-10-15 15:52:15 +0100397 svm->mm = mm;
David Woodhouse569e4f72015-10-15 13:59:14 +0100398 svm->flags = flags;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100399 INIT_LIST_HEAD_RCU(&svm->devs);
400 ret = -ENOMEM;
David Woodhouse5cec7532015-10-15 15:52:15 +0100401 if (mm) {
402 ret = mmu_notifier_register(&svm->notifier, mm);
403 if (ret) {
404 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
405 kfree(svm);
406 kfree(sdev);
407 goto out;
408 }
409 iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
David Woodhouse5cec7532015-10-15 15:52:15 +0100410 } else
411 iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100412 wmb();
David Woodhouse5a10ba22015-10-24 21:06:39 +0200413 /* In caching mode, we still have to flush with PASID 0 when
414 * a PASID table entry becomes present. Not entirely clear
415 * *why* that would be the case — surely we could just issue
416 * a flush with the PASID value that we've changed? The PASID
417 * is the index into the table, after all. It's not like domain
418 * IDs in the case of the equivalent context-entry change in
419 * caching mode. And for that matter it's not entirely clear why
420 * a VMM would be in the business of caching the PASID table
421 * anyway. Surely that can be left entirely to the guest? */
422 if (cap_caching_mode(iommu->cap))
423 intel_flush_pasid_dev(svm, sdev, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100424 }
425 list_add_rcu(&sdev->list, &svm->devs);
426
427 success:
428 *pasid = svm->pasid;
429 ret = 0;
430 out:
431 mutex_unlock(&pasid_mutex);
David Woodhouse5cec7532015-10-15 15:52:15 +0100432 if (mm)
433 mmput(mm);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100434 return ret;
435}
436EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
437
438int intel_svm_unbind_mm(struct device *dev, int pasid)
439{
440 struct intel_svm_dev *sdev;
441 struct intel_iommu *iommu;
442 struct intel_svm *svm;
443 int ret = -EINVAL;
444
445 mutex_lock(&pasid_mutex);
446 iommu = intel_svm_device_to_iommu(dev);
447 if (!iommu || !iommu->pasid_table)
448 goto out;
449
450 svm = idr_find(&iommu->pasid_idr, pasid);
451 if (!svm)
452 goto out;
453
454 list_for_each_entry(sdev, &svm->devs, list) {
455 if (dev == sdev->dev) {
456 ret = 0;
457 sdev->users--;
458 if (!sdev->users) {
459 list_del_rcu(&sdev->list);
460 /* Flush the PASID cache and IOTLB for this device.
461 * Note that we do depend on the hardware *not* using
462 * the PASID any more. Just as we depend on other
463 * devices never using PASIDs that they have no right
464 * to use. We have a *shared* PASID table, because it's
465 * large and has to be physically contiguous. So it's
466 * hard to be as defensive as we might like. */
David Woodhouse5a10ba22015-10-24 21:06:39 +0200467 intel_flush_pasid_dev(svm, sdev, svm->pasid);
David Woodhousee0349922015-10-16 19:36:53 +0100468 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100469 kfree_rcu(sdev, rcu);
470
471 if (list_empty(&svm->devs)) {
Lu Baolu4fa064b2017-11-03 10:51:34 -0600472 svm->iommu->pasid_table[svm->pasid].val = 0;
473 wmb();
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100474
475 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
David Woodhouse5cec7532015-10-15 15:52:15 +0100476 if (svm->mm)
David Woodhousee57e58b2016-01-12 19:18:06 +0000477 mmu_notifier_unregister(&svm->notifier, svm->mm);
478
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100479 /* We mandate that no page faults may be outstanding
480 * for the PASID when intel_svm_unbind_mm() is called.
481 * If that is not obeyed, subtle errors will happen.
482 * Let's make them less subtle... */
483 memset(svm, 0x6b, sizeof(*svm));
484 kfree(svm);
485 }
486 }
487 break;
488 }
489 }
490 out:
491 mutex_unlock(&pasid_mutex);
492
493 return ret;
494}
495EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
David Woodhousea222a7f2015-10-07 23:35:18 +0100496
CQ Tang15060ab2017-05-10 11:39:03 -0700497int intel_svm_is_pasid_valid(struct device *dev, int pasid)
498{
499 struct intel_iommu *iommu;
500 struct intel_svm *svm;
501 int ret = -EINVAL;
502
503 mutex_lock(&pasid_mutex);
504 iommu = intel_svm_device_to_iommu(dev);
505 if (!iommu || !iommu->pasid_table)
506 goto out;
507
508 svm = idr_find(&iommu->pasid_idr, pasid);
509 if (!svm)
510 goto out;
511
512 /* init_mm is used in this case */
513 if (!svm->mm)
514 ret = 1;
515 else if (atomic_read(&svm->mm->mm_users) > 0)
516 ret = 1;
517 else
518 ret = 0;
519
520 out:
521 mutex_unlock(&pasid_mutex);
522
523 return ret;
524}
525EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid);
526
David Woodhousea222a7f2015-10-07 23:35:18 +0100527/* Page request queue descriptor */
528struct page_req_dsc {
529 u64 srr:1;
530 u64 bof:1;
531 u64 pasid_present:1;
532 u64 lpig:1;
533 u64 pasid:20;
534 u64 bus:8;
535 u64 private:23;
536 u64 prg_index:9;
537 u64 rd_req:1;
538 u64 wr_req:1;
539 u64 exe_req:1;
540 u64 priv_req:1;
541 u64 devfn:8;
542 u64 addr:52;
543};
544
/*
 * Mask applied to the queue head/tail byte offsets: it keeps them inside
 * the (0x1000 << PRQ_ORDER)-byte ring and aligned to the 0x10-byte size
 * of one page request descriptor.
 */
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)
Joerg Roedel7f8312a2015-11-17 16:11:39 +0100546
547static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
548{
549 unsigned long requested = 0;
550
551 if (req->exe_req)
552 requested |= VM_EXEC;
553
554 if (req->rd_req)
555 requested |= VM_READ;
556
557 if (req->wr_req)
558 requested |= VM_WRITE;
559
560 return (requested & ~vma->vm_flags) != 0;
561}
562
Ashok Raj9d8c3af2017-08-08 13:29:27 -0700563static bool is_canonical_address(u64 addr)
564{
565 int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
566 long saddr = (long) addr;
567
568 return (((saddr << shift) >> shift) == saddr);
569}
570
David Woodhousea222a7f2015-10-07 23:35:18 +0100571static irqreturn_t prq_event_thread(int irq, void *d)
572{
573 struct intel_iommu *iommu = d;
574 struct intel_svm *svm = NULL;
575 int head, tail, handled = 0;
576
David Woodhouse46924002016-02-15 12:42:38 +0000577 /* Clear PPR bit before reading head/tail registers, to
578 * ensure that we get a new interrupt if needed. */
579 writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
580
David Woodhousea222a7f2015-10-07 23:35:18 +0100581 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
582 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
583 while (head != tail) {
David Woodhouse0204a492015-10-13 17:18:10 +0100584 struct intel_svm_dev *sdev;
David Woodhousea222a7f2015-10-07 23:35:18 +0100585 struct vm_area_struct *vma;
586 struct page_req_dsc *req;
587 struct qi_desc resp;
588 int ret, result;
589 u64 address;
590
591 handled = 1;
592
593 req = &iommu->prq[head / sizeof(*req)];
594
595 result = QI_RESP_FAILURE;
David Woodhouse7f92a2e2015-10-16 17:22:31 +0100596 address = (u64)req->addr << VTD_PAGE_SHIFT;
David Woodhousea222a7f2015-10-07 23:35:18 +0100597 if (!req->pasid_present) {
598 pr_err("%s: Page request without PASID: %08llx %08llx\n",
599 iommu->name, ((unsigned long long *)req)[0],
600 ((unsigned long long *)req)[1]);
601 goto bad_req;
602 }
603
604 if (!svm || svm->pasid != req->pasid) {
605 rcu_read_lock();
606 svm = idr_find(&iommu->pasid_idr, req->pasid);
607 /* It *can't* go away, because the driver is not permitted
608 * to unbind the mm while any page faults are outstanding.
609 * So we only need RCU to protect the internal idr code. */
610 rcu_read_unlock();
611
612 if (!svm) {
613 pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
614 iommu->name, req->pasid, ((unsigned long long *)req)[0],
615 ((unsigned long long *)req)[1]);
David Woodhouse26322ab2015-10-15 21:12:56 +0100616 goto no_pasid;
David Woodhousea222a7f2015-10-07 23:35:18 +0100617 }
618 }
619
620 result = QI_RESP_INVALID;
David Woodhouse5cec7532015-10-15 15:52:15 +0100621 /* Since we're using init_mm.pgd directly, we should never take
622 * any faults on kernel addresses. */
623 if (!svm->mm)
624 goto bad_req;
David Woodhousee57e58b2016-01-12 19:18:06 +0000625 /* If the mm is already defunct, don't handle faults. */
Vegard Nossum388f7932017-02-27 14:30:13 -0800626 if (!mmget_not_zero(svm->mm))
David Woodhousee57e58b2016-01-12 19:18:06 +0000627 goto bad_req;
Ashok Raj9d8c3af2017-08-08 13:29:27 -0700628
629 /* If address is not canonical, return invalid response */
630 if (!is_canonical_address(address))
631 goto bad_req;
632
David Woodhousea222a7f2015-10-07 23:35:18 +0100633 down_read(&svm->mm->mmap_sem);
634 vma = find_extend_vma(svm->mm, address);
635 if (!vma || address < vma->vm_start)
636 goto invalid;
637
Joerg Roedel7f8312a2015-11-17 16:11:39 +0100638 if (access_error(vma, req))
639 goto invalid;
640
Kirill A. Shutemovdcddffd2016-07-26 15:25:18 -0700641 ret = handle_mm_fault(vma, address,
David Woodhousea222a7f2015-10-07 23:35:18 +0100642 req->wr_req ? FAULT_FLAG_WRITE : 0);
643 if (ret & VM_FAULT_ERROR)
644 goto invalid;
645
646 result = QI_RESP_SUCCESS;
647 invalid:
648 up_read(&svm->mm->mmap_sem);
David Woodhousee57e58b2016-01-12 19:18:06 +0000649 mmput(svm->mm);
David Woodhousea222a7f2015-10-07 23:35:18 +0100650 bad_req:
651 /* Accounting for major/minor faults? */
David Woodhouse0204a492015-10-13 17:18:10 +0100652 rcu_read_lock();
653 list_for_each_entry_rcu(sdev, &svm->devs, list) {
Dan Carpenter3c7c2f32015-10-17 08:18:47 +0300654 if (sdev->sid == PCI_DEVID(req->bus, req->devfn))
David Woodhouse0204a492015-10-13 17:18:10 +0100655 break;
656 }
657 /* Other devices can go away, but the drivers are not permitted
658 * to unbind while any page faults might be in flight. So it's
659 * OK to drop the 'lock' here now we have it. */
660 rcu_read_unlock();
661
662 if (WARN_ON(&sdev->list == &svm->devs))
663 sdev = NULL;
664
665 if (sdev && sdev->ops && sdev->ops->fault_cb) {
666 int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
David Woodhouse0bdec952015-10-28 15:14:09 +0900667 (req->exe_req << 1) | (req->priv_req);
David Woodhouse0204a492015-10-13 17:18:10 +0100668 sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result);
669 }
David Woodhouse26322ab2015-10-15 21:12:56 +0100670 /* We get here in the error case where the PASID lookup failed,
671 and these can be NULL. Do not use them below this point! */
672 sdev = NULL;
673 svm = NULL;
674 no_pasid:
David Woodhousea222a7f2015-10-07 23:35:18 +0100675 if (req->lpig) {
676 /* Page Group Response */
677 resp.low = QI_PGRP_PASID(req->pasid) |
678 QI_PGRP_DID((req->bus << 8) | req->devfn) |
679 QI_PGRP_PASID_P(req->pasid_present) |
680 QI_PGRP_RESP_TYPE;
681 resp.high = QI_PGRP_IDX(req->prg_index) |
682 QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result);
683
David Woodhouse26322ab2015-10-15 21:12:56 +0100684 qi_submit_sync(&resp, iommu);
David Woodhousea222a7f2015-10-07 23:35:18 +0100685 } else if (req->srr) {
686 /* Page Stream Response */
687 resp.low = QI_PSTRM_IDX(req->prg_index) |
688 QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) |
689 QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE;
690 resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) |
691 QI_PSTRM_RESP_CODE(result);
692
David Woodhouse26322ab2015-10-15 21:12:56 +0100693 qi_submit_sync(&resp, iommu);
David Woodhousea222a7f2015-10-07 23:35:18 +0100694 }
695
696 head = (head + sizeof(*req)) & PRQ_RING_MASK;
697 }
698
699 dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
700
701 return IRQ_RETVAL(handled);
702}