blob: 358dd406fe21eb1e40a58f57813e5641a1c1ae7a [file] [log] [blame]
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
20 */
21
22#include <linux/init.h>
23#include <linux/bitmap.h>
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
27#include <linux/sysdev.h>
28#include <linux/spinlock.h>
29#include <linux/pci.h>
30#include <linux/dmar.h>
31#include <linux/dma-mapping.h>
32#include <linux/mempool.h>
33#include "iova.h"
34#include "intel-iommu.h"
35#include <asm/proto.h> /* force_iommu in this header in x86-64*/
36#include <asm/cacheflush.h>
37#include <asm/iommu.h>
38#include "pci.h"
39
/*
 * Device class tests. The macro argument is parenthesized so that any
 * pointer-valued expression (not just a plain identifier) can be passed.
 */
#define IS_GFX_DEVICE(pdev) (((pdev)->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) (((pdev)->class >> 8) == PCI_CLASS_BRIDGE_ISA)

/* Address window reserved as the "IOAPIC range" (see dmar_init_reserved_ranges) */
#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

/* Upper bound for IOMMU_WAIT_OP() polling before it panics */
#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */

/* Highest address representable with a guest address width of 'gaw' bits */
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << (gaw)) - 1)
52
53static void domain_remove_dev_info(struct dmar_domain *domain);
54
55static int dmar_disabled;
56static int __initdata dmar_map_gfx = 1;
Keshavamurthy, Anil S7d3b03c2007-10-21 16:41:53 -070057static int dmar_forcedac;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -070058
59#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
60static DEFINE_SPINLOCK(device_domain_lock);
61static LIST_HEAD(device_domain_list);
62
63static int __init intel_iommu_setup(char *str)
64{
65 if (!str)
66 return -EINVAL;
67 while (*str) {
68 if (!strncmp(str, "off", 3)) {
69 dmar_disabled = 1;
70 printk(KERN_INFO"Intel-IOMMU: disabled\n");
71 } else if (!strncmp(str, "igfx_off", 8)) {
72 dmar_map_gfx = 0;
73 printk(KERN_INFO
74 "Intel-IOMMU: disable GFX device mapping\n");
Keshavamurthy, Anil S7d3b03c2007-10-21 16:41:53 -070075 } else if (!strncmp(str, "forcedac", 8)) {
76 printk (KERN_INFO
77 "Intel-IOMMU: Forcing DAC for PCI devices\n");
78 dmar_forcedac = 1;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -070079 }
80
81 str += strcspn(str, ",");
82 while (*str == ',')
83 str++;
84 }
85 return 0;
86}
87__setup("intel_iommu=", intel_iommu_setup);
88
89static struct kmem_cache *iommu_domain_cache;
90static struct kmem_cache *iommu_devinfo_cache;
91static struct kmem_cache *iommu_iova_cache;
92
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -070093static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
94{
95 unsigned int flags;
96 void *vaddr;
97
98 /* trying to avoid low memory issues */
99 flags = current->flags & PF_MEMALLOC;
100 current->flags |= PF_MEMALLOC;
101 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
102 current->flags &= (~PF_MEMALLOC | flags);
103 return vaddr;
104}
105
106
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700107static inline void *alloc_pgtable_page(void)
108{
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -0700109 unsigned int flags;
110 void *vaddr;
111
112 /* trying to avoid low memory issues */
113 flags = current->flags & PF_MEMALLOC;
114 current->flags |= PF_MEMALLOC;
115 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
116 current->flags &= (~PF_MEMALLOC | flags);
117 return vaddr;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700118}
119
/* Release a page obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long page = (unsigned long)vaddr;

	free_page(page);
}
124
125static inline void *alloc_domain_mem(void)
126{
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -0700127 return iommu_kmem_cache_alloc(iommu_domain_cache);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700128}
129
130static inline void free_domain_mem(void *vaddr)
131{
132 kmem_cache_free(iommu_domain_cache, vaddr);
133}
134
135static inline void * alloc_devinfo_mem(void)
136{
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -0700137 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700138}
139
140static inline void free_devinfo_mem(void *vaddr)
141{
142 kmem_cache_free(iommu_devinfo_cache, vaddr);
143}
144
145struct iova *alloc_iova_mem(void)
146{
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -0700147 return iommu_kmem_cache_alloc(iommu_iova_cache);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700148}
149
150void free_iova_mem(struct iova *iova)
151{
152 kmem_cache_free(iommu_iova_cache, iova);
153}
154
155static inline void __iommu_flush_cache(
156 struct intel_iommu *iommu, void *addr, int size)
157{
158 if (!ecap_coherent(iommu->ecap))
159 clflush_cache_range(addr, size);
160}
161
162/* Gets context entry for a given bus and devfn */
163static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
164 u8 bus, u8 devfn)
165{
166 struct root_entry *root;
167 struct context_entry *context;
168 unsigned long phy_addr;
169 unsigned long flags;
170
171 spin_lock_irqsave(&iommu->lock, flags);
172 root = &iommu->root_entry[bus];
173 context = get_context_addr_from_root(root);
174 if (!context) {
175 context = (struct context_entry *)alloc_pgtable_page();
176 if (!context) {
177 spin_unlock_irqrestore(&iommu->lock, flags);
178 return NULL;
179 }
180 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
181 phy_addr = virt_to_phys((void *)context);
182 set_root_value(root, phy_addr);
183 set_root_present(root);
184 __iommu_flush_cache(iommu, root, sizeof(*root));
185 }
186 spin_unlock_irqrestore(&iommu->lock, flags);
187 return &context[devfn];
188}
189
190static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
191{
192 struct root_entry *root;
193 struct context_entry *context;
194 int ret;
195 unsigned long flags;
196
197 spin_lock_irqsave(&iommu->lock, flags);
198 root = &iommu->root_entry[bus];
199 context = get_context_addr_from_root(root);
200 if (!context) {
201 ret = 0;
202 goto out;
203 }
204 ret = context_present(context[devfn]);
205out:
206 spin_unlock_irqrestore(&iommu->lock, flags);
207 return ret;
208}
209
210static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
211{
212 struct root_entry *root;
213 struct context_entry *context;
214 unsigned long flags;
215
216 spin_lock_irqsave(&iommu->lock, flags);
217 root = &iommu->root_entry[bus];
218 context = get_context_addr_from_root(root);
219 if (context) {
220 context_clear_entry(context[devfn]);
221 __iommu_flush_cache(iommu, &context[devfn], \
222 sizeof(*context));
223 }
224 spin_unlock_irqrestore(&iommu->lock, flags);
225}
226
227static void free_context_table(struct intel_iommu *iommu)
228{
229 struct root_entry *root;
230 int i;
231 unsigned long flags;
232 struct context_entry *context;
233
234 spin_lock_irqsave(&iommu->lock, flags);
235 if (!iommu->root_entry) {
236 goto out;
237 }
238 for (i = 0; i < ROOT_ENTRY_NR; i++) {
239 root = &iommu->root_entry[i];
240 context = get_context_addr_from_root(root);
241 if (context)
242 free_pgtable_page(context);
243 }
244 free_pgtable_page(iommu->root_entry);
245 iommu->root_entry = NULL;
246out:
247 spin_unlock_irqrestore(&iommu->lock, flags);
248}
249
250/* page table handling */
251#define LEVEL_STRIDE (9)
252#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
253
/* Number of page-table levels for an AGAW encoding: agaw + 2. */
static inline int agaw_to_level(int agaw)
{
	int levels = 2 + agaw;

	return levels;
}
258
259static inline int agaw_to_width(int agaw)
260{
261 return 30 + agaw * LEVEL_STRIDE;
262
263}
264
265static inline int width_to_agaw(int width)
266{
267 return (width - 30) / LEVEL_STRIDE;
268}
269
270static inline unsigned int level_to_offset_bits(int level)
271{
272 return (12 + (level - 1) * LEVEL_STRIDE);
273}
274
275static inline int address_level_offset(u64 addr, int level)
276{
277 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
278}
279
280static inline u64 level_mask(int level)
281{
282 return ((u64)-1 << level_to_offset_bits(level));
283}
284
285static inline u64 level_size(int level)
286{
287 return ((u64)1 << level_to_offset_bits(level));
288}
289
290static inline u64 align_to_level(u64 addr, int level)
291{
292 return ((addr + level_size(level) - 1) & level_mask(level));
293}
294
295static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
296{
297 int addr_width = agaw_to_width(domain->agaw);
298 struct dma_pte *parent, *pte = NULL;
299 int level = agaw_to_level(domain->agaw);
300 int offset;
301 unsigned long flags;
302
303 BUG_ON(!domain->pgd);
304
305 addr &= (((u64)1) << addr_width) - 1;
306 parent = domain->pgd;
307
308 spin_lock_irqsave(&domain->mapping_lock, flags);
309 while (level > 0) {
310 void *tmp_page;
311
312 offset = address_level_offset(addr, level);
313 pte = &parent[offset];
314 if (level == 1)
315 break;
316
317 if (!dma_pte_present(*pte)) {
318 tmp_page = alloc_pgtable_page();
319
320 if (!tmp_page) {
321 spin_unlock_irqrestore(&domain->mapping_lock,
322 flags);
323 return NULL;
324 }
325 __iommu_flush_cache(domain->iommu, tmp_page,
326 PAGE_SIZE_4K);
327 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
328 /*
329 * high level table always sets r/w, last level page
330 * table control read/write
331 */
332 dma_set_pte_readable(*pte);
333 dma_set_pte_writable(*pte);
334 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
335 }
336 parent = phys_to_virt(dma_pte_addr(*pte));
337 level--;
338 }
339
340 spin_unlock_irqrestore(&domain->mapping_lock, flags);
341 return pte;
342}
343
344/* return address's pte at specific level */
345static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
346 int level)
347{
348 struct dma_pte *parent, *pte = NULL;
349 int total = agaw_to_level(domain->agaw);
350 int offset;
351
352 parent = domain->pgd;
353 while (level <= total) {
354 offset = address_level_offset(addr, total);
355 pte = &parent[offset];
356 if (level == total)
357 return pte;
358
359 if (!dma_pte_present(*pte))
360 break;
361 parent = phys_to_virt(dma_pte_addr(*pte));
362 total--;
363 }
364 return NULL;
365}
366
367/* clear one page's page table */
368static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
369{
370 struct dma_pte *pte = NULL;
371
372 /* get last level pte */
373 pte = dma_addr_level_pte(domain, addr, 1);
374
375 if (pte) {
376 dma_clear_pte(*pte);
377 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
378 }
379}
380
381/* clear last level pte, a tlb flush should be followed */
382static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
383{
384 int addr_width = agaw_to_width(domain->agaw);
385
386 start &= (((u64)1) << addr_width) - 1;
387 end &= (((u64)1) << addr_width) - 1;
388 /* in case it's partial page */
389 start = PAGE_ALIGN_4K(start);
390 end &= PAGE_MASK_4K;
391
392 /* we don't need lock here, nobody else touches the iova range */
393 while (start < end) {
394 dma_pte_clear_one(domain, start);
395 start += PAGE_SIZE_4K;
396 }
397}
398
399/* free page table pages. last level pte should already be cleared */
400static void dma_pte_free_pagetable(struct dmar_domain *domain,
401 u64 start, u64 end)
402{
403 int addr_width = agaw_to_width(domain->agaw);
404 struct dma_pte *pte;
405 int total = agaw_to_level(domain->agaw);
406 int level;
407 u64 tmp;
408
409 start &= (((u64)1) << addr_width) - 1;
410 end &= (((u64)1) << addr_width) - 1;
411
412 /* we don't need lock here, nobody else touches the iova range */
413 level = 2;
414 while (level <= total) {
415 tmp = align_to_level(start, level);
416 if (tmp >= end || (tmp + level_size(level) > end))
417 return;
418
419 while (tmp < end) {
420 pte = dma_addr_level_pte(domain, tmp, level);
421 if (pte) {
422 free_pgtable_page(
423 phys_to_virt(dma_pte_addr(*pte)));
424 dma_clear_pte(*pte);
425 __iommu_flush_cache(domain->iommu,
426 pte, sizeof(*pte));
427 }
428 tmp += level_size(level);
429 }
430 level++;
431 }
432 /* free pgd */
433 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
434 free_pgtable_page(domain->pgd);
435 domain->pgd = NULL;
436 }
437}
438
439/* iommu handling */
440static int iommu_alloc_root_entry(struct intel_iommu *iommu)
441{
442 struct root_entry *root;
443 unsigned long flags;
444
445 root = (struct root_entry *)alloc_pgtable_page();
446 if (!root)
447 return -ENOMEM;
448
449 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
450
451 spin_lock_irqsave(&iommu->lock, flags);
452 iommu->root_entry = root;
453 spin_unlock_irqrestore(&iommu->lock, flags);
454
455 return 0;
456}
457
/*
 * Poll an IOMMU register until a condition holds.
 *
 * Repeatedly evaluates "sts = op(iommu->reg + offset)" and stops as soon
 * as 'cond' (an expression that normally refers to 'sts') is true.
 * Panics if DMAR_OPERATION_TIMEOUT jiffies pass without that happening.
 *
 * Wrapped in do { } while (0) so that "IOMMU_WAIT_OP(...);" is a single
 * statement and stays correct inside an unbraced if/else.
 */
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
do {\
	unsigned long start_time = jiffies;\
	while (1) {\
		sts = op((iommu)->reg + (offset));\
		if (cond)\
			break;\
		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
			panic("DMAR hardware is malfunctioning\n");\
		cpu_relax();\
	}\
} while (0)
470
471static void iommu_set_root_entry(struct intel_iommu *iommu)
472{
473 void *addr;
474 u32 cmd, sts;
475 unsigned long flag;
476
477 addr = iommu->root_entry;
478
479 spin_lock_irqsave(&iommu->register_lock, flag);
480 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
481
482 cmd = iommu->gcmd | DMA_GCMD_SRTP;
483 writel(cmd, iommu->reg + DMAR_GCMD_REG);
484
485 /* Make sure hardware complete it */
486 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
487 readl, (sts & DMA_GSTS_RTPS), sts);
488
489 spin_unlock_irqrestore(&iommu->register_lock, flag);
490}
491
492static void iommu_flush_write_buffer(struct intel_iommu *iommu)
493{
494 u32 val;
495 unsigned long flag;
496
497 if (!cap_rwbf(iommu->cap))
498 return;
499 val = iommu->gcmd | DMA_GCMD_WBF;
500
501 spin_lock_irqsave(&iommu->register_lock, flag);
502 writel(val, iommu->reg + DMAR_GCMD_REG);
503
504 /* Make sure hardware complete it */
505 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
506 readl, (!(val & DMA_GSTS_WBFS)), val);
507
508 spin_unlock_irqrestore(&iommu->register_lock, flag);
509}
510
511/* return value determine if we need a write buffer flush */
512static int __iommu_flush_context(struct intel_iommu *iommu,
513 u16 did, u16 source_id, u8 function_mask, u64 type,
514 int non_present_entry_flush)
515{
516 u64 val = 0;
517 unsigned long flag;
518
519 /*
520 * In the non-present entry flush case, if hardware doesn't cache
521 * non-present entry we do nothing and if hardware cache non-present
522 * entry, we flush entries of domain 0 (the domain id is used to cache
523 * any non-present entries)
524 */
525 if (non_present_entry_flush) {
526 if (!cap_caching_mode(iommu->cap))
527 return 1;
528 else
529 did = 0;
530 }
531
532 switch (type) {
533 case DMA_CCMD_GLOBAL_INVL:
534 val = DMA_CCMD_GLOBAL_INVL;
535 break;
536 case DMA_CCMD_DOMAIN_INVL:
537 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
538 break;
539 case DMA_CCMD_DEVICE_INVL:
540 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
541 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
542 break;
543 default:
544 BUG();
545 }
546 val |= DMA_CCMD_ICC;
547
548 spin_lock_irqsave(&iommu->register_lock, flag);
549 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
550
551 /* Make sure hardware complete it */
552 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
553 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
554
555 spin_unlock_irqrestore(&iommu->register_lock, flag);
556
557 /* flush context entry will implictly flush write buffer */
558 return 0;
559}
560
561static int inline iommu_flush_context_global(struct intel_iommu *iommu,
562 int non_present_entry_flush)
563{
564 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
565 non_present_entry_flush);
566}
567
568static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
569 int non_present_entry_flush)
570{
571 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
572 non_present_entry_flush);
573}
574
575static int inline iommu_flush_context_device(struct intel_iommu *iommu,
576 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
577{
578 return __iommu_flush_context(iommu, did, source_id, function_mask,
579 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
580}
581
/*
 * Invalidate IOTLB entries.
 *
 * @did: domain id, used for the domain- and page-selective types.
 * @addr, @size_order: only used for DMA_TLB_PSI_FLUSH, where they are
 *	OR-ed together into the value written to the address register.
 * @type: DMA_TLB_GLOBAL_FLUSH, DMA_TLB_DSI_FLUSH or DMA_TLB_PSI_FLUSH;
 *	anything else is a BUG().
 * @non_present_entry_flush: non-zero when the flush is for an entry that
 *	was previously not present.
 *
 * The return value tells the caller whether a write buffer flush is
 * still needed: 1 when nothing was issued (non-present flush on hardware
 * without caching mode), 0 after an invalidation has completed.
 */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int size_order, u64 type,
	int non_present_entry_flush)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entry we do nothing and if hardware cache non-present
	 * entry, we flush entries of domain 0 (the domain id is used to cache
	 * any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: only write drain is requested; read drain is compiled out */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	/*
	 * NOTE(review): the address register is skipped whenever val_iva is
	 * 0, which also covers a page-selective flush of address 0 with
	 * size_order 0 - confirm that case cannot occur.
	 */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Wait for the hardware to clear DMA_TLB_IVT; val now holds the status */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
	/* flush iotlb entry will implicitly flush write buffer */
	return 0;
}
653
654static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
655 int non_present_entry_flush)
656{
657 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
658 non_present_entry_flush);
659}
660
661static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
662 int non_present_entry_flush)
663{
664 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
665 non_present_entry_flush);
666}
667
668static int iommu_get_alignment(u64 base, unsigned int size)
669{
670 int t = 0;
671 u64 end;
672
673 end = base + size - 1;
674 while (base != end) {
675 t++;
676 base >>= 1;
677 end >>= 1;
678 }
679 return t;
680}
681
682static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
683 u64 addr, unsigned int pages, int non_present_entry_flush)
684{
685 unsigned int align;
686
687 BUG_ON(addr & (~PAGE_MASK_4K));
688 BUG_ON(pages == 0);
689
690 /* Fallback to domain selective flush if no PSI support */
691 if (!cap_pgsel_inv(iommu->cap))
692 return iommu_flush_iotlb_dsi(iommu, did,
693 non_present_entry_flush);
694
695 /*
696 * PSI requires page size to be 2 ^ x, and the base address is naturally
697 * aligned to the size
698 */
699 align = iommu_get_alignment(addr >> PAGE_SHIFT_4K, pages);
700 /* Fallback to domain selective flush if size is too big */
701 if (align > cap_max_amask_val(iommu->cap))
702 return iommu_flush_iotlb_dsi(iommu, did,
703 non_present_entry_flush);
704
705 addr >>= PAGE_SHIFT_4K + align;
706 addr <<= PAGE_SHIFT_4K + align;
707
708 return __iommu_flush_iotlb(iommu, did, addr, align,
709 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
710}
711
712static int iommu_enable_translation(struct intel_iommu *iommu)
713{
714 u32 sts;
715 unsigned long flags;
716
717 spin_lock_irqsave(&iommu->register_lock, flags);
718 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
719
720 /* Make sure hardware complete it */
721 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
722 readl, (sts & DMA_GSTS_TES), sts);
723
724 iommu->gcmd |= DMA_GCMD_TE;
725 spin_unlock_irqrestore(&iommu->register_lock, flags);
726 return 0;
727}
728
729static int iommu_disable_translation(struct intel_iommu *iommu)
730{
731 u32 sts;
732 unsigned long flag;
733
734 spin_lock_irqsave(&iommu->register_lock, flag);
735 iommu->gcmd &= ~DMA_GCMD_TE;
736 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
737
738 /* Make sure hardware complete it */
739 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
740 readl, (!(sts & DMA_GSTS_TES)), sts);
741
742 spin_unlock_irqrestore(&iommu->register_lock, flag);
743 return 0;
744}
745
Keshavamurthy, Anil S3460a6d2007-10-21 16:41:54 -0700746/* iommu interrupt handling. Most stuff are MSI-like. */
747
748static char *fault_reason_strings[] =
749{
750 "Software",
751 "Present bit in root entry is clear",
752 "Present bit in context entry is clear",
753 "Invalid context entry",
754 "Access beyond MGAW",
755 "PTE Write access is not set",
756 "PTE Read access is not set",
757 "Next page table ptr is invalid",
758 "Root table address invalid",
759 "Context table ptr is invalid",
760 "non-zero reserved fields in RTP",
761 "non-zero reserved fields in CTP",
762 "non-zero reserved fields in PTE",
763 "Unknown"
764};
765#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings)
766
767char *dmar_get_fault_reason(u8 fault_reason)
768{
769 if (fault_reason > MAX_FAULT_REASON_IDX)
770 return fault_reason_strings[MAX_FAULT_REASON_IDX];
771 else
772 return fault_reason_strings[fault_reason];
773}
774
775void dmar_msi_unmask(unsigned int irq)
776{
777 struct intel_iommu *iommu = get_irq_data(irq);
778 unsigned long flag;
779
780 /* unmask it */
781 spin_lock_irqsave(&iommu->register_lock, flag);
782 writel(0, iommu->reg + DMAR_FECTL_REG);
783 /* Read a reg to force flush the post write */
784 readl(iommu->reg + DMAR_FECTL_REG);
785 spin_unlock_irqrestore(&iommu->register_lock, flag);
786}
787
788void dmar_msi_mask(unsigned int irq)
789{
790 unsigned long flag;
791 struct intel_iommu *iommu = get_irq_data(irq);
792
793 /* mask it */
794 spin_lock_irqsave(&iommu->register_lock, flag);
795 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
796 /* Read a reg to force flush the post write */
797 readl(iommu->reg + DMAR_FECTL_REG);
798 spin_unlock_irqrestore(&iommu->register_lock, flag);
799}
800
801void dmar_msi_write(int irq, struct msi_msg *msg)
802{
803 struct intel_iommu *iommu = get_irq_data(irq);
804 unsigned long flag;
805
806 spin_lock_irqsave(&iommu->register_lock, flag);
807 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
808 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
809 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
810 spin_unlock_irqrestore(&iommu->register_lock, flag);
811}
812
813void dmar_msi_read(int irq, struct msi_msg *msg)
814{
815 struct intel_iommu *iommu = get_irq_data(irq);
816 unsigned long flag;
817
818 spin_lock_irqsave(&iommu->register_lock, flag);
819 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
820 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
821 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
822 spin_unlock_irqrestore(&iommu->register_lock, flag);
823}
824
825static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
826 u8 fault_reason, u16 source_id, u64 addr)
827{
828 char *reason;
829
830 reason = dmar_get_fault_reason(fault_reason);
831
832 printk(KERN_ERR
833 "DMAR:[%s] Request device [%02x:%02x.%d] "
834 "fault addr %llx \n"
835 "DMAR:[fault reason %02d] %s\n",
836 (type ? "DMA Read" : "DMA Write"),
837 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
838 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
839 return 0;
840}
841
842#define PRIMARY_FAULT_REG_LEN (16)
843static irqreturn_t iommu_page_fault(int irq, void *dev_id)
844{
845 struct intel_iommu *iommu = dev_id;
846 int reg, fault_index;
847 u32 fault_status;
848 unsigned long flag;
849
850 spin_lock_irqsave(&iommu->register_lock, flag);
851 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
852
853 /* TBD: ignore advanced fault log currently */
854 if (!(fault_status & DMA_FSTS_PPF))
855 goto clear_overflow;
856
857 fault_index = dma_fsts_fault_record_index(fault_status);
858 reg = cap_fault_reg_offset(iommu->cap);
859 while (1) {
860 u8 fault_reason;
861 u16 source_id;
862 u64 guest_addr;
863 int type;
864 u32 data;
865
866 /* highest 32 bits */
867 data = readl(iommu->reg + reg +
868 fault_index * PRIMARY_FAULT_REG_LEN + 12);
869 if (!(data & DMA_FRCD_F))
870 break;
871
872 fault_reason = dma_frcd_fault_reason(data);
873 type = dma_frcd_type(data);
874
875 data = readl(iommu->reg + reg +
876 fault_index * PRIMARY_FAULT_REG_LEN + 8);
877 source_id = dma_frcd_source_id(data);
878
879 guest_addr = dmar_readq(iommu->reg + reg +
880 fault_index * PRIMARY_FAULT_REG_LEN);
881 guest_addr = dma_frcd_page_addr(guest_addr);
882 /* clear the fault */
883 writel(DMA_FRCD_F, iommu->reg + reg +
884 fault_index * PRIMARY_FAULT_REG_LEN + 12);
885
886 spin_unlock_irqrestore(&iommu->register_lock, flag);
887
888 iommu_page_fault_do_one(iommu, type, fault_reason,
889 source_id, guest_addr);
890
891 fault_index++;
892 if (fault_index > cap_num_fault_regs(iommu->cap))
893 fault_index = 0;
894 spin_lock_irqsave(&iommu->register_lock, flag);
895 }
896clear_overflow:
897 /* clear primary fault overflow */
898 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
899 if (fault_status & DMA_FSTS_PFO)
900 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
901
902 spin_unlock_irqrestore(&iommu->register_lock, flag);
903 return IRQ_HANDLED;
904}
905
906int dmar_set_interrupt(struct intel_iommu *iommu)
907{
908 int irq, ret;
909
910 irq = create_irq();
911 if (!irq) {
912 printk(KERN_ERR "IOMMU: no free vectors\n");
913 return -EINVAL;
914 }
915
916 set_irq_data(irq, iommu);
917 iommu->irq = irq;
918
919 ret = arch_setup_dmar_msi(irq);
920 if (ret) {
921 set_irq_data(irq, NULL);
922 iommu->irq = 0;
923 destroy_irq(irq);
924 return 0;
925 }
926
927 /* Force fault register is cleared */
928 iommu_page_fault(irq, iommu);
929
930 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
931 if (ret)
932 printk(KERN_ERR "IOMMU: can't request irq\n");
933 return ret;
934}
935
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700936static int iommu_init_domains(struct intel_iommu *iommu)
937{
938 unsigned long ndomains;
939 unsigned long nlongs;
940
941 ndomains = cap_ndoms(iommu->cap);
942 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
943 nlongs = BITS_TO_LONGS(ndomains);
944
945 /* TBD: there might be 64K domains,
946 * consider other allocation for future chip
947 */
948 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
949 if (!iommu->domain_ids) {
950 printk(KERN_ERR "Allocating domain id array failed\n");
951 return -ENOMEM;
952 }
953 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
954 GFP_KERNEL);
955 if (!iommu->domains) {
956 printk(KERN_ERR "Allocating domain array failed\n");
957 kfree(iommu->domain_ids);
958 return -ENOMEM;
959 }
960
961 /*
962 * if Caching mode is set, then invalid translations are tagged
963 * with domainid 0. Hence we need to pre-allocate it.
964 */
965 if (cap_caching_mode(iommu->cap))
966 set_bit(0, iommu->domain_ids);
967 return 0;
968}
969
970static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
971{
972 struct intel_iommu *iommu;
973 int ret;
974 int map_size;
975 u32 ver;
976
977 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
978 if (!iommu)
979 return NULL;
980 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
981 if (!iommu->reg) {
982 printk(KERN_ERR "IOMMU: can't map the region\n");
983 goto error;
984 }
985 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
986 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
987
988 /* the registers might be more than one page */
989 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
990 cap_max_fault_reg_offset(iommu->cap));
991 map_size = PAGE_ALIGN_4K(map_size);
992 if (map_size > PAGE_SIZE_4K) {
993 iounmap(iommu->reg);
994 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
995 if (!iommu->reg) {
996 printk(KERN_ERR "IOMMU: can't map the region\n");
997 goto error;
998 }
999 }
1000
1001 ver = readl(iommu->reg + DMAR_VER_REG);
1002 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1003 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1004 iommu->cap, iommu->ecap);
1005 ret = iommu_init_domains(iommu);
1006 if (ret)
1007 goto error_unmap;
1008 spin_lock_init(&iommu->lock);
1009 spin_lock_init(&iommu->register_lock);
1010
1011 drhd->iommu = iommu;
1012 return iommu;
1013error_unmap:
1014 iounmap(iommu->reg);
1015 iommu->reg = 0;
1016error:
1017 kfree(iommu);
1018 return NULL;
1019}
1020
1021static void domain_exit(struct dmar_domain *domain);
1022static void free_iommu(struct intel_iommu *iommu)
1023{
1024 struct dmar_domain *domain;
1025 int i;
1026
1027 if (!iommu)
1028 return;
1029
1030 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1031 for (; i < cap_ndoms(iommu->cap); ) {
1032 domain = iommu->domains[i];
1033 clear_bit(i, iommu->domain_ids);
1034 domain_exit(domain);
1035 i = find_next_bit(iommu->domain_ids,
1036 cap_ndoms(iommu->cap), i+1);
1037 }
1038
1039 if (iommu->gcmd & DMA_GCMD_TE)
1040 iommu_disable_translation(iommu);
1041
1042 if (iommu->irq) {
1043 set_irq_data(iommu->irq, NULL);
1044 /* This will mask the irq */
1045 free_irq(iommu->irq, iommu);
1046 destroy_irq(iommu->irq);
1047 }
1048
1049 kfree(iommu->domains);
1050 kfree(iommu->domain_ids);
1051
1052 /* free context mapping */
1053 free_context_table(iommu);
1054
1055 if (iommu->reg)
1056 iounmap(iommu->reg);
1057 kfree(iommu);
1058}
1059
1060static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1061{
1062 unsigned long num;
1063 unsigned long ndomains;
1064 struct dmar_domain *domain;
1065 unsigned long flags;
1066
1067 domain = alloc_domain_mem();
1068 if (!domain)
1069 return NULL;
1070
1071 ndomains = cap_ndoms(iommu->cap);
1072
1073 spin_lock_irqsave(&iommu->lock, flags);
1074 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1075 if (num >= ndomains) {
1076 spin_unlock_irqrestore(&iommu->lock, flags);
1077 free_domain_mem(domain);
1078 printk(KERN_ERR "IOMMU: no free domain ids\n");
1079 return NULL;
1080 }
1081
1082 set_bit(num, iommu->domain_ids);
1083 domain->id = num;
1084 domain->iommu = iommu;
1085 iommu->domains[num] = domain;
1086 spin_unlock_irqrestore(&iommu->lock, flags);
1087
1088 return domain;
1089}
1090
1091static void iommu_free_domain(struct dmar_domain *domain)
1092{
1093 unsigned long flags;
1094
1095 spin_lock_irqsave(&domain->iommu->lock, flags);
1096 clear_bit(domain->id, domain->iommu->domain_ids);
1097 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1098}
1099
1100static struct iova_domain reserved_iova_list;
1101
1102static void dmar_init_reserved_ranges(void)
1103{
1104 struct pci_dev *pdev = NULL;
1105 struct iova *iova;
1106 int i;
1107 u64 addr, size;
1108
1109 init_iova_domain(&reserved_iova_list);
1110
1111 /* IOAPIC ranges shouldn't be accessed by DMA */
1112 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1113 IOVA_PFN(IOAPIC_RANGE_END));
1114 if (!iova)
1115 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1116
1117 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1118 for_each_pci_dev(pdev) {
1119 struct resource *r;
1120
1121 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1122 r = &pdev->resource[i];
1123 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1124 continue;
1125 addr = r->start;
1126 addr &= PAGE_MASK_4K;
1127 size = r->end - addr;
1128 size = PAGE_ALIGN_4K(size);
1129 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1130 IOVA_PFN(size + addr) - 1);
1131 if (!iova)
1132 printk(KERN_ERR "Reserve iova failed\n");
1133 }
1134 }
1135
1136}
1137
1138static void domain_reserve_special_ranges(struct dmar_domain *domain)
1139{
1140 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1141}
1142
/*
 * Round a guest address width up so that (width - 12) is a multiple of 9,
 * capping the result at 64.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int agaw = rem ? gaw + 9 - rem : gaw;

	return agaw > 64 ? 64 : agaw;
}
1156
1157static int domain_init(struct dmar_domain *domain, int guest_width)
1158{
1159 struct intel_iommu *iommu;
1160 int adjust_width, agaw;
1161 unsigned long sagaw;
1162
1163 init_iova_domain(&domain->iovad);
1164 spin_lock_init(&domain->mapping_lock);
1165
1166 domain_reserve_special_ranges(domain);
1167
1168 /* calculate AGAW */
1169 iommu = domain->iommu;
1170 if (guest_width > cap_mgaw(iommu->cap))
1171 guest_width = cap_mgaw(iommu->cap);
1172 domain->gaw = guest_width;
1173 adjust_width = guestwidth_to_adjustwidth(guest_width);
1174 agaw = width_to_agaw(adjust_width);
1175 sagaw = cap_sagaw(iommu->cap);
1176 if (!test_bit(agaw, &sagaw)) {
1177 /* hardware doesn't support it, choose a bigger one */
1178 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1179 agaw = find_next_bit(&sagaw, 5, agaw);
1180 if (agaw >= 5)
1181 return -ENODEV;
1182 }
1183 domain->agaw = agaw;
1184 INIT_LIST_HEAD(&domain->devices);
1185
1186 /* always allocate the top pgd */
1187 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1188 if (!domain->pgd)
1189 return -ENOMEM;
1190 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1191 return 0;
1192}
1193
1194static void domain_exit(struct dmar_domain *domain)
1195{
1196 u64 end;
1197
1198 /* Domain 0 is reserved, so dont process it */
1199 if (!domain)
1200 return;
1201
1202 domain_remove_dev_info(domain);
1203 /* destroy iovas */
1204 put_iova_domain(&domain->iovad);
1205 end = DOMAIN_MAX_ADDR(domain->gaw);
1206 end = end & (~PAGE_MASK_4K);
1207
1208 /* clear ptes */
1209 dma_pte_clear_range(domain, 0, end);
1210
1211 /* free page tables */
1212 dma_pte_free_pagetable(domain, 0, end);
1213
1214 iommu_free_domain(domain);
1215 free_domain_mem(domain);
1216}
1217
1218static int domain_context_mapping_one(struct dmar_domain *domain,
1219 u8 bus, u8 devfn)
1220{
1221 struct context_entry *context;
1222 struct intel_iommu *iommu = domain->iommu;
1223 unsigned long flags;
1224
1225 pr_debug("Set context mapping for %02x:%02x.%d\n",
1226 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1227 BUG_ON(!domain->pgd);
1228 context = device_to_context_entry(iommu, bus, devfn);
1229 if (!context)
1230 return -ENOMEM;
1231 spin_lock_irqsave(&iommu->lock, flags);
1232 if (context_present(*context)) {
1233 spin_unlock_irqrestore(&iommu->lock, flags);
1234 return 0;
1235 }
1236
1237 context_set_domain_id(*context, domain->id);
1238 context_set_address_width(*context, domain->agaw);
1239 context_set_address_root(*context, virt_to_phys(domain->pgd));
1240 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1241 context_set_fault_enable(*context);
1242 context_set_present(*context);
1243 __iommu_flush_cache(iommu, context, sizeof(*context));
1244
1245 /* it's a non-present to present mapping */
1246 if (iommu_flush_context_device(iommu, domain->id,
1247 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1248 iommu_flush_write_buffer(iommu);
1249 else
1250 iommu_flush_iotlb_dsi(iommu, 0, 0);
1251 spin_unlock_irqrestore(&iommu->lock, flags);
1252 return 0;
1253}
1254
1255static int
1256domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1257{
1258 int ret;
1259 struct pci_dev *tmp, *parent;
1260
1261 ret = domain_context_mapping_one(domain, pdev->bus->number,
1262 pdev->devfn);
1263 if (ret)
1264 return ret;
1265
1266 /* dependent device mapping */
1267 tmp = pci_find_upstream_pcie_bridge(pdev);
1268 if (!tmp)
1269 return 0;
1270 /* Secondary interface's bus number and devfn 0 */
1271 parent = pdev->bus->self;
1272 while (parent != tmp) {
1273 ret = domain_context_mapping_one(domain, parent->bus->number,
1274 parent->devfn);
1275 if (ret)
1276 return ret;
1277 parent = parent->bus->self;
1278 }
1279 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1280 return domain_context_mapping_one(domain,
1281 tmp->subordinate->number, 0);
1282 else /* this is a legacy PCI bridge */
1283 return domain_context_mapping_one(domain,
1284 tmp->bus->number, tmp->devfn);
1285}
1286
1287static int domain_context_mapped(struct dmar_domain *domain,
1288 struct pci_dev *pdev)
1289{
1290 int ret;
1291 struct pci_dev *tmp, *parent;
1292
1293 ret = device_context_mapped(domain->iommu,
1294 pdev->bus->number, pdev->devfn);
1295 if (!ret)
1296 return ret;
1297 /* dependent device mapping */
1298 tmp = pci_find_upstream_pcie_bridge(pdev);
1299 if (!tmp)
1300 return ret;
1301 /* Secondary interface's bus number and devfn 0 */
1302 parent = pdev->bus->self;
1303 while (parent != tmp) {
1304 ret = device_context_mapped(domain->iommu, parent->bus->number,
1305 parent->devfn);
1306 if (!ret)
1307 return ret;
1308 parent = parent->bus->self;
1309 }
1310 if (tmp->is_pcie)
1311 return device_context_mapped(domain->iommu,
1312 tmp->subordinate->number, 0);
1313 else
1314 return device_context_mapped(domain->iommu,
1315 tmp->bus->number, tmp->devfn);
1316}
1317
1318static int
1319domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1320 u64 hpa, size_t size, int prot)
1321{
1322 u64 start_pfn, end_pfn;
1323 struct dma_pte *pte;
1324 int index;
1325
1326 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1327 return -EINVAL;
1328 iova &= PAGE_MASK_4K;
1329 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1330 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1331 index = 0;
1332 while (start_pfn < end_pfn) {
1333 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1334 if (!pte)
1335 return -ENOMEM;
1336 /* We don't need lock here, nobody else
1337 * touches the iova range
1338 */
1339 BUG_ON(dma_pte_addr(*pte));
1340 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1341 dma_set_pte_prot(*pte, prot);
1342 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1343 start_pfn++;
1344 index++;
1345 }
1346 return 0;
1347}
1348
1349static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1350{
1351 clear_context_table(domain->iommu, bus, devfn);
1352 iommu_flush_context_global(domain->iommu, 0);
1353 iommu_flush_iotlb_global(domain->iommu, 0);
1354}
1355
1356static void domain_remove_dev_info(struct dmar_domain *domain)
1357{
1358 struct device_domain_info *info;
1359 unsigned long flags;
1360
1361 spin_lock_irqsave(&device_domain_lock, flags);
1362 while (!list_empty(&domain->devices)) {
1363 info = list_entry(domain->devices.next,
1364 struct device_domain_info, link);
1365 list_del(&info->link);
1366 list_del(&info->global);
1367 if (info->dev)
1368 info->dev->sysdata = NULL;
1369 spin_unlock_irqrestore(&device_domain_lock, flags);
1370
1371 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1372 free_devinfo_mem(info);
1373
1374 spin_lock_irqsave(&device_domain_lock, flags);
1375 }
1376 spin_unlock_irqrestore(&device_domain_lock, flags);
1377}
1378
1379/*
1380 * find_domain
1381 * Note: we use struct pci_dev->sysdata stores the info
1382 */
1383struct dmar_domain *
1384find_domain(struct pci_dev *pdev)
1385{
1386 struct device_domain_info *info;
1387
1388 /* No lock here, assumes no domain exit in normal case */
1389 info = pdev->sysdata;
1390 if (info)
1391 return info->domain;
1392 return NULL;
1393}
1394
1395static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1396 struct pci_dev *dev)
1397{
1398 int index;
1399
1400 while (dev) {
1401 for (index = 0; index < cnt; index ++)
1402 if (dev == devices[index])
1403 return 1;
1404
1405 /* Check our parent */
1406 dev = dev->bus->self;
1407 }
1408
1409 return 0;
1410}
1411
1412static struct dmar_drhd_unit *
1413dmar_find_matched_drhd_unit(struct pci_dev *dev)
1414{
1415 struct dmar_drhd_unit *drhd = NULL;
1416
1417 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1418 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1419 drhd->devices_cnt, dev))
1420 return drhd;
1421 }
1422
1423 return NULL;
1424}
1425
1426/* domain is initialized */
1427static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1428{
1429 struct dmar_domain *domain, *found = NULL;
1430 struct intel_iommu *iommu;
1431 struct dmar_drhd_unit *drhd;
1432 struct device_domain_info *info, *tmp;
1433 struct pci_dev *dev_tmp;
1434 unsigned long flags;
1435 int bus = 0, devfn = 0;
1436
1437 domain = find_domain(pdev);
1438 if (domain)
1439 return domain;
1440
1441 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1442 if (dev_tmp) {
1443 if (dev_tmp->is_pcie) {
1444 bus = dev_tmp->subordinate->number;
1445 devfn = 0;
1446 } else {
1447 bus = dev_tmp->bus->number;
1448 devfn = dev_tmp->devfn;
1449 }
1450 spin_lock_irqsave(&device_domain_lock, flags);
1451 list_for_each_entry(info, &device_domain_list, global) {
1452 if (info->bus == bus && info->devfn == devfn) {
1453 found = info->domain;
1454 break;
1455 }
1456 }
1457 spin_unlock_irqrestore(&device_domain_lock, flags);
1458 /* pcie-pci bridge already has a domain, uses it */
1459 if (found) {
1460 domain = found;
1461 goto found_domain;
1462 }
1463 }
1464
1465 /* Allocate new domain for the device */
1466 drhd = dmar_find_matched_drhd_unit(pdev);
1467 if (!drhd) {
1468 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1469 pci_name(pdev));
1470 return NULL;
1471 }
1472 iommu = drhd->iommu;
1473
1474 domain = iommu_alloc_domain(iommu);
1475 if (!domain)
1476 goto error;
1477
1478 if (domain_init(domain, gaw)) {
1479 domain_exit(domain);
1480 goto error;
1481 }
1482
1483 /* register pcie-to-pci device */
1484 if (dev_tmp) {
1485 info = alloc_devinfo_mem();
1486 if (!info) {
1487 domain_exit(domain);
1488 goto error;
1489 }
1490 info->bus = bus;
1491 info->devfn = devfn;
1492 info->dev = NULL;
1493 info->domain = domain;
1494 /* This domain is shared by devices under p2p bridge */
1495 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1496
1497 /* pcie-to-pci bridge already has a domain, uses it */
1498 found = NULL;
1499 spin_lock_irqsave(&device_domain_lock, flags);
1500 list_for_each_entry(tmp, &device_domain_list, global) {
1501 if (tmp->bus == bus && tmp->devfn == devfn) {
1502 found = tmp->domain;
1503 break;
1504 }
1505 }
1506 if (found) {
1507 free_devinfo_mem(info);
1508 domain_exit(domain);
1509 domain = found;
1510 } else {
1511 list_add(&info->link, &domain->devices);
1512 list_add(&info->global, &device_domain_list);
1513 }
1514 spin_unlock_irqrestore(&device_domain_lock, flags);
1515 }
1516
1517found_domain:
1518 info = alloc_devinfo_mem();
1519 if (!info)
1520 goto error;
1521 info->bus = pdev->bus->number;
1522 info->devfn = pdev->devfn;
1523 info->dev = pdev;
1524 info->domain = domain;
1525 spin_lock_irqsave(&device_domain_lock, flags);
1526 /* somebody is fast */
1527 found = find_domain(pdev);
1528 if (found != NULL) {
1529 spin_unlock_irqrestore(&device_domain_lock, flags);
1530 if (found != domain) {
1531 domain_exit(domain);
1532 domain = found;
1533 }
1534 free_devinfo_mem(info);
1535 return domain;
1536 }
1537 list_add(&info->link, &domain->devices);
1538 list_add(&info->global, &device_domain_list);
1539 pdev->sysdata = info;
1540 spin_unlock_irqrestore(&device_domain_lock, flags);
1541 return domain;
1542error:
1543 /* recheck it here, maybe others set it */
1544 return find_domain(pdev);
1545}
1546
1547static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1548{
1549 struct dmar_domain *domain;
1550 unsigned long size;
1551 u64 base;
1552 int ret;
1553
1554 printk(KERN_INFO
1555 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1556 pci_name(pdev), start, end);
1557 /* page table init */
1558 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1559 if (!domain)
1560 return -ENOMEM;
1561
1562 /* The address might not be aligned */
1563 base = start & PAGE_MASK_4K;
1564 size = end - base;
1565 size = PAGE_ALIGN_4K(size);
1566 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1567 IOVA_PFN(base + size) - 1)) {
1568 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1569 ret = -ENOMEM;
1570 goto error;
1571 }
1572
1573 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1574 size, base, pci_name(pdev));
1575 /*
1576 * RMRR range might have overlap with physical memory range,
1577 * clear it first
1578 */
1579 dma_pte_clear_range(domain, base, base + size);
1580
1581 ret = domain_page_mapping(domain, base, base, size,
1582 DMA_PTE_READ|DMA_PTE_WRITE);
1583 if (ret)
1584 goto error;
1585
1586 /* context entry init */
1587 ret = domain_context_mapping(domain, pdev);
1588 if (!ret)
1589 return 0;
1590error:
1591 domain_exit(domain);
1592 return ret;
1593
1594}
1595
1596static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1597 struct pci_dev *pdev)
1598{
1599 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1600 return 0;
1601 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1602 rmrr->end_address + 1);
1603}
1604
1605int __init init_dmars(void)
1606{
1607 struct dmar_drhd_unit *drhd;
1608 struct dmar_rmrr_unit *rmrr;
1609 struct pci_dev *pdev;
1610 struct intel_iommu *iommu;
1611 int ret, unit = 0;
1612
1613 /*
1614 * for each drhd
1615 * allocate root
1616 * initialize and program root entry to not present
1617 * endfor
1618 */
1619 for_each_drhd_unit(drhd) {
1620 if (drhd->ignored)
1621 continue;
1622 iommu = alloc_iommu(drhd);
1623 if (!iommu) {
1624 ret = -ENOMEM;
1625 goto error;
1626 }
1627
1628 /*
1629 * TBD:
1630 * we could share the same root & context tables
1631 * amoung all IOMMU's. Need to Split it later.
1632 */
1633 ret = iommu_alloc_root_entry(iommu);
1634 if (ret) {
1635 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1636 goto error;
1637 }
1638 }
1639
1640 /*
1641 * For each rmrr
1642 * for each dev attached to rmrr
1643 * do
1644 * locate drhd for dev, alloc domain for dev
1645 * allocate free domain
1646 * allocate page table entries for rmrr
1647 * if context not allocated for bus
1648 * allocate and init context
1649 * set present in root table for this bus
1650 * init context with domain, translation etc
1651 * endfor
1652 * endfor
1653 */
1654 for_each_rmrr_units(rmrr) {
1655 int i;
1656 for (i = 0; i < rmrr->devices_cnt; i++) {
1657 pdev = rmrr->devices[i];
1658 /* some BIOS lists non-exist devices in DMAR table */
1659 if (!pdev)
1660 continue;
1661 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1662 if (ret)
1663 printk(KERN_ERR
1664 "IOMMU: mapping reserved region failed\n");
1665 }
1666 }
1667
1668 /*
1669 * for each drhd
1670 * enable fault log
1671 * global invalidate context cache
1672 * global invalidate iotlb
1673 * enable translation
1674 */
1675 for_each_drhd_unit(drhd) {
1676 if (drhd->ignored)
1677 continue;
1678 iommu = drhd->iommu;
1679 sprintf (iommu->name, "dmar%d", unit++);
1680
1681 iommu_flush_write_buffer(iommu);
1682
Keshavamurthy, Anil S3460a6d2007-10-21 16:41:54 -07001683 ret = dmar_set_interrupt(iommu);
1684 if (ret)
1685 goto error;
1686
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001687 iommu_set_root_entry(iommu);
1688
1689 iommu_flush_context_global(iommu, 0);
1690 iommu_flush_iotlb_global(iommu, 0);
1691
1692 ret = iommu_enable_translation(iommu);
1693 if (ret)
1694 goto error;
1695 }
1696
1697 return 0;
1698error:
1699 for_each_drhd_unit(drhd) {
1700 if (drhd->ignored)
1701 continue;
1702 iommu = drhd->iommu;
1703 free_iommu(iommu);
1704 }
1705 return ret;
1706}
1707
1708static inline u64 aligned_size(u64 host_addr, size_t size)
1709{
1710 u64 addr;
1711 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1712 return PAGE_ALIGN_4K(addr);
1713}
1714
1715struct iova *
1716iommu_alloc_iova(struct dmar_domain *domain, void *host_addr, size_t size,
1717 u64 start, u64 end)
1718{
1719 u64 start_addr;
1720 struct iova *piova;
1721
1722 /* Make sure it's in range */
1723 if ((start > DOMAIN_MAX_ADDR(domain->gaw)) || end < start)
1724 return NULL;
1725
1726 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1727 start_addr = PAGE_ALIGN_4K(start);
1728 size = aligned_size((u64)host_addr, size);
1729 if (!size || (start_addr + size > end))
1730 return NULL;
1731
1732 piova = alloc_iova(&domain->iovad,
1733 size >> PAGE_SHIFT_4K, IOVA_PFN(end));
1734
1735 return piova;
1736}
1737
1738static dma_addr_t __intel_map_single(struct device *dev, void *addr,
1739 size_t size, int dir, u64 *flush_addr, unsigned int *flush_size)
1740{
1741 struct dmar_domain *domain;
1742 struct pci_dev *pdev = to_pci_dev(dev);
1743 int ret;
1744 int prot = 0;
1745 struct iova *iova = NULL;
1746 u64 start_addr;
1747
1748 addr = (void *)virt_to_phys(addr);
1749
1750 domain = get_domain_for_dev(pdev,
1751 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1752 if (!domain) {
1753 printk(KERN_ERR
1754 "Allocating domain for %s failed", pci_name(pdev));
1755 return 0;
1756 }
1757
1758 start_addr = IOVA_START_ADDR;
1759
Keshavamurthy, Anil S7d3b03c2007-10-21 16:41:53 -07001760 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001761 iova = iommu_alloc_iova(domain, addr, size, start_addr,
1762 pdev->dma_mask);
1763 } else {
1764 /*
1765 * First try to allocate an io virtual address in
1766 * DMA_32BIT_MASK and if that fails then try allocating
1767 * from higer range
1768 */
1769 iova = iommu_alloc_iova(domain, addr, size, start_addr,
1770 DMA_32BIT_MASK);
1771 if (!iova)
1772 iova = iommu_alloc_iova(domain, addr, size, start_addr,
1773 pdev->dma_mask);
1774 }
1775
1776 if (!iova) {
1777 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1778 return 0;
1779 }
1780
1781 /* make sure context mapping is ok */
1782 if (unlikely(!domain_context_mapped(domain, pdev))) {
1783 ret = domain_context_mapping(domain, pdev);
1784 if (ret)
1785 goto error;
1786 }
1787
1788 /*
1789 * Check if DMAR supports zero-length reads on write only
1790 * mappings..
1791 */
1792 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1793 !cap_zlr(domain->iommu->cap))
1794 prot |= DMA_PTE_READ;
1795 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1796 prot |= DMA_PTE_WRITE;
1797 /*
1798 * addr - (addr + size) might be partial page, we should map the whole
1799 * page. Note: if two part of one page are separately mapped, we
1800 * might have two guest_addr mapping to the same host addr, but this
1801 * is not a big problem
1802 */
1803 ret = domain_page_mapping(domain, iova->pfn_lo << PAGE_SHIFT_4K,
1804 ((u64)addr) & PAGE_MASK_4K,
1805 (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K, prot);
1806 if (ret)
1807 goto error;
1808
1809 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1810 pci_name(pdev), size, (u64)addr,
1811 (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K,
1812 (u64)(iova->pfn_lo << PAGE_SHIFT_4K), dir);
1813
1814 *flush_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1815 *flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K;
1816 return (iova->pfn_lo << PAGE_SHIFT_4K) + ((u64)addr & (~PAGE_MASK_4K));
1817error:
1818 __free_iova(&domain->iovad, iova);
1819 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1820 pci_name(pdev), size, (u64)addr, dir);
1821 return 0;
1822}
1823
1824static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1825 size_t size, int dir)
1826{
1827 struct pci_dev *pdev = to_pci_dev(hwdev);
1828 dma_addr_t ret;
1829 struct dmar_domain *domain;
1830 u64 flush_addr;
1831 unsigned int flush_size;
1832
1833 BUG_ON(dir == DMA_NONE);
1834 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1835 return virt_to_bus(addr);
1836
1837 ret = __intel_map_single(hwdev, addr, size,
1838 dir, &flush_addr, &flush_size);
1839 if (ret) {
1840 domain = find_domain(pdev);
1841 /* it's a non-present to present mapping */
1842 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
1843 flush_addr, flush_size >> PAGE_SHIFT_4K, 1))
1844 iommu_flush_write_buffer(domain->iommu);
1845 }
1846 return ret;
1847}
1848
1849static void __intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1850 size_t size, int dir, u64 *flush_addr, unsigned int *flush_size)
1851{
1852 struct dmar_domain *domain;
1853 struct pci_dev *pdev = to_pci_dev(dev);
1854 struct iova *iova;
1855
1856 domain = find_domain(pdev);
1857 BUG_ON(!domain);
1858
1859 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1860 if (!iova) {
1861 *flush_size = 0;
1862 return;
1863 }
1864 pr_debug("Device %s unmapping: %lx@%llx\n",
1865 pci_name(pdev),
1866 (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K,
1867 (u64)(iova->pfn_lo << PAGE_SHIFT_4K));
1868
1869 *flush_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1870 *flush_size = (iova->pfn_hi - iova->pfn_lo + 1) << PAGE_SHIFT_4K;
1871 /* clear the whole page, not just dev_addr - (dev_addr + size) */
1872 dma_pte_clear_range(domain, *flush_addr, *flush_addr + *flush_size);
1873 /* free page tables */
1874 dma_pte_free_pagetable(domain, *flush_addr, *flush_addr + *flush_size);
1875 /* free iova */
1876 __free_iova(&domain->iovad, iova);
1877}
1878
1879static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1880 size_t size, int dir)
1881{
1882 struct pci_dev *pdev = to_pci_dev(dev);
1883 struct dmar_domain *domain;
1884 u64 flush_addr;
1885 unsigned int flush_size;
1886
1887 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1888 return;
1889
1890 domain = find_domain(pdev);
1891 __intel_unmap_single(dev, dev_addr, size,
1892 dir, &flush_addr, &flush_size);
1893 if (flush_size == 0)
1894 return;
1895 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, flush_addr,
1896 flush_size >> PAGE_SHIFT_4K, 0))
1897 iommu_flush_write_buffer(domain->iommu);
1898}
1899
1900static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1901 dma_addr_t *dma_handle, gfp_t flags)
1902{
1903 void *vaddr;
1904 int order;
1905
1906 size = PAGE_ALIGN_4K(size);
1907 order = get_order(size);
1908 flags &= ~(GFP_DMA | GFP_DMA32);
1909
1910 vaddr = (void *)__get_free_pages(flags, order);
1911 if (!vaddr)
1912 return NULL;
1913 memset(vaddr, 0, size);
1914
1915 *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1916 if (*dma_handle)
1917 return vaddr;
1918 free_pages((unsigned long)vaddr, order);
1919 return NULL;
1920}
1921
1922static void intel_free_coherent(struct device *hwdev, size_t size,
1923 void *vaddr, dma_addr_t dma_handle)
1924{
1925 int order;
1926
1927 size = PAGE_ALIGN_4K(size);
1928 order = get_order(size);
1929
1930 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1931 free_pages((unsigned long)vaddr, order);
1932}
1933
1934static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sg,
1935 int nelems, int dir)
1936{
1937 int i;
1938 struct pci_dev *pdev = to_pci_dev(hwdev);
1939 struct dmar_domain *domain;
1940 u64 flush_addr;
1941 unsigned int flush_size;
1942
1943 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1944 return;
1945
1946 domain = find_domain(pdev);
1947 for (i = 0; i < nelems; i++, sg++)
1948 __intel_unmap_single(hwdev, sg->dma_address,
1949 sg->dma_length, dir, &flush_addr, &flush_size);
1950
1951 if (iommu_flush_iotlb_dsi(domain->iommu, domain->id, 0))
1952 iommu_flush_write_buffer(domain->iommu);
1953}
1954
1955#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
1956static int intel_nontranslate_map_sg(struct device *hddev,
1957 struct scatterlist *sg, int nelems, int dir)
1958{
1959 int i;
1960
1961 for (i = 0; i < nelems; i++) {
1962 struct scatterlist *s = &sg[i];
1963 BUG_ON(!s->page);
1964 s->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(s));
1965 s->dma_length = s->length;
1966 }
1967 return nelems;
1968}
1969
1970static int intel_map_sg(struct device *hwdev, struct scatterlist *sg,
1971 int nelems, int dir)
1972{
1973 void *addr;
1974 int i;
1975 dma_addr_t dma_handle;
1976 struct pci_dev *pdev = to_pci_dev(hwdev);
1977 struct dmar_domain *domain;
1978 u64 flush_addr;
1979 unsigned int flush_size;
1980
1981 BUG_ON(dir == DMA_NONE);
1982 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1983 return intel_nontranslate_map_sg(hwdev, sg, nelems, dir);
1984
1985 for (i = 0; i < nelems; i++, sg++) {
1986 addr = SG_ENT_VIRT_ADDRESS(sg);
1987 dma_handle = __intel_map_single(hwdev, addr,
1988 sg->length, dir, &flush_addr, &flush_size);
1989 if (!dma_handle) {
1990 intel_unmap_sg(hwdev, sg - i, i, dir);
1991 sg[0].dma_length = 0;
1992 return 0;
1993 }
1994 sg->dma_address = dma_handle;
1995 sg->dma_length = sg->length;
1996 }
1997
1998 domain = find_domain(pdev);
1999
2000 /* it's a non-present to present mapping */
2001 if (iommu_flush_iotlb_dsi(domain->iommu, domain->id, 1))
2002 iommu_flush_write_buffer(domain->iommu);
2003 return nelems;
2004}
2005
2006static struct dma_mapping_ops intel_dma_ops = {
2007 .alloc_coherent = intel_alloc_coherent,
2008 .free_coherent = intel_free_coherent,
2009 .map_single = intel_map_single,
2010 .unmap_single = intel_unmap_single,
2011 .map_sg = intel_map_sg,
2012 .unmap_sg = intel_unmap_sg,
2013};
2014
2015static inline int iommu_domain_cache_init(void)
2016{
2017 int ret = 0;
2018
2019 iommu_domain_cache = kmem_cache_create("iommu_domain",
2020 sizeof(struct dmar_domain),
2021 0,
2022 SLAB_HWCACHE_ALIGN,
2023
2024 NULL);
2025 if (!iommu_domain_cache) {
2026 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2027 ret = -ENOMEM;
2028 }
2029
2030 return ret;
2031}
2032
2033static inline int iommu_devinfo_cache_init(void)
2034{
2035 int ret = 0;
2036
2037 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2038 sizeof(struct device_domain_info),
2039 0,
2040 SLAB_HWCACHE_ALIGN,
2041
2042 NULL);
2043 if (!iommu_devinfo_cache) {
2044 printk(KERN_ERR "Couldn't create devinfo cache\n");
2045 ret = -ENOMEM;
2046 }
2047
2048 return ret;
2049}
2050
2051static inline int iommu_iova_cache_init(void)
2052{
2053 int ret = 0;
2054
2055 iommu_iova_cache = kmem_cache_create("iommu_iova",
2056 sizeof(struct iova),
2057 0,
2058 SLAB_HWCACHE_ALIGN,
2059
2060 NULL);
2061 if (!iommu_iova_cache) {
2062 printk(KERN_ERR "Couldn't create iova cache\n");
2063 ret = -ENOMEM;
2064 }
2065
2066 return ret;
2067}
2068
2069static int __init iommu_init_mempool(void)
2070{
2071 int ret;
2072 ret = iommu_iova_cache_init();
2073 if (ret)
2074 return ret;
2075
2076 ret = iommu_domain_cache_init();
2077 if (ret)
2078 goto domain_error;
2079
2080 ret = iommu_devinfo_cache_init();
2081 if (!ret)
2082 return ret;
2083
2084 kmem_cache_destroy(iommu_domain_cache);
2085domain_error:
2086 kmem_cache_destroy(iommu_iova_cache);
2087
2088 return -ENOMEM;
2089}
2090
2091static void __init iommu_exit_mempool(void)
2092{
2093 kmem_cache_destroy(iommu_devinfo_cache);
2094 kmem_cache_destroy(iommu_domain_cache);
2095 kmem_cache_destroy(iommu_iova_cache);
2096
2097}
2098
2099void __init detect_intel_iommu(void)
2100{
2101 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2102 return;
2103 if (early_dmar_detect()) {
2104 iommu_detected = 1;
2105 }
2106}
2107
2108static void __init init_no_remapping_devices(void)
2109{
2110 struct dmar_drhd_unit *drhd;
2111
2112 for_each_drhd_unit(drhd) {
2113 if (!drhd->include_all) {
2114 int i;
2115 for (i = 0; i < drhd->devices_cnt; i++)
2116 if (drhd->devices[i] != NULL)
2117 break;
2118 /* ignore DMAR unit if no pci devices exist */
2119 if (i == drhd->devices_cnt)
2120 drhd->ignored = 1;
2121 }
2122 }
2123
2124 if (dmar_map_gfx)
2125 return;
2126
2127 for_each_drhd_unit(drhd) {
2128 int i;
2129 if (drhd->ignored || drhd->include_all)
2130 continue;
2131
2132 for (i = 0; i < drhd->devices_cnt; i++)
2133 if (drhd->devices[i] &&
2134 !IS_GFX_DEVICE(drhd->devices[i]))
2135 break;
2136
2137 if (i < drhd->devices_cnt)
2138 continue;
2139
2140 /* bypass IOMMU if it is just for gfx devices */
2141 drhd->ignored = 1;
2142 for (i = 0; i < drhd->devices_cnt; i++) {
2143 if (!drhd->devices[i])
2144 continue;
2145 drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO;
2146 }
2147 }
2148}
2149
2150int __init intel_iommu_init(void)
2151{
2152 int ret = 0;
2153
2154 if (no_iommu || swiotlb || dmar_disabled)
2155 return -ENODEV;
2156
2157 if (dmar_table_init())
2158 return -ENODEV;
2159
2160 iommu_init_mempool();
2161 dmar_init_reserved_ranges();
2162
2163 init_no_remapping_devices();
2164
2165 ret = init_dmars();
2166 if (ret) {
2167 printk(KERN_ERR "IOMMU: dmar init failed\n");
2168 put_iova_domain(&reserved_iova_list);
2169 iommu_exit_mempool();
2170 return ret;
2171 }
2172 printk(KERN_INFO
2173 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2174
2175 force_iommu = 1;
2176 dma_ops = &intel_dma_ops;
2177 return 0;
2178}