blob: dab329f015844f4f8cc875f3b223c29b8af3ce89 [file] [log] [blame]
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
20 */
21
22#include <linux/init.h>
23#include <linux/bitmap.h>
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
27#include <linux/sysdev.h>
28#include <linux/spinlock.h>
29#include <linux/pci.h>
30#include <linux/dmar.h>
31#include <linux/dma-mapping.h>
32#include <linux/mempool.h>
33#include "iova.h"
34#include "intel-iommu.h"
35#include <asm/proto.h> /* force_iommu in this header in x86-64*/
36#include <asm/cacheflush.h>
37#include <asm/iommu.h>
38#include "pci.h"
39
/*
 * PCI class tests on a struct pci_dev pointer.  The argument is
 * parenthesized so that expressions such as "&dev" expand correctly.
 */
#define IS_GFX_DEVICE(pdev) ((((pdev)->class) >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((((pdev)->class) >> 8) == PCI_CLASS_BRIDGE_ISA)

/* Address window that is reserved from DMA (see dmar_init_reserved_ranges) */
#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

/* Upper bound for busy-waiting on a hardware status bit (IOMMU_WAIT_OP) */
#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */

/* Highest address representable with a guest address width of @gaw bits */
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << (gaw)) - 1)
52
/* Defined later in this file; declared here for earlier users. */
static void domain_remove_dev_info(struct dmar_domain *domain);

/* Set to 1 by the "off" token of the intel_iommu= boot option. */
static int dmar_disabled;
/* Cleared by the "igfx_off" token of the intel_iommu= boot option. */
static int __initdata dmar_map_gfx = 1;
/* Set to 1 by the "forcedac" token of the intel_iommu= boot option. */
static int dmar_forcedac;

/* Sentinel device_domain_info pointer value (all bits set, never a real object). */
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
/* NOTE(review): presumably device_domain_lock guards device_domain_list - confirm against users. */
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
62
63static int __init intel_iommu_setup(char *str)
64{
65 if (!str)
66 return -EINVAL;
67 while (*str) {
68 if (!strncmp(str, "off", 3)) {
69 dmar_disabled = 1;
70 printk(KERN_INFO"Intel-IOMMU: disabled\n");
71 } else if (!strncmp(str, "igfx_off", 8)) {
72 dmar_map_gfx = 0;
73 printk(KERN_INFO
74 "Intel-IOMMU: disable GFX device mapping\n");
Keshavamurthy, Anil S7d3b03c2007-10-21 16:41:53 -070075 } else if (!strncmp(str, "forcedac", 8)) {
76 printk (KERN_INFO
77 "Intel-IOMMU: Forcing DAC for PCI devices\n");
78 dmar_forcedac = 1;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -070079 }
80
81 str += strcspn(str, ",");
82 while (*str == ',')
83 str++;
84 }
85 return 0;
86}
87__setup("intel_iommu=", intel_iommu_setup);
88
/*
 * Slab caches backing the allocation helpers below:
 * alloc_domain_mem(), alloc_devinfo_mem() and alloc_iova_mem().
 */
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;
92
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -070093static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
94{
95 unsigned int flags;
96 void *vaddr;
97
98 /* trying to avoid low memory issues */
99 flags = current->flags & PF_MEMALLOC;
100 current->flags |= PF_MEMALLOC;
101 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
102 current->flags &= (~PF_MEMALLOC | flags);
103 return vaddr;
104}
105
106
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700107static inline void *alloc_pgtable_page(void)
108{
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -0700109 unsigned int flags;
110 void *vaddr;
111
112 /* trying to avoid low memory issues */
113 flags = current->flags & PF_MEMALLOC;
114 current->flags |= PF_MEMALLOC;
115 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
116 current->flags &= (~PF_MEMALLOC | flags);
117 return vaddr;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700118}
119
/* Release a page obtained from alloc_pgtable_page(). */
static inline void free_pgtable_page(void *vaddr)
{
	unsigned long page = (unsigned long)vaddr;

	free_page(page);
}
124
125static inline void *alloc_domain_mem(void)
126{
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -0700127 return iommu_kmem_cache_alloc(iommu_domain_cache);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700128}
129
130static inline void free_domain_mem(void *vaddr)
131{
132 kmem_cache_free(iommu_domain_cache, vaddr);
133}
134
135static inline void * alloc_devinfo_mem(void)
136{
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -0700137 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700138}
139
140static inline void free_devinfo_mem(void *vaddr)
141{
142 kmem_cache_free(iommu_devinfo_cache, vaddr);
143}
144
145struct iova *alloc_iova_mem(void)
146{
Keshavamurthy, Anil Seb3fa7c2007-10-21 16:41:52 -0700147 return iommu_kmem_cache_alloc(iommu_iova_cache);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700148}
149
150void free_iova_mem(struct iova *iova)
151{
152 kmem_cache_free(iommu_iova_cache, iova);
153}
154
155static inline void __iommu_flush_cache(
156 struct intel_iommu *iommu, void *addr, int size)
157{
158 if (!ecap_coherent(iommu->ecap))
159 clflush_cache_range(addr, size);
160}
161
162/* Gets context entry for a given bus and devfn */
163static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
164 u8 bus, u8 devfn)
165{
166 struct root_entry *root;
167 struct context_entry *context;
168 unsigned long phy_addr;
169 unsigned long flags;
170
171 spin_lock_irqsave(&iommu->lock, flags);
172 root = &iommu->root_entry[bus];
173 context = get_context_addr_from_root(root);
174 if (!context) {
175 context = (struct context_entry *)alloc_pgtable_page();
176 if (!context) {
177 spin_unlock_irqrestore(&iommu->lock, flags);
178 return NULL;
179 }
180 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
181 phy_addr = virt_to_phys((void *)context);
182 set_root_value(root, phy_addr);
183 set_root_present(root);
184 __iommu_flush_cache(iommu, root, sizeof(*root));
185 }
186 spin_unlock_irqrestore(&iommu->lock, flags);
187 return &context[devfn];
188}
189
190static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
191{
192 struct root_entry *root;
193 struct context_entry *context;
194 int ret;
195 unsigned long flags;
196
197 spin_lock_irqsave(&iommu->lock, flags);
198 root = &iommu->root_entry[bus];
199 context = get_context_addr_from_root(root);
200 if (!context) {
201 ret = 0;
202 goto out;
203 }
204 ret = context_present(context[devfn]);
205out:
206 spin_unlock_irqrestore(&iommu->lock, flags);
207 return ret;
208}
209
210static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
211{
212 struct root_entry *root;
213 struct context_entry *context;
214 unsigned long flags;
215
216 spin_lock_irqsave(&iommu->lock, flags);
217 root = &iommu->root_entry[bus];
218 context = get_context_addr_from_root(root);
219 if (context) {
220 context_clear_entry(context[devfn]);
221 __iommu_flush_cache(iommu, &context[devfn], \
222 sizeof(*context));
223 }
224 spin_unlock_irqrestore(&iommu->lock, flags);
225}
226
227static void free_context_table(struct intel_iommu *iommu)
228{
229 struct root_entry *root;
230 int i;
231 unsigned long flags;
232 struct context_entry *context;
233
234 spin_lock_irqsave(&iommu->lock, flags);
235 if (!iommu->root_entry) {
236 goto out;
237 }
238 for (i = 0; i < ROOT_ENTRY_NR; i++) {
239 root = &iommu->root_entry[i];
240 context = get_context_addr_from_root(root);
241 if (context)
242 free_pgtable_page(context);
243 }
244 free_pgtable_page(iommu->root_entry);
245 iommu->root_entry = NULL;
246out:
247 spin_unlock_irqrestore(&iommu->lock, flags);
248}
249
250/* page table handling */
/* Number of address bits translated by one page-table level */
#define LEVEL_STRIDE (9)
/* Mask selecting one level's table index (LEVEL_STRIDE low bits set) */
#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
253
/* Number of page-table levels for an adjusted guest address width index. */
static inline int agaw_to_level(int agaw)
{
	int levels = agaw + 2;

	return levels;
}
258
259static inline int agaw_to_width(int agaw)
260{
261 return 30 + agaw * LEVEL_STRIDE;
262
263}
264
265static inline int width_to_agaw(int width)
266{
267 return (width - 30) / LEVEL_STRIDE;
268}
269
270static inline unsigned int level_to_offset_bits(int level)
271{
272 return (12 + (level - 1) * LEVEL_STRIDE);
273}
274
275static inline int address_level_offset(u64 addr, int level)
276{
277 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
278}
279
280static inline u64 level_mask(int level)
281{
282 return ((u64)-1 << level_to_offset_bits(level));
283}
284
285static inline u64 level_size(int level)
286{
287 return ((u64)1 << level_to_offset_bits(level));
288}
289
290static inline u64 align_to_level(u64 addr, int level)
291{
292 return ((addr + level_size(level) - 1) & level_mask(level));
293}
294
295static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
296{
297 int addr_width = agaw_to_width(domain->agaw);
298 struct dma_pte *parent, *pte = NULL;
299 int level = agaw_to_level(domain->agaw);
300 int offset;
301 unsigned long flags;
302
303 BUG_ON(!domain->pgd);
304
305 addr &= (((u64)1) << addr_width) - 1;
306 parent = domain->pgd;
307
308 spin_lock_irqsave(&domain->mapping_lock, flags);
309 while (level > 0) {
310 void *tmp_page;
311
312 offset = address_level_offset(addr, level);
313 pte = &parent[offset];
314 if (level == 1)
315 break;
316
317 if (!dma_pte_present(*pte)) {
318 tmp_page = alloc_pgtable_page();
319
320 if (!tmp_page) {
321 spin_unlock_irqrestore(&domain->mapping_lock,
322 flags);
323 return NULL;
324 }
325 __iommu_flush_cache(domain->iommu, tmp_page,
326 PAGE_SIZE_4K);
327 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
328 /*
329 * high level table always sets r/w, last level page
330 * table control read/write
331 */
332 dma_set_pte_readable(*pte);
333 dma_set_pte_writable(*pte);
334 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
335 }
336 parent = phys_to_virt(dma_pte_addr(*pte));
337 level--;
338 }
339
340 spin_unlock_irqrestore(&domain->mapping_lock, flags);
341 return pte;
342}
343
344/* return address's pte at specific level */
345static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
346 int level)
347{
348 struct dma_pte *parent, *pte = NULL;
349 int total = agaw_to_level(domain->agaw);
350 int offset;
351
352 parent = domain->pgd;
353 while (level <= total) {
354 offset = address_level_offset(addr, total);
355 pte = &parent[offset];
356 if (level == total)
357 return pte;
358
359 if (!dma_pte_present(*pte))
360 break;
361 parent = phys_to_virt(dma_pte_addr(*pte));
362 total--;
363 }
364 return NULL;
365}
366
367/* clear one page's page table */
368static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
369{
370 struct dma_pte *pte = NULL;
371
372 /* get last level pte */
373 pte = dma_addr_level_pte(domain, addr, 1);
374
375 if (pte) {
376 dma_clear_pte(*pte);
377 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
378 }
379}
380
381/* clear last level pte, a tlb flush should be followed */
382static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
383{
384 int addr_width = agaw_to_width(domain->agaw);
385
386 start &= (((u64)1) << addr_width) - 1;
387 end &= (((u64)1) << addr_width) - 1;
388 /* in case it's partial page */
389 start = PAGE_ALIGN_4K(start);
390 end &= PAGE_MASK_4K;
391
392 /* we don't need lock here, nobody else touches the iova range */
393 while (start < end) {
394 dma_pte_clear_one(domain, start);
395 start += PAGE_SIZE_4K;
396 }
397}
398
399/* free page table pages. last level pte should already be cleared */
400static void dma_pte_free_pagetable(struct dmar_domain *domain,
401 u64 start, u64 end)
402{
403 int addr_width = agaw_to_width(domain->agaw);
404 struct dma_pte *pte;
405 int total = agaw_to_level(domain->agaw);
406 int level;
407 u64 tmp;
408
409 start &= (((u64)1) << addr_width) - 1;
410 end &= (((u64)1) << addr_width) - 1;
411
412 /* we don't need lock here, nobody else touches the iova range */
413 level = 2;
414 while (level <= total) {
415 tmp = align_to_level(start, level);
416 if (tmp >= end || (tmp + level_size(level) > end))
417 return;
418
419 while (tmp < end) {
420 pte = dma_addr_level_pte(domain, tmp, level);
421 if (pte) {
422 free_pgtable_page(
423 phys_to_virt(dma_pte_addr(*pte)));
424 dma_clear_pte(*pte);
425 __iommu_flush_cache(domain->iommu,
426 pte, sizeof(*pte));
427 }
428 tmp += level_size(level);
429 }
430 level++;
431 }
432 /* free pgd */
433 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
434 free_pgtable_page(domain->pgd);
435 domain->pgd = NULL;
436 }
437}
438
439/* iommu handling */
440static int iommu_alloc_root_entry(struct intel_iommu *iommu)
441{
442 struct root_entry *root;
443 unsigned long flags;
444
445 root = (struct root_entry *)alloc_pgtable_page();
446 if (!root)
447 return -ENOMEM;
448
449 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
450
451 spin_lock_irqsave(&iommu->lock, flags);
452 iommu->root_entry = root;
453 spin_unlock_irqrestore(&iommu->lock, flags);
454
455 return 0;
456}
457
/*
 * Busy-wait on an IOMMU register.
 *
 * Repeatedly reads register @offset of @iommu with accessor @op into
 * @sts until @cond (an expression normally written in terms of @sts)
 * becomes true.  Panics when the condition is still false after
 * DMAR_OPERATION_TIMEOUT jiffies.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement,
 * e.g. inside an unbraced if/else; use it with a trailing semicolon.
 */
#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts)			\
do {									\
	unsigned long start_time = jiffies;				\
	while (1) {							\
		sts = op((iommu)->reg + (offset));			\
		if (cond)						\
			break;						\
		if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
			panic("DMAR hardware is malfunctioning\n");	\
		cpu_relax();						\
	}								\
} while (0)
470
471static void iommu_set_root_entry(struct intel_iommu *iommu)
472{
473 void *addr;
474 u32 cmd, sts;
475 unsigned long flag;
476
477 addr = iommu->root_entry;
478
479 spin_lock_irqsave(&iommu->register_lock, flag);
480 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
481
482 cmd = iommu->gcmd | DMA_GCMD_SRTP;
483 writel(cmd, iommu->reg + DMAR_GCMD_REG);
484
485 /* Make sure hardware complete it */
486 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
487 readl, (sts & DMA_GSTS_RTPS), sts);
488
489 spin_unlock_irqrestore(&iommu->register_lock, flag);
490}
491
492static void iommu_flush_write_buffer(struct intel_iommu *iommu)
493{
494 u32 val;
495 unsigned long flag;
496
497 if (!cap_rwbf(iommu->cap))
498 return;
499 val = iommu->gcmd | DMA_GCMD_WBF;
500
501 spin_lock_irqsave(&iommu->register_lock, flag);
502 writel(val, iommu->reg + DMAR_GCMD_REG);
503
504 /* Make sure hardware complete it */
505 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
506 readl, (!(val & DMA_GSTS_WBFS)), val);
507
508 spin_unlock_irqrestore(&iommu->register_lock, flag);
509}
510
511/* return value determine if we need a write buffer flush */
512static int __iommu_flush_context(struct intel_iommu *iommu,
513 u16 did, u16 source_id, u8 function_mask, u64 type,
514 int non_present_entry_flush)
515{
516 u64 val = 0;
517 unsigned long flag;
518
519 /*
520 * In the non-present entry flush case, if hardware doesn't cache
521 * non-present entry we do nothing and if hardware cache non-present
522 * entry, we flush entries of domain 0 (the domain id is used to cache
523 * any non-present entries)
524 */
525 if (non_present_entry_flush) {
526 if (!cap_caching_mode(iommu->cap))
527 return 1;
528 else
529 did = 0;
530 }
531
532 switch (type) {
533 case DMA_CCMD_GLOBAL_INVL:
534 val = DMA_CCMD_GLOBAL_INVL;
535 break;
536 case DMA_CCMD_DOMAIN_INVL:
537 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
538 break;
539 case DMA_CCMD_DEVICE_INVL:
540 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
541 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
542 break;
543 default:
544 BUG();
545 }
546 val |= DMA_CCMD_ICC;
547
548 spin_lock_irqsave(&iommu->register_lock, flag);
549 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
550
551 /* Make sure hardware complete it */
552 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
553 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
554
555 spin_unlock_irqrestore(&iommu->register_lock, flag);
556
557 /* flush context entry will implictly flush write buffer */
558 return 0;
559}
560
561static int inline iommu_flush_context_global(struct intel_iommu *iommu,
562 int non_present_entry_flush)
563{
564 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
565 non_present_entry_flush);
566}
567
568static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
569 int non_present_entry_flush)
570{
571 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
572 non_present_entry_flush);
573}
574
575static int inline iommu_flush_context_device(struct intel_iommu *iommu,
576 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
577{
578 return __iommu_flush_context(iommu, did, source_id, function_mask,
579 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
580}
581
582/* return value determine if we need a write buffer flush */
583static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
584 u64 addr, unsigned int size_order, u64 type,
585 int non_present_entry_flush)
586{
587 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
588 u64 val = 0, val_iva = 0;
589 unsigned long flag;
590
591 /*
592 * In the non-present entry flush case, if hardware doesn't cache
593 * non-present entry we do nothing and if hardware cache non-present
594 * entry, we flush entries of domain 0 (the domain id is used to cache
595 * any non-present entries)
596 */
597 if (non_present_entry_flush) {
598 if (!cap_caching_mode(iommu->cap))
599 return 1;
600 else
601 did = 0;
602 }
603
604 switch (type) {
605 case DMA_TLB_GLOBAL_FLUSH:
606 /* global flush doesn't need set IVA_REG */
607 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
608 break;
609 case DMA_TLB_DSI_FLUSH:
610 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
611 break;
612 case DMA_TLB_PSI_FLUSH:
613 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
614 /* Note: always flush non-leaf currently */
615 val_iva = size_order | addr;
616 break;
617 default:
618 BUG();
619 }
620 /* Note: set drain read/write */
621#if 0
622 /*
623 * This is probably to be super secure.. Looks like we can
624 * ignore it without any impact.
625 */
626 if (cap_read_drain(iommu->cap))
627 val |= DMA_TLB_READ_DRAIN;
628#endif
629 if (cap_write_drain(iommu->cap))
630 val |= DMA_TLB_WRITE_DRAIN;
631
632 spin_lock_irqsave(&iommu->register_lock, flag);
633 /* Note: Only uses first TLB reg currently */
634 if (val_iva)
635 dmar_writeq(iommu->reg + tlb_offset, val_iva);
636 dmar_writeq(iommu->reg + tlb_offset + 8, val);
637
638 /* Make sure hardware complete it */
639 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
640 dmar_readq, (!(val & DMA_TLB_IVT)), val);
641
642 spin_unlock_irqrestore(&iommu->register_lock, flag);
643
644 /* check IOTLB invalidation granularity */
645 if (DMA_TLB_IAIG(val) == 0)
646 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
647 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
648 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
649 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
650 /* flush context entry will implictly flush write buffer */
651 return 0;
652}
653
654static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
655 int non_present_entry_flush)
656{
657 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
658 non_present_entry_flush);
659}
660
661static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
662 int non_present_entry_flush)
663{
664 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
665 non_present_entry_flush);
666}
667
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700668static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
669 u64 addr, unsigned int pages, int non_present_entry_flush)
670{
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -0700671 unsigned int mask;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700672
673 BUG_ON(addr & (~PAGE_MASK_4K));
674 BUG_ON(pages == 0);
675
676 /* Fallback to domain selective flush if no PSI support */
677 if (!cap_pgsel_inv(iommu->cap))
678 return iommu_flush_iotlb_dsi(iommu, did,
679 non_present_entry_flush);
680
681 /*
682 * PSI requires page size to be 2 ^ x, and the base address is naturally
683 * aligned to the size
684 */
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -0700685 mask = ilog2(__roundup_pow_of_two(pages));
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700686 /* Fallback to domain selective flush if size is too big */
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -0700687 if (mask > cap_max_amask_val(iommu->cap))
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700688 return iommu_flush_iotlb_dsi(iommu, did,
689 non_present_entry_flush);
690
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -0700691 return __iommu_flush_iotlb(iommu, did, addr, mask,
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700692 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
693}
694
695static int iommu_enable_translation(struct intel_iommu *iommu)
696{
697 u32 sts;
698 unsigned long flags;
699
700 spin_lock_irqsave(&iommu->register_lock, flags);
701 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
702
703 /* Make sure hardware complete it */
704 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
705 readl, (sts & DMA_GSTS_TES), sts);
706
707 iommu->gcmd |= DMA_GCMD_TE;
708 spin_unlock_irqrestore(&iommu->register_lock, flags);
709 return 0;
710}
711
712static int iommu_disable_translation(struct intel_iommu *iommu)
713{
714 u32 sts;
715 unsigned long flag;
716
717 spin_lock_irqsave(&iommu->register_lock, flag);
718 iommu->gcmd &= ~DMA_GCMD_TE;
719 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
720
721 /* Make sure hardware complete it */
722 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
723 readl, (!(sts & DMA_GSTS_TES)), sts);
724
725 spin_unlock_irqrestore(&iommu->register_lock, flag);
726 return 0;
727}
728
/* iommu interrupt handling. Most of it is MSI-like. */
730
731static char *fault_reason_strings[] =
732{
733 "Software",
734 "Present bit in root entry is clear",
735 "Present bit in context entry is clear",
736 "Invalid context entry",
737 "Access beyond MGAW",
738 "PTE Write access is not set",
739 "PTE Read access is not set",
740 "Next page table ptr is invalid",
741 "Root table address invalid",
742 "Context table ptr is invalid",
743 "non-zero reserved fields in RTP",
744 "non-zero reserved fields in CTP",
745 "non-zero reserved fields in PTE",
746 "Unknown"
747};
748#define MAX_FAULT_REASON_IDX ARRAY_SIZE(fault_reason_strings)
749
750char *dmar_get_fault_reason(u8 fault_reason)
751{
752 if (fault_reason > MAX_FAULT_REASON_IDX)
753 return fault_reason_strings[MAX_FAULT_REASON_IDX];
754 else
755 return fault_reason_strings[fault_reason];
756}
757
758void dmar_msi_unmask(unsigned int irq)
759{
760 struct intel_iommu *iommu = get_irq_data(irq);
761 unsigned long flag;
762
763 /* unmask it */
764 spin_lock_irqsave(&iommu->register_lock, flag);
765 writel(0, iommu->reg + DMAR_FECTL_REG);
766 /* Read a reg to force flush the post write */
767 readl(iommu->reg + DMAR_FECTL_REG);
768 spin_unlock_irqrestore(&iommu->register_lock, flag);
769}
770
771void dmar_msi_mask(unsigned int irq)
772{
773 unsigned long flag;
774 struct intel_iommu *iommu = get_irq_data(irq);
775
776 /* mask it */
777 spin_lock_irqsave(&iommu->register_lock, flag);
778 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
779 /* Read a reg to force flush the post write */
780 readl(iommu->reg + DMAR_FECTL_REG);
781 spin_unlock_irqrestore(&iommu->register_lock, flag);
782}
783
784void dmar_msi_write(int irq, struct msi_msg *msg)
785{
786 struct intel_iommu *iommu = get_irq_data(irq);
787 unsigned long flag;
788
789 spin_lock_irqsave(&iommu->register_lock, flag);
790 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
791 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
792 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
793 spin_unlock_irqrestore(&iommu->register_lock, flag);
794}
795
796void dmar_msi_read(int irq, struct msi_msg *msg)
797{
798 struct intel_iommu *iommu = get_irq_data(irq);
799 unsigned long flag;
800
801 spin_lock_irqsave(&iommu->register_lock, flag);
802 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
803 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
804 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
805 spin_unlock_irqrestore(&iommu->register_lock, flag);
806}
807
808static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
809 u8 fault_reason, u16 source_id, u64 addr)
810{
811 char *reason;
812
813 reason = dmar_get_fault_reason(fault_reason);
814
815 printk(KERN_ERR
816 "DMAR:[%s] Request device [%02x:%02x.%d] "
817 "fault addr %llx \n"
818 "DMAR:[fault reason %02d] %s\n",
819 (type ? "DMA Read" : "DMA Write"),
820 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
821 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
822 return 0;
823}
824
825#define PRIMARY_FAULT_REG_LEN (16)
826static irqreturn_t iommu_page_fault(int irq, void *dev_id)
827{
828 struct intel_iommu *iommu = dev_id;
829 int reg, fault_index;
830 u32 fault_status;
831 unsigned long flag;
832
833 spin_lock_irqsave(&iommu->register_lock, flag);
834 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
835
836 /* TBD: ignore advanced fault log currently */
837 if (!(fault_status & DMA_FSTS_PPF))
838 goto clear_overflow;
839
840 fault_index = dma_fsts_fault_record_index(fault_status);
841 reg = cap_fault_reg_offset(iommu->cap);
842 while (1) {
843 u8 fault_reason;
844 u16 source_id;
845 u64 guest_addr;
846 int type;
847 u32 data;
848
849 /* highest 32 bits */
850 data = readl(iommu->reg + reg +
851 fault_index * PRIMARY_FAULT_REG_LEN + 12);
852 if (!(data & DMA_FRCD_F))
853 break;
854
855 fault_reason = dma_frcd_fault_reason(data);
856 type = dma_frcd_type(data);
857
858 data = readl(iommu->reg + reg +
859 fault_index * PRIMARY_FAULT_REG_LEN + 8);
860 source_id = dma_frcd_source_id(data);
861
862 guest_addr = dmar_readq(iommu->reg + reg +
863 fault_index * PRIMARY_FAULT_REG_LEN);
864 guest_addr = dma_frcd_page_addr(guest_addr);
865 /* clear the fault */
866 writel(DMA_FRCD_F, iommu->reg + reg +
867 fault_index * PRIMARY_FAULT_REG_LEN + 12);
868
869 spin_unlock_irqrestore(&iommu->register_lock, flag);
870
871 iommu_page_fault_do_one(iommu, type, fault_reason,
872 source_id, guest_addr);
873
874 fault_index++;
875 if (fault_index > cap_num_fault_regs(iommu->cap))
876 fault_index = 0;
877 spin_lock_irqsave(&iommu->register_lock, flag);
878 }
879clear_overflow:
880 /* clear primary fault overflow */
881 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
882 if (fault_status & DMA_FSTS_PFO)
883 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
884
885 spin_unlock_irqrestore(&iommu->register_lock, flag);
886 return IRQ_HANDLED;
887}
888
889int dmar_set_interrupt(struct intel_iommu *iommu)
890{
891 int irq, ret;
892
893 irq = create_irq();
894 if (!irq) {
895 printk(KERN_ERR "IOMMU: no free vectors\n");
896 return -EINVAL;
897 }
898
899 set_irq_data(irq, iommu);
900 iommu->irq = irq;
901
902 ret = arch_setup_dmar_msi(irq);
903 if (ret) {
904 set_irq_data(irq, NULL);
905 iommu->irq = 0;
906 destroy_irq(irq);
907 return 0;
908 }
909
910 /* Force fault register is cleared */
911 iommu_page_fault(irq, iommu);
912
913 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
914 if (ret)
915 printk(KERN_ERR "IOMMU: can't request irq\n");
916 return ret;
917}
918
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -0700919static int iommu_init_domains(struct intel_iommu *iommu)
920{
921 unsigned long ndomains;
922 unsigned long nlongs;
923
924 ndomains = cap_ndoms(iommu->cap);
925 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
926 nlongs = BITS_TO_LONGS(ndomains);
927
928 /* TBD: there might be 64K domains,
929 * consider other allocation for future chip
930 */
931 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
932 if (!iommu->domain_ids) {
933 printk(KERN_ERR "Allocating domain id array failed\n");
934 return -ENOMEM;
935 }
936 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
937 GFP_KERNEL);
938 if (!iommu->domains) {
939 printk(KERN_ERR "Allocating domain array failed\n");
940 kfree(iommu->domain_ids);
941 return -ENOMEM;
942 }
943
944 /*
945 * if Caching mode is set, then invalid translations are tagged
946 * with domainid 0. Hence we need to pre-allocate it.
947 */
948 if (cap_caching_mode(iommu->cap))
949 set_bit(0, iommu->domain_ids);
950 return 0;
951}
952
953static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
954{
955 struct intel_iommu *iommu;
956 int ret;
957 int map_size;
958 u32 ver;
959
960 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
961 if (!iommu)
962 return NULL;
963 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
964 if (!iommu->reg) {
965 printk(KERN_ERR "IOMMU: can't map the region\n");
966 goto error;
967 }
968 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
969 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
970
971 /* the registers might be more than one page */
972 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
973 cap_max_fault_reg_offset(iommu->cap));
974 map_size = PAGE_ALIGN_4K(map_size);
975 if (map_size > PAGE_SIZE_4K) {
976 iounmap(iommu->reg);
977 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
978 if (!iommu->reg) {
979 printk(KERN_ERR "IOMMU: can't map the region\n");
980 goto error;
981 }
982 }
983
984 ver = readl(iommu->reg + DMAR_VER_REG);
985 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
986 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
987 iommu->cap, iommu->ecap);
988 ret = iommu_init_domains(iommu);
989 if (ret)
990 goto error_unmap;
991 spin_lock_init(&iommu->lock);
992 spin_lock_init(&iommu->register_lock);
993
994 drhd->iommu = iommu;
995 return iommu;
996error_unmap:
997 iounmap(iommu->reg);
998 iommu->reg = 0;
999error:
1000 kfree(iommu);
1001 return NULL;
1002}
1003
1004static void domain_exit(struct dmar_domain *domain);
1005static void free_iommu(struct intel_iommu *iommu)
1006{
1007 struct dmar_domain *domain;
1008 int i;
1009
1010 if (!iommu)
1011 return;
1012
1013 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1014 for (; i < cap_ndoms(iommu->cap); ) {
1015 domain = iommu->domains[i];
1016 clear_bit(i, iommu->domain_ids);
1017 domain_exit(domain);
1018 i = find_next_bit(iommu->domain_ids,
1019 cap_ndoms(iommu->cap), i+1);
1020 }
1021
1022 if (iommu->gcmd & DMA_GCMD_TE)
1023 iommu_disable_translation(iommu);
1024
1025 if (iommu->irq) {
1026 set_irq_data(iommu->irq, NULL);
1027 /* This will mask the irq */
1028 free_irq(iommu->irq, iommu);
1029 destroy_irq(iommu->irq);
1030 }
1031
1032 kfree(iommu->domains);
1033 kfree(iommu->domain_ids);
1034
1035 /* free context mapping */
1036 free_context_table(iommu);
1037
1038 if (iommu->reg)
1039 iounmap(iommu->reg);
1040 kfree(iommu);
1041}
1042
1043static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1044{
1045 unsigned long num;
1046 unsigned long ndomains;
1047 struct dmar_domain *domain;
1048 unsigned long flags;
1049
1050 domain = alloc_domain_mem();
1051 if (!domain)
1052 return NULL;
1053
1054 ndomains = cap_ndoms(iommu->cap);
1055
1056 spin_lock_irqsave(&iommu->lock, flags);
1057 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1058 if (num >= ndomains) {
1059 spin_unlock_irqrestore(&iommu->lock, flags);
1060 free_domain_mem(domain);
1061 printk(KERN_ERR "IOMMU: no free domain ids\n");
1062 return NULL;
1063 }
1064
1065 set_bit(num, iommu->domain_ids);
1066 domain->id = num;
1067 domain->iommu = iommu;
1068 iommu->domains[num] = domain;
1069 spin_unlock_irqrestore(&iommu->lock, flags);
1070
1071 return domain;
1072}
1073
1074static void iommu_free_domain(struct dmar_domain *domain)
1075{
1076 unsigned long flags;
1077
1078 spin_lock_irqsave(&domain->iommu->lock, flags);
1079 clear_bit(domain->id, domain->iommu->domain_ids);
1080 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1081}
1082
1083static struct iova_domain reserved_iova_list;
1084
1085static void dmar_init_reserved_ranges(void)
1086{
1087 struct pci_dev *pdev = NULL;
1088 struct iova *iova;
1089 int i;
1090 u64 addr, size;
1091
1092 init_iova_domain(&reserved_iova_list);
1093
1094 /* IOAPIC ranges shouldn't be accessed by DMA */
1095 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1096 IOVA_PFN(IOAPIC_RANGE_END));
1097 if (!iova)
1098 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1099
1100 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1101 for_each_pci_dev(pdev) {
1102 struct resource *r;
1103
1104 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1105 r = &pdev->resource[i];
1106 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1107 continue;
1108 addr = r->start;
1109 addr &= PAGE_MASK_4K;
1110 size = r->end - addr;
1111 size = PAGE_ALIGN_4K(size);
1112 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1113 IOVA_PFN(size + addr) - 1);
1114 if (!iova)
1115 printk(KERN_ERR "Reserve iova failed\n");
1116 }
1117 }
1118
1119}
1120
1121static void domain_reserve_special_ranges(struct dmar_domain *domain)
1122{
1123 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1124}
1125
/*
 * guestwidth_to_adjustwidth - round a guest address width up to the next
 * value of the form 12 + 9 * k, capped at 64.
 * @gaw: guest address width in bits.
 *
 * Returns the adjusted width in bits.
 */
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int rem = (gaw - 12) % 9;
	int agaw = rem ? gaw + 9 - rem : gaw;

	return agaw > 64 ? 64 : agaw;
}
1139
1140static int domain_init(struct dmar_domain *domain, int guest_width)
1141{
1142 struct intel_iommu *iommu;
1143 int adjust_width, agaw;
1144 unsigned long sagaw;
1145
1146 init_iova_domain(&domain->iovad);
1147 spin_lock_init(&domain->mapping_lock);
1148
1149 domain_reserve_special_ranges(domain);
1150
1151 /* calculate AGAW */
1152 iommu = domain->iommu;
1153 if (guest_width > cap_mgaw(iommu->cap))
1154 guest_width = cap_mgaw(iommu->cap);
1155 domain->gaw = guest_width;
1156 adjust_width = guestwidth_to_adjustwidth(guest_width);
1157 agaw = width_to_agaw(adjust_width);
1158 sagaw = cap_sagaw(iommu->cap);
1159 if (!test_bit(agaw, &sagaw)) {
1160 /* hardware doesn't support it, choose a bigger one */
1161 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1162 agaw = find_next_bit(&sagaw, 5, agaw);
1163 if (agaw >= 5)
1164 return -ENODEV;
1165 }
1166 domain->agaw = agaw;
1167 INIT_LIST_HEAD(&domain->devices);
1168
1169 /* always allocate the top pgd */
1170 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1171 if (!domain->pgd)
1172 return -ENOMEM;
1173 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1174 return 0;
1175}
1176
1177static void domain_exit(struct dmar_domain *domain)
1178{
1179 u64 end;
1180
1181 /* Domain 0 is reserved, so dont process it */
1182 if (!domain)
1183 return;
1184
1185 domain_remove_dev_info(domain);
1186 /* destroy iovas */
1187 put_iova_domain(&domain->iovad);
1188 end = DOMAIN_MAX_ADDR(domain->gaw);
1189 end = end & (~PAGE_MASK_4K);
1190
1191 /* clear ptes */
1192 dma_pte_clear_range(domain, 0, end);
1193
1194 /* free page tables */
1195 dma_pte_free_pagetable(domain, 0, end);
1196
1197 iommu_free_domain(domain);
1198 free_domain_mem(domain);
1199}
1200
1201static int domain_context_mapping_one(struct dmar_domain *domain,
1202 u8 bus, u8 devfn)
1203{
1204 struct context_entry *context;
1205 struct intel_iommu *iommu = domain->iommu;
1206 unsigned long flags;
1207
1208 pr_debug("Set context mapping for %02x:%02x.%d\n",
1209 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1210 BUG_ON(!domain->pgd);
1211 context = device_to_context_entry(iommu, bus, devfn);
1212 if (!context)
1213 return -ENOMEM;
1214 spin_lock_irqsave(&iommu->lock, flags);
1215 if (context_present(*context)) {
1216 spin_unlock_irqrestore(&iommu->lock, flags);
1217 return 0;
1218 }
1219
1220 context_set_domain_id(*context, domain->id);
1221 context_set_address_width(*context, domain->agaw);
1222 context_set_address_root(*context, virt_to_phys(domain->pgd));
1223 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1224 context_set_fault_enable(*context);
1225 context_set_present(*context);
1226 __iommu_flush_cache(iommu, context, sizeof(*context));
1227
1228 /* it's a non-present to present mapping */
1229 if (iommu_flush_context_device(iommu, domain->id,
1230 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1231 iommu_flush_write_buffer(iommu);
1232 else
1233 iommu_flush_iotlb_dsi(iommu, 0, 0);
1234 spin_unlock_irqrestore(&iommu->lock, flags);
1235 return 0;
1236}
1237
1238static int
1239domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1240{
1241 int ret;
1242 struct pci_dev *tmp, *parent;
1243
1244 ret = domain_context_mapping_one(domain, pdev->bus->number,
1245 pdev->devfn);
1246 if (ret)
1247 return ret;
1248
1249 /* dependent device mapping */
1250 tmp = pci_find_upstream_pcie_bridge(pdev);
1251 if (!tmp)
1252 return 0;
1253 /* Secondary interface's bus number and devfn 0 */
1254 parent = pdev->bus->self;
1255 while (parent != tmp) {
1256 ret = domain_context_mapping_one(domain, parent->bus->number,
1257 parent->devfn);
1258 if (ret)
1259 return ret;
1260 parent = parent->bus->self;
1261 }
1262 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1263 return domain_context_mapping_one(domain,
1264 tmp->subordinate->number, 0);
1265 else /* this is a legacy PCI bridge */
1266 return domain_context_mapping_one(domain,
1267 tmp->bus->number, tmp->devfn);
1268}
1269
1270static int domain_context_mapped(struct dmar_domain *domain,
1271 struct pci_dev *pdev)
1272{
1273 int ret;
1274 struct pci_dev *tmp, *parent;
1275
1276 ret = device_context_mapped(domain->iommu,
1277 pdev->bus->number, pdev->devfn);
1278 if (!ret)
1279 return ret;
1280 /* dependent device mapping */
1281 tmp = pci_find_upstream_pcie_bridge(pdev);
1282 if (!tmp)
1283 return ret;
1284 /* Secondary interface's bus number and devfn 0 */
1285 parent = pdev->bus->self;
1286 while (parent != tmp) {
1287 ret = device_context_mapped(domain->iommu, parent->bus->number,
1288 parent->devfn);
1289 if (!ret)
1290 return ret;
1291 parent = parent->bus->self;
1292 }
1293 if (tmp->is_pcie)
1294 return device_context_mapped(domain->iommu,
1295 tmp->subordinate->number, 0);
1296 else
1297 return device_context_mapped(domain->iommu,
1298 tmp->bus->number, tmp->devfn);
1299}
1300
1301static int
1302domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1303 u64 hpa, size_t size, int prot)
1304{
1305 u64 start_pfn, end_pfn;
1306 struct dma_pte *pte;
1307 int index;
1308
1309 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1310 return -EINVAL;
1311 iova &= PAGE_MASK_4K;
1312 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1313 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1314 index = 0;
1315 while (start_pfn < end_pfn) {
1316 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1317 if (!pte)
1318 return -ENOMEM;
1319 /* We don't need lock here, nobody else
1320 * touches the iova range
1321 */
1322 BUG_ON(dma_pte_addr(*pte));
1323 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1324 dma_set_pte_prot(*pte, prot);
1325 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1326 start_pfn++;
1327 index++;
1328 }
1329 return 0;
1330}
1331
1332static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1333{
1334 clear_context_table(domain->iommu, bus, devfn);
1335 iommu_flush_context_global(domain->iommu, 0);
1336 iommu_flush_iotlb_global(domain->iommu, 0);
1337}
1338
1339static void domain_remove_dev_info(struct dmar_domain *domain)
1340{
1341 struct device_domain_info *info;
1342 unsigned long flags;
1343
1344 spin_lock_irqsave(&device_domain_lock, flags);
1345 while (!list_empty(&domain->devices)) {
1346 info = list_entry(domain->devices.next,
1347 struct device_domain_info, link);
1348 list_del(&info->link);
1349 list_del(&info->global);
1350 if (info->dev)
1351 info->dev->sysdata = NULL;
1352 spin_unlock_irqrestore(&device_domain_lock, flags);
1353
1354 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1355 free_devinfo_mem(info);
1356
1357 spin_lock_irqsave(&device_domain_lock, flags);
1358 }
1359 spin_unlock_irqrestore(&device_domain_lock, flags);
1360}
1361
1362/*
1363 * find_domain
1364 * Note: we use struct pci_dev->sysdata stores the info
1365 */
1366struct dmar_domain *
1367find_domain(struct pci_dev *pdev)
1368{
1369 struct device_domain_info *info;
1370
1371 /* No lock here, assumes no domain exit in normal case */
1372 info = pdev->sysdata;
1373 if (info)
1374 return info->domain;
1375 return NULL;
1376}
1377
1378static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1379 struct pci_dev *dev)
1380{
1381 int index;
1382
1383 while (dev) {
1384 for (index = 0; index < cnt; index ++)
1385 if (dev == devices[index])
1386 return 1;
1387
1388 /* Check our parent */
1389 dev = dev->bus->self;
1390 }
1391
1392 return 0;
1393}
1394
1395static struct dmar_drhd_unit *
1396dmar_find_matched_drhd_unit(struct pci_dev *dev)
1397{
1398 struct dmar_drhd_unit *drhd = NULL;
1399
1400 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1401 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1402 drhd->devices_cnt, dev))
1403 return drhd;
1404 }
1405
1406 return NULL;
1407}
1408
1409/* domain is initialized */
1410static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1411{
1412 struct dmar_domain *domain, *found = NULL;
1413 struct intel_iommu *iommu;
1414 struct dmar_drhd_unit *drhd;
1415 struct device_domain_info *info, *tmp;
1416 struct pci_dev *dev_tmp;
1417 unsigned long flags;
1418 int bus = 0, devfn = 0;
1419
1420 domain = find_domain(pdev);
1421 if (domain)
1422 return domain;
1423
1424 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1425 if (dev_tmp) {
1426 if (dev_tmp->is_pcie) {
1427 bus = dev_tmp->subordinate->number;
1428 devfn = 0;
1429 } else {
1430 bus = dev_tmp->bus->number;
1431 devfn = dev_tmp->devfn;
1432 }
1433 spin_lock_irqsave(&device_domain_lock, flags);
1434 list_for_each_entry(info, &device_domain_list, global) {
1435 if (info->bus == bus && info->devfn == devfn) {
1436 found = info->domain;
1437 break;
1438 }
1439 }
1440 spin_unlock_irqrestore(&device_domain_lock, flags);
1441 /* pcie-pci bridge already has a domain, uses it */
1442 if (found) {
1443 domain = found;
1444 goto found_domain;
1445 }
1446 }
1447
1448 /* Allocate new domain for the device */
1449 drhd = dmar_find_matched_drhd_unit(pdev);
1450 if (!drhd) {
1451 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1452 pci_name(pdev));
1453 return NULL;
1454 }
1455 iommu = drhd->iommu;
1456
1457 domain = iommu_alloc_domain(iommu);
1458 if (!domain)
1459 goto error;
1460
1461 if (domain_init(domain, gaw)) {
1462 domain_exit(domain);
1463 goto error;
1464 }
1465
1466 /* register pcie-to-pci device */
1467 if (dev_tmp) {
1468 info = alloc_devinfo_mem();
1469 if (!info) {
1470 domain_exit(domain);
1471 goto error;
1472 }
1473 info->bus = bus;
1474 info->devfn = devfn;
1475 info->dev = NULL;
1476 info->domain = domain;
1477 /* This domain is shared by devices under p2p bridge */
1478 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1479
1480 /* pcie-to-pci bridge already has a domain, uses it */
1481 found = NULL;
1482 spin_lock_irqsave(&device_domain_lock, flags);
1483 list_for_each_entry(tmp, &device_domain_list, global) {
1484 if (tmp->bus == bus && tmp->devfn == devfn) {
1485 found = tmp->domain;
1486 break;
1487 }
1488 }
1489 if (found) {
1490 free_devinfo_mem(info);
1491 domain_exit(domain);
1492 domain = found;
1493 } else {
1494 list_add(&info->link, &domain->devices);
1495 list_add(&info->global, &device_domain_list);
1496 }
1497 spin_unlock_irqrestore(&device_domain_lock, flags);
1498 }
1499
1500found_domain:
1501 info = alloc_devinfo_mem();
1502 if (!info)
1503 goto error;
1504 info->bus = pdev->bus->number;
1505 info->devfn = pdev->devfn;
1506 info->dev = pdev;
1507 info->domain = domain;
1508 spin_lock_irqsave(&device_domain_lock, flags);
1509 /* somebody is fast */
1510 found = find_domain(pdev);
1511 if (found != NULL) {
1512 spin_unlock_irqrestore(&device_domain_lock, flags);
1513 if (found != domain) {
1514 domain_exit(domain);
1515 domain = found;
1516 }
1517 free_devinfo_mem(info);
1518 return domain;
1519 }
1520 list_add(&info->link, &domain->devices);
1521 list_add(&info->global, &device_domain_list);
1522 pdev->sysdata = info;
1523 spin_unlock_irqrestore(&device_domain_lock, flags);
1524 return domain;
1525error:
1526 /* recheck it here, maybe others set it */
1527 return find_domain(pdev);
1528}
1529
1530static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1531{
1532 struct dmar_domain *domain;
1533 unsigned long size;
1534 u64 base;
1535 int ret;
1536
1537 printk(KERN_INFO
1538 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1539 pci_name(pdev), start, end);
1540 /* page table init */
1541 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1542 if (!domain)
1543 return -ENOMEM;
1544
1545 /* The address might not be aligned */
1546 base = start & PAGE_MASK_4K;
1547 size = end - base;
1548 size = PAGE_ALIGN_4K(size);
1549 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1550 IOVA_PFN(base + size) - 1)) {
1551 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1552 ret = -ENOMEM;
1553 goto error;
1554 }
1555
1556 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1557 size, base, pci_name(pdev));
1558 /*
1559 * RMRR range might have overlap with physical memory range,
1560 * clear it first
1561 */
1562 dma_pte_clear_range(domain, base, base + size);
1563
1564 ret = domain_page_mapping(domain, base, base, size,
1565 DMA_PTE_READ|DMA_PTE_WRITE);
1566 if (ret)
1567 goto error;
1568
1569 /* context entry init */
1570 ret = domain_context_mapping(domain, pdev);
1571 if (!ret)
1572 return 0;
1573error:
1574 domain_exit(domain);
1575 return ret;
1576
1577}
1578
1579static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1580 struct pci_dev *pdev)
1581{
1582 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1583 return 0;
1584 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1585 rmrr->end_address + 1);
1586}
1587
#ifdef CONFIG_DMAR_GFX_WA
extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);

/*
 * iommu_prepare_gfx_mapping - identity-map every RAM range reported by
 * arch_get_ram_range() for each graphics device.
 *
 * Devices whose sysdata is DUMMY_DEVICE_DOMAIN_INFO are skipped.  If one
 * range fails to map, the failure is logged and the remaining ranges of
 * that device are abandoned; the next device is still processed.
 */
static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	u64 base, size;
	int slot;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO ||
				!IS_GFX_DEVICE(pdev))
			continue;
		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));
		slot = arch_get_ram_range(0, &base, &size);
		while (slot >= 0) {
			ret = iommu_prepare_identity_map(pdev,
					base, base + size);
			if (ret) {
				printk(KERN_ERR
					"IOMMU: mapping reserved region failed\n");
				break;
			}
			slot = arch_get_ram_range(slot, &base, &size);
		}
	}
}
#else /* !CONFIG_DMAR_GFX_WA */
/*
 * The caller invokes this unconditionally, so provide an empty stub when
 * the graphics workaround is not configured.
 */
static inline void iommu_prepare_gfx_mapping(void)
{
	return;
}
#endif /* CONFIG_DMAR_GFX_WA */
1617
#ifdef CONFIG_DMAR_FLOPPY_WA
/*
 * iommu_prepare_isa - identity-map the first 16MB for the ISA bridge.
 *
 * Does nothing when no ISA bridge is found.  A mapping failure is only
 * logged.
 */
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	/* The range mapped above is 0-16M, so report that exact range. */
	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");

}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */
1642
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001643int __init init_dmars(void)
1644{
1645 struct dmar_drhd_unit *drhd;
1646 struct dmar_rmrr_unit *rmrr;
1647 struct pci_dev *pdev;
1648 struct intel_iommu *iommu;
1649 int ret, unit = 0;
1650
1651 /*
1652 * for each drhd
1653 * allocate root
1654 * initialize and program root entry to not present
1655 * endfor
1656 */
1657 for_each_drhd_unit(drhd) {
1658 if (drhd->ignored)
1659 continue;
1660 iommu = alloc_iommu(drhd);
1661 if (!iommu) {
1662 ret = -ENOMEM;
1663 goto error;
1664 }
1665
1666 /*
1667 * TBD:
1668 * we could share the same root & context tables
1669 * amoung all IOMMU's. Need to Split it later.
1670 */
1671 ret = iommu_alloc_root_entry(iommu);
1672 if (ret) {
1673 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1674 goto error;
1675 }
1676 }
1677
1678 /*
1679 * For each rmrr
1680 * for each dev attached to rmrr
1681 * do
1682 * locate drhd for dev, alloc domain for dev
1683 * allocate free domain
1684 * allocate page table entries for rmrr
1685 * if context not allocated for bus
1686 * allocate and init context
1687 * set present in root table for this bus
1688 * init context with domain, translation etc
1689 * endfor
1690 * endfor
1691 */
1692 for_each_rmrr_units(rmrr) {
1693 int i;
1694 for (i = 0; i < rmrr->devices_cnt; i++) {
1695 pdev = rmrr->devices[i];
1696 /* some BIOS lists non-exist devices in DMAR table */
1697 if (!pdev)
1698 continue;
1699 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1700 if (ret)
1701 printk(KERN_ERR
1702 "IOMMU: mapping reserved region failed\n");
1703 }
1704 }
1705
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07001706 iommu_prepare_gfx_mapping();
1707
Keshavamurthy, Anil S49a04292007-10-21 16:41:57 -07001708 iommu_prepare_isa();
1709
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001710 /*
1711 * for each drhd
1712 * enable fault log
1713 * global invalidate context cache
1714 * global invalidate iotlb
1715 * enable translation
1716 */
1717 for_each_drhd_unit(drhd) {
1718 if (drhd->ignored)
1719 continue;
1720 iommu = drhd->iommu;
1721 sprintf (iommu->name, "dmar%d", unit++);
1722
1723 iommu_flush_write_buffer(iommu);
1724
Keshavamurthy, Anil S3460a6d2007-10-21 16:41:54 -07001725 ret = dmar_set_interrupt(iommu);
1726 if (ret)
1727 goto error;
1728
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001729 iommu_set_root_entry(iommu);
1730
1731 iommu_flush_context_global(iommu, 0);
1732 iommu_flush_iotlb_global(iommu, 0);
1733
1734 ret = iommu_enable_translation(iommu);
1735 if (ret)
1736 goto error;
1737 }
1738
1739 return 0;
1740error:
1741 for_each_drhd_unit(drhd) {
1742 if (drhd->ignored)
1743 continue;
1744 iommu = drhd->iommu;
1745 free_iommu(iommu);
1746 }
1747 return ret;
1748}
1749
1750static inline u64 aligned_size(u64 host_addr, size_t size)
1751{
1752 u64 addr;
1753 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1754 return PAGE_ALIGN_4K(addr);
1755}
1756
1757struct iova *
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001758iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001759{
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001760 struct iova *piova;
1761
1762 /* Make sure it's in range */
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001763 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001764 if (!size || (IOVA_START_ADDR + size > end))
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001765 return NULL;
1766
1767 piova = alloc_iova(&domain->iovad,
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001768 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001769 return piova;
1770}
1771
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001772static struct iova *
1773__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1774 size_t size)
1775{
1776 struct pci_dev *pdev = to_pci_dev(dev);
1777 struct iova *iova = NULL;
1778
1779 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1780 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1781 } else {
1782 /*
1783 * First try to allocate an io virtual address in
1784 * DMA_32BIT_MASK and if that fails then try allocating
1785 * from higer range
1786 */
1787 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1788 if (!iova)
1789 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1790 }
1791
1792 if (!iova) {
1793 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1794 return NULL;
1795 }
1796
1797 return iova;
1798}
1799
1800static struct dmar_domain *
1801get_valid_domain_for_dev(struct pci_dev *pdev)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001802{
1803 struct dmar_domain *domain;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001804 int ret;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001805
1806 domain = get_domain_for_dev(pdev,
1807 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1808 if (!domain) {
1809 printk(KERN_ERR
1810 "Allocating domain for %s failed", pci_name(pdev));
1811 return 0;
1812 }
1813
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001814 /* make sure context mapping is ok */
1815 if (unlikely(!domain_context_mapped(domain, pdev))) {
1816 ret = domain_context_mapping(domain, pdev);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001817 if (ret) {
1818 printk(KERN_ERR
1819 "Domain context map for %s failed",
1820 pci_name(pdev));
1821 return 0;
1822 }
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001823 }
1824
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001825 return domain;
1826}
1827
1828static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1829 size_t size, int dir)
1830{
1831 struct pci_dev *pdev = to_pci_dev(hwdev);
1832 int ret;
1833 struct dmar_domain *domain;
1834 unsigned long start_addr;
1835 struct iova *iova;
1836 int prot = 0;
1837
1838 BUG_ON(dir == DMA_NONE);
1839 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1840 return virt_to_bus(addr);
1841
1842 domain = get_valid_domain_for_dev(pdev);
1843 if (!domain)
1844 return 0;
1845
1846 addr = (void *)virt_to_phys(addr);
1847 size = aligned_size((u64)addr, size);
1848
1849 iova = __intel_alloc_iova(hwdev, domain, size);
1850 if (!iova)
1851 goto error;
1852
1853 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1854
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001855 /*
1856 * Check if DMAR supports zero-length reads on write only
1857 * mappings..
1858 */
1859 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1860 !cap_zlr(domain->iommu->cap))
1861 prot |= DMA_PTE_READ;
1862 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1863 prot |= DMA_PTE_WRITE;
1864 /*
1865 * addr - (addr + size) might be partial page, we should map the whole
1866 * page. Note: if two part of one page are separately mapped, we
1867 * might have two guest_addr mapping to the same host addr, but this
1868 * is not a big problem
1869 */
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001870 ret = domain_page_mapping(domain, start_addr,
1871 ((u64)addr) & PAGE_MASK_4K, size, prot);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001872 if (ret)
1873 goto error;
1874
1875 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1876 pci_name(pdev), size, (u64)addr,
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001877 size, (u64)start_addr, dir);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001878
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001879 /* it's a non-present to present mapping */
1880 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1881 start_addr, size >> PAGE_SHIFT_4K, 1);
1882 if (ret)
1883 iommu_flush_write_buffer(domain->iommu);
1884
1885 return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1886
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001887error:
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001888 if (iova)
1889 __free_iova(&domain->iovad, iova);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001890 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1891 pci_name(pdev), size, (u64)addr, dir);
1892 return 0;
1893}
1894
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001895static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1896 size_t size, int dir)
1897{
1898 struct pci_dev *pdev = to_pci_dev(dev);
1899 struct dmar_domain *domain;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001900 unsigned long start_addr;
1901 struct iova *iova;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001902
1903 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1904 return;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001905 domain = find_domain(pdev);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001906 BUG_ON(!domain);
1907
1908 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1909 if (!iova)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001910 return;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001911
1912 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1913 size = aligned_size((u64)dev_addr, size);
1914
1915 pr_debug("Device %s unmapping: %lx@%llx\n",
1916 pci_name(pdev), size, (u64)start_addr);
1917
1918 /* clear the whole page */
1919 dma_pte_clear_range(domain, start_addr, start_addr + size);
1920 /* free page tables */
1921 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1922
1923 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1924 size >> PAGE_SHIFT_4K, 0))
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001925 iommu_flush_write_buffer(domain->iommu);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001926
1927 /* free iova */
1928 __free_iova(&domain->iovad, iova);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001929}
1930
1931static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1932 dma_addr_t *dma_handle, gfp_t flags)
1933{
1934 void *vaddr;
1935 int order;
1936
1937 size = PAGE_ALIGN_4K(size);
1938 order = get_order(size);
1939 flags &= ~(GFP_DMA | GFP_DMA32);
1940
1941 vaddr = (void *)__get_free_pages(flags, order);
1942 if (!vaddr)
1943 return NULL;
1944 memset(vaddr, 0, size);
1945
1946 *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1947 if (*dma_handle)
1948 return vaddr;
1949 free_pages((unsigned long)vaddr, order);
1950 return NULL;
1951}
1952
1953static void intel_free_coherent(struct device *hwdev, size_t size,
1954 void *vaddr, dma_addr_t dma_handle)
1955{
1956 int order;
1957
1958 size = PAGE_ALIGN_4K(size);
1959 order = get_order(size);
1960
1961 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1962 free_pages((unsigned long)vaddr, order);
1963}
1964
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001965#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001966static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sg,
1967 int nelems, int dir)
1968{
1969 int i;
1970 struct pci_dev *pdev = to_pci_dev(hwdev);
1971 struct dmar_domain *domain;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001972 unsigned long start_addr;
1973 struct iova *iova;
1974 size_t size = 0;
1975 void *addr;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001976
1977 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
1978 return;
1979
1980 domain = find_domain(pdev);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001981
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001982 iova = find_iova(&domain->iovad, IOVA_PFN(sg[0].dma_address));
1983 if (!iova)
1984 return;
1985 for (i = 0; i < nelems; i++, sg++) {
1986 addr = SG_ENT_VIRT_ADDRESS(sg);
1987 size += aligned_size((u64)addr, sg->length);
1988 }
1989
1990 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1991
1992 /* clear the whole page */
1993 dma_pte_clear_range(domain, start_addr, start_addr + size);
1994 /* free page tables */
1995 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1996
1997 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1998 size >> PAGE_SHIFT_4K, 0))
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001999 iommu_flush_write_buffer(domain->iommu);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002000
2001 /* free iova */
2002 __free_iova(&domain->iovad, iova);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002003}
2004
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002005static int intel_nontranslate_map_sg(struct device *hddev,
2006 struct scatterlist *sg, int nelems, int dir)
2007{
2008 int i;
2009
2010 for (i = 0; i < nelems; i++) {
2011 struct scatterlist *s = &sg[i];
2012 BUG_ON(!s->page);
2013 s->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(s));
2014 s->dma_length = s->length;
2015 }
2016 return nelems;
2017}
2018
2019static int intel_map_sg(struct device *hwdev, struct scatterlist *sg,
2020 int nelems, int dir)
2021{
2022 void *addr;
2023 int i;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002024 struct pci_dev *pdev = to_pci_dev(hwdev);
2025 struct dmar_domain *domain;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002026 size_t size = 0;
2027 int prot = 0;
2028 size_t offset = 0;
2029 struct iova *iova = NULL;
2030 int ret;
2031 struct scatterlist *orig_sg = sg;
2032 unsigned long start_addr;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002033
2034 BUG_ON(dir == DMA_NONE);
2035 if (pdev->sysdata == DUMMY_DEVICE_DOMAIN_INFO)
2036 return intel_nontranslate_map_sg(hwdev, sg, nelems, dir);
2037
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002038 domain = get_valid_domain_for_dev(pdev);
2039 if (!domain)
2040 return 0;
2041
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002042 for (i = 0; i < nelems; i++, sg++) {
2043 addr = SG_ENT_VIRT_ADDRESS(sg);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002044 addr = (void *)virt_to_phys(addr);
2045 size += aligned_size((u64)addr, sg->length);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002046 }
2047
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002048 iova = __intel_alloc_iova(hwdev, domain, size);
2049 if (!iova) {
2050 orig_sg->dma_length = 0;
2051 return 0;
2052 }
2053
2054 /*
2055 * Check if DMAR supports zero-length reads on write only
2056 * mappings..
2057 */
2058 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2059 !cap_zlr(domain->iommu->cap))
2060 prot |= DMA_PTE_READ;
2061 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2062 prot |= DMA_PTE_WRITE;
2063
2064 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2065 offset = 0;
2066 sg = orig_sg;
2067 for (i = 0; i < nelems; i++, sg++) {
2068 addr = SG_ENT_VIRT_ADDRESS(sg);
2069 addr = (void *)virt_to_phys(addr);
2070 size = aligned_size((u64)addr, sg->length);
2071 ret = domain_page_mapping(domain, start_addr + offset,
2072 ((u64)addr) & PAGE_MASK_4K,
2073 size, prot);
2074 if (ret) {
2075 /* clear the page */
2076 dma_pte_clear_range(domain, start_addr,
2077 start_addr + offset);
2078 /* free page tables */
2079 dma_pte_free_pagetable(domain, start_addr,
2080 start_addr + offset);
2081 /* free iova */
2082 __free_iova(&domain->iovad, iova);
2083 return 0;
2084 }
2085 sg->dma_address = start_addr + offset +
2086 ((u64)addr & (~PAGE_MASK_4K));
2087 sg->dma_length = sg->length;
2088 offset += size;
2089 }
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002090
2091 /* it's a non-present to present mapping */
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002092 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2093 start_addr, offset >> PAGE_SHIFT_4K, 1))
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002094 iommu_flush_write_buffer(domain->iommu);
2095 return nelems;
2096}
2097
2098static struct dma_mapping_ops intel_dma_ops = {
2099 .alloc_coherent = intel_alloc_coherent,
2100 .free_coherent = intel_free_coherent,
2101 .map_single = intel_map_single,
2102 .unmap_single = intel_unmap_single,
2103 .map_sg = intel_map_sg,
2104 .unmap_sg = intel_unmap_sg,
2105};
2106
2107static inline int iommu_domain_cache_init(void)
2108{
2109 int ret = 0;
2110
2111 iommu_domain_cache = kmem_cache_create("iommu_domain",
2112 sizeof(struct dmar_domain),
2113 0,
2114 SLAB_HWCACHE_ALIGN,
2115
2116 NULL);
2117 if (!iommu_domain_cache) {
2118 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2119 ret = -ENOMEM;
2120 }
2121
2122 return ret;
2123}
2124
2125static inline int iommu_devinfo_cache_init(void)
2126{
2127 int ret = 0;
2128
2129 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2130 sizeof(struct device_domain_info),
2131 0,
2132 SLAB_HWCACHE_ALIGN,
2133
2134 NULL);
2135 if (!iommu_devinfo_cache) {
2136 printk(KERN_ERR "Couldn't create devinfo cache\n");
2137 ret = -ENOMEM;
2138 }
2139
2140 return ret;
2141}
2142
2143static inline int iommu_iova_cache_init(void)
2144{
2145 int ret = 0;
2146
2147 iommu_iova_cache = kmem_cache_create("iommu_iova",
2148 sizeof(struct iova),
2149 0,
2150 SLAB_HWCACHE_ALIGN,
2151
2152 NULL);
2153 if (!iommu_iova_cache) {
2154 printk(KERN_ERR "Couldn't create iova cache\n");
2155 ret = -ENOMEM;
2156 }
2157
2158 return ret;
2159}
2160
2161static int __init iommu_init_mempool(void)
2162{
2163 int ret;
2164 ret = iommu_iova_cache_init();
2165 if (ret)
2166 return ret;
2167
2168 ret = iommu_domain_cache_init();
2169 if (ret)
2170 goto domain_error;
2171
2172 ret = iommu_devinfo_cache_init();
2173 if (!ret)
2174 return ret;
2175
2176 kmem_cache_destroy(iommu_domain_cache);
2177domain_error:
2178 kmem_cache_destroy(iommu_iova_cache);
2179
2180 return -ENOMEM;
2181}
2182
2183static void __init iommu_exit_mempool(void)
2184{
2185 kmem_cache_destroy(iommu_devinfo_cache);
2186 kmem_cache_destroy(iommu_domain_cache);
2187 kmem_cache_destroy(iommu_iova_cache);
2188
2189}
2190
2191void __init detect_intel_iommu(void)
2192{
2193 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2194 return;
2195 if (early_dmar_detect()) {
2196 iommu_detected = 1;
2197 }
2198}
2199
2200static void __init init_no_remapping_devices(void)
2201{
2202 struct dmar_drhd_unit *drhd;
2203
2204 for_each_drhd_unit(drhd) {
2205 if (!drhd->include_all) {
2206 int i;
2207 for (i = 0; i < drhd->devices_cnt; i++)
2208 if (drhd->devices[i] != NULL)
2209 break;
2210 /* ignore DMAR unit if no pci devices exist */
2211 if (i == drhd->devices_cnt)
2212 drhd->ignored = 1;
2213 }
2214 }
2215
2216 if (dmar_map_gfx)
2217 return;
2218
2219 for_each_drhd_unit(drhd) {
2220 int i;
2221 if (drhd->ignored || drhd->include_all)
2222 continue;
2223
2224 for (i = 0; i < drhd->devices_cnt; i++)
2225 if (drhd->devices[i] &&
2226 !IS_GFX_DEVICE(drhd->devices[i]))
2227 break;
2228
2229 if (i < drhd->devices_cnt)
2230 continue;
2231
2232 /* bypass IOMMU if it is just for gfx devices */
2233 drhd->ignored = 1;
2234 for (i = 0; i < drhd->devices_cnt; i++) {
2235 if (!drhd->devices[i])
2236 continue;
2237 drhd->devices[i]->sysdata = DUMMY_DEVICE_DOMAIN_INFO;
2238 }
2239 }
2240}
2241
2242int __init intel_iommu_init(void)
2243{
2244 int ret = 0;
2245
2246 if (no_iommu || swiotlb || dmar_disabled)
2247 return -ENODEV;
2248
2249 if (dmar_table_init())
2250 return -ENODEV;
2251
2252 iommu_init_mempool();
2253 dmar_init_reserved_ranges();
2254
2255 init_no_remapping_devices();
2256
2257 ret = init_dmars();
2258 if (ret) {
2259 printk(KERN_ERR "IOMMU: dmar init failed\n");
2260 put_iova_domain(&reserved_iova_list);
2261 iommu_exit_mempool();
2262 return ret;
2263 }
2264 printk(KERN_INFO
2265 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2266
2267 force_iommu = 1;
2268 dma_ops = &intel_dma_ops;
2269 return 0;
2270}
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07002271