// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 *
 * Rewrite, cleanup, new allocation schemes, virtual merging:
 * Copyright (C) 2004 Olof Johansson, IBM Corporation
 *               and  Ben. Herrenschmidt, IBM Corporation
 *
 * Dynamic DMA mapping support, bus-independent parts.
 */


#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/dma-mapping.h>
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
#include <linux/crash_dump.h>
#include <linux/hash.h>
#include <linux/fault-inject.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/sched.h>
#include <linux/debugfs.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/kdump.h>
#include <asm/fadump.h>
#include <asm/vio.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DBG(...)

#ifdef CONFIG_IOMMU_DEBUGFS
static int iommu_debugfs_weight_get(void *data, u64 *val)
{
	struct iommu_table *tbl = data;
	*val = bitmap_weight(tbl->it_map, tbl->it_size);
	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(iommu_debugfs_fops_weight, iommu_debugfs_weight_get, NULL, "%llu\n");

static void iommu_debugfs_add(struct iommu_table *tbl)
{
	char name[10];
	struct dentry *liobn_entry;

	sprintf(name, "%08lx", tbl->it_index);
	liobn_entry = debugfs_create_dir(name, iommu_debugfs_dir);

	debugfs_create_file_unsafe("weight", 0400, liobn_entry, tbl, &iommu_debugfs_fops_weight);
	debugfs_create_ulong("it_size", 0400, liobn_entry, &tbl->it_size);
	debugfs_create_ulong("it_page_shift", 0400, liobn_entry, &tbl->it_page_shift);
	debugfs_create_ulong("it_reserved_start", 0400, liobn_entry, &tbl->it_reserved_start);
	debugfs_create_ulong("it_reserved_end", 0400, liobn_entry, &tbl->it_reserved_end);
	debugfs_create_ulong("it_indirect_levels", 0400, liobn_entry, &tbl->it_indirect_levels);
	debugfs_create_ulong("it_level_size", 0400, liobn_entry, &tbl->it_level_size);
}

static void iommu_debugfs_del(struct iommu_table *tbl)
{
	char name[10];
	struct dentry *liobn_entry;

	sprintf(name, "%08lx", tbl->it_index);
	liobn_entry = debugfs_lookup(name, iommu_debugfs_dir);
	debugfs_remove(liobn_entry);
}
#else
static void iommu_debugfs_add(struct iommu_table *tbl){}
static void iommu_debugfs_del(struct iommu_table *tbl){}
#endif

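/*
 * Scatter/gather mappings merge virtually contiguous entries by default;
 * this can be toggled at boot time with iommu=novmerge or iommu=vmerge
 * (parsed by setup_iommu() below).
 */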
static int novmerge;

static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);

static int __init setup_iommu(char *str)
{
	if (!strcmp(str, "novmerge"))
		novmerge = 1;
	else if (!strcmp(str, "vmerge"))
		novmerge = 0;
	return 1;
}

__setup("iommu=", setup_iommu);

static DEFINE_PER_CPU(unsigned int, iommu_pool_hash);

/*
 * We precalculate the hash to avoid doing it on every allocation.
 *
 * The hash is important to spread CPUs across all the pools. For example,
 * on a POWER7 with 4 way SMT we want interrupts on the primary threads and
 * with 4 pools all primary threads would map to the same pool.
 */
static int __init setup_iommu_pool_hash(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		per_cpu(iommu_pool_hash, i) = hash_32(i, IOMMU_POOL_HASHBITS);

	return 0;
}
subsys_initcall(setup_iommu_pool_hash);

#ifdef CONFIG_FAIL_IOMMU

static DECLARE_FAULT_ATTR(fail_iommu);

static int __init setup_fail_iommu(char *str)
{
	return setup_fault_attr(&fail_iommu, str);
}
__setup("fail_iommu=", setup_fail_iommu);

static bool should_fail_iommu(struct device *dev)
{
	return dev->archdata.fail_iommu && should_fail(&fail_iommu, 1);
}

static int __init fail_iommu_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_iommu",
						       NULL, &fail_iommu);

	return PTR_ERR_OR_ZERO(dir);
}
late_initcall(fail_iommu_debugfs);

static ssize_t fail_iommu_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%d\n", dev->archdata.fail_iommu);
}

static ssize_t fail_iommu_store(struct device *dev,
				struct device_attribute *attr, const char *buf,
				size_t count)
{
	int i;

	if (count > 0 && sscanf(buf, "%d", &i) > 0)
		dev->archdata.fail_iommu = (i == 0) ? 0 : 1;

	return count;
}

static DEVICE_ATTR_RW(fail_iommu);

static int fail_iommu_bus_notify(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct device *dev = data;

	if (action == BUS_NOTIFY_ADD_DEVICE) {
		if (device_create_file(dev, &dev_attr_fail_iommu))
			pr_warn("Unable to create IOMMU fault injection sysfs "
				"entries\n");
	} else if (action == BUS_NOTIFY_DEL_DEVICE) {
		device_remove_file(dev, &dev_attr_fail_iommu);
	}

	return 0;
}

static struct notifier_block fail_iommu_bus_notifier = {
	.notifier_call = fail_iommu_bus_notify
};

static int __init fail_iommu_setup(void)
{
#ifdef CONFIG_PCI
	bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
#endif
#ifdef CONFIG_IBMVIO
	bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
#endif

	return 0;
}
/*
 * Must execute after PCI and VIO subsystem have initialised but before
 * devices are probed.
 */
arch_initcall(fail_iommu_setup);
#else
static inline bool should_fail_iommu(struct device *dev)
{
	return false;
}
#endif

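/*
 * Allocate a contiguous run of @npages entries from one of the table's
 * pools. Small allocations use a pool picked by the per-CPU hash, large
 * ones (more than 15 pages) use the dedicated large pool; if the chosen
 * pool is exhausted the remaining pools are scanned before giving up.
 * Returns the starting bit index in the table bitmap, or
 * DMA_MAPPING_ERROR on failure.
 */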
static unsigned long iommu_range_alloc(struct device *dev,
				       struct iommu_table *tbl,
				       unsigned long npages,
				       unsigned long *handle,
				       unsigned long mask,
				       unsigned int align_order)
{
	unsigned long n, end, start;
	unsigned long limit;
	int largealloc = npages > 15;
	int pass = 0;
	unsigned long align_mask;
	unsigned long flags;
	unsigned int pool_nr;
	struct iommu_pool *pool;

	align_mask = (1ull << align_order) - 1;

	/* This allocator was derived from x86_64's bit string search */

	/* Sanity check */
	if (unlikely(npages == 0)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return DMA_MAPPING_ERROR;
	}

	if (should_fail_iommu(dev))
		return DMA_MAPPING_ERROR;

	/*
	 * We don't need to disable preemption here because any CPU can
	 * safely use any IOMMU pool.
	 */
	pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);

	if (largealloc)
		pool = &(tbl->large_pool);
	else
		pool = &(tbl->pools[pool_nr]);

	spin_lock_irqsave(&(pool->lock), flags);

again:
	if ((pass == 0) && handle && *handle &&
	    (*handle >= pool->start) && (*handle < pool->end))
		start = *handle;
	else
		start = pool->hint;

	limit = pool->end;

	/* The case below can happen if we have a small segment appended
	 * to a large, or when the previous alloc was at the very end of
	 * the available space. If so, go back to the initial start.
	 */
	if (start >= limit)
		start = pool->start;

	if (limit + tbl->it_offset > mask) {
		limit = mask - tbl->it_offset + 1;
		/* If we're constrained on address range, first try
		 * at the masked hint to avoid O(n) search complexity,
		 * but on second pass, start at 0 in pool 0.
		 */
		if ((start & mask) >= limit || pass > 0) {
			spin_unlock(&(pool->lock));
			pool = &(tbl->pools[0]);
			spin_lock(&(pool->lock));
			start = pool->start;
		} else {
			start &= mask;
		}
	}

	n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
			dma_get_seg_boundary_nr_pages(dev, tbl->it_page_shift),
			align_mask);
	if (n == -1) {
		if (likely(pass == 0)) {
			/* First try the pool from the start */
			pool->hint = pool->start;
			pass++;
			goto again;

		} else if (pass <= tbl->nr_pools) {
			/* Now try scanning all the other pools */
			spin_unlock(&(pool->lock));
			pool_nr = (pool_nr + 1) & (tbl->nr_pools - 1);
			pool = &tbl->pools[pool_nr];
			spin_lock(&(pool->lock));
			pool->hint = pool->start;
			pass++;
			goto again;

		} else {
			/* Give up */
			spin_unlock_irqrestore(&(pool->lock), flags);
			return DMA_MAPPING_ERROR;
		}
	}

	end = n + npages;

	/* Bump the hint to a new block for small allocs. */
	if (largealloc) {
		/* Don't bump to new block to avoid fragmentation */
		pool->hint = end;
	} else {
		/* Overflow will be taken care of at the next allocation */
		pool->hint = (end + tbl->it_blocksize - 1) &
				~(tbl->it_blocksize - 1);
	}

	/* Update handle for SG allocations */
	if (handle)
		*handle = end;

	spin_unlock_irqrestore(&(pool->lock), flags);

	return n;
}

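/*
 * Allocate @npages entries and program them to translate to the buffer
 * at @page via the table's set() callback, flushing the hardware TLB if
 * the platform provides a flush hook. Returns the resulting DMA address
 * or DMA_MAPPING_ERROR.
 */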
static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
			      void *page, unsigned int npages,
			      enum dma_data_direction direction,
			      unsigned long mask, unsigned int align_order,
			      unsigned long attrs)
{
	unsigned long entry;
	dma_addr_t ret = DMA_MAPPING_ERROR;
	int build_fail;

	entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);

	if (unlikely(entry == DMA_MAPPING_ERROR))
		return DMA_MAPPING_ERROR;

	entry += tbl->it_offset;	/* Offset into real TCE table */
	ret = entry << tbl->it_page_shift;	/* Set the return dma address */

	/* Put the TCEs in the HW table */
	build_fail = tbl->it_ops->set(tbl, entry, npages,
				      (unsigned long)page &
				      IOMMU_PAGE_MASK(tbl), direction, attrs);

	/* tbl->it_ops->set() only returns non-zero for transient errors.
	 * Clean up the table bitmap in this case and return
	 * DMA_MAPPING_ERROR. For all other errors the functionality is
	 * not altered.
	 */
	if (unlikely(build_fail)) {
		__iommu_free(tbl, ret, npages);
		return DMA_MAPPING_ERROR;
	}

	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);

	/* Make sure updates are seen by hardware */
	mb();

	return ret;
}

static bool iommu_free_check(struct iommu_table *tbl, dma_addr_t dma_addr,
			     unsigned int npages)
{
	unsigned long entry, free_entry;

	entry = dma_addr >> tbl->it_page_shift;
	free_entry = entry - tbl->it_offset;

	if (((free_entry + npages) > tbl->it_size) ||
	    (entry < tbl->it_offset)) {
		if (printk_ratelimit()) {
			printk(KERN_INFO "iommu_free: invalid entry\n");
			printk(KERN_INFO "\tentry = 0x%lx\n", entry);
			printk(KERN_INFO "\tdma_addr = 0x%llx\n", (u64)dma_addr);
			printk(KERN_INFO "\tTable = 0x%llx\n", (u64)tbl);
			printk(KERN_INFO "\tbus# = 0x%llx\n", (u64)tbl->it_busno);
			printk(KERN_INFO "\tsize = 0x%llx\n", (u64)tbl->it_size);
			printk(KERN_INFO "\tstartOff = 0x%llx\n", (u64)tbl->it_offset);
			printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index);
			WARN_ON(1);
		}

		return false;
	}

	return true;
}

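/* Map an entry number (relative to the start of the table) to the pool that owns it. */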
static struct iommu_pool *get_pool(struct iommu_table *tbl,
				   unsigned long entry)
{
	struct iommu_pool *p;
	unsigned long largepool_start = tbl->large_pool.start;

	/* The large pool is the last pool at the top of the table */
	if (entry >= largepool_start) {
		p = &tbl->large_pool;
	} else {
		unsigned int pool_nr = entry / tbl->poolsize;

		BUG_ON(pool_nr > tbl->nr_pools);
		p = &tbl->pools[pool_nr];
	}

	return p;
}

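/*
 * Clear the TCEs backing a mapping and release the corresponding bits in
 * the table bitmap. Any hardware TLB flush is left to the callers.
 */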
static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
			 unsigned int npages)
{
	unsigned long entry, free_entry;
	unsigned long flags;
	struct iommu_pool *pool;

	entry = dma_addr >> tbl->it_page_shift;
	free_entry = entry - tbl->it_offset;

	pool = get_pool(tbl, free_entry);

	if (!iommu_free_check(tbl, dma_addr, npages))
		return;

	tbl->it_ops->clear(tbl, entry, npages);

	spin_lock_irqsave(&(pool->lock), flags);
	bitmap_clear(tbl->it_map, free_entry, npages);
	spin_unlock_irqrestore(&(pool->lock), flags);
}

static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
		unsigned int npages)
{
	__iommu_free(tbl, dma_addr, npages);

	/* Make sure TLB cache is flushed if the HW needs it. We do
	 * not do an mb() here on purpose, it is not needed on any of
	 * the current platforms.
	 */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);
}

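/*
 * Map a scatterlist. Entries whose DMA addresses turn out to be
 * contiguous are merged into a single segment unless novmerge is set or
 * the merged length would exceed the device's maximum segment size.
 * Returns the number of mapped segments, or 0 on failure after backing
 * out any partial mappings.
 */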
int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
		     struct scatterlist *sglist, int nelems,
		     unsigned long mask, enum dma_data_direction direction,
		     unsigned long attrs)
{
	dma_addr_t dma_next = 0, dma_addr;
	struct scatterlist *s, *outs, *segstart;
	int outcount, incount, i, build_fail = 0;
	unsigned int align;
	unsigned long handle;
	unsigned int max_seg_size;

	BUG_ON(direction == DMA_NONE);

	if ((nelems == 0) || !tbl)
		return 0;

	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;

	/* Init first segment length for backout at failure */
	outs->dma_length = 0;

	DBG("sg mapping %d elements:\n", nelems);

	max_seg_size = dma_get_max_seg_size(dev);
	for_each_sg(sglist, s, nelems, i) {
		unsigned long vaddr, npages, entry, slen;

		slen = s->length;
		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}
		/* Allocate iommu entries for that segment */
		vaddr = (unsigned long) sg_virt(s);
		npages = iommu_num_pages(vaddr, slen, IOMMU_PAGE_SIZE(tbl));
		align = 0;
		if (tbl->it_page_shift < PAGE_SHIFT && slen >= PAGE_SIZE &&
		    (vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - tbl->it_page_shift;
		entry = iommu_range_alloc(dev, tbl, npages, &handle,
					  mask >> tbl->it_page_shift, align);

		DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);

		/* Handle failure */
		if (unlikely(entry == DMA_MAPPING_ERROR)) {
			if (!(attrs & DMA_ATTR_NO_WARN) &&
			    printk_ratelimit())
				dev_info(dev, "iommu_alloc failed, tbl %p "
					 "vaddr %lx npages %lu\n", tbl, vaddr,
					 npages);
			goto failure;
		}

		/* Convert entry to a dma_addr_t */
		entry += tbl->it_offset;
		dma_addr = entry << tbl->it_page_shift;
		dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));

		DBG("  - %lu pages, entry: %lx, dma_addr: %lx\n",
			    npages, entry, dma_addr);

		/* Insert into HW table */
		build_fail = tbl->it_ops->set(tbl, entry, npages,
					      vaddr & IOMMU_PAGE_MASK(tbl),
					      direction, attrs);
		if(unlikely(build_fail))
			goto failure;

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			DBG("  - trying merge...\n");
			/* We cannot merge if:
			 * - allocated dma_addr isn't contiguous to previous allocation
			 */
			if (novmerge || (dma_addr != dma_next) ||
			    (outs->dma_length + s->length > max_seg_size)) {
				/* Can't merge: create a new segment */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
				DBG("    can't merge, new segment.\n");
			} else {
				outs->dma_length += s->length;
				DBG("    merged, new len: %ux\n", outs->dma_length);
			}
		}

		if (segstart == s) {
			/* This is a new segment, fill entries */
			DBG("  - filling new segment.\n");
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
		}

		/* Calculate next page pointer for contiguous check */
		dma_next = dma_addr + slen;

		DBG("  - dma next is: %lx\n", dma_next);
	}

	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);

	DBG("mapped %d elements:\n", outcount);

	/* For the sake of ppc_iommu_unmap_sg, we clear out the length in the
	 * next entry of the sglist if we didn't fill the list completely
	 */
	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_address = DMA_MAPPING_ERROR;
		outs->dma_length = 0;
	}

	/* Make sure updates are seen by hardware */
	mb();

	return outcount;

 failure:
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IOMMU_PAGE_MASK(tbl);
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IOMMU_PAGE_SIZE(tbl));
			__iommu_free(tbl, vaddr, npages);
			s->dma_address = DMA_MAPPING_ERROR;
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
	return 0;
}


void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
			int nelems, enum dma_data_direction direction,
			unsigned long attrs)
{
	struct scatterlist *sg;

	BUG_ON(direction == DMA_NONE);

	if (!tbl)
		return;

	sg = sglist;
	while (nelems--) {
		unsigned int npages;
		dma_addr_t dma_handle = sg->dma_address;

		if (sg->dma_length == 0)
			break;
		npages = iommu_num_pages(dma_handle, sg->dma_length,
					 IOMMU_PAGE_SIZE(tbl));
		__iommu_free(tbl, dma_handle, npages);
		sg = sg_next(sg);
	}

	/* Flush/invalidate TLBs if necessary. As for iommu_free(), we
	 * do not do an mb() here, the affected platforms do not need it
	 * when freeing.
	 */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);
}

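/*
 * Initialise the table contents: normally clear whatever the firmware
 * left behind; in a kdump kernel, preserve the first kernel's TCEs
 * instead, keeping at least KDUMP_MIN_TCE_ENTRIES entries free.
 */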
static void iommu_table_clear(struct iommu_table *tbl)
{
	/*
	 * In case of firmware assisted dump system goes through clean
	 * reboot process at the time of system crash. Hence it's safe to
	 * clear the TCE entries if firmware assisted dump is active.
	 */
	if (!is_kdump_kernel() || is_fadump_active()) {
		/* Clear the table in case firmware left allocations in it */
		tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size);
		return;
	}

#ifdef CONFIG_CRASH_DUMP
	if (tbl->it_ops->get) {
		unsigned long index, tceval, tcecount = 0;

		/* Reserve the existing mappings left by the first kernel. */
		for (index = 0; index < tbl->it_size; index++) {
			tceval = tbl->it_ops->get(tbl, index + tbl->it_offset);
			/*
			 * Freed TCE entry contains 0x7fffffffffffffff on JS20
			 */
			if (tceval && (tceval != 0x7fffffffffffffffUL)) {
				__set_bit(index, tbl->it_map);
				tcecount++;
			}
		}

		if ((tbl->it_size - tcecount) < KDUMP_MIN_TCE_ENTRIES) {
			printk(KERN_WARNING "TCE table is full; freeing ");
			printk(KERN_WARNING "%d entries for the kdump boot\n",
				KDUMP_MIN_TCE_ENTRIES);
			for (index = tbl->it_size - KDUMP_MIN_TCE_ENTRIES;
				index < tbl->it_size; index++)
				__clear_bit(index, tbl->it_map);
		}
	}
#endif
}

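/*
 * Mark the optional reserved range (and entry 0, when the table starts
 * at offset 0) as used in the allocation bitmap so it is never handed
 * out by iommu_range_alloc().
 */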
static void iommu_table_reserve_pages(struct iommu_table *tbl,
		unsigned long res_start, unsigned long res_end)
{
	int i;

	WARN_ON_ONCE(res_end < res_start);
	/*
	 * Reserve page 0 so it will not be used for any mappings.
	 * This avoids buggy drivers that consider page 0 to be invalid
	 * to crash the machine or even lose data.
	 */
	if (tbl->it_offset == 0)
		set_bit(0, tbl->it_map);

	tbl->it_reserved_start = res_start;
	tbl->it_reserved_end = res_end;

	/* Check if res_start..res_end isn't empty and overlaps the table */
	if (res_start && res_end &&
			(tbl->it_offset + tbl->it_size < res_start ||
			res_end < tbl->it_offset))
		return;

	for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
		set_bit(i - tbl->it_offset, tbl->it_map);
}

static void iommu_table_release_pages(struct iommu_table *tbl)
{
	int i;

	/*
	 * In case we have reserved the first bit, we should not emit
	 * the warning below.
	 */
	if (tbl->it_offset == 0)
		clear_bit(0, tbl->it_map);

	for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
		clear_bit(i - tbl->it_offset, tbl->it_map);
}

/*
 * Build a iommu_table structure.  This contains a bit map which
 * is used to manage allocation of the tce space.
 */
struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
		unsigned long res_start, unsigned long res_end)
{
	unsigned long sz;
	static int welcomed = 0;
	struct page *page;
	unsigned int i;
	struct iommu_pool *p;

	BUG_ON(!tbl->it_ops);

	/* number of bytes needed for the bitmap */
	sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);

	page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));
	if (!page)
		panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
	tbl->it_map = page_address(page);
	memset(tbl->it_map, 0, sz);

	iommu_table_reserve_pages(tbl, res_start, res_end);

	/* We only split the IOMMU table if we have 1GB or more of space */
	if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
		tbl->nr_pools = IOMMU_NR_POOLS;
	else
		tbl->nr_pools = 1;

	/* We reserve the top 1/4 of the table for large allocations */
	tbl->poolsize = (tbl->it_size * 3 / 4) / tbl->nr_pools;

	for (i = 0; i < tbl->nr_pools; i++) {
		p = &tbl->pools[i];
		spin_lock_init(&(p->lock));
		p->start = tbl->poolsize * i;
		p->hint = p->start;
		p->end = p->start + tbl->poolsize;
	}

	p = &tbl->large_pool;
	spin_lock_init(&(p->lock));
	p->start = tbl->poolsize * i;
	p->hint = p->start;
	p->end = tbl->it_size;

	iommu_table_clear(tbl);

	if (!welcomed) {
		printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
		       novmerge ? "disabled" : "enabled");
		welcomed = 1;
	}

	iommu_debugfs_add(tbl);

	return tbl;
}

static void iommu_table_free(struct kref *kref)
{
	unsigned long bitmap_sz;
	unsigned int order;
	struct iommu_table *tbl;

	tbl = container_of(kref, struct iommu_table, it_kref);

	if (tbl->it_ops->free)
		tbl->it_ops->free(tbl);

	if (!tbl->it_map) {
		kfree(tbl);
		return;
	}

	iommu_debugfs_del(tbl);

	iommu_table_release_pages(tbl);

	/* verify that table contains no entries */
	if (!bitmap_empty(tbl->it_map, tbl->it_size))
		pr_warn("%s: Unexpected TCEs\n", __func__);

	/* calculate bitmap size in bytes */
	bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);

	/* free bitmap */
	order = get_order(bitmap_sz);
	free_pages((unsigned long) tbl->it_map, order);

	/* free table */
	kfree(tbl);
}

struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
{
	if (kref_get_unless_zero(&tbl->it_kref))
		return tbl;

	return NULL;
}
EXPORT_SYMBOL_GPL(iommu_tce_table_get);

int iommu_tce_table_put(struct iommu_table *tbl)
{
	if (WARN_ON(!tbl))
		return 0;

	return kref_put(&tbl->it_kref, iommu_table_free);
}
EXPORT_SYMBOL_GPL(iommu_tce_table_put);

/* Creates TCEs for a user provided buffer.  The user buffer must be
 * contiguous real kernel storage (not vmalloc).  The address passed here
 * comprises a page address and offset into that page. The dma_addr_t
 * returned will point to the same byte within the page as was passed in.
 */
dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
			  struct page *page, unsigned long offset, size_t size,
			  unsigned long mask, enum dma_data_direction direction,
			  unsigned long attrs)
{
	dma_addr_t dma_handle = DMA_MAPPING_ERROR;
	void *vaddr;
	unsigned long uaddr;
	unsigned int npages, align;

	BUG_ON(direction == DMA_NONE);

	vaddr = page_address(page) + offset;
	uaddr = (unsigned long)vaddr;

	if (tbl) {
		npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
		align = 0;
		if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
		    ((unsigned long)vaddr & ~PAGE_MASK) == 0)
			align = PAGE_SHIFT - tbl->it_page_shift;

		dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
					 mask >> tbl->it_page_shift, align,
					 attrs);
		if (dma_handle == DMA_MAPPING_ERROR) {
			if (!(attrs & DMA_ATTR_NO_WARN) &&
			    printk_ratelimit())  {
				dev_info(dev, "iommu_alloc failed, tbl %p "
					 "vaddr %p npages %d\n", tbl, vaddr,
					 npages);
			}
		} else
			dma_handle |= (uaddr & ~IOMMU_PAGE_MASK(tbl));
	}

	return dma_handle;
}

void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
		      size_t size, enum dma_data_direction direction,
		      unsigned long attrs)
{
	unsigned int npages;

	BUG_ON(direction == DMA_NONE);

	if (tbl) {
		npages = iommu_num_pages(dma_handle, size,
					 IOMMU_PAGE_SIZE(tbl));
		iommu_free(tbl, dma_handle, npages);
	}
}

/* Allocates a contiguous real buffer and creates mappings over it.
 * Returns the virtual address of the buffer and sets dma_handle
 * to the dma address (mapping) of the first page.
 */
void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
			   size_t size, dma_addr_t *dma_handle,
			   unsigned long mask, gfp_t flag, int node)
{
	void *ret = NULL;
	dma_addr_t mapping;
	unsigned int order;
	unsigned int nio_pages, io_order;
	struct page *page;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	/*
	 * Client asked for way too much space.  This is checked later
	 * anyway.  It is easier to debug here for the drivers than in
	 * the tce tables.
	 */
	if (order >= IOMAP_MAX_ORDER) {
		dev_info(dev, "iommu_alloc_consistent size too large: 0x%lx\n",
			 size);
		return NULL;
	}

	if (!tbl)
		return NULL;

	/* Alloc enough pages (and possibly more) */
	page = alloc_pages_node(node, flag, order);
	if (!page)
		return NULL;
	ret = page_address(page);
	memset(ret, 0, size);

	/* Set up tces to cover the allocated range */
	nio_pages = size >> tbl->it_page_shift;
	io_order = get_iommu_order(size, tbl);
	mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
			      mask >> tbl->it_page_shift, io_order, 0);
	if (mapping == DMA_MAPPING_ERROR) {
		free_pages((unsigned long)ret, order);
		return NULL;
	}
	*dma_handle = mapping;
	return ret;
}

void iommu_free_coherent(struct iommu_table *tbl, size_t size,
			 void *vaddr, dma_addr_t dma_handle)
{
	if (tbl) {
		unsigned int nio_pages;

		size = PAGE_ALIGN(size);
		nio_pages = size >> tbl->it_page_shift;
		iommu_free(tbl, dma_handle, nio_pages);
		size = PAGE_ALIGN(size);
		free_pages((unsigned long)vaddr, get_order(size));
	}
}

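/* Translate a DMA direction into TCE read/write permission bits. */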
unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
{
	switch (dir) {
	case DMA_BIDIRECTIONAL:
		return TCE_PCI_READ | TCE_PCI_WRITE;
	case DMA_FROM_DEVICE:
		return TCE_PCI_WRITE;
	case DMA_TO_DEVICE:
		return TCE_PCI_READ;
	default:
		return 0;
	}
}
EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);

#ifdef CONFIG_IOMMU_API
/*
 * SPAPR TCE API
 */
static void group_release(void *iommu_data)
{
	struct iommu_table_group *table_group = iommu_data;

	table_group->group = NULL;
}

void iommu_register_group(struct iommu_table_group *table_group,
		int pci_domain_number, unsigned long pe_num)
{
	struct iommu_group *grp;
	char *name;

	grp = iommu_group_alloc();
	if (IS_ERR(grp)) {
		pr_warn("powerpc iommu api: cannot create new group, err=%ld\n",
				PTR_ERR(grp));
		return;
	}
	table_group->group = grp;
	iommu_group_set_iommudata(grp, table_group, group_release);
	name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
			pci_domain_number, pe_num);
	if (!name)
		return;
	iommu_group_set_name(grp, name);
	kfree(name);
}

enum dma_data_direction iommu_tce_direction(unsigned long tce)
{
	if ((tce & TCE_PCI_READ) && (tce & TCE_PCI_WRITE))
		return DMA_BIDIRECTIONAL;
	else if (tce & TCE_PCI_READ)
		return DMA_TO_DEVICE;
	else if (tce & TCE_PCI_WRITE)
		return DMA_FROM_DEVICE;
	else
		return DMA_NONE;
}
EXPORT_SYMBOL_GPL(iommu_tce_direction);

void iommu_flush_tce(struct iommu_table *tbl)
{
	/* Flush/invalidate TLB caches if necessary */
	if (tbl->it_ops->flush)
		tbl->it_ops->flush(tbl);

	/* Make sure updates are seen by hardware */
	mb();
}
EXPORT_SYMBOL_GPL(iommu_flush_tce);

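/*
 * Check that an I/O bus address is aligned to the IOMMU page size and
 * falls within the window described by @offset and @size (both in IOMMU
 * pages). Used by SPAPR TCE API callers to validate requests.
 */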
int iommu_tce_check_ioba(unsigned long page_shift,
		unsigned long offset, unsigned long size,
		unsigned long ioba, unsigned long npages)
{
	unsigned long mask = (1UL << page_shift) - 1;

	if (ioba & mask)
		return -EINVAL;

	ioba >>= page_shift;
	if (ioba < offset)
		return -EINVAL;

	if ((ioba + 1) > (offset + size))
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_tce_check_ioba);

int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
{
	unsigned long mask = (1UL << page_shift) - 1;

	if (gpa & mask)
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);

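/*
 * Exchange a single TCE without invalidating the hardware TLB cache;
 * callers batch invalidations via iommu_tce_kill() below. The page
 * reported back in @hpa is marked dirty when the returned direction
 * indicates the device may have written to it (and it is not device
 * memory).
 */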
extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long *hpa,
		enum dma_data_direction *direction)
{
	long ret;
	unsigned long size = 0;

	ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
	if (!ret && ((*direction == DMA_FROM_DEVICE) ||
			(*direction == DMA_BIDIRECTIONAL)) &&
			!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
					&size))
		SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);

void iommu_tce_kill(struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	if (tbl->it_ops->tce_kill)
		tbl->it_ops->tce_kill(tbl, entry, pages, false);
}
EXPORT_SYMBOL_GPL(iommu_tce_kill);

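/*
 * Take exclusive control of the table for a user driver (e.g. VFIO):
 * fails with -EBUSY if any mappings are present, otherwise marks every
 * entry as used so the kernel DMA API can no longer allocate from it.
 */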
int iommu_take_ownership(struct iommu_table *tbl)
{
	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
	int ret = 0;

	/*
	 * VFIO does not control TCE entries allocation and the guest
	 * can write new TCEs on top of existing ones so iommu_tce_build()
	 * must be able to release old pages. This functionality
	 * requires exchange() callback defined so if it is not
	 * implemented, we disallow taking ownership over the table.
	 */
	if (!tbl->it_ops->xchg_no_kill)
		return -EINVAL;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);
	for (i = 0; i < tbl->nr_pools; i++)
		spin_lock(&tbl->pools[i].lock);

	iommu_table_release_pages(tbl);

	if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
		pr_err("iommu_tce: it_map is not empty");
		ret = -EBUSY;
		/* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
		iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
				tbl->it_reserved_end);
	} else {
		memset(tbl->it_map, 0xff, sz);
	}

	for (i = 0; i < tbl->nr_pools; i++)
		spin_unlock(&tbl->pools[i].lock);
	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);

	return ret;
}
EXPORT_SYMBOL_GPL(iommu_take_ownership);

void iommu_release_ownership(struct iommu_table *tbl)
{
	unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;

	spin_lock_irqsave(&tbl->large_pool.lock, flags);
	for (i = 0; i < tbl->nr_pools; i++)
		spin_lock(&tbl->pools[i].lock);

	memset(tbl->it_map, 0, sz);

	iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
			tbl->it_reserved_end);

	for (i = 0; i < tbl->nr_pools; i++)
		spin_unlock(&tbl->pools[i].lock);
	spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);

int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
{
	/*
	 * The sysfs entries should be populated before
	 * binding IOMMU group. If sysfs entries isn't
	 * ready, we simply bail.
	 */
	if (!device_is_registered(dev))
		return -ENOENT;

	if (device_iommu_mapped(dev)) {
		pr_debug("%s: Skipping device %s with iommu group %d\n",
			 __func__, dev_name(dev),
			 iommu_group_id(dev->iommu_group));
		return -EBUSY;
	}

	pr_debug("%s: Adding %s to iommu group %d\n",
		 __func__, dev_name(dev), iommu_group_id(table_group->group));

	return iommu_group_add_device(table_group->group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);

void iommu_del_device(struct device *dev)
{
	/*
	 * Some devices might not have IOMMU table and group
	 * and we needn't detach them from the associated
	 * IOMMU groups
	 */
	if (!device_iommu_mapped(dev)) {
		pr_debug("iommu_tce: skipping device %s with no tbl\n",
			 dev_name(dev));
		return;
	}

	iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_del_device);
#endif /* CONFIG_IOMMU_API */