/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp.  All rights reserved.
 *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>

#include <asm/iommu.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>

#define DRIVER_VERSION  "0.1"
#define DRIVER_AUTHOR   "aik@ozlabs.ru"
#define DRIVER_DESC     "VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

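/*
 * Account for @npages of locked memory against RLIMIT_MEMLOCK of @mm.
 * Fails with -ENOMEM if the limit would be exceeded, unless the caller
 * has CAP_IPC_LOCK. Undone by decrement_locked_vm() below.
 */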
static long try_increment_locked_vm(struct mm_struct *mm, long npages)
{
	long ret = 0, locked, lock_limit;

	if (WARN_ON_ONCE(!mm))
		return -EPERM;

	if (!npages)
		return 0;

	down_write(&mm->mmap_sem);
	locked = mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK))
		ret = -ENOMEM;
	else
		mm->locked_vm += npages;

	pr_debug("[%d] RLIMIT_MEMLOCK +%ld %ld/%ld%s\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK),
			ret ? " - exceeded" : "");

	up_write(&mm->mmap_sem);

	return ret;
}

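/* Undo try_increment_locked_vm(); @npages is clamped to what is accounted. */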
static void decrement_locked_vm(struct mm_struct *mm, long npages)
{
	if (!mm || !npages)
		return;

	down_write(&mm->mmap_sem);
	if (WARN_ON_ONCE(npages > mm->locked_vm))
		npages = mm->locked_vm;
	mm->locked_vm -= npages;
	pr_debug("[%d] RLIMIT_MEMLOCK -%ld %ld/%ld\n", current->pid,
			npages << PAGE_SHIFT,
			mm->locked_vm << PAGE_SHIFT,
			rlimit(RLIMIT_MEMLOCK));
	up_write(&mm->mmap_sem);
}

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU
 */

struct tce_iommu_group {
	struct list_head next;
	struct iommu_group *grp;
};

/*
 * A container needs to remember which preregistered region it has
 * referenced to do proper cleanup at the userspace process exit.
 */
struct tce_iommu_prereg {
	struct list_head next;
	struct mm_iommu_table_group_mem_t *mem;
};

/*
 * The container descriptor supports only a single group per container.
 * Required by the API as the container is not supplied with the IOMMU group
 * at the moment of initialization.
 */
struct tce_container {
	struct mutex lock;
	bool enabled;
	bool v2;
	bool def_window_pending;
	unsigned long locked_pages;
	struct mm_struct *mm;
	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
	struct list_head group_list;
	struct list_head prereg_list;
};

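/*
 * Latch the container to the mm of the first task that configures it;
 * any later task with a different mm gets -EPERM so one process cannot
 * manipulate another process's mappings. The mm reference is held
 * until the container is released.
 */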
static long tce_iommu_mm_set(struct tce_container *container)
{
	if (container->mm) {
		if (container->mm == current->mm)
			return 0;
		return -EPERM;
	}
	BUG_ON(!current->mm);
	container->mm = current->mm;
	atomic_inc(&container->mm->mm_count);

	return 0;
}

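/* Drop one reference to a preregistered area and free its list entry. */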
static long tce_iommu_prereg_free(struct tce_container *container,
		struct tce_iommu_prereg *tcemem)
{
	long ret;

	ret = mm_iommu_put(container->mm, tcemem->mem);
	if (ret)
		return ret;

	list_del(&tcemem->next);
	kfree(tcemem);

	return 0;
}

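/*
 * Unregister a preregistered userspace memory area. mm_iommu_get()
 * takes a temporary reference so the area cannot disappear while it
 * is looked up in the prereg list; that reference is dropped again
 * before returning.
 */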
static long tce_iommu_unregister_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	struct mm_iommu_table_group_mem_t *mem;
	struct tce_iommu_prereg *tcemem;
	bool found = false;
	long ret;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
	if (!mem)
		return -ENOENT;

	list_for_each_entry(tcemem, &container->prereg_list, next) {
		if (tcemem->mem == mem) {
			found = true;
			break;
		}
	}

	if (!found)
		ret = -ENOENT;
	else
		ret = tce_iommu_prereg_free(container, tcemem);

	mm_iommu_put(container->mm, mem);

	return ret;
}

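/*
 * Preregister (pin) a userspace memory area for DMA. An area already
 * registered by this container returns -EBUSY; otherwise a new
 * registration is created and remembered in the prereg list for
 * cleanup at release time.
 */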
static long tce_iommu_register_pages(struct tce_container *container,
		__u64 vaddr, __u64 size)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem = NULL;
	struct tce_iommu_prereg *tcemem;
	unsigned long entries = size >> PAGE_SHIFT;

	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
			((vaddr + size) < vaddr))
		return -EINVAL;

	mem = mm_iommu_get(container->mm, vaddr, entries);
	if (mem) {
		list_for_each_entry(tcemem, &container->prereg_list, next) {
			if (tcemem->mem == mem) {
				ret = -EBUSY;
				goto put_exit;
			}
		}
	} else {
		ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
		if (ret)
			return ret;
	}

	tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
	if (!tcemem) {
		ret = -ENOMEM;
		goto put_exit;
	}

	tcemem->mem = mem;
	list_add(&tcemem->next, &container->prereg_list);

	container->enabled = true;

	return 0;

put_exit:
	mm_iommu_put(container->mm, mem);
	return ret;
}

static bool tce_page_is_contained(struct page *page, unsigned page_shift)
{
	/*
	 * Check that the TCE table granularity is not bigger than the size of
	 * a page we just found. Otherwise the hardware can get access to
	 * a bigger memory chunk than it should.
	 */
	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
}

static inline bool tce_groups_attached(struct tce_container *container)
{
	return !list_empty(&container->group_list);
}

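/*
 * Find the window which covers @ioba and return its index in the
 * container's table array, or -1 if no window contains the address.
 */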
static long tce_iommu_find_table(struct tce_container *container,
		phys_addr_t ioba, struct iommu_table **ptbl)
{
	long i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (tbl) {
			unsigned long entry = ioba >> tbl->it_page_shift;
			unsigned long start = tbl->it_offset;
			unsigned long end = start + tbl->it_size;

			if ((start <= entry) && (entry < end)) {
				*ptbl = tbl;
				return i;
			}
		}
	}

	return -1;
}

static int tce_iommu_find_free_table(struct tce_container *container)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		if (!container->tables[i])
			return i;
	}

	return -ENOSPC;
}

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp;

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, theoretically, we need to update the accounting
	 * of locked pages on each map and unmap. For powerpc, the map/unmap
	 * paths can be very hot, though, and the accounting would kill
	 * performance, especially since it would be difficult, if not
	 * impossible, to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the iommu is
	 * enabled and disabled. The worst case upper bound on locked memory
	 * is the size of the whole iommu window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also, we don't have a nice way to fail on H_PUT_TCE due to ulimits;
	 * that would effectively kill the guest at random points, so it is
	 * much better to enforce the limit based on the max that the guest
	 * can map.
	 *
	 * Unfortunately at the moment it counts whole tables, no matter how
	 * much memory the guest has. I.e. for a 4GB guest and 4 IOMMU groups,
	 * each with a 2GB DMA window, 8GB will be counted here. The reason for
	 * this is that we cannot tell the amount of RAM used by the guest
	 * here as this information is only available from KVM and VFIO is
	 * KVM agnostic.
	 *
	 * So we do not allow enabling a container without a group attached
	 * as there is no way to know how much we should increment
	 * the locked_vm counter.
	 */
	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	if (!table_group->tce32_size)
		return -EPERM;

	ret = tce_iommu_mm_set(container);
	if (ret)
		return ret;

	locked = table_group->tce32_size >> PAGE_SHIFT;
	ret = try_increment_locked_vm(container->mm, locked);
	if (ret)
		return ret;

	container->locked_pages = locked;

	container->enabled = true;

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	BUG_ON(!container->mm);
	decrement_locked_vm(container->mm, container->locked_pages);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);
	INIT_LIST_HEAD_RCU(&container->group_list);
	INIT_LIST_HEAD_RCU(&container->prereg_list);

	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;

	return container;
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages);
static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl);

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;
	struct tce_iommu_group *tcegrp;
	struct tce_iommu_prereg *tcemem, *tmtmp;
	long i;

	while (tce_groups_attached(container)) {
		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		tce_iommu_detach_group(iommu_data, tcegrp->grp);
	}

	/*
	 * If VFIO created a table, it was not disposed of
	 * by tce_iommu_detach_group(), so do it now.
	 */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		tce_iommu_free_table(container, tbl);
	}

	list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next)
		WARN_ON(tce_iommu_prereg_free(container, tcemem));

	tce_iommu_disable(container);
	if (container->mm)
		mmdrop(container->mm);
	mutex_destroy(&container->lock);

	kfree(container);
}

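/* Drop the page reference taken by tce_iommu_use_page(). */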
static void tce_iommu_unuse_page(struct tce_container *container,
		unsigned long hpa)
{
	struct page *page;

	page = pfn_to_page(hpa >> PAGE_SHIFT);
	put_page(page);
}

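/*
 * Translate a userspace address into a host physical address via the
 * preregistered memory cache; fails unless the address falls within an
 * area registered earlier with VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
 */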
static int tce_iommu_prereg_ua_to_hpa(struct tce_container *container,
		unsigned long tce, unsigned long shift,
		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
{
	long ret = 0;
	struct mm_iommu_table_group_mem_t *mem;

	mem = mm_iommu_lookup(container->mm, tce, 1ULL << shift);
	if (!mem)
		return -EINVAL;

	ret = mm_iommu_ua_to_hpa(mem, tce, shift, phpa);
	if (ret)
		return -EINVAL;

	*pmem = mem;

	return 0;
}

static void tce_iommu_unuse_page_v2(struct tce_container *container,
		struct iommu_table *tbl, unsigned long entry)
{
	struct mm_iommu_table_group_mem_t *mem = NULL;
	int ret;
	unsigned long hpa = 0;
	__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);

	if (!pua)
		return;

	ret = tce_iommu_prereg_ua_to_hpa(container, be64_to_cpu(*pua),
			tbl->it_page_shift, &hpa, &mem);
	if (ret)
		pr_debug("%s: tce %llx at #%lx was not cached, ret=%d\n",
				__func__, be64_to_cpu(*pua), entry, ret);
	if (mem)
		mm_iommu_mapped_dec(mem);

	*pua = cpu_to_be64(0);
}

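/*
 * Clear @pages TCE entries starting at @entry: exchange each entry with
 * an empty one and release the host page it referenced, via the v1 or
 * v2 path as appropriate.
 */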
static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldhpa;
	long ret;
	enum dma_data_direction direction;
	unsigned long lastentry = entry + pages;

	for ( ; entry < lastentry; ++entry) {
		if (tbl->it_indirect_levels && tbl->it_userspace) {
			/*
			 * For multilevel tables, we can take a shortcut here
			 * and skip some TCEs as we know that the userspace
			 * addresses cache is a mirror of the real TCE table:
			 * if it is missing some indirect levels, then
			 * the hardware table does not have them allocated
			 * either and therefore does not require updating.
			 */
			__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl,
					entry);
			if (!pua) {
				/* align to level_size which is power of two */
				entry |= tbl->it_level_size - 1;
				continue;
			}
		}

		cond_resched();

		direction = DMA_NONE;
		oldhpa = 0;
		ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
		if (ret)
			continue;

		if (direction == DMA_NONE)
			continue;

		if (container->v2) {
			tce_iommu_unuse_page_v2(container, tbl, entry);
			continue;
		}

		tce_iommu_unuse_page(container, oldhpa);
	}

	return 0;
}

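/* Pin a single userspace page and return its host physical address. */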
static int tce_iommu_use_page(unsigned long tce, unsigned long *hpa)
{
	struct page *page = NULL;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	if (get_user_pages_fast(tce & PAGE_MASK, 1,
			direction != DMA_TO_DEVICE, &page) != 1)
		return -EFAULT;

	*hpa = __pa((unsigned long) page_address(page));

	return 0;
}

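/*
 * Populate @pages TCE entries starting at @entry from the userspace
 * address @tce (v1 flow: pages are pinned here, one by one, with
 * get_user_pages_fast()). On failure, the entries set so far are
 * cleared again.
 */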
static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = tce_iommu_use_page(tce, &hpa);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		hpa |= offset;
		dirtmp = direction;
		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			tce_iommu_unuse_page(container, hpa);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page(container, hpa);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

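/*
 * The v2 counterpart of tce_iommu_build(): userspace addresses must
 * have been preregistered, so the translation goes through the prereg
 * cache, and the userspace address is stored in the table's userspace
 * view for later invalidation.
 */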
static long tce_iommu_build_v2(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages,
		enum dma_data_direction direction)
{
	long i, ret = 0;
	struct page *page;
	unsigned long hpa;
	enum dma_data_direction dirtmp;

	for (i = 0; i < pages; ++i) {
		struct mm_iommu_table_group_mem_t *mem = NULL;
		__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);

		ret = tce_iommu_prereg_ua_to_hpa(container,
				tce, tbl->it_page_shift, &hpa, &mem);
		if (ret)
			break;

		page = pfn_to_page(hpa >> PAGE_SHIFT);
		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
			ret = -EPERM;
			break;
		}

		/* Preserve offset within IOMMU page */
		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
		dirtmp = direction;

		/* The registered region is being unregistered */
		if (mm_iommu_mapped_inc(mem))
			break;

		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
		if (ret) {
			/* dirtmp cannot be DMA_NONE here */
			tce_iommu_unuse_page_v2(container, tbl, entry + i);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}

		if (dirtmp != DMA_NONE)
			tce_iommu_unuse_page_v2(container, tbl, entry + i);

		*pua = cpu_to_be64(tce);

		tce += IOMMU_PAGE_SIZE(tbl);
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

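/*
 * Allocate a TCE table via the platform ops, accounting its size
 * against the container mm's locked_vm first.
 */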
static long tce_iommu_create_table(struct tce_container *container,
		struct iommu_table_group *table_group,
		int num,
		__u32 page_shift,
		__u64 window_size,
		__u32 levels,
		struct iommu_table **ptbl)
{
	long ret, table_size;

	table_size = table_group->ops->get_table_size(page_shift, window_size,
			levels);
	if (!table_size)
		return -EINVAL;

	ret = try_increment_locked_vm(container->mm, table_size >> PAGE_SHIFT);
	if (ret)
		return ret;

	ret = table_group->ops->create_table(table_group, num,
			page_shift, window_size, levels, ptbl);

	WARN_ON(!ret && !(*ptbl)->it_ops->free);
	WARN_ON(!ret && ((*ptbl)->it_allocated_size > table_size));

	return ret;
}

static void tce_iommu_free_table(struct tce_container *container,
		struct iommu_table *tbl)
{
	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;

	iommu_tce_table_put(tbl);
	decrement_locked_vm(container->mm, pages);
}

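/*
 * Create a DMA window, program it into every group attached to the
 * container and return its bus address via @start_addr.
 */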
static long tce_iommu_create_window(struct tce_container *container,
		__u32 page_shift, __u64 window_size, __u32 levels,
		__u64 *start_addr)
{
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;
	struct iommu_table *tbl = NULL;
	long ret, num;

	num = tce_iommu_find_free_table(container);
	if (num < 0)
		return num;

	/* Get the first group for ops::create_table */
	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -EFAULT;

	if (!(table_group->pgsizes & (1ULL << page_shift)))
		return -EINVAL;

	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
			!table_group->ops->get_table_size ||
			!table_group->ops->create_table)
		return -EPERM;

	/* Create TCE table */
	ret = tce_iommu_create_table(container, table_group, num,
			page_shift, window_size, levels, &tbl);
	if (ret)
		return ret;

	BUG_ON(!tbl->it_ops->free);

	/*
	 * Program the table to every group.
	 * Groups have been tested for compatibility at the attach time.
	 */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		ret = table_group->ops->set_window(table_group, num, tbl);
		if (ret)
			goto unset_exit;
	}

	container->tables[num] = tbl;

	/* Return start address assigned by platform in create_table() */
	*start_addr = tbl->it_offset << tbl->it_page_shift;

	return 0;

unset_exit:
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);
		table_group->ops->unset_window(table_group, num);
	}
	tce_iommu_free_table(container, tbl);

	return ret;
}

static long tce_iommu_remove_window(struct tce_container *container,
		__u64 start_addr)
{
	struct iommu_table_group *table_group = NULL;
	struct iommu_table *tbl;
	struct tce_iommu_group *tcegrp;
	int num;

	num = tce_iommu_find_table(container, start_addr, &tbl);
	if (num < 0)
		return -EINVAL;

	BUG_ON(!tbl->it_size);

	/* Detach groups from IOMMUs */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		/*
		 * SPAPR TCE IOMMU exposes the default DMA window to
		 * the guest via dma32_window_start/size of
		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
		 * the userspace to remove this window, some do not, so
		 * here we check for the platform capability.
		 */
		if (!table_group->ops || !table_group->ops->unset_window)
			return -EPERM;

		table_group->ops->unset_window(table_group, num);
	}

	/* Free table */
	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
	tce_iommu_free_table(container, tbl);
	container->tables[num] = NULL;

	return 0;
}

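/*
 * Create the default 32-bit window if one was requested at group
 * attach time and has not been created yet. Creation is postponed
 * until the first map/unmap or window creation request so that the
 * allocation is accounted against the mm that actually configures
 * the container.
 */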
static long tce_iommu_create_default_window(struct tce_container *container)
{
	long ret;
	__u64 start_addr = 0;
	struct tce_iommu_group *tcegrp;
	struct iommu_table_group *table_group;

	if (!container->def_window_pending)
		return 0;

	if (!tce_groups_attached(container))
		return -ENODEV;

	tcegrp = list_first_entry(&container->group_list,
			struct tce_iommu_group, next);
	table_group = iommu_group_get_iommudata(tcegrp->grp);
	if (!table_group)
		return -ENODEV;

	ret = tce_iommu_create_window(container, IOMMU_PAGE_SHIFT_4K,
			table_group->tce32_size, 1, &start_addr);
	WARN_ON_ONCE(!ret && start_addr);

	if (!ret)
		container->def_window_pending = false;

	return ret;
}

static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz, ddwsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
		case VFIO_SPAPR_TCE_v2_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;
	}

	/*
	 * Sanity check to prevent one userspace from manipulating
	 * another userspace mm.
	 */
	BUG_ON(!container);
	if (container->mm && container->mm != current->mm)
		return -EPERM;

	switch (cmd) {
	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct tce_iommu_group *tcegrp;
		struct iommu_table_group *table_group;

		if (!tce_groups_attached(container))
			return -ENXIO;

		tcegrp = list_first_entry(&container->group_list,
				struct tce_iommu_group, next);
		table_group = iommu_group_get_iommudata(tcegrp->grp);

		if (!table_group)
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = table_group->tce32_start;
		info.dma32_window_size = table_group->tce32_size;
		info.flags = 0;
		memset(&info.ddw, 0, sizeof(info.ddw));

		if (table_group->max_dynamic_windows_supported &&
				container->v2) {
			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
			info.ddw.pgsizes = table_group->pgsizes;
			info.ddw.max_dynamic_windows_supported =
				table_group->max_dynamic_windows_supported;
			info.ddw.levels = table_group->max_levels;
		}

		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);

		if (info.argsz >= ddwsz)
			minsz = ddwsz;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = NULL;
		long num;
		enum dma_data_direction direction;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if ((param.size & ~IOMMU_PAGE_MASK(tbl)) ||
				(param.vaddr & ~IOMMU_PAGE_MASK(tbl)))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		if (param.flags & VFIO_DMA_MAP_FLAG_READ) {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_BIDIRECTIONAL;
			else
				direction = DMA_TO_DEVICE;
		} else {
			if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
				direction = DMA_FROM_DEVICE;
			else
				return -EINVAL;
		}

		ret = iommu_tce_put_param_check(tbl, param.iova, param.vaddr);
		if (ret)
			return ret;

		if (container->v2)
			ret = tce_iommu_build_v2(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);
		else
			ret = tce_iommu_build(container, tbl,
					param.iova >> tbl->it_page_shift,
					param.vaddr,
					param.size >> tbl->it_page_shift,
					direction);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = NULL;
		long num;

		if (!container->enabled)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		ret = tce_iommu_create_default_window(container);
		if (ret)
			return ret;

		num = tce_iommu_find_table(container, param.iova, &tbl);
		if (num < 0)
			return -ENXIO;

		if (param.size & ~IOMMU_PAGE_MASK(tbl))
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> tbl->it_page_shift);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> tbl->it_page_shift,
				param.size >> tbl->it_page_shift);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_register_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
		struct vfio_iommu_spapr_register_memory param;

		if (!container->v2)
			break;

		if (!container->mm)
			return -EPERM;

		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flag is supported now */
		if (param.flags)
			return -EINVAL;

		mutex_lock(&container->lock);
		ret = tce_iommu_unregister_pages(container, param.vaddr,
				param.size);
		mutex_unlock(&container->lock);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		if (container->v2)
			break;

		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP: {
		struct tce_iommu_group *tcegrp;

		ret = 0;
		list_for_each_entry(tcegrp, &container->group_list, next) {
			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
					cmd, arg);
			if (ret)
				return ret;
		}
		return ret;
	}

	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
		struct vfio_iommu_spapr_tce_create create;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
				start_addr);

		if (copy_from_user(&create, (void __user *)arg, minsz))
			return -EFAULT;

		if (create.argsz < minsz)
			return -EINVAL;

		if (create.flags)
			return -EINVAL;

		mutex_lock(&container->lock);

		ret = tce_iommu_create_default_window(container);
		if (!ret)
			ret = tce_iommu_create_window(container,
					create.page_shift,
					create.window_size, create.levels,
					&create.start_addr);

		mutex_unlock(&container->lock);

		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
			ret = -EFAULT;

		return ret;
	}
	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
		struct vfio_iommu_spapr_tce_remove remove;

		if (!container->v2)
			break;

		ret = tce_iommu_mm_set(container);
		if (ret)
			return ret;

		if (!tce_groups_attached(container))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
				start_addr);

		if (copy_from_user(&remove, (void __user *)arg, minsz))
			return -EFAULT;

		if (remove.argsz < minsz)
			return -EINVAL;

		if (remove.flags)
			return -EINVAL;

		if (container->def_window_pending && !remove.start_addr) {
			container->def_window_pending = false;
			return 0;
		}

		mutex_lock(&container->lock);

		ret = tce_iommu_remove_window(container, remove.start_addr);

		mutex_unlock(&container->lock);

		return ret;
	}
	}

	return -ENOTTY;
}

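/*
 * Ownership release for the non-DDW (v1) flow: clear every table the
 * container took over and hand the table bitmaps back to the kernel.
 */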
static void tce_iommu_release_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		if (tbl->it_map)
			iommu_release_ownership(tbl);

		container->tables[i] = NULL;
	}
}

static int tce_iommu_take_ownership(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	int i, j, rc = 0;

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = table_group->tables[i];

		if (!tbl || !tbl->it_map)
			continue;

		rc = iommu_take_ownership(tbl);
		if (rc) {
			for (j = 0; j < i; ++j)
				iommu_release_ownership(
						table_group->tables[j]);

			return rc;
		}
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		container->tables[i] = table_group->tables[i];

	return 0;
}

static void tce_iommu_release_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i;

	if (!table_group->ops->unset_window) {
		WARN_ON_ONCE(1);
		return;
	}

	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);
}

static long tce_iommu_take_ownership_ddw(struct tce_container *container,
		struct iommu_table_group *table_group)
{
	long i, ret = 0;

	if (!table_group->ops->create_table || !table_group->ops->set_window ||
			!table_group->ops->release_ownership) {
		WARN_ON_ONCE(1);
		return -EFAULT;
	}

	table_group->ops->take_ownership(table_group);

	/* Set all windows to the new group */
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
		struct iommu_table *tbl = container->tables[i];

		if (!tbl)
			continue;

		ret = table_group->ops->set_window(table_group, i, tbl);
		if (ret)
			goto release_exit;
	}

	return 0;

release_exit:
	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
		table_group->ops->unset_window(table_group, i);

	table_group->ops->release_ownership(table_group);

	return ret;
}

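/*
 * Attach an IOMMU group to the container. A group without DDW platform
 * ops may only be attached to a v1 container and a group with them only
 * to a v2 one; further groups are accepted only if their ops match the
 * ones already attached.
 */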
static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	struct tce_iommu_group *tcegrp = NULL;

	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	table_group = iommu_group_get_iommudata(iommu_group);
	if (!table_group) {
		ret = -ENODEV;
		goto unlock_exit;
	}

	if (tce_groups_attached(container) && (!table_group->ops ||
			!table_group->ops->take_ownership ||
			!table_group->ops->release_ownership)) {
		ret = -EBUSY;
		goto unlock_exit;
	}

	/* Check if new group has the same iommu_ops (i.e. compatible) */
	list_for_each_entry(tcegrp, &container->group_list, next) {
		struct iommu_table_group *table_group_tmp;

		if (tcegrp->grp == iommu_group) {
			pr_warn("tce_vfio: Group %d is already attached\n",
					iommu_group_id(iommu_group));
			ret = -EBUSY;
			goto unlock_exit;
		}
		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
		if (table_group_tmp->ops->create_table !=
				table_group->ops->create_table) {
			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
					iommu_group_id(iommu_group),
					iommu_group_id(tcegrp->grp));
			ret = -EPERM;
			goto unlock_exit;
		}
	}

	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
	if (!tcegrp) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (!table_group->ops || !table_group->ops->take_ownership ||
			!table_group->ops->release_ownership) {
		if (container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership(container, table_group);
	} else {
		if (!container->v2) {
			ret = -EPERM;
			goto unlock_exit;
		}
		ret = tce_iommu_take_ownership_ddw(container, table_group);
		if (!tce_groups_attached(container) && !container->tables[0])
			container->def_window_pending = true;
	}

	if (!ret) {
		tcegrp->grp = iommu_group;
		list_add(&tcegrp->next, &container->group_list);
	}

unlock_exit:
	if (ret && tcegrp)
		kfree(tcegrp);

	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table_group *table_group;
	bool found = false;
	struct tce_iommu_group *tcegrp;

	mutex_lock(&container->lock);

	list_for_each_entry(tcegrp, &container->group_list, next) {
		if (tcegrp->grp == iommu_group) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_warn("tce_vfio: detaching unattached group #%u\n",
				iommu_group_id(iommu_group));
		goto unlock_exit;
	}

	list_del(&tcegrp->next);
	kfree(tcegrp);

	table_group = iommu_group_get_iommudata(iommu_group);
	BUG_ON(!table_group);

	if (!table_group->ops || !table_group->ops->release_ownership)
		tce_iommu_release_ownership(container, table_group);
	else
		tce_iommu_release_ownership_ddw(container, table_group);

unlock_exit:
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);