/*
 * VFIO: IOMMU DMA mapping support for TCE on POWER
 *
 * Copyright (C) 2013 IBM Corp. All rights reserved.
 * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio_iommu_type1.c:
 * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
 * Author: Alex Williamson <alex.williamson@redhat.com>
 */

#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/err.h>
#include <linux/vfio.h>
#include <asm/iommu.h>
#include <asm/tce.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"aik@ozlabs.ru"
#define DRIVER_DESC	"VFIO IOMMU SPAPR TCE"

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group);

/*
 * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation
 *
 * This code handles mapping and unmapping of user data buffers
 * into DMA'ble space using the IOMMU.
 */

/*
 * The container descriptor supports only a single group per container.
 * This is required by the API as the container is not supplied with the
 * IOMMU group at the moment of initialization.
 */
struct tce_container {
	struct mutex lock;
	struct iommu_table *tbl;
	bool enabled;
};

static int tce_iommu_enable(struct tce_container *container)
{
	int ret = 0;
	unsigned long locked, lock_limit, npages;
	struct iommu_table *tbl = container->tbl;

	if (!container->tbl)
		return -ENXIO;

	if (!current->mm)
		return -ESRCH; /* process exited */

	if (container->enabled)
		return -EBUSY;

	/*
	 * When userspace pages are mapped into the IOMMU, they are effectively
	 * locked memory, so, in theory, we would need to update the accounting
	 * of locked pages on each map and unmap. For powerpc, however, the
	 * map/unmap paths can be very hot, and the accounting would kill
	 * performance, especially since it would be difficult or impossible
	 * to handle the accounting in real mode only.
	 *
	 * To address that, rather than precisely accounting every page, we
	 * instead account for a worst case on locked memory when the IOMMU is
	 * enabled and disabled. The worst-case upper bound on locked memory
	 * is the size of the whole IOMMU window, which is usually relatively
	 * small (compared to total memory sizes) on POWER hardware.
	 *
	 * Also, we don't have a nice way to fail on H_PUT_TCE due to ulimits;
	 * that would effectively kill the guest at random points, so it is
	 * much better to enforce the limit based on the maximum that the
	 * guest can map.
	 */
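	/*
	 * Illustrative worked example of the accounting below (the figures
	 * are assumptions, not from any particular platform): a 1GB 32-bit
	 * DMA window backed by 4K IOMMU pages has it_size = 262144 TCE
	 * entries, so with a 64K system PAGE_SIZE the worst case charged
	 * against RLIMIT_MEMLOCK is (262144 << 12) >> 16 = 16384 pages,
	 * regardless of how much of the window is ever actually mapped.
	 */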
	down_write(&current->mm->mmap_sem);
	npages = (tbl->it_size << IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
	locked = current->mm->locked_vm + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
		pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n",
				rlimit(RLIMIT_MEMLOCK));
		ret = -ENOMEM;
	} else {
		current->mm->locked_vm += npages;
		container->enabled = true;
	}
	up_write(&current->mm->mmap_sem);

	return ret;
}

static void tce_iommu_disable(struct tce_container *container)
{
	if (!container->enabled)
		return;

	container->enabled = false;

	if (!container->tbl || !current->mm)
		return;

	down_write(&current->mm->mmap_sem);
	current->mm->locked_vm -= (container->tbl->it_size <<
			IOMMU_PAGE_SHIFT_4K) >> PAGE_SHIFT;
	up_write(&current->mm->mmap_sem);
}

static void *tce_iommu_open(unsigned long arg)
{
	struct tce_container *container;

	if (arg != VFIO_SPAPR_TCE_IOMMU) {
		pr_err("tce_vfio: Wrong IOMMU type\n");
		return ERR_PTR(-EINVAL);
	}

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return ERR_PTR(-ENOMEM);

	mutex_init(&container->lock);

	return container;
}

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;

	WARN_ON(container->tbl && !container->tbl->it_group);
	tce_iommu_disable(container);

	if (container->tbl && container->tbl->it_group)
		tce_iommu_detach_group(iommu_data, container->tbl->it_group);

	mutex_destroy(&container->lock);

	kfree(container);
}

static int tce_iommu_clear(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long pages)
{
	unsigned long oldtce;
	struct page *page;

	for ( ; pages; --pages, ++entry) {
		oldtce = iommu_clear_tce(tbl, entry);
		if (!oldtce)
			continue;

		page = pfn_to_page(oldtce >> PAGE_SHIFT);
		WARN_ON(!page);
		if (page) {
			if (oldtce & TCE_PCI_WRITE)
				SetPageDirty(page);
			put_page(page);
		}
	}

	return 0;
}

static long tce_iommu_build(struct tce_container *container,
		struct iommu_table *tbl,
		unsigned long entry, unsigned long tce, unsigned long pages)
{
	long i, ret = 0;
	struct page *page = NULL;
	unsigned long hva;
	enum dma_data_direction direction = iommu_tce_direction(tce);

	for (i = 0; i < pages; ++i) {
		unsigned long offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;

		ret = get_user_pages_fast(tce & PAGE_MASK, 1,
				direction != DMA_TO_DEVICE, &page);
		if (unlikely(ret != 1)) {
			ret = -EFAULT;
			break;
		}
		hva = (unsigned long) page_address(page) + offset;

		ret = iommu_tce_build(tbl, entry + i, hva, direction);
		if (ret) {
			put_page(page);
			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
					__func__, entry << tbl->it_page_shift,
					tce, ret);
			break;
		}
		tce += IOMMU_PAGE_SIZE_4K;
	}

	if (ret)
		tce_iommu_clear(container, tbl, entry, i);

	return ret;
}

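/*
 * A minimal userspace sketch of the ioctl sequence served by the handler
 * below. Assumptions: the group is already bound to a VFIO PCI driver so
 * /dev/vfio/$GROUP exists, the group number "26" and the buffer are
 * placeholders, and includes and error handling are omitted.
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *	int group = open("/dev/vfio/26", O_RDWR);
 *	struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
 *	struct vfio_iommu_type1_dma_map map = { .argsz = sizeof(map) };
 *	void *buf = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 *			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *
 *	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
 *	ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
 *	ioctl(container, VFIO_IOMMU_ENABLE);
 *	ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
 *
 *	map.vaddr = (__u64)(unsigned long)buf;
 *	map.size = 4096;
 *	map.iova = info.dma32_window_start;
 *	map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
 *	ioctl(container, VFIO_IOMMU_MAP_DMA, &map);
 */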
static long tce_iommu_ioctl(void *iommu_data,
		unsigned int cmd, unsigned long arg)
{
	struct tce_container *container = iommu_data;
	unsigned long minsz;
	long ret;

	switch (cmd) {
	case VFIO_CHECK_EXTENSION:
		switch (arg) {
		case VFIO_SPAPR_TCE_IOMMU:
			ret = 1;
			break;
		default:
			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
			break;
		}

		return (ret < 0) ? 0 : ret;

	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
		struct vfio_iommu_spapr_tce_info info;
		struct iommu_table *tbl = container->tbl;

		if (WARN_ON(!tbl))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_spapr_tce_info,
				dma32_window_size);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT_4K;
		info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT_4K;
		info.flags = 0;

		if (copy_to_user((void __user *)arg, &info, minsz))
			return -EFAULT;

		return 0;
	}
	case VFIO_IOMMU_MAP_DMA: {
		struct vfio_iommu_type1_dma_map param;
		struct iommu_table *tbl = container->tbl;
		unsigned long tce;

		if (!tbl)
			return -ENXIO;

		BUG_ON(!tbl->it_group);

		minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ |
				VFIO_DMA_MAP_FLAG_WRITE))
			return -EINVAL;

		if ((param.size & ~IOMMU_PAGE_MASK_4K) ||
				(param.vaddr & ~IOMMU_PAGE_MASK_4K))
			return -EINVAL;

		/* iova is checked by the IOMMU API */
		tce = param.vaddr;
		if (param.flags & VFIO_DMA_MAP_FLAG_READ)
			tce |= TCE_PCI_READ;
		if (param.flags & VFIO_DMA_MAP_FLAG_WRITE)
			tce |= TCE_PCI_WRITE;

		ret = iommu_tce_put_param_check(tbl, param.iova, tce);
		if (ret)
			return ret;

		ret = tce_iommu_build(container, tbl,
				param.iova >> IOMMU_PAGE_SHIFT_4K,
				tce, param.size >> IOMMU_PAGE_SHIFT_4K);

		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_UNMAP_DMA: {
		struct vfio_iommu_type1_dma_unmap param;
		struct iommu_table *tbl = container->tbl;

		if (WARN_ON(!tbl))
			return -ENXIO;

		minsz = offsetofend(struct vfio_iommu_type1_dma_unmap,
				size);

		if (copy_from_user(&param, (void __user *)arg, minsz))
			return -EFAULT;

		if (param.argsz < minsz)
			return -EINVAL;

		/* No flags are supported for now */
		if (param.flags)
			return -EINVAL;

		if (param.size & ~IOMMU_PAGE_MASK_4K)
			return -EINVAL;

		ret = iommu_tce_clear_param_check(tbl, param.iova, 0,
				param.size >> IOMMU_PAGE_SHIFT_4K);
		if (ret)
			return ret;

		ret = tce_iommu_clear(container, tbl,
				param.iova >> IOMMU_PAGE_SHIFT_4K,
				param.size >> IOMMU_PAGE_SHIFT_4K);
		iommu_flush_tce(tbl);

		return ret;
	}
	case VFIO_IOMMU_ENABLE:
		mutex_lock(&container->lock);
		ret = tce_iommu_enable(container);
		mutex_unlock(&container->lock);
		return ret;

	case VFIO_IOMMU_DISABLE:
		mutex_lock(&container->lock);
		tce_iommu_disable(container);
		mutex_unlock(&container->lock);
		return 0;

	case VFIO_EEH_PE_OP:
		if (!container->tbl || !container->tbl->it_group)
			return -ENODEV;

		return vfio_spapr_iommu_eeh_ioctl(container->tbl->it_group,
				cmd, arg);
	}

	return -ENOTTY;
}

static int tce_iommu_attach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	int ret;
	struct tce_container *container = iommu_data;
	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

	BUG_ON(!tbl);
	mutex_lock(&container->lock);

	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
			iommu_group_id(iommu_group), iommu_group); */
	if (container->tbl) {
		pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
				iommu_group_id(container->tbl->it_group),
				iommu_group_id(iommu_group));
		ret = -EBUSY;
	} else if (container->enabled) {
		pr_err("tce_vfio: attaching group #%u to enabled container\n",
				iommu_group_id(iommu_group));
		ret = -EBUSY;
	} else {
		ret = iommu_take_ownership(tbl);
		if (!ret)
			container->tbl = tbl;
	}

	mutex_unlock(&container->lock);

	return ret;
}

static void tce_iommu_detach_group(void *iommu_data,
		struct iommu_group *iommu_group)
{
	struct tce_container *container = iommu_data;
	struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group);

	BUG_ON(!tbl);
	mutex_lock(&container->lock);
	if (tbl != container->tbl) {
		pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
				iommu_group_id(iommu_group),
				iommu_group_id(tbl->it_group));
	} else {
		if (container->enabled) {
			pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
					iommu_group_id(tbl->it_group));
			tce_iommu_disable(container);
		}

		/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
				iommu_group_id(iommu_group), iommu_group); */
		container->tbl = NULL;
		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
		iommu_release_ownership(tbl);
	}
	mutex_unlock(&container->lock);
}

const struct vfio_iommu_driver_ops tce_iommu_driver_ops = {
	.name		= "iommu-vfio-powerpc",
	.owner		= THIS_MODULE,
	.open		= tce_iommu_open,
	.release	= tce_iommu_release,
	.ioctl		= tce_iommu_ioctl,
	.attach_group	= tce_iommu_attach_group,
	.detach_group	= tce_iommu_detach_group,
};

static int __init tce_iommu_init(void)
{
	return vfio_register_iommu_driver(&tce_iommu_driver_ops);
}

static void __exit tce_iommu_cleanup(void)
{
	vfio_unregister_iommu_driver(&tce_iommu_driver_ops);
}

module_init(tce_iommu_init);
module_exit(tce_iommu_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);