/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION	"0.3"
#define DRIVER_AUTHOR	"Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC	"VFIO - User Level meta-driver"

static struct vfio {
	struct class			*class;
	struct list_head		iommu_drivers_list;
	struct mutex			iommu_drivers_lock;
	struct list_head		group_list;
	struct idr			group_idr;
	struct mutex			group_lock;
	struct cdev			group_cdev;
	dev_t				group_devt;
	wait_queue_head_t		release_q;
} vfio;

struct vfio_iommu_driver {
	const struct vfio_iommu_driver_ops	*ops;
	struct list_head			vfio_next;
};

struct vfio_container {
	struct kref			kref;
	struct list_head		group_list;
	struct rw_semaphore		group_lock;
	struct vfio_iommu_driver	*iommu_driver;
	void				*iommu_data;
	bool				noiommu;
};

struct vfio_unbound_dev {
	struct device			*dev;
	struct list_head		unbound_next;
};

struct vfio_group {
	struct kref			kref;
	int				minor;
	atomic_t			container_users;
	struct iommu_group		*iommu_group;
	struct vfio_container		*container;
	struct list_head		device_list;
	struct mutex			device_lock;
	struct device			*dev;
	struct notifier_block		nb;
	struct list_head		vfio_next;
	struct list_head		container_next;
	struct list_head		unbound_list;
	struct mutex			unbound_lock;
	atomic_t			opened;
	bool				noiommu;
};

struct vfio_device {
	struct kref			kref;
	struct device			*dev;
	const struct vfio_device_ops	*ops;
	struct vfio_group		*group;
	struct list_head		group_next;
	void				*device_data;
};

#ifdef CONFIG_VFIO_NOIOMMU
static bool noiommu __read_mostly;
module_param_named(enable_unsafe_noiommu_mode,
		   noiommu, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
#endif

/*
 * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
 * and remove functions; any use case other than acquiring the first
 * reference for the purpose of calling vfio_add_group_dev() or removing
 * that symmetric reference after vfio_del_group_dev() should use the raw
 * iommu_group_{get,put} functions.  In particular, vfio_iommu_group_put()
 * removes the device from the dummy group and cannot be nested.
 */
struct iommu_group *vfio_iommu_group_get(struct device *dev)
{
	struct iommu_group *group;
	int __maybe_unused ret;

	group = iommu_group_get(dev);

#ifdef CONFIG_VFIO_NOIOMMU
	/*
	 * With noiommu enabled, an IOMMU group will be created for a device
	 * that doesn't already have one and doesn't have an iommu_ops on its
	 * bus.  We set iommudata simply to be able to identify these groups
	 * as special use and for reclamation later.
	 */
	if (group || !noiommu || iommu_present(dev->bus))
		return group;

	group = iommu_group_alloc();
	if (IS_ERR(group))
		return NULL;

	iommu_group_set_name(group, "vfio-noiommu");
	iommu_group_set_iommudata(group, &noiommu, NULL);
	ret = iommu_group_add_device(group, dev);
	iommu_group_put(group);
	if (ret)
		return NULL;

	/*
	 * Where to taint?  At this point we've added an IOMMU group for a
	 * device that is not backed by iommu_ops, therefore any iommu_
	 * callback using iommu_ops can legitimately Oops.  So, while we may
	 * be about to give a DMA capable device to a user without IOMMU
	 * protection, which is clearly taint-worthy, let's go ahead and do
	 * it here.
	 */
	add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
#endif

	return group;
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_get);

void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
	if (iommu_group_get_iommudata(group) == &noiommu)
		iommu_group_remove_device(dev);
#endif

	iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
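
/*
 * A hypothetical usage sketch, not part of the original file: a VFIO bus
 * driver pairs the two helpers above around its device registration, in
 * the same way vfio-pci does.  "my_vfio_ops" and "my_data" are placeholder
 * names for the driver's vfio_device_ops and private data.
 *
 *	static int my_vfio_probe(struct device *dev)
 *	{
 *		struct iommu_group *group = vfio_iommu_group_get(dev);
 *		int ret;
 *
 *		if (!group)
 *			return -EINVAL;
 *
 *		ret = vfio_add_group_dev(dev, &my_vfio_ops, my_data);
 *		if (ret)
 *			vfio_iommu_group_put(group, dev);
 *		return ret;
 *	}
 *
 *	static void my_vfio_remove(struct device *dev)
 *	{
 *		void *data = vfio_del_group_dev(dev);
 *
 *		vfio_iommu_group_put(dev->iommu_group, dev);
 *		kfree(data);
 *	}
 */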

#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
	if (arg != VFIO_NOIOMMU_IOMMU)
		return ERR_PTR(-EINVAL);
	if (!capable(CAP_SYS_RAWIO))
		return ERR_PTR(-EPERM);

	return NULL;
}

static void vfio_noiommu_release(void *iommu_data)
{
}

static long vfio_noiommu_ioctl(void *iommu_data,
			       unsigned int cmd, unsigned long arg)
{
	if (cmd == VFIO_CHECK_EXTENSION)
		return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;

	return -ENOTTY;
}

static int vfio_noiommu_attach_group(void *iommu_data,
				     struct iommu_group *iommu_group)
{
	return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
}

static void vfio_noiommu_detach_group(void *iommu_data,
				      struct iommu_group *iommu_group)
{
}

static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
	.name = "vfio-noiommu",
	.owner = THIS_MODULE,
	.open = vfio_noiommu_open,
	.release = vfio_noiommu_release,
	.ioctl = vfio_noiommu_ioctl,
	.attach_group = vfio_noiommu_attach_group,
	.detach_group = vfio_noiommu_detach_group,
};
#endif


/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver, *tmp;

	driver = kzalloc(sizeof(*driver), GFP_KERNEL);
	if (!driver)
		return -ENOMEM;

	driver->ops = ops;

	mutex_lock(&vfio.iommu_drivers_lock);

	/* Check for duplicates */
	list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
		if (tmp->ops == ops) {
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return -EINVAL;
		}
	}

	list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

	mutex_unlock(&vfio.iommu_drivers_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
	struct vfio_iommu_driver *driver;

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		if (driver->ops == ops) {
			list_del(&driver->vfio_next);
			mutex_unlock(&vfio.iommu_drivers_lock);
			kfree(driver);
			return;
		}
	}
	mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
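
/*
 * A hypothetical sketch, not from this file: an IOMMU backend (the role
 * vfio_iommu_type1 fills in-tree) registers its ops table at module load
 * and removes it at unload, mirroring the vfio_noiommu_ops table above.
 * All "my_iommu_*" names are placeholders.
 *
 *	static const struct vfio_iommu_driver_ops my_iommu_ops = {
 *		.name		= "vfio-my-iommu",
 *		.owner		= THIS_MODULE,
 *		.open		= my_iommu_open,
 *		.release	= my_iommu_release,
 *		.ioctl		= my_iommu_ioctl,
 *		.attach_group	= my_iommu_attach_group,
 *		.detach_group	= my_iommu_detach_group,
 *	};
 *
 *	static int __init my_iommu_init(void)
 *	{
 *		return vfio_register_iommu_driver(&my_iommu_ops);
 *	}
 *
 *	static void __exit my_iommu_cleanup(void)
 *	{
 *		vfio_unregister_iommu_driver(&my_iommu_ops);
 *	}
 */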

/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
	return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
	idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
	kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
	struct vfio_container *container;
	container = container_of(kref, struct vfio_container, kref);

	kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
	kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
	mutex_unlock(&vfio.group_lock);
	/*
	 * Unregister outside of lock.  A spurious callback is harmless now
	 * that the group is no longer in vfio.group_list.
	 */
	iommu_group_unregister_notifier(group->iommu_group, &group->nb);
	kfree(group);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
	struct vfio_group *group, *tmp;
	struct device *dev;
	int ret, minor;

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group)
		return ERR_PTR(-ENOMEM);

	kref_init(&group->kref);
	INIT_LIST_HEAD(&group->device_list);
	mutex_init(&group->device_lock);
	INIT_LIST_HEAD(&group->unbound_list);
	mutex_init(&group->unbound_lock);
	atomic_set(&group->container_users, 0);
	atomic_set(&group->opened, 0);
	group->iommu_group = iommu_group;
#ifdef CONFIG_VFIO_NOIOMMU
	group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
#endif

	group->nb.notifier_call = vfio_iommu_group_notifier;

	/*
	 * blocking notifiers acquire a rwsem around registering and hold
	 * it around callback.  Therefore, need to register outside of
	 * vfio.group_lock to avoid A-B/B-A contention.  Our callback won't
	 * do anything unless it can find the group in vfio.group_list, so
	 * no harm in registering early.
	 */
	ret = iommu_group_register_notifier(iommu_group, &group->nb);
	if (ret) {
		kfree(group);
		return ERR_PTR(ret);
	}

	mutex_lock(&vfio.group_lock);

	/* Did we race creating this group? */
	list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
		if (tmp->iommu_group == iommu_group) {
			vfio_group_get(tmp);
			vfio_group_unlock_and_free(group);
			return tmp;
		}
	}

	minor = vfio_alloc_group_minor(group);
	if (minor < 0) {
		vfio_group_unlock_and_free(group);
		return ERR_PTR(minor);
	}

	dev = device_create(vfio.class, NULL,
			    MKDEV(MAJOR(vfio.group_devt), minor),
			    group, "%s%d", group->noiommu ? "noiommu-" : "",
			    iommu_group_id(iommu_group));
	if (IS_ERR(dev)) {
		vfio_free_group_minor(minor);
		vfio_group_unlock_and_free(group);
		return (struct vfio_group *)dev; /* ERR_PTR */
	}

	group->minor = minor;
	group->dev = dev;

	list_add(&group->vfio_next, &vfio.group_list);

	mutex_unlock(&vfio.group_lock);

	return group;
}

/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref)
{
	struct vfio_group *group = container_of(kref, struct vfio_group, kref);
	struct vfio_unbound_dev *unbound, *tmp;
	struct iommu_group *iommu_group = group->iommu_group;

	WARN_ON(!list_empty(&group->device_list));

	list_for_each_entry_safe(unbound, tmp,
				 &group->unbound_list, unbound_next) {
		list_del(&unbound->unbound_next);
		kfree(unbound);
	}

	device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
	list_del(&group->vfio_next);
	vfio_free_group_minor(group->minor);
	vfio_group_unlock_and_free(group);
	iommu_group_put(iommu_group);
}

static void vfio_group_put(struct vfio_group *group)
{
	kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
	kref_get(&group->kref);
}

/*
 * Not really a try as we will sleep for mutex, but we need to make
 * sure the group pointer is valid under lock and get a reference.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
	struct vfio_group *target = group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group == target) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	list_for_each_entry(group, &vfio.group_list, vfio_next) {
		if (group->iommu_group == iommu_group) {
			vfio_group_get(group);
			mutex_unlock(&vfio.group_lock);
			return group;
		}
	}
	mutex_unlock(&vfio.group_lock);

	return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
	struct vfio_group *group;

	mutex_lock(&vfio.group_lock);
	group = idr_find(&vfio.group_idr, minor);
	if (!group) {
		mutex_unlock(&vfio.group_lock);
		return NULL;
	}
	vfio_group_get(group);
	mutex_unlock(&vfio.group_lock);

	return group;
}

static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return NULL;

	group = vfio_group_get_from_iommu(iommu_group);
	iommu_group_put(iommu_group);

	return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
					     struct device *dev,
					     const struct vfio_device_ops *ops,
					     void *device_data)
{
	struct vfio_device *device;

	device = kzalloc(sizeof(*device), GFP_KERNEL);
	if (!device)
		return ERR_PTR(-ENOMEM);

	kref_init(&device->kref);
	device->dev = dev;
	device->group = group;
	device->ops = ops;
	device->device_data = device_data;
	dev_set_drvdata(dev, device);

	/* No need to get group_lock, caller has group reference */
	vfio_group_get(group);

	mutex_lock(&group->device_lock);
	list_add(&device->group_next, &group->device_list);
	mutex_unlock(&group->device_lock);

	return device;
}

static void vfio_device_release(struct kref *kref)
{
	struct vfio_device *device = container_of(kref,
						  struct vfio_device, kref);
	struct vfio_group *group = device->group;

	list_del(&device->group_next);
	mutex_unlock(&group->device_lock);

	dev_set_drvdata(device->dev, NULL);

	kfree(device);

	/* vfio_del_group_dev may be waiting for this device */
	wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
	struct vfio_group *group = device->group;
	kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
	vfio_group_get(device->group);
	kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
						 struct device *dev)
{
	struct vfio_device *device;

	mutex_lock(&group->device_lock);
	list_for_each_entry(device, &group->device_list, group_next) {
		if (device->dev == dev) {
			vfio_device_get(device);
			mutex_unlock(&group->device_lock);
			return device;
		}
	}
	mutex_unlock(&group->device_lock);
	return NULL;
}

/*
 * Some drivers, like pci-stub, are only used to prevent other drivers from
 * claiming a device and are therefore perfectly legitimate for a user owned
 * group.  The pci-stub driver has no dependencies on DMA or the IOVA mapping
 * of the device, but it does prevent the user from having direct access to
 * the device, which is useful in some circumstances.
 *
 * We also assume that we can include PCI interconnect devices, ie. bridges.
 * IOMMU grouping on PCI necessitates that if we lack isolation on a bridge
 * then all of the downstream devices will be part of the same IOMMU group as
 * the bridge.  Thus, if placing the bridge into the user owned IOVA space
 * breaks anything, it only does so for user owned devices downstream.  Note
 * that error notification via MSI can be affected for platforms that handle
 * MSI within the same IOVA space as DMA.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub" };

static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv)
{
	int i;

	if (dev_is_pci(dev)) {
		struct pci_dev *pdev = to_pci_dev(dev);

		if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL)
			return true;
	}

	for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
		if (!strcmp(drv->name, vfio_driver_whitelist[i]))
			return true;
	}

	return false;
}

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *  - a PCI interconnect device
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver.  The first is to test whether the device exists in the vfio
 * group.  The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
	struct vfio_group *group = data;
	struct vfio_device *device;
	struct device_driver *drv = ACCESS_ONCE(dev->driver);
	struct vfio_unbound_dev *unbound;
	int ret = -EINVAL;

	mutex_lock(&group->unbound_lock);
	list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
		if (dev == unbound->dev) {
			ret = 0;
			break;
		}
	}
	mutex_unlock(&group->unbound_lock);

	if (!ret || !drv || vfio_dev_whitelisted(dev, drv))
		return 0;

	device = vfio_group_get_device(group, dev);
	if (device) {
		vfio_device_put(device);
		return 0;
	}

	return ret;
}

/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	/* Do we already know about it?  We shouldn't */
	device = vfio_group_get_device(group, dev);
	if (WARN_ON_ONCE(device)) {
		vfio_device_put(device);
		return 0;
	}

	/* Nothing to do for idle groups */
	if (!atomic_read(&group->container_users))
		return 0;

	/* TODO Prevent device auto probing */
	WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
	     iommu_group_id(group->iommu_group));

	return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
	/* We don't care what happens when the group isn't in use */
	if (!atomic_read(&group->container_users))
		return 0;

	return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct vfio_group *group = container_of(nb, struct vfio_group, nb);
	struct device *dev = data;
	struct vfio_unbound_dev *unbound;

	/*
	 * Need to go through a group_lock lookup to get a reference or we
	 * risk racing a group being removed.  Ignore spurious notifies.
	 */
	group = vfio_group_try_get(group);
	if (!group)
		return NOTIFY_OK;

	switch (action) {
	case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
		vfio_group_nb_add_dev(group, dev);
		break;
	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
		/*
		 * Nothing to do here.  If the device is in use, then the
		 * vfio sub-driver should block the remove callback until
		 * it is unused.  If the device is unused or attached to a
		 * stub driver, then it should be released and we don't
		 * care that it will be going away.
		 */
		break;
	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
		pr_debug("%s: Device %s, group %d binding to driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		break;
	case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
		pr_debug("%s: Device %s, group %d bound to driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		BUG_ON(vfio_group_nb_verify(group, dev));
		break;
	case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
		pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group), dev->driver->name);
		break;
	case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
		pr_debug("%s: Device %s, group %d unbound from driver\n",
			 __func__, dev_name(dev),
			 iommu_group_id(group->iommu_group));
		/*
		 * XXX An unbound device in a live group is ok, but we'd
		 * really like to avoid the above BUG_ON by preventing other
		 * drivers from binding to it.  Once that occurs, we have to
		 * stop the system to maintain isolation.  At a minimum, we'd
		 * want a toggle to disable driver auto probe for this device.
		 */

		mutex_lock(&group->unbound_lock);
		list_for_each_entry(unbound,
				    &group->unbound_list, unbound_next) {
			if (dev == unbound->dev) {
				list_del(&unbound->unbound_next);
				kfree(unbound);
				break;
			}
		}
		mutex_unlock(&group->unbound_lock);
		break;
	}

	vfio_group_put(group);
	return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
		       const struct vfio_device_ops *ops, void *device_data)
{
	struct iommu_group *iommu_group;
	struct vfio_group *group;
	struct vfio_device *device;

	iommu_group = iommu_group_get(dev);
	if (!iommu_group)
		return -EINVAL;

	group = vfio_group_get_from_iommu(iommu_group);
	if (!group) {
		group = vfio_create_group(iommu_group);
		if (IS_ERR(group)) {
			iommu_group_put(iommu_group);
			return PTR_ERR(group);
		}
	} else {
		/*
		 * A found vfio_group already holds a reference to the
		 * iommu_group.  A created vfio_group keeps the reference.
		 */
		iommu_group_put(iommu_group);
	}

	device = vfio_group_get_device(group, dev);
	if (device) {
		WARN(1, "Device %s already exists on group %d\n",
		     dev_name(dev), iommu_group_id(iommu_group));
		vfio_device_put(device);
		vfio_group_put(group);
		return -EBUSY;
	}

	device = vfio_group_create_device(group, dev, ops, device_data);
	if (IS_ERR(device)) {
		vfio_group_put(group);
		return PTR_ERR(device);
	}

	/*
	 * Drop all but the vfio_device reference.  The vfio_device holds
	 * a reference to the vfio_group, which holds a reference to the
	 * iommu_group.
	 */
	vfio_group_put(group);

	return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);
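
/*
 * A hypothetical sketch, not from this file: the vfio_device_ops table a
 * bus driver passes to vfio_add_group_dev() backs the device file
 * operations defined later in this file; each callback receives the
 * device_data pointer registered here.  vfio_group_get_device_fd() calls
 * .open, closing the device fd calls .release, and vfio_del_group_dev()
 * uses .request (if set) to ask for the device back.  Only callbacks this
 * file actually invokes are shown; all "my_*" names are placeholders.
 *
 *	static const struct vfio_device_ops my_device_ops = {
 *		.open		= my_open,
 *		.release	= my_release,
 *		.ioctl		= my_ioctl,
 *		.read		= my_read,
 *		.write		= my_write,
 *		.mmap		= my_mmap,
 *		.request	= my_request,
 *	};
 */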

/**
 * Get a reference to the vfio_device for a device.  Even if the
 * caller thinks they own the device, they could be racing with a
 * release call path, so we can't trust drvdata for the shortcut.
 * Go the long way around, from the iommu_group to the vfio_group
 * to the vfio_device.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
	struct vfio_group *group;
	struct vfio_device *device;

	group = vfio_group_get_from_dev(dev);
	if (!group)
		return NULL;

	device = vfio_group_get_device(group, dev);
	vfio_group_put(group);

	return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
						     char *buf)
{
	struct vfio_device *it, *device = NULL;

	mutex_lock(&group->device_lock);
	list_for_each_entry(it, &group->device_list, group_next) {
		if (!strcmp(dev_name(it->dev), buf)) {
			device = it;
			vfio_device_get(device);
			break;
		}
	}
	mutex_unlock(&group->device_lock);

	return device;
}

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
	return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);
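
/*
 * A hypothetical sketch, not from this file: an external module holding a
 * struct device pointer reaches the bus driver's private data through the
 * two exports above.  "struct my_private" is a placeholder for whatever
 * the bus driver registered as device_data.
 *
 *	struct vfio_device *vdev = vfio_device_get_from_dev(dev);
 *
 *	if (vdev) {
 *		struct my_private *priv = vfio_device_data(vdev);
 *
 *		... use priv ...
 *		vfio_device_put(vdev);
 *	}
 */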

/* Given a referenced group, check if it contains the device */
static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
{
	struct vfio_device *device;

	device = vfio_group_get_device(group, dev);
	if (!device)
		return false;

	vfio_device_put(device);
	return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
	struct vfio_device *device = dev_get_drvdata(dev);
	struct vfio_group *group = device->group;
	void *device_data = device->device_data;
	struct vfio_unbound_dev *unbound;
	unsigned int i = 0;
	long ret;
	bool interrupted = false;

	/*
	 * The group exists so long as we have a device reference.  Get
	 * a group reference and use it to scan for the device going away.
	 */
	vfio_group_get(group);

	/*
	 * When the device is removed from the group, the group suddenly
	 * becomes non-viable; the device has a driver (until the unbind
	 * completes), but it's not present in the group.  This is bad news
	 * for any external users that need to re-acquire a group reference
	 * in order to match and release their existing reference.  To
	 * solve this, we track such devices on the unbound_list to bridge
	 * the gap until they're fully unbound.
	 */
	unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
	if (unbound) {
		unbound->dev = dev;
		mutex_lock(&group->unbound_lock);
		list_add(&unbound->unbound_next, &group->unbound_list);
		mutex_unlock(&group->unbound_lock);
	}
	WARN_ON(!unbound);

	vfio_device_put(device);

	/*
	 * If the device is still present in the group after the above
	 * 'put', then it is in use and we need to request it from the
	 * bus driver.  The driver may in turn need to request the
	 * device from the user.  We send the request on an arbitrary
	 * interval with counter to allow the driver to take escalating
	 * measures to release the device if it has the ability to do so.
	 */
	do {
		device = vfio_group_get_device(group, dev);
		if (!device)
			break;

		if (device->ops->request)
			device->ops->request(device_data, i++);

		vfio_device_put(device);

		if (interrupted) {
			ret = wait_event_timeout(vfio.release_q,
					!vfio_dev_present(group, dev), HZ * 10);
		} else {
			ret = wait_event_interruptible_timeout(vfio.release_q,
					!vfio_dev_present(group, dev), HZ * 10);
			if (ret == -ERESTARTSYS) {
				interrupted = true;
				dev_warn(dev,
					 "Device is currently in use, task"
					 " \"%s\" (%d) "
					 "blocked until device is released",
					 current->comm, task_pid_nr(current));
			}
		}
	} while (ret <= 0);

	vfio_group_put(group);

	return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
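
/*
 * A hypothetical sketch, not from this file: a bus driver's .request
 * callback receives the escalation counter from the loop above and can,
 * for example, poke userspace through an eventfd, which is roughly how
 * vfio-pci relays such requests.  "my_private", "dev" and "req_trigger"
 * are placeholder names.
 *
 *	static void my_request(void *device_data, unsigned int count)
 *	{
 *		struct my_private *priv = device_data;
 *
 *		if (priv->req_trigger) {
 *			if (!(count % 10))
 *				dev_notice(priv->dev,
 *					   "Relaying device request to user\n");
 *			eventfd_signal(priv->req_trigger, 1);
 *		}
 *	}
 */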

/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
				       unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = 0;

	down_read(&container->group_lock);

	driver = container->iommu_driver;

	switch (arg) {
		/* No base extensions yet */
	default:
		/*
		 * If no driver is set, poll all registered drivers for
		 * extensions and return the first positive result.  If
		 * a driver is already set, further queries will be passed
		 * only to that driver.
		 */
		if (!driver) {
			mutex_lock(&vfio.iommu_drivers_lock);
			list_for_each_entry(driver, &vfio.iommu_drivers_list,
					    vfio_next) {

#ifdef CONFIG_VFIO_NOIOMMU
				if (!list_empty(&container->group_list) &&
				    (container->noiommu !=
				     (driver->ops == &vfio_noiommu_ops)))
					continue;
#endif

				if (!try_module_get(driver->ops->owner))
					continue;

				ret = driver->ops->ioctl(NULL,
							 VFIO_CHECK_EXTENSION,
							 arg);
				module_put(driver->ops->owner);
				if (ret > 0)
					break;
			}
			mutex_unlock(&vfio.iommu_drivers_lock);
		} else
			ret = driver->ops->ioctl(container->iommu_data,
						 VFIO_CHECK_EXTENSION, arg);
	}

	up_read(&container->group_lock);

	return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
					  struct vfio_iommu_driver *driver,
					  void *data)
{
	struct vfio_group *group;
	int ret = -ENODEV;

	list_for_each_entry(group, &container->group_list, container_next) {
		ret = driver->ops->attach_group(data, group->iommu_group);
		if (ret)
			goto unwind;
	}

	return ret;

unwind:
	list_for_each_entry_continue_reverse(group, &container->group_list,
					     container_next) {
		driver->ops->detach_group(data, group->iommu_group);
	}

	return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
				 unsigned long arg)
{
	struct vfio_iommu_driver *driver;
	long ret = -ENODEV;

	down_write(&container->group_lock);

	/*
	 * The container is designed to be an unprivileged interface while
	 * the group can be assigned to specific users.  Therefore, only by
	 * adding a group to a container does the user get the privilege of
	 * enabling the iommu, which may allocate finite resources.  There
	 * is no unset_iommu, but by removing all the groups from a container,
	 * the container is deprivileged and returns to an unset state.
	 */
	if (list_empty(&container->group_list) || container->iommu_driver) {
		up_write(&container->group_lock);
		return -EINVAL;
	}

	mutex_lock(&vfio.iommu_drivers_lock);
	list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
		void *data;

#ifdef CONFIG_VFIO_NOIOMMU
		/*
		 * Only noiommu containers can use vfio-noiommu and noiommu
		 * containers can only use vfio-noiommu.
		 */
		if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
			continue;
#endif

		if (!try_module_get(driver->ops->owner))
			continue;

		/*
		 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
		 * so test which iommu driver reported support for this
		 * extension and call open on them.  We also pass them the
		 * magic, allowing a single driver to support multiple
		 * interfaces if they'd like.
		 */
		if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
			module_put(driver->ops->owner);
			continue;
		}

		data = driver->ops->open(arg);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			module_put(driver->ops->owner);
			continue;
		}

		ret = __vfio_container_attach_groups(container, driver, data);
		if (ret) {
			driver->ops->release(data);
			module_put(driver->ops->owner);
			continue;
		}

		container->iommu_driver = driver;
		container->iommu_data = data;
		break;
	}

	mutex_unlock(&vfio.iommu_drivers_lock);
	up_write(&container->group_lock);

	return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
				unsigned int cmd, unsigned long arg)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	void *data;
	long ret = -EINVAL;

	if (!container)
		return ret;

	switch (cmd) {
	case VFIO_GET_API_VERSION:
		ret = VFIO_API_VERSION;
		break;
	case VFIO_CHECK_EXTENSION:
		ret = vfio_ioctl_check_extension(container, arg);
		break;
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
	default:
		down_read(&container->group_lock);

		driver = container->iommu_driver;
		data = container->iommu_data;

		if (driver) /* passthrough all unrecognized ioctls */
			ret = driver->ops->ioctl(data, cmd, arg);

		up_read(&container->group_lock);
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
				   unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_container *container;

	container = kzalloc(sizeof(*container), GFP_KERNEL);
	if (!container)
		return -ENOMEM;

	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);

	filep->private_data = container;

	return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
			      size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->read))
		ret = driver->ops->read(container->iommu_data,
					buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	ssize_t ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->write))
		ret = driver->ops->write(container->iommu_data,
					 buf, count, ppos);

	up_read(&container->group_lock);

	return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
	int ret = -EINVAL;

	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->mmap))
		ret = driver->ops->mmap(container->iommu_data, vma);

	up_read(&container->group_lock);

	return ret;
}

static const struct file_operations vfio_fops = {
	.owner		= THIS_MODULE,
	.open		= vfio_fops_open,
	.release	= vfio_fops_release,
	.read		= vfio_fops_read,
	.write		= vfio_fops_write,
	.unlocked_ioctl	= vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_fops_compat_ioctl,
#endif
	.mmap		= vfio_fops_mmap,
};
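
/*
 * An illustrative userspace sketch, not part of this file: minimal use of
 * the container fd served by the file operations above, in the style of
 * Documentation/vfio.txt.
 *
 *	int container = open("/dev/vfio/vfio", O_RDWR);
 *
 *	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
 *		... unknown API version ...
 *
 *	if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
 *		... vfio-iommu-type1 not available ...
 *
 * VFIO_SET_IOMMU only succeeds once a group has been attached; see the
 * group fd sketch further below.
 */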

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
	struct vfio_container *container = group->container;
	struct vfio_iommu_driver *driver;

	down_write(&container->group_lock);

	driver = container->iommu_driver;
	if (driver)
		driver->ops->detach_group(container->iommu_data,
					  group->iommu_group);

	group->container = NULL;
	list_del(&group->container_next);

	/* Detaching the last group deprivileges a container, remove iommu */
	if (driver && list_empty(&container->group_list)) {
		driver->ops->release(container->iommu_data);
		module_put(driver->ops->owner);
		container->iommu_driver = NULL;
		container->iommu_data = NULL;
	}

	up_write(&container->group_lock);

	vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
	int users = atomic_cmpxchg(&group->container_users, 1, 0);

	if (!users)
		return -EINVAL;
	if (users != 1)
		return -EBUSY;

	__vfio_group_unset_container(group);

	return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
	if (0 == atomic_dec_if_positive(&group->container_users))
		__vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
	struct fd f;
	struct vfio_container *container;
	struct vfio_iommu_driver *driver;
	int ret = 0;

	if (atomic_read(&group->container_users))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	f = fdget(container_fd);
	if (!f.file)
		return -EBADF;

	/* Sanity check, is this really our fd? */
	if (f.file->f_op != &vfio_fops) {
		fdput(f);
		return -EINVAL;
	}

	container = f.file->private_data;
	WARN_ON(!container); /* fget ensures we don't race vfio_release */

	down_write(&container->group_lock);

	/* Real groups and fake groups cannot mix */
	if (!list_empty(&container->group_list) &&
	    container->noiommu != group->noiommu) {
		ret = -EPERM;
		goto unlock_out;
	}

	driver = container->iommu_driver;
	if (driver) {
		ret = driver->ops->attach_group(container->iommu_data,
						group->iommu_group);
		if (ret)
			goto unlock_out;
	}

	group->container = container;
	container->noiommu = group->noiommu;
	list_add(&group->container_next, &container->group_list);

	/* Get a reference on the container and mark a user within the group */
	vfio_container_get(container);
	atomic_inc(&group->container_users);

unlock_out:
	up_write(&container->group_lock);
	fdput(f);
	return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
	return (iommu_group_for_each_dev(group->iommu_group,
					 group, vfio_dev_viable) == 0);
}

static int vfio_group_add_container_user(struct vfio_group *group)
{
	if (!atomic_inc_not_zero(&group->container_users))
		return -EINVAL;

	if (group->noiommu) {
		atomic_dec(&group->container_users);
		return -EPERM;
	}
	if (!group->container->iommu_driver || !vfio_group_viable(group)) {
		atomic_dec(&group->container_users);
		return -EINVAL;
	}

	return 0;
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
	struct vfio_device *device;
	struct file *filep;
	int ret;

	if (0 == atomic_read(&group->container_users) ||
	    !group->container->iommu_driver || !vfio_group_viable(group))
		return -EINVAL;

	if (group->noiommu && !capable(CAP_SYS_RAWIO))
		return -EPERM;

	device = vfio_device_get_from_name(group, buf);
	if (!device)
		return -ENODEV;

	ret = device->ops->open(device->device_data);
	if (ret) {
		vfio_device_put(device);
		return ret;
	}

	/*
	 * We can't use anon_inode_getfd() because we need to modify
	 * the f_mode flags directly to allow more than just ioctls
	 */
	ret = get_unused_fd_flags(O_CLOEXEC);
	if (ret < 0) {
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep)) {
		put_unused_fd(ret);
		ret = PTR_ERR(filep);
		device->ops->release(device->device_data);
		vfio_device_put(device);
		return ret;
	}

	/*
	 * TODO: add an anon_inode interface to do this.
	 * Appears to be missing by lack of need rather than
	 * explicitly prevented.  Now there's need.
	 */
	filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

	atomic_inc(&group->container_users);

	fd_install(ret, filep);

	if (group->noiommu)
		dev_warn(device->dev, "vfio-noiommu device opened by user "
			 "(%s:%d)\n", current->comm, task_pid_nr(current));

	return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vfio_group *group = filep->private_data;
	long ret = -ENOTTY;

	switch (cmd) {
	case VFIO_GROUP_GET_STATUS:
	{
		struct vfio_group_status status;
		unsigned long minsz;

		minsz = offsetofend(struct vfio_group_status, flags);

		if (copy_from_user(&status, (void __user *)arg, minsz))
			return -EFAULT;

		if (status.argsz < minsz)
			return -EINVAL;

		status.flags = 0;

		if (vfio_group_viable(group))
			status.flags |= VFIO_GROUP_FLAGS_VIABLE;

		if (group->container)
			status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

		if (copy_to_user((void __user *)arg, &status, minsz))
			return -EFAULT;

		ret = 0;
		break;
	}
	case VFIO_GROUP_SET_CONTAINER:
	{
		int fd;

		if (get_user(fd, (int __user *)arg))
			return -EFAULT;

		if (fd < 0)
			return -EINVAL;

		ret = vfio_group_set_container(group, fd);
		break;
	}
	case VFIO_GROUP_UNSET_CONTAINER:
		ret = vfio_group_unset_container(group);
		break;
	case VFIO_GROUP_GET_DEVICE_FD:
	{
		char *buf;

		buf = strndup_user((const char __user *)arg, PAGE_SIZE);
		if (IS_ERR(buf))
			return PTR_ERR(buf);

		ret = vfio_group_get_device_fd(group, buf);
		kfree(buf);
		break;
	}
	}

	return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_group_fops_compat_ioctl(struct file *filep,
					 unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_group_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
	struct vfio_group *group;
	int opened;

	group = vfio_group_get_from_minor(iminor(inode));
	if (!group)
		return -ENODEV;

	if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
		vfio_group_put(group);
		return -EPERM;
	}

	/* Do we need multiple instances of the group open?  Seems not. */
	opened = atomic_cmpxchg(&group->opened, 0, 1);
	if (opened) {
		vfio_group_put(group);
		return -EBUSY;
	}

	/* Is something still in use from a previous open? */
	if (group->container) {
		atomic_dec(&group->opened);
		vfio_group_put(group);
		return -EBUSY;
	}

	filep->private_data = group;

	return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_group *group = filep->private_data;

	filep->private_data = NULL;

	vfio_group_try_dissolve_container(group);

	atomic_dec(&group->opened);

	vfio_group_put(group);

	return 0;
}

static const struct file_operations vfio_group_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_group_fops_compat_ioctl,
#endif
	.open		= vfio_group_fops_open,
	.release	= vfio_group_fops_release,
};

/**
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
	struct vfio_device *device = filep->private_data;

	device->ops->release(device->device_data);

	vfio_group_try_dissolve_container(device->group);

	vfio_device_put(device);

	return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
				       unsigned int cmd, unsigned long arg)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->ioctl))
		return -EINVAL;

	return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->read))
		return -EINVAL;

	return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
				      const char __user *buf,
				      size_t count, loff_t *ppos)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->write))
		return -EINVAL;

	return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct vfio_device *device = filep->private_data;

	if (unlikely(!device->ops->mmap))
		return -EINVAL;

	return device->ops->mmap(device->device_data, vma);
}

#ifdef CONFIG_COMPAT
static long vfio_device_fops_compat_ioctl(struct file *filep,
					  unsigned int cmd, unsigned long arg)
{
	arg = (unsigned long)compat_ptr(arg);
	return vfio_device_fops_unl_ioctl(filep, cmd, arg);
}
#endif	/* CONFIG_COMPAT */

static const struct file_operations vfio_device_fops = {
	.owner		= THIS_MODULE,
	.release	= vfio_device_fops_release,
	.read		= vfio_device_fops_read,
	.write		= vfio_device_fops_write,
	.unlocked_ioctl	= vfio_device_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= vfio_device_fops_compat_ioctl,
#endif
	.mmap		= vfio_device_fops_mmap,
};
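
/*
 * Sketch (not built) of the provider side of the fops above: a
 * hypothetical vfio bus driver supplies vfio_device_ops, and each
 * device fop simply delegates to the matching callback.  All foo_*
 * names below are assumptions for illustration only.
 */
#if 0
static long foo_ioctl(void *device_data, unsigned int cmd, unsigned long arg)
{
	/* Implement VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO, ... */
	return -ENOTTY;
}

static const struct vfio_device_ops foo_vfio_ops = {
	.name		= "vfio-foo",
	.open		= foo_open,
	.release	= foo_release,
	.ioctl		= foo_ioctl,
	.read		= foo_read,
	.write		= foo_write,
	.mmap		= foo_mmap,
};

/* Typically registered from the bus driver's probe(): */
/*	vfio_add_group_dev(dev, &foo_vfio_ops, foo_private); */
#endif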

/**
 * External user API, exported by symbols to be linked dynamically.
 *
 * The protocol includes:
 *  1. do normal VFIO init operation:
 *	- opening a new container;
 *	- attaching group(s) to it;
 *	- setting an IOMMU driver for a container.
 * When IOMMU is set for a container, all groups in it are
 * considered ready to use by an external user.
 *
 * 2. User space passes a group fd to an external user.
 * The external user calls vfio_group_get_external_user()
 * to verify that:
 *	- the group is initialized;
 *	- IOMMU is set for it.
 * If both checks pass, vfio_group_get_external_user()
 * increments the container user counter to prevent the VFIO
 * group from being disposed of before the external user exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 * to obtain the IOMMU group ID.
 *
 * 4. When the external user (e.g. KVM) finishes, it calls
 * vfio_group_put_external_user() to release the VFIO group.
 * This call decrements the container user counter.
 */
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
	struct vfio_group *group = filep->private_data;
	int ret;

	if (filep->f_op != &vfio_group_fops)
		return ERR_PTR(-EINVAL);

	ret = vfio_group_add_container_user(group);
	if (ret)
		return ERR_PTR(ret);

	vfio_group_get(group);

	return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);

void vfio_group_put_external_user(struct vfio_group *group)
{
	vfio_group_try_dissolve_container(group);
	vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
	return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
	return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);
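
/*
 * Sketch (not built): how an external user such as KVM's vfio pseudo
 * device might consume the API above, given a group fd passed in from
 * userspace.  The kvm_vfio_bind_group() name and the trimmed error
 * handling are assumptions for illustration only.
 */
#if 0
static int kvm_vfio_bind_group(int group_fd)
{
	struct fd f = fdget(group_fd);
	struct vfio_group *group;
	int iommu_id;

	if (!f.file)
		return -EBADF;

	/* Fails unless the group is attached to an IOMMU-backed container */
	group = vfio_group_get_external_user(f.file);
	fdput(f);
	if (IS_ERR(group))
		return PTR_ERR(group);

	iommu_id = vfio_external_user_iommu_id(group);
	/* ... look up and track the iommu_group by iommu_id ... */

	/* On teardown, drop the container-user and group references: */
	vfio_group_put_external_user(group);
	return 0;
}
#endif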

/**
 * Sub-module support
 */
/*
 * Helper for managing a buffer of info chain capabilities: allocate or
 * reallocate a buffer with additional @size, filling in @id and @version
 * of the capability.  A pointer to the new capability is returned.
 *
 * NB. The chain is based at the head of the buffer, so new entries are
 * added to the tail; vfio_info_cap_shift() should be called to fix up
 * the next offsets prior to copying to the user buffer.
 */
struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
					       size_t size, u16 id, u16 version)
{
	void *buf;
	struct vfio_info_cap_header *header, *tmp;

	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
	if (!buf) {
		kfree(caps->buf);
		caps->size = 0;
		return ERR_PTR(-ENOMEM);
	}

	caps->buf = buf;
	header = buf + caps->size;

	/* Eventually copied to user buffer, zero */
	memset(header, 0, size);

	header->id = id;
	header->version = version;

	/* Add to the end of the capability chain */
	for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next)
		; /* nothing */

	tmp->next = caps->size;
	caps->size += size;

	return header;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_add);

void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
{
	struct vfio_info_cap_header *tmp;

	for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset)
		tmp->next += offset;
}
EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
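
/*
 * Sketch (not built): how an ioctl implementation might use the two
 * helpers above.  Capabilities are appended to a scratch buffer with
 * vfio_info_cap_add(), then the buffer-relative next offsets are
 * shifted by the size of the fixed info struct before copying out.
 * The foo_cap struct, FOO_CAP_ID, and the surrounding ioctl context
 * (info, arg, ret) are assumptions for illustration only.
 */
#if 0
struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
struct vfio_info_cap_header *header;

header = vfio_info_cap_add(&caps, sizeof(struct foo_cap), FOO_CAP_ID, 1);
if (IS_ERR(header))
	return PTR_ERR(header);
/* Fill in container_of(header, struct foo_cap, header) here */

/* Make the chain offsets relative to the start of the user buffer */
vfio_info_cap_shift(&caps, sizeof(info));
if (copy_to_user((void __user *)arg + sizeof(info), caps.buf, caps.size))
	ret = -EFAULT;
kfree(caps.buf);
#endif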

/*
 * Pin a set of guest PFNs and return their associated host PFNs for local
 * domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be pinned.
 * @npage [in]   : count of elements in user_pfn array.  This count should
 *		   not be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * @prot [in]    : protection flags
 * @phys_pfn[out]: array of host PFNs
 * Return error or number of pages pinned.
 */
int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
		   int prot, unsigned long *phys_pfn)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !phys_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (IS_ERR(group))
		return PTR_ERR(group);

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_pin_pages;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->pin_pages))
		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
					     npage, prot, phys_pfn);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

err_pin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_pin_pages);

/*
 * Unpin a set of host PFNs for local domain only.
 * @dev [in]     : device
 * @user_pfn [in]: array of user/guest PFNs to be unpinned.
 * @npage [in]   : count of elements in user_pfn array.  This count should
 *		   not be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
 * Return error or number of pages unpinned.
 */
int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
{
	struct vfio_container *container;
	struct vfio_group *group;
	struct vfio_iommu_driver *driver;
	int ret;

	if (!dev || !user_pfn || !npage)
		return -EINVAL;

	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
		return -E2BIG;

	group = vfio_group_get_from_dev(dev);
	if (IS_ERR(group))
		return PTR_ERR(group);

	ret = vfio_group_add_container_user(group);
	if (ret)
		goto err_unpin_pages;

	container = group->container;
	down_read(&container->group_lock);

	driver = container->iommu_driver;
	if (likely(driver && driver->ops->unpin_pages))
		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
					       npage);
	else
		ret = -ENOTTY;

	up_read(&container->group_lock);
	vfio_group_try_dissolve_container(group);

err_unpin_pages:
	vfio_group_put(group);
	return ret;
}
EXPORT_SYMBOL(vfio_unpin_pages);
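
/*
 * Sketch (not built): how a mediated-device vendor driver might pin a
 * single guest page for DMA and unpin it afterwards.  The gpa value,
 * the mdev_dev() accessor and the single-page usage are assumptions
 * for illustration only.
 */
#if 0
unsigned long user_pfn = gpa >> PAGE_SHIFT;
unsigned long phys_pfn;
int ret;

ret = vfio_pin_pages(mdev_dev(mdev), &user_pfn, 1,
		     IOMMU_READ | IOMMU_WRITE, &phys_pfn);
if (ret != 1)
	return ret < 0 ? ret : -EFAULT;

/* ... program device DMA to page frame phys_pfn ... */

vfio_unpin_pages(mdev_dev(mdev), &user_pfn, 1);
#endif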

/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
	.minor = VFIO_MINOR,
	.name = "vfio",
	.fops = &vfio_fops,
	.nodename = "vfio/vfio",
	.mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
	int ret;

	idr_init(&vfio.group_idr);
	mutex_init(&vfio.group_lock);
	mutex_init(&vfio.iommu_drivers_lock);
	INIT_LIST_HEAD(&vfio.group_list);
	INIT_LIST_HEAD(&vfio.iommu_drivers_list);
	init_waitqueue_head(&vfio.release_q);

	ret = misc_register(&vfio_dev);
	if (ret) {
		pr_err("vfio: misc device register failed\n");
		return ret;
	}

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto err_class;
	}

	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK, "vfio");
	if (ret)
		goto err_alloc_chrdev;

	cdev_init(&vfio.group_cdev, &vfio_group_fops);
	ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK);
	if (ret)
		goto err_cdev_add;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

	/*
	 * Attempt to load known iommu-drivers.  This gives us a working
	 * environment without the user needing to explicitly load iommu
	 * drivers.
	 */
	request_module_nowait("vfio_iommu_type1");
	request_module_nowait("vfio_iommu_spapr_tce");

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_register_iommu_driver(&vfio_noiommu_ops);
#endif
	return 0;

err_cdev_add:
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
err_alloc_chrdev:
	class_destroy(vfio.class);
	vfio.class = NULL;
err_class:
	misc_deregister(&vfio_dev);
	return ret;
}

static void __exit vfio_cleanup(void)
{
	WARN_ON(!list_empty(&vfio.group_list));

#ifdef CONFIG_VFIO_NOIOMMU
	vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
	idr_destroy(&vfio.group_idr);
	cdev_del(&vfio.group_cdev);
	unregister_chrdev_region(vfio.group_devt, MINORMASK);
	class_destroy(vfio.class);
	vfio.class = NULL;
	misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");