vfio/noiommu: Don't use iommu_present() to track fake groups
Using iommu_present() to determine whether an IOMMU group is real or
fake has some problems. First, apparently Power systems don't
register an IOMMU on the device bus, so the groups and containers get
marked as noiommu and then won't bind to their actual IOMMU driver.
Second, I expect we'll run into the same issue as we try to support
vGPUs through vfio, since they're likely to emulate this behavior of
creating an IOMMU group on a virtual device and then providing a vfio
IOMMU backend tailored to the sort of isolation they provide, which
won't necessarily be fully compatible with the IOMMU API.
The solution here is to use the existing iommudata interface to IOMMU
groups, which allows us to easily identify the fake groups we've
created for noiommu purposes. The iommudata we set is purely
arbitrary since we're only comparing the address, so we use the
address of the noiommu switch itself.
Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Tested-by: Anatoly Burakov <anatoly.burakov@intel.com>
Tested-by: Santosh Shukla <sshukla@mvista.com>
Fixes: 03a76b60f8ba ("vfio: Include No-IOMMU mode")
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 82f25cc..ecca316 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -123,8 +123,8 @@
/*
* With noiommu enabled, an IOMMU group will be created for a device
* that doesn't already have one and doesn't have an iommu_ops on their
- * bus. We use iommu_present() again in the main code to detect these
- * fake groups.
+ * bus. We set iommudata simply to be able to identify these groups
+ * as special use and for reclamation later.
*/
if (group || !noiommu || iommu_present(dev->bus))
return group;
@@ -134,6 +134,7 @@
return NULL;
iommu_group_set_name(group, "vfio-noiommu");
+ iommu_group_set_iommudata(group, &noiommu, NULL);
ret = iommu_group_add_device(group, dev);
iommu_group_put(group);
if (ret)
@@ -158,7 +159,7 @@
void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
{
#ifdef CONFIG_VFIO_NOIOMMU
- if (!iommu_present(dev->bus))
+ if (iommu_group_get_iommudata(group) == &noiommu)
iommu_group_remove_device(dev);
#endif
@@ -190,16 +191,10 @@
return -ENOTTY;
}
-static int vfio_iommu_present(struct device *dev, void *unused)
-{
- return iommu_present(dev->bus) ? 1 : 0;
-}
-
static int vfio_noiommu_attach_group(void *iommu_data,
struct iommu_group *iommu_group)
{
- return iommu_group_for_each_dev(iommu_group, NULL,
- vfio_iommu_present) ? -EINVAL : 0;
+ return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
}
static void vfio_noiommu_detach_group(void *iommu_data,
@@ -323,8 +318,7 @@
/**
* Group objects - create, release, get, put, search
*/
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
- bool iommu_present)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
struct vfio_group *group, *tmp;
struct device *dev;
@@ -342,7 +336,9 @@
atomic_set(&group->container_users, 0);
atomic_set(&group->opened, 0);
group->iommu_group = iommu_group;
- group->noiommu = !iommu_present;
+#ifdef CONFIG_VFIO_NOIOMMU
+ group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
+#endif
group->nb.notifier_call = vfio_iommu_group_notifier;
@@ -767,7 +763,7 @@
group = vfio_group_get_from_iommu(iommu_group);
if (!group) {
- group = vfio_create_group(iommu_group, iommu_present(dev->bus));
+ group = vfio_create_group(iommu_group);
if (IS_ERR(group)) {
iommu_group_put(iommu_group);
return PTR_ERR(group);