Merge tag 'vfio-v5.10-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - New fsl-mc vfio bus driver supporting userspace drivers of objects
   within NXP's DPAA2 architecture (Diana Craciun)

 - Support for exposing zPCI information on s390 (Matthew Rosato)

 - Fixes for "detached" VFs on s390 (Matthew Rosato)

 - Fixes for pin-pages and dma-rw accesses (Yan Zhao)

 - Cleanups and optimize vconfig regen (Zenghui Yu)

 - Fix duplicate irq-bypass token registration (Alex Williamson)

* tag 'vfio-v5.10-rc1' of git://github.com/awilliam/linux-vfio: (30 commits)
  vfio iommu type1: Fix memory leak in vfio_iommu_type1_pin_pages
  vfio/pci: Clear token on bypass registration failure
  vfio/fsl-mc: fix the return of the uninitialized variable ret
  vfio/fsl-mc: Fix the dead code in vfio_fsl_mc_set_irq_trigger
  vfio/fsl-mc: Fixed vfio-fsl-mc driver compilation on 32 bit
  MAINTAINERS: Add entry for s390 vfio-pci
  vfio-pci/zdev: Add zPCI capabilities to VFIO_DEVICE_GET_INFO
  vfio/fsl-mc: Add support for device reset
  vfio/fsl-mc: Add read/write support for fsl-mc devices
  vfio/fsl-mc: trigger an interrupt via eventfd
  vfio/fsl-mc: Add irq infrastructure for fsl-mc devices
  vfio/fsl-mc: Added lock support in preparation for interrupt handling
  vfio/fsl-mc: Allow userspace to MMAP fsl-mc device MMIO regions
  vfio/fsl-mc: Implement VFIO_DEVICE_GET_REGION_INFO ioctl call
  vfio/fsl-mc: Implement VFIO_DEVICE_GET_INFO ioctl
  vfio/fsl-mc: Scan DPRC objects on vfio-fsl-mc driver bind
  vfio: Introduce capability definitions for VFIO_DEVICE_GET_INFO
  s390/pci: track whether util_str is valid in the zpci_dev
  s390/pci: stash version in the zpci_dev
  vfio/fsl-mc: Add VFIO framework skeleton for fsl-mc devices
  ...
diff --git a/MAINTAINERS b/MAINTAINERS
index 8b82a02..f5570ae 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15287,6 +15287,14 @@
 F:	drivers/s390/cio/vfio_ccw*
 F:	include/uapi/linux/vfio_ccw.h
 
+S390 VFIO-PCI DRIVER
+M:	Matthew Rosato <mjrosato@linux.ibm.com>
+L:	linux-s390@vger.kernel.org
+L:	kvm@vger.kernel.org
+S:	Supported
+F:	drivers/vfio/pci/vfio_pci_zdev.c
+F:	include/uapi/linux/vfio_zdev.h
+
 S390 ZCRYPT DRIVER
 M:	Harald Freudenberger <freude@linux.ibm.com>
 L:	linux-s390@vger.kernel.org
@@ -18385,6 +18393,12 @@
 F:	include/linux/vfio.h
 F:	include/uapi/linux/vfio.h
 
+VFIO FSL-MC DRIVER
+M:	Diana Craciun <diana.craciun@oss.nxp.com>
+L:	kvm@vger.kernel.org
+S:	Maintained
+F:	drivers/vfio/fsl-mc/
+
 VFIO MEDIATED DEVICE DRIVERS
 M:	Kirti Wankhede <kwankhede@nvidia.com>
 L:	kvm@vger.kernel.org
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index b5380a2..2126289 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -132,7 +132,8 @@ struct zpci_dev {
 	u8		rid_available	: 1;
 	u8		has_hp_slot	: 1;
 	u8		is_physfn	: 1;
-	u8		reserved	: 5;
+	u8		util_str_avail	: 1;
+	u8		reserved	: 4;
 	unsigned int	devfn;		/* DEVFN part of the RID*/
 
 	struct mutex lock;
@@ -179,6 +180,7 @@ struct zpci_dev {
 	atomic64_t mapped_pages;
 	atomic64_t unmapped_pages;
 
+	u8		version;
 	enum pci_bus_speed max_bus_speed;
 
 	struct dentry	*debugfs_dev;
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 0c0db7c..755b46f 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -135,9 +135,10 @@ void pcibios_bus_add_device(struct pci_dev *pdev)
 	 * With pdev->no_vf_scan the common PCI probing code does not
 	 * perform PF/VF linking.
 	 */
-	if (zdev->vfn)
+	if (zdev->vfn) {
 		zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
-
+		pdev->no_command_memory = 1;
+	}
 }
 
 static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 5a34a13..153720d 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -102,6 +102,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
 	zdev->msi_addr = response->msia;
 	zdev->max_msi = response->noi;
 	zdev->fmb_update = response->mui;
+	zdev->version = response->version;
 
 	switch (response->version) {
 	case 1:
@@ -167,6 +168,7 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
 	if (response->util_str_avail) {
 		memcpy(zdev->util_str, response->util_str,
 		       sizeof(zdev->util_str));
+		zdev->util_str_avail = 1;
 	}
 	zdev->mio_capable = response->mio_addr_avail;
 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index b37e08c..4afd4ee 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -180,6 +180,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id)
 	virtfn->device = iov->vf_device;
 	virtfn->is_virtfn = 1;
 	virtfn->physfn = pci_dev_get(dev);
+	virtfn->no_command_memory = 1;
 
 	if (id == 0)
 		pci_read_vf_config_common(virtfn);
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index fd17db9..5533df9 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -47,4 +47,5 @@
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
 source "drivers/vfio/mdev/Kconfig"
+source "drivers/vfio/fsl-mc/Kconfig"
 source "virt/lib/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index de67c47..fee73f3 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -9,3 +9,4 @@
 obj-$(CONFIG_VFIO_PCI) += pci/
 obj-$(CONFIG_VFIO_PLATFORM) += platform/
 obj-$(CONFIG_VFIO_MDEV) += mdev/
+obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
diff --git a/drivers/vfio/fsl-mc/Kconfig b/drivers/vfio/fsl-mc/Kconfig
new file mode 100644
index 0000000..b1a527d
--- /dev/null
+++ b/drivers/vfio/fsl-mc/Kconfig
@@ -0,0 +1,9 @@
+config VFIO_FSL_MC
+	tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
+	depends on VFIO && FSL_MC_BUS && EVENTFD
+	help
+	  Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
+	  (Management Complex) devices. This is required to passthrough
+	  fsl-mc bus devices using the VFIO framework.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/vfio/fsl-mc/Makefile b/drivers/vfio/fsl-mc/Makefile
new file mode 100644
index 0000000..cad6dbf
--- /dev/null
+++ b/drivers/vfio/fsl-mc/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+
+vfio-fsl-mc-y := vfio_fsl_mc.o vfio_fsl_mc_intr.o
+obj-$(CONFIG_VFIO_FSL_MC) += vfio-fsl-mc.o
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
new file mode 100644
index 0000000..0113a98
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -0,0 +1,683 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016-2017,2019-2020 NXP
+ */
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/vfio.h>
+#include <linux/fsl/mc.h>
+#include <linux/delay.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
+
+#include "vfio_fsl_mc_private.h"
+
+static struct fsl_mc_driver vfio_fsl_mc_driver;
+
+static DEFINE_MUTEX(reflck_lock);
+
+static void vfio_fsl_mc_reflck_get(struct vfio_fsl_mc_reflck *reflck)
+{
+	kref_get(&reflck->kref);
+}
+
+static void vfio_fsl_mc_reflck_release(struct kref *kref)
+{
+	struct vfio_fsl_mc_reflck *reflck = container_of(kref,
+						      struct vfio_fsl_mc_reflck,
+						      kref);
+
+	mutex_destroy(&reflck->lock);
+	kfree(reflck);
+	mutex_unlock(&reflck_lock);
+}
+
+static void vfio_fsl_mc_reflck_put(struct vfio_fsl_mc_reflck *reflck)
+{
+	kref_put_mutex(&reflck->kref, vfio_fsl_mc_reflck_release, &reflck_lock);
+}
+
+static struct vfio_fsl_mc_reflck *vfio_fsl_mc_reflck_alloc(void)
+{
+	struct vfio_fsl_mc_reflck *reflck;
+
+	reflck = kzalloc(sizeof(*reflck), GFP_KERNEL);
+	if (!reflck)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&reflck->kref);
+	mutex_init(&reflck->lock);
+
+	return reflck;
+}
+
+static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev)
+{
+	int ret = 0;
+
+	mutex_lock(&reflck_lock);
+	if (is_fsl_mc_bus_dprc(vdev->mc_dev)) {
+		vdev->reflck = vfio_fsl_mc_reflck_alloc();
+		ret = PTR_ERR_OR_ZERO(vdev->reflck);
+	} else {
+		struct device *mc_cont_dev = vdev->mc_dev->dev.parent;
+		struct vfio_device *device;
+		struct vfio_fsl_mc_device *cont_vdev;
+
+		device = vfio_device_get_from_dev(mc_cont_dev);
+		if (!device) {
+			ret = -ENODEV;
+			goto unlock;
+		}
+
+		cont_vdev = vfio_device_data(device);
+		if (!cont_vdev || !cont_vdev->reflck) {
+			vfio_device_put(device);
+			ret = -ENODEV;
+			goto unlock;
+		}
+		vfio_fsl_mc_reflck_get(cont_vdev->reflck);
+		vdev->reflck = cont_vdev->reflck;
+		vfio_device_put(device);
+	}
+
+unlock:
+	mutex_unlock(&reflck_lock);
+	return ret;
+}
+
+static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	int count = mc_dev->obj_desc.region_count;
+	int i;
+
+	vdev->regions = kcalloc(count, sizeof(struct vfio_fsl_mc_region),
+				GFP_KERNEL);
+	if (!vdev->regions)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		struct resource *res = &mc_dev->regions[i];
+		int no_mmap = is_fsl_mc_bus_dprc(mc_dev);
+
+		vdev->regions[i].addr = res->start;
+		vdev->regions[i].size = resource_size(res);
+		vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS;
+		/*
+		 * Only regions addressed with PAGE granularity may be
+		 * MMAPed securely.
+		 */
+		if (!no_mmap && !(vdev->regions[i].addr & ~PAGE_MASK) &&
+				!(vdev->regions[i].size & ~PAGE_MASK))
+			vdev->regions[i].flags |=
+					VFIO_REGION_INFO_FLAG_MMAP;
+		vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
+		if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
+			vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
+	}
+
+	return 0;
+}
+
+static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	int i;
+
+	for (i = 0; i < mc_dev->obj_desc.region_count; i++)
+		iounmap(vdev->regions[i].ioaddr);
+	kfree(vdev->regions);
+}
+
+static int vfio_fsl_mc_open(void *device_data)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	int ret;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	mutex_lock(&vdev->reflck->lock);
+	if (!vdev->refcnt) {
+		ret = vfio_fsl_mc_regions_init(vdev);
+		if (ret)
+			goto err_reg_init;
+	}
+	vdev->refcnt++;
+
+	mutex_unlock(&vdev->reflck->lock);
+
+	return 0;
+
+err_reg_init:
+	mutex_unlock(&vdev->reflck->lock);
+	module_put(THIS_MODULE);
+	return ret;
+}
+
+static void vfio_fsl_mc_release(void *device_data)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	int ret;
+
+	mutex_lock(&vdev->reflck->lock);
+
+	if (!(--vdev->refcnt)) {
+		struct fsl_mc_device *mc_dev = vdev->mc_dev;
+		struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
+		struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);
+
+		vfio_fsl_mc_regions_cleanup(vdev);
+
+		/* reset the device before cleaning up the interrupts */
+		ret = dprc_reset_container(mc_cont->mc_io, 0,
+		      mc_cont->mc_handle,
+			  mc_cont->obj_desc.id,
+			  DPRC_RESET_OPTION_NON_RECURSIVE);
+
+		if (ret) {
+			dev_warn(&mc_cont->dev, "VFIO_FLS_MC: reset device has failed (%d)\n",
+				 ret);
+			WARN_ON(1);
+		}
+
+		vfio_fsl_mc_irqs_cleanup(vdev);
+
+		fsl_mc_cleanup_irq_pool(mc_cont);
+	}
+
+	mutex_unlock(&vdev->reflck->lock);
+
+	module_put(THIS_MODULE);
+}
+
+static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
+			      unsigned long arg)
+{
+	unsigned long minsz;
+	struct vfio_fsl_mc_device *vdev = device_data;
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+
+	switch (cmd) {
+	case VFIO_DEVICE_GET_INFO:
+	{
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		info.flags = VFIO_DEVICE_FLAGS_FSL_MC;
+
+		if (is_fsl_mc_bus_dprc(mc_dev))
+			info.flags |= VFIO_DEVICE_FLAGS_RESET;
+
+		info.num_regions = mc_dev->obj_desc.region_count;
+		info.num_irqs = mc_dev->obj_desc.irq_count;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	}
+	case VFIO_DEVICE_GET_REGION_INFO:
+	{
+		struct vfio_region_info info;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (info.index >= mc_dev->obj_desc.region_count)
+			return -EINVAL;
+
+		/* map offset to the physical address  */
+		info.offset = VFIO_FSL_MC_INDEX_TO_OFFSET(info.index);
+		info.size = vdev->regions[info.index].size;
+		info.flags = vdev->regions[info.index].flags;
+
+		return copy_to_user((void __user *)arg, &info, minsz);
+	}
+	case VFIO_DEVICE_GET_IRQ_INFO:
+	{
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		if (info.index >= mc_dev->obj_desc.irq_count)
+			return -EINVAL;
+
+		info.flags = VFIO_IRQ_INFO_EVENTFD;
+		info.count = 1;
+
+		return copy_to_user((void __user *)arg, &info, minsz);
+	}
+	case VFIO_DEVICE_SET_IRQS:
+	{
+		struct vfio_irq_set hdr;
+		u8 *data = NULL;
+		int ret = 0;
+		size_t data_size = 0;
+
+		minsz = offsetofend(struct vfio_irq_set, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		ret = vfio_set_irqs_validate_and_prepare(&hdr, mc_dev->obj_desc.irq_count,
+					mc_dev->obj_desc.irq_count, &data_size);
+		if (ret)
+			return ret;
+
+		if (data_size) {
+			data = memdup_user((void __user *)(arg + minsz),
+				   data_size);
+			if (IS_ERR(data))
+				return PTR_ERR(data);
+		}
+
+		mutex_lock(&vdev->igate);
+		ret = vfio_fsl_mc_set_irqs_ioctl(vdev, hdr.flags,
+						 hdr.index, hdr.start,
+						 hdr.count, data);
+		mutex_unlock(&vdev->igate);
+		kfree(data);
+
+		return ret;
+	}
+	case VFIO_DEVICE_RESET:
+	{
+		int ret;
+		struct fsl_mc_device *mc_dev = vdev->mc_dev;
+
+		/* reset is supported only for the DPRC */
+		if (!is_fsl_mc_bus_dprc(mc_dev))
+			return -ENOTTY;
+
+		ret = dprc_reset_container(mc_dev->mc_io, 0,
+					   mc_dev->mc_handle,
+					   mc_dev->obj_desc.id,
+					   DPRC_RESET_OPTION_NON_RECURSIVE);
+		return ret;
+
+	}
+	default:
+		return -ENOTTY;
+	}
+}
+
+static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	struct vfio_fsl_mc_region *region;
+	u64 data[8];
+	int i;
+
+	if (index >= mc_dev->obj_desc.region_count)
+		return -EINVAL;
+
+	region = &vdev->regions[index];
+
+	if (!(region->flags & VFIO_REGION_INFO_FLAG_READ))
+		return -EINVAL;
+
+	if (!region->ioaddr) {
+		region->ioaddr = ioremap(region->addr, region->size);
+		if (!region->ioaddr)
+			return -ENOMEM;
+	}
+
+	if (count != 64 || off != 0)
+		return -EINVAL;
+
+	for (i = 7; i >= 0; i--)
+		data[i] = readq(region->ioaddr + i * sizeof(uint64_t));
+
+	if (copy_to_user(buf, data, 64))
+		return -EFAULT;
+
+	return count;
+}
+
+#define MC_CMD_COMPLETION_TIMEOUT_MS    5000
+#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS    500
+
+static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
+{
+	int i;
+	enum mc_cmd_status status;
+	unsigned long timeout_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
+
+	/* Write at command parameter into portal */
+	for (i = 7; i >= 1; i--)
+		writeq_relaxed(cmd_data[i], ioaddr + i * sizeof(uint64_t));
+
+	/* Write command header in the end */
+	writeq(cmd_data[0], ioaddr);
+
+	/* Wait for response before returning to user-space
+	 * This can be optimized in future to even prepare response
+	 * before returning to user-space and avoid read ioctl.
+	 */
+	for (;;) {
+		u64 header;
+		struct mc_cmd_header *resp_hdr;
+
+		header = cpu_to_le64(readq_relaxed(ioaddr));
+
+		resp_hdr = (struct mc_cmd_header *)&header;
+		status = (enum mc_cmd_status)resp_hdr->status;
+		if (status != MC_CMD_STATUS_READY)
+			break;
+
+		udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
+		timeout_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
+		if (timeout_usecs == 0)
+			return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
+	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	struct vfio_fsl_mc_region *region;
+	u64 data[8];
+	int ret;
+
+	if (index >= mc_dev->obj_desc.region_count)
+		return -EINVAL;
+
+	region = &vdev->regions[index];
+
+	if (!(region->flags & VFIO_REGION_INFO_FLAG_WRITE))
+		return -EINVAL;
+
+	if (!region->ioaddr) {
+		region->ioaddr = ioremap(region->addr, region->size);
+		if (!region->ioaddr)
+			return -ENOMEM;
+	}
+
+	if (count != 64 || off != 0)
+		return -EINVAL;
+
+	if (copy_from_user(&data, buf, 64))
+		return -EFAULT;
+
+	ret = vfio_fsl_mc_send_command(region->ioaddr, data);
+	if (ret)
+		return ret;
+
+	return count;
+
+}
+
+static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
+				 struct vm_area_struct *vma)
+{
+	u64 size = vma->vm_end - vma->vm_start;
+	u64 pgoff, base;
+	u8 region_cacheable;
+
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	base = pgoff << PAGE_SHIFT;
+
+	if (region.size < PAGE_SIZE || base + size > region.size)
+		return -EINVAL;
+
+	region_cacheable = (region.type & FSL_MC_REGION_CACHEABLE) &&
+			   (region.type & FSL_MC_REGION_SHAREABLE);
+	if (!region_cacheable)
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + pgoff;
+
+	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+			       size, vma->vm_page_prot);
+}
+
+static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma)
+{
+	struct vfio_fsl_mc_device *vdev = device_data;
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	int index;
+
+	index = vma->vm_pgoff >> (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT);
+
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+	if (vma->vm_start & ~PAGE_MASK)
+		return -EINVAL;
+	if (vma->vm_end & ~PAGE_MASK)
+		return -EINVAL;
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+	if (index >= mc_dev->obj_desc.region_count)
+		return -EINVAL;
+
+	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
+		return -EINVAL;
+
+	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ)
+			&& (vma->vm_flags & VM_READ))
+		return -EINVAL;
+
+	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE)
+			&& (vma->vm_flags & VM_WRITE))
+		return -EINVAL;
+
+	vma->vm_private_data = mc_dev;
+
+	return vfio_fsl_mc_mmap_mmio(vdev->regions[index], vma);
+}
+
+static const struct vfio_device_ops vfio_fsl_mc_ops = {
+	.name		= "vfio-fsl-mc",
+	.open		= vfio_fsl_mc_open,
+	.release	= vfio_fsl_mc_release,
+	.ioctl		= vfio_fsl_mc_ioctl,
+	.read		= vfio_fsl_mc_read,
+	.write		= vfio_fsl_mc_write,
+	.mmap		= vfio_fsl_mc_mmap,
+};
+
+static int vfio_fsl_mc_bus_notifier(struct notifier_block *nb,
+				    unsigned long action, void *data)
+{
+	struct vfio_fsl_mc_device *vdev = container_of(nb,
+					struct vfio_fsl_mc_device, nb);
+	struct device *dev = data;
+	struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
+	struct fsl_mc_device *mc_cont = to_fsl_mc_device(mc_dev->dev.parent);
+
+	if (action == BUS_NOTIFY_ADD_DEVICE &&
+	    vdev->mc_dev == mc_cont) {
+		mc_dev->driver_override = kasprintf(GFP_KERNEL, "%s",
+						    vfio_fsl_mc_ops.name);
+		if (!mc_dev->driver_override)
+			dev_warn(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s failed\n",
+				 dev_name(&mc_cont->dev));
+		else
+			dev_info(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s\n",
+				 dev_name(&mc_cont->dev));
+	} else if (action == BUS_NOTIFY_BOUND_DRIVER &&
+		vdev->mc_dev == mc_cont) {
+		struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver);
+
+		if (mc_drv && mc_drv != &vfio_fsl_mc_driver)
+			dev_warn(dev, "VFIO_FSL_MC: Object %s bound to driver %s while DPRC bound to vfio-fsl-mc\n",
+				 dev_name(dev), mc_drv->driver.name);
+	}
+
+	return 0;
+}
+
+static int vfio_fsl_mc_init_device(struct vfio_fsl_mc_device *vdev)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	int ret;
+
+	/* Non-dprc devices share mc_io from parent */
+	if (!is_fsl_mc_bus_dprc(mc_dev)) {
+		struct fsl_mc_device *mc_cont = to_fsl_mc_device(mc_dev->dev.parent);
+
+		mc_dev->mc_io = mc_cont->mc_io;
+		return 0;
+	}
+
+	vdev->nb.notifier_call = vfio_fsl_mc_bus_notifier;
+	ret = bus_register_notifier(&fsl_mc_bus_type, &vdev->nb);
+	if (ret)
+		return ret;
+
+	/* open DPRC, allocate a MC portal */
+	ret = dprc_setup(mc_dev);
+	if (ret) {
+		dev_err(&mc_dev->dev, "VFIO_FSL_MC: Failed to setup DPRC (%d)\n", ret);
+		goto out_nc_unreg;
+	}
+
+	ret = dprc_scan_container(mc_dev, false);
+	if (ret) {
+		dev_err(&mc_dev->dev, "VFIO_FSL_MC: Container scanning failed (%d)\n", ret);
+		goto out_dprc_cleanup;
+	}
+
+	return 0;
+
+out_dprc_cleanup:
+	dprc_remove_devices(mc_dev, NULL, 0);
+	dprc_cleanup(mc_dev);
+out_nc_unreg:
+	bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
+	vdev->nb.notifier_call = NULL;
+
+	return ret;
+}
+
+static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
+{
+	struct iommu_group *group;
+	struct vfio_fsl_mc_device *vdev;
+	struct device *dev = &mc_dev->dev;
+	int ret;
+
+	group = vfio_iommu_group_get(dev);
+	if (!group) {
+		dev_err(dev, "VFIO_FSL_MC: No IOMMU group\n");
+		return -EINVAL;
+	}
+
+	vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL);
+	if (!vdev) {
+		ret = -ENOMEM;
+		goto out_group_put;
+	}
+
+	vdev->mc_dev = mc_dev;
+
+	ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev);
+	if (ret) {
+		dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
+		goto out_group_put;
+	}
+
+	ret = vfio_fsl_mc_reflck_attach(vdev);
+	if (ret)
+		goto out_group_dev;
+
+	ret = vfio_fsl_mc_init_device(vdev);
+	if (ret)
+		goto out_reflck;
+
+	mutex_init(&vdev->igate);
+
+	return 0;
+
+out_reflck:
+	vfio_fsl_mc_reflck_put(vdev->reflck);
+out_group_dev:
+	vfio_del_group_dev(dev);
+out_group_put:
+	vfio_iommu_group_put(group, dev);
+	return ret;
+}
+
+static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
+{
+	struct vfio_fsl_mc_device *vdev;
+	struct device *dev = &mc_dev->dev;
+
+	vdev = vfio_del_group_dev(dev);
+	if (!vdev)
+		return -EINVAL;
+
+	mutex_destroy(&vdev->igate);
+
+	vfio_fsl_mc_reflck_put(vdev->reflck);
+
+	if (is_fsl_mc_bus_dprc(mc_dev)) {
+		dprc_remove_devices(mc_dev, NULL, 0);
+		dprc_cleanup(mc_dev);
+	}
+
+	if (vdev->nb.notifier_call)
+		bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
+
+	vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);
+
+	return 0;
+}
+
+static struct fsl_mc_driver vfio_fsl_mc_driver = {
+	.probe		= vfio_fsl_mc_probe,
+	.remove		= vfio_fsl_mc_remove,
+	.driver	= {
+		.name	= "vfio-fsl-mc",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init vfio_fsl_mc_driver_init(void)
+{
+	return fsl_mc_driver_register(&vfio_fsl_mc_driver);
+}
+
+static void __exit vfio_fsl_mc_driver_exit(void)
+{
+	fsl_mc_driver_unregister(&vfio_fsl_mc_driver);
+}
+
+module_init(vfio_fsl_mc_driver_init);
+module_exit(vfio_fsl_mc_driver_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("VFIO for FSL-MC devices - User Level meta-driver");
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
new file mode 100644
index 0000000..c80dceb
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2019 NXP
+ */
+
+#include <linux/vfio.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/eventfd.h>
+#include <linux/msi.h>
+
+#include "linux/fsl/mc.h"
+#include "vfio_fsl_mc_private.h"
+
+int vfio_fsl_mc_irqs_allocate(struct vfio_fsl_mc_device *vdev)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	struct vfio_fsl_mc_irq *mc_irq;
+	int irq_count;
+	int ret, i;
+
+	/* Device does not support any interrupt */
+	if (mc_dev->obj_desc.irq_count == 0)
+		return 0;
+
+	/* interrupts were already allocated for this device */
+	if (vdev->mc_irqs)
+		return 0;
+
+	irq_count = mc_dev->obj_desc.irq_count;
+
+	mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL);
+	if (!mc_irq)
+		return -ENOMEM;
+
+	/* Allocate IRQs */
+	ret = fsl_mc_allocate_irqs(mc_dev);
+	if (ret) {
+		kfree(mc_irq);
+		return ret;
+	}
+
+	for (i = 0; i < irq_count; i++) {
+		mc_irq[i].count = 1;
+		mc_irq[i].flags = VFIO_IRQ_INFO_EVENTFD;
+	}
+
+	vdev->mc_irqs = mc_irq;
+
+	return 0;
+}
+
+static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg)
+{
+	struct vfio_fsl_mc_irq *mc_irq = (struct vfio_fsl_mc_irq *)arg;
+
+	eventfd_signal(mc_irq->trigger, 1);
+	return IRQ_HANDLED;
+}
+
+static int vfio_set_trigger(struct vfio_fsl_mc_device *vdev,
+						   int index, int fd)
+{
+	struct vfio_fsl_mc_irq *irq = &vdev->mc_irqs[index];
+	struct eventfd_ctx *trigger;
+	int hwirq;
+	int ret;
+
+	hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;
+	if (irq->trigger) {
+		free_irq(hwirq, irq);
+		kfree(irq->name);
+		eventfd_ctx_put(irq->trigger);
+		irq->trigger = NULL;
+	}
+
+	if (fd < 0) /* Disable only */
+		return 0;
+
+	irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)",
+			    hwirq, dev_name(&vdev->mc_dev->dev));
+	if (!irq->name)
+		return -ENOMEM;
+
+	trigger = eventfd_ctx_fdget(fd);
+	if (IS_ERR(trigger)) {
+		kfree(irq->name);
+		return PTR_ERR(trigger);
+	}
+
+	irq->trigger = trigger;
+
+	ret = request_irq(hwirq, vfio_fsl_mc_irq_handler, 0,
+		  irq->name, irq);
+	if (ret) {
+		kfree(irq->name);
+		eventfd_ctx_put(trigger);
+		irq->trigger = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev,
+				       unsigned int index, unsigned int start,
+				       unsigned int count, u32 flags,
+				       void *data)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	int ret, hwirq;
+	struct vfio_fsl_mc_irq *irq;
+	struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
+	struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);
+
+	if (!count && (flags & VFIO_IRQ_SET_DATA_NONE))
+		return vfio_set_trigger(vdev, index, -1);
+
+	if (start != 0 || count != 1)
+		return -EINVAL;
+
+	mutex_lock(&vdev->reflck->lock);
+	ret = fsl_mc_populate_irq_pool(mc_cont,
+			FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS);
+	if (ret)
+		goto unlock;
+
+	ret = vfio_fsl_mc_irqs_allocate(vdev);
+	if (ret)
+		goto unlock;
+	mutex_unlock(&vdev->reflck->lock);
+
+	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+		s32 fd = *(s32 *)data;
+
+		return vfio_set_trigger(vdev, index, fd);
+	}
+
+	hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;
+
+	irq = &vdev->mc_irqs[index];
+
+	if (flags & VFIO_IRQ_SET_DATA_NONE) {
+		vfio_fsl_mc_irq_handler(hwirq, irq);
+
+	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+		u8 trigger = *(u8 *)data;
+
+		if (trigger)
+			vfio_fsl_mc_irq_handler(hwirq, irq);
+	}
+
+	return 0;
+
+unlock:
+	mutex_unlock(&vdev->reflck->lock);
+	return ret;
+
+}
+
+int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
+			       u32 flags, unsigned int index,
+			       unsigned int start, unsigned int count,
+			       void *data)
+{
+	if (flags & VFIO_IRQ_SET_ACTION_TRIGGER)
+		return  vfio_fsl_mc_set_irq_trigger(vdev, index, start,
+			  count, flags, data);
+	else
+		return -EINVAL;
+}
+
+/* Free All IRQs for the given MC object */
+void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev)
+{
+	struct fsl_mc_device *mc_dev = vdev->mc_dev;
+	int irq_count = mc_dev->obj_desc.irq_count;
+	int i;
+
+	/*
+	 * Device does not support any interrupt or the interrupts
+	 * were not configured
+	 */
+	if (!vdev->mc_irqs)
+		return;
+
+	for (i = 0; i < irq_count; i++)
+		vfio_set_trigger(vdev, i, -1);
+
+	fsl_mc_free_irqs(mc_dev);
+	kfree(vdev->mc_irqs);
+	vdev->mc_irqs = NULL;
+}
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
new file mode 100644
index 0000000..a97ee69
--- /dev/null
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_private.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/*
+ * Copyright 2013-2016 Freescale Semiconductor Inc.
+ * Copyright 2016,2019-2020 NXP
+ */
+
+#ifndef VFIO_FSL_MC_PRIVATE_H
+#define VFIO_FSL_MC_PRIVATE_H
+
+#define VFIO_FSL_MC_OFFSET_SHIFT    40
+#define VFIO_FSL_MC_OFFSET_MASK (((u64)(1) << VFIO_FSL_MC_OFFSET_SHIFT) - 1)
+
+#define VFIO_FSL_MC_OFFSET_TO_INDEX(off) ((off) >> VFIO_FSL_MC_OFFSET_SHIFT)
+
+#define VFIO_FSL_MC_INDEX_TO_OFFSET(index)	\
+	((u64)(index) << VFIO_FSL_MC_OFFSET_SHIFT)
+
+struct vfio_fsl_mc_irq {
+	u32         flags;
+	u32         count;
+	struct eventfd_ctx  *trigger;
+	char            *name;
+};
+
+struct vfio_fsl_mc_reflck {
+	struct kref		kref;
+	struct mutex		lock;
+};
+
+struct vfio_fsl_mc_region {
+	u32			flags;
+	u32			type;
+	u64			addr;
+	resource_size_t		size;
+	void __iomem		*ioaddr;
+};
+
+struct vfio_fsl_mc_device {
+	struct fsl_mc_device		*mc_dev;
+	struct notifier_block        nb;
+	int				refcnt;
+	struct vfio_fsl_mc_region	*regions;
+	struct vfio_fsl_mc_reflck   *reflck;
+	struct mutex         igate;
+	struct vfio_fsl_mc_irq      *mc_irqs;
+};
+
+extern int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
+			       u32 flags, unsigned int index,
+			       unsigned int start, unsigned int count,
+			       void *data);
+
+void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev);
+
+#endif /* VFIO_FSL_MC_PRIVATE_H */
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index ac3c1dd..40a2233 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -45,3 +45,15 @@
 	depends on VFIO_PCI && PPC_POWERNV
 	help
 	  VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
+
+config VFIO_PCI_ZDEV
+	bool "VFIO PCI ZPCI device CLP support"
+	depends on VFIO_PCI && S390
+	default y
+	help
+	  Enabling this option exposes VFIO capabilities containing hardware
+	  configuration for zPCI devices. This enables userspace (e.g. QEMU)
+	  to supply proper configuration values instead of hard-coded defaults
+	  for zPCI devices passed through via VFIO on s390.
+
+	  Say Y here.
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index f027f8a..781e080 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -3,5 +3,6 @@
 vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
+vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o
 
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index b0f4b92..fbd2b34 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -807,15 +807,25 @@ static long vfio_pci_ioctl(void *device_data,
 
 	if (cmd == VFIO_DEVICE_GET_INFO) {
 		struct vfio_device_info info;
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		unsigned long capsz;
 
 		minsz = offsetofend(struct vfio_device_info, num_irqs);
 
+		/* For backward compatibility, cannot require this */
+		capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
+
 		if (copy_from_user(&info, (void __user *)arg, minsz))
 			return -EFAULT;
 
 		if (info.argsz < minsz)
 			return -EINVAL;
 
+		if (info.argsz >= capsz) {
+			minsz = capsz;
+			info.cap_offset = 0;
+		}
+
 		info.flags = VFIO_DEVICE_FLAGS_PCI;
 
 		if (vdev->reset_works)
@@ -824,6 +834,33 @@ static long vfio_pci_ioctl(void *device_data,
 		info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
 		info.num_irqs = VFIO_PCI_NUM_IRQS;
 
+		if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
+			int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
+
+			if (ret && ret != -ENODEV) {
+				pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
+				return ret;
+			}
+		}
+
+		if (caps.size) {
+			info.flags |= VFIO_DEVICE_FLAGS_CAPS;
+			if (info.argsz < sizeof(info) + caps.size) {
+				info.argsz = sizeof(info) + caps.size;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				if (copy_to_user((void __user *)arg +
+						  sizeof(info), caps.buf,
+						  caps.size)) {
+					kfree(caps.buf);
+					return -EFAULT;
+				}
+				info.cap_offset = sizeof(info);
+			}
+
+			kfree(caps.buf);
+		}
+
 		return copy_to_user((void __user *)arg, &info, minsz) ?
 			-EFAULT : 0;
 
@@ -1860,7 +1897,6 @@ static const struct vfio_device_ops vfio_pci_ops = {
 
 static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
 static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
-static struct pci_driver vfio_pci_driver;
 
 static int vfio_pci_bus_notifier(struct notifier_block *nb,
 				 unsigned long action, void *data)
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index d98843f..a402ade 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -406,7 +406,7 @@ bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)
 	 * PF SR-IOV capability, there's therefore no need to trigger
 	 * faults based on the virtual value.
 	 */
-	return pdev->is_virtfn || (cmd & PCI_COMMAND_MEMORY);
+	return pdev->no_command_memory || (cmd & PCI_COMMAND_MEMORY);
 }
 
 /*
@@ -467,6 +467,9 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
 	__le32 *vbar;
 	u64 mask;
 
+	if (!vdev->bardirty)
+		return;
+
 	vbar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0];
 
 	for (i = 0; i < PCI_STD_NUM_BARS; i++, vbar++) {
@@ -520,8 +523,8 @@ static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos,
 
 	count = vfio_default_config_read(vdev, pos, count, perm, offset, val);
 
-	/* Mask in virtual memory enable for SR-IOV devices */
-	if (offset == PCI_COMMAND && vdev->pdev->is_virtfn) {
+	/* Mask in virtual memory enable */
+	if (offset == PCI_COMMAND && vdev->pdev->no_command_memory) {
 		u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
 		u32 tmp_val = le32_to_cpu(*val);
 
@@ -589,9 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
 		 * shows it disabled (phys_mem/io, then the device has
 		 * undergone some kind of backdoor reset and needs to be
 		 * restored before we allow it to enable the bars.
-		 * SR-IOV devices will trigger this, but we catch them later
+		 * SR-IOV devices will trigger this - for mem enable let's
+		 * catch this now and for io enable it will be caught later
 		 */
-		if ((new_mem && virt_mem && !phys_mem) ||
+		if ((new_mem && virt_mem && !phys_mem &&
+		     !pdev->no_command_memory) ||
 		    (new_io && virt_io && !phys_io) ||
 		    vfio_need_bar_restore(vdev))
 			vfio_bar_restore(vdev);
@@ -1734,12 +1739,14 @@ int vfio_config_init(struct vfio_pci_device *vdev)
 				 vconfig[PCI_INTERRUPT_PIN]);
 
 		vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */
-
+	}
+	if (pdev->no_command_memory) {
 		/*
-		 * VFs do no implement the memory enable bit of the COMMAND
-		 * register therefore we'll not have it set in our initial
-		 * copy of config space after pci_enable_device().  For
-		 * consistency with PFs, set the virtual enable bit here.
+		 * VFs and devices that set pdev->no_command_memory do not
+		 * implement the memory enable bit of the COMMAND register
+		 * therefore we'll not have it set in our initial copy of
+		 * config space after pci_enable_device().  For consistency
+		 * with PFs, set the virtual enable bit here.
 		 */
 		*(__le16 *)&vconfig[PCI_COMMAND] |=
 					cpu_to_le16(PCI_COMMAND_MEMORY);
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 1d9fb25..869dce5 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -352,11 +352,13 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
 	vdev->ctx[vector].producer.token = trigger;
 	vdev->ctx[vector].producer.irq = irq;
 	ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
-	if (unlikely(ret))
+	if (unlikely(ret)) {
 		dev_info(&pdev->dev,
 		"irq bypass producer (token %p) registration fails: %d\n",
 		vdev->ctx[vector].producer.token, ret);
 
+		vdev->ctx[vector].producer.token = NULL;
+	}
 	vdev->ctx[vector].trigger = trigger;
 
 	return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 61ca8ab..5c90e56 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -213,4 +213,16 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
 	return -ENODEV;
 }
 #endif
+
+#ifdef CONFIG_VFIO_PCI_ZDEV
+extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
+				       struct vfio_info_cap *caps);
+#else
+static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
+					      struct vfio_info_cap *caps)
+{
+	return -ENODEV;
+}
+#endif
+
 #endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
new file mode 100644
index 0000000..2296856
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VFIO ZPCI devices support
+ *
+ * Copyright (C) IBM Corp. 2020.  All rights reserved.
+ *	Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ *                 Matthew Rosato <mjrosato@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/vfio_zdev.h>
+#include <asm/pci_clp.h>
+#include <asm/pci_io.h>
+
+#include "vfio_pci_private.h"
+
+/*
+ * Add the Base PCI Function information to the device info region.
+ */
+static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
+			 struct vfio_info_cap *caps)
+{
+	struct vfio_device_info_cap_zpci_base cap = {
+		.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE,
+		.header.version = 1,
+		.start_dma = zdev->start_dma,
+		.end_dma = zdev->end_dma,
+		.pchid = zdev->pchid,
+		.vfn = zdev->vfn,
+		.fmb_length = zdev->fmb_length,
+		.pft = zdev->pft,
+		.gid = zdev->pfgid
+	};
+
+	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+/*
+ * Add the Base PCI Function Group information to the device info region.
+ */
+static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
+			  struct vfio_info_cap *caps)
+{
+	struct vfio_device_info_cap_zpci_group cap = {
+		.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP,
+		.header.version = 1,
+		.dasm = zdev->dma_mask,
+		.msi_addr = zdev->msi_addr,
+		.flags = VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH,
+		.mui = zdev->fmb_update,
+		.noi = zdev->max_msi,
+		.maxstbl = ZPCI_MAX_WRITE_SIZE,
+		.version = zdev->version
+	};
+
+	return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+/*
+ * Add the device utility string to the device info region.
+ */
+static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
+			 struct vfio_info_cap *caps)
+{
+	struct vfio_device_info_cap_zpci_util *cap;
+	int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN;
+	int ret;
+
+	cap = kmalloc(cap_size, GFP_KERNEL);
+
+	cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL;
+	cap->header.version = 1;
+	cap->size = CLP_UTIL_STR_LEN;
+	memcpy(cap->util_str, zdev->util_str, cap->size);
+
+	ret = vfio_info_add_capability(caps, &cap->header, cap_size);
+
+	kfree(cap);
+
+	return ret;
+}
+
+/*
+ * Add the function path string to the device info region.
+ */
+static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
+			 struct vfio_info_cap *caps)
+{
+	struct vfio_device_info_cap_zpci_pfip *cap;
+	int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS;
+	int ret;
+
+	cap = kmalloc(cap_size, GFP_KERNEL);
+
+	cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP;
+	cap->header.version = 1;
+	cap->size = CLP_PFIP_NR_SEGMENTS;
+	memcpy(cap->pfip, zdev->pfip, cap->size);
+
+	ret = vfio_info_add_capability(caps, &cap->header, cap_size);
+
+	kfree(cap);
+
+	return ret;
+}
+
+/*
+ * Add all supported capabilities to the VFIO_DEVICE_GET_INFO capability chain.
+ */
+int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
+				struct vfio_info_cap *caps)
+{
+	struct zpci_dev *zdev = to_zpci(vdev->pdev);
+	int ret;
+
+	if (!zdev)
+		return -ENODEV;
+
+	ret = zpci_base_cap(zdev, vdev, caps);
+	if (ret)
+		return ret;
+
+	ret = zpci_group_cap(zdev, vdev, caps);
+	if (ret)
+		return ret;
+
+	if (zdev->util_str_avail) {
+		ret = zpci_util_cap(zdev, vdev, caps);
+		if (ret)
+			return ret;
+	}
+
+	ret = zpci_pfip_cap(zdev, vdev, caps);
+
+	return ret;
+}
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 262ab0e..2151bc7 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1949,8 +1949,10 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
 	if (!group)
 		return -ENODEV;
 
-	if (group->dev_counter > 1)
-		return -EINVAL;
+	if (group->dev_counter > 1) {
+		ret = -EINVAL;
+		goto err_pin_pages;
+	}
 
 	ret = vfio_group_add_container_user(group);
 	if (ret)
@@ -2051,6 +2053,9 @@ int vfio_group_pin_pages(struct vfio_group *group,
 	if (!group || !user_iova_pfn || !phys_pfn || !npage)
 		return -EINVAL;
 
+	if (group->dev_counter > 1)
+		return -EINVAL;
+
 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
 		return -E2BIG;
 
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index c255a66..bb2684c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -693,7 +693,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 
 		ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
 		if (ret) {
-			vfio_unpin_page_external(dma, iova, do_accounting);
+			if (put_pfn(phys_pfn[i], dma->prot) && do_accounting)
+				vfio_lock_acct(dma, -1, true);
 			goto pin_unwind;
 		}
 
@@ -2609,6 +2610,20 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
 	return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
 }
 
+static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu,
+					   struct vfio_info_cap *caps)
+{
+	struct vfio_iommu_type1_info_dma_avail cap_dma_avail;
+
+	cap_dma_avail.header.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL;
+	cap_dma_avail.header.version = 1;
+
+	cap_dma_avail.avail = iommu->dma_avail;
+
+	return vfio_info_add_capability(caps, &cap_dma_avail.header,
+					sizeof(cap_dma_avail));
+}
+
 static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
 				     unsigned long arg)
 {
@@ -2642,6 +2657,9 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
 	ret = vfio_iommu_migration_build_caps(iommu, &caps);
 
 	if (!ret)
+		ret = vfio_iommu_dma_avail_build_caps(iommu, &caps);
+
+	if (!ret)
 		ret = vfio_iommu_iova_build_caps(iommu, &caps);
 
 	mutex_unlock(&iommu->lock);
@@ -2933,7 +2951,8 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
 			 * size
 			 */
 			bitmap_set(dma->bitmap, offset >> pgshift,
-				   *copied >> pgshift);
+				   ((offset + *copied - 1) >> pgshift) -
+				   (offset >> pgshift) + 1);
 		}
 	} else
 		*copied = copy_from_user(data, (void __user *)vaddr,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 32cd2b5..22207a7 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -446,6 +446,7 @@ struct pci_dev {
 	unsigned int	is_probed:1;		/* Device probing in progress */
 	unsigned int	link_active_reporting:1;/* Device capable of reporting link active */
 	unsigned int	no_vf_scan:1;		/* Don't scan for VFs after IOV enablement */
+	unsigned int	no_command_memory:1;	/* No PCI_COMMAND_MEMORY */
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9204705..2f313a2 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -201,8 +201,11 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
 #define VFIO_DEVICE_FLAGS_CCW	(1 << 4)	/* vfio-ccw device */
 #define VFIO_DEVICE_FLAGS_AP	(1 << 5)	/* vfio-ap device */
+#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6)	/* vfio-fsl-mc device */
+#define VFIO_DEVICE_FLAGS_CAPS	(1 << 7)	/* Info supports caps */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
+	__u32   cap_offset;	/* Offset within info struct of first cap */
 };
 #define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
 
@@ -218,6 +221,15 @@ struct vfio_device_info {
 #define VFIO_DEVICE_API_CCW_STRING		"vfio-ccw"
 #define VFIO_DEVICE_API_AP_STRING		"vfio-ap"
 
+/*
+ * The following capabilities are unique to s390 zPCI devices.  Their contents
+ * are further-defined in vfio_zdev.h
+ */
+#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE		1
+#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP		2
+#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL		3
+#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP		4
+
 /**
  * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
  *				       struct vfio_region_info)
@@ -462,7 +474,7 @@ struct vfio_region_gfx_edid {
  * 5. Resumed
  *                  |--------->|
  *
- * 0. Default state of VFIO device is _RUNNNG when the user application starts.
+ * 0. Default state of VFIO device is _RUNNING when the user application starts.
  * 1. During normal shutdown of the user application, the user application may
  *    optionally change the VFIO device state from _RUNNING to _STOP. This
  *    transition is optional. The vendor driver must support this transition but
@@ -1039,6 +1051,21 @@ struct vfio_iommu_type1_info_cap_migration {
 	__u64	max_dirty_bitmap_size;		/* in bytes */
 };
 
+/*
+ * The DMA available capability allows to report the current number of
+ * simultaneously outstanding DMA mappings that are allowed.
+ *
+ * The structure below defines version 1 of this capability.
+ *
+ * avail: specifies the current number of outstanding DMA mappings allowed.
+ */
+#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3
+
+struct vfio_iommu_type1_info_dma_avail {
+	struct	vfio_info_cap_header header;
+	__u32	avail;
+};
+
 #define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
 
 /**
diff --git a/include/uapi/linux/vfio_zdev.h b/include/uapi/linux/vfio_zdev.h
new file mode 100644
index 0000000..b430939
--- /dev/null
+++ b/include/uapi/linux/vfio_zdev.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * VFIO Region definitions for ZPCI devices
+ *
+ * Copyright IBM Corp. 2020
+ *
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ *            Matthew Rosato <mjrosato@linux.ibm.com>
+ */
+
+#ifndef _VFIO_ZDEV_H_
+#define _VFIO_ZDEV_H_
+
+#include <linux/types.h>
+#include <linux/vfio.h>
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_BASE - Base PCI Function information
+ *
+ * This capability provides a set of descriptive information about the
+ * associated PCI function.
+ */
+struct vfio_device_info_cap_zpci_base {
+	struct vfio_info_cap_header header;
+	__u64 start_dma;	/* Start of available DMA addresses */
+	__u64 end_dma;		/* End of available DMA addresses */
+	__u16 pchid;		/* Physical Channel ID */
+	__u16 vfn;		/* Virtual function number */
+	__u16 fmb_length;	/* Measurement Block Length (in bytes) */
+	__u8 pft;		/* PCI Function Type */
+	__u8 gid;		/* PCI function group ID */
+};
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_GROUP - Base PCI Function Group information
+ *
+ * This capability provides a set of descriptive information about the group of
+ * PCI functions that the associated device belongs to.
+ */
+struct vfio_device_info_cap_zpci_group {
+	struct vfio_info_cap_header header;
+	__u64 dasm;		/* DMA Address space mask */
+	__u64 msi_addr;		/* MSI address */
+	__u64 flags;
+#define VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH 1 /* Program-specified TLB refresh */
+	__u16 mui;		/* Measurement Block Update Interval */
+	__u16 noi;		/* Maximum number of MSIs */
+	__u16 maxstbl;		/* Maximum Store Block Length */
+	__u8 version;		/* Supported PCI Version */
+};
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_UTIL - Utility String
+ *
+ * This capability provides the utility string for the associated device, which
+ * is a device identifier string made up of EBCDID characters.  'size' specifies
+ * the length of 'util_str'.
+ */
+struct vfio_device_info_cap_zpci_util {
+	struct vfio_info_cap_header header;
+	__u32 size;
+	__u8 util_str[];
+};
+
+/**
+ * VFIO_DEVICE_INFO_CAP_ZPCI_PFIP - PCI Function Path
+ *
+ * This capability provides the PCI function path string, which is an identifier
+ * that describes the internal hardware path of the device. 'size' specifies
+ * the length of 'pfip'.
+ */
+struct vfio_device_info_cap_zpci_pfip {
+	struct vfio_info_cap_header header;
+	__u32 size;
+	__u8 pfip[];
+};
+
+#endif