Merge branch 'linus/master' into rdma.git for-next

rdma.git merge resolution for the 4.19 merge window

Conflicts:
 drivers/infiniband/core/rdma_core.c
   - Use the rdma code and revise with the new spelling for
     atomic_fetch_add_unless
 drivers/nvme/host/rdma.c
   - Replace max_sge with max_send_sge in new blk code
 drivers/nvme/target/rdma.c
   - Use the blk code and revise to use NULL for ib_post_recv when
     appropriate
   - Replace max_sge with max_recv_sge in new blk code
 net/rds/ib_send.c
   - Use the net code and revise to use NULL for ib_post_recv when
     appropriate (see the sketch after this list)
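
A minimal sketch of the API changes behind these resolutions (illustrative
only, not taken from the resolved files; repost_one_recv() and
try_get_usecnt() are made-up helper names):

	#include <linux/atomic.h>
	#include <linux/types.h>
	#include <rdma/ib_verbs.h>

	/* rdma_core.c: the helper formerly spelled __atomic_add_unless() is now
	 * atomic_fetch_add_unless(); both return the counter's old value.
	 */
	static bool try_get_usecnt(atomic_t *usecnt)
	{
		/* take a reference unless the count has already dropped to zero */
		return atomic_fetch_add_unless(usecnt, 1, 0) != 0;
	}

	/* nvme/rds: ib_device_attr's max_sge is split into max_send_sge and
	 * max_recv_sge, and ib_post_recv() accepts NULL for bad_wr when the
	 * caller never inspects the failed WR.
	 */
	static int repost_one_recv(struct ib_qp *qp, struct ib_sge *sge, u64 wr_id)
	{
		struct ib_recv_wr wr = {
			.wr_id   = wr_id,
			.sg_list = sge,
			.num_sge = 1,	/* limited by the device's max_recv_sge, formerly max_sge */
		};

		/* old style: struct ib_recv_wr *bad_wr; ib_post_recv(qp, &wr, &bad_wr); */
		return ib_post_recv(qp, &wr, NULL);
	}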

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
diff --git a/.mailmap b/.mailmap
index d96147e..2a6f685 100644
--- a/.mailmap
+++ b/.mailmap
@@ -31,6 +31,8 @@
 Arnd Bergmann <arnd@arndb.de>
 Axel Dyks <xl@xlsigned.net>
 Axel Lin <axel.lin@gmail.com>
+Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
+Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
 Ben Gardner <bgardner@wabtec.com>
 Ben M Cahill <ben.m.cahill@intel.com>
 Björn Steinbrink <B.Steinbrink@gmx.de>
diff --git a/MAINTAINERS b/MAINTAINERS
index 3d08725..f72e524 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3541,7 +3541,6 @@
 
 CISCO VIC LOW LATENCY NIC DRIVER
 M:	Christian Benvenuti <benve@cisco.com>
-M:	Dave Goodell <dgoodell@cisco.com>
 S:	Supported
 F:	drivers/infiniband/hw/usnic/
 
@@ -7628,9 +7627,8 @@
 F:	drivers/firmware/iscsi_ibft*
 
 ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
-M:	Or Gerlitz <ogerlitz@mellanox.com>
 M:	Sagi Grimberg <sagi@grimberg.me>
-M:	Roi Dayan <roid@mellanox.com>
+M:	Max Gurtovoy <maxg@mellanox.com>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 W:	http://www.openfabrics.org
@@ -12759,15 +12757,21 @@
 F:	drivers/scsi/sr*
 
 SCSI RDMA PROTOCOL (SRP) INITIATOR
-M:	Bart Van Assche <bart.vanassche@sandisk.com>
+M:	Bart Van Assche <bvanassche@acm.org>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
-W:	http://www.openfabrics.org
 Q:	http://patchwork.kernel.org/project/linux-rdma/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dad/srp-initiator.git
 F:	drivers/infiniband/ulp/srp/
 F:	include/scsi/srp.h
 
+SCSI RDMA PROTOCOL (SRP) TARGET
+M:	Bart Van Assche <bvanassche@acm.org>
+L:	linux-rdma@vger.kernel.org
+L:	target-devel@vger.kernel.org
+S:	Supported
+Q:	http://patchwork.kernel.org/project/linux-rdma/list/
+F:	drivers/infiniband/ulp/srpt/
+
 SCSI SG DRIVER
 M:	Doug Gilbert <dgilbert@interlog.com>
 L:	linux-scsi@vger.kernel.org
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index b03af54..d160d2d 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -37,7 +37,7 @@
 
 config INFINIBAND_USER_ACCESS_UCM
 	bool "Userspace CM (UCM, DEPRECATED)"
-	depends on BROKEN
+	depends on BROKEN || COMPILE_TEST
 	depends on INFINIBAND_USER_ACCESS
 	help
 	  The UCM module has known security flaws, which no one is
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 6166770..867cee5 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -35,6 +35,7 @@
 
 ib_uverbs-y :=			uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
 				rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
-				uverbs_ioctl_merge.o uverbs_std_types_cq.o \
+				uverbs_std_types_cq.o \
 				uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
-				uverbs_std_types_mr.o uverbs_std_types_counters.o
+				uverbs_std_types_mr.o uverbs_std_types_counters.o \
+				uverbs_uapi.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 4f32c40..46b855a 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -188,7 +188,7 @@ static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
 	return -ENODATA;
 }
 
-int rdma_addr_size(struct sockaddr *addr)
+int rdma_addr_size(const struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET:
@@ -315,19 +315,17 @@ static int dst_fetch_ha(const struct dst_entry *dst,
 	int ret = 0;
 
 	n = dst_neigh_lookup(dst, daddr);
+	if (!n)
+		return -ENODATA;
 
-	rcu_read_lock();
-	if (!n || !(n->nud_state & NUD_VALID)) {
-		if (n)
-			neigh_event_send(n, NULL);
+	if (!(n->nud_state & NUD_VALID)) {
+		neigh_event_send(n, NULL);
 		ret = -ENODATA;
 	} else {
 		rdma_copy_addr(dev_addr, dst->dev, n->ha);
 	}
-	rcu_read_unlock();
 
-	if (n)
-		neigh_release(n);
+	neigh_release(n);
 
 	return ret;
 }
@@ -587,7 +585,7 @@ static void process_one_req(struct work_struct *_work)
 	spin_unlock_bh(&lock);
 }
 
-int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 81d66f5..0bee1f4 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -66,20 +66,28 @@ enum gid_attr_find_mask {
 	GID_ATTR_FIND_MASK_GID_TYPE	= 1UL << 3,
 };
 
-enum gid_table_entry_props {
-	GID_TABLE_ENTRY_INVALID		= 1UL << 0,
-	GID_TABLE_ENTRY_DEFAULT		= 1UL << 1,
+enum gid_table_entry_state {
+	GID_TABLE_ENTRY_INVALID		= 1,
+	GID_TABLE_ENTRY_VALID		= 2,
+	/*
+	 * Indicates that the entry is pending removal; there may
+	 * still be active users of this GID entry.
+	 * When the last user releases its reference, the GID entry
+	 * is detached from the table.
+	 */
+	GID_TABLE_ENTRY_PENDING_DEL	= 3,
 };
 
 struct ib_gid_table_entry {
-	unsigned long	    props;
-	union ib_gid        gid;
-	struct ib_gid_attr  attr;
-	void		   *context;
+	struct kref			kref;
+	struct work_struct		del_work;
+	struct ib_gid_attr		attr;
+	void				*context;
+	enum gid_table_entry_state	state;
 };
 
 struct ib_gid_table {
-	int                  sz;
+	int				sz;
 	/* In RoCE, adding a GID to the table requires:
 	 * (a) Find if this GID is already exists.
 	 * (b) Find a free space.
@@ -91,13 +99,16 @@ struct ib_gid_table {
 	 *
 	 **/
 	/* Any writer to data_vec must hold this lock and the write side of
-	 * rwlock. readers must hold only rwlock. All writers must be in a
+	 * rwlock. Readers must hold only rwlock. All writers must be in a
 	 * sleepable context.
 	 */
-	struct mutex         lock;
-	/* rwlock protects data_vec[ix]->props. */
-	rwlock_t	     rwlock;
-	struct ib_gid_table_entry *data_vec;
+	struct mutex			lock;
+	/* rwlock protects data_vec[ix]->state and entry pointer.
+	 */
+	rwlock_t			rwlock;
+	struct ib_gid_table_entry	**data_vec;
+	/* bit field; each set bit marks an index reserved for a default GID */
+	u32				default_gid_indices;
 };
 
 static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
@@ -135,6 +146,19 @@ bool rdma_is_zero_gid(const union ib_gid *gid)
 }
 EXPORT_SYMBOL(rdma_is_zero_gid);
 
+/**
+ * is_gid_index_default - Check if a given index belongs to
+ * reserved default GIDs or not.
+ * @table:	GID table pointer
+ * @index:	Index to check in GID table
+ *
+ * Returns true if the index is one of the reserved default GID indices,
+ * otherwise returns false.
+ */
+static bool is_gid_index_default(const struct ib_gid_table *table,
+				 unsigned int index)
+{
+	return index < 32 && (BIT(index) & table->default_gid_indices);
+}
+
 int ib_cache_gid_parse_type_str(const char *buf)
 {
 	unsigned int i;
@@ -164,26 +188,136 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
 	return device->cache.ports[port - rdma_start_port(device)].gid;
 }
 
-static void del_roce_gid(struct ib_device *device, u8 port_num,
-			 struct ib_gid_table *table, int ix)
+static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
 {
-	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-		 device->name, port_num, ix,
-		 table->data_vec[ix].gid.raw);
-
-	if (rdma_cap_roce_gid_table(device, port_num))
-		device->del_gid(&table->data_vec[ix].attr,
-				&table->data_vec[ix].context);
-	dev_put(table->data_vec[ix].attr.ndev);
+	return !entry;
 }
 
-static int add_roce_gid(struct ib_gid_table *table,
-			const union ib_gid *gid,
-			const struct ib_gid_attr *attr)
+static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
+{
+	return entry && entry->state == GID_TABLE_ENTRY_VALID;
+}
+
+static void schedule_free_gid(struct kref *kref)
+{
+	struct ib_gid_table_entry *entry =
+			container_of(kref, struct ib_gid_table_entry, kref);
+
+	queue_work(ib_wq, &entry->del_work);
+}
+
+static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
+{
+	struct ib_device *device = entry->attr.device;
+	u8 port_num = entry->attr.port_num;
+	struct ib_gid_table *table = rdma_gid_table(device, port_num);
+
+	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+		 device->name, port_num, entry->attr.index,
+		 entry->attr.gid.raw);
+
+	if (rdma_cap_roce_gid_table(device, port_num) &&
+	    entry->state != GID_TABLE_ENTRY_INVALID)
+		device->del_gid(&entry->attr, &entry->context);
+
+	write_lock_irq(&table->rwlock);
+
+	/*
+	 * Only clear the table slot if it still points to this entry.
+	 * If a new entry has been stored at this index by the time we
+	 * free here, don't overwrite it.
+	 */
+	if (entry == table->data_vec[entry->attr.index])
+		table->data_vec[entry->attr.index] = NULL;
+	/* Now this index is ready to be allocated */
+	write_unlock_irq(&table->rwlock);
+
+	if (entry->attr.ndev)
+		dev_put(entry->attr.ndev);
+	kfree(entry);
+}
+
+static void free_gid_entry(struct kref *kref)
+{
+	struct ib_gid_table_entry *entry =
+			container_of(kref, struct ib_gid_table_entry, kref);
+
+	free_gid_entry_locked(entry);
+}
+
+/**
+ * free_gid_work - Release reference to the GID entry
+ * @work: Work structure referring to the GID entry which needs to be
+ * deleted.
+ *
+ * free_gid_work() frees the entry from the HCA's hardware table
+ * if the provider supports it, and releases the reference to the netdevice.
+ */
+static void free_gid_work(struct work_struct *work)
+{
+	struct ib_gid_table_entry *entry =
+		container_of(work, struct ib_gid_table_entry, del_work);
+	struct ib_device *device = entry->attr.device;
+	u8 port_num = entry->attr.port_num;
+	struct ib_gid_table *table = rdma_gid_table(device, port_num);
+
+	mutex_lock(&table->lock);
+	free_gid_entry_locked(entry);
+	mutex_unlock(&table->lock);
+}
+
+static struct ib_gid_table_entry *
+alloc_gid_entry(const struct ib_gid_attr *attr)
 {
 	struct ib_gid_table_entry *entry;
-	int ix = attr->index;
-	int ret = 0;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return NULL;
+	kref_init(&entry->kref);
+	memcpy(&entry->attr, attr, sizeof(*attr));
+	if (entry->attr.ndev)
+		dev_hold(entry->attr.ndev);
+	INIT_WORK(&entry->del_work, free_gid_work);
+	entry->state = GID_TABLE_ENTRY_INVALID;
+	return entry;
+}
+
+static void store_gid_entry(struct ib_gid_table *table,
+			    struct ib_gid_table_entry *entry)
+{
+	entry->state = GID_TABLE_ENTRY_VALID;
+
+	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+		 entry->attr.device->name, entry->attr.port_num,
+		 entry->attr.index, entry->attr.gid.raw);
+
+	lockdep_assert_held(&table->lock);
+	write_lock_irq(&table->rwlock);
+	table->data_vec[entry->attr.index] = entry;
+	write_unlock_irq(&table->rwlock);
+}
+
+static void get_gid_entry(struct ib_gid_table_entry *entry)
+{
+	kref_get(&entry->kref);
+}
+
+static void put_gid_entry(struct ib_gid_table_entry *entry)
+{
+	kref_put(&entry->kref, schedule_free_gid);
+}
+
+static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
+{
+	kref_put(&entry->kref, free_gid_entry);
+}
+
+static int add_roce_gid(struct ib_gid_table_entry *entry)
+{
+	const struct ib_gid_attr *attr = &entry->attr;
+	int ret;
 
 	if (!attr->ndev) {
 		pr_err("%s NULL netdev device=%s port=%d index=%d\n",
@@ -191,38 +325,22 @@ static int add_roce_gid(struct ib_gid_table *table,
 		       attr->index);
 		return -EINVAL;
 	}
-
-	entry = &table->data_vec[ix];
-	if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
-		WARN(1, "GID table corruption device=%s port=%d index=%d\n",
-		     attr->device->name, attr->port_num,
-		     attr->index);
-		return -EINVAL;
-	}
-
 	if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
-		ret = attr->device->add_gid(gid, attr, &entry->context);
+		ret = attr->device->add_gid(attr, &entry->context);
 		if (ret) {
 			pr_err("%s GID add failed device=%s port=%d index=%d\n",
 			       __func__, attr->device->name, attr->port_num,
 			       attr->index);
-			goto add_err;
+			return ret;
 		}
 	}
-	dev_hold(attr->ndev);
-
-add_err:
-	if (!ret)
-		pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
-			 attr->device->name, attr->port_num, ix, gid->raw);
-	return ret;
+	return 0;
 }
 
 /**
  * add_modify_gid - Add or modify GID table entry
  *
  * @table:	GID table in which GID to be added or modified
- * @gid:	GID content
  * @attr:	Attributes of the GID
  *
  * Returns 0 on success or appropriate error code. It accepts zero
@@ -230,34 +348,42 @@ static int add_roce_gid(struct ib_gid_table *table,
  * GID. However such zero GIDs are not added to the cache.
  */
 static int add_modify_gid(struct ib_gid_table *table,
-			  const union ib_gid *gid,
 			  const struct ib_gid_attr *attr)
 {
-	int ret;
+	struct ib_gid_table_entry *entry;
+	int ret = 0;
+
+	/*
+	 * Invalidate any old entry in the table to make it safe to write to
+	 * this index.
+	 */
+	if (is_gid_entry_valid(table->data_vec[attr->index]))
+		put_gid_entry(table->data_vec[attr->index]);
+
+	/*
+	 * Some HCAs report multiple GID entries with only one valid GID, and
+	 * leave other unused entries as the zero GID. Convert zero GIDs to
+	 * empty table entries instead of storing them.
+	 */
+	if (rdma_is_zero_gid(&attr->gid))
+		return 0;
+
+	entry = alloc_gid_entry(attr);
+	if (!entry)
+		return -ENOMEM;
 
 	if (rdma_protocol_roce(attr->device, attr->port_num)) {
-		ret = add_roce_gid(table, gid, attr);
+		ret = add_roce_gid(entry);
 		if (ret)
-			return ret;
-	} else {
-		/*
-		 * Some HCA's report multiple GID entries with only one
-		 * valid GID, but remaining as zero GID.
-		 * So ignore such behavior for IB link layer and don't
-		 * fail the call, but don't add such entry to GID cache.
-		 */
-		if (rdma_is_zero_gid(gid))
-			return 0;
+			goto done;
 	}
 
-	lockdep_assert_held(&table->lock);
-	memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
-	memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
-
-	write_lock_irq(&table->rwlock);
-	table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
-	write_unlock_irq(&table->rwlock);
+	store_gid_entry(table, entry);
 	return 0;
+
+done:
+	put_gid_entry(entry);
+	return ret;
 }
 
 /**
@@ -272,16 +398,25 @@ static int add_modify_gid(struct ib_gid_table *table,
 static void del_gid(struct ib_device *ib_dev, u8 port,
 		    struct ib_gid_table *table, int ix)
 {
+	struct ib_gid_table_entry *entry;
+
 	lockdep_assert_held(&table->lock);
+
+	pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+		 ib_dev->name, port, ix,
+		 table->data_vec[ix]->attr.gid.raw);
+
 	write_lock_irq(&table->rwlock);
-	table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+	entry = table->data_vec[ix];
+	entry->state = GID_TABLE_ENTRY_PENDING_DEL;
+	/*
+	 * For non-RoCE protocols, the GID entry slot is immediately free for reuse.
+	 */
+	if (!rdma_protocol_roce(ib_dev, port))
+		table->data_vec[ix] = NULL;
 	write_unlock_irq(&table->rwlock);
 
-	if (rdma_protocol_roce(ib_dev, port))
-		del_roce_gid(ib_dev, port, table, ix);
-	memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
-	memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
-	table->data_vec[ix].context = NULL;
+	put_gid_entry_locked(entry);
 }
 
 /* rwlock should be read locked, or lock should be held */
@@ -294,8 +429,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 	int empty = pempty ? -1 : 0;
 
 	while (i < table->sz && (found < 0 || empty < 0)) {
-		struct ib_gid_table_entry *data = &table->data_vec[i];
-		struct ib_gid_attr *attr = &data->attr;
+		struct ib_gid_table_entry *data = table->data_vec[i];
+		struct ib_gid_attr *attr;
 		int curr_index = i;
 
 		i++;
@@ -306,9 +441,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 		 * so lookup free slot only if requested.
 		 */
 		if (pempty && empty < 0) {
-			if (data->props & GID_TABLE_ENTRY_INVALID &&
-			    (default_gid ==
-			     !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
+			if (is_gid_entry_free(data) &&
+			    default_gid ==
+				is_gid_index_default(table, curr_index)) {
 				/*
 				 * Found an invalid (free) entry; allocate it.
 				 * If default GID is requested, then our
@@ -323,22 +458,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 
 		/*
 		 * Additionally find_gid() is used to find valid entry during
-		 * lookup operation, where validity needs to be checked. So
-		 * find the empty entry first to continue to search for a free
-		 * slot and ignore its INVALID flag.
+		 * lookup operation; so ignore entries that are marked as
+		 * pending removal or as invalid.
 		 */
-		if (data->props & GID_TABLE_ENTRY_INVALID)
+		if (!is_gid_entry_valid(data))
 			continue;
 
 		if (found >= 0)
 			continue;
 
+		attr = &data->attr;
 		if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
 		    attr->gid_type != val->gid_type)
 			continue;
 
 		if (mask & GID_ATTR_FIND_MASK_GID &&
-		    memcmp(gid, &data->gid, sizeof(*gid)))
+		    memcmp(gid, &data->attr.gid, sizeof(*gid)))
 			continue;
 
 		if (mask & GID_ATTR_FIND_MASK_NETDEV &&
@@ -346,8 +482,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 			continue;
 
 		if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
-		    !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
-		    default_gid)
+		    is_gid_index_default(table, curr_index) != default_gid)
 			continue;
 
 		found = curr_index;
@@ -396,7 +531,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
 	attr->device = ib_dev;
 	attr->index = empty;
 	attr->port_num = port;
-	ret = add_modify_gid(table, gid, attr);
+	attr->gid = *gid;
+	ret = add_modify_gid(table, attr);
 	if (!ret)
 		dispatch_gid_change_event(ib_dev, port);
 
@@ -492,7 +628,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 	mutex_lock(&table->lock);
 
 	for (ix = 0; ix < table->sz; ix++) {
-		if (table->data_vec[ix].attr.ndev == ndev) {
+		if (is_gid_entry_valid(table->data_vec[ix]) &&
+		    table->data_vec[ix]->attr.ndev == ndev) {
 			del_gid(ib_dev, port, table, ix);
 			deleted = true;
 		}
@@ -506,103 +643,37 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 	return 0;
 }
 
-static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
-			      union ib_gid *gid, struct ib_gid_attr *attr)
-{
-	struct ib_gid_table *table;
-
-	table = rdma_gid_table(ib_dev, port);
-
-	if (index < 0 || index >= table->sz)
-		return -EINVAL;
-
-	if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
-		return -EINVAL;
-
-	memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
-	if (attr) {
-		memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
-		if (attr->ndev)
-			dev_hold(attr->ndev);
-	}
-
-	return 0;
-}
-
-static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
-				    const union ib_gid *gid,
-				    const struct ib_gid_attr *val,
-				    unsigned long mask,
-				    u8 *port, u16 *index)
-{
-	struct ib_gid_table *table;
-	u8 p;
-	int local_index;
-	unsigned long flags;
-
-	for (p = 0; p < ib_dev->phys_port_cnt; p++) {
-		table = ib_dev->cache.ports[p].gid;
-		read_lock_irqsave(&table->rwlock, flags);
-		local_index = find_gid(table, gid, val, false, mask, NULL);
-		if (local_index >= 0) {
-			if (index)
-				*index = local_index;
-			if (port)
-				*port = p + rdma_start_port(ib_dev);
-			read_unlock_irqrestore(&table->rwlock, flags);
-			return 0;
-		}
-		read_unlock_irqrestore(&table->rwlock, flags);
-	}
-
-	return -ENOENT;
-}
-
-static int ib_cache_gid_find(struct ib_device *ib_dev,
-			     const union ib_gid *gid,
-			     enum ib_gid_type gid_type,
-			     struct net_device *ndev, u8 *port,
-			     u16 *index)
-{
-	unsigned long mask = GID_ATTR_FIND_MASK_GID |
-			     GID_ATTR_FIND_MASK_GID_TYPE;
-	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
-
-	if (ndev)
-		mask |= GID_ATTR_FIND_MASK_NETDEV;
-
-	return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
-					mask, port, index);
-}
-
 /**
- * ib_find_cached_gid_by_port - Returns the GID table index where a specified
- * GID value occurs. It searches for the specified GID value in the local
- * software cache.
+ * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
+ * a valid GID entry for given search parameters. It searches for the specified
+ * GID value in the local software cache.
  * @device: The device to query.
  * @gid: The GID value to search for.
  * @gid_type: The GID type to search for.
  * @port_num: The port number of the device where the GID value should be
  *   searched.
- * @ndev: In RoCE, the net device of the device. Null means ignore.
- * @index: The index into the cached GID table where the GID was found. This
- *   parameter may be NULL.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ *
+ * Returns the SGID attributes with a valid reference if the GID is found,
+ * or an ERR_PTR on error.
+ * The caller must invoke rdma_put_gid_attr() to release the reference.
  */
-int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
-			       const union ib_gid *gid,
-			       enum ib_gid_type gid_type,
-			       u8 port, struct net_device *ndev,
-			       u16 *index)
+const struct ib_gid_attr *
+rdma_find_gid_by_port(struct ib_device *ib_dev,
+		      const union ib_gid *gid,
+		      enum ib_gid_type gid_type,
+		      u8 port, struct net_device *ndev)
 {
 	int local_index;
 	struct ib_gid_table *table;
 	unsigned long mask = GID_ATTR_FIND_MASK_GID |
 			     GID_ATTR_FIND_MASK_GID_TYPE;
 	struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+	const struct ib_gid_attr *attr;
 	unsigned long flags;
 
 	if (!rdma_is_port_valid(ib_dev, port))
-		return -ENOENT;
+		return ERR_PTR(-ENOENT);
 
 	table = rdma_gid_table(ib_dev, port);
 
@@ -612,89 +683,73 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
 	read_lock_irqsave(&table->rwlock, flags);
 	local_index = find_gid(table, gid, &val, false, mask, NULL);
 	if (local_index >= 0) {
-		if (index)
-			*index = local_index;
+		get_gid_entry(table->data_vec[local_index]);
+		attr = &table->data_vec[local_index]->attr;
 		read_unlock_irqrestore(&table->rwlock, flags);
-		return 0;
+		return attr;
 	}
 
 	read_unlock_irqrestore(&table->rwlock, flags);
-	return -ENOENT;
+	return ERR_PTR(-ENOENT);
 }
-EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+EXPORT_SYMBOL(rdma_find_gid_by_port);
 
 /**
- * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
- * GID value occurs
+ * rdma_find_gid_by_filter - Returns the GID table attribute where a
+ * specified GID value occurs
  * @device: The device to query.
  * @gid: The GID value to search for.
- * @port_num: The port number of the device where the GID value could be
+ * @port: The port number of the device where the GID value could be
  *   searched.
  * @filter: The filter function is executed on any matching GID in the table.
  *   If the filter function returns true, the corresponding index is returned,
  *   otherwise, we continue searching the GID table. It's guaranteed that
  *   while filter is executed, ndev field is valid and the structure won't
  *   change. filter is executed in an atomic context. filter must not be NULL.
- * @index: The index into the cached GID table where the GID was found. This
- *   parameter may be NULL.
  *
- * ib_cache_gid_find_by_filter() searches for the specified GID value
+ * rdma_find_gid_by_filter() searches for the specified GID value
  * of which the filter function returns true in the port's GID table.
- * This function is only supported on RoCE ports.
  *
  */
-static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
-				       const union ib_gid *gid,
-				       u8 port,
-				       bool (*filter)(const union ib_gid *,
-						      const struct ib_gid_attr *,
-						      void *),
-				       void *context,
-				       u16 *index)
+const struct ib_gid_attr *rdma_find_gid_by_filter(
+	struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
+	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
+		       void *),
+	void *context)
 {
+	const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
 	struct ib_gid_table *table;
-	unsigned int i;
 	unsigned long flags;
-	bool found = false;
+	unsigned int i;
 
-
-	if (!rdma_is_port_valid(ib_dev, port) ||
-	    !rdma_protocol_roce(ib_dev, port))
-		return -EPROTONOSUPPORT;
+	if (!rdma_is_port_valid(ib_dev, port))
+		return ERR_PTR(-EINVAL);
 
 	table = rdma_gid_table(ib_dev, port);
 
 	read_lock_irqsave(&table->rwlock, flags);
 	for (i = 0; i < table->sz; i++) {
-		struct ib_gid_attr attr;
+		struct ib_gid_table_entry *entry = table->data_vec[i];
 
-		if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
+		if (!is_gid_entry_valid(entry))
 			continue;
 
-		if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
+		if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
 			continue;
 
-		memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
-
-		if (filter(gid, &attr, context)) {
-			found = true;
-			if (index)
-				*index = i;
+		if (filter(gid, &entry->attr, context)) {
+			get_gid_entry(entry);
+			res = &entry->attr;
 			break;
 		}
 	}
 	read_unlock_irqrestore(&table->rwlock, flags);
-
-	if (!found)
-		return -ENOENT;
-	return 0;
+	return res;
 }
 
 static struct ib_gid_table *alloc_gid_table(int sz)
 {
-	struct ib_gid_table *table =
-		kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
-	int i;
+	struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
 
 	if (!table)
 		return NULL;
@@ -707,12 +762,6 @@ static struct ib_gid_table *alloc_gid_table(int sz)
 
 	table->sz = sz;
 	rwlock_init(&table->rwlock);
-
-	/* Mark all entries as invalid so that allocator can allocate
-	 * one of the invalid (free) entry.
-	 */
-	for (i = 0; i < sz; i++)
-		table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
 	return table;
 
 err_free_table:
@@ -720,12 +769,30 @@ static struct ib_gid_table *alloc_gid_table(int sz)
 	return NULL;
 }
 
-static void release_gid_table(struct ib_gid_table *table)
+static void release_gid_table(struct ib_device *device, u8 port,
+			      struct ib_gid_table *table)
 {
-	if (table) {
-		kfree(table->data_vec);
-		kfree(table);
+	bool leak = false;
+	int i;
+
+	if (!table)
+		return;
+
+	for (i = 0; i < table->sz; i++) {
+		if (is_gid_entry_free(table->data_vec[i]))
+			continue;
+		if (kref_read(&table->data_vec[i]->kref) > 1) {
+			pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
+			       device->name, i,
+			       kref_read(&table->data_vec[i]->kref));
+			leak = true;
+		}
 	}
+	if (leak)
+		return;
+
+	kfree(table->data_vec);
+	kfree(table);
 }
 
 static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
@@ -739,7 +806,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
 
 	mutex_lock(&table->lock);
 	for (i = 0; i < table->sz; ++i) {
-		if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
+		if (is_gid_entry_valid(table->data_vec[i])) {
 			del_gid(ib_dev, port, table, i);
 			deleted = true;
 		}
@@ -757,12 +824,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
 {
 	union ib_gid gid = { };
 	struct ib_gid_attr gid_attr;
-	struct ib_gid_table *table;
 	unsigned int gid_type;
 	unsigned long mask;
 
-	table = rdma_gid_table(ib_dev, port);
-
 	mask = GID_ATTR_FIND_MASK_GID_TYPE |
 	       GID_ATTR_FIND_MASK_DEFAULT |
 	       GID_ATTR_FIND_MASK_NETDEV;
@@ -792,19 +856,12 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
 	unsigned int i;
 	unsigned long roce_gid_type_mask;
 	unsigned int num_default_gids;
-	unsigned int current_gid = 0;
 
 	roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
 	num_default_gids = hweight_long(roce_gid_type_mask);
-	for (i = 0; i < num_default_gids && i < table->sz; i++) {
-		struct ib_gid_table_entry *entry = &table->data_vec[i];
-
-		entry->props |= GID_TABLE_ENTRY_DEFAULT;
-		current_gid = find_next_bit(&roce_gid_type_mask,
-					    BITS_PER_LONG,
-					    current_gid);
-		entry->attr.gid_type = current_gid++;
-	}
+	/* Reserve starting indices for default GIDs */
+	for (i = 0; i < num_default_gids && i < table->sz; i++)
+		table->default_gid_indices |= BIT(i);
 }
 
 
@@ -815,7 +872,7 @@ static void gid_table_release_one(struct ib_device *ib_dev)
 
 	for (port = 0; port < ib_dev->phys_port_cnt; port++) {
 		table = ib_dev->cache.ports[port].gid;
-		release_gid_table(table);
+		release_gid_table(ib_dev, port, table);
 		ib_dev->cache.ports[port].gid = NULL;
 	}
 }
@@ -869,69 +926,94 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
 	return err;
 }
 
-int ib_get_cached_gid(struct ib_device *device,
-		      u8                port_num,
-		      int               index,
-		      union ib_gid     *gid,
-		      struct ib_gid_attr *gid_attr)
+/**
+ * rdma_query_gid - Read the GID content from the GID software cache
+ * @device:		Device to query the GID
+ * @port_num:		Port number of the device
+ * @index:		Index of the GID table entry to read
+ * @gid:		Pointer to GID where to store the entry's GID
+ *
+ * rdma_query_gid() only reads the GID entry content for the requested device,
+ * port and index. It works for IB, RoCE and iWARP link layers. It doesn't
+ * hold any reference to the GID table entry in the HCA or software cache.
+ *
+ * Returns 0 on success or appropriate error code.
+ *
+ */
+int rdma_query_gid(struct ib_device *device, u8 port_num,
+		   int index, union ib_gid *gid)
 {
-	int res;
-	unsigned long flags;
 	struct ib_gid_table *table;
+	unsigned long flags;
+	int res = -EINVAL;
 
 	if (!rdma_is_port_valid(device, port_num))
 		return -EINVAL;
 
 	table = rdma_gid_table(device, port_num);
 	read_lock_irqsave(&table->rwlock, flags);
-	res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
-	read_unlock_irqrestore(&table->rwlock, flags);
 
+	if (index < 0 || index >= table->sz ||
+	    !is_gid_entry_valid(table->data_vec[index]))
+		goto done;
+
+	memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
+	res = 0;
+
+done:
+	read_unlock_irqrestore(&table->rwlock, flags);
 	return res;
 }
-EXPORT_SYMBOL(ib_get_cached_gid);
+EXPORT_SYMBOL(rdma_query_gid);
 
 /**
- * ib_find_cached_gid - Returns the port number and GID table index where
- *   a specified GID value occurs.
+ * rdma_find_gid - Returns SGID attributes if the matching GID is found.
  * @device: The device to query.
  * @gid: The GID value to search for.
  * @gid_type: The GID type to search for.
  * @ndev: In RoCE, the net device of the device. NULL means ignore.
- * @port_num: The port number of the device where the GID value was found.
- * @index: The index into the cached GID table where the GID was found.  This
- *   parameter may be NULL.
  *
- * ib_find_cached_gid() searches for the specified GID value in
- * the local software cache.
+ * rdma_find_gid() searches for the specified GID value in the software cache.
+ *
+ * Returns the GID attributes if a valid GID is found, or an ERR_PTR on error.
+ * The caller must invoke rdma_put_gid_attr() to release the reference.
+ *
  */
-int ib_find_cached_gid(struct ib_device *device,
-		       const union ib_gid *gid,
-		       enum ib_gid_type gid_type,
-		       struct net_device *ndev,
-		       u8               *port_num,
-		       u16              *index)
+const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
+					const union ib_gid *gid,
+					enum ib_gid_type gid_type,
+					struct net_device *ndev)
 {
-	return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
-}
-EXPORT_SYMBOL(ib_find_cached_gid);
+	unsigned long mask = GID_ATTR_FIND_MASK_GID |
+			     GID_ATTR_FIND_MASK_GID_TYPE;
+	struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
+	u8 p;
 
-int ib_find_gid_by_filter(struct ib_device *device,
-			  const union ib_gid *gid,
-			  u8 port_num,
-			  bool (*filter)(const union ib_gid *gid,
-					 const struct ib_gid_attr *,
-					 void *),
-			  void *context, u16 *index)
-{
-	/* Only RoCE GID table supports filter function */
-	if (!rdma_protocol_roce(device, port_num) && filter)
-		return -EPROTONOSUPPORT;
+	if (ndev)
+		mask |= GID_ATTR_FIND_MASK_NETDEV;
 
-	return ib_cache_gid_find_by_filter(device, gid,
-					   port_num, filter,
-					   context, index);
+	for (p = 0; p < device->phys_port_cnt; p++) {
+		struct ib_gid_table *table;
+		unsigned long flags;
+		int index;
+
+		table = device->cache.ports[p].gid;
+		read_lock_irqsave(&table->rwlock, flags);
+		index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
+		if (index >= 0) {
+			const struct ib_gid_attr *attr;
+
+			get_gid_entry(table->data_vec[index]);
+			attr = &table->data_vec[index]->attr;
+			read_unlock_irqrestore(&table->rwlock, flags);
+			return attr;
+		}
+		read_unlock_irqrestore(&table->rwlock, flags);
+	}
+
+	return ERR_PTR(-ENOENT);
 }
+EXPORT_SYMBOL(rdma_find_gid);
 
 int ib_get_cached_pkey(struct ib_device *device,
 		       u8                port_num,
@@ -1089,12 +1171,92 @@ int ib_get_cached_port_state(struct ib_device   *device,
 }
 EXPORT_SYMBOL(ib_get_cached_port_state);
 
+/**
+ * rdma_get_gid_attr - Returns GID attributes for a port of a device
+ * at a requested gid_index, if a valid GID entry exists.
+ * @device:		The device to query.
+ * @port_num:		The port number on the device where the GID value
+ *			is to be queried.
+ * @index:		Index of the GID table entry whose attributes are to
+ *                      be queried.
+ *
+ * rdma_get_gid_attr() acquires a reference to the GID attributes from the
+ * cached GID table. The caller must invoke rdma_put_gid_attr() to release
+ * the reference, regardless of link layer.
+ *
+ * Returns a pointer to a valid GID attribute or an ERR_PTR with the
+ * appropriate error code.
+ */
+const struct ib_gid_attr *
+rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
+{
+	const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+	struct ib_gid_table *table;
+	unsigned long flags;
+
+	if (!rdma_is_port_valid(device, port_num))
+		return ERR_PTR(-EINVAL);
+
+	table = rdma_gid_table(device, port_num);
+	if (index < 0 || index >= table->sz)
+		return ERR_PTR(-EINVAL);
+
+	read_lock_irqsave(&table->rwlock, flags);
+	if (!is_gid_entry_valid(table->data_vec[index]))
+		goto done;
+
+	get_gid_entry(table->data_vec[index]);
+	attr = &table->data_vec[index]->attr;
+done:
+	read_unlock_irqrestore(&table->rwlock, flags);
+	return attr;
+}
+EXPORT_SYMBOL(rdma_get_gid_attr);
+
+/**
+ * rdma_put_gid_attr - Release reference to the GID attribute
+ * @attr:		Pointer to the GID attribute whose reference
+ *			needs to be released.
+ *
+ * rdma_put_gid_attr() must be used to release a reference acquired using
+ * rdma_get_gid_attr() or any API which returns a pointer to an ib_gid_attr,
+ * regardless of the link layer (IB or RoCE).
+ *
+ */
+void rdma_put_gid_attr(const struct ib_gid_attr *attr)
+{
+	struct ib_gid_table_entry *entry =
+		container_of(attr, struct ib_gid_table_entry, attr);
+
+	put_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_put_gid_attr);
+
+/**
+ * rdma_hold_gid_attr - Get reference to existing GID attribute
+ *
+ * @attr:		Pointer to the GID attribute whose reference
+ *			needs to be taken.
+ *
+ * Increase the reference count of a GID attribute to keep it from being
+ * freed. Callers must already hold a reference to the attribute.
+ *
+ */
+void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
+{
+	struct ib_gid_table_entry *entry =
+		container_of(attr, struct ib_gid_table_entry, attr);
+
+	get_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_hold_gid_attr);
+
 static int config_non_roce_gid_cache(struct ib_device *device,
 				     u8 port, int gid_tbl_len)
 {
 	struct ib_gid_attr gid_attr = {};
 	struct ib_gid_table *table;
-	union ib_gid gid;
 	int ret = 0;
 	int i;
 
@@ -1106,14 +1268,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
 	for (i = 0; i < gid_tbl_len; ++i) {
 		if (!device->query_gid)
 			continue;
-		ret = device->query_gid(device, port, i, &gid);
+		ret = device->query_gid(device, port, i, &gid_attr.gid);
 		if (ret) {
 			pr_warn("query_gid failed (%d) for %s (index %d)\n",
 				ret, device->name, i);
 			goto err;
 		}
 		gid_attr.index = i;
-		add_modify_gid(table, &gid, &gid_attr);
+		add_modify_gid(table, &gid_attr);
 	}
 err:
 	mutex_unlock(&table->lock);
@@ -1128,13 +1290,10 @@ static void ib_cache_update(struct ib_device *device,
 	struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
 	int                        i;
 	int                        ret;
-	struct ib_gid_table	  *table;
 
 	if (!rdma_is_port_valid(device, port))
 		return;
 
-	table = rdma_gid_table(device, port);
-
 	tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
 	if (!tprops)
 		return;
@@ -1296,4 +1455,9 @@ void ib_cache_cleanup_one(struct ib_device *device)
 	ib_unregister_event_handler(&device->cache.event_handler);
 	flush_workqueue(ib_wq);
 	gid_table_cleanup_one(device);
+
+	/*
+	 * Flush the wq a second time for any pending GID delete work.
+	 */
+	flush_workqueue(ib_wq);
 }
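
A minimal usage sketch of the reference-counted GID attribute API added above
(not part of the patch; show_gid0() and its use of GID index 0 are illustrative
assumptions, with the declarations assumed to live in <rdma/ib_cache.h> as in
mainline):

	#include <linux/err.h>
	#include <linux/printk.h>
	#include <rdma/ib_cache.h>

	static int show_gid0(struct ib_device *device, u8 port_num)
	{
		const struct ib_gid_attr *attr;
		union ib_gid gid;
		int ret;

		/* rdma_query_gid() only copies the GID; no reference is held */
		ret = rdma_query_gid(device, port_num, 0, &gid);
		if (ret)
			return ret;

		/* rdma_get_gid_attr() returns a reference-counted attribute */
		attr = rdma_get_gid_attr(device, port_num, 0);
		if (IS_ERR(attr))
			return PTR_ERR(attr);

		pr_info("%s port %d gid %pI6 type %d\n",
			device->name, port_num, gid.raw, attr->gid_type);

		/* pair every successful rdma_get_gid_attr() with a put */
		rdma_put_gid_attr(attr);
		return 0;
	}
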
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 27a7b0a..6e39c27 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -474,7 +474,7 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
 	if (ret)
 		return ret;
 
-	memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
 }
 
@@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
 	return ret;
 }
 
-static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
+static struct cm_port *
+get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
 {
 	struct cm_device *cm_dev;
 	struct cm_port *port = NULL;
 	unsigned long flags;
-	u8 p;
-	struct net_device *ndev = ib_get_ndev_from_path(path);
 
-	read_lock_irqsave(&cm.device_lock, flags);
-	list_for_each_entry(cm_dev, &cm.device_list, list) {
-		if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
-					sa_conv_pathrec_to_gid_type(path),
-					ndev, &p, NULL)) {
-			port = cm_dev->port[p - 1];
-			break;
+	if (attr) {
+		read_lock_irqsave(&cm.device_lock, flags);
+		list_for_each_entry(cm_dev, &cm.device_list, list) {
+			if (cm_dev->ib_device == attr->device) {
+				port = cm_dev->port[attr->port_num - 1];
+				break;
+			}
 		}
+		read_unlock_irqrestore(&cm.device_lock, flags);
+	} else {
+		/* The SGID attribute can be NULL in the following
+		 * cases:
+		 * (a) Alternative path
+		 * (b) IB link layer without GRH
+		 * (c) LAP send messages
+		 */
+		read_lock_irqsave(&cm.device_lock, flags);
+		list_for_each_entry(cm_dev, &cm.device_list, list) {
+			attr = rdma_find_gid(cm_dev->ib_device,
+					     &path->sgid,
+					     sa_conv_pathrec_to_gid_type(path),
+					     NULL);
+			if (!IS_ERR(attr)) {
+				port = cm_dev->port[attr->port_num - 1];
+				break;
+			}
+		}
+		read_unlock_irqrestore(&cm.device_lock, flags);
+		if (port)
+			rdma_put_gid_attr(attr);
 	}
-	read_unlock_irqrestore(&cm.device_lock, flags);
-
-	if (ndev)
-		dev_put(ndev);
 	return port;
 }
 
-static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
+static int cm_init_av_by_path(struct sa_path_rec *path,
+			      const struct ib_gid_attr *sgid_attr,
+			      struct cm_av *av,
 			      struct cm_id_private *cm_id_priv)
 {
 	struct rdma_ah_attr new_ah_attr;
@@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
 	struct cm_port *port;
 	int ret;
 
-	port = get_cm_port_from_path(path);
+	port = get_cm_port_from_path(path, sgid_attr);
 	if (!port)
 		return -EINVAL;
 	cm_dev = port->cm_dev;
@@ -554,22 +573,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
 
 	/*
 	 * av->ah_attr might be initialized based on wc or during
-	 * request processing time. So initialize a new ah_attr on stack.
+	 * request processing time, which might hold a reference to sgid_attr.
+	 * So initialize a new ah_attr on stack.
 	 * If initialization fails, old ah_attr is used for sending any
 	 * responses. If initialization is successful, than new ah_attr
-	 * is used by overwriting the old one.
+	 * is used by overwriting the old one, so that the right ah_attr
+	 * can be used to return an error response.
 	 */
 	ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
-					&new_ah_attr);
+					&new_ah_attr, sgid_attr);
 	if (ret)
 		return ret;
 
 	av->timeout = path->packet_life_time + 1;
 
 	ret = add_cm_id_to_port_list(cm_id_priv, av, port);
-	if (ret)
+	if (ret) {
+		rdma_destroy_ah_attr(&new_ah_attr);
 		return ret;
-	memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+	}
+	rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 	return 0;
 }
 
@@ -1091,6 +1114,9 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 	wait_for_completion(&cm_id_priv->comp);
 	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
 		cm_free_work(work);
+
+	rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
+	rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
 	kfree(cm_id_priv->private_data);
 	kfree(cm_id_priv);
 }
@@ -1230,14 +1256,12 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_cm_insert_listen);
 
-static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
-			  enum cm_msg_sequence msg_seq)
+static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
 {
 	u64 hi_tid, low_tid;
 
 	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
-	low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
-			  (msg_seq << 30));
+	low_tid  = (u64)cm_id_priv->id.local_id;
 	return cpu_to_be64(hi_tid | low_tid);
 }
 
@@ -1265,7 +1289,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
 					      pri_path->opa.slid);
 
 	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
-			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
+			  cm_form_tid(cm_id_priv));
 
 	req_msg->local_comm_id = cm_id_priv->id.local_id;
 	req_msg->service_id = param->service_id;
@@ -1413,12 +1437,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+	ret = cm_init_av_by_path(param->primary_path,
+				 param->ppath_sgid_attr, &cm_id_priv->av,
 				 cm_id_priv);
 	if (ret)
 		goto error1;
 	if (param->alternate_path) {
-		ret = cm_init_av_by_path(param->alternate_path,
+		ret = cm_init_av_by_path(param->alternate_path, NULL,
 					 &cm_id_priv->alt_av, cm_id_priv);
 		if (ret)
 			goto error1;
@@ -1646,7 +1671,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work,
 	    (ib_is_opa_gid(&path->sgid))) {
 		union ib_gid sgid;
 
-		if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) {
+		if (rdma_query_gid(dev, port_num, 0, &sgid)) {
 			dev_warn(&dev->dev,
 				 "Error updating sgid in CM request\n");
 			return;
@@ -1691,6 +1716,7 @@ static void cm_format_req_event(struct cm_work *work,
 	param->retry_count = cm_req_get_retry_count(req_msg);
 	param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
 	param->srq = cm_req_get_srq(req_msg);
+	param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
 	work->cm_event.private_data = &req_msg->private_data;
 }
 
@@ -1914,9 +1940,8 @@ static int cm_req_handler(struct cm_work *work)
 	struct ib_cm_id *cm_id;
 	struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
 	struct cm_req_msg *req_msg;
-	union ib_gid gid;
-	struct ib_gid_attr gid_attr;
 	const struct ib_global_route *grh;
+	const struct ib_gid_attr *gid_attr;
 	int ret;
 
 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1961,24 +1986,13 @@ static int cm_req_handler(struct cm_work *work)
 	if (cm_req_has_alt_path(req_msg))
 		memset(&work->path[1], 0, sizeof(work->path[1]));
 	grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
-	ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
-				work->port->port_num,
-				grh->sgid_index,
-				&gid, &gid_attr);
-	if (ret) {
-		ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
-		goto rejected;
-	}
+	gid_attr = grh->sgid_attr;
 
-	if (gid_attr.ndev) {
+	if (gid_attr && gid_attr->ndev) {
 		work->path[0].rec_type =
-			sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
-		sa_path_set_ifindex(&work->path[0],
-				    gid_attr.ndev->ifindex);
-		sa_path_set_ndev(&work->path[0],
-				 dev_net(gid_attr.ndev));
-		dev_put(gid_attr.ndev);
+			sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
 	} else {
+		/* If no GID attribute or ndev is null, it is not RoCE. */
 		cm_path_set_rec_type(work->port->cm_dev->ib_device,
 				     work->port->port_num,
 				     &work->path[0],
@@ -1992,15 +2006,14 @@ static int cm_req_handler(struct cm_work *work)
 		sa_path_set_dmac(&work->path[0],
 				 cm_id_priv->av.ah_attr.roce.dmac);
 	work->path[0].hop_limit = grh->hop_limit;
-	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+	ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
 				 cm_id_priv);
 	if (ret) {
 		int err;
 
-		err = ib_get_cached_gid(work->port->cm_dev->ib_device,
-					work->port->port_num, 0,
-					&work->path[0].sgid,
-					NULL);
+		err = rdma_query_gid(work->port->cm_dev->ib_device,
+				     work->port->port_num, 0,
+				     &work->path[0].sgid);
 		if (err)
 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
 				       NULL, 0, NULL, 0);
@@ -2012,8 +2025,8 @@ static int cm_req_handler(struct cm_work *work)
 		goto rejected;
 	}
 	if (cm_req_has_alt_path(req_msg)) {
-		ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
-					 cm_id_priv);
+		ret = cm_init_av_by_path(&work->path[1], NULL,
+					 &cm_id_priv->alt_av, cm_id_priv);
 		if (ret) {
 			ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
 				       &work->path[0].sgid,
@@ -2451,7 +2464,7 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
 			  u8 private_data_len)
 {
 	cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
-			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
+			  cm_form_tid(cm_id_priv));
 	dreq_msg->local_comm_id = cm_id_priv->id.local_id;
 	dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
 	cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
@@ -3082,7 +3095,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
 		alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
 					      alternate_path->opa.slid);
 	cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
-			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
+			  cm_form_tid(cm_id_priv));
 	lap_msg->local_comm_id = cm_id_priv->id.local_id;
 	lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
 	cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
@@ -3136,7 +3149,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
 		goto out;
 	}
 
-	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+	ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
 				 cm_id_priv);
 	if (ret)
 		goto out;
@@ -3279,7 +3292,7 @@ static int cm_lap_handler(struct cm_work *work)
 	if (ret)
 		goto unlock;
 
-	cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+	cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
 			   cm_id_priv);
 	cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
 	cm_id_priv->tid = lap_msg->hdr.tid;
@@ -3458,7 +3471,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
 			       struct ib_cm_sidr_req_param *param)
 {
 	cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
-			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
+			  cm_form_tid(cm_id_priv));
 	sidr_req_msg->request_id = cm_id_priv->id.local_id;
 	sidr_req_msg->pkey = param->path->pkey;
 	sidr_req_msg->service_id = param->service_id;
@@ -3481,7 +3494,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
 		return -EINVAL;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
-	ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
+	ret = cm_init_av_by_path(param->path, param->sgid_attr,
+				 &cm_id_priv->av,
+				 cm_id_priv);
 	if (ret)
 		goto out;
 
@@ -3518,6 +3533,7 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
 EXPORT_SYMBOL(ib_send_cm_sidr_req);
 
 static void cm_format_sidr_req_event(struct cm_work *work,
+				     const struct cm_id_private *rx_cm_id,
 				     struct ib_cm_id *listen_id)
 {
 	struct cm_sidr_req_msg *sidr_req_msg;
@@ -3531,6 +3547,7 @@ static void cm_format_sidr_req_event(struct cm_work *work,
 	param->service_id = sidr_req_msg->service_id;
 	param->bth_pkey = cm_get_bth_pkey(work);
 	param->port = work->port->port_num;
+	param->sgid_attr = rx_cm_id->av.ah_attr.grh.sgid_attr;
 	work->cm_event.private_data = &sidr_req_msg->private_data;
 }
 
@@ -3588,7 +3605,7 @@ static int cm_sidr_req_handler(struct cm_work *work)
 	cm_id_priv->id.service_id = sidr_req_msg->service_id;
 	cm_id_priv->id.service_mask = ~cpu_to_be64(0);
 
-	cm_format_sidr_req_event(work, &cur_cm_id_priv->id);
+	cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
 	cm_process_work(cm_id_priv, work);
 	cm_deref_id(cur_cm_id_priv);
 	return 0;
@@ -3665,7 +3682,8 @@ error:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 }
 EXPORT_SYMBOL(ib_send_cm_sidr_rep);
 
-static void cm_format_sidr_rep_event(struct cm_work *work)
+static void cm_format_sidr_rep_event(struct cm_work *work,
+				     const struct cm_id_private *cm_id_priv)
 {
 	struct cm_sidr_rep_msg *sidr_rep_msg;
 	struct ib_cm_sidr_rep_event_param *param;
@@ -3678,6 +3696,7 @@ static void cm_format_sidr_rep_event(struct cm_work *work)
 	param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
 	param->info = &sidr_rep_msg->info;
 	param->info_len = sidr_rep_msg->info_length;
+	param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
 	work->cm_event.private_data = &sidr_rep_msg->private_data;
 }
 
@@ -3701,7 +3720,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	spin_unlock_irq(&cm_id_priv->lock);
 
-	cm_format_sidr_rep_event(work);
+	cm_format_sidr_rep_event(work, cm_id_priv);
 	cm_process_work(cm_id_priv, work);
 	return 0;
 out:
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 8b76f0e..476d4309 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -44,13 +44,6 @@
 
 #define IB_CM_CLASS_VERSION	2 /* IB specification 1.2 */
 
-enum cm_msg_sequence {
-	CM_MSG_SEQUENCE_REQ,
-	CM_MSG_SEQUENCE_LAP,
-	CM_MSG_SEQUENCE_DREQ,
-	CM_MSG_SEQUENCE_SIDR
-};
-
 struct cm_req_msg {
 	struct ib_mad_hdr hdr;
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index bff10ab..f726772 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -366,7 +366,6 @@ struct cma_multicast {
 	void			*context;
 	struct sockaddr_storage	addr;
 	struct kref		mcref;
-	bool			igmp_joined;
 	u8			join_state;
 };
 
@@ -412,11 +411,11 @@ struct cma_req_info {
 	struct sockaddr_storage listen_addr_storage;
 	struct sockaddr_storage src_addr_storage;
 	struct ib_device *device;
-	int port;
 	union ib_gid local_gid;
 	__be64 service_id;
+	int port;
+	bool has_gid;
 	u16 pkey;
-	bool has_gid:1;
 };
 
 static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp)
@@ -491,12 +490,10 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
 {
 	cma_ref_dev(cma_dev);
 	id_priv->cma_dev = cma_dev;
-	id_priv->gid_type = 0;
 	id_priv->id.device = cma_dev->device;
 	id_priv->id.route.addr.dev_addr.transport =
 		rdma_node_get_transport(cma_dev->device->node_type);
 	list_add_tail(&id_priv->list, &cma_dev->id_list);
-	id_priv->res.type = RDMA_RESTRACK_CM_ID;
 	rdma_restrack_add(&id_priv->res);
 }
 
@@ -603,46 +600,53 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
 	return ret;
 }
 
-static inline int cma_validate_port(struct ib_device *device, u8 port,
-				    enum ib_gid_type gid_type,
-				    union ib_gid *gid,
-				    struct rdma_id_private *id_priv)
+static const struct ib_gid_attr *
+cma_validate_port(struct ib_device *device, u8 port,
+		  enum ib_gid_type gid_type,
+		  union ib_gid *gid,
+		  struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	int bound_if_index = dev_addr->bound_dev_if;
+	const struct ib_gid_attr *sgid_attr;
 	int dev_type = dev_addr->dev_type;
 	struct net_device *ndev = NULL;
-	int ret = -ENODEV;
 
 	if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
-		return ret;
+		return ERR_PTR(-ENODEV);
 
 	if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
-		return ret;
+		return ERR_PTR(-ENODEV);
 
 	if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
 		ndev = dev_get_by_index(dev_addr->net, bound_if_index);
 		if (!ndev)
-			return ret;
+			return ERR_PTR(-ENODEV);
 	} else {
 		gid_type = IB_GID_TYPE_IB;
 	}
 
-	ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
-					 ndev, NULL);
-
+	sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
 	if (ndev)
 		dev_put(ndev);
+	return sgid_attr;
+}
 
-	return ret;
+static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
+			       const struct ib_gid_attr *sgid_attr)
+{
+	WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
+	id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
 }
 
 static int cma_acquire_dev(struct rdma_id_private *id_priv,
-			   struct rdma_id_private *listen_id_priv)
+			   const struct rdma_id_private *listen_id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	const struct ib_gid_attr *sgid_attr;
 	struct cma_device *cma_dev;
 	union ib_gid gid, iboe_gid, *gidp;
+	enum ib_gid_type gid_type;
 	int ret = -ENODEV;
 	u8 port;
 
@@ -662,14 +666,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
 		port = listen_id_priv->id.port_num;
 		gidp = rdma_protocol_roce(cma_dev->device, port) ?
 		       &iboe_gid : &gid;
-
-		ret = cma_validate_port(cma_dev->device, port,
-					rdma_protocol_ib(cma_dev->device, port) ?
-					IB_GID_TYPE_IB :
-					listen_id_priv->gid_type, gidp,
-					id_priv);
-		if (!ret) {
+		gid_type = listen_id_priv->gid_type;
+		sgid_attr = cma_validate_port(cma_dev->device, port,
+					      gid_type, gidp, id_priv);
+		if (!IS_ERR(sgid_attr)) {
 			id_priv->id.port_num = port;
+			cma_bind_sgid_attr(id_priv, sgid_attr);
+			ret = 0;
 			goto out;
 		}
 	}
@@ -683,14 +686,13 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
 
 			gidp = rdma_protocol_roce(cma_dev->device, port) ?
 			       &iboe_gid : &gid;
-
-			ret = cma_validate_port(cma_dev->device, port,
-						rdma_protocol_ib(cma_dev->device, port) ?
-						IB_GID_TYPE_IB :
-						cma_dev->default_gid_type[port - 1],
-						gidp, id_priv);
-			if (!ret) {
+			gid_type = cma_dev->default_gid_type[port - 1];
+			sgid_attr = cma_validate_port(cma_dev->device, port,
+						      gid_type, gidp, id_priv);
+			if (!IS_ERR(sgid_attr)) {
 				id_priv->id.port_num = port;
+				cma_bind_sgid_attr(id_priv, sgid_attr);
+				ret = 0;
 				goto out;
 			}
 		}
@@ -732,8 +734,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
 
 			if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
 				continue;
-			for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
-						       &gid, NULL);
+			for (i = 0; !rdma_query_gid(cur_dev->device,
+						    p, i, &gid);
 			     i++) {
 				if (!memcmp(&gid, dgid, sizeof(gid))) {
 					cma_dev = cur_dev;
@@ -785,12 +787,14 @@ struct rdma_cm_id *__rdma_create_id(struct net *net,
 		id_priv->res.kern_name = caller;
 	else
 		rdma_restrack_set_task(&id_priv->res, current);
+	id_priv->res.type = RDMA_RESTRACK_CM_ID;
 	id_priv->state = RDMA_CM_IDLE;
 	id_priv->id.context = context;
 	id_priv->id.event_handler = event_handler;
 	id_priv->id.ps = ps;
 	id_priv->id.qp_type = qp_type;
 	id_priv->tos_set = false;
+	id_priv->gid_type = IB_GID_TYPE_IB;
 	spin_lock_init(&id_priv->lock);
 	mutex_init(&id_priv->qp_mutex);
 	init_completion(&id_priv->comp);
@@ -1036,35 +1040,38 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 }
 EXPORT_SYMBOL(rdma_init_qp_attr);
 
-static inline int cma_zero_addr(struct sockaddr *addr)
+static inline bool cma_zero_addr(const struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET:
 		return ipv4_is_zeronet(((struct sockaddr_in *)addr)->sin_addr.s_addr);
 	case AF_INET6:
-		return ipv6_addr_any(&((struct sockaddr_in6 *) addr)->sin6_addr);
+		return ipv6_addr_any(&((struct sockaddr_in6 *)addr)->sin6_addr);
 	case AF_IB:
-		return ib_addr_any(&((struct sockaddr_ib *) addr)->sib_addr);
+		return ib_addr_any(&((struct sockaddr_ib *)addr)->sib_addr);
 	default:
-		return 0;
+		return false;
 	}
 }
 
-static inline int cma_loopback_addr(struct sockaddr *addr)
+static inline bool cma_loopback_addr(const struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET:
-		return ipv4_is_loopback(((struct sockaddr_in *) addr)->sin_addr.s_addr);
+		return ipv4_is_loopback(
+			((struct sockaddr_in *)addr)->sin_addr.s_addr);
 	case AF_INET6:
-		return ipv6_addr_loopback(&((struct sockaddr_in6 *) addr)->sin6_addr);
+		return ipv6_addr_loopback(
+			&((struct sockaddr_in6 *)addr)->sin6_addr);
 	case AF_IB:
-		return ib_addr_loopback(&((struct sockaddr_ib *) addr)->sib_addr);
+		return ib_addr_loopback(
+			&((struct sockaddr_ib *)addr)->sib_addr);
 	default:
-		return 0;
+		return false;
 	}
 }
 
-static inline int cma_any_addr(struct sockaddr *addr)
+static inline bool cma_any_addr(const struct sockaddr *addr)
 {
 	return cma_zero_addr(addr) || cma_loopback_addr(addr);
 }
@@ -1087,7 +1094,7 @@ static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
 	}
 }
 
-static __be16 cma_port(struct sockaddr *addr)
+static __be16 cma_port(const struct sockaddr *addr)
 {
 	struct sockaddr_ib *sib;
 
@@ -1105,15 +1112,15 @@ static __be16 cma_port(struct sockaddr *addr)
 	}
 }
 
-static inline int cma_any_port(struct sockaddr *addr)
+static inline int cma_any_port(const struct sockaddr *addr)
 {
 	return !cma_port(addr);
 }
 
 static void cma_save_ib_info(struct sockaddr *src_addr,
 			     struct sockaddr *dst_addr,
-			     struct rdma_cm_id *listen_id,
-			     struct sa_path_rec *path)
+			     const struct rdma_cm_id *listen_id,
+			     const struct sa_path_rec *path)
 {
 	struct sockaddr_ib *listen_ib, *ib;
 
@@ -1198,7 +1205,7 @@ static u16 cma_port_from_service_id(__be64 service_id)
 
 static int cma_save_ip_info(struct sockaddr *src_addr,
 			    struct sockaddr *dst_addr,
-			    struct ib_cm_event *ib_event,
+			    const struct ib_cm_event *ib_event,
 			    __be64 service_id)
 {
 	struct cma_hdr *hdr;
@@ -1228,8 +1235,8 @@ static int cma_save_ip_info(struct sockaddr *src_addr,
 
 static int cma_save_net_info(struct sockaddr *src_addr,
 			     struct sockaddr *dst_addr,
-			     struct rdma_cm_id *listen_id,
-			     struct ib_cm_event *ib_event,
+			     const struct rdma_cm_id *listen_id,
+			     const struct ib_cm_event *ib_event,
 			     sa_family_t sa_family, __be64 service_id)
 {
 	if (sa_family == AF_IB) {
@@ -1361,7 +1368,23 @@ static bool validate_net_dev(struct net_device *net_dev,
 	}
 }
 
-static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
+static struct net_device *
+roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
+{
+	const struct ib_gid_attr *sgid_attr = NULL;
+
+	if (ib_event->event == IB_CM_REQ_RECEIVED)
+		sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
+	else if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED)
+		sgid_attr = ib_event->param.sidr_req_rcvd.sgid_attr;
+
+	if (!sgid_attr)
+		return NULL;
+	dev_hold(sgid_attr->ndev);
+	return sgid_attr->ndev;
+}
+
+static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
 					  struct cma_req_info *req)
 {
 	struct sockaddr *listen_addr =
@@ -1376,8 +1399,12 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
 	if (err)
 		return ERR_PTR(err);
 
-	net_dev = ib_get_net_dev_by_params(req->device, req->port, req->pkey,
-					   gid, listen_addr);
+	if (rdma_protocol_roce(req->device, req->port))
+		net_dev = roce_get_net_dev_by_cm_event(ib_event);
+	else
+		net_dev = ib_get_net_dev_by_params(req->device, req->port,
+						   req->pkey,
+						   gid, listen_addr);
 	if (!net_dev)
 		return ERR_PTR(-ENODEV);
 
@@ -1440,14 +1467,20 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id,
 	const struct rdma_addr *addr = &id->route.addr;
 
 	if (!net_dev)
-		/* This request is an AF_IB request or a RoCE request */
+		/* This request is an AF_IB request */
 		return (!id->port_num || id->port_num == port_num) &&
-		       (addr->src_addr.ss_family == AF_IB ||
-			rdma_protocol_roce(id->device, port_num));
+		       (addr->src_addr.ss_family == AF_IB);
 
-	return !addr->dev_addr.bound_dev_if ||
-	       (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
-		addr->dev_addr.bound_dev_if == net_dev->ifindex);
+	/*
+	 * Net namespaces must match, and if the listener is listening
+	 * on a specific netdevice, then the netdevice must match as well.
+	 */
+	if (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
+	    (!!addr->dev_addr.bound_dev_if ==
+	     (addr->dev_addr.bound_dev_if == net_dev->ifindex)))
+		return true;
+	else
+		return false;
 }
 
 static struct rdma_id_private *cma_find_listener(
@@ -1480,9 +1513,10 @@ static struct rdma_id_private *cma_find_listener(
 	return ERR_PTR(-EINVAL);
 }
 
-static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
-						 struct ib_cm_event *ib_event,
-						 struct net_device **net_dev)
+static struct rdma_id_private *
+cma_ib_id_from_event(struct ib_cm_id *cm_id,
+		     const struct ib_cm_event *ib_event,
+		     struct net_device **net_dev)
 {
 	struct cma_req_info req;
 	struct rdma_bind_list *bind_list;
@@ -1498,10 +1532,6 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
 		if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
 			/* Assuming the protocol is AF_IB */
 			*net_dev = NULL;
-		} else if (rdma_protocol_roce(req.device, req.port)) {
-			/* TODO find the net dev matching the request parameters
-			 * through the RoCE GID table */
-			*net_dev = NULL;
 		} else {
 			return ERR_CAST(*net_dev);
 		}
@@ -1629,6 +1659,21 @@ static void cma_release_port(struct rdma_id_private *id_priv)
 	mutex_unlock(&lock);
 }
 
+static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
+				    struct cma_multicast *mc)
+{
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	struct net_device *ndev = NULL;
+
+	if (dev_addr->bound_dev_if)
+		ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+	if (ndev) {
+		cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
+		dev_put(ndev);
+	}
+	kref_put(&mc->mcref, release_mc);
+}
+
 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
 {
 	struct cma_multicast *mc;
@@ -1642,22 +1687,7 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
 			ib_sa_free_multicast(mc->multicast.ib);
 			kfree(mc);
 		} else {
-			if (mc->igmp_joined) {
-				struct rdma_dev_addr *dev_addr =
-					&id_priv->id.route.addr.dev_addr;
-				struct net_device *ndev = NULL;
-
-				if (dev_addr->bound_dev_if)
-					ndev = dev_get_by_index(&init_net,
-								dev_addr->bound_dev_if);
-				if (ndev) {
-					cma_igmp_send(ndev,
-						      &mc->multicast.ib->rec.mgid,
-						      false);
-					dev_put(ndev);
-				}
-			}
-			kref_put(&mc->mcref, release_mc);
+			cma_leave_roce_mc_group(id_priv, mc);
 		}
 	}
 }
@@ -1699,6 +1729,10 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 		cma_deref_id(id_priv->id.context);
 
 	kfree(id_priv->id.route.path_rec);
+
+	if (id_priv->id.route.addr.dev_addr.sgid_attr)
+		rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+
 	put_net(id_priv->id.route.addr.dev_addr.net);
 	kfree(id_priv);
 }
@@ -1730,7 +1764,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
 }
 
 static void cma_set_rep_event_data(struct rdma_cm_event *event,
-				   struct ib_cm_rep_event_param *rep_data,
+				   const struct ib_cm_rep_event_param *rep_data,
 				   void *private_data)
 {
 	event->param.conn.private_data = private_data;
@@ -1743,10 +1777,11 @@ static void cma_set_rep_event_data(struct rdma_cm_event *event,
 	event->param.conn.qp_num = rep_data->remote_qpn;
 }
 
-static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+static int cma_ib_handler(struct ib_cm_id *cm_id,
+			  const struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *id_priv = cm_id->context;
-	struct rdma_cm_event event;
+	struct rdma_cm_event event = {};
 	int ret = 0;
 
 	mutex_lock(&id_priv->handler_mutex);
@@ -1756,7 +1791,6 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	     id_priv->state != RDMA_CM_DISCONNECT))
 		goto out;
 
-	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
 	case IB_CM_REQ_ERROR:
 	case IB_CM_REP_ERROR:
@@ -1825,9 +1859,10 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	return ret;
 }
 
-static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
-					       struct ib_cm_event *ib_event,
-					       struct net_device *net_dev)
+static struct rdma_id_private *
+cma_ib_new_conn_id(const struct rdma_cm_id *listen_id,
+		   const struct ib_cm_event *ib_event,
+		   struct net_device *net_dev)
 {
 	struct rdma_id_private *listen_id_priv;
 	struct rdma_id_private *id_priv;
@@ -1888,11 +1923,12 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
 	return NULL;
 }
 
-static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
-					      struct ib_cm_event *ib_event,
-					      struct net_device *net_dev)
+static struct rdma_id_private *
+cma_ib_new_udp_id(const struct rdma_cm_id *listen_id,
+		  const struct ib_cm_event *ib_event,
+		  struct net_device *net_dev)
 {
-	struct rdma_id_private *listen_id_priv;
+	const struct rdma_id_private *listen_id_priv;
 	struct rdma_id_private *id_priv;
 	struct rdma_cm_id *id;
 	const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
@@ -1932,7 +1968,7 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
 }
 
 static void cma_set_req_event_data(struct rdma_cm_event *event,
-				   struct ib_cm_req_event_param *req_data,
+				   const struct ib_cm_req_event_param *req_data,
 				   void *private_data, int offset)
 {
 	event->param.conn.private_data = private_data + offset;
@@ -1946,7 +1982,8 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
 	event->param.conn.qp_num = req_data->remote_qpn;
 }
 
-static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
+static int cma_ib_check_req_qp_type(const struct rdma_cm_id *id,
+				    const struct ib_cm_event *ib_event)
 {
 	return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
 		 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
@@ -1955,19 +1992,20 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e
 		(!id->qp_type));
 }
 
-static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
+static int cma_ib_req_handler(struct ib_cm_id *cm_id,
+			      const struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *listen_id, *conn_id = NULL;
-	struct rdma_cm_event event;
+	struct rdma_cm_event event = {};
 	struct net_device *net_dev;
 	u8 offset;
 	int ret;
 
-	listen_id = cma_id_from_event(cm_id, ib_event, &net_dev);
+	listen_id = cma_ib_id_from_event(cm_id, ib_event, &net_dev);
 	if (IS_ERR(listen_id))
 		return PTR_ERR(listen_id);
 
-	if (!cma_check_req_qp_type(&listen_id->id, ib_event)) {
+	if (!cma_ib_check_req_qp_type(&listen_id->id, ib_event)) {
 		ret = -EINVAL;
 		goto net_dev_put;
 	}
@@ -1978,16 +2016,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		goto err1;
 	}
 
-	memset(&event, 0, sizeof event);
 	offset = cma_user_data_offset(listen_id);
 	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
 	if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) {
-		conn_id = cma_new_udp_id(&listen_id->id, ib_event, net_dev);
+		conn_id = cma_ib_new_udp_id(&listen_id->id, ib_event, net_dev);
 		event.param.ud.private_data = ib_event->private_data + offset;
 		event.param.ud.private_data_len =
 				IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
 	} else {
-		conn_id = cma_new_conn_id(&listen_id->id, ib_event, net_dev);
+		conn_id = cma_ib_new_conn_id(&listen_id->id, ib_event, net_dev);
 		cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
 				       ib_event->private_data, offset);
 	}
@@ -2087,7 +2124,7 @@ EXPORT_SYMBOL(rdma_read_gids);
 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 {
 	struct rdma_id_private *id_priv = iw_id->context;
-	struct rdma_cm_event event;
+	struct rdma_cm_event event = {};
 	int ret = 0;
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
@@ -2096,7 +2133,6 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	if (id_priv->state != RDMA_CM_CONNECT)
 		goto out;
 
-	memset(&event, 0, sizeof event);
 	switch (iw_event->event) {
 	case IW_CM_EVENT_CLOSE:
 		event.event = RDMA_CM_EVENT_DISCONNECTED;
@@ -2156,11 +2192,17 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 {
 	struct rdma_cm_id *new_cm_id;
 	struct rdma_id_private *listen_id, *conn_id;
-	struct rdma_cm_event event;
+	struct rdma_cm_event event = {};
 	int ret = -ECONNABORTED;
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
+	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
+	event.param.conn.private_data = iw_event->private_data;
+	event.param.conn.private_data_len = iw_event->private_data_len;
+	event.param.conn.initiator_depth = iw_event->ird;
+	event.param.conn.responder_resources = iw_event->ord;
+
 	listen_id = cm_id->context;
 
 	mutex_lock(&listen_id->handler_mutex);
@@ -2202,13 +2244,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
 	memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
 
-	memset(&event, 0, sizeof event);
-	event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
-	event.param.conn.private_data = iw_event->private_data;
-	event.param.conn.private_data_len = iw_event->private_data_len;
-	event.param.conn.initiator_depth = iw_event->ird;
-	event.param.conn.responder_resources = iw_event->ord;
-
 	/*
 	 * Protect against the user destroying conn_id from another thread
 	 * until we're done accessing it.
@@ -2241,7 +2276,8 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
 
 	addr = cma_src_addr(id_priv);
 	svc_id = rdma_get_service_id(&id_priv->id, addr);
-	id = ib_cm_insert_listen(id_priv->id.device, cma_req_handler, svc_id);
+	id = ib_cm_insert_listen(id_priv->id.device,
+				 cma_ib_req_handler, svc_id);
 	if (IS_ERR(id))
 		return PTR_ERR(id);
 	id_priv->cm_id.ib = id;
@@ -2561,8 +2597,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
 	route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
 
 	route->path_rec->roce.route_resolved = true;
-	sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
-	sa_path_set_ifindex(route->path_rec, ndev->ifindex);
 	sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
 	return ndev;
 }
@@ -2791,7 +2825,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
 	p = 1;
 
 port_found:
-	ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
+	ret = rdma_query_gid(cma_dev->device, p, 0, &gid);
 	if (ret)
 		goto out;
 
@@ -2817,9 +2851,8 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 			 struct rdma_dev_addr *dev_addr, void *context)
 {
 	struct rdma_id_private *id_priv = context;
-	struct rdma_cm_event event;
+	struct rdma_cm_event event = {};
 
-	memset(&event, 0, sizeof event);
 	mutex_lock(&id_priv->handler_mutex);
 	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY,
 			   RDMA_CM_ADDR_RESOLVED))
@@ -2910,7 +2943,7 @@ static int cma_resolve_ib_addr(struct rdma_id_private *id_priv)
 }
 
 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-			 struct sockaddr *dst_addr)
+			 const struct sockaddr *dst_addr)
 {
 	if (!src_addr || !src_addr->sa_family) {
 		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
@@ -2931,31 +2964,25 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 }
 
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-		      struct sockaddr *dst_addr, int timeout_ms)
+		      const struct sockaddr *dst_addr, int timeout_ms)
 {
 	struct rdma_id_private *id_priv;
 	int ret;
 
 	id_priv = container_of(id, struct rdma_id_private, id);
-	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
 	if (id_priv->state == RDMA_CM_IDLE) {
 		ret = cma_bind_addr(id, src_addr, dst_addr);
-		if (ret) {
-			memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+		if (ret)
 			return ret;
-		}
 	}
 
-	if (cma_family(id_priv) != dst_addr->sa_family) {
-		memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+	if (cma_family(id_priv) != dst_addr->sa_family)
 		return -EINVAL;
-	}
 
-	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
-		memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
+	if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
 		return -EINVAL;
-	}
 
+	memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
 	atomic_inc(&id_priv->refcount);
 	if (cma_any_addr(dst_addr)) {
 		ret = cma_resolve_loopback(id_priv);
@@ -3451,18 +3478,18 @@ static int cma_format_hdr(void *hdr, struct rdma_id_private *id_priv)
 }
 
 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
-				struct ib_cm_event *ib_event)
+				const struct ib_cm_event *ib_event)
 {
 	struct rdma_id_private *id_priv = cm_id->context;
-	struct rdma_cm_event event;
-	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
+	struct rdma_cm_event event = {};
+	const struct ib_cm_sidr_rep_event_param *rep =
+				&ib_event->param.sidr_rep_rcvd;
 	int ret = 0;
 
 	mutex_lock(&id_priv->handler_mutex);
 	if (id_priv->state != RDMA_CM_CONNECT)
 		goto out;
 
-	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
 	case IB_CM_SIDR_REQ_ERROR:
 		event.event = RDMA_CM_EVENT_UNREACHABLE;
@@ -3488,7 +3515,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 		ib_init_ah_attr_from_path(id_priv->id.device,
 					  id_priv->id.port_num,
 					  id_priv->id.route.path_rec,
-					  &event.param.ud.ah_attr);
+					  &event.param.ud.ah_attr,
+					  rep->sgid_attr);
 		event.param.ud.qp_num = rep->qpn;
 		event.param.ud.qkey = rep->qkey;
 		event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -3501,6 +3529,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	}
 
 	ret = id_priv->id.event_handler(&id_priv->id, &event);
+
+	rdma_destroy_ah_attr(&event.param.ud.ah_attr);
 	if (ret) {
 		/* Destroy the CM ID by returning a non-zero value. */
 		id_priv->cm_id.ib = NULL;
@@ -3557,6 +3587,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
 	id_priv->cm_id.ib = id;
 
 	req.path = id_priv->id.route.path_rec;
+	req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
 	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
 	req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
 	req.max_cm_retries = CMA_MAX_CM_RETRIES;
@@ -3618,6 +3649,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
 	if (route->num_paths == 2)
 		req.alternate_path = &route->path_rec[1];
 
+	req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
+	/* Alternate path SGID attribute currently unsupported */
 	req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
 	req.qp_num = id_priv->qp_num;
 	req.qp_type = id_priv->id.qp_type;
@@ -3928,7 +3961,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 {
 	struct rdma_id_private *id_priv;
 	struct cma_multicast *mc = multicast->context;
-	struct rdma_cm_event event;
+	struct rdma_cm_event event = {};
 	int ret = 0;
 
 	id_priv = mc->id_priv;
@@ -3952,7 +3985,6 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	}
 	mutex_unlock(&id_priv->qp_mutex);
 
-	memset(&event, 0, sizeof event);
 	event.status = status;
 	event.param.ud.private_data = mc->context;
 	if (!status) {
@@ -3981,6 +4013,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
 
 	ret = id_priv->id.event_handler(&id_priv->id, &event);
+
+	rdma_destroy_ah_attr(&event.param.ud.ah_attr);
 	if (ret) {
 		cma_exch(id_priv, RDMA_CM_DESTROYING);
 		mutex_unlock(&id_priv->handler_mutex);
@@ -4010,7 +4044,7 @@ static void cma_set_mgid(struct rdma_id_private *id_priv,
 		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
 	} else if (addr->sa_family == AF_IB) {
 		memcpy(mgid, &((struct sockaddr_ib *) addr)->sib_addr, sizeof *mgid);
-	} else if ((addr->sa_family == AF_INET6)) {
+	} else if (addr->sa_family == AF_INET6) {
 		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
 		if (id_priv->id.ps == RDMA_PS_UDP)
 			mc_map[7] = 0x01;	/* Use RDMA CM signature */
@@ -4168,8 +4202,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 			if (!send_only) {
 				err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
 						    true);
-				if (!err)
-					mc->igmp_joined = true;
 			}
 		}
 	} else {
@@ -4221,26 +4253,29 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 	memcpy(&mc->addr, addr, rdma_addr_size(addr));
 	mc->context = context;
 	mc->id_priv = id_priv;
-	mc->igmp_joined = false;
 	mc->join_state = join_state;
-	spin_lock(&id_priv->lock);
-	list_add(&mc->list, &id_priv->mc_list);
-	spin_unlock(&id_priv->lock);
 
 	if (rdma_protocol_roce(id->device, id->port_num)) {
 		kref_init(&mc->mcref);
 		ret = cma_iboe_join_multicast(id_priv, mc);
-	} else if (rdma_cap_ib_mcast(id->device, id->port_num))
+		if (ret)
+			goto out_err;
+	} else if (rdma_cap_ib_mcast(id->device, id->port_num)) {
 		ret = cma_join_ib_multicast(id_priv, mc);
-	else
+		if (ret)
+			goto out_err;
+	} else {
 		ret = -ENOSYS;
-
-	if (ret) {
-		spin_lock_irq(&id_priv->lock);
-		list_del(&mc->list);
-		spin_unlock_irq(&id_priv->lock);
-		kfree(mc);
+		goto out_err;
 	}
+
+	spin_lock(&id_priv->lock);
+	list_add(&mc->list, &id_priv->mc_list);
+	spin_unlock(&id_priv->lock);
+
+	return 0;
+out_err:
+	kfree(mc);
 	return ret;
 }
 EXPORT_SYMBOL(rdma_join_multicast);
@@ -4268,23 +4303,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 				ib_sa_free_multicast(mc->multicast.ib);
 				kfree(mc);
 			} else if (rdma_protocol_roce(id->device, id->port_num)) {
-				if (mc->igmp_joined) {
-					struct rdma_dev_addr *dev_addr =
-						&id->route.addr.dev_addr;
-					struct net_device *ndev = NULL;
-
-					if (dev_addr->bound_dev_if)
-						ndev = dev_get_by_index(dev_addr->net,
-									dev_addr->bound_dev_if);
-					if (ndev) {
-						cma_igmp_send(ndev,
-							      &mc->multicast.ib->rec.mgid,
-							      false);
-						dev_put(ndev);
-					}
-					mc->igmp_joined = false;
-				}
-				kref_put(&mc->mcref, release_mc);
+				cma_leave_roce_mc_group(id_priv, mc);
 			}
 			return;
 		}
@@ -4410,7 +4429,7 @@ static void cma_add_one(struct ib_device *device)
 
 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
 {
-	struct rdma_cm_event event;
+	struct rdma_cm_event event = {};
 	enum rdma_cm_state state;
 	int ret = 0;
 
@@ -4426,7 +4445,6 @@ static int cma_remove_id_dev(struct rdma_id_private *id_priv)
 	if (!cma_comp(id_priv, RDMA_CM_DEVICE_REMOVAL))
 		goto out;
 
-	memset(&event, 0, sizeof event);
 	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
 	ret = id_priv->id.event_handler(&id_priv->id, &event);
 out:
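
The cma.c hunks above convert cma_validate_port() from returning an int plus a GID index into returning a referenced const struct ib_gid_attr * (or an ERR_PTR), which is then parked in dev_addr.sgid_attr and released in rdma_destroy_id(). A minimal sketch of that acquire/use/release pairing, with a made-up helper name and assuming the declarations live in <rdma/ib_verbs.h> and <rdma/ib_cache.h>:

  #include <linux/err.h>
  #include <rdma/ib_verbs.h>
  #include <rdma/ib_cache.h>

  /* Illustrative helper: resolve a RoCE SGID entry and report its index. */
  static int example_sgid_index(struct ib_device *device, u8 port,
  				const union ib_gid *gid,
  				struct net_device *ndev)
  {
  	const struct ib_gid_attr *sgid_attr;
  	int index;

  	sgid_attr = rdma_find_gid_by_port(device, gid, IB_GID_TYPE_ROCE,
  					  port, ndev);
  	if (IS_ERR(sgid_attr))
  		return PTR_ERR(sgid_attr);

  	/* The entry is referenced; its fields stay stable until the put. */
  	index = sgid_attr->index;

  	/* Pairs with the reference taken by rdma_find_gid_by_port(). */
  	rdma_put_gid_attr(sgid_attr);
  	return index;
  }

The cm_id path holds the reference for the lifetime of the connection instead of putting it immediately, which is why rdma_destroy_id() now calls rdma_put_gid_attr().
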
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index fae417a..77c7005 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -91,8 +91,8 @@ void ib_device_unregister_sysfs(struct ib_device *device);
 typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
 	      struct net_device *idev, void *cookie);
 
-typedef int (*roce_netdev_filter)(struct ib_device *device, u8 port,
-	     struct net_device *idev, void *cookie);
+typedef bool (*roce_netdev_filter)(struct ib_device *device, u8 port,
+				   struct net_device *idev, void *cookie);
 
 void ib_enum_roce_netdev(struct ib_device *ib_dev,
 			 roce_netdev_filter filter,
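
With roce_netdev_filter now returning bool, a filter callback is simply a predicate over (device, port, netdev, cookie). A trivial illustrative filter (name and cookie use are made up) that would be handed to ib_enum_roce_netdev() together with a per-port callback:

  /* Match only the port whose current netdev equals the cookie. */
  static bool example_ndev_filter(struct ib_device *device, u8 port,
  				  struct net_device *idev, void *cookie)
  {
  	return idev == cookie;
  }
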
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 6fa4c59..db3b627 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -105,8 +105,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
 		IB_MANDATORY_FUNC(query_pkey),
 		IB_MANDATORY_FUNC(alloc_pd),
 		IB_MANDATORY_FUNC(dealloc_pd),
-		IB_MANDATORY_FUNC(create_ah),
-		IB_MANDATORY_FUNC(destroy_ah),
 		IB_MANDATORY_FUNC(create_qp),
 		IB_MANDATORY_FUNC(modify_qp),
 		IB_MANDATORY_FUNC(destroy_qp),
@@ -862,25 +860,6 @@ int ib_query_port(struct ib_device *device,
 EXPORT_SYMBOL(ib_query_port);
 
 /**
- * ib_query_gid - Get GID table entry
- * @device:Device to query
- * @port_num:Port number to query
- * @index:GID table index to query
- * @gid:Returned GID
- * @attr: Returned GID attributes related to this GID index (only in RoCE).
- *   NULL means ignore.
- *
- * ib_query_gid() fetches the specified GID table entry from the cache.
- */
-int ib_query_gid(struct ib_device *device,
-		 u8 port_num, int index, union ib_gid *gid,
-		 struct ib_gid_attr *attr)
-{
-	return ib_get_cached_gid(device, port_num, index, gid, attr);
-}
-EXPORT_SYMBOL(ib_query_gid);
-
-/**
  * ib_enum_roce_netdev - enumerate all RoCE ports
  * @ib_dev : IB device we want to query
  * @filter: Should we call the callback?
@@ -1057,7 +1036,7 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
 			continue;
 
 		for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
-			ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
+			ret = rdma_query_gid(device, port, i, &tmp_gid);
 			if (ret)
 				return ret;
 			if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
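
ib_query_gid() is deleted above; the cached table is now read with rdma_query_gid(), which returns 0 or a negative errno and fills only the GID itself. A small sketch of walking a port's table with the new call, mirroring the loop in ib_find_gid() (helper name illustrative):

  #include <linux/errno.h>
  #include <linux/string.h>
  #include <rdma/ib_verbs.h>
  #include <rdma/ib_cache.h>

  /* Return the table index holding @match on @port, or -ENOENT. */
  static int example_gid_index(struct ib_device *device, u8 port,
  			       const union ib_gid *match)
  {
  	union ib_gid gid;
  	int i;

  	/* rdma_query_gid() fails once the index runs past the table. */
  	for (i = 0; !rdma_query_gid(device, port, i, &gid); i++)
  		if (!memcmp(&gid, match, sizeof(gid)))
  			return i;

  	return -ENOENT;
  }
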
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index f742ae7..ef459f2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -38,6 +38,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/dma-mapping.h>
+#include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/security.h>
@@ -58,8 +59,13 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
 module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
 
+/*
+ * The mlx4 driver uses the top byte to distinguish which virtual function
+ * generated the MAD, so we must avoid using it.
+ */
+#define AGENT_ID_LIMIT		(1 << 24)
+static DEFINE_IDR(ib_mad_clients);
 static struct list_head ib_mad_port_list;
-static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
 
 /* Port list lock */
 static DEFINE_SPINLOCK(ib_mad_port_list_lock);
@@ -190,6 +196,8 @@ EXPORT_SYMBOL(ib_response_mad);
 
 /*
  * ib_register_mad_agent - Register to send/receive MADs
+ *
+ * Context: Process context.
  */
 struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 					   u8 port_num,
@@ -210,7 +218,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 	struct ib_mad_mgmt_vendor_class *vendor_class;
 	struct ib_mad_mgmt_method_table *method;
 	int ret2, qpn;
-	unsigned long flags;
 	u8 mgmt_class, vclass;
 
 	/* Validate parameters */
@@ -376,13 +383,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 		goto error4;
 	}
 
-	spin_lock_irqsave(&port_priv->reg_lock, flags);
-	mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id);
+	idr_preload(GFP_KERNEL);
+	idr_lock(&ib_mad_clients);
+	ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0,
+			AGENT_ID_LIMIT, GFP_ATOMIC);
+	idr_unlock(&ib_mad_clients);
+	idr_preload_end();
+
+	if (ret2 < 0) {
+		ret = ERR_PTR(ret2);
+		goto error5;
+	}
+	mad_agent_priv->agent.hi_tid = ret2;
 
 	/*
 	 * Make sure MAD registration (if supplied)
 	 * is non overlapping with any existing ones
 	 */
+	spin_lock_irq(&port_priv->reg_lock);
 	if (mad_reg_req) {
 		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
 		if (!is_vendor_class(mgmt_class)) {
@@ -393,7 +411,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 				if (method) {
 					if (method_in_use(&method,
 							   mad_reg_req))
-						goto error5;
+						goto error6;
 				}
 			}
 			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
@@ -409,24 +427,25 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
 					if (is_vendor_method_in_use(
 							vendor_class,
 							mad_reg_req))
-						goto error5;
+						goto error6;
 				}
 			}
 			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
 		}
 		if (ret2) {
 			ret = ERR_PTR(ret2);
-			goto error5;
+			goto error6;
 		}
 	}
-
-	/* Add mad agent into port's agent list */
-	list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
-	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+	spin_unlock_irq(&port_priv->reg_lock);
 
 	return &mad_agent_priv->agent;
+error6:
+	spin_unlock_irq(&port_priv->reg_lock);
+	idr_lock(&ib_mad_clients);
+	idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
+	idr_unlock(&ib_mad_clients);
 error5:
-	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
 error4:
 	kfree(reg_req);
@@ -575,7 +594,6 @@ static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv
 static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 {
 	struct ib_mad_port_private *port_priv;
-	unsigned long flags;
 
 	/* Note that we could still be handling received MADs */
 
@@ -587,10 +605,12 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 	port_priv = mad_agent_priv->qp_info->port_priv;
 	cancel_delayed_work(&mad_agent_priv->timed_work);
 
-	spin_lock_irqsave(&port_priv->reg_lock, flags);
+	spin_lock_irq(&port_priv->reg_lock);
 	remove_mad_reg_req(mad_agent_priv);
-	list_del(&mad_agent_priv->agent_list);
-	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+	spin_unlock_irq(&port_priv->reg_lock);
+	idr_lock(&ib_mad_clients);
+	idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
+	idr_unlock(&ib_mad_clients);
 
 	flush_workqueue(port_priv->wq);
 	ib_cancel_rmpp_recvs(mad_agent_priv);
@@ -601,7 +621,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
 	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
 
 	kfree(mad_agent_priv->reg_req);
-	kfree(mad_agent_priv);
+	kfree_rcu(mad_agent_priv, rcu);
 }
 
 static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
@@ -625,6 +645,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
 
 /*
  * ib_unregister_mad_agent - Unregisters a client from using MAD services
+ *
+ * Context: Process context.
  */
 void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
 {
@@ -1159,7 +1181,6 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_mad_qp_info *qp_info;
 	struct list_head *list;
-	struct ib_send_wr *bad_send_wr;
 	struct ib_mad_agent *mad_agent;
 	struct ib_sge *sge;
 	unsigned long flags;
@@ -1197,7 +1218,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
 		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
-				   &bad_send_wr);
+				   NULL);
 		list = &qp_info->send_queue.list;
 	} else {
 		ret = 0;
@@ -1720,22 +1741,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
 	struct ib_mad_agent_private *mad_agent = NULL;
 	unsigned long flags;
 
-	spin_lock_irqsave(&port_priv->reg_lock, flags);
 	if (ib_response_mad(mad_hdr)) {
 		u32 hi_tid;
-		struct ib_mad_agent_private *entry;
 
 		/*
 		 * Routing is based on high 32 bits of transaction ID
 		 * of MAD.
 		 */
 		hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
-		list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
-			if (entry->agent.hi_tid == hi_tid) {
-				mad_agent = entry;
-				break;
-			}
-		}
+		rcu_read_lock();
+		mad_agent = idr_find(&ib_mad_clients, hi_tid);
+		if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
+			mad_agent = NULL;
+		rcu_read_unlock();
 	} else {
 		struct ib_mad_mgmt_class_table *class;
 		struct ib_mad_mgmt_method_table *method;
@@ -1744,6 +1762,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
 		const struct ib_vendor_mad *vendor_mad;
 		int index;
 
+		spin_lock_irqsave(&port_priv->reg_lock, flags);
 		/*
 		 * Routing is based on version, class, and method
 		 * For "newer" vendor MADs, also based on OUI
@@ -1783,20 +1802,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
 							  ~IB_MGMT_METHOD_RESP];
 			}
 		}
+		if (mad_agent)
+			atomic_inc(&mad_agent->refcount);
+out:
+		spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 	}
 
-	if (mad_agent) {
-		if (mad_agent->agent.recv_handler)
-			atomic_inc(&mad_agent->refcount);
-		else {
-			dev_notice(&port_priv->device->dev,
-				   "No receive handler for client %p on port %d\n",
-				   &mad_agent->agent, port_priv->port_num);
-			mad_agent = NULL;
-		}
+	if (mad_agent && !mad_agent->agent.recv_handler) {
+		dev_notice(&port_priv->device->dev,
+			   "No receive handler for client %p on port %d\n",
+			   &mad_agent->agent, port_priv->port_num);
+		deref_mad_agent(mad_agent);
+		mad_agent = NULL;
 	}
-out:
-	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 
 	return mad_agent;
 }
@@ -1896,8 +1914,8 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
 			const struct ib_global_route *grh =
 					rdma_ah_read_grh(&attr);
 
-			if (ib_get_cached_gid(device, port_num,
-					      grh->sgid_index, &sgid, NULL))
+			if (rdma_query_gid(device, port_num,
+					   grh->sgid_index, &sgid))
 				return 0;
 			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
 				       16);
@@ -2457,7 +2475,6 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
 	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
 	struct ib_mad_qp_info		*qp_info;
 	struct ib_mad_queue		*send_queue;
-	struct ib_send_wr		*bad_send_wr;
 	struct ib_mad_send_wc		mad_send_wc;
 	unsigned long flags;
 	int ret;
@@ -2507,7 +2524,7 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
 
 	if (queued_send_wr) {
 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
-				   &bad_send_wr);
+				   NULL);
 		if (ret) {
 			dev_err(&port_priv->device->dev,
 				"ib_post_send failed: %d\n", ret);
@@ -2552,11 +2569,9 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
 	if (wc->status == IB_WC_WR_FLUSH_ERR) {
 		if (mad_send_wr->retry) {
 			/* Repost send */
-			struct ib_send_wr *bad_send_wr;
-
 			mad_send_wr->retry = 0;
 			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
-					&bad_send_wr);
+					   NULL);
 			if (!ret)
 				return false;
 		}
@@ -2872,7 +2887,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 	int post, ret;
 	struct ib_mad_private *mad_priv;
 	struct ib_sge sg_list;
-	struct ib_recv_wr recv_wr, *bad_recv_wr;
+	struct ib_recv_wr recv_wr;
 	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
 
 	/* Initialize common scatter list fields */
@@ -2916,7 +2931,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
 		post = (++recv_queue->count < recv_queue->max_active);
 		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
 		spin_unlock_irqrestore(&recv_queue->lock, flags);
-		ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
+		ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
 		if (ret) {
 			spin_lock_irqsave(&recv_queue->lock, flags);
 			list_del(&mad_priv->header.mad_list.list);
@@ -3159,7 +3174,6 @@ static int ib_mad_port_open(struct ib_device *device,
 	port_priv->device = device;
 	port_priv->port_num = port_num;
 	spin_lock_init(&port_priv->reg_lock);
-	INIT_LIST_HEAD(&port_priv->agent_list);
 	init_mad_qp(port_priv, &port_priv->qp_info[0]);
 	init_mad_qp(port_priv, &port_priv->qp_info[1]);
 
@@ -3338,6 +3352,9 @@ int ib_mad_init(void)
 
 	INIT_LIST_HEAD(&ib_mad_port_list);
 
+	/* Client ID 0 is used for snoop-only clients */
+	idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL);
+
 	if (ib_register_client(&mad_client)) {
 		pr_err("Couldn't register ib_mad client\n");
 		return -EINVAL;
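
The mad.c rework above swaps the global atomic client-ID counter for an IDR keyed by the agent's hi_tid: allocation is cyclic and bounded below AGENT_ID_LIMIT so mlx4 can keep using the top byte, and response routing becomes an RCU-protected idr_find() plus a refcount check. A condensed sketch of that allocate/lookup pair (names simplified, and assuming the mad_priv.h definitions are in scope):

  #include <linux/idr.h>
  #include <linux/rcupdate.h>
  #include <linux/atomic.h>

  #define EXAMPLE_ID_LIMIT	(1 << 24)	/* keep the top byte free */

  static DEFINE_IDR(example_agents);

  /* Assign the next free ID below EXAMPLE_ID_LIMIT to @agent. */
  static int example_alloc_id(struct ib_mad_agent_private *agent)
  {
  	int id;

  	idr_preload(GFP_KERNEL);
  	idr_lock(&example_agents);
  	id = idr_alloc_cyclic(&example_agents, agent, 0, EXAMPLE_ID_LIMIT,
  			      GFP_ATOMIC);
  	idr_unlock(&example_agents);
  	idr_preload_end();
  	return id;	/* negative errno on failure */
  }

  /* Route a response MAD by the high 32 bits of its TID. */
  static struct ib_mad_agent_private *example_find_agent(u32 hi_tid)
  {
  	struct ib_mad_agent_private *agent;

  	rcu_read_lock();
  	agent = idr_find(&example_agents, hi_tid);
  	if (agent && !atomic_inc_not_zero(&agent->refcount))
  		agent = NULL;	/* racing with unregistration */
  	rcu_read_unlock();
  	return agent;	/* caller drops the reference via deref_mad_agent() */
  }
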
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 28669f6..d84ae16 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -89,7 +89,6 @@ struct ib_rmpp_segment {
 };
 
 struct ib_mad_agent_private {
-	struct list_head agent_list;
 	struct ib_mad_agent agent;
 	struct ib_mad_reg_req *reg_req;
 	struct ib_mad_qp_info *qp_info;
@@ -105,7 +104,10 @@ struct ib_mad_agent_private {
 	struct list_head rmpp_list;
 
 	atomic_t refcount;
-	struct completion comp;
+	union {
+		struct completion comp;
+		struct rcu_head rcu;
+	};
 };
 
 struct ib_mad_snoop_private {
@@ -203,7 +205,6 @@ struct ib_mad_port_private {
 
 	spinlock_t reg_lock;
 	struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
-	struct list_head agent_list;
 	struct workqueue_struct *wq;
 	struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
 };
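
The union added above works because the two members are never live at the same time: while the agent is registered, comp is used to wait for outstanding references; once it is unregistered and the last reference is gone, only the RCU head is needed to defer the kfree past lockless readers in find_mad_agent(). A sketch of that teardown order (illustrative, condensed from the unregister path):

  #include <linux/completion.h>
  #include <linux/rcupdate.h>

  /* Illustrative teardown: wait out references, then free after RCU. */
  static void example_agent_teardown(struct ib_mad_agent_private *agent)
  {
  	/* deref_mad_agent() completes this when the refcount hits zero. */
  	wait_for_completion(&agent->comp);

  	/*
  	 * find_mad_agent() may still be inspecting the old IDR slot under
  	 * rcu_read_lock(), so the storage must outlive a grace period.
  	 * The completion is finished, so its bytes are reused as rcu_head.
  	 */
  	kfree_rcu(agent, rcu);
  }
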
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 6c48f41..d50ff70 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -716,14 +716,28 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
 }
 EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
 
+/**
+ * ib_init_ah_from_mcmember - Initialize AH attribute from multicast
+ * member record and gid of the device.
+ * @device:	RDMA device
+ * @port_num:	Port of the rdma device to consider
+ * @ndev:	Optional netdevice, applicable only for RoCE
+ * @gid_type:	GID type to consider
+ * @ah_attr:	AH attribute to fill in on successful completion
+ *
+ * ib_init_ah_from_mcmember() initializes AH attribute based on multicast
+ * member record and other device properties. On success the caller is
+ * responsible for calling rdma_destroy_ah_attr() on the ah_attr. Returns 0 on
+ * success or appropriate error code.
+ *
+ */
 int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
 			     struct ib_sa_mcmember_rec *rec,
 			     struct net_device *ndev,
 			     enum ib_gid_type gid_type,
 			     struct rdma_ah_attr *ah_attr)
 {
-	int ret;
-	u16 gid_index;
+	const struct ib_gid_attr *sgid_attr;
 
 	/* GID table is not based on the netdevice for IB link layer,
 	 * so ignore ndev during search.
@@ -733,26 +747,22 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
 	else if (!rdma_protocol_roce(device, port_num))
 		return -EINVAL;
 
-	ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
-					 gid_type, port_num,
-					 ndev,
-					 &gid_index);
-	if (ret)
-		return ret;
+	sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid,
+					  gid_type, port_num, ndev);
+	if (IS_ERR(sgid_attr))
+		return PTR_ERR(sgid_attr);
 
-	memset(ah_attr, 0, sizeof *ah_attr);
+	memset(ah_attr, 0, sizeof(*ah_attr));
 	ah_attr->type = rdma_ah_find_type(device, port_num);
 
 	rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid));
 	rdma_ah_set_sl(ah_attr, rec->sl);
 	rdma_ah_set_port_num(ah_attr, port_num);
 	rdma_ah_set_static_rate(ah_attr, rec->rate);
-
-	rdma_ah_set_grh(ah_attr, &rec->mgid,
-			be32_to_cpu(rec->flow_label),
-			(u8)gid_index,
-			rec->hop_limit,
-			rec->traffic_class);
+	rdma_move_grh_sgid_attr(ah_attr, &rec->mgid,
+				be32_to_cpu(rec->flow_label),
+				rec->hop_limit,	rec->traffic_class,
+				sgid_attr);
 	return 0;
 }
 EXPORT_SYMBOL(ib_init_ah_from_mcmember);
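
The kernel-doc added above spells out the new ownership rule: a successful ib_init_ah_from_mcmember() moves a held SGID reference into the AH attribute, so the caller must dispose of it with rdma_destroy_ah_attr(). A minimal caller sketch (the wrapper name is made up, and the ib_sa.h/ib_verbs.h declarations are assumed):

  /* Build an AH attribute from a mcmember record, use it, release it. */
  static int example_use_mcmember(struct ib_device *device, u8 port_num,
  				  struct ib_sa_mcmember_rec *rec,
  				  struct net_device *ndev,
  				  enum ib_gid_type gid_type)
  {
  	struct rdma_ah_attr ah_attr;
  	int ret;

  	ret = ib_init_ah_from_mcmember(device, port_num, rec, ndev,
  				       gid_type, &ah_attr);
  	if (ret)
  		return ret;

  	/* ... hand ah_attr to the consumer (e.g. an event handler) ... */

  	/* Drops the SGID reference that was moved into ah_attr. */
  	rdma_destroy_ah_attr(&ah_attr);
  	return 0;
  }
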
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 340c7be..0385ab4 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -237,15 +237,15 @@ static int fill_port_info(struct sk_buff *msg,
 	if (ret)
 		return ret;
 
-	BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
-	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
-			      (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD))
-		return -EMSGSIZE;
-	if (rdma_protocol_ib(device, port) &&
-	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
-			      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
-		return -EMSGSIZE;
 	if (rdma_protocol_ib(device, port)) {
+		BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
+		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
+				      (u64)attr.port_cap_flags,
+				      RDMA_NLDEV_ATTR_PAD))
+			return -EMSGSIZE;
+		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
+				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
+			return -EMSGSIZE;
 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
 			return -EMSGSIZE;
 		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
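
The rdma_core.c rework below folds the old bool "exclusive" flag into three lookup modes that all act on the same usecnt word: READ adds 1 unless the counter is -1, WRITE swings 0 to -1, and DESTROY takes no count at all. A standalone sketch of that counter protocol on a bare atomic_t (illustrative only, not the uobject code itself):

  #include <linux/atomic.h>
  #include <linux/errno.h>

  /* usecnt: -1 = write locked, 0 = idle, >0 = number of readers. */

  static int example_try_read(atomic_t *usecnt)
  {
  	/* Fails only if a writer already holds the object (-1). */
  	return atomic_fetch_add_unless(usecnt, 1, -1) == -1 ? -EBUSY : 0;
  }

  static int example_try_write(atomic_t *usecnt)
  {
  	/* Exclusive: only succeeds from the idle state. */
  	return atomic_cmpxchg(usecnt, 0, -1) == 0 ? 0 : -EBUSY;
  }

  static void example_read_unlock(atomic_t *usecnt)
  {
  	atomic_dec(usecnt);
  }

  static void example_write_unlock(atomic_t *usecnt)
  {
  	atomic_set(usecnt, 0);
  }
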
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 475910f..6eb64c6 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -32,6 +32,7 @@
 
 #include <linux/file.h>
 #include <linux/anon_inodes.h>
+#include <linux/sched/mm.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/uverbs_types.h>
 #include <linux/rcupdate.h>
@@ -41,51 +42,6 @@
 #include "core_priv.h"
 #include "rdma_core.h"
 
-int uverbs_ns_idx(u16 *id, unsigned int ns_count)
-{
-	int ret = (*id & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT;
-
-	if (ret >= ns_count)
-		return -EINVAL;
-
-	*id &= ~UVERBS_ID_NS_MASK;
-	return ret;
-}
-
-const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
-						   uint16_t object)
-{
-	const struct uverbs_root_spec *object_hash = ibdev->specs_root;
-	const struct uverbs_object_spec_hash *objects;
-	int ret = uverbs_ns_idx(&object, object_hash->num_buckets);
-
-	if (ret < 0)
-		return NULL;
-
-	objects = object_hash->object_buckets[ret];
-
-	if (object >= objects->num_objects)
-		return NULL;
-
-	return objects->objects[object];
-}
-
-const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
-						   uint16_t method)
-{
-	const struct uverbs_method_spec_hash *methods;
-	int ret = uverbs_ns_idx(&method, object->num_buckets);
-
-	if (ret < 0)
-		return NULL;
-
-	methods = object->method_buckets[ret];
-	if (method >= methods->num_methods)
-		return NULL;
-
-	return methods->methods[method];
-}
-
 void uverbs_uobject_get(struct ib_uobject *uobject)
 {
 	kref_get(&uobject->ref);
@@ -96,7 +52,7 @@ static void uverbs_uobject_free(struct kref *ref)
 	struct ib_uobject *uobj =
 		container_of(ref, struct ib_uobject, ref);
 
-	if (uobj->type->type_class->needs_kfree_rcu)
+	if (uobj->uapi_object->type_class->needs_kfree_rcu)
 		kfree_rcu(uobj, rcu);
 	else
 		kfree(uobj);
@@ -107,7 +63,8 @@ void uverbs_uobject_put(struct ib_uobject *uobject)
 	kref_put(&uobject->ref, uverbs_uobject_free);
 }
 
-static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
+static int uverbs_try_lock_object(struct ib_uobject *uobj,
+				  enum rdma_lookup_mode mode)
 {
 	/*
 	 * When a shared access is required, we use a positive counter. Each
@@ -120,27 +77,211 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
 	 * concurrently, setting the counter to zero is enough for releasing
 	 * this lock.
 	 */
-	if (!exclusive)
+	switch (mode) {
+	case UVERBS_LOOKUP_READ:
 		return atomic_fetch_add_unless(&uobj->usecnt, 1, -1) == -1 ?
 			-EBUSY : 0;
-
-	/* lock is either WRITE or DESTROY - should be exclusive */
-	return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+	case UVERBS_LOOKUP_WRITE:
+		/* lock is exclusive */
+		return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
+	case UVERBS_LOOKUP_DESTROY:
+		return 0;
+	}
+	return 0;
 }
 
-static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
-				     const struct uverbs_obj_type *type)
+static void assert_uverbs_usecnt(struct ib_uobject *uobj,
+				 enum rdma_lookup_mode mode)
 {
-	struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
+#ifdef CONFIG_LOCKDEP
+	switch (mode) {
+	case UVERBS_LOOKUP_READ:
+		WARN_ON(atomic_read(&uobj->usecnt) <= 0);
+		break;
+	case UVERBS_LOOKUP_WRITE:
+		WARN_ON(atomic_read(&uobj->usecnt) != -1);
+		break;
+	case UVERBS_LOOKUP_DESTROY:
+		break;
+	}
+#endif
+}
 
+/*
+ * This must be called with the hw_destroy_rwsem locked for read or write,
+ * and the uobject itself must be locked for write.
+ *
+ * Upon return the HW object is guaranteed to be destroyed.
+ *
+ * For RDMA_REMOVE_ABORT, the hw_destroy_rwsem is not required to be held,
+ * however the type's alloc_commit function cannot have been called and the
+ * uobject cannot be on the uobjects_lists
+ *
+ * For RDMA_REMOVE_DESTROY the caller should be holding a kref (e.g. via
+ * rdma_lookup_get_uobject) and the object is left in a state where the caller
+ * needs to call rdma_lookup_put_uobject.
+ *
+ * For all other destroy modes this function internally unlocks the uobject
+ * and consumes the kref on the uobj.
+ */
+static int uverbs_destroy_uobject(struct ib_uobject *uobj,
+				  enum rdma_remove_reason reason)
+{
+	struct ib_uverbs_file *ufile = uobj->ufile;
+	unsigned long flags;
+	int ret;
+
+	lockdep_assert_held(&ufile->hw_destroy_rwsem);
+	assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
+
+	if (uobj->object) {
+		ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason);
+		if (ret) {
+			if (ib_is_destroy_retryable(ret, reason, uobj))
+				return ret;
+
+			/* Nothing to be done, dangle the memory and move on */
+			WARN(true,
+			     "ib_uverbs: failed to remove uobject id %d, driver err=%d",
+			     uobj->id, ret);
+		}
+
+		uobj->object = NULL;
+	}
+
+	if (reason == RDMA_REMOVE_ABORT) {
+		WARN_ON(!list_empty(&uobj->list));
+		WARN_ON(!uobj->context);
+		uobj->uapi_object->type_class->alloc_abort(uobj);
+	}
+
+	uobj->context = NULL;
+
+	/*
+	 * For DESTROY the usecnt is held write locked, the caller is expected
+	 * to unlock it and put the object when done with it. Only DESTROY
+	 * can remove the IDR handle.
+	 */
+	if (reason != RDMA_REMOVE_DESTROY)
+		atomic_set(&uobj->usecnt, 0);
+	else
+		uobj->uapi_object->type_class->remove_handle(uobj);
+
+	if (!list_empty(&uobj->list)) {
+		spin_lock_irqsave(&ufile->uobjects_lock, flags);
+		list_del_init(&uobj->list);
+		spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
+
+		/*
+		 * Pairs with the get in rdma_alloc_commit_uobject(), could
+		 * destroy uobj.
+		 */
+		uverbs_uobject_put(uobj);
+	}
+
+	/*
+	 * When aborting, the stack kref remains owned by the core code, and is
+	 * not transferred into the type. Pairs with the get in alloc_uobj
+	 */
+	if (reason == RDMA_REMOVE_ABORT)
+		uverbs_uobject_put(uobj);
+
+	return 0;
+}
+
+/*
+ * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
+ * sequence. It should only be used from command callbacks. On success the
+ * caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This
+ * version requires the caller to have already obtained an
+ * LOOKUP_DESTROY uobject kref.
+ */
+int uobj_destroy(struct ib_uobject *uobj)
+{
+	struct ib_uverbs_file *ufile = uobj->ufile;
+	int ret;
+
+	down_read(&ufile->hw_destroy_rwsem);
+
+	ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
+	if (ret)
+		goto out_unlock;
+
+	ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY);
+	if (ret) {
+		atomic_set(&uobj->usecnt, 0);
+		goto out_unlock;
+	}
+
+out_unlock:
+	up_read(&ufile->hw_destroy_rwsem);
+	return ret;
+}
+
+/*
+ * uobj_get_destroy destroys the HW object and returns a handle to the uobj
+ * with a NULL object pointer. The caller must pair this with
+ * uobj_put_destroy().
+ */
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
+				      u32 id, struct ib_uverbs_file *ufile)
+{
+	struct ib_uobject *uobj;
+	int ret;
+
+	uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
+	if (IS_ERR(uobj))
+		return uobj;
+
+	ret = uobj_destroy(uobj);
+	if (ret) {
+		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
+		return ERR_PTR(ret);
+	}
+
+	return uobj;
+}
+
+/*
+ * Does both uobj_get_destroy() and uobj_put_destroy().  Returns success_res
+ * on success (negative errno on failure). For use by callers that do not need
+ * the uobj.
+ */
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
+			   struct ib_uverbs_file *ufile, int success_res)
+{
+	struct ib_uobject *uobj;
+
+	uobj = __uobj_get_destroy(obj, id, ufile);
+	if (IS_ERR(uobj))
+		return PTR_ERR(uobj);
+
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+	return success_res;
+}
+
+/* alloc_uobj must be undone by uverbs_destroy_uobject() */
+static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
+				     const struct uverbs_api_object *obj)
+{
+	struct ib_uobject *uobj;
+	struct ib_ucontext *ucontext;
+
+	ucontext = ib_uverbs_get_ucontext(ufile);
+	if (IS_ERR(ucontext))
+		return ERR_CAST(ucontext);
+
+	uobj = kzalloc(obj->type_attrs->obj_size, GFP_KERNEL);
 	if (!uobj)
 		return ERR_PTR(-ENOMEM);
 	/*
 	 * user_handle should be filled by the handler,
 	 * The object is added to the list in the commit stage.
 	 */
-	uobj->context = context;
-	uobj->type = type;
+	uobj->ufile = ufile;
+	uobj->context = ucontext;
+	INIT_LIST_HEAD(&uobj->list);
+	uobj->uapi_object = obj;
 	/*
 	 * Allocated objects start out as write locked to deny any other
 	 * syscalls from accessing them until they are committed. See
@@ -157,45 +298,39 @@ static int idr_add_uobj(struct ib_uobject *uobj)
 	int ret;
 
 	idr_preload(GFP_KERNEL);
-	spin_lock(&uobj->context->ufile->idr_lock);
+	spin_lock(&uobj->ufile->idr_lock);
 
 	/*
 	 * We start with allocating an idr pointing to NULL. This represents an
 	 * object which isn't initialized yet. We'll replace it later on with
 	 * the real object once we commit.
 	 */
-	ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
+	ret = idr_alloc(&uobj->ufile->idr, NULL, 0,
 			min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
 	if (ret >= 0)
 		uobj->id = ret;
 
-	spin_unlock(&uobj->context->ufile->idr_lock);
+	spin_unlock(&uobj->ufile->idr_lock);
 	idr_preload_end();
 
 	return ret < 0 ? ret : 0;
 }
 
-/*
- * It only removes it from the uobjects list, uverbs_uobject_put() is still
- * required.
- */
-static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
-{
-	spin_lock(&uobj->context->ufile->idr_lock);
-	idr_remove(&uobj->context->ufile->idr, uobj->id);
-	spin_unlock(&uobj->context->ufile->idr_lock);
-}
-
 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
-static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
-						 struct ib_ucontext *ucontext,
-						 int id, bool exclusive)
+static struct ib_uobject *
+lookup_get_idr_uobject(const struct uverbs_api_object *obj,
+		       struct ib_uverbs_file *ufile, s64 id,
+		       enum rdma_lookup_mode mode)
 {
 	struct ib_uobject *uobj;
+	unsigned long idrno = id;
+
+	if (id < 0 || id > ULONG_MAX)
+		return ERR_PTR(-EINVAL);
 
 	rcu_read_lock();
 	/* object won't be released as we're protected in rcu */
-	uobj = idr_find(&ucontext->ufile->idr, id);
+	uobj = idr_find(&ufile->idr, idrno);
 	if (!uobj) {
 		uobj = ERR_PTR(-ENOENT);
 		goto free;
@@ -215,19 +350,28 @@ static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *t
 	return uobj;
 }
 
-static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
-						struct ib_ucontext *ucontext,
-						int id, bool exclusive)
+static struct ib_uobject *
+lookup_get_fd_uobject(const struct uverbs_api_object *obj,
+		      struct ib_uverbs_file *ufile, s64 id,
+		      enum rdma_lookup_mode mode)
 {
+	const struct uverbs_obj_fd_type *fd_type;
 	struct file *f;
 	struct ib_uobject *uobject;
-	const struct uverbs_obj_fd_type *fd_type =
-		container_of(type, struct uverbs_obj_fd_type, type);
+	int fdno = id;
 
-	if (exclusive)
+	if (fdno != id)
+		return ERR_PTR(-EINVAL);
+
+	if (mode != UVERBS_LOOKUP_READ)
 		return ERR_PTR(-EOPNOTSUPP);
 
-	f = fget(id);
+	if (!obj->type_attrs)
+		return ERR_PTR(-EIO);
+	fd_type =
+		container_of(obj->type_attrs, struct uverbs_obj_fd_type, type);
+
+	f = fget(fdno);
 	if (!f)
 		return ERR_PTR(-EBADF);
 
@@ -246,43 +390,55 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty
 	return uobject;
 }
 
-struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
-					   struct ib_ucontext *ucontext,
-					   int id, bool exclusive)
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
+					   struct ib_uverbs_file *ufile, s64 id,
+					   enum rdma_lookup_mode mode)
 {
 	struct ib_uobject *uobj;
 	int ret;
 
-	uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
+	if (!obj)
+		return ERR_PTR(-EINVAL);
+
+	uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
 	if (IS_ERR(uobj))
 		return uobj;
 
-	if (uobj->type != type) {
+	if (uobj->uapi_object != obj) {
 		ret = -EINVAL;
 		goto free;
 	}
 
-	ret = uverbs_try_lock_object(uobj, exclusive);
-	if (ret) {
-		WARN(ucontext->cleanup_reason,
-		     "ib_uverbs: Trying to lookup_get while cleanup context\n");
+	/*
+	 * If we have been disassociated block every command except for
+	 * DESTROY based commands.
+	 */
+	if (mode != UVERBS_LOOKUP_DESTROY &&
+	    !srcu_dereference(ufile->device->ib_dev,
+			      &ufile->device->disassociate_srcu)) {
+		ret = -EIO;
 		goto free;
 	}
 
+	ret = uverbs_try_lock_object(uobj, mode);
+	if (ret)
+		goto free;
+
 	return uobj;
 free:
-	uobj->type->type_class->lookup_put(uobj, exclusive);
+	obj->type_class->lookup_put(uobj, mode);
 	uverbs_uobject_put(uobj);
 	return ERR_PTR(ret);
 }
 
-static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
-						  struct ib_ucontext *ucontext)
+static struct ib_uobject *
+alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
+			struct ib_uverbs_file *ufile)
 {
 	int ret;
 	struct ib_uobject *uobj;
 
-	uobj = alloc_uobj(ucontext, type);
+	uobj = alloc_uobj(ufile, obj);
 	if (IS_ERR(uobj))
 		return uobj;
 
@@ -290,7 +446,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *
 	if (ret)
 		goto uobj_put;
 
-	ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
+	ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
 				   RDMACG_RESOURCE_HCA_OBJECT);
 	if (ret)
 		goto idr_remove;
@@ -298,304 +454,305 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *
 	return uobj;
 
 idr_remove:
-	uverbs_idr_remove_uobj(uobj);
+	spin_lock(&ufile->idr_lock);
+	idr_remove(&ufile->idr, uobj->id);
+	spin_unlock(&ufile->idr_lock);
 uobj_put:
 	uverbs_uobject_put(uobj);
 	return ERR_PTR(ret);
 }
 
-static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
-						 struct ib_ucontext *ucontext)
+static struct ib_uobject *
+alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
+		       struct ib_uverbs_file *ufile)
 {
-	const struct uverbs_obj_fd_type *fd_type =
-		container_of(type, struct uverbs_obj_fd_type, type);
 	int new_fd;
 	struct ib_uobject *uobj;
-	struct ib_uobject_file *uobj_file;
-	struct file *filp;
 
 	new_fd = get_unused_fd_flags(O_CLOEXEC);
 	if (new_fd < 0)
 		return ERR_PTR(new_fd);
 
-	uobj = alloc_uobj(ucontext, type);
+	uobj = alloc_uobj(ufile, obj);
 	if (IS_ERR(uobj)) {
 		put_unused_fd(new_fd);
 		return uobj;
 	}
 
-	uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
-	filp = anon_inode_getfile(fd_type->name,
-				  fd_type->fops,
-				  uobj_file,
-				  fd_type->flags);
-	if (IS_ERR(filp)) {
-		put_unused_fd(new_fd);
-		uverbs_uobject_put(uobj);
-		return (void *)filp;
-	}
-
-	uobj_file->uobj.id = new_fd;
-	uobj_file->uobj.object = filp;
-	uobj_file->ufile = ucontext->ufile;
-	INIT_LIST_HEAD(&uobj->list);
-	kref_get(&uobj_file->ufile->ref);
+	uobj->id = new_fd;
+	uobj->ufile = ufile;
 
 	return uobj;
 }
 
-struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
-					    struct ib_ucontext *ucontext)
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
+					    struct ib_uverbs_file *ufile)
 {
-	return type->type_class->alloc_begin(type, ucontext);
-}
+	struct ib_uobject *ret;
 
-static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
-						  enum rdma_remove_reason why)
-{
-	const struct uverbs_obj_idr_type *idr_type =
-		container_of(uobj->type, struct uverbs_obj_idr_type,
-			     type);
-	int ret = idr_type->destroy_object(uobj, why);
+	if (!obj)
+		return ERR_PTR(-EINVAL);
 
 	/*
-	 * We can only fail gracefully if the user requested to destroy the
-	 * object. In the rest of the cases, just remove whatever you can.
+	 * The hw_destroy_rwsem is held across the entire object creation and
+	 * released during rdma_alloc_commit_uobject or
+	 * rdma_alloc_abort_uobject
 	 */
-	if (why == RDMA_REMOVE_DESTROY && ret)
-		return ret;
+	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
+		return ERR_PTR(-EIO);
 
-	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
-			   RDMACG_RESOURCE_HCA_OBJECT);
-	uverbs_idr_remove_uobj(uobj);
-
-	return ret;
-}
-
-static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
-{
-	struct ib_uobject_file *uobj_file =
-		container_of(uobj, struct ib_uobject_file, uobj);
-	struct file *filp = uobj->object;
-	int id = uobj_file->uobj.id;
-
-	/* Unsuccessful NEW */
-	fput(filp);
-	put_unused_fd(id);
-}
-
-static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
-						 enum rdma_remove_reason why)
-{
-	const struct uverbs_obj_fd_type *fd_type =
-		container_of(uobj->type, struct uverbs_obj_fd_type, type);
-	struct ib_uobject_file *uobj_file =
-		container_of(uobj, struct ib_uobject_file, uobj);
-	int ret = fd_type->context_closed(uobj_file, why);
-
-	if (why == RDMA_REMOVE_DESTROY && ret)
-		return ret;
-
-	if (why == RDMA_REMOVE_DURING_CLEANUP) {
-		alloc_abort_fd_uobject(uobj);
+	ret = obj->type_class->alloc_begin(obj, ufile);
+	if (IS_ERR(ret)) {
+		up_read(&ufile->hw_destroy_rwsem);
 		return ret;
 	}
-
-	uobj_file->uobj.context = NULL;
 	return ret;
 }
 
-static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
-{
-#ifdef CONFIG_LOCKDEP
-	if (exclusive)
-		WARN_ON(atomic_read(&uobj->usecnt) != -1);
-	else
-		WARN_ON(atomic_read(&uobj->usecnt) <= 0);
-#endif
-}
-
-static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
-						    enum rdma_remove_reason why)
-{
-	int ret;
-	struct ib_ucontext *ucontext = uobj->context;
-
-	ret = uobj->type->type_class->remove_commit(uobj, why);
-	if (ret && why == RDMA_REMOVE_DESTROY) {
-		/* We couldn't remove the object, so just unlock the uobject */
-		atomic_set(&uobj->usecnt, 0);
-		uobj->type->type_class->lookup_put(uobj, true);
-	} else {
-		mutex_lock(&ucontext->uobjects_lock);
-		list_del(&uobj->list);
-		mutex_unlock(&ucontext->uobjects_lock);
-		/* put the ref we took when we created the object */
-		uverbs_uobject_put(uobj);
-	}
-
-	return ret;
-}
-
-/* This is called only for user requested DESTROY reasons */
-int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
-{
-	int ret;
-	struct ib_ucontext *ucontext = uobj->context;
-
-	/* put the ref count we took at lookup_get */
-	uverbs_uobject_put(uobj);
-	/* Cleanup is running. Calling this should have been impossible */
-	if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
-		WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
-		return 0;
-	}
-	assert_uverbs_usecnt(uobj, true);
-	ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
-
-	up_read(&ucontext->cleanup_rwsem);
-	return ret;
-}
-
-static int null_obj_type_class_remove_commit(struct ib_uobject *uobj,
-					     enum rdma_remove_reason why)
-{
-	return 0;
-}
-
-static const struct uverbs_obj_type null_obj_type = {
-	.type_class = &((const struct uverbs_obj_type_class){
-			.remove_commit = null_obj_type_class_remove_commit,
-			/* be cautious */
-			.needs_kfree_rcu = true}),
-};
-
-int rdma_explicit_destroy(struct ib_uobject *uobject)
-{
-	int ret;
-	struct ib_ucontext *ucontext = uobject->context;
-
-	/* Cleanup is running. Calling this should have been impossible */
-	if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
-		WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
-		return 0;
-	}
-	assert_uverbs_usecnt(uobject, true);
-	ret = uobject->type->type_class->remove_commit(uobject,
-						       RDMA_REMOVE_DESTROY);
-	if (ret)
-		goto out;
-
-	uobject->type = &null_obj_type;
-
-out:
-	up_read(&ucontext->cleanup_rwsem);
-	return ret;
-}
-
-static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
-{
-	spin_lock(&uobj->context->ufile->idr_lock);
-	/*
-	 * We already allocated this IDR with a NULL object, so
-	 * this shouldn't fail.
-	 */
-	WARN_ON(idr_replace(&uobj->context->ufile->idr,
-			    uobj, uobj->id));
-	spin_unlock(&uobj->context->ufile->idr_lock);
-}
-
-static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
-{
-	struct ib_uobject_file *uobj_file =
-		container_of(uobj, struct ib_uobject_file, uobj);
-
-	fd_install(uobj_file->uobj.id, uobj->object);
-	/* This shouldn't be used anymore. Use the file object instead */
-	uobj_file->uobj.id = 0;
-	/* Get another reference as we export this to the fops */
-	uverbs_uobject_get(&uobj_file->uobj);
-}
-
-int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
-{
-	/* Cleanup is running. Calling this should have been impossible */
-	if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
-		int ret;
-
-		WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
-		ret = uobj->type->type_class->remove_commit(uobj,
-							    RDMA_REMOVE_DURING_CLEANUP);
-		if (ret)
-			pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
-				uobj->id);
-		return ret;
-	}
-
-	/* matches atomic_set(-1) in alloc_uobj */
-	assert_uverbs_usecnt(uobj, true);
-	atomic_set(&uobj->usecnt, 0);
-
-	mutex_lock(&uobj->context->uobjects_lock);
-	list_add(&uobj->list, &uobj->context->uobjects);
-	mutex_unlock(&uobj->context->uobjects_lock);
-
-	uobj->type->type_class->alloc_commit(uobj);
-	up_read(&uobj->context->cleanup_rwsem);
-
-	return 0;
-}
-
 static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
 {
-	uverbs_idr_remove_uobj(uobj);
 	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
 			   RDMACG_RESOURCE_HCA_OBJECT);
+
+	spin_lock(&uobj->ufile->idr_lock);
+	idr_remove(&uobj->ufile->idr, uobj->id);
+	spin_unlock(&uobj->ufile->idr_lock);
+}
+
+static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
+					       enum rdma_remove_reason why)
+{
+	const struct uverbs_obj_idr_type *idr_type =
+		container_of(uobj->uapi_object->type_attrs,
+			     struct uverbs_obj_idr_type, type);
+	int ret = idr_type->destroy_object(uobj, why);
+
+	/*
+	 * We can only fail gracefully if the user requested to destroy the
+	 * object or when a retry may be called upon an error.
+	 * In the rest of the cases, just remove whatever you can.
+	 */
+	if (ib_is_destroy_retryable(ret, why, uobj))
+		return ret;
+
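+	/*
+	 * On RDMA_REMOVE_ABORT the rdmacg uncharge is left to
+	 * alloc_abort_idr_uobject().
+	 */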
+	if (why == RDMA_REMOVE_ABORT)
+		return 0;
+
+	ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
+			   RDMACG_RESOURCE_HCA_OBJECT);
+
+	return 0;
+}
+
+static void remove_handle_idr_uobject(struct ib_uobject *uobj)
+{
+	spin_lock(&uobj->ufile->idr_lock);
+	idr_remove(&uobj->ufile->idr, uobj->id);
+	spin_unlock(&uobj->ufile->idr_lock);
+	/* Matches the kref in alloc_commit_idr_uobject */
 	uverbs_uobject_put(uobj);
 }
 
+static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
+{
+	put_unused_fd(uobj->id);
+}
+
+static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
+					      enum rdma_remove_reason why)
+{
+	const struct uverbs_obj_fd_type *fd_type = container_of(
+		uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
+	int ret = fd_type->context_closed(uobj, why);
+
+	if (ib_is_destroy_retryable(ret, why, uobj))
+		return ret;
+
+	return 0;
+}
+
+static void remove_handle_fd_uobject(struct ib_uobject *uobj)
+{
+}
+
+static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
+{
+	struct ib_uverbs_file *ufile = uobj->ufile;
+
+	spin_lock(&ufile->idr_lock);
+	/*
+	 * We already allocated this IDR with a NULL object, so
+	 * this shouldn't fail.
+	 *
+	 * NOTE: Once we set the IDR we lose ownership of our kref on uobj.
+	 * It will be put by remove_handle_idr_uobject()
+	 */
+	WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
+	spin_unlock(&ufile->idr_lock);
+
+	return 0;
+}
+
+static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
+{
+	const struct uverbs_obj_fd_type *fd_type = container_of(
+		uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
+	int fd = uobj->id;
+	struct file *filp;
+
+	/*
+	 * The kref for uobj is moved into filp->private data and put in
+	 * uverbs_close_fd(). Once alloc_commit() succeeds uverbs_close_fd()
+	 * must be guaranteed to be called from the provided fops release
+	 * callback.
+	 */
+	filp = anon_inode_getfile(fd_type->name,
+				  fd_type->fops,
+				  uobj,
+				  fd_type->flags);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
+
+	uobj->object = filp;
+
+	/* Matching put will be done in uverbs_close_fd() */
+	kref_get(&uobj->ufile->ref);
+
+	/* This shouldn't be used anymore. Use the file object instead */
+	uobj->id = 0;
+
+	/*
+	 * NOTE: Once we install the file we lose ownership of our kref on
+	 * uobj. It will be put by uverbs_close_fd()
+	 */
+	fd_install(fd, filp);
+
+	return 0;
+}
+
+/*
+ * In all cases rdma_alloc_commit_uobject() consumes the kref to uobj and the
+ * caller can no longer assume uobj is valid. If this function fails it
+ * destroys the uobject, including the attached HW object.
+ */
+int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
+{
+	struct ib_uverbs_file *ufile = uobj->ufile;
+	int ret;
+
+	/* alloc_commit consumes the uobj kref */
+	ret = uobj->uapi_object->type_class->alloc_commit(uobj);
+	if (ret) {
+		uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
+		up_read(&ufile->hw_destroy_rwsem);
+		return ret;
+	}
+
+	/* kref is held so long as the uobj is on the uobj list. */
+	uverbs_uobject_get(uobj);
+	spin_lock_irq(&ufile->uobjects_lock);
+	list_add(&uobj->list, &ufile->uobjects);
+	spin_unlock_irq(&ufile->uobjects_lock);
+
+	/* matches atomic_set(-1) in alloc_uobj */
+	atomic_set(&uobj->usecnt, 0);
+
+	/* Matches the down_read in rdma_alloc_begin_uobject */
+	up_read(&ufile->hw_destroy_rwsem);
+
+	return 0;
+}
+
+/*
+ * This consumes the kref for uobj. It is up to the caller to unwind the HW
+ * object and anything else connected to uobj before calling this.
+ */
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
 {
-	uobj->type->type_class->alloc_abort(uobj);
+	struct ib_uverbs_file *ufile = uobj->ufile;
+
+	uobj->object = NULL;
+	uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
+
+	/* Matches the down_read in rdma_alloc_begin_uobject */
+	up_read(&ufile->hw_destroy_rwsem);
 }
 
-static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
+static void lookup_put_idr_uobject(struct ib_uobject *uobj,
+				   enum rdma_lookup_mode mode)
 {
 }
 
-static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
+static void lookup_put_fd_uobject(struct ib_uobject *uobj,
+				  enum rdma_lookup_mode mode)
 {
 	struct file *filp = uobj->object;
 
-	WARN_ON(exclusive);
+	WARN_ON(mode != UVERBS_LOOKUP_READ);
 	/* This indirectly calls uverbs_close_fd and frees the object */
 	fput(filp);
 }
 
-void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+			     enum rdma_lookup_mode mode)
 {
-	assert_uverbs_usecnt(uobj, exclusive);
-	uobj->type->type_class->lookup_put(uobj, exclusive);
+	assert_uverbs_usecnt(uobj, mode);
+	uobj->uapi_object->type_class->lookup_put(uobj, mode);
 	/*
 	 * In order to unlock an object, either decrease its usecnt for
 	 * read access or zero it in case of exclusive access. See
 	 * uverbs_try_lock_object for locking schema information.
 	 */
-	if (!exclusive)
+	switch (mode) {
+	case UVERBS_LOOKUP_READ:
 		atomic_dec(&uobj->usecnt);
-	else
+		break;
+	case UVERBS_LOOKUP_WRITE:
 		atomic_set(&uobj->usecnt, 0);
+		break;
+	case UVERBS_LOOKUP_DESTROY:
+		break;
+	}
 
+	/* Pairs with the kref obtained by type->lookup_get */
 	uverbs_uobject_put(uobj);
 }
 
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+	spin_lock_init(&ufile->idr_lock);
+	idr_init(&ufile->idr);
+}
+
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
+{
+	struct ib_uobject *entry;
+	int id;
+
+	/*
+	 * At this point uverbs_cleanup_ufile() is guaranteed to have run, and
+	 * there are no HW objects left, however the IDR is still populated
+	 * with anything that has not been cleaned up by userspace. Since the
+	 * kref on ufile is 0, nothing is allowed to call lookup_get.
+	 *
+	 * This is an optimized equivalent to remove_handle_idr_uobject
+	 */
+	idr_for_each_entry(&ufile->idr, entry, id) {
+		WARN_ON(entry->object);
+		uverbs_uobject_put(entry);
+	}
+
+	idr_destroy(&ufile->idr);
+}
+
 const struct uverbs_obj_type_class uverbs_idr_class = {
 	.alloc_begin = alloc_begin_idr_uobject,
 	.lookup_get = lookup_get_idr_uobject,
 	.alloc_commit = alloc_commit_idr_uobject,
 	.alloc_abort = alloc_abort_idr_uobject,
 	.lookup_put = lookup_put_idr_uobject,
-	.remove_commit = remove_commit_idr_uobject,
+	.destroy_hw = destroy_hw_idr_uobject,
+	.remove_handle = remove_handle_idr_uobject,
 	/*
 	 * When we destroy an object, we first just lock it for WRITE and
 	 * actually DESTROY it in the finalize stage. So, the problematic
@@ -611,103 +768,180 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
 	 */
 	.needs_kfree_rcu = true,
 };
-
-static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
-{
-	struct ib_ucontext *ucontext;
-	struct ib_uverbs_file *ufile = uobj_file->ufile;
-	int ret;
-
-	mutex_lock(&uobj_file->ufile->cleanup_mutex);
-
-	/* uobject was either already cleaned up or is cleaned up right now anyway */
-	if (!uobj_file->uobj.context ||
-	    !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
-		goto unlock;
-
-	ucontext = uobj_file->uobj.context;
-	ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
-	up_read(&ucontext->cleanup_rwsem);
-	if (ret)
-		pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
-unlock:
-	mutex_unlock(&ufile->cleanup_mutex);
-}
+EXPORT_SYMBOL(uverbs_idr_class);
 
 void uverbs_close_fd(struct file *f)
 {
-	struct ib_uobject_file *uobj_file = f->private_data;
-	struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
+	struct ib_uobject *uobj = f->private_data;
+	struct ib_uverbs_file *ufile = uobj->ufile;
 
-	_uverbs_close_fd(uobj_file);
-	uverbs_uobject_put(&uobj_file->uobj);
-	kref_put(uverbs_file_ref, ib_uverbs_release_file);
+	if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
+		/*
+		 * lookup_get_fd_uobject holds the kref on the struct file any
+		 * time a FD uobj is locked, which prevents this release
+		 * method from being invoked. Meaning we can always get the
+		 * write lock here, or we have a kernel bug.
+		 */
+		WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
+		uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE);
+		up_read(&ufile->hw_destroy_rwsem);
+	}
+
+	/* Matches the get in alloc_begin_fd_uobject */
+	kref_put(&ufile->ref, ib_uverbs_release_file);
+
+	/* Pairs with filp->private_data in alloc_begin_fd_uobject */
+	uverbs_uobject_put(uobj);
 }
 
-void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+static void ufile_disassociate_ucontext(struct ib_ucontext *ibcontext)
 {
-	enum rdma_remove_reason reason = device_removed ?
-		RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
-	unsigned int cur_order = 0;
+	struct ib_device *ib_dev = ibcontext->device;
+	struct task_struct *owning_process  = NULL;
+	struct mm_struct   *owning_mm       = NULL;
 
-	ucontext->cleanup_reason = reason;
-	/*
-	 * Waits for all remove_commit and alloc_commit to finish. Logically, We
-	 * want to hold this forever as the context is going to be destroyed,
-	 * but we'll release it since it causes a "held lock freed" BUG message.
-	 */
-	down_write(&ucontext->cleanup_rwsem);
+	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+	if (!owning_process)
+		return;
 
-	while (!list_empty(&ucontext->uobjects)) {
-		struct ib_uobject *obj, *next_obj;
-		unsigned int next_order = UINT_MAX;
-
-		/*
-		 * This shouldn't run while executing other commands on this
-		 * context. Thus, the only thing we should take care of is
-		 * releasing a FD while traversing this list. The FD could be
-		 * closed and released from the _release fop of this FD.
-		 * In order to mitigate this, we add a lock.
-		 * We take and release the lock per order traversal in order
-		 * to let other threads (which might still use the FDs) chance
-		 * to run.
-		 */
-		mutex_lock(&ucontext->uobjects_lock);
-		list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
-					 list) {
-			if (obj->type->destroy_order == cur_order) {
-				int ret;
-
-				/*
-				 * if we hit this WARN_ON, that means we are
-				 * racing with a lookup_get.
+	owning_mm = get_task_mm(owning_process);
+	if (!owning_mm) {
+		pr_info("no mm, disassociate ucontext is pending task termination\n");
+		while (1) {
+			put_task_struct(owning_process);
+			usleep_range(1000, 2000);
+			owning_process = get_pid_task(ibcontext->tgid,
+						      PIDTYPE_PID);
+			if (!owning_process ||
+			    owning_process->state == TASK_DEAD) {
+				pr_info("disassociate ucontext done, task was terminated\n");
+				/* In case the task was dead we need to release
+				 * the task struct.
 				 */
-				WARN_ON(uverbs_try_lock_object(obj, true));
-				ret = obj->type->type_class->remove_commit(obj,
-									   reason);
-				list_del(&obj->list);
-				if (ret)
-					pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
-						obj->id, cur_order);
-				/* put the ref we took when we created the object */
-				uverbs_uobject_put(obj);
-			} else {
-				next_order = min(next_order,
-						 obj->type->destroy_order);
+				if (owning_process)
+					put_task_struct(owning_process);
+				return;
 			}
 		}
-		mutex_unlock(&ucontext->uobjects_lock);
-		cur_order = next_order;
 	}
-	up_write(&ucontext->cleanup_rwsem);
+
+	down_write(&owning_mm->mmap_sem);
+	ib_dev->disassociate_ucontext(ibcontext);
+	up_write(&owning_mm->mmap_sem);
+	mmput(owning_mm);
+	put_task_struct(owning_process);
 }
 
-void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
+/*
+ * Drop the ucontext off the ufile and completely disconnect it from the
+ * ib_device
+ */
+static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
+				   enum rdma_remove_reason reason)
 {
-	ucontext->cleanup_reason = 0;
-	mutex_init(&ucontext->uobjects_lock);
-	INIT_LIST_HEAD(&ucontext->uobjects);
-	init_rwsem(&ucontext->cleanup_rwsem);
+	struct ib_ucontext *ucontext = ufile->ucontext;
+	int ret;
+
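+	/*
+	 * On driver removal give the driver a chance to detach the ucontext
+	 * from the hardware first; see ufile_disassociate_ucontext().
+	 */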
+	if (reason == RDMA_REMOVE_DRIVER_REMOVE)
+		ufile_disassociate_ucontext(ucontext);
+
+	put_pid(ucontext->tgid);
+	ib_rdmacg_uncharge(&ucontext->cg_obj, ucontext->device,
+			   RDMACG_RESOURCE_HCA_HANDLE);
+
+	/*
+	 * FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
+	 * the error return.
+	 */
+	ret = ucontext->device->dealloc_ucontext(ucontext);
+	WARN_ON(ret);
+
+	ufile->ucontext = NULL;
+}
+
+static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
+				  enum rdma_remove_reason reason)
+{
+	struct ib_uobject *obj, *next_obj;
+	int ret = -EINVAL;
+
+	/*
+	 * This shouldn't run while executing other commands on this
+	 * context. Thus, the only thing we should take care of is
+	 * releasing a FD while traversing this list. The FD could be
+	 * closed and released from the _release fop of this FD.
+	 * In order to mitigate this, we add a lock.
+	 * We take and release the lock per traversal in order to let
+	 * other threads (which might still use the FDs) a chance to run.
+	 */
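+	/*
+	 * ret stays -EINVAL when no uobject could be destroyed in this pass;
+	 * the caller uses that to stop retrying.
+	 */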
+	list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
+		/*
+		 * if we hit this WARN_ON, that means we are
+		 * racing with a lookup_get.
+		 */
+		WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
+		if (!uverbs_destroy_uobject(obj, reason))
+			ret = 0;
+	}
+	return ret;
+}
+
+/*
+ * Destroy the ucontext and every uobject associated with it. If called with
+ * reason != RDMA_REMOVE_CLOSE this will not return until the destruction has
+ * been completed and ufile->ucontext is NULL.
+ *
+ * This is internally locked and can be called in parallel from multiple
+ * contexts.
+ */
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+			     enum rdma_remove_reason reason)
+{
+	if (reason == RDMA_REMOVE_CLOSE) {
+		/*
+		 * During destruction we might trigger something that
+		 * synchronously calls release on any file descriptor. For
+		 * this reason all paths that come from file_operations
+		 * release must use try_lock. They can progress knowing that
+		 * there is an ongoing uverbs_destroy_ufile_hw that will clean
+		 * up the driver resources.
+		 */
+		if (!mutex_trylock(&ufile->ucontext_lock))
+			return;
+
+	} else {
+		mutex_lock(&ufile->ucontext_lock);
+	}
+
+	down_write(&ufile->hw_destroy_rwsem);
+
+	/*
+	 * If a ucontext was never created then we can't have any uobjects to
+	 * cleanup, nothing to do.
+	 */
+	if (!ufile->ucontext)
+		goto done;
+
+	ufile->ucontext->closing = true;
+	ufile->ucontext->cleanup_retryable = true;
+	while (!list_empty(&ufile->uobjects))
+		if (__uverbs_cleanup_ufile(ufile, reason)) {
+			/*
+			 * No entry was cleaned-up successfully during this
+			 * iteration
+			 */
+			break;
+		}
+
+	ufile->ucontext->cleanup_retryable = false;
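+	/*
+	 * Final, non-retryable pass: destroy whatever is still left on the
+	 * list.
+	 */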
+	if (!list_empty(&ufile->uobjects))
+		__uverbs_cleanup_ufile(ufile, reason);
+
+	ufile_destroy_ucontext(ufile, reason);
+
+done:
+	up_write(&ufile->hw_destroy_rwsem);
+	mutex_unlock(&ufile->ucontext_lock);
 }
 
 const struct uverbs_obj_type_class uverbs_fd_class = {
@@ -716,23 +950,33 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
 	.alloc_commit = alloc_commit_fd_uobject,
 	.alloc_abort = alloc_abort_fd_uobject,
 	.lookup_put = lookup_put_fd_uobject,
-	.remove_commit = remove_commit_fd_uobject,
+	.destroy_hw = destroy_hw_fd_uobject,
+	.remove_handle = remove_handle_fd_uobject,
 	.needs_kfree_rcu = false,
 };
+EXPORT_SYMBOL(uverbs_fd_class);
 
-struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
-						   struct ib_ucontext *ucontext,
-						   enum uverbs_obj_access access,
-						   int id)
+struct ib_uobject *
+uverbs_get_uobject_from_file(u16 object_id,
+			     struct ib_uverbs_file *ufile,
+			     enum uverbs_obj_access access, s64 id)
 {
+	const struct uverbs_api_object *obj =
+		uapi_get_object(ufile->device->uapi, object_id);
+
 	switch (access) {
 	case UVERBS_ACCESS_READ:
-		return rdma_lookup_get_uobject(type_attrs, ucontext, id, false);
+		return rdma_lookup_get_uobject(obj, ufile, id,
+					       UVERBS_LOOKUP_READ);
 	case UVERBS_ACCESS_DESTROY:
+		/* Actual destruction is done inside uverbs_handle_method */
+		return rdma_lookup_get_uobject(obj, ufile, id,
+					       UVERBS_LOOKUP_DESTROY);
 	case UVERBS_ACCESS_WRITE:
-		return rdma_lookup_get_uobject(type_attrs, ucontext, id, true);
+		return rdma_lookup_get_uobject(obj, ufile, id,
+					       UVERBS_LOOKUP_WRITE);
 	case UVERBS_ACCESS_NEW:
-		return rdma_alloc_begin_uobject(type_attrs, ucontext);
+		return rdma_alloc_begin_uobject(obj, ufile);
 	default:
 		WARN_ON(true);
 		return ERR_PTR(-EOPNOTSUPP);
@@ -753,16 +997,14 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
 
 	switch (access) {
 	case UVERBS_ACCESS_READ:
-		rdma_lookup_put_uobject(uobj, false);
+		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
 		break;
 	case UVERBS_ACCESS_WRITE:
-		rdma_lookup_put_uobject(uobj, true);
+		rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 		break;
 	case UVERBS_ACCESS_DESTROY:
-		if (commit)
-			ret = rdma_remove_commit_uobject(uobj);
-		else
-			rdma_lookup_put_uobject(uobj, true);
+		if (uobj)
+			rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
 		break;
 	case UVERBS_ACCESS_NEW:
 		if (commit)
@@ -777,43 +1019,3 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
 
 	return ret;
 }
-
-int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
-			    struct uverbs_attr_spec_hash * const *spec_hash,
-			    size_t num,
-			    bool commit)
-{
-	unsigned int i;
-	int ret = 0;
-
-	for (i = 0; i < num; i++) {
-		struct uverbs_attr_bundle_hash *curr_bundle =
-			&attrs_bundle->hash[i];
-		const struct uverbs_attr_spec_hash *curr_spec_bucket =
-			spec_hash[i];
-		unsigned int j;
-
-		for (j = 0; j < curr_bundle->num_attrs; j++) {
-			struct uverbs_attr *attr;
-			const struct uverbs_attr_spec *spec;
-
-			if (!uverbs_attr_is_valid_in_hash(curr_bundle, j))
-				continue;
-
-			attr = &curr_bundle->attrs[j];
-			spec = &curr_spec_bucket->attrs[j];
-
-			if (spec->type == UVERBS_ATTR_TYPE_IDR ||
-			    spec->type == UVERBS_ATTR_TYPE_FD) {
-				int current_ret;
-
-				current_ret = uverbs_finalize_object(attr->obj_attr.uobject,
-								     spec->obj.access,
-								     commit);
-				if (!ret)
-					ret = current_ret;
-			}
-		}
-	}
-	return ret;
-}
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 1efcf93..f962f2a 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -43,20 +43,12 @@
 #include <rdma/ib_verbs.h>
 #include <linux/mutex.h>
 
-int uverbs_ns_idx(u16 *id, unsigned int ns_count);
-const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
-						   uint16_t object);
-const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
-						   uint16_t method);
-/*
- * These functions initialize the context and cleanups its uobjects.
- * The context has a list of objects which is protected by a mutex
- * on the context. initialize_ucontext should be called when we create
- * a context.
- * cleanup_ucontext removes all uobjects from the context and puts them.
- */
-void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
-void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
+struct ib_uverbs_device;
+
+void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
+			     enum rdma_remove_reason reason);
+
+int uobj_destroy(struct ib_uobject *uobj);
 
 /*
  * uverbs_uobject_get is called in order to increase the reference count on
@@ -82,7 +74,7 @@ void uverbs_uobject_put(struct ib_uobject *uobject);
 void uverbs_close_fd(struct file *f);
 
 /*
- * Get an ib_uobject that corresponds to the given id from ucontext, assuming
+ * Get an ib_uobject that corresponds to the given id from ufile, assuming
  * the object is from the given type. Lock it to the required access when
  * applicable.
  * This function could create (access == NEW), destroy (access == DESTROY)
@@ -90,13 +82,11 @@ void uverbs_close_fd(struct file *f);
  * The action will be finalized only when uverbs_finalize_object or
  * uverbs_finalize_objects are called.
  */
-struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
-						   struct ib_ucontext *ucontext,
-						   enum uverbs_obj_access access,
-						   int id);
-int uverbs_finalize_object(struct ib_uobject *uobj,
-			   enum uverbs_obj_access access,
-			   bool commit);
+struct ib_uobject *
+uverbs_get_uobject_from_file(u16 object_id,
+			     struct ib_uverbs_file *ufile,
+			     enum uverbs_obj_access access, s64 id);
+
 /*
  * Note that certain finalize stages could return a status:
  *   (a) alloc_commit could return a failure if the object is committed at the
@@ -112,9 +102,63 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
  * function. For example, this could happen when we couldn't destroy an
  * object.
  */
-int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
-			    struct uverbs_attr_spec_hash * const *spec_hash,
-			    size_t num,
-			    bool commit);
+int uverbs_finalize_object(struct ib_uobject *uobj,
+			   enum uverbs_obj_access access,
+			   bool commit);
+
+void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
+
+/*
+ * This is the runtime description of the uverbs API, used by the syscall
+ * machinery to validate and dispatch calls.
+ */
+
+/*
+ * Depending on ID the slot pointer in the radix tree points at one of these
+ * structs.
+ */
+struct uverbs_api_object {
+	const struct uverbs_obj_type *type_attrs;
+	const struct uverbs_obj_type_class *type_class;
+};
+
+struct uverbs_api_ioctl_method {
+	int (__rcu *handler)(struct ib_uverbs_file *ufile,
+			     struct uverbs_attr_bundle *ctx);
+	DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
+	u16 bundle_size;
+	u8 use_stack:1;
+	u8 driver_method:1;
+	u8 key_bitmap_len;
+	u8 destroy_bkey;
+};
+
+struct uverbs_api_attr {
+	struct uverbs_attr_spec spec;
+};
+
+struct uverbs_api_object;
+struct uverbs_api {
+	/* radix tree contains struct uverbs_api_* pointers */
+	struct radix_tree_root radix;
+	enum rdma_driver_id driver_id;
+};
+
+static inline const struct uverbs_api_object *
+uapi_get_object(struct uverbs_api *uapi, u16 object_id)
+{
+	return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+}
+
+char *uapi_key_format(char *S, unsigned int key);
+struct uverbs_api *uverbs_alloc_api(
+	const struct uverbs_object_tree_def *const *driver_specs,
+	enum rdma_driver_id driver_id);
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
+void uverbs_disassociate_api(struct uverbs_api *uapi);
+void uverbs_destroy_api(struct uverbs_api *uapi);
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+			      unsigned int num_attrs);
 
 #endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index a4fbdc5..ee36619 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -143,14 +143,15 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
 
 #define REQUIRED_BOND_STATES		(BONDING_SLAVE_STATE_ACTIVE |	\
 					 BONDING_SLAVE_STATE_NA)
-static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
-				 struct net_device *rdma_ndev, void *cookie)
+static bool
+is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u8 port,
+			     struct net_device *rdma_ndev, void *cookie)
 {
 	struct net_device *real_dev;
-	int res;
+	bool res;
 
 	if (!rdma_ndev)
-		return 0;
+		return false;
 
 	rcu_read_lock();
 	real_dev = rdma_vlan_dev_real_dev(cookie);
@@ -166,14 +167,15 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
 	return res;
 }
 
-static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port,
-				      struct net_device *rdma_ndev, void *cookie)
+static bool
+is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u8 port,
+				  struct net_device *rdma_ndev, void *cookie)
 {
 	struct net_device *master_dev;
-	int res;
+	bool res;
 
 	if (!rdma_ndev)
-		return 0;
+		return false;
 
 	rcu_read_lock();
 	master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev);
@@ -184,22 +186,59 @@ static int is_eth_port_inactive_slave(struct ib_device *ib_dev, u8 port,
 	return res;
 }
 
-static int pass_all_filter(struct ib_device *ib_dev, u8 port,
-			   struct net_device *rdma_ndev, void *cookie)
-{
-	return 1;
-}
-
-static int upper_device_filter(struct ib_device *ib_dev, u8 port,
+/**
+ * is_ndev_for_default_gid_filter - Check if a given netdevice
+ * can be considered for default GIDs or not.
+ * @ib_dev:		IB device to check
+ * @port:		Port to consider for adding default GID
+ * @rdma_ndev:		rdma netdevice pointer
+ * @cookie_ndev:	Netdevice to consider to form a default GID
+ *
+ * is_ndev_for_default_gid_filter() returns true if a given netdevice can be
+ * considered for deriving the default RoCE GID; returns false otherwise.
+ */
+static bool
+is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u8 port,
 			       struct net_device *rdma_ndev, void *cookie)
 {
-	int res;
+	struct net_device *cookie_ndev = cookie;
+	bool res;
 
 	if (!rdma_ndev)
-		return 0;
+		return false;
+
+	rcu_read_lock();
+
+	/*
+	 * When rdma netdevice is used in bonding, bonding master netdevice
+	 * should be considered for default GIDs. Therefore, ignore slave rdma
+	 * netdevices when bonding is considered.
+	 * Additionally, when the event (cookie) netdevice is the bond master
+	 * device, make sure it is the upper netdevice of the rdma netdevice.
+	 */
+	res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) ||
+	       (netif_is_bond_master(cookie_ndev) &&
+		rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)));
+
+	rcu_read_unlock();
+	return res;
+}
+
+static bool pass_all_filter(struct ib_device *ib_dev, u8 port,
+			    struct net_device *rdma_ndev, void *cookie)
+{
+	return true;
+}
+
+static bool upper_device_filter(struct ib_device *ib_dev, u8 port,
+				struct net_device *rdma_ndev, void *cookie)
+{
+	bool res;
+
+	if (!rdma_ndev)
+		return false;
 
 	if (rdma_ndev == cookie)
-		return 1;
+		return true;
 
 	rcu_read_lock();
 	res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
@@ -208,6 +247,34 @@ static int upper_device_filter(struct ib_device *ib_dev, u8 port,
 	return res;
 }
 
+/**
+ * is_upper_ndev_bond_master_filter - Check if a given netdevice
+ * is the bond master device of the netdevice of the RDMA device's port.
+ * @ib_dev:		IB device to check
+ * @port:		Port to consider for adding default GID
+ * @rdma_ndev:		Pointer to rdma netdevice
+ * @cookie:	        Netdevice to consider to form a default GID
+ *
+ * is_upper_ndev_bond_master_filter() returns true if the cookie netdevice
+ * is a bond master device and rdma_ndev is its lower netdevice. The slave
+ * relationship might not have been established yet.
+ */
+static bool
+is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u8 port,
+				 struct net_device *rdma_ndev,
+				 void *cookie)
+{
+	struct net_device *cookie_ndev = cookie;
+	bool match = false;
+
+	rcu_read_lock();
+	if (netif_is_bond_master(cookie_ndev) &&
+	    rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))
+		match = true;
+	rcu_read_unlock();
+	return match;
+}
+
 static void update_gid_ip(enum gid_op_type gid_op,
 			  struct ib_device *ib_dev,
 			  u8 port, struct net_device *ndev,
@@ -223,34 +290,10 @@ static void update_gid_ip(enum gid_op_type gid_op,
 	update_gid(gid_op, ib_dev, port, &gid, &gid_attr);
 }
 
-static void enum_netdev_default_gids(struct ib_device *ib_dev,
-				     u8 port, struct net_device *event_ndev,
-				     struct net_device *rdma_ndev)
-{
-	unsigned long gid_type_mask;
-
-	rcu_read_lock();
-	if (!rdma_ndev ||
-	    ((rdma_ndev != event_ndev &&
-	      !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) ||
-	     is_eth_active_slave_of_bonding_rcu(rdma_ndev,
-						netdev_master_upper_dev_get_rcu(rdma_ndev)) ==
-	     BONDING_SLAVE_STATE_INACTIVE)) {
-		rcu_read_unlock();
-		return;
-	}
-	rcu_read_unlock();
-
-	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
-
-	ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask,
-				     IB_CACHE_GID_DEFAULT_MODE_SET);
-}
-
 static void bond_delete_netdev_default_gids(struct ib_device *ib_dev,
 					    u8 port,
-					    struct net_device *event_ndev,
-					    struct net_device *rdma_ndev)
+					    struct net_device *rdma_ndev,
+					    struct net_device *event_ndev)
 {
 	struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev);
 	unsigned long gid_type_mask;
@@ -381,7 +424,6 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
 static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
 			   struct net_device *rdma_ndev, void *cookie)
 {
-	enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
 	_add_netdev_ips(ib_dev, port, cookie);
 }
 
@@ -391,6 +433,38 @@ static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
 	ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
 }
 
+/**
+ * del_default_gids - Delete default GIDs of the event/cookie netdevice
+ * @ib_dev:	RDMA device pointer
+ * @port:	Port of the RDMA device whose GID table to consider
+ * @rdma_ndev:	Unused rdma netdevice
+ * @cookie:	Pointer to event netdevice
+ *
+ * del_default_gids() deletes the default GIDs of the event/cookie netdevice.
+ */
+static void del_default_gids(struct ib_device *ib_dev, u8 port,
+			     struct net_device *rdma_ndev, void *cookie)
+{
+	struct net_device *cookie_ndev = cookie;
+	unsigned long gid_type_mask;
+
+	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+
+	ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask,
+				     IB_CACHE_GID_DEFAULT_MODE_DELETE);
+}
+
+static void add_default_gids(struct ib_device *ib_dev, u8 port,
+			     struct net_device *rdma_ndev, void *cookie)
+{
+	struct net_device *event_ndev = cookie;
+	unsigned long gid_type_mask;
+
+	gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
+	ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask,
+				     IB_CACHE_GID_DEFAULT_MODE_SET);
+}
+
 static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
 				    u8 port,
 				    struct net_device *rdma_ndev,
@@ -405,9 +479,20 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
 	rtnl_lock();
 	down_read(&net_rwsem);
 	for_each_net(net)
-		for_each_netdev(net, ndev)
-			if (is_eth_port_of_netdev(ib_dev, port, rdma_ndev, ndev))
-				add_netdev_ips(ib_dev, port, rdma_ndev, ndev);
+		for_each_netdev(net, ndev) {
+			/*
+			 * Filter and add default GIDs of the primary netdevice
+			 * when not in bonding mode, or the default GIDs of
+			 * the bond master device when in bonding mode.
+			 */
+			if (is_ndev_for_default_gid_filter(ib_dev, port,
+							   rdma_ndev, ndev))
+				add_default_gids(ib_dev, port, rdma_ndev, ndev);
+
+			if (is_eth_port_of_netdev_filter(ib_dev, port,
+							 rdma_ndev, ndev))
+				_add_netdev_ips(ib_dev, port, ndev);
+		}
 	up_read(&net_rwsem);
 	rtnl_unlock();
 }
@@ -513,18 +598,12 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
 	rcu_read_unlock();
 
 	if (master_ndev) {
-		bond_delete_netdev_default_gids(ib_dev, port, master_ndev,
-						rdma_ndev);
+		bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev,
+						master_ndev);
 		dev_put(master_ndev);
 	}
 }
 
-static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
-				   struct net_device *rdma_ndev, void *cookie)
-{
-	bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
-}
-
 /* The following functions operate on all IB devices. netdevice_event and
  * addr_event execute ib_enum_all_roce_netdevs through a work.
  * ib_enum_all_roce_netdevs iterates through all IB devices.
@@ -575,40 +654,94 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
 }
 
 static const struct netdev_event_work_cmd add_cmd = {
-	.cb = add_netdev_ips, .filter = is_eth_port_of_netdev};
+	.cb	= add_netdev_ips,
+	.filter	= is_eth_port_of_netdev_filter
+};
+
 static const struct netdev_event_work_cmd add_cmd_upper_ips = {
-	.cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev};
+	.cb	= add_netdev_upper_ips,
+	.filter = is_eth_port_of_netdev_filter
+};
 
-static void netdevice_event_changeupper(struct netdev_notifier_changeupper_info *changeupper_info,
-					struct netdev_event_work_cmd *cmds)
+static void
+ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info,
+		  struct netdev_event_work_cmd *cmds)
 {
-	static const struct netdev_event_work_cmd upper_ips_del_cmd = {
-		.cb = del_netdev_upper_ips, .filter = upper_device_filter};
-	static const struct netdev_event_work_cmd bonding_default_del_cmd = {
-		.cb = del_netdev_default_ips, .filter = is_eth_port_inactive_slave};
+	static const struct netdev_event_work_cmd
+			upper_ips_del_cmd = {
+				.cb	= del_netdev_upper_ips,
+				.filter	= upper_device_filter
+	};
 
-	if (changeupper_info->linking == false) {
-		cmds[0] = upper_ips_del_cmd;
-		cmds[0].ndev = changeupper_info->upper_dev;
-		cmds[1] = add_cmd;
-	} else {
-		cmds[0] = bonding_default_del_cmd;
-		cmds[0].ndev = changeupper_info->upper_dev;
-		cmds[1] = add_cmd_upper_ips;
-		cmds[1].ndev = changeupper_info->upper_dev;
-		cmds[1].filter_ndev = changeupper_info->upper_dev;
-	}
+	cmds[0] = upper_ips_del_cmd;
+	cmds[0].ndev = changeupper_info->upper_dev;
+	cmds[1] = add_cmd;
 }
 
+static const struct netdev_event_work_cmd bonding_default_add_cmd = {
+	.cb	= add_default_gids,
+	.filter	= is_upper_ndev_bond_master_filter
+};
+
+static void
+ndev_event_link(struct net_device *event_ndev,
+		struct netdev_notifier_changeupper_info *changeupper_info,
+		struct netdev_event_work_cmd *cmds)
+{
+	static const struct netdev_event_work_cmd
+			bonding_default_del_cmd = {
+				.cb	= del_default_gids,
+				.filter	= is_upper_ndev_bond_master_filter
+			};
+	/*
+	 * When a lower netdev is linked to its upper bonding
+	 * netdev, delete lower slave netdev's default GIDs.
+	 */
+	cmds[0] = bonding_default_del_cmd;
+	cmds[0].ndev = event_ndev;
+	cmds[0].filter_ndev = changeupper_info->upper_dev;
+
+	/* Now add bonding upper device default GIDs */
+	cmds[1] = bonding_default_add_cmd;
+	cmds[1].ndev = changeupper_info->upper_dev;
+	cmds[1].filter_ndev = changeupper_info->upper_dev;
+
+	/* Now add bonding upper device IP based GIDs */
+	cmds[2] = add_cmd_upper_ips;
+	cmds[2].ndev = changeupper_info->upper_dev;
+	cmds[2].filter_ndev = changeupper_info->upper_dev;
+}
+
+static void netdevice_event_changeupper(struct net_device *event_ndev,
+		struct netdev_notifier_changeupper_info *changeupper_info,
+		struct netdev_event_work_cmd *cmds)
+{
+	if (changeupper_info->linking)
+		ndev_event_link(event_ndev, changeupper_info, cmds);
+	else
+		ndev_event_unlink(changeupper_info, cmds);
+}
+
+static const struct netdev_event_work_cmd add_default_gid_cmd = {
+	.cb	= add_default_gids,
+	.filter	= is_ndev_for_default_gid_filter,
+};
+
 static int netdevice_event(struct notifier_block *this, unsigned long event,
 			   void *ptr)
 {
 	static const struct netdev_event_work_cmd del_cmd = {
 		.cb = del_netdev_ips, .filter = pass_all_filter};
-	static const struct netdev_event_work_cmd bonding_default_del_cmd_join = {
-		.cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave};
-	static const struct netdev_event_work_cmd default_del_cmd = {
-		.cb = del_netdev_default_ips, .filter = pass_all_filter};
+	static const struct netdev_event_work_cmd
+			bonding_default_del_cmd_join = {
+				.cb	= del_netdev_default_ips_join,
+				.filter	= is_eth_port_inactive_slave_filter
+			};
+	static const struct netdev_event_work_cmd
+			netdev_del_cmd = {
+				.cb	= del_netdev_ips,
+				.filter = is_eth_port_of_netdev_filter
+			};
 	static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = {
 		.cb = del_netdev_upper_ips, .filter = upper_device_filter};
 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
@@ -621,7 +754,8 @@ static int netdevice_event(struct notifier_block *this, unsigned long event,
 	case NETDEV_REGISTER:
 	case NETDEV_UP:
 		cmds[0] = bonding_default_del_cmd_join;
-		cmds[1] = add_cmd;
+		cmds[1] = add_default_gid_cmd;
+		cmds[2] = add_cmd;
 		break;
 
 	case NETDEV_UNREGISTER:
@@ -632,19 +766,22 @@ static int netdevice_event(struct notifier_block *this, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEADDR:
-		cmds[0] = default_del_cmd;
-		cmds[1] = add_cmd;
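+		/* MAC changed: refresh the IP based and default GIDs */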
+		cmds[0] = netdev_del_cmd;
+		cmds[1] = add_default_gid_cmd;
+		cmds[2] = add_cmd;
 		break;
 
 	case NETDEV_CHANGEUPPER:
-		netdevice_event_changeupper(
+		netdevice_event_changeupper(ndev,
 			container_of(ptr, struct netdev_notifier_changeupper_info, info),
 			cmds);
 		break;
 
 	case NETDEV_BONDING_FAILOVER:
 		cmds[0] = bonding_event_ips_del_cmd;
-		cmds[1] = bonding_default_del_cmd_join;
+		/* Add default GIDs of the bond device */
+		cmds[1] = bonding_default_add_cmd;
+		/* Add IP based GIDs of the bond device */
 		cmds[2] = add_cmd_upper_ips;
 		break;
 
@@ -660,7 +797,8 @@ static void update_gid_event_work_handler(struct work_struct *_work)
 	struct update_gid_event_work *work =
 		container_of(_work, struct update_gid_event_work, work);
 
-	ib_enum_all_roce_netdevs(is_eth_port_of_netdev, work->gid_attr.ndev,
+	ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter,
+				 work->gid_attr.ndev,
 				 callback_for_addr_gid_device_scan, work);
 
 	dev_put(work->gid_attr.ndev);
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index c8963e9..683e6d1 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -87,7 +87,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
 	}
 
 	ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
-	if (ret < nents) {
+	if (ret < 0 || ret < nents) {
 		ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
 		return -EINVAL;
 	}
@@ -325,7 +325,7 @@ int rdma_rw_ctx_init(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
 EXPORT_SYMBOL(rdma_rw_ctx_init);
 
 /**
- * rdma_rw_ctx_signature init - initialize a RW context with signature offload
+ * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
  * @ctx:	context to initialize
  * @qp:		queue pair to operate on
  * @port_num:	port num to which the connection is bound
@@ -564,10 +564,10 @@ EXPORT_SYMBOL(rdma_rw_ctx_wrs);
 int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
 		struct ib_cqe *cqe, struct ib_send_wr *chain_wr)
 {
-	struct ib_send_wr *first_wr, *bad_wr;
+	struct ib_send_wr *first_wr;
 
 	first_wr = rdma_rw_ctx_wrs(ctx, qp, port_num, cqe, chain_wr);
-	return ib_post_send(qp, first_wr, &bad_wr);
+	return ib_post_send(qp, first_wr, NULL);
 }
 EXPORT_SYMBOL(rdma_rw_ctx_post);
 
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a61ec7e..7b794a1 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,20 +1227,10 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
 	return src_path_mask;
 }
 
-static int
-roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
-			     struct sa_path_rec *rec)
+static int roce_resolve_route_from_path(struct sa_path_rec *rec,
+					const struct ib_gid_attr *attr)
 {
-	struct net_device *resolved_dev;
-	struct net_device *ndev;
-	struct net_device *idev;
-	struct rdma_dev_addr dev_addr = {
-		.bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
-				 sa_path_get_ifindex(rec) : 0),
-		.net = sa_path_get_ndev(rec) ?
-			sa_path_get_ndev(rec) :
-			&init_net
-	};
+	struct rdma_dev_addr dev_addr = {};
 	union {
 		struct sockaddr     _sockaddr;
 		struct sockaddr_in  _sockaddr_in;
@@ -1250,9 +1240,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
 
 	if (rec->roce.route_resolved)
 		return 0;
+	if (!attr || !attr->ndev)
+		return -EINVAL;
 
-	if (!device->get_netdev)
-		return -EOPNOTSUPP;
+	dev_addr.bound_dev_if = attr->ndev->ifindex;
+	/* TODO: Use the net from the ib_gid_attr once it is added to it;
+	 * until then, limit to init_net.
+	 */
+	dev_addr.net = &init_net;
 
 	rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
 	rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
@@ -1268,60 +1263,52 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
 	    rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
 		return -EINVAL;
 
-	idev = device->get_netdev(device, port_num);
-	if (!idev)
-		return -ENODEV;
-
-	resolved_dev = dev_get_by_index(dev_addr.net,
-					dev_addr.bound_dev_if);
-	if (!resolved_dev) {
-		ret = -ENODEV;
-		goto done;
-	}
-	ndev = ib_get_ndev_from_path(rec);
-	rcu_read_lock();
-	if ((ndev && ndev != resolved_dev) ||
-	    (resolved_dev != idev &&
-	     !rdma_is_upper_dev_rcu(idev, resolved_dev)))
-		ret = -EHOSTUNREACH;
-	rcu_read_unlock();
-	dev_put(resolved_dev);
-	if (ndev)
-		dev_put(ndev);
-done:
-	dev_put(idev);
-	if (!ret)
-		rec->roce.route_resolved = true;
-	return ret;
+	rec->roce.route_resolved = true;
+	return 0;
 }
 
 static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
 				   struct sa_path_rec *rec,
-				   struct rdma_ah_attr *ah_attr)
+				   struct rdma_ah_attr *ah_attr,
+				   const struct ib_gid_attr *gid_attr)
 {
 	enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
-	struct net_device *ndev;
-	u16 gid_index;
-	int ret;
 
-	ndev = ib_get_ndev_from_path(rec);
-	ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
-					 port_num, ndev, &gid_index);
-	if (ndev)
-		dev_put(ndev);
-	if (ret)
-		return ret;
+	if (!gid_attr) {
+		gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
+						 port_num, NULL);
+		if (IS_ERR(gid_attr))
+			return PTR_ERR(gid_attr);
+	} else {
+		rdma_hold_gid_attr(gid_attr);
+	}
 
-	rdma_ah_set_grh(ah_attr, &rec->dgid,
-			be32_to_cpu(rec->flow_label),
-			gid_index, rec->hop_limit,
-			rec->traffic_class);
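+	/*
+	 * rdma_move_grh_sgid_attr() takes over the gid_attr reference held
+	 * above; it is released later by rdma_destroy_ah_attr().
+	 */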
+	rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
+				be32_to_cpu(rec->flow_label),
+				rec->hop_limit,	rec->traffic_class,
+				gid_attr);
 	return 0;
 }
 
+/**
+ * ib_init_ah_attr_from_path - Initialize address handle attributes based on
+ *   an SA path record.
+ * @device: Device with which the ah attributes are associated.
+ * @port_num: Port on the specified device.
+ * @rec: path record entry to use for ah attributes initialization.
+ * @ah_attr: address handle attributes to initialize from the path record.
+ * @gid_attr: SGID attribute to consider during initialization.
+ *
+ * When ib_init_ah_attr_from_path() returns success,
+ * (a) for the IB link layer it optionally holds a reference to the SGID
+ * attribute when a GRH is present.
+ * (b) for the RoCE link layer it holds a reference to the SGID attribute.
+ * The caller must invoke rdma_destroy_ah_attr() to release the SGID
+ * attribute reference taken by ib_init_ah_attr_from_path().
+ */
 int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 			      struct sa_path_rec *rec,
-			      struct rdma_ah_attr *ah_attr)
+			      struct rdma_ah_attr *ah_attr,
+			      const struct ib_gid_attr *gid_attr)
 {
 	int ret = 0;
 
@@ -1332,7 +1319,7 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 	rdma_ah_set_static_rate(ah_attr, rec->rate);
 
 	if (sa_path_is_roce(rec)) {
-		ret = roce_resolve_route_from_path(device, port_num, rec);
+		ret = roce_resolve_route_from_path(rec, gid_attr);
 		if (ret)
 			return ret;
 
@@ -1349,7 +1336,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 	}
 
 	if (rec->hop_limit > 0 || sa_path_is_roce(rec))
-		ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
+		ret = init_ah_attr_grh_fields(device, port_num,
+					      rec, ah_attr, gid_attr);
 	return ret;
 }
 EXPORT_SYMBOL(ib_init_ah_attr_from_path);
@@ -1557,8 +1545,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
 				  ARRAY_SIZE(path_rec_table),
 				  mad->data, &rec);
 			rec.rec_type = SA_PATH_REC_TYPE_IB;
-			sa_path_set_ndev(&rec, NULL);
-			sa_path_set_ifindex(&rec, 0);
 			sa_path_set_dmac_zero(&rec);
 
 			if (query->conv_pr) {
@@ -2290,6 +2276,7 @@ static void update_sm_ah(struct work_struct *work)
 	struct ib_sa_sm_ah *new_ah;
 	struct ib_port_attr port_attr;
 	struct rdma_ah_attr   ah_attr;
+	bool grh_required;
 
 	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
 		pr_warn("Couldn't query port\n");
@@ -2314,16 +2301,27 @@ static void update_sm_ah(struct work_struct *work)
 	rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
 	rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
 	rdma_ah_set_port_num(&ah_attr, port->port_num);
-	if (port_attr.grh_required) {
-		if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) {
-			rdma_ah_set_make_grd(&ah_attr, true);
-		} else {
-			rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
-			rdma_ah_set_subnet_prefix(&ah_attr,
-						  cpu_to_be64(port_attr.subnet_prefix));
-			rdma_ah_set_interface_id(&ah_attr,
-						 cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
-		}
+
+	grh_required = rdma_is_grh_required(port->agent->device,
+					    port->port_num);
+
+	/*
+	 * The OPA sm_lid of 0xFFFF needs special handling so that it can be
+	 * differentiated from a permissive LID of 0xFFFF.  We set the
+	 * grh_required flag here so the SA can program the DGID in the
+	 * address handle appropriately
+	 */
+	if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
+	    (grh_required ||
+	     port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
+		rdma_ah_set_make_grd(&ah_attr, true);
+
+	if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
+		rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
+		rdma_ah_set_subnet_prefix(&ah_attr,
+					  cpu_to_be64(port_attr.subnet_prefix));
+		rdma_ah_set_interface_id(&ah_attr,
+					 cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
 	}
 
 	new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 31c7efa..7fd14ea 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -42,6 +42,7 @@
 
 #include <rdma/ib_mad.h>
 #include <rdma/ib_pma.h>
+#include <rdma/ib_cache.h>
 
 struct ib_port;
 
@@ -346,7 +347,7 @@ static struct attribute *port_default_attrs[] = {
 	NULL
 };
 
-static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
 {
 	if (!gid_attr->ndev)
 		return -EINVAL;
@@ -354,33 +355,26 @@ static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
 	return sprintf(buf, "%s\n", gid_attr->ndev->name);
 }
 
-static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
 {
 	return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
 }
 
-static ssize_t _show_port_gid_attr(struct ib_port *p,
-				   struct port_attribute *attr,
-				   char *buf,
-				   size_t (*print)(struct ib_gid_attr *gid_attr,
-						   char *buf))
+static ssize_t _show_port_gid_attr(
+	struct ib_port *p, struct port_attribute *attr, char *buf,
+	size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
 {
 	struct port_table_attribute *tab_attr =
 		container_of(attr, struct port_table_attribute, attr);
-	union ib_gid gid;
-	struct ib_gid_attr gid_attr = {};
+	const struct ib_gid_attr *gid_attr;
 	ssize_t ret;
 
-	ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
-			   &gid_attr);
-	if (ret)
-		goto err;
+	gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+	if (IS_ERR(gid_attr))
+		return PTR_ERR(gid_attr);
 
-	ret = print(&gid_attr, buf);
-
-err:
-	if (gid_attr.ndev)
-		dev_put(gid_attr.ndev);
+	ret = print(gid_attr, buf);
+	rdma_put_gid_attr(gid_attr);
 	return ret;
 }
 
@@ -389,26 +383,28 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
 {
 	struct port_table_attribute *tab_attr =
 		container_of(attr, struct port_table_attribute, attr);
-	union ib_gid *pgid;
-	union ib_gid gid;
+	const struct ib_gid_attr *gid_attr;
 	ssize_t ret;
 
-	ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
+	gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+	if (IS_ERR(gid_attr)) {
+		const union ib_gid zgid = {};
 
-	/* If reading GID fails, it is likely due to GID entry being empty
-	 * (invalid) or reserved GID in the table.
-	 * User space expects to read GID table entries as long as it given
-	 * index is within GID table size.
-	 * Administrative/debugging tool fails to query rest of the GID entries
-	 * if it hits error while querying a GID of the given index.
-	 * To avoid user space throwing such error on fail to read gid, return
-	 * zero GID as before. This maintains backward compatibility.
-	 */
-	if (ret)
-		pgid = &zgid;
-	else
-		pgid = &gid;
-	return sprintf(buf, "%pI6\n", pgid->raw);
+		/* If reading the GID fails, it is likely because the GID entry
+		 * is empty (invalid) or a reserved GID in the table.  User
+		 * space expects to be able to read GID table entries as long
+		 * as the given index is within the GID table size.
+		 * Administrative/debugging tools stop querying the rest of the
+		 * GID entries if they hit an error while querying a GID at a
+		 * given index.  To avoid user space seeing such an error on a
+		 * failed GID read, return the zero GID as before. This
+		 * maintains backward compatibility.
+		 */
+		return sprintf(buf, "%pI6\n", zgid.raw);
+	}
+
+	ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw);
+	rdma_put_gid_attr(gid_attr);
+	return ret;
 }
 
 static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
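For illustration, a minimal sketch (not part of this patch) of the consumer pattern the sysfs hunks above switch to: rdma_get_gid_attr() returns a reference-counted, const GID table entry (declared in the newly included <rdma/ib_cache.h>) and rdma_put_gid_attr() drops it. The function name example_show_gid is hypothetical; the error handling mirrors show_port_gid() above.

static ssize_t example_show_gid(struct ib_device *dev, u8 port_num,
				int index, char *buf)
{
	const struct ib_gid_attr *gid_attr;
	ssize_t ret;

	gid_attr = rdma_get_gid_attr(dev, port_num, index);
	if (IS_ERR(gid_attr))
		return PTR_ERR(gid_attr);	/* empty/reserved entry */

	ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw);
	rdma_put_gid_attr(gid_attr);		/* drop the table reference */
	return ret;
}

Compared with the old ib_query_gid() flow, the caller no longer copies the GID out or manages a netdev reference itself; everything is released by the single rdma_put_gid_attr() call.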
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 9eef96d..faa9e61 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -207,7 +207,7 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
 }
 
 static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
-				 struct ib_cm_req_event_param *kreq)
+				 const struct ib_cm_req_event_param *kreq)
 {
 	ureq->remote_ca_guid             = kreq->remote_ca_guid;
 	ureq->remote_qkey                = kreq->remote_qkey;
@@ -231,7 +231,7 @@ static void ib_ucm_event_req_get(struct ib_ucm_req_event_resp *ureq,
 }
 
 static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
-				 struct ib_cm_rep_event_param *krep)
+				 const struct ib_cm_rep_event_param *krep)
 {
 	urep->remote_ca_guid      = krep->remote_ca_guid;
 	urep->remote_qkey         = krep->remote_qkey;
@@ -247,14 +247,14 @@ static void ib_ucm_event_rep_get(struct ib_ucm_rep_event_resp *urep,
 }
 
 static void ib_ucm_event_sidr_rep_get(struct ib_ucm_sidr_rep_event_resp *urep,
-				      struct ib_cm_sidr_rep_event_param *krep)
+				      const struct ib_cm_sidr_rep_event_param *krep)
 {
 	urep->status = krep->status;
 	urep->qkey   = krep->qkey;
 	urep->qpn    = krep->qpn;
 };
 
-static int ib_ucm_event_process(struct ib_cm_event *evt,
+static int ib_ucm_event_process(const struct ib_cm_event *evt,
 				struct ib_ucm_event *uvt)
 {
 	void *info = NULL;
@@ -351,7 +351,7 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
 }
 
 static int ib_ucm_event_handler(struct ib_cm_id *cm_id,
-				struct ib_cm_event *event)
+				const struct ib_cm_event *event)
 {
 	struct ib_ucm_event *uevent;
 	struct ib_ucm_context *ctx;
@@ -1000,14 +1000,11 @@ static ssize_t ib_ucm_send_sidr_req(struct ib_ucm_file *file,
 				    const char __user *inbuf,
 				    int in_len, int out_len)
 {
-	struct ib_cm_sidr_req_param param;
+	struct ib_cm_sidr_req_param param = {};
 	struct ib_ucm_context *ctx;
 	struct ib_ucm_sidr_req cmd;
 	int result;
 
-	param.private_data = NULL;
-	param.path = NULL;
-
 	if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
 		return -EFAULT;
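The send_sidr_req change above leans on the C empty-brace initializer: "= {}" zero-initializes every member of the struct, which replaces the two explicit NULL assignments and also covers members added later. A standalone sketch with a hypothetical struct and function:

struct example_sidr_param {
	const void *private_data;
	void *path;
	unsigned int timeout_ms;
};

static void example_init(void)
{
	struct example_sidr_param param = {};	/* all members zero/NULL */

	/* equivalent, for this purpose, to memset(&param, 0, sizeof(param)) */
	(void)param;
}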
 
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 54ab633..a41792d 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -84,7 +84,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	struct ib_umem *umem;
 	struct page **page_list;
 	struct vm_area_struct **vma_list;
-	unsigned long locked;
 	unsigned long lock_limit;
 	unsigned long cur_base;
 	unsigned long npages;
@@ -92,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 	int i;
 	unsigned long dma_attrs = 0;
 	struct scatterlist *sg, *sg_list_start;
-	int need_release = 0;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	if (dmasync)
@@ -121,10 +119,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	if (access & IB_ACCESS_ON_DEMAND) {
 		ret = ib_umem_odp_get(context, umem, access);
-		if (ret) {
-			kfree(umem);
-			return ERR_PTR(ret);
-		}
+		if (ret)
+			goto umem_kfree;
 		return umem;
 	}
 
@@ -135,8 +131,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	page_list = (struct page **) __get_free_page(GFP_KERNEL);
 	if (!page_list) {
-		kfree(umem);
-		return ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
+		goto umem_kfree;
 	}
 
 	/*
@@ -149,41 +145,43 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	npages = ib_umem_num_pages(umem);
 
-	down_write(&current->mm->mmap_sem);
-
-	locked     = npages + current->mm->pinned_vm;
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
-	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+	down_write(&current->mm->mmap_sem);
+	current->mm->pinned_vm += npages;
+	if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		up_write(&current->mm->mmap_sem);
 		ret = -ENOMEM;
-		goto out;
+		goto vma;
 	}
+	up_write(&current->mm->mmap_sem);
 
 	cur_base = addr & PAGE_MASK;
 
 	if (npages == 0 || npages > UINT_MAX) {
 		ret = -EINVAL;
-		goto out;
+		goto vma;
 	}
 
 	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
 	if (ret)
-		goto out;
+		goto vma;
 
 	if (!umem->writable)
 		gup_flags |= FOLL_FORCE;
 
-	need_release = 1;
 	sg_list_start = umem->sg_head.sgl;
 
+	down_read(&current->mm->mmap_sem);
 	while (npages) {
 		ret = get_user_pages_longterm(cur_base,
 				     min_t(unsigned long, npages,
 					   PAGE_SIZE / sizeof (struct page *)),
 				     gup_flags, page_list, vma_list);
-
-		if (ret < 0)
-			goto out;
+		if (ret < 0) {
+			up_read(&current->mm->mmap_sem);
+			goto umem_release;
+		}
 
 		umem->npages += ret;
 		cur_base += ret * PAGE_SIZE;
@@ -199,6 +197,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 		/* preparing for next loop */
 		sg_list_start = sg;
 	}
+	up_read(&current->mm->mmap_sem);
 
 	umem->nmap = ib_dma_map_sg_attrs(context->device,
 				  umem->sg_head.sgl,
@@ -206,27 +205,28 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 				  DMA_BIDIRECTIONAL,
 				  dma_attrs);
 
-	if (umem->nmap <= 0) {
+	if (!umem->nmap) {
 		ret = -ENOMEM;
-		goto out;
+		goto umem_release;
 	}
 
 	ret = 0;
+	goto out;
 
-out:
-	if (ret < 0) {
-		if (need_release)
-			__ib_umem_release(context->device, umem, 0);
-		kfree(umem);
-	} else
-		current->mm->pinned_vm = locked;
-
+umem_release:
+	__ib_umem_release(context->device, umem, 0);
+vma:
+	down_write(&current->mm->mmap_sem);
+	current->mm->pinned_vm -= ib_umem_num_pages(umem);
 	up_write(&current->mm->mmap_sem);
+out:
 	if (vma_list)
 		free_page((unsigned long) vma_list);
 	free_page((unsigned long) page_list);
-
-	return ret < 0 ? ERR_PTR(ret) : umem;
+umem_kfree:
+	if (ret)
+		kfree(umem);
+	return ret ? ERR_PTR(ret) : umem;
 }
 EXPORT_SYMBOL(ib_umem_get);
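The ib_umem_get() hunks above replace the single out: label with targeted unwind labels and charge current->mm->pinned_vm before pinning, rolling the charge back on any failure. A simplified sketch of that charge/uncharge pattern; example_charge_pinned is hypothetical and is assumed to live in the same compilation unit as ib_umem_get(), which already pulls in the headers for rlimit(), capable() and mmap_sem:

static int example_charge_pinned(unsigned long npages)
{
	unsigned long lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	int ret;

	down_write(&current->mm->mmap_sem);
	current->mm->pinned_vm += npages;	/* charge before pinning */
	if (current->mm->pinned_vm > lock_limit && !capable(CAP_IPC_LOCK)) {
		up_write(&current->mm->mmap_sem);
		ret = -ENOMEM;
		goto uncharge;
	}
	up_write(&current->mm->mmap_sem);

	/* ... pin the pages here; on failure set ret and goto uncharge ... */
	return 0;

uncharge:
	down_write(&current->mm->mmap_sem);
	current->mm->pinned_vm -= npages;	/* roll back the speculative charge */
	up_write(&current->mm->mmap_sem);
	return ret;
}

Charging first keeps the limit check and the counter update under the same mmap_sem hold, at the cost of having to uncharge on every error path, which is what the new vma: label in the patch does.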
 
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index bb98c9e..c34a685 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -268,6 +268,7 @@ static void recv_handler(struct ib_mad_agent *agent,
 		packet->mad.hdr.traffic_class = grh->traffic_class;
 		memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
 		packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
+		rdma_destroy_ah_attr(&ah_attr);
 	}
 
 	if (queue_packet(file, agent, packet))
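The user_mad hunk releases the AH attribute once the GRH fields have been copied into the packet header; in this kernel the attribute can hold a reference (for example to a GID table entry), so the earlier ib_init_ah_attr_from_wc() call in recv_handler() must be paired with rdma_destroy_ah_attr(). A sketch of the pairing, assuming recv_handler()'s surrounding variables (agent, mad_recv_wc, grh):

struct rdma_ah_attr ah_attr;

if (!ib_init_ah_attr_from_wc(agent->device, agent->port_num,
			     mad_recv_wc->wc, grh, &ah_attr)) {
	/* ... copy dlid/sl/gid/traffic_class/flow_label into the header ... */
	rdma_destroy_ah_attr(&ah_attr);	/* drop whatever the attr holds */
}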
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c0d40fc..5df8e54 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -111,7 +111,7 @@ struct ib_uverbs_device {
 	struct mutex				lists_mutex; /* protect lists */
 	struct list_head			uverbs_file_list;
 	struct list_head			uverbs_events_file_list;
-	struct uverbs_root_spec			*specs_root;
+	struct uverbs_api			*uapi;
 };
 
 struct ib_uverbs_event_queue {
@@ -130,21 +130,37 @@ struct ib_uverbs_async_event_file {
 };
 
 struct ib_uverbs_completion_event_file {
-	struct ib_uobject_file			uobj_file;
+	struct ib_uobject			uobj;
 	struct ib_uverbs_event_queue		ev_queue;
 };
 
 struct ib_uverbs_file {
 	struct kref				ref;
-	struct mutex				mutex;
-	struct mutex                            cleanup_mutex; /* protect cleanup */
 	struct ib_uverbs_device		       *device;
+	struct mutex				ucontext_lock;
+	/*
+	 * ucontext must be accessed via ib_uverbs_get_ucontext() or with
+	 * ucontext_lock held
+	 */
 	struct ib_ucontext		       *ucontext;
 	struct ib_event_handler			event_handler;
 	struct ib_uverbs_async_event_file       *async_file;
 	struct list_head			list;
 	int					is_closed;
 
+	/*
+	 * To access the uobjects list hw_destroy_rwsem must be held for write
+	 * OR hw_destroy_rwsem held for read AND uobjects_lock held.
+	 * hw_destroy_rwsem must be held across any destruction of the HW
+	 * object of an associated uobject.
+	 */
+	struct rw_semaphore	hw_destroy_rwsem;
+	spinlock_t		uobjects_lock;
+	struct list_head	uobjects;
+
+	u64 uverbs_cmd_mask;
+	u64 uverbs_ex_cmd_mask;
+
 	struct idr		idr;
 	/* spinlock protects write access to idr */
 	spinlock_t		idr_lock;
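The locking rule documented above (readers of the uobjects list take hw_destroy_rwsem for read plus uobjects_lock; writers take hw_destroy_rwsem for write) can be illustrated with a hypothetical walker. This is a sketch only: example_walk_uobjects is not a real function, and it assumes the uobject's list member links it into ufile->uobjects and that the spinlock is taken irq-safe, as rdma_core does.

static void example_walk_uobjects(struct ib_uverbs_file *ufile)
{
	struct ib_uobject *uobj;
	unsigned long flags;

	down_read(&ufile->hw_destroy_rwsem);	/* holds off HW destruction */
	spin_lock_irqsave(&ufile->uobjects_lock, flags);
	list_for_each_entry(uobj, &ufile->uobjects, list) {
		/* read-only inspection; must not sleep under the spinlock */
	}
	spin_unlock_irqrestore(&ufile->uobjects_lock, flags);
	up_read(&ufile->hw_destroy_rwsem);
}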
@@ -196,7 +212,6 @@ struct ib_uwq_object {
 
 struct ib_ucq_object {
 	struct ib_uobject	uobject;
-	struct ib_uverbs_file  *uverbs_file;
 	struct list_head	comp_list;
 	struct list_head	async_list;
 	u32			comp_events_reported;
@@ -230,7 +245,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
 void ib_uverbs_event_handler(struct ib_event_handler *handler,
 			     struct ib_event *event);
-int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
 			   enum rdma_remove_reason why);
 
 int uverbs_dealloc_mw(struct ib_mw *mw);
@@ -238,12 +253,7 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
 			     struct ib_uqp_object *uobj);
 
 void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
-extern const struct uverbs_attr_def uverbs_uhw_compat_in;
-extern const struct uverbs_attr_def uverbs_uhw_compat_out;
 long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-int uverbs_destroy_def_handler(struct ib_device *ib_dev,
-			       struct ib_uverbs_file *file,
-			       struct uverbs_attr_bundle *attrs);
 
 struct ib_uverbs_flow_spec {
 	union {
@@ -292,7 +302,6 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
 
 #define IB_UVERBS_DECLARE_CMD(name)					\
 	ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,		\
-				 struct ib_device *ib_dev,              \
 				 const char __user *buf, int in_len,	\
 				 int out_len)
 
@@ -334,7 +343,6 @@ IB_UVERBS_DECLARE_CMD(close_xrcd);
 
 #define IB_UVERBS_DECLARE_EX_CMD(name)				\
 	int ib_uverbs_ex_##name(struct ib_uverbs_file *file,	\
-				struct ib_device *ib_dev,		\
 				struct ib_udata *ucore,		\
 				struct ib_udata *uhw)
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 583d3a1..a21d521 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -48,11 +48,10 @@
 #include "core_priv.h"
 
 static struct ib_uverbs_completion_event_file *
-ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
+_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
 {
-	struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
-						fd, context);
-	struct ib_uobject_file *uobj_file;
+	struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
+					       fd, ufile);
 
 	if (IS_ERR(uobj))
 		return (void *)uobj;
@@ -60,13 +59,13 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
 	uverbs_uobject_get(uobj);
 	uobj_put_read(uobj);
 
-	uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
-	return container_of(uobj_file, struct ib_uverbs_completion_event_file,
-			    uobj_file);
+	return container_of(uobj, struct ib_uverbs_completion_event_file,
+			    uobj);
 }
+#define ib_uverbs_lookup_comp_file(_fd, _ufile)                                \
+	_ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
 
 ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      const char __user *buf,
 			      int in_len, int out_len)
 {
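The wrapper macro above multiplies the fd by typecheck(s32, _fd): typecheck() (from <linux/typecheck.h>) evaluates to 1 but emits a compiler diagnostic when the argument's type differs, so the value passes through unchanged while callers of ib_uverbs_lookup_comp_file() are forced to hand it a signed 32-bit fd. A standalone sketch of the same trick; require_s32 and example_user are hypothetical:

#include <linux/types.h>
#include <linux/typecheck.h>

/* passes x through, but warns (errors under -Werror) unless x is an s32 */
#define require_s32(x)	((x) * typecheck(s32, (x)))

static int example_user(s32 fd)
{
	return require_s32(fd);		/* ok: fd really is s32 */
	/* require_s32(1ULL) would trip the distinct-pointer-types warning */
}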
@@ -76,6 +75,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	struct ib_ucontext		 *ucontext;
 	struct file			 *filp;
 	struct ib_rdmacg_object		 cg_obj;
+	struct ib_device *ib_dev;
 	int ret;
 
 	if (out_len < sizeof resp)
@@ -84,7 +84,13 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	mutex_lock(&file->mutex);
+	mutex_lock(&file->ucontext_lock);
+	ib_dev = srcu_dereference(file->device->ib_dev,
+				  &file->device->disassociate_srcu);
+	if (!ib_dev) {
+		ret = -EIO;
+		goto err;
+	}
 
 	if (file->ucontext) {
 		ret = -EINVAL;
@@ -110,12 +116,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	ucontext->cg_obj = cg_obj;
 	/* ufile is required when some objects are released */
 	ucontext->ufile = file;
-	uverbs_initialize_ucontext(ucontext);
 
 	rcu_read_lock();
 	ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
 	rcu_read_unlock();
 	ucontext->closing = 0;
+	ucontext->cleanup_retryable = false;
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	ucontext->umem_tree = RB_ROOT_CACHED;
@@ -146,11 +152,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 		goto err_file;
 	}
 
-	file->ucontext = ucontext;
-
 	fd_install(resp.async_fd, filp);
 
-	mutex_unlock(&file->mutex);
+	/*
+	 * Make sure that ib_uverbs_get_ucontext() sees the pointer update
+	 * only after all writes that set up the ucontext have completed
+	 */
+	smp_store_release(&file->ucontext, ucontext);
+
+	mutex_unlock(&file->ucontext_lock);
 
 	return in_len;
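The smp_store_release() above publishes the ucontext pointer only after it is fully initialised; the comment implies the reader, ib_uverbs_get_ucontext(), performs the matching acquire load. A sketch of the reader side this pairing suggests, under a hypothetical name since the real helper also has to cope with device disassociation:

static struct ib_ucontext *example_get_ucontext(struct ib_uverbs_file *ufile)
{
	/* pairs with smp_store_release(&file->ucontext, ucontext) above */
	struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);

	if (!ucontext)
		return ERR_PTR(-EINVAL);	/* no ib_uverbs_get_context() yet */
	return ucontext;
}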
 
@@ -169,15 +179,16 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
 	ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
 
 err:
-	mutex_unlock(&file->mutex);
+	mutex_unlock(&file->ucontext_lock);
 	return ret;
 }
 
-static void copy_query_dev_fields(struct ib_uverbs_file *file,
-				  struct ib_device *ib_dev,
+static void copy_query_dev_fields(struct ib_ucontext *ucontext,
 				  struct ib_uverbs_query_device_resp *resp,
 				  struct ib_device_attr *attr)
 {
+	struct ib_device *ib_dev = ucontext->device;
+
 	resp->fw_ver		= attr->fw_ver;
 	resp->node_guid		= ib_dev->node_guid;
 	resp->sys_image_guid	= attr->sys_image_guid;
@@ -189,7 +200,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
 	resp->max_qp		= attr->max_qp;
 	resp->max_qp_wr		= attr->max_qp_wr;
 	resp->device_cap_flags	= lower_32_bits(attr->device_cap_flags);
-	resp->max_sge		= attr->max_sge;
+	resp->max_sge		= min(attr->max_send_sge, attr->max_recv_sge);
 	resp->max_sge_rd	= attr->max_sge_rd;
 	resp->max_cq		= attr->max_cq;
 	resp->max_cqe		= attr->max_cqe;
@@ -221,12 +232,16 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
-			       struct ib_device *ib_dev,
 			       const char __user *buf,
 			       int in_len, int out_len)
 {
 	struct ib_uverbs_query_device      cmd;
 	struct ib_uverbs_query_device_resp resp;
+	struct ib_ucontext *ucontext;
+
+	ucontext = ib_uverbs_get_ucontext(file);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -235,7 +250,7 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 		return -EFAULT;
 
 	memset(&resp, 0, sizeof resp);
-	copy_query_dev_fields(file, ib_dev, &resp, &ib_dev->attrs);
+	copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
 
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
 		return -EFAULT;
@@ -243,8 +258,28 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 	return in_len;
 }
 
+/*
+ * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
+ * PortInfo CapabilityMask, but was extended with unique bits.
+ */
+static u32 make_port_cap_flags(const struct ib_port_attr *attr)
+{
+	u32 res;
+
+	/* All IBA CapabilityMask bits are passed through here, except bit 26,
+	 * which is overridden with IP_BASED_GIDS. This is due to a historical
+	 * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
+	 * bits match the IBA definition across all kernel versions.
+	 */
+	res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
+
+	if (attr->ip_gids)
+		res |= IB_UVERBS_PCF_IP_BASED_GIDS;
+
+	return res;
+}
+
 ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf,
 			     int in_len, int out_len)
 {
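A worked example of the make_port_cap_flags() masking above, assuming IB_UVERBS_PCF_IP_BASED_GIDS is bit 26 (1 << 26) as the in-code comment describes; the sample CapabilityMask value is made up for illustration:

/*
 * attr->port_cap_flags = 0x04010000  (driver happened to set bit 26)
 * IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26 = 0x04000000
 *
 *   res = 0x04010000 & ~0x04000000 = 0x00010000     (bit 26 cleared)
 *   attr->ip_gids == false  ->  res = 0x00010000    (bit 26 stays clear)
 *   attr->ip_gids == true   ->  res = 0x04010000    (bit 26 set again,
 *                                                     now meaning IP GIDs)
 */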
@@ -252,6 +287,13 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 	struct ib_uverbs_query_port_resp resp;
 	struct ib_port_attr              attr;
 	int                              ret;
+	struct ib_ucontext *ucontext;
+	struct ib_device *ib_dev;
+
+	ucontext = ib_uverbs_get_ucontext(file);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
+	ib_dev = ucontext->device;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -269,12 +311,15 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 	resp.max_mtu 	     = attr.max_mtu;
 	resp.active_mtu      = attr.active_mtu;
 	resp.gid_tbl_len     = attr.gid_tbl_len;
-	resp.port_cap_flags  = attr.port_cap_flags;
+	resp.port_cap_flags  = make_port_cap_flags(&attr);
 	resp.max_msg_sz      = attr.max_msg_sz;
 	resp.bad_pkey_cntr   = attr.bad_pkey_cntr;
 	resp.qkey_viol_cntr  = attr.qkey_viol_cntr;
 	resp.pkey_tbl_len    = attr.pkey_tbl_len;
 
+	if (rdma_is_grh_required(ib_dev, cmd.port_num))
+		resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED;
+
 	if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
 		resp.lid     = OPA_TO_IB_UCAST_LID(attr.lid);
 		resp.sm_lid  = OPA_TO_IB_UCAST_LID(attr.sm_lid);
@@ -300,7 +345,6 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf,
 			   int in_len, int out_len)
 {
@@ -310,6 +354,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 	struct ib_uobject             *uobj;
 	struct ib_pd                  *pd;
 	int                            ret;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -322,11 +367,11 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
                    in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                    out_len - sizeof(resp));
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
+	pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata);
 	if (IS_ERR(pd)) {
 		ret = PTR_ERR(pd);
 		goto err;
@@ -348,9 +393,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 		goto err_copy;
 	}
 
-	uobj_alloc_commit(uobj);
-
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 
 err_copy:
 	ib_dealloc_pd(pd);
@@ -361,25 +404,16 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf,
 			     int in_len, int out_len)
 {
 	struct ib_uverbs_dealloc_pd cmd;
-	struct ib_uobject          *uobj;
-	int                         ret;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
+				    in_len);
 }
 
 struct xrcd_table_entry {
@@ -468,7 +502,6 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
 }
 
 ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -481,6 +514,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 	struct inode                   *inode = NULL;
 	int				ret = 0;
 	int				new_xrcd = 0;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -517,15 +551,15 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 		}
 	}
 
-	obj  = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD,
-						    file->ucontext);
+	obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file,
+						   &ib_dev);
 	if (IS_ERR(obj)) {
 		ret = PTR_ERR(obj);
 		goto err_tree_mutex_unlock;
 	}
 
 	if (!xrcd) {
-		xrcd = ib_dev->alloc_xrcd(ib_dev, file->ucontext, &udata);
+		xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata);
 		if (IS_ERR(xrcd)) {
 			ret = PTR_ERR(xrcd);
 			goto err;
@@ -564,9 +598,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 
 	mutex_unlock(&file->device->xrcd_tree_mutex);
 
-	uobj_alloc_commit(&obj->uobject);
-
-	return in_len;
+	return uobj_alloc_commit(&obj->uobject, in_len);
 
 err_copy:
 	if (inode) {
@@ -591,32 +623,25 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
 	struct ib_uverbs_close_xrcd cmd;
-	struct ib_uobject           *uobj;
-	int                         ret = 0;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
+				    in_len);
 }
 
-int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
 			   struct ib_xrcd *xrcd,
 			   enum rdma_remove_reason why)
 {
 	struct inode *inode;
 	int ret;
+	struct ib_uverbs_device *dev = uobject->context->ufile->device;
 
 	inode = xrcd->inode;
 	if (inode && !atomic_dec_and_test(&xrcd->usecnt))
@@ -624,16 +649,18 @@ int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
 
 	ret = ib_dealloc_xrcd(xrcd);
 
-	if (why == RDMA_REMOVE_DESTROY && ret)
+	if (ib_is_destroy_retryable(ret, why, uobject)) {
 		atomic_inc(&xrcd->usecnt);
-	else if (inode)
+		return ret;
+	}
+
+	if (inode)
 		xrcd_table_delete(dev, inode);
 
 	return ret;
 }
 
 ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
-			 struct ib_device *ib_dev,
 			 const char __user *buf, int in_len,
 			 int out_len)
 {
@@ -644,6 +671,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	struct ib_pd                *pd;
 	struct ib_mr                *mr;
 	int                          ret;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -663,11 +691,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 	if (ret)
 		return ret;
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
 	if (!pd) {
 		ret = -EINVAL;
 		goto err_free;
@@ -711,9 +739,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 
 	uobj_put_obj_read(pd);
 
-	uobj_alloc_commit(uobj);
-
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 
 err_copy:
 	ib_dereg_mr(mr);
@@ -727,7 +753,6 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf, int in_len,
 			   int out_len)
 {
@@ -759,8 +784,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
 	     (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
 			return -EINVAL;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
-			       file->ucontext);
+	uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
@@ -778,7 +802,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
 	}
 
 	if (cmd.flags & IB_MR_REREG_PD) {
-		pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+		pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+				       file);
 		if (!pd) {
 			ret = -EINVAL;
 			goto put_uobjs;
@@ -819,29 +844,19 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf, int in_len,
 			   int out_len)
 {
 	struct ib_uverbs_dereg_mr cmd;
-	struct ib_uobject	 *uobj;
-	int                       ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
+				    in_len);
 }
 
 ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf, int in_len,
 			   int out_len)
 {
@@ -852,6 +867,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 	struct ib_mw                  *mw;
 	struct ib_udata		       udata;
 	int                            ret;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof(resp))
 		return -ENOSPC;
@@ -859,11 +875,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof(cmd)))
 		return -EFAULT;
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
 	if (!pd) {
 		ret = -EINVAL;
 		goto err_free;
@@ -897,9 +913,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 	}
 
 	uobj_put_obj_read(pd);
-	uobj_alloc_commit(uobj);
-
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 
 err_copy:
 	uverbs_dealloc_mw(mw);
@@ -911,28 +925,19 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
 	struct ib_uverbs_dealloc_mw cmd;
-	struct ib_uobject	   *uobj;
-	int                         ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof(cmd)))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
+				    in_len);
 }
 
 ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
-				      struct ib_device *ib_dev,
 				      const char __user *buf, int in_len,
 				      int out_len)
 {
@@ -940,6 +945,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 	struct ib_uverbs_create_comp_channel_resp  resp;
 	struct ib_uobject			  *uobj;
 	struct ib_uverbs_completion_event_file	  *ev_file;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -947,14 +953,14 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
 	resp.fd = uobj->id;
 
 	ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
-			       uobj_file.uobj);
+			       uobj);
 	ib_uverbs_init_event_queue(&ev_file->ev_queue);
 
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
@@ -962,12 +968,10 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
 		return -EFAULT;
 	}
 
-	uobj_alloc_commit(uobj);
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 }
 
 static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
-					struct ib_device *ib_dev,
 				       struct ib_udata *ucore,
 				       struct ib_udata *uhw,
 				       struct ib_uverbs_ex_create_cq *cmd,
@@ -985,21 +989,23 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	int                             ret;
 	struct ib_uverbs_ex_create_cq_resp resp;
 	struct ib_cq_init_attr attr = {};
-
-	if (!ib_dev->create_cq)
-		return ERR_PTR(-EOPNOTSUPP);
+	struct ib_device *ib_dev;
 
 	if (cmd->comp_vector >= file->device->num_comp_vectors)
 		return ERR_PTR(-EINVAL);
 
-	obj  = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ,
-						  file->ucontext);
+	obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file,
+						 &ib_dev);
 	if (IS_ERR(obj))
 		return obj;
 
+	if (!ib_dev->create_cq) {
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
 	if (cmd->comp_channel >= 0) {
-		ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
-						     file->ucontext);
+		ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
 		if (IS_ERR(ev_file)) {
 			ret = PTR_ERR(ev_file);
 			goto err;
@@ -1007,7 +1013,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	}
 
 	obj->uobject.user_handle = cmd->user_handle;
-	obj->uverbs_file	   = file;
 	obj->comp_events_reported  = 0;
 	obj->async_events_reported = 0;
 	INIT_LIST_HEAD(&obj->comp_list);
@@ -1019,7 +1024,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
 		attr.flags = cmd->flags;
 
-	cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, uhw);
+	cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw);
 	if (IS_ERR(cq)) {
 		ret = PTR_ERR(cq);
 		goto err_file;
@@ -1047,7 +1052,9 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
 	if (ret)
 		goto err_cb;
 
-	uobj_alloc_commit(&obj->uobject);
+	ret = uobj_alloc_commit(&obj->uobject, 0);
+	if (ret)
+		return ERR_PTR(ret);
 	return obj;
 
 err_cb:
@@ -1075,7 +1082,6 @@ static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -1106,7 +1112,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 	cmd_ex.comp_vector = cmd.comp_vector;
 	cmd_ex.comp_channel = cmd.comp_channel;
 
-	obj = create_cq(file, ib_dev, &ucore, &uhw, &cmd_ex,
+	obj = create_cq(file, &ucore, &uhw, &cmd_ex,
 			offsetof(typeof(cmd_ex), comp_channel) +
 			sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
 			NULL);
@@ -1129,7 +1135,6 @@ static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
-			 struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -1155,7 +1160,7 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
 			     sizeof(resp.response_length)))
 		return -ENOSPC;
 
-	obj = create_cq(file, ib_dev, ucore, uhw, &cmd,
+	obj = create_cq(file, ucore, uhw, &cmd,
 			min(ucore->inlen, sizeof(cmd)),
 			ib_uverbs_ex_create_cq_cb, NULL);
 
@@ -1163,7 +1168,6 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -1181,7 +1185,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq)
 		return -EINVAL;
 
@@ -1231,7 +1235,6 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
 }
 
 ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
-			  struct ib_device *ib_dev,
 			  const char __user *buf, int in_len,
 			  int out_len)
 {
@@ -1246,7 +1249,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq)
 		return -EINVAL;
 
@@ -1262,7 +1265,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 		if (!ret)
 			break;
 
-		ret = copy_wc_to_user(ib_dev, data_ptr, &wc);
+		ret = copy_wc_to_user(cq->device, data_ptr, &wc);
 		if (ret)
 			goto out_put;
 
@@ -1283,7 +1286,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
-				struct ib_device *ib_dev,
 				const char __user *buf, int in_len,
 				int out_len)
 {
@@ -1293,7 +1295,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq)
 		return -EINVAL;
 
@@ -1306,45 +1308,28 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
 	struct ib_uverbs_destroy_cq      cmd;
 	struct ib_uverbs_destroy_cq_resp resp;
 	struct ib_uobject		*uobj;
-	struct ib_cq               	*cq;
 	struct ib_ucq_object        	*obj;
-	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle,
-			       file->ucontext);
+	uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	/*
-	 * Make sure we don't free the memory in remove_commit as we still
-	 * needs the uobject memory to create the response.
-	 */
-	uverbs_uobject_get(uobj);
-	cq      = uobj->object;
-	obj     = container_of(cq->uobject, struct ib_ucq_object, uobject);
-
+	obj = container_of(uobj, struct ib_ucq_object, uobject);
 	memset(&resp, 0, sizeof(resp));
-
-	ret = uobj_remove_commit(uobj);
-	if (ret) {
-		uverbs_uobject_put(uobj);
-		return ret;
-	}
-
 	resp.comp_events_reported  = obj->comp_events_reported;
 	resp.async_events_reported = obj->async_events_reported;
 
-	uverbs_uobject_put(uobj);
+	uobj_put_destroy(uobj);
+
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
 		return -EFAULT;
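The destroy_cq conversion above (and the QP/WQ conversions later in this file) depends on the ordering the new helpers provide: uobj_get_destroy() looks up the handle and tears down the HW object, while the uobject memory remains valid until uobj_put_destroy(), so the event counters can still be copied into the response in between. An annotated sketch of that shape, reusing the CQ names from the hunk:

uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (IS_ERR(uobj))
	return PTR_ERR(uobj);			/* lookup or HW destroy failed */

obj = container_of(uobj, struct ib_ucq_object, uobject);
resp.comp_events_reported  = obj->comp_events_reported;	/* still readable */
resp.async_events_reported = obj->async_events_reported;

uobj_put_destroy(uobj);				/* only now is the uobject released */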
 
@@ -1375,12 +1360,13 @@ static int create_qp(struct ib_uverbs_file *file,
 	int				ret;
 	struct ib_rwq_ind_table *ind_tbl = NULL;
 	bool has_sq = true;
+	struct ib_device *ib_dev;
 
 	if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
 		return -EPERM;
 
-	obj  = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
-						  file->ucontext);
+	obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+						 &ib_dev);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 	obj->uxrcd = NULL;
@@ -1390,9 +1376,9 @@ static int create_qp(struct ib_uverbs_file *file,
 	if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
 		      sizeof(cmd->rwq_ind_tbl_handle) &&
 		      (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
-		ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL,
-					    cmd->rwq_ind_tbl_handle,
-					    file->ucontext);
+		ind_tbl = uobj_get_obj_read(rwq_ind_table,
+					    UVERBS_OBJECT_RWQ_IND_TBL,
+					    cmd->rwq_ind_tbl_handle, file);
 		if (!ind_tbl) {
 			ret = -EINVAL;
 			goto err_put;
@@ -1418,7 +1404,7 @@ static int create_qp(struct ib_uverbs_file *file,
 
 	if (cmd->qp_type == IB_QPT_XRC_TGT) {
 		xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
-					  file->ucontext);
+					  file);
 
 		if (IS_ERR(xrcd_uobj)) {
 			ret = -EINVAL;
@@ -1437,8 +1423,8 @@ static int create_qp(struct ib_uverbs_file *file,
 			cmd->max_recv_sge = 0;
 		} else {
 			if (cmd->is_srq) {
-				srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle,
-							file->ucontext);
+				srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
+							cmd->srq_handle, file);
 				if (!srq || srq->srq_type == IB_SRQT_XRC) {
 					ret = -EINVAL;
 					goto err_put;
@@ -1447,8 +1433,9 @@ static int create_qp(struct ib_uverbs_file *file,
 
 			if (!ind_tbl) {
 				if (cmd->recv_cq_handle != cmd->send_cq_handle) {
-					rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle,
-								file->ucontext);
+					rcq = uobj_get_obj_read(
+						cq, UVERBS_OBJECT_CQ,
+						cmd->recv_cq_handle, file);
 					if (!rcq) {
 						ret = -EINVAL;
 						goto err_put;
@@ -1458,11 +1445,12 @@ static int create_qp(struct ib_uverbs_file *file,
 		}
 
 		if (has_sq)
-			scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle,
-						file->ucontext);
+			scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+						cmd->send_cq_handle, file);
 		if (!ind_tbl)
 			rcq = rcq ?: scq;
-		pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
+		pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
+				       file);
 		if (!pd || (!scq && has_sq)) {
 			ret = -EINVAL;
 			goto err_put;
@@ -1602,9 +1590,7 @@ static int create_qp(struct ib_uverbs_file *file,
 	if (ind_tbl)
 		uobj_put_obj_read(ind_tbl);
 
-	uobj_alloc_commit(&obj->uevent.uobject);
-
-	return 0;
+	return uobj_alloc_commit(&obj->uevent.uobject, 0);
 err_cb:
 	ib_destroy_qp(qp);
 
@@ -1637,7 +1623,6 @@ static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -1698,7 +1683,6 @@ static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -1735,7 +1719,6 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
-			  struct ib_device *ib_dev,
 			  const char __user *buf, int in_len, int out_len)
 {
 	struct ib_uverbs_open_qp        cmd;
@@ -1747,6 +1730,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
 	struct ib_qp                   *qp;
 	struct ib_qp_open_attr          attr;
 	int ret;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -1759,13 +1743,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	obj  = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
-						  file->ucontext);
+	obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+						 &ib_dev);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle,
-				  file->ucontext);
+	xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
 	if (IS_ERR(xrcd_uobj)) {
 		ret = -EINVAL;
 		goto err_put;
@@ -1809,10 +1792,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
 	qp->uobject = &obj->uevent.uobject;
 	uobj_put_read(xrcd_uobj);
 
-
-	uobj_alloc_commit(&obj->uevent.uobject);
-
-	return in_len;
+	return uobj_alloc_commit(&obj->uevent.uobject, in_len);
 
 err_destroy:
 	ib_destroy_qp(qp);
@@ -1846,7 +1826,6 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
 }
 
 ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   const char __user *buf, int in_len,
 			   int out_len)
 {
@@ -1867,7 +1846,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
 		goto out;
 	}
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp) {
 		ret = -EINVAL;
 		goto out;
@@ -1968,11 +1947,11 @@ static int modify_qp(struct ib_uverbs_file *file,
 	struct ib_qp *qp;
 	int ret;
 
-	attr = kmalloc(sizeof *attr, GFP_KERNEL);
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
 	if (!attr)
 		return -ENOMEM;
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
 	if (!qp) {
 		ret = -EINVAL;
 		goto out;
@@ -2098,7 +2077,6 @@ static int modify_qp(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -2125,7 +2103,6 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -2161,7 +2138,6 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
@@ -2169,33 +2145,19 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_qp_resp resp;
 	struct ib_uobject		*uobj;
 	struct ib_uqp_object        	*obj;
-	int                        	 ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	memset(&resp, 0, sizeof resp);
-
-	uobj  = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
-			       file->ucontext);
+	uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
 	obj = container_of(uobj, struct ib_uqp_object, uevent.uobject);
-	/*
-	 * Make sure we don't free the memory in remove_commit as we still
-	 * needs the uobject memory to create the response.
-	 */
-	uverbs_uobject_get(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	if (ret) {
-		uverbs_uobject_put(uobj);
-		return ret;
-	}
-
+	memset(&resp, 0, sizeof(resp));
 	resp.events_reported = obj->uevent.events_reported;
-	uverbs_uobject_put(uobj);
+
+	uobj_put_destroy(uobj);
 
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
 		return -EFAULT;
@@ -2214,14 +2176,14 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge)
 }
 
 ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
 	struct ib_uverbs_post_send      cmd;
 	struct ib_uverbs_post_send_resp resp;
 	struct ib_uverbs_send_wr       *user_wr;
-	struct ib_send_wr              *wr = NULL, *last, *next, *bad_wr;
+	struct ib_send_wr              *wr = NULL, *last, *next;
+	const struct ib_send_wr	       *bad_wr;
 	struct ib_qp                   *qp;
 	int                             i, sg_ind;
 	int				is_ud;
@@ -2242,7 +2204,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 	if (!user_wr)
 		return -ENOMEM;
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp)
 		goto out;
 
@@ -2278,8 +2240,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
 				goto out_put;
 			}
 
-			ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah,
-						   file->ucontext);
+			ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
+						   user_wr->wr.ud.ah, file);
 			if (!ud->ah) {
 				kfree(ud);
 				ret = -EINVAL;
@@ -2494,13 +2456,13 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
 }
 
 ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
 	struct ib_uverbs_post_recv      cmd;
 	struct ib_uverbs_post_recv_resp resp;
-	struct ib_recv_wr              *wr, *next, *bad_wr;
+	struct ib_recv_wr              *wr, *next;
+	const struct ib_recv_wr	       *bad_wr;
 	struct ib_qp                   *qp;
 	ssize_t                         ret = -EINVAL;
 
@@ -2513,7 +2475,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 	if (IS_ERR(wr))
 		return PTR_ERR(wr);
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp)
 		goto out;
 
@@ -2543,13 +2505,13 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
-				struct ib_device *ib_dev,
 				const char __user *buf, int in_len,
 				int out_len)
 {
 	struct ib_uverbs_post_srq_recv      cmd;
 	struct ib_uverbs_post_srq_recv_resp resp;
-	struct ib_recv_wr                  *wr, *next, *bad_wr;
+	struct ib_recv_wr                  *wr, *next;
+	const struct ib_recv_wr		   *bad_wr;
 	struct ib_srq                      *srq;
 	ssize_t                             ret = -EINVAL;
 
@@ -2562,12 +2524,13 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 	if (IS_ERR(wr))
 		return PTR_ERR(wr);
 
-	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
 	if (!srq)
 		goto out;
 
 	resp.bad_wr = 0;
-	ret = srq->device->post_srq_recv(srq, wr, &bad_wr);
+	ret = srq->device->post_srq_recv ?
+		srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP;
 
 	uobj_put_obj_read(srq);
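post_srq_recv is an optional verb, so the hunk above guards the indirect call and returns -EOPNOTSUPP when a driver leaves it NULL instead of dereferencing a NULL function pointer. The same guard written as a helper, using the const-qualified receive WR types seen above; example_post_srq_recv is hypothetical:

static int example_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
				 const struct ib_recv_wr **bad_wr)
{
	if (!srq->device->post_srq_recv)	/* optional verb not provided */
		return -EOPNOTSUPP;

	return srq->device->post_srq_recv(srq, wr, bad_wr);
}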
 
@@ -2592,7 +2555,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf, int in_len,
 			    int out_len)
 {
@@ -2601,9 +2563,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	struct ib_uobject		*uobj;
 	struct ib_pd			*pd;
 	struct ib_ah			*ah;
-	struct rdma_ah_attr		attr;
+	struct rdma_ah_attr		attr = {};
 	int ret;
 	struct ib_udata                   udata;
+	struct ib_device *ib_dev;
 
 	if (out_len < sizeof resp)
 		return -ENOSPC;
@@ -2611,19 +2574,21 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num))
-		return -EINVAL;
-
 	ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
 		   u64_to_user_ptr(cmd.response) + sizeof(resp),
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
-	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+	if (!rdma_is_port_valid(ib_dev, cmd.attr.port_num)) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
 	if (!pd) {
 		ret = -EINVAL;
 		goto err;
@@ -2665,9 +2630,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 	}
 
 	uobj_put_obj_read(pd);
-	uobj_alloc_commit(uobj);
-
-	return in_len;
+	return uobj_alloc_commit(uobj, in_len);
 
 err_copy:
 	rdma_destroy_ah(ah);
@@ -2681,27 +2644,18 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len, int out_len)
 {
 	struct ib_uverbs_destroy_ah cmd;
-	struct ib_uobject	   *uobj;
-	int			    ret;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	return ret ?: in_len;
+	return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
+				    in_len);
 }
 
 ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
-			       struct ib_device *ib_dev,
 			       const char __user *buf, int in_len,
 			       int out_len)
 {
@@ -2714,7 +2668,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp)
 		return -EINVAL;
 
@@ -2751,7 +2705,6 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
-			       struct ib_device *ib_dev,
 			       const char __user *buf, int in_len,
 			       int out_len)
 {
@@ -2765,7 +2718,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp)
 		return -EINVAL;
 
@@ -2810,29 +2763,27 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
 	resources = kzalloc(sizeof(*resources), GFP_KERNEL);
 
 	if (!resources)
-		goto err_res;
+		return NULL;
+
+	if (!num_specs)
+		goto out;
 
 	resources->counters =
 		kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
-
-	if (!resources->counters)
-		goto err_cnt;
-
 	resources->collection =
 		kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
 
-	if (!resources->collection)
-		goto err_collection;
+	if (!resources->counters || !resources->collection)
+		goto err;
 
+out:
 	resources->max = num_specs;
-
 	return resources;
 
-err_collection:
+err:
 	kfree(resources->counters);
-err_cnt:
 	kfree(resources);
-err_res:
+
 	return NULL;
 }
 
@@ -2840,6 +2791,9 @@ void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
 {
 	unsigned int i;
 
+	if (!uflow_res)
+		return;
+
 	for (i = 0; i < uflow_res->collection_num; i++)
 		atomic_dec(&uflow_res->collection[i]->usecnt);
 
@@ -2875,7 +2829,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
 	uflow_res->num++;
 }
 
-static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
 				       struct ib_uverbs_flow_spec *kern_spec,
 				       union ib_flow_spec *ib_spec,
 				       struct ib_uflow_resources *uflow_res)
@@ -2904,7 +2858,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
 		ib_spec->action.act = uobj_get_obj_read(flow_action,
 							UVERBS_OBJECT_FLOW_ACTION,
 							kern_spec->action.handle,
-							ucontext);
+							ufile);
 		if (!ib_spec->action.act)
 			return -EINVAL;
 		ib_spec->action.size =
@@ -2922,7 +2876,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
 			uobj_get_obj_read(counters,
 					  UVERBS_OBJECT_COUNTERS,
 					  kern_spec->flow_count.handle,
-					  ucontext);
+					  ufile);
 		if (!ib_spec->flow_count.counters)
 			return -EINVAL;
 		ib_spec->flow_count.size =
@@ -3091,9 +3045,6 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
 	void *kern_spec_mask;
 	void *kern_spec_val;
 
-	if (kern_spec->reserved)
-		return -EINVAL;
-
 	kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
 
 	kern_spec_val = (void *)kern_spec +
@@ -3106,7 +3057,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
 						     kern_filter_sz, ib_spec);
 }
 
-static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
+static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
 				struct ib_uverbs_flow_spec *kern_spec,
 				union ib_flow_spec *ib_spec,
 				struct ib_uflow_resources *uflow_res)
@@ -3115,14 +3066,13 @@ static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
 		return -EINVAL;
 
 	if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
-		return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec,
+		return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
 						   uflow_res);
 	else
 		return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
 }
 
 int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -3136,6 +3086,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 	struct ib_wq_init_attr wq_init_attr = {};
 	size_t required_cmd_sz;
 	size_t required_resp_len;
+	struct ib_device *ib_dev;
 
 	required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
 	required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
@@ -3158,18 +3109,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 	if (cmd.comp_mask)
 		return -EOPNOTSUPP;
 
-	obj  = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ,
-						  file->ucontext);
+	obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file,
+						 &ib_dev);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
-	pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
 	if (!pd) {
 		err = -EINVAL;
 		goto err_uobj;
 	}
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq) {
 		err = -EINVAL;
 		goto err_put_pd;
@@ -3223,8 +3174,7 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 
 	uobj_put_obj_read(pd);
 	uobj_put_obj_read(cq);
-	uobj_alloc_commit(&obj->uevent.uobject);
-	return 0;
+	return uobj_alloc_commit(&obj->uevent.uobject, 0);
 
 err_copy:
 	ib_destroy_wq(wq);
@@ -3239,7 +3189,6 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    struct ib_udata *ucore,
 			    struct ib_udata *uhw)
 {
@@ -3273,29 +3222,19 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
 		return -EOPNOTSUPP;
 
 	resp.response_length = required_resp_len;
-	uobj  = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle,
-			       file->ucontext);
+	uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
 	obj = container_of(uobj, struct ib_uwq_object, uevent.uobject);
-	/*
-	 * Make sure we don't free the memory in remove_commit as we still
-	 * needs the uobject memory to create the response.
-	 */
-	uverbs_uobject_get(uobj);
-
-	ret = uobj_remove_commit(uobj);
 	resp.events_reported = obj->uevent.events_reported;
-	uverbs_uobject_put(uobj);
-	if (ret)
-		return ret;
+
+	uobj_put_destroy(uobj);
 
 	return ib_copy_to_udata(ucore, &resp, resp.response_length);
 }
 
 int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -3324,7 +3263,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
 	if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
 		return -EINVAL;
 
-	wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext);
+	wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
 	if (!wq)
 		return -EINVAL;
 
@@ -3345,7 +3284,6 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
-				      struct ib_device *ib_dev,
 				      struct ib_udata *ucore,
 				      struct ib_udata *uhw)
 {
@@ -3363,6 +3301,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 	u32 expected_in_size;
 	size_t required_cmd_sz_header;
 	size_t required_resp_len;
+	struct ib_device *ib_dev;
 
 	required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
 	required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
@@ -3418,8 +3357,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 
 	for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
 			num_read_wqs++) {
-		wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs],
-				       file->ucontext);
+		wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
+				       wqs_handles[num_read_wqs], file);
 		if (!wq) {
 			err = -EINVAL;
 			goto put_wqs;
@@ -3428,7 +3367,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 		wqs[num_read_wqs] = wq;
 	}
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev);
 	if (IS_ERR(uobj)) {
 		err = PTR_ERR(uobj);
 		goto put_wqs;
@@ -3472,8 +3411,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 	for (j = 0; j < num_read_wqs; j++)
 		uobj_put_obj_read(wqs[j]);
 
-	uobj_alloc_commit(uobj);
-	return 0;
+	return uobj_alloc_commit(uobj, 0);
 
 err_copy:
 	ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3489,12 +3427,10 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
-				       struct ib_device *ib_dev,
 				       struct ib_udata *ucore,
 				       struct ib_udata *uhw)
 {
 	struct ib_uverbs_ex_destroy_rwq_ind_table	cmd = {};
-	struct ib_uobject		*uobj;
 	int			ret;
 	size_t required_cmd_sz;
 
@@ -3515,16 +3451,11 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
 	if (cmd.comp_mask)
 		return -EOPNOTSUPP;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	return uobj_remove_commit(uobj);
+	return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
+				    cmd.ind_tbl_handle, file, 0);
 }
 
 int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     struct ib_udata *ucore,
 			     struct ib_udata *uhw)
 {
@@ -3541,6 +3472,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	int err = 0;
 	void *ib_spec;
 	int i;
+	struct ib_device *ib_dev;
 
 	if (ucore->inlen < sizeof(cmd))
 		return -EINVAL;
@@ -3596,13 +3528,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		kern_flow_attr = &cmd.flow_attr;
 	}
 
-	uobj  = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext);
+	uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev);
 	if (IS_ERR(uobj)) {
 		err = PTR_ERR(uobj);
 		goto err_free_attr;
 	}
 
-	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+	qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
 	if (!qp) {
 		err = -EINVAL;
 		goto err_uobj;
@@ -3613,6 +3545,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		goto err_put;
 	}
 
+	if (!qp->device->create_flow) {
+		err = -EOPNOTSUPP;
+		goto err_put;
+	}
+
 	flow_attr = kzalloc(struct_size(flow_attr, flows,
 				cmd.flow_attr.num_of_specs), GFP_KERNEL);
 	if (!flow_attr) {
@@ -3639,7 +3576,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 			cmd.flow_attr.size >= kern_spec->size;
 	     i++) {
 		err = kern_spec_to_ib_spec(
-				file->ucontext, (struct ib_uverbs_flow_spec *)kern_spec,
+				file, (struct ib_uverbs_flow_spec *)kern_spec,
 				ib_spec, uflow_res);
 		if (err)
 			goto err_free;
@@ -3666,6 +3603,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	}
 	atomic_inc(&qp->usecnt);
 	flow_id->qp = qp;
+	flow_id->device = qp->device;
 	flow_id->uobject = uobj;
 	uobj->object = flow_id;
 	uflow = container_of(uobj, typeof(*uflow), uobject);
@@ -3680,13 +3618,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 		goto err_copy;
 
 	uobj_put_obj_read(qp);
-	uobj_alloc_commit(uobj);
 	kfree(flow_attr);
 	if (cmd.flow_attr.num_of_specs)
 		kfree(kern_flow_attr);
-	return 0;
+	return uobj_alloc_commit(uobj, 0);
 err_copy:
-	ib_destroy_flow(flow_id);
+	if (!qp->device->destroy_flow(flow_id))
+		atomic_dec(&qp->usecnt);
 err_free:
 	ib_uverbs_flow_resources_free(uflow_res);
 err_free_flow_attr:
@@ -3702,12 +3640,10 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      struct ib_udata *ucore,
 			      struct ib_udata *uhw)
 {
 	struct ib_uverbs_destroy_flow	cmd;
-	struct ib_uobject		*uobj;
 	int				ret;
 
 	if (ucore->inlen < sizeof(cmd))
@@ -3720,17 +3656,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
 	if (cmd.comp_mask)
 		return -EINVAL;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle,
-			       file->ucontext);
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	ret = uobj_remove_commit(uobj);
-	return ret;
+	return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
+				    0);
 }
 
 static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
-				struct ib_device *ib_dev,
 				struct ib_uverbs_create_xsrq *cmd,
 				struct ib_udata *udata)
 {
@@ -3741,9 +3671,10 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 	struct ib_uobject               *uninitialized_var(xrcd_uobj);
 	struct ib_srq_init_attr          attr;
 	int ret;
+	struct ib_device *ib_dev;
 
-	obj  = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ,
-						   file->ucontext);
+	obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file,
+						  &ib_dev);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
@@ -3752,7 +3683,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 
 	if (cmd->srq_type == IB_SRQT_XRC) {
 		xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
-					  file->ucontext);
+					  file);
 		if (IS_ERR(xrcd_uobj)) {
 			ret = -EINVAL;
 			goto err;
@@ -3769,15 +3700,15 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 	}
 
 	if (ib_srq_has_cq(cmd->srq_type)) {
-		attr.ext.cq  = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
-						 file->ucontext);
+		attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+						cmd->cq_handle, file);
 		if (!attr.ext.cq) {
 			ret = -EINVAL;
 			goto err_put_xrcd;
 		}
 	}
 
-	pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
+	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
 	if (!pd) {
 		ret = -EINVAL;
 		goto err_put_cq;
@@ -3842,9 +3773,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 		uobj_put_obj_read(attr.ext.cq);
 
 	uobj_put_obj_read(pd);
-	uobj_alloc_commit(&obj->uevent.uobject);
-
-	return 0;
+	return uobj_alloc_commit(&obj->uevent.uobject, 0);
 
 err_copy:
 	ib_destroy_srq(srq);
@@ -3868,7 +3797,6 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
@@ -3898,7 +3826,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
+	ret = __uverbs_create_xsrq(file, &xcmd, &udata);
 	if (ret)
 		return ret;
 
@@ -3906,7 +3834,6 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      const char __user *buf, int in_len, int out_len)
 {
 	struct ib_uverbs_create_xsrq     cmd;
@@ -3925,7 +3852,7 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
 		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
 		   out_len - sizeof(resp));
 
-	ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
+	ret = __uverbs_create_xsrq(file, &cmd, &udata);
 	if (ret)
 		return ret;
 
@@ -3933,7 +3860,6 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
-			     struct ib_device *ib_dev,
 			     const char __user *buf, int in_len,
 			     int out_len)
 {
@@ -3949,7 +3875,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 	ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
 		   out_len);
 
-	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
 	if (!srq)
 		return -EINVAL;
 
@@ -3964,7 +3890,6 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
-			    struct ib_device *ib_dev,
 			    const char __user *buf,
 			    int in_len, int out_len)
 {
@@ -3980,7 +3905,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+	srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
 	if (!srq)
 		return -EINVAL;
 
@@ -4004,7 +3929,6 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
 }
 
 ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      const char __user *buf, int in_len,
 			      int out_len)
 {
@@ -4012,32 +3936,20 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 	struct ib_uverbs_destroy_srq_resp resp;
 	struct ib_uobject		 *uobj;
 	struct ib_uevent_object        	 *obj;
-	int                         	  ret = -EINVAL;
 
 	if (copy_from_user(&cmd, buf, sizeof cmd))
 		return -EFAULT;
 
-	uobj  = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle,
-			       file->ucontext);
+	uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
 	if (IS_ERR(uobj))
 		return PTR_ERR(uobj);
 
 	obj = container_of(uobj, struct ib_uevent_object, uobject);
-	/*
-	 * Make sure we don't free the memory in remove_commit as we still
-	 * needs the uobject memory to create the response.
-	 */
-	uverbs_uobject_get(uobj);
-
 	memset(&resp, 0, sizeof(resp));
-
-	ret = uobj_remove_commit(uobj);
-	if (ret) {
-		uverbs_uobject_put(uobj);
-		return ret;
-	}
 	resp.events_reported = obj->events_reported;
-	uverbs_uobject_put(uobj);
+
+	uobj_put_destroy(uobj);
+
 	if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
 		return -EFAULT;
 
@@ -4045,15 +3957,21 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
-			      struct ib_device *ib_dev,
 			      struct ib_udata *ucore,
 			      struct ib_udata *uhw)
 {
 	struct ib_uverbs_ex_query_device_resp resp = { {0} };
 	struct ib_uverbs_ex_query_device  cmd;
 	struct ib_device_attr attr = {0};
+	struct ib_ucontext *ucontext;
+	struct ib_device *ib_dev;
 	int err;
 
+	ucontext = ib_uverbs_get_ucontext(file);
+	if (IS_ERR(ucontext))
+		return PTR_ERR(ucontext);
+	ib_dev = ucontext->device;
+
 	if (!ib_dev->query_device)
 		return -EOPNOTSUPP;
 
@@ -4079,7 +3997,7 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 	if (err)
 		return err;
 
-	copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
+	copy_query_dev_fields(ucontext, &resp.base, &attr);
 
 	if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
 		goto end;
@@ -4166,7 +4084,6 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 }
 
 int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
-			   struct ib_device *ib_dev,
 			   struct ib_udata *ucore,
 			   struct ib_udata *uhw)
 {
@@ -4196,7 +4113,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
 	if (cmd.attr_mask > IB_CQ_MODERATE)
 		return -EOPNOTSUPP;
 
-	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+	cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
 	if (!cq)
 		return -EINVAL;
 
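
The destroy conversions above (ib_uverbs_ex_destroy_wq, ib_uverbs_destroy_srq) all reduce to the same shape. A minimal sketch of that pattern, assuming a hypothetical UVERBS_OBJECT_FOO whose uobject embeds a struct ib_uevent_object and a hypothetical response struct; only uobj_get_destroy()/uobj_put_destroy() and the field reads are taken from the code above:

static int example_destroy_foo(struct ib_uverbs_file *file, u32 handle,
			       struct example_destroy_resp *resp)
{
	struct ib_uevent_object *obj;
	struct ib_uobject *uobj;

	uobj = uobj_get_destroy(UVERBS_OBJECT_FOO, handle, file);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/*
	 * As the converted handlers above suggest, the HW object is already
	 * destroyed at this point, but the uobject memory stays readable
	 * until uobj_put_destroy(), so the response can still be filled
	 * from it.
	 */
	obj = container_of(uobj, struct ib_uevent_object, uobject);
	resp->events_reported = obj->events_reported;

	uobj_put_destroy(uobj);
	return 0;
}

Destroy paths that return no payload skip this entirely and call uobj_perform_destroy(), as the RWQ indirection table and flow conversions above do.
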
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 8d32c4a..1a6b229 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -35,6 +35,103 @@
 #include "rdma_core.h"
 #include "uverbs.h"
 
+struct bundle_alloc_head {
+	struct bundle_alloc_head *next;
+	u8 data[];
+};
+
+struct bundle_priv {
+	/* Must be first */
+	struct bundle_alloc_head alloc_head;
+	struct bundle_alloc_head *allocated_mem;
+	size_t internal_avail;
+	size_t internal_used;
+
+	struct radix_tree_root *radix;
+	const struct uverbs_api_ioctl_method *method_elm;
+	void __rcu **radix_slots;
+	unsigned long radix_slots_len;
+	u32 method_key;
+
+	struct ib_uverbs_attr __user *user_attrs;
+	struct ib_uverbs_attr *uattrs;
+
+	DECLARE_BITMAP(uobj_finalize, UVERBS_API_ATTR_BKEY_LEN);
+
+	/*
+	 * Must be last. bundle ends in a flex array which overlaps
+	 * internal_buffer.
+	 */
+	struct uverbs_attr_bundle bundle;
+	u64 internal_buffer[32];
+};
+
+/*
+ * Each method has an absolute minimum amount of memory it needs to allocate;
+ * precompute that amount and determine if the onstack memory can be used or
+ * if an allocation is needed.
+ */
+void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
+			      unsigned int num_attrs)
+{
+	struct bundle_priv *pbundle;
+	size_t bundle_size =
+		offsetof(struct bundle_priv, internal_buffer) +
+		sizeof(*pbundle->bundle.attrs) * method_elm->key_bitmap_len +
+		sizeof(*pbundle->uattrs) * num_attrs;
+
+	method_elm->use_stack = bundle_size <= sizeof(*pbundle);
+	method_elm->bundle_size =
+		ALIGN(bundle_size + 256, sizeof(*pbundle->internal_buffer));
+
+	/* Do not want order-2 allocations for this. */
+	WARN_ON_ONCE(method_elm->bundle_size > PAGE_SIZE);
+}
+
+/**
+ * uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * @bundle: The bundle
+ * @size: Number of bytes to allocate
+ * @flags: Allocator flags
+ *
+ * The bundle allocator is intended for allocations that are connected with
+ * processing the system call related to the bundle. The allocated memory is
+ * always freed once the system call completes, and cannot be freed any other
+ * way.
+ *
+ * This tries to use a small pool of pre-allocated memory for performance.
+ */
+__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
+			     gfp_t flags)
+{
+	struct bundle_priv *pbundle =
+		container_of(bundle, struct bundle_priv, bundle);
+	size_t new_used;
+	void *res;
+
+	if (check_add_overflow(size, pbundle->internal_used, &new_used))
+		return ERR_PTR(-EOVERFLOW);
+
+	if (new_used > pbundle->internal_avail) {
+		struct bundle_alloc_head *buf;
+
+		buf = kvmalloc(struct_size(buf, data, size), flags);
+		if (!buf)
+			return ERR_PTR(-ENOMEM);
+		buf->next = pbundle->allocated_mem;
+		pbundle->allocated_mem = buf;
+		return buf->data;
+	}
+
+	res = (void *)pbundle->internal_buffer + pbundle->internal_used;
+	pbundle->internal_used =
+		ALIGN(new_used, sizeof(*pbundle->internal_buffer));
+	if (flags & __GFP_ZERO)
+		memset(res, 0, size);
+	return res;
+}
+EXPORT_SYMBOL(_uverbs_alloc);
+
 static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
 				   u16 len)
 {
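
As a rough illustration of the allocator contract described in the _uverbs_alloc() comment above, a handler can borrow scratch memory from its bundle and never free it. The bundle type and uverbs_alloc() come from this file; the handler itself and the fixed length are hypothetical:

static int example_handler(struct ib_uverbs_file *ufile,
			   struct uverbs_attr_bundle *attrs)
{
	size_t len = 128;	/* would normally come from an input attr */
	void *scratch;

	/* Served from internal_buffer when it fits, kvmalloc() otherwise */
	scratch = uverbs_alloc(attrs, len);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	/*
	 * ... use scratch; it is released when the system call completes,
	 * so no kfree()/kvfree() here ...
	 */
	return 0;
}
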
@@ -46,45 +143,24 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
 			   0, uattr->len - len);
 }
 
-static int uverbs_process_attr(struct ib_device *ibdev,
-			       struct ib_ucontext *ucontext,
-			       const struct ib_uverbs_attr *uattr,
-			       u16 attr_id,
-			       const struct uverbs_attr_spec_hash *attr_spec_bucket,
-			       struct uverbs_attr_bundle_hash *attr_bundle_h,
-			       struct ib_uverbs_attr __user *uattr_ptr)
+static int uverbs_process_attr(struct bundle_priv *pbundle,
+			       const struct uverbs_api_attr *attr_uapi,
+			       struct ib_uverbs_attr *uattr, u32 attr_bkey)
 {
-	const struct uverbs_attr_spec *spec;
-	const struct uverbs_attr_spec *val_spec;
-	struct uverbs_attr *e;
-	const struct uverbs_object_spec *object;
+	const struct uverbs_attr_spec *spec = &attr_uapi->spec;
+	struct uverbs_attr *e = &pbundle->bundle.attrs[attr_bkey];
+	const struct uverbs_attr_spec *val_spec = spec;
 	struct uverbs_obj_attr *o_attr;
-	struct uverbs_attr *elements = attr_bundle_h->attrs;
-
-	if (attr_id >= attr_spec_bucket->num_attrs) {
-		if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
-			return -EINVAL;
-		else
-			return 0;
-	}
-
-	if (test_bit(attr_id, attr_bundle_h->valid_bitmap))
-		return -EINVAL;
-
-	spec = &attr_spec_bucket->attrs[attr_id];
-	val_spec = spec;
-	e = &elements[attr_id];
-	e->uattr = uattr_ptr;
 
 	switch (spec->type) {
 	case UVERBS_ATTR_TYPE_ENUM_IN:
-		if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems)
+		if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems)
 			return -EOPNOTSUPP;
 
 		if (uattr->attr_data.enum_data.reserved)
 			return -EINVAL;
 
-		val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id];
+		val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id];
 
 		/* Currently we only support PTR_IN based enums */
 		if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
@@ -98,64 +174,75 @@ static int uverbs_process_attr(struct ib_device *ibdev,
 		 * longer struct will fail here if used with an old kernel and
 		 * non-zero content, making ABI compat/discovery simpler.
 		 */
-		if (uattr->len > val_spec->ptr.len &&
-		    val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO &&
-		    !uverbs_is_attr_cleared(uattr, val_spec->ptr.len))
+		if (uattr->len > val_spec->u.ptr.len &&
+		    val_spec->zero_trailing &&
+		    !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
 			return -EOPNOTSUPP;
 
 	/* fall through */
 	case UVERBS_ATTR_TYPE_PTR_OUT:
-		if (uattr->len < val_spec->ptr.min_len ||
-		    (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) &&
-		     uattr->len > val_spec->ptr.len))
+		if (uattr->len < val_spec->u.ptr.min_len ||
+		    (!val_spec->zero_trailing &&
+		     uattr->len > val_spec->u.ptr.len))
 			return -EINVAL;
 
 		if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
 		    uattr->attr_data.reserved)
 			return -EINVAL;
 
-		e->ptr_attr.data = uattr->data;
+		e->ptr_attr.uattr_idx = uattr - pbundle->uattrs;
 		e->ptr_attr.len = uattr->len;
-		e->ptr_attr.flags = uattr->flags;
+
+		if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
+			void *p;
+
+			p = uverbs_alloc(&pbundle->bundle, uattr->len);
+			if (IS_ERR(p))
+				return PTR_ERR(p);
+
+			e->ptr_attr.ptr = p;
+
+			if (copy_from_user(p, u64_to_user_ptr(uattr->data),
+					   uattr->len))
+				return -EFAULT;
+		} else {
+			e->ptr_attr.data = uattr->data;
+		}
 		break;
 
 	case UVERBS_ATTR_TYPE_IDR:
-		if (uattr->data >> 32)
-			return -EINVAL;
-	/* fall through */
 	case UVERBS_ATTR_TYPE_FD:
 		if (uattr->attr_data.reserved)
 			return -EINVAL;
 
-		if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX)
+		if (uattr->len != 0)
 			return -EINVAL;
 
 		o_attr = &e->obj_attr;
-		object = uverbs_get_object(ibdev, spec->obj.obj_type);
-		if (!object)
-			return -EINVAL;
-		o_attr->type = object->type_attrs;
+		o_attr->attr_elm = attr_uapi;
 
-		o_attr->id = (int)uattr->data;
-		o_attr->uobject = uverbs_get_uobject_from_context(
-					o_attr->type,
-					ucontext,
-					spec->obj.access,
-					o_attr->id);
-
+		/*
+		 * The type of uattr->data is u64 for UVERBS_ATTR_TYPE_IDR and
+		 * s64 for UVERBS_ATTR_TYPE_FD. We can cast the u64 to s64
+		 * here without caring about truncation as we know that the
+		 * IDR implementation today rejects negative IDs
+		 */
+		o_attr->uobject = uverbs_get_uobject_from_file(
+					spec->u.obj.obj_type,
+					pbundle->bundle.ufile,
+					spec->u.obj.access,
+					uattr->data_s64);
 		if (IS_ERR(o_attr->uobject))
 			return PTR_ERR(o_attr->uobject);
+		__set_bit(attr_bkey, pbundle->uobj_finalize);
 
-		if (spec->obj.access == UVERBS_ACCESS_NEW) {
-			u64 id = o_attr->uobject->id;
+		if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
+			unsigned int uattr_idx = uattr - pbundle->uattrs;
+			s64 id = o_attr->uobject->id;
 
 			/* Copy the allocated id to the user-space */
-			if (put_user(id, &e->uattr->data)) {
-				uverbs_finalize_object(o_attr->uobject,
-						       UVERBS_ACCESS_NEW,
-						       false);
+			if (put_user(id, &pbundle->user_attrs[uattr_idx].data))
 				return -EFAULT;
-			}
 		}
 
 		break;
@@ -163,220 +250,225 @@ static int uverbs_process_attr(struct ib_device *ibdev,
 		return -EOPNOTSUPP;
 	}
 
-	set_bit(attr_id, attr_bundle_h->valid_bitmap);
 	return 0;
 }
 
-static int uverbs_uattrs_process(struct ib_device *ibdev,
-				 struct ib_ucontext *ucontext,
-				 const struct ib_uverbs_attr *uattrs,
-				 size_t num_uattrs,
-				 const struct uverbs_method_spec *method,
-				 struct uverbs_attr_bundle *attr_bundle,
-				 struct ib_uverbs_attr __user *uattr_ptr)
+/*
+ * We search the radix tree with the method prefix and now we want to fast
+ * search the suffix bits to get a particular attribute pointer. It is not
+ * totally clear to me if this breaks the radix tree encapsulation or not, but
+ * it uses the iter data to determine if the method iter points at the same
+ * chunk that will store the attribute; if so it just derefs it directly. By
+ * construction in most kernel configs the method and attrs will all fit in a
+ * single radix chunk, so in most cases this needs no search. In other cases
+ * this falls back to a full search.
+ */
+static void __rcu **uapi_get_attr_for_method(struct bundle_priv *pbundle,
+					     u32 attr_key)
 {
-	size_t i;
-	int ret = 0;
-	int num_given_buckets = 0;
+	void __rcu **slot;
 
-	for (i = 0; i < num_uattrs; i++) {
-		const struct ib_uverbs_attr *uattr = &uattrs[i];
-		u16 attr_id = uattr->attr_id;
-		struct uverbs_attr_spec_hash *attr_spec_bucket;
+	if (likely(attr_key < pbundle->radix_slots_len)) {
+		void *entry;
 
-		ret = uverbs_ns_idx(&attr_id, method->num_buckets);
-		if (ret < 0) {
-			if (uattr->flags & UVERBS_ATTR_F_MANDATORY) {
-				uverbs_finalize_objects(attr_bundle,
-							method->attr_buckets,
-							num_given_buckets,
-							false);
-				return ret;
-			}
-			continue;
-		}
-
-		/*
-		 * ret is the found ns, so increase num_given_buckets if
-		 * necessary.
-		 */
-		if (ret >= num_given_buckets)
-			num_given_buckets = ret + 1;
-
-		attr_spec_bucket = method->attr_buckets[ret];
-		ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id,
-					  attr_spec_bucket, &attr_bundle->hash[ret],
-					  uattr_ptr++);
-		if (ret) {
-			uverbs_finalize_objects(attr_bundle,
-						method->attr_buckets,
-						num_given_buckets,
-						false);
-			return ret;
-		}
+		slot = pbundle->radix_slots + attr_key;
+		entry = rcu_dereference_raw(*slot);
+		if (likely(!radix_tree_is_internal_node(entry) && entry))
+			return slot;
 	}
 
-	return num_given_buckets;
+	return radix_tree_lookup_slot(pbundle->radix,
+				      pbundle->method_key | attr_key);
 }
 
-static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *method_spec,
-					    struct uverbs_attr_bundle *attr_bundle)
+static int uverbs_set_attr(struct bundle_priv *pbundle,
+			   struct ib_uverbs_attr *uattr)
 {
-	unsigned int i;
-
-	for (i = 0; i < attr_bundle->num_buckets; i++) {
-		struct uverbs_attr_spec_hash *attr_spec_bucket =
-			method_spec->attr_buckets[i];
-
-		if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask,
-				   attr_bundle->hash[i].valid_bitmap,
-				   attr_spec_bucket->num_attrs))
-			return -EINVAL;
-	}
-
-	for (; i < method_spec->num_buckets; i++) {
-		struct uverbs_attr_spec_hash *attr_spec_bucket =
-			method_spec->attr_buckets[i];
-
-		if (!bitmap_empty(attr_spec_bucket->mandatory_attrs_bitmask,
-				  attr_spec_bucket->num_attrs))
-			return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
-				const struct ib_uverbs_attr *uattrs,
-				size_t num_uattrs,
-				struct ib_device *ibdev,
-				struct ib_uverbs_file *ufile,
-				const struct uverbs_method_spec *method_spec,
-				struct uverbs_attr_bundle *attr_bundle)
-{
+	u32 attr_key = uapi_key_attr(uattr->attr_id);
+	u32 attr_bkey = uapi_bkey_attr(attr_key);
+	const struct uverbs_api_attr *attr;
+	void __rcu **slot;
 	int ret;
-	int finalize_ret;
-	int num_given_buckets;
 
-	num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs,
-						  num_uattrs, method_spec,
-						  attr_bundle, uattr_ptr);
-	if (num_given_buckets <= 0)
+	slot = uapi_get_attr_for_method(pbundle, attr_key);
+	if (!slot) {
+		/*
+		 * Kernel does not support the attribute but user-space says it
+		 * is mandatory
+		 */
+		if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
+			return -EPROTONOSUPPORT;
+		return 0;
+	}
+	attr = srcu_dereference(
+		*slot, &pbundle->bundle.ufile->device->disassociate_srcu);
+
+	/* Reject duplicate attributes from user-space */
+	if (test_bit(attr_bkey, pbundle->bundle.attr_present))
 		return -EINVAL;
 
-	attr_bundle->num_buckets = num_given_buckets;
-	ret = uverbs_validate_kernel_mandatory(method_spec, attr_bundle);
+	ret = uverbs_process_attr(pbundle, attr, uattr, attr_bkey);
 	if (ret)
-		goto cleanup;
+		return ret;
 
-	ret = method_spec->handler(ibdev, ufile, attr_bundle);
-cleanup:
-	finalize_ret = uverbs_finalize_objects(attr_bundle,
-					       method_spec->attr_buckets,
-					       attr_bundle->num_buckets,
-					       !ret);
+	__set_bit(attr_bkey, pbundle->bundle.attr_present);
 
-	return ret ? ret : finalize_ret;
+	return 0;
 }
 
-#define UVERBS_OPTIMIZE_USING_STACK_SZ  256
-static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
-				struct ib_uverbs_file *file,
-				struct ib_uverbs_ioctl_hdr *hdr,
-				void __user *buf)
+static int ib_uverbs_run_method(struct bundle_priv *pbundle,
+				unsigned int num_attrs)
 {
-	const struct uverbs_object_spec *object_spec;
-	const struct uverbs_method_spec *method_spec;
-	long err = 0;
+	int (*handler)(struct ib_uverbs_file *ufile,
+		       struct uverbs_attr_bundle *ctx);
+	size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
+	unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
 	unsigned int i;
-	struct {
-		struct ib_uverbs_attr		*uattrs;
-		struct uverbs_attr_bundle	*uverbs_attr_bundle;
-	} *ctx = NULL;
-	struct uverbs_attr *curr_attr;
-	unsigned long *curr_bitmap;
-	size_t ctx_size;
-	uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
+	int ret;
 
-	if (hdr->driver_id != ib_dev->driver_id)
-		return -EINVAL;
+	/* See uverbs_disassociate_api() */
+	handler = srcu_dereference(
+		pbundle->method_elm->handler,
+		&pbundle->bundle.ufile->device->disassociate_srcu);
+	if (!handler)
+		return -EIO;
 
-	object_spec = uverbs_get_object(ib_dev, hdr->object_id);
-	if (!object_spec)
-		return -EPROTONOSUPPORT;
+	pbundle->uattrs = uverbs_alloc(&pbundle->bundle, uattrs_size);
+	if (IS_ERR(pbundle->uattrs))
+		return PTR_ERR(pbundle->uattrs);
+	if (copy_from_user(pbundle->uattrs, pbundle->user_attrs, uattrs_size))
+		return -EFAULT;
 
-	method_spec = uverbs_get_method(object_spec, hdr->method_id);
-	if (!method_spec)
-		return -EPROTONOSUPPORT;
-
-	if ((method_spec->flags & UVERBS_ACTION_FLAG_CREATE_ROOT) ^ !file->ucontext)
-		return -EINVAL;
-
-	ctx_size = sizeof(*ctx) +
-		   sizeof(struct uverbs_attr_bundle) +
-		   sizeof(struct uverbs_attr_bundle_hash) * method_spec->num_buckets +
-		   sizeof(*ctx->uattrs) * hdr->num_attrs +
-		   sizeof(*ctx->uverbs_attr_bundle->hash[0].attrs) *
-		   method_spec->num_child_attrs +
-		   sizeof(*ctx->uverbs_attr_bundle->hash[0].valid_bitmap) *
-			(method_spec->num_child_attrs / BITS_PER_LONG +
-			 method_spec->num_buckets);
-
-	if (ctx_size <= UVERBS_OPTIMIZE_USING_STACK_SZ)
-		ctx = (void *)data;
-	if (!ctx)
-		ctx = kmalloc(ctx_size, GFP_KERNEL);
-	if (!ctx)
-		return -ENOMEM;
-
-	ctx->uverbs_attr_bundle = (void *)ctx + sizeof(*ctx);
-	ctx->uattrs = (void *)(ctx->uverbs_attr_bundle + 1) +
-			      (sizeof(ctx->uverbs_attr_bundle->hash[0]) *
-			       method_spec->num_buckets);
-	curr_attr = (void *)(ctx->uattrs + hdr->num_attrs);
-	curr_bitmap = (void *)(curr_attr + method_spec->num_child_attrs);
-
-	/*
-	 * We just fill the pointers and num_attrs here. The data itself will be
-	 * filled at a later stage (uverbs_process_attr)
-	 */
-	for (i = 0; i < method_spec->num_buckets; i++) {
-		unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs;
-
-		ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr;
-		curr_attr += curr_num_attrs;
-		ctx->uverbs_attr_bundle->hash[i].num_attrs = curr_num_attrs;
-		ctx->uverbs_attr_bundle->hash[i].valid_bitmap = curr_bitmap;
-		bitmap_zero(curr_bitmap, curr_num_attrs);
-		curr_bitmap += BITS_TO_LONGS(curr_num_attrs);
+	for (i = 0; i != num_attrs; i++) {
+		ret = uverbs_set_attr(pbundle, &pbundle->uattrs[i]);
+		if (unlikely(ret))
+			return ret;
 	}
 
-	err = copy_from_user(ctx->uattrs, buf,
-			     sizeof(*ctx->uattrs) * hdr->num_attrs);
-	if (err) {
-		err = -EFAULT;
-		goto out;
-	}
+	/* User space did not provide all the mandatory attributes */
+	if (unlikely(!bitmap_subset(pbundle->method_elm->attr_mandatory,
+				    pbundle->bundle.attr_present,
+				    pbundle->method_elm->key_bitmap_len)))
+		return -EINVAL;
 
-	err = uverbs_handle_method(buf, ctx->uattrs, hdr->num_attrs, ib_dev,
-				   file, method_spec, ctx->uverbs_attr_bundle);
+	if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
+		struct uverbs_obj_attr *destroy_attr =
+			&pbundle->bundle.attrs[destroy_bkey].obj_attr;
+
+		ret = uobj_destroy(destroy_attr->uobject);
+		if (ret)
+			return ret;
+		__clear_bit(destroy_bkey, pbundle->uobj_finalize);
+
+		ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+		uobj_put_destroy(destroy_attr->uobject);
+	} else {
+		ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+	}
 
 	/*
 	 * EPROTONOSUPPORT is ONLY to be returned if the ioctl framework can
 	 * not invoke the method because the request is not supported.  No
 	 * other cases should return this code.
-	*/
-	if (unlikely(err == -EPROTONOSUPPORT)) {
-		WARN_ON_ONCE(err == -EPROTONOSUPPORT);
-		err = -EINVAL;
-	}
-out:
-	if (ctx != (void *)data)
-		kfree(ctx);
-	return err;
+	 */
+	if (WARN_ON_ONCE(ret == -EPROTONOSUPPORT))
+		return -EINVAL;
+
+	return ret;
 }
 
-#define IB_UVERBS_MAX_CMD_SZ 4096
+static int bundle_destroy(struct bundle_priv *pbundle, bool commit)
+{
+	unsigned int key_bitmap_len = pbundle->method_elm->key_bitmap_len;
+	struct bundle_alloc_head *memblock;
+	unsigned int i;
+	int ret = 0;
+
+	i = -1;
+	while ((i = find_next_bit(pbundle->uobj_finalize, key_bitmap_len,
+				  i + 1)) < key_bitmap_len) {
+		struct uverbs_attr *attr = &pbundle->bundle.attrs[i];
+		int current_ret;
+
+		current_ret = uverbs_finalize_object(
+			attr->obj_attr.uobject,
+			attr->obj_attr.attr_elm->spec.u.obj.access, commit);
+		if (!ret)
+			ret = current_ret;
+	}
+
+	for (memblock = pbundle->allocated_mem; memblock;) {
+		struct bundle_alloc_head *tmp = memblock;
+
+		memblock = memblock->next;
+		kvfree(tmp);
+	}
+
+	return ret;
+}
+
+static int ib_uverbs_cmd_verbs(struct ib_uverbs_file *ufile,
+			       struct ib_uverbs_ioctl_hdr *hdr,
+			       struct ib_uverbs_attr __user *user_attrs)
+{
+	const struct uverbs_api_ioctl_method *method_elm;
+	struct uverbs_api *uapi = ufile->device->uapi;
+	struct radix_tree_iter attrs_iter;
+	struct bundle_priv *pbundle;
+	struct bundle_priv onstack;
+	void __rcu **slot;
+	int destroy_ret;
+	int ret;
+
+	if (unlikely(hdr->driver_id != uapi->driver_id))
+		return -EINVAL;
+
+	slot = radix_tree_iter_lookup(
+		&uapi->radix, &attrs_iter,
+		uapi_key_obj(hdr->object_id) |
+			uapi_key_ioctl_method(hdr->method_id));
+	if (unlikely(!slot))
+		return -EPROTONOSUPPORT;
+	method_elm = srcu_dereference(*slot, &ufile->device->disassociate_srcu);
+
+	if (!method_elm->use_stack) {
+		pbundle = kmalloc(method_elm->bundle_size, GFP_KERNEL);
+		if (!pbundle)
+			return -ENOMEM;
+		pbundle->internal_avail =
+			method_elm->bundle_size -
+			offsetof(struct bundle_priv, internal_buffer);
+		pbundle->alloc_head.next = NULL;
+		pbundle->allocated_mem = &pbundle->alloc_head;
+	} else {
+		pbundle = &onstack;
+		pbundle->internal_avail = sizeof(pbundle->internal_buffer);
+		pbundle->allocated_mem = NULL;
+	}
+
+	/* Space for the pbundle->bundle.attrs flex array */
+	pbundle->method_elm = method_elm;
+	pbundle->method_key = attrs_iter.index;
+	pbundle->bundle.ufile = ufile;
+	pbundle->radix = &uapi->radix;
+	pbundle->radix_slots = slot;
+	pbundle->radix_slots_len = radix_tree_chunk_size(&attrs_iter);
+	pbundle->user_attrs = user_attrs;
+
+	pbundle->internal_used = ALIGN(pbundle->method_elm->key_bitmap_len *
+					       sizeof(*pbundle->bundle.attrs),
+				       sizeof(*pbundle->internal_buffer));
+	memset(pbundle->bundle.attr_present, 0,
+	       sizeof(pbundle->bundle.attr_present));
+	memset(pbundle->uobj_finalize, 0, sizeof(pbundle->uobj_finalize));
+
+	ret = ib_uverbs_run_method(pbundle, hdr->num_attrs);
+	destroy_ret = bundle_destroy(pbundle, ret == 0);
+	if (unlikely(destroy_ret && !ret))
+		return destroy_ret;
+
+	return ret;
+}
 
 long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
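
For methods that declare a destroy attribute, ib_uverbs_run_method() above runs uobj_destroy() before invoking the handler and releases the uobject afterwards, so a destroy handler only has to fill in its response. A hedged sketch, with the EXAMPLE_ATTR_* IDs and the event-object layout assumed for illustration and uverbs_copy_to() as defined later in this file:

static int example_destroy_method(struct ib_uverbs_file *ufile,
				  struct uverbs_attr_bundle *attrs)
{
	const struct uverbs_attr *attr =
		uverbs_attr_get(attrs, EXAMPLE_ATTR_DESTROY_HANDLE);
	struct ib_uevent_object *obj;
	u32 events;

	if (IS_ERR(attr))
		return PTR_ERR(attr);

	/* uobj_destroy() already ran; the uobject is still readable */
	obj = container_of(attr->obj_attr.uobject, struct ib_uevent_object,
			   uobject);
	events = obj->events_reported;

	return uverbs_copy_to(attrs, EXAMPLE_ATTR_EVENTS_REPORTED, &events,
			      sizeof(events));
}
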
@@ -384,39 +476,138 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	struct ib_uverbs_ioctl_hdr __user *user_hdr =
 		(struct ib_uverbs_ioctl_hdr __user *)arg;
 	struct ib_uverbs_ioctl_hdr hdr;
-	struct ib_device *ib_dev;
 	int srcu_key;
-	long err;
+	int err;
+
+	if (unlikely(cmd != RDMA_VERBS_IOCTL))
+		return -ENOIOCTLCMD;
+
+	err = copy_from_user(&hdr, user_hdr, sizeof(hdr));
+	if (err)
+		return -EFAULT;
+
+	if (hdr.length > PAGE_SIZE ||
+	    hdr.length != struct_size(&hdr, attrs, hdr.num_attrs))
+		return -EINVAL;
+
+	if (hdr.reserved1 || hdr.reserved2)
+		return -EPROTONOSUPPORT;
 
 	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
-	ib_dev = srcu_dereference(file->device->ib_dev,
-				  &file->device->disassociate_srcu);
-	if (!ib_dev) {
-		err = -EIO;
-		goto out;
-	}
-
-	if (cmd == RDMA_VERBS_IOCTL) {
-		err = copy_from_user(&hdr, user_hdr, sizeof(hdr));
-
-		if (err || hdr.length > IB_UVERBS_MAX_CMD_SZ ||
-		    hdr.length != sizeof(hdr) + hdr.num_attrs * sizeof(struct ib_uverbs_attr)) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		if (hdr.reserved1 || hdr.reserved2) {
-			err = -EPROTONOSUPPORT;
-			goto out;
-		}
-
-		err = ib_uverbs_cmd_verbs(ib_dev, file, &hdr,
-					  (__user void *)arg + sizeof(hdr));
-	} else {
-		err = -ENOIOCTLCMD;
-	}
-out:
+	err = ib_uverbs_cmd_verbs(file, &hdr, user_hdr->attrs);
 	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
-
 	return err;
 }
+
+int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits)
+{
+	const struct uverbs_attr *attr;
+	u64 flags;
+
+	attr = uverbs_attr_get(attrs_bundle, idx);
+	/* Missing attribute means 0 flags */
+	if (IS_ERR(attr)) {
+		*to = 0;
+		return 0;
+	}
+
+	/*
+	 * New userspace code should use 8 bytes to pass flags, but we
+	 * transparently support old userspaces that were using 4 bytes as
+	 * well.
+	 */
+	if (attr->ptr_attr.len == 8)
+		flags = attr->ptr_attr.data;
+	else if (attr->ptr_attr.len == 4)
+		flags = *(u32 *)&attr->ptr_attr.data;
+	else
+		return -EINVAL;
+
+	if (flags & ~allowed_bits)
+		return -EINVAL;
+
+	*to = flags;
+	return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags64);
+
+int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits)
+{
+	u64 flags;
+	int ret;
+
+	ret = uverbs_get_flags64(&flags, attrs_bundle, idx, allowed_bits);
+	if (ret)
+		return ret;
+
+	if (flags > U32_MAX)
+		return -EINVAL;
+	*to = flags;
+
+	return 0;
+}
+EXPORT_SYMBOL(uverbs_get_flags32);
+
+/*
+ * This is for ease of conversion. The purpose is to convert all drivers to
+ * use uverbs_attr_bundle instead of ib_udata.  Assume attr == 0 is input and
+ * attr == 1 is output.
+ */
+void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata)
+{
+	struct bundle_priv *pbundle =
+		container_of(bundle, struct bundle_priv, bundle);
+	const struct uverbs_attr *uhw_in =
+		uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN);
+	const struct uverbs_attr *uhw_out =
+		uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT);
+
+	if (!IS_ERR(uhw_in)) {
+		udata->inlen = uhw_in->ptr_attr.len;
+		if (uverbs_attr_ptr_is_inline(uhw_in))
+			udata->inbuf =
+				&pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx]
+					 .data;
+		else
+			udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
+	} else {
+		udata->inbuf = NULL;
+		udata->inlen = 0;
+	}
+
+	if (!IS_ERR(uhw_out)) {
+		udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
+		udata->outlen = uhw_out->ptr_attr.len;
+	} else {
+		udata->outbuf = NULL;
+		udata->outlen = 0;
+	}
+}
+
+int uverbs_copy_to(const struct uverbs_attr_bundle *bundle, size_t idx,
+		   const void *from, size_t size)
+{
+	struct bundle_priv *pbundle =
+		container_of(bundle, struct bundle_priv, bundle);
+	const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+	u16 flags;
+	size_t min_size;
+
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	min_size = min_t(size_t, attr->ptr_attr.len, size);
+	if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
+		return -EFAULT;
+
+	flags = pbundle->uattrs[attr->ptr_attr.uattr_idx].flags |
+		UVERBS_ATTR_F_VALID_OUTPUT;
+	if (put_user(flags,
+		     &pbundle->user_attrs[attr->ptr_attr.uattr_idx].flags))
+		return -EFAULT;
+
+	return 0;
+}
+EXPORT_SYMBOL(uverbs_copy_to);
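
A sketch of how a handler might combine the helpers above; EXAMPLE_ATTR_* and the flag bits are invented for illustration, while uverbs_get_flags64() and uverbs_copy_to() have the signatures shown in this file:

static int example_modify_handler(struct ib_uverbs_file *ufile,
				  struct uverbs_attr_bundle *attrs)
{
	u32 result = 0;
	u64 flags;
	int ret;

	/*
	 * Accepts either a 4- or 8-byte flags attribute; a missing
	 * attribute reads as 0 and unknown bits are rejected.
	 */
	ret = uverbs_get_flags64(&flags, attrs, EXAMPLE_ATTR_FLAGS,
				 EXAMPLE_FLAG_A | EXAMPLE_FLAG_B);
	if (ret)
		return ret;

	/* ... act on flags ... */

	/*
	 * Copy out at most the user-supplied length and mark the attribute
	 * as valid output.
	 */
	return uverbs_copy_to(attrs, EXAMPLE_ATTR_RESULT, &result,
			      sizeof(result));
}

Drivers that still take a struct ib_udata can keep doing so during the transition: create_udata() above populates one from the UVERBS_ATTR_UHW_IN/UVERBS_ATTR_UHW_OUT attributes of the same bundle.
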
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
deleted file mode 100644
index 6ceb672..0000000
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ /dev/null
@@ -1,664 +0,0 @@
-/*
- * Copyright (c) 2017, Mellanox Technologies inc.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/uverbs_ioctl.h>
-#include <rdma/rdma_user_ioctl.h>
-#include <linux/bitops.h>
-#include "uverbs.h"
-
-#define UVERBS_NUM_NS (UVERBS_ID_NS_MASK >> UVERBS_ID_NS_SHIFT)
-#define GET_NS_ID(idx) (((idx) & UVERBS_ID_NS_MASK) >> UVERBS_ID_NS_SHIFT)
-#define GET_ID(idx) ((idx) & ~UVERBS_ID_NS_MASK)
-
-#define _for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,	       \
-			  buckets_offset)				       \
-	for (tmpj = 0,							       \
-	     elem = (*(const void ***)((hashes)[tmpi] +			       \
-				       (buckets_offset)))[0];	               \
-	     tmpj < *(size_t *)((hashes)[tmpi] + (num_buckets_offset));        \
-	     tmpj++)						               \
-		if ((elem = ((*(const void ***)(hashes[tmpi] +		       \
-						(buckets_offset)))[tmpj])))
-
-/*
- * Iterate all elements of a few @hashes. The number of given hashes is
- * indicated by @num_hashes. The offset of the number of buckets in the hash is
- * represented by @num_buckets_offset, while the offset of the buckets array in
- * the hash structure is represented by @buckets_offset. tmpi and tmpj are two
- * short (or int) based indices that are given by the user. tmpi iterates over
- * the different hashes. @elem points the current element in the hashes[tmpi]
- * bucket we are looping on. To be honest, @hashes representation isn't exactly
- * a hash, but more a collection of elements. These elements' ids are treated
- * in a hash like manner, where the first upper bits are the bucket number.
- * These elements are later mapped into a perfect-hash.
- */
-#define for_each_element(elem, tmpi, tmpj, hashes, num_hashes,                 \
-			 num_buckets_offset, buckets_offset)		       \
-	for (tmpi = 0; tmpi < (num_hashes); tmpi++)		               \
-		_for_each_element(elem, tmpi, tmpj, hashes, num_buckets_offset,\
-				  buckets_offset)
-
-#define get_elements_iterators_entry_above(iters, num_elements, elements,     \
-					  num_objects_fld, objects_fld, bucket,\
-					  min_id)			       \
-	get_elements_above_id((const void **)iters, num_elements,       \
-				     (const void **)(elements),		       \
-				     offsetof(typeof(**elements),	       \
-					      num_objects_fld),		       \
-				     offsetof(typeof(**elements), objects_fld),\
-				     offsetof(typeof(***(*elements)->objects_fld), id),\
-				     bucket, min_id)
-
-#define get_objects_above_id(iters, num_trees, trees, bucket, min_id)	       \
-	get_elements_iterators_entry_above(iters, num_trees, trees,	       \
-					   num_objects, objects, bucket, min_id)
-
-#define get_methods_above_id(method_iters, num_iters, iters, bucket, min_id)\
-	get_elements_iterators_entry_above(method_iters, num_iters, iters,     \
-					   num_methods, methods, bucket, min_id)
-
-#define get_attrs_above_id(attrs_iters, num_iters, iters, bucket, min_id)\
-	get_elements_iterators_entry_above(attrs_iters, num_iters, iters,      \
-					   num_attrs, attrs, bucket, min_id)
-
-/*
- * get_elements_above_id get a few hashes represented by @elements and
- * @num_elements. The hashes fields are described by @num_offset, @data_offset
- * and @id_offset in the same way as required by for_each_element. The function
- * returns an array of @iters, represents an array of elements in the hashes
- * buckets, which their ids are the smallest ids in all hashes but are all
- * larger than the id given by min_id. Elements are only added to the iters
- * array if their id belongs to the bucket @bucket. The number of elements in
- * the returned array is returned by the function. @min_id is also updated to
- * reflect the new min_id of all elements in iters.
- */
-static size_t get_elements_above_id(const void **iters,
-				    unsigned int num_elements,
-				    const void **elements,
-				    size_t num_offset,
-				    size_t data_offset,
-				    size_t id_offset,
-				    u16 bucket,
-				    short *min_id)
-{
-	size_t num_iters = 0;
-	short min = SHRT_MAX;
-	const void *elem;
-	int i, j, last_stored = -1;
-	unsigned int equal_min = 0;
-
-	for_each_element(elem, i, j, elements, num_elements, num_offset,
-			 data_offset) {
-		u16 id = *(u16 *)(elem + id_offset);
-
-		if (GET_NS_ID(id) != bucket)
-			continue;
-
-		if (GET_ID(id) < *min_id ||
-		    (min != SHRT_MAX && GET_ID(id) > min))
-			continue;
-
-		/*
-		 * We first iterate all hashes represented by @elements. When
-		 * we do, we try to find an element @elem in the bucket @bucket
-		 * which its id is min. Since we can't ensure the user sorted
-		 * the elements in increasing order, we override this hash's
-		 * minimal id element we found, if a new element with a smaller
-		 * id was just found.
-		 */
-		iters[last_stored == i ? num_iters - 1 : num_iters++] = elem;
-		last_stored = i;
-		if (min == GET_ID(id))
-			equal_min++;
-		else
-			equal_min = 1;
-		min = GET_ID(id);
-	}
-
-	/*
-	 * We only insert to our iters array an element, if its id is smaller
-	 * than all previous ids. Therefore, the final iters array is sorted so
-	 * that smaller ids are in the end of the array.
-	 * Therefore, we need to clean the beginning of the array to make sure
-	 * all ids of final elements are equal to min.
-	 */
-	memmove(iters, iters + num_iters - equal_min, sizeof(*iters) * equal_min);
-
-	*min_id = min;
-	return equal_min;
-}
-
-#define find_max_element_entry_id(num_elements, elements, num_objects_fld, \
-				  objects_fld, bucket)			   \
-	find_max_element_id(num_elements, (const void **)(elements),	   \
-			    offsetof(typeof(**elements), num_objects_fld),    \
-			    offsetof(typeof(**elements), objects_fld),	      \
-			    offsetof(typeof(***(*elements)->objects_fld), id),\
-			    bucket)
-
-static short find_max_element_ns_id(unsigned int num_elements,
-				    const void **elements,
-				    size_t num_offset,
-				    size_t data_offset,
-				    size_t id_offset)
-{
-	short max_ns = SHRT_MIN;
-	const void *elem;
-	int i, j;
-
-	for_each_element(elem, i, j, elements, num_elements, num_offset,
-			 data_offset) {
-		u16 id = *(u16 *)(elem + id_offset);
-
-		if (GET_NS_ID(id) > max_ns)
-			max_ns = GET_NS_ID(id);
-	}
-
-	return max_ns;
-}
-
-static short find_max_element_id(unsigned int num_elements,
-				 const void **elements,
-				 size_t num_offset,
-				 size_t data_offset,
-				 size_t id_offset,
-				 u16 bucket)
-{
-	short max_id = SHRT_MIN;
-	const void *elem;
-	int i, j;
-
-	for_each_element(elem, i, j, elements, num_elements, num_offset,
-			 data_offset) {
-		u16 id = *(u16 *)(elem + id_offset);
-
-		if (GET_NS_ID(id) == bucket &&
-		    GET_ID(id) > max_id)
-			max_id = GET_ID(id);
-	}
-	return max_id;
-}
-
-#define find_max_element_entry_id(num_elements, elements, num_objects_fld,   \
-				  objects_fld, bucket)			      \
-	find_max_element_id(num_elements, (const void **)(elements),	      \
-			    offsetof(typeof(**elements), num_objects_fld),    \
-			    offsetof(typeof(**elements), objects_fld),	      \
-			    offsetof(typeof(***(*elements)->objects_fld), id),\
-			    bucket)
-
-#define find_max_element_ns_entry_id(num_elements, elements,		    \
-				     num_objects_fld, objects_fld)	    \
-	find_max_element_ns_id(num_elements, (const void **)(elements),	    \
-			      offsetof(typeof(**elements), num_objects_fld),\
-			      offsetof(typeof(**elements), objects_fld),    \
-			      offsetof(typeof(***(*elements)->objects_fld), id))
-
-/*
- * find_max_xxxx_ns_id gets a few elements. Each element is described by an id
- * which its upper bits represents a namespace. It finds the max namespace. This
- * could be used in order to know how many buckets do we need to allocate. If no
- * elements exist, SHRT_MIN is returned. Namespace represents here different
- * buckets. The common example is "common bucket" and "driver bucket".
- *
- * find_max_xxxx_id gets a few elements and a bucket. Each element is described
- * by an id which its upper bits represent a namespace. It returns the max id
- * which is contained in the same namespace defined in @bucket. This could be
- * used in order to know how many elements do we need to allocate in the bucket.
- * If no elements exist, SHRT_MIN is returned.
- */
-
-#define find_max_object_id(num_trees, trees, bucket)			\
-		find_max_element_entry_id(num_trees, trees, num_objects,\
-					  objects, bucket)
-#define find_max_object_ns_id(num_trees, trees)			\
-		find_max_element_ns_entry_id(num_trees, trees,		\
-					     num_objects, objects)
-
-#define find_max_method_id(num_iters, iters, bucket)			\
-		find_max_element_entry_id(num_iters, iters, num_methods,\
-					  methods, bucket)
-#define find_max_method_ns_id(num_iters, iters)			\
-		find_max_element_ns_entry_id(num_iters, iters,		\
-					     num_methods, methods)
-
-#define find_max_attr_id(num_iters, iters, bucket)			\
-		find_max_element_entry_id(num_iters, iters, num_attrs,  \
-					  attrs, bucket)
-#define find_max_attr_ns_id(num_iters, iters)				\
-		find_max_element_ns_entry_id(num_iters, iters,		\
-					     num_attrs, attrs)
-
-static void free_method(struct uverbs_method_spec *method)
-{
-	unsigned int i;
-
-	if (!method)
-		return;
-
-	for (i = 0; i < method->num_buckets; i++)
-		kfree(method->attr_buckets[i]);
-
-	kfree(method);
-}
-
-#define IS_ATTR_OBJECT(attr) ((attr)->type == UVERBS_ATTR_TYPE_IDR || \
-			      (attr)->type == UVERBS_ATTR_TYPE_FD)
-
-/*
- * This function gets array of size @num_method_defs which contains pointers to
- * method definitions @method_defs. The function allocates an
- * uverbs_method_spec structure and initializes its number of buckets and the
- * elements in buckets to the correct attributes. While doing that, it
- * validates that there aren't conflicts between attributes of different
- * method_defs.
- */
-static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_method_def **method_defs,
-							  size_t num_method_defs)
-{
-	int bucket_idx;
-	int max_attr_buckets = 0;
-	size_t num_attr_buckets = 0;
-	int res = 0;
-	struct uverbs_method_spec *method = NULL;
-	const struct uverbs_attr_def **attr_defs;
-	unsigned int num_of_singularities = 0;
-
-	max_attr_buckets = find_max_attr_ns_id(num_method_defs, method_defs);
-	if (max_attr_buckets >= 0)
-		num_attr_buckets = max_attr_buckets + 1;
-
-	method = kzalloc(struct_size(method, attr_buckets, num_attr_buckets),
-			 GFP_KERNEL);
-	if (!method)
-		return ERR_PTR(-ENOMEM);
-
-	method->num_buckets = num_attr_buckets;
-	attr_defs = kcalloc(num_method_defs, sizeof(*attr_defs), GFP_KERNEL);
-	if (!attr_defs) {
-		res = -ENOMEM;
-		goto free_method;
-	}
-	for (bucket_idx = 0; bucket_idx < method->num_buckets; bucket_idx++) {
-		short min_id = SHRT_MIN;
-		int attr_max_bucket = 0;
-		struct uverbs_attr_spec_hash *hash = NULL;
-
-		attr_max_bucket = find_max_attr_id(num_method_defs, method_defs,
-						   bucket_idx);
-		if (attr_max_bucket < 0)
-			continue;
-
-		hash = kzalloc(sizeof(*hash) +
-			       ALIGN(sizeof(*hash->attrs) * (attr_max_bucket + 1),
-				     sizeof(long)) +
-			       BITS_TO_LONGS(attr_max_bucket + 1) * sizeof(long),
-			       GFP_KERNEL);
-		if (!hash) {
-			res = -ENOMEM;
-			goto free;
-		}
-		hash->num_attrs = attr_max_bucket + 1;
-		method->num_child_attrs += hash->num_attrs;
-		hash->mandatory_attrs_bitmask = (void *)(hash + 1) +
-						 ALIGN(sizeof(*hash->attrs) *
-						       (attr_max_bucket + 1),
-						       sizeof(long));
-
-		method->attr_buckets[bucket_idx] = hash;
-
-		do {
-			size_t			 num_attr_defs;
-			struct uverbs_attr_spec	*attr;
-			bool attr_obj_with_special_access;
-
-			num_attr_defs =
-				get_attrs_above_id(attr_defs,
-						   num_method_defs,
-						   method_defs,
-						   bucket_idx,
-						   &min_id);
-			/* Last attr in bucket */
-			if (!num_attr_defs)
-				break;
-
-			if (num_attr_defs > 1) {
-				/*
-				 * We don't allow two attribute definitions for
-				 * the same attribute. This is usually a
-				 * programmer error. If required, it's better to
-				 * just add a new attribute to capture the new
-				 * semantics.
-				 */
-				res = -EEXIST;
-				goto free;
-			}
-
-			attr = &hash->attrs[min_id];
-			memcpy(attr, &attr_defs[0]->attr, sizeof(*attr));
-
-			attr_obj_with_special_access = IS_ATTR_OBJECT(attr) &&
-				   (attr->obj.access == UVERBS_ACCESS_NEW ||
-				    attr->obj.access == UVERBS_ACCESS_DESTROY);
-			num_of_singularities +=  !!attr_obj_with_special_access;
-			if (WARN(num_of_singularities > 1,
-				 "ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n",
-				 min_id) ||
-			    WARN(attr_obj_with_special_access &&
-				 !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY),
-				 "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
-				 min_id) ||
-			    WARN(IS_ATTR_OBJECT(attr) &&
-				 attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
-				 "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
-				 min_id)) {
-				res = -EINVAL;
-				goto free;
-			}
-
-			if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY)
-				set_bit(min_id, hash->mandatory_attrs_bitmask);
-			min_id++;
-
-		} while (1);
-	}
-	kfree(attr_defs);
-	return method;
-
-free:
-	kfree(attr_defs);
-free_method:
-	free_method(method);
-	return ERR_PTR(res);
-}
-
-static void free_object(struct uverbs_object_spec *object)
-{
-	unsigned int i, j;
-
-	if (!object)
-		return;
-
-	for (i = 0; i < object->num_buckets; i++) {
-		struct uverbs_method_spec_hash	*method_buckets =
-			object->method_buckets[i];
-
-		if (!method_buckets)
-			continue;
-
-		for (j = 0; j < method_buckets->num_methods; j++)
-			free_method(method_buckets->methods[j]);
-
-		kfree(method_buckets);
-	}
-
-	kfree(object);
-}
-
-/*
- * This function gets array of size @num_object_defs which contains pointers to
- * object definitions @object_defs. The function allocated an
- * uverbs_object_spec structure and initialize its number of buckets and the
- * elements in buckets to the correct methods. While doing that, it
- * sorts out the correct relationship between conflicts in the same method.
- */
-static struct uverbs_object_spec *build_object_with_methods(const struct uverbs_object_def **object_defs,
-							    size_t num_object_defs)
-{
-	u16 bucket_idx;
-	int max_method_buckets = 0;
-	u16 num_method_buckets = 0;
-	int res = 0;
-	struct uverbs_object_spec *object = NULL;
-	const struct uverbs_method_def **method_defs;
-
-	max_method_buckets = find_max_method_ns_id(num_object_defs, object_defs);
-	if (max_method_buckets >= 0)
-		num_method_buckets = max_method_buckets + 1;
-
-	object = kzalloc(struct_size(object, method_buckets,
-				     num_method_buckets),
-			 GFP_KERNEL);
-	if (!object)
-		return ERR_PTR(-ENOMEM);
-
-	object->num_buckets = num_method_buckets;
-	method_defs = kcalloc(num_object_defs, sizeof(*method_defs), GFP_KERNEL);
-	if (!method_defs) {
-		res = -ENOMEM;
-		goto free_object;
-	}
-
-	for (bucket_idx = 0; bucket_idx < object->num_buckets; bucket_idx++) {
-		short min_id = SHRT_MIN;
-		int methods_max_bucket = 0;
-		struct uverbs_method_spec_hash *hash = NULL;
-
-		methods_max_bucket = find_max_method_id(num_object_defs, object_defs,
-							bucket_idx);
-		if (methods_max_bucket < 0)
-			continue;
-
-		hash = kzalloc(struct_size(hash, methods,
-					   methods_max_bucket + 1),
-			       GFP_KERNEL);
-		if (!hash) {
-			res = -ENOMEM;
-			goto free;
-		}
-
-		hash->num_methods = methods_max_bucket + 1;
-		object->method_buckets[bucket_idx] = hash;
-
-		do {
-			size_t				num_method_defs;
-			struct uverbs_method_spec	*method;
-			int i;
-
-			num_method_defs =
-				get_methods_above_id(method_defs,
-						     num_object_defs,
-						     object_defs,
-						     bucket_idx,
-						     &min_id);
-			/* Last method in bucket */
-			if (!num_method_defs)
-				break;
-
-			method = build_method_with_attrs(method_defs,
-							 num_method_defs);
-			if (IS_ERR(method)) {
-				res = PTR_ERR(method);
-				goto free;
-			}
-
-			/*
-			 * The last tree which is given as an argument to the
-			 * merge overrides previous method handler.
-			 * Therefore, we iterate backwards and search for the
-			 * first handler which != NULL. This also defines the
-			 * set of flags used for this handler.
-			 */
-			for (i = num_method_defs - 1;
-			     i >= 0 && !method_defs[i]->handler; i--)
-				;
-			hash->methods[min_id++] = method;
-			/* NULL handler isn't allowed */
-			if (WARN(i < 0,
-				 "ib_uverbs: tried to merge function id %d, but all handlers are NULL\n",
-				 min_id)) {
-				res = -EINVAL;
-				goto free;
-			}
-			method->handler = method_defs[i]->handler;
-			method->flags = method_defs[i]->flags;
-
-		} while (1);
-	}
-	kfree(method_defs);
-	return object;
-
-free:
-	kfree(method_defs);
-free_object:
-	free_object(object);
-	return ERR_PTR(res);
-}
-
-void uverbs_free_spec_tree(struct uverbs_root_spec *root)
-{
-	unsigned int i, j;
-
-	if (!root)
-		return;
-
-	for (i = 0; i < root->num_buckets; i++) {
-		struct uverbs_object_spec_hash *object_hash =
-			root->object_buckets[i];
-
-		if (!object_hash)
-			continue;
-
-		for (j = 0; j < object_hash->num_objects; j++)
-			free_object(object_hash->objects[j]);
-
-		kfree(object_hash);
-	}
-
-	kfree(root);
-}
-EXPORT_SYMBOL(uverbs_free_spec_tree);
-
-struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
-						const struct uverbs_object_tree_def **trees)
-{
-	u16 bucket_idx;
-	short max_object_buckets = 0;
-	size_t num_objects_buckets = 0;
-	struct uverbs_root_spec *root_spec = NULL;
-	const struct uverbs_object_def **object_defs;
-	int i;
-	int res = 0;
-
-	max_object_buckets = find_max_object_ns_id(num_trees, trees);
-	/*
-	 * Devices which don't want to support ib_uverbs, should just allocate
-	 * an empty parsing tree. Every user-space command won't hit any valid
-	 * entry in the parsing tree and thus will fail.
-	 */
-	if (max_object_buckets >= 0)
-		num_objects_buckets = max_object_buckets + 1;
-
-	root_spec = kzalloc(struct_size(root_spec, object_buckets,
-					num_objects_buckets),
-			    GFP_KERNEL);
-	if (!root_spec)
-		return ERR_PTR(-ENOMEM);
-	root_spec->num_buckets = num_objects_buckets;
-
-	object_defs = kcalloc(num_trees, sizeof(*object_defs),
-			      GFP_KERNEL);
-	if (!object_defs) {
-		res = -ENOMEM;
-		goto free_root;
-	}
-
-	for (bucket_idx = 0; bucket_idx < root_spec->num_buckets; bucket_idx++) {
-		short min_id = SHRT_MIN;
-		short objects_max_bucket;
-		struct uverbs_object_spec_hash *hash = NULL;
-
-		objects_max_bucket = find_max_object_id(num_trees, trees,
-							bucket_idx);
-		if (objects_max_bucket < 0)
-			continue;
-
-		hash = kzalloc(struct_size(hash, objects,
-					   objects_max_bucket + 1),
-			       GFP_KERNEL);
-		if (!hash) {
-			res = -ENOMEM;
-			goto free;
-		}
-		hash->num_objects = objects_max_bucket + 1;
-		root_spec->object_buckets[bucket_idx] = hash;
-
-		do {
-			size_t				num_object_defs;
-			struct uverbs_object_spec	*object;
-
-			num_object_defs = get_objects_above_id(object_defs,
-							       num_trees,
-							       trees,
-							       bucket_idx,
-							       &min_id);
-			/* Last object in bucket */
-			if (!num_object_defs)
-				break;
-
-			object = build_object_with_methods(object_defs,
-							   num_object_defs);
-			if (IS_ERR(object)) {
-				res = PTR_ERR(object);
-				goto free;
-			}
-
-			/*
-			 * The last tree which is given as an argument to the
-			 * merge overrides previous object's type_attrs.
-			 * Therefore, we iterate backwards and search for the
-			 * first type_attrs which != NULL.
-			 */
-			for (i = num_object_defs - 1;
-			     i >= 0 && !object_defs[i]->type_attrs; i--)
-				;
-			/*
-			 * NULL is a valid type_attrs. It means an object we
-			 * can't instantiate (like DEVICE).
-			 */
-			object->type_attrs = i < 0 ? NULL :
-				object_defs[i]->type_attrs;
-
-			hash->objects[min_id++] = object;
-		} while (1);
-	}
-
-	kfree(object_defs);
-	return root_spec;
-
-free:
-	kfree(object_defs);
-free_root:
-	uverbs_free_spec_tree(root_spec);
-	return ERR_PTR(res);
-}
-EXPORT_SYMBOL(uverbs_alloc_spec_tree);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 2094d13..823beca 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -41,8 +41,6 @@
 #include <linux/fs.h>
 #include <linux/poll.h>
 #include <linux/sched.h>
-#include <linux/sched/mm.h>
-#include <linux/sched/task.h>
 #include <linux/file.h>
 #include <linux/cdev.h>
 #include <linux/anon_inodes.h>
@@ -77,7 +75,6 @@ static struct class *uverbs_class;
 static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
 
 static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
-				     struct ib_device *ib_dev,
 				     const char __user *buf, int in_len,
 				     int out_len) = {
 	[IB_USER_VERBS_CMD_GET_CONTEXT]		= ib_uverbs_get_context,
@@ -118,7 +115,6 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 };
 
 static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
-				    struct ib_device *ib_dev,
 				    struct ib_udata *ucore,
 				    struct ib_udata *uhw) = {
 	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
@@ -138,6 +134,30 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
 
+/*
+ * Must be called with the ufile->device->disassociate_srcu held, and the lock
+ * must be held until use of the ucontext is finished.
+ */
+struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
+{
+	/*
+	 * We do not hold the hw_destroy_rwsem lock for this flow, instead
+	 * srcu is used. It does not matter if someone races this with
+	 * get_context, we get NULL or valid ucontext.
+	 */
+	struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);
+
+	if (!srcu_dereference(ufile->device->ib_dev,
+			      &ufile->device->disassociate_srcu))
+		return ERR_PTR(-EIO);
+
+	if (!ucontext)
+		return ERR_PTR(-EINVAL);
+
+	return ucontext;
+}
+EXPORT_SYMBOL(ib_uverbs_get_ucontext);
+
 int uverbs_dealloc_mw(struct ib_mw *mw)
 {
 	struct ib_pd *pd = mw->pd;
@@ -154,6 +174,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj)
 	struct ib_uverbs_device *dev =
 		container_of(kobj, struct ib_uverbs_device, kobj);
 
+	uverbs_destroy_api(dev->uapi);
 	cleanup_srcu_struct(&dev->disassociate_srcu);
 	kfree(dev);
 }
@@ -184,7 +205,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
 		}
 		spin_unlock_irq(&ev_file->ev_queue.lock);
 
-		uverbs_uobject_put(&ev_file->uobj_file.uobj);
+		uverbs_uobject_put(&ev_file->uobj);
 	}
 
 	spin_lock_irq(&file->async_file->ev_queue.lock);
@@ -220,20 +241,6 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
 	}
 }
 
-static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
-				      struct ib_ucontext *context,
-				      bool device_removed)
-{
-	context->closing = 1;
-	uverbs_cleanup_ucontext(context, device_removed);
-	put_pid(context->tgid);
-
-	ib_rdmacg_uncharge(&context->cg_obj, context->device,
-			   RDMACG_RESOURCE_HCA_HANDLE);
-
-	return context->device->dealloc_ucontext(context);
-}
-
 static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
 {
 	complete(&dev->comp);
@@ -246,6 +253,8 @@ void ib_uverbs_release_file(struct kref *ref)
 	struct ib_device *ib_dev;
 	int srcu_key;
 
+	release_ufile_idr_uobject(file);
+
 	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
 	ib_dev = srcu_dereference(file->device->ib_dev,
 				  &file->device->disassociate_srcu);
@@ -338,7 +347,7 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
 		filp->private_data;
 
 	return ib_uverbs_event_read(&comp_ev_file->ev_queue,
-				    comp_ev_file->uobj_file.ufile, filp,
+				    comp_ev_file->uobj.ufile, filp,
 				    buf, count, pos,
 				    sizeof(struct ib_uverbs_comp_event_desc));
 }
@@ -420,7 +429,9 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
 
 static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
 {
-	struct ib_uverbs_completion_event_file *file = filp->private_data;
+	struct ib_uobject *uobj = filp->private_data;
+	struct ib_uverbs_completion_event_file *file = container_of(
+		uobj, struct ib_uverbs_completion_event_file, uobj);
 	struct ib_uverbs_event *entry, *tmp;
 
 	spin_lock_irq(&file->ev_queue.lock);
@@ -528,7 +539,7 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
 	struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
 						  struct ib_ucq_object, uobject);
 
-	ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
+	ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
 				event->event, &uobj->async_list,
 				&uobj->async_events_reported);
 }
@@ -637,13 +648,13 @@ struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file
 	return filp;
 }
 
-static bool verify_command_mask(struct ib_device *ib_dev,
-				u32 command, bool extended)
+static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command,
+				bool extended)
 {
 	if (!extended)
-		return ib_dev->uverbs_cmd_mask & BIT_ULL(command);
+		return ufile->uverbs_cmd_mask & BIT_ULL(command);
 
-	return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command);
+	return ufile->uverbs_ex_cmd_mask & BIT_ULL(command);
 }
 
 static bool verify_command_idx(u32 command, bool extended)
@@ -713,7 +724,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 {
 	struct ib_uverbs_file *file = filp->private_data;
 	struct ib_uverbs_ex_cmd_hdr ex_hdr;
-	struct ib_device *ib_dev;
 	struct ib_uverbs_cmd_hdr hdr;
 	bool extended;
 	int srcu_key;
@@ -748,24 +758,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 		return ret;
 
 	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
-	ib_dev = srcu_dereference(file->device->ib_dev,
-				  &file->device->disassociate_srcu);
-	if (!ib_dev) {
-		ret = -EIO;
-		goto out;
-	}
 
-	/*
-	 * Must be after the ib_dev check, as once the RCU clears ib_dev ==
-	 * NULL means ucontext == NULL
-	 */
-	if (!file->ucontext &&
-	    (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (!verify_command_mask(ib_dev, command, extended)) {
+	if (!verify_command_mask(file, command, extended)) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
@@ -773,7 +767,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 	buf += sizeof(hdr);
 
 	if (!extended) {
-		ret = uverbs_cmd_table[command](file, ib_dev, buf,
+		ret = uverbs_cmd_table[command](file, buf,
 						hdr.in_words * 4,
 						hdr.out_words * 4);
 	} else {
@@ -792,7 +786,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 					ex_hdr.provider_in_words * 8,
 					ex_hdr.provider_out_words * 8);
 
-		ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
+		ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
 		ret = (ret) ? : count;
 	}
 
@@ -804,22 +798,18 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct ib_uverbs_file *file = filp->private_data;
-	struct ib_device *ib_dev;
+	struct ib_ucontext *ucontext;
 	int ret = 0;
 	int srcu_key;
 
 	srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
-	ib_dev = srcu_dereference(file->device->ib_dev,
-				  &file->device->disassociate_srcu);
-	if (!ib_dev) {
-		ret = -EIO;
+	ucontext = ib_uverbs_get_ucontext(file);
+	if (IS_ERR(ucontext)) {
+		ret = PTR_ERR(ucontext);
 		goto out;
 	}
 
-	if (!file->ucontext)
-		ret = -ENODEV;
-	else
-		ret = ib_dev->mmap(file->ucontext, vma);
+	ret = ucontext->device->mmap(ucontext, vma);
 out:
 	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
 	return ret;
@@ -879,13 +869,12 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	}
 
 	file->device	 = dev;
-	spin_lock_init(&file->idr_lock);
-	idr_init(&file->idr);
-	file->ucontext	 = NULL;
-	file->async_file = NULL;
 	kref_init(&file->ref);
-	mutex_init(&file->mutex);
-	mutex_init(&file->cleanup_mutex);
+	mutex_init(&file->ucontext_lock);
+
+	spin_lock_init(&file->uobjects_lock);
+	INIT_LIST_HEAD(&file->uobjects);
+	init_rwsem(&file->hw_destroy_rwsem);
 
 	filp->private_data = file;
 	kobject_get(&dev->kobj);
@@ -893,6 +882,11 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	mutex_unlock(&dev->lists_mutex);
 	srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
 
+	file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask;
+	file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask;
+
+	setup_ufile_idr_uobject(file);
+
 	return nonseekable_open(inode, filp);
 
 err_module:
@@ -911,13 +905,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
 {
 	struct ib_uverbs_file *file = filp->private_data;
 
-	mutex_lock(&file->cleanup_mutex);
-	if (file->ucontext) {
-		ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
-		file->ucontext = NULL;
-	}
-	mutex_unlock(&file->cleanup_mutex);
-	idr_destroy(&file->idr);
+	uverbs_destroy_ufile_hw(file, RDMA_REMOVE_CLOSE);
 
 	mutex_lock(&file->device->lists_mutex);
 	if (!file->is_closed) {
@@ -1006,6 +994,19 @@ static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
 static CLASS_ATTR_STRING(abi_version, S_IRUGO,
 			 __stringify(IB_USER_VERBS_ABI_VERSION));
 
+static int ib_uverbs_create_uapi(struct ib_device *device,
+				 struct ib_uverbs_device *uverbs_dev)
+{
+	struct uverbs_api *uapi;
+
+	uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
+	if (IS_ERR(uapi))
+		return PTR_ERR(uapi);
+
+	uverbs_dev->uapi = uapi;
+	return 0;
+}
+
 static void ib_uverbs_add_one(struct ib_device *device)
 {
 	int devnum;
@@ -1048,6 +1049,9 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	rcu_assign_pointer(uverbs_dev->ib_dev, device);
 	uverbs_dev->num_comp_vectors = device->num_comp_vectors;
 
+	if (ib_uverbs_create_uapi(device, uverbs_dev))
+		goto err;
+
 	cdev_init(&uverbs_dev->cdev, NULL);
 	uverbs_dev->cdev.owner = THIS_MODULE;
 	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
@@ -1067,18 +1071,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
 		goto err_class;
 
-	if (!device->specs_root) {
-		const struct uverbs_object_tree_def *default_root[] = {
-			uverbs_default_get_objects()};
-
-		uverbs_dev->specs_root = uverbs_alloc_spec_tree(1,
-								default_root);
-		if (IS_ERR(uverbs_dev->specs_root))
-			goto err_class;
-
-		device->specs_root = uverbs_dev->specs_root;
-	}
-
 	ib_set_client_data(device, &uverbs_client, uverbs_dev);
 
 	return;
@@ -1098,44 +1090,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	return;
 }
 
-static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
-{
-	struct ib_device *ib_dev = ibcontext->device;
-	struct task_struct *owning_process  = NULL;
-	struct mm_struct   *owning_mm       = NULL;
-
-	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
-	if (!owning_process)
-		return;
-
-	owning_mm = get_task_mm(owning_process);
-	if (!owning_mm) {
-		pr_info("no mm, disassociate ucontext is pending task termination\n");
-		while (1) {
-			put_task_struct(owning_process);
-			usleep_range(1000, 2000);
-			owning_process = get_pid_task(ibcontext->tgid,
-						      PIDTYPE_PID);
-			if (!owning_process ||
-			    owning_process->state == TASK_DEAD) {
-				pr_info("disassociate ucontext done, task was terminated\n");
-				/* in case task was dead need to release the
-				 * task struct.
-				 */
-				if (owning_process)
-					put_task_struct(owning_process);
-				return;
-			}
-		}
-	}
-
-	down_write(&owning_mm->mmap_sem);
-	ib_dev->disassociate_ucontext(ibcontext);
-	up_write(&owning_mm->mmap_sem);
-	mmput(owning_mm);
-	put_task_struct(owning_process);
-}
-
 static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 					struct ib_device *ib_dev)
 {
@@ -1144,46 +1098,31 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 	struct ib_event event;
 
 	/* Pending running commands to terminate */
-	synchronize_srcu(&uverbs_dev->disassociate_srcu);
+	uverbs_disassociate_api_pre(uverbs_dev);
 	event.event = IB_EVENT_DEVICE_FATAL;
 	event.element.port_num = 0;
 	event.device = ib_dev;
 
 	mutex_lock(&uverbs_dev->lists_mutex);
 	while (!list_empty(&uverbs_dev->uverbs_file_list)) {
-		struct ib_ucontext *ucontext;
 		file = list_first_entry(&uverbs_dev->uverbs_file_list,
 					struct ib_uverbs_file, list);
 		file->is_closed = 1;
 		list_del(&file->list);
 		kref_get(&file->ref);
+
+		/* We must release the mutex before going ahead and calling
+		 * uverbs_destroy_ufile_hw(), as it might end up indirectly
+		 * calling uverbs_close, for example due to freeing the
+		 * resources (e.g. mmput).
+		 */
 		mutex_unlock(&uverbs_dev->lists_mutex);
 
-
-		mutex_lock(&file->cleanup_mutex);
-		ucontext = file->ucontext;
-		file->ucontext = NULL;
-		mutex_unlock(&file->cleanup_mutex);
-
-		/* At this point ib_uverbs_close cannot be running
-		 * ib_uverbs_cleanup_ucontext
-		 */
-		if (ucontext) {
-			/* We must release the mutex before going ahead and
-			 * calling disassociate_ucontext. disassociate_ucontext
-			 * might end up indirectly calling uverbs_close,
-			 * for example due to freeing the resources
-			 * (e.g mmput).
-			 */
-			ib_uverbs_event_handler(&file->event_handler, &event);
-			ib_uverbs_disassociate_ucontext(ucontext);
-			mutex_lock(&file->cleanup_mutex);
-			ib_uverbs_cleanup_ucontext(file, ucontext, true);
-			mutex_unlock(&file->cleanup_mutex);
-		}
+		ib_uverbs_event_handler(&file->event_handler, &event);
+		uverbs_destroy_ufile_hw(file, RDMA_REMOVE_DRIVER_REMOVE);
+		kref_put(&file->ref, ib_uverbs_release_file);
 
 		mutex_lock(&uverbs_dev->lists_mutex);
-		kref_put(&file->ref, ib_uverbs_release_file);
 	}
 
 	while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
@@ -1205,6 +1144,8 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
 		kill_fasync(&event_file->ev_queue.async_queue, SIGIO, POLL_IN);
 	}
 	mutex_unlock(&uverbs_dev->lists_mutex);
+
+	uverbs_disassociate_api(uverbs_dev->uapi);
 }
 
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
@@ -1232,7 +1173,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
 		 * cdev was deleted, however active clients can still issue
 		 * commands and close their open files.
 		 */
-		rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
 		ib_uverbs_free_hw_resources(uverbs_dev, device);
 		wait_clients = 0;
 	}
@@ -1241,10 +1181,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
 		ib_uverbs_comp_dev(uverbs_dev);
 	if (wait_clients)
 		wait_for_completion(&uverbs_dev->comp);
-	if (uverbs_dev->specs_root) {
-		uverbs_free_spec_tree(uverbs_dev->specs_root);
-		device->specs_root = NULL;
-	}
 
 	kobject_put(&uverbs_dev->kobj);
 }
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index bb372b4..b8d715c 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
 
 	/* TODO: No need to set this */
 	sa_path_set_dmac_zero(dst);
-	sa_path_set_ndev(dst, NULL);
-	sa_path_set_ifindex(dst, 0);
 }
 EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index b570acb..203cc96 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -48,14 +48,18 @@ static int uverbs_free_ah(struct ib_uobject *uobject,
 static int uverbs_free_flow(struct ib_uobject *uobject,
 			    enum rdma_remove_reason why)
 {
-	int ret;
 	struct ib_flow *flow = (struct ib_flow *)uobject->object;
 	struct ib_uflow_object *uflow =
 		container_of(uobject, struct ib_uflow_object, uobject);
+	struct ib_qp *qp = flow->qp;
+	int ret;
 
-	ret = ib_destroy_flow(flow);
-	if (!ret)
+	ret = flow->device->destroy_flow(flow);
+	if (!ret) {
+		if (qp)
+			atomic_dec(&qp->usecnt);
 		ib_uverbs_flow_resources_free(uflow->resources);
+	}
 
 	return ret;
 }
@@ -74,6 +78,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
 		container_of(uobject, struct ib_uqp_object, uevent.uobject);
 	int ret;
 
+	/*
+	 * If this is a user triggered destroy then do not allow destruction
+	 * until the user cleans up all the mcast bindings. Unlike in other
+	 * places we forcibly clean up the mcast attachments for !DESTROY
+	 * because the mcast attaches are not uobjects and will not be
+	 * destroyed by anything else during cleanup processing.
+	 */
 	if (why == RDMA_REMOVE_DESTROY) {
 		if (!list_empty(&uqp->mcast_list))
 			return -EBUSY;
@@ -82,7 +93,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
 	}
 
 	ret = ib_destroy_qp(qp);
-	if (ret && why == RDMA_REMOVE_DESTROY)
+	if (ib_is_destroy_retryable(ret, why, uobject))
 		return ret;
 
 	if (uqp->uxrcd)
@@ -100,8 +111,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
 	int ret;
 
 	ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
-	if (!ret || why != RDMA_REMOVE_DESTROY)
-		kfree(ind_tbl);
+	if (ib_is_destroy_retryable(ret, why, uobject))
+		return ret;
+
+	kfree(ind_tbl);
 	return ret;
 }
 
@@ -114,8 +127,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
 	int ret;
 
 	ret = ib_destroy_wq(wq);
-	if (!ret || why != RDMA_REMOVE_DESTROY)
-		ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
+	if (ib_is_destroy_retryable(ret, why, uobject))
+		return ret;
+
+	ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
 	return ret;
 }
 
@@ -129,8 +144,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
 	int ret;
 
 	ret = ib_destroy_srq(srq);
-
-	if (ret && why == RDMA_REMOVE_DESTROY)
+	if (ib_is_destroy_retryable(ret, why, uobject))
 		return ret;
 
 	if (srq_type == IB_SRQT_XRC) {
@@ -152,12 +166,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject,
 		container_of(uobject, struct ib_uxrcd_object, uobject);
 	int ret;
 
+	ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
+	if (ret)
+		return ret;
+
 	mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
-	if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
-		ret = -EBUSY;
-	else
-		ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
-					     xrcd, why);
+	ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why);
 	mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
 
 	return ret;
@@ -167,20 +181,22 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
 			  enum rdma_remove_reason why)
 {
 	struct ib_pd *pd = uobject->object;
+	int ret;
 
-	if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
-		return -EBUSY;
+	ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
+	if (ret)
+		return ret;
 
 	ib_dealloc_pd((struct ib_pd *)uobject->object);
 	return 0;
 }
 
-static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
 						   enum rdma_remove_reason why)
 {
 	struct ib_uverbs_completion_event_file *comp_event_file =
-		container_of(uobj_file, struct ib_uverbs_completion_event_file,
-			     uobj_file);
+		container_of(uobj, struct ib_uverbs_completion_event_file,
+			     uobj);
 	struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
 
 	spin_lock_irq(&event_queue->lock);
@@ -194,119 +210,77 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_
 	return 0;
 };
 
-int uverbs_destroy_def_handler(struct ib_device *ib_dev,
-			       struct ib_uverbs_file *file,
+int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
 			       struct uverbs_attr_bundle *attrs)
 {
 	return 0;
 }
+EXPORT_SYMBOL(uverbs_destroy_def_handler);
 
-/*
- * This spec is used in order to pass information to the hardware driver in a
- * legacy way. Every verb that could get driver specific data should get this
- * spec.
- */
-const struct uverbs_attr_def uverbs_uhw_compat_in =
-	UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX),
-			      UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
-const struct uverbs_attr_def uverbs_uhw_compat_out =
-	UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX),
-			       UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_COMP_CHANNEL,
+	UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
+			     uverbs_hot_unplug_completion_event_file,
+			     &uverbs_event_fops,
+			     "[infinibandevent]",
+			     O_RDONLY));
 
-void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
-{
-	/*
-	 * This is for ease of conversion. The purpose is to convert all drivers
-	 * to use uverbs_attr_bundle instead of ib_udata.
-	 * Assume attr == 0 is input and attr == 1 is output.
-	 */
-	const struct uverbs_attr *uhw_in =
-		uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN);
-	const struct uverbs_attr *uhw_out =
-		uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT);
-
-	if (!IS_ERR(uhw_in)) {
-		udata->inlen = uhw_in->ptr_attr.len;
-		if (uverbs_attr_ptr_is_inline(uhw_in))
-			udata->inbuf = &uhw_in->uattr->data;
-		else
-			udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
-	} else {
-		udata->inbuf = NULL;
-		udata->inlen = 0;
-	}
-
-	if (!IS_ERR(uhw_out)) {
-		udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
-		udata->outlen = uhw_out->ptr_attr.len;
-	} else {
-		udata->outbuf = NULL;
-		udata->outlen = 0;
-	}
-}
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
-			    &UVERBS_TYPE_ALLOC_FD(0,
-						  sizeof(struct ib_uverbs_completion_event_file),
-						  uverbs_hot_unplug_completion_event_file,
-						  &uverbs_event_fops,
-						  "[infinibandevent]", O_RDONLY));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
-						      uverbs_free_qp));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_QP,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
-						      uverbs_free_srq));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_SRQ,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
+				 uverbs_free_srq));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
-						      0, uverbs_free_flow));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_FLOW,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+				 uverbs_free_flow));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
-						      uverbs_free_wq));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_WQ,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
-						      uverbs_free_xrcd));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_XRCD,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
+				 uverbs_free_xrcd));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
-			    /* 2 is used in order to free the PD after MRs */
-			    &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE);
 
-static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
-				  &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_PD),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_MR),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_QP),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_AH),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_MW),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_DM),
-				  &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
+DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
+			   &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_PD),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_MR),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_QP),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_AH),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_MW),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_DM),
+			   &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
 
 const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
 {
 	return &uverbs_default_objects;
 }
-EXPORT_SYMBOL_GPL(uverbs_default_get_objects);
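
The uverbs_free_* callbacks above switch from open-coded "ret && why == RDMA_REMOVE_DESTROY" tests to ib_is_destroy_retryable(). A paraphrase of the intended semantics, assuming the helper looks roughly like this; the authoritative definition lives in include/rdma/ib_verbs.h.

/* Paraphrased sketch, not the verbatim kernel helper */
static inline bool sketch_is_destroy_retryable(int ret,
					       enum rdma_remove_reason why,
					       struct ib_uobject *uobj)
{
	/*
	 * A failure is only reported back (and retried later) when the
	 * user explicitly asked for the destroy, or when the ucontext is
	 * in a state where cleanup may be retried; during forced cleanup
	 * the object is torn down regardless of the driver's return code.
	 */
	return ret && (why == RDMA_REMOVE_DESTROY ||
		       uobj->context->cleanup_retryable);
}
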
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 03b182a..a0ffdcf 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -38,20 +38,22 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
 				enum rdma_remove_reason why)
 {
 	struct ib_counters *counters = uobject->object;
+	int ret;
 
-	if (why == RDMA_REMOVE_DESTROY &&
-	    atomic_read(&counters->usecnt))
-		return -EBUSY;
+	ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
+	if (ret)
+		return ret;
 
 	return counters->device->destroy_counters(counters);
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev,
-							 struct ib_uverbs_file *file,
-							 struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
+	struct ib_device *ib_dev = uobj->context->device;
 	struct ib_counters *counters;
-	struct ib_uobject *uobj;
 	int ret;
 
 	/*
@@ -62,7 +64,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de
 	if (!ib_dev->create_counters)
 		return -EOPNOTSUPP;
 
-	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
 	counters = ib_dev->create_counters(ib_dev, attrs);
 	if (IS_ERR(counters)) {
 		ret = PTR_ERR(counters);
@@ -80,9 +81,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_de
 	return ret;
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
-						       struct ib_uverbs_file *file,
-						       struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct ib_counters_read_attr read_attr = {};
 	const struct uverbs_attr *uattr;
@@ -90,68 +90,62 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
 		uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
 	int ret;
 
-	if (!ib_dev->read_counters)
+	if (!counters->device->read_counters)
 		return -EOPNOTSUPP;
 
 	if (!atomic_read(&counters->usecnt))
 		return -EINVAL;
 
-	ret = uverbs_copy_from(&read_attr.flags, attrs,
-			       UVERBS_ATTR_READ_COUNTERS_FLAGS);
+	ret = uverbs_get_flags32(&read_attr.flags, attrs,
+				 UVERBS_ATTR_READ_COUNTERS_FLAGS,
+				 IB_UVERBS_READ_COUNTERS_PREFER_CACHED);
 	if (ret)
 		return ret;
 
 	uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
 	read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
-	read_attr.counters_buff = kcalloc(read_attr.ncounters,
-					  sizeof(u64), GFP_KERNEL);
-	if (!read_attr.counters_buff)
-		return -ENOMEM;
+	read_attr.counters_buff = uverbs_zalloc(
+		attrs, array_size(read_attr.ncounters, sizeof(u64)));
+	if (IS_ERR(read_attr.counters_buff))
+		return PTR_ERR(read_attr.counters_buff);
 
-	ret = ib_dev->read_counters(counters,
-				    &read_attr,
-				    attrs);
+	ret = counters->device->read_counters(counters, &read_attr, attrs);
 	if (ret)
-		goto err_read;
+		return ret;
 
-	ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
-			     read_attr.counters_buff,
-			     read_attr.ncounters * sizeof(u64));
-
-err_read:
-	kfree(read_attr.counters_buff);
-	return ret;
+	return uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
+			      read_attr.counters_buff,
+			      read_attr.ncounters * sizeof(u64));
 }
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
-			 UVERBS_OBJECT_COUNTERS,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_COUNTERS_CREATE,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+			UVERBS_OBJECT_COUNTERS,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY));
 
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY,
-	uverbs_destroy_def_handler,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
-			 UVERBS_OBJECT_COUNTERS,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	UVERBS_METHOD_COUNTERS_DESTROY,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+			UVERBS_OBJECT_COUNTERS,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
 
-#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
-			 UVERBS_OBJECT_COUNTERS,
-			 UVERBS_ACCESS_READ,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
-			     UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
-			    UVERBS_ATTR_TYPE(__u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_COUNTERS_READ,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
+			UVERBS_OBJECT_COUNTERS,
+			UVERBS_ACCESS_READ,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
+			    UVERBS_ATTR_MIN_SIZE(0),
+			    UA_MANDATORY),
+	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+			     enum ib_uverbs_read_counters_flags));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
 			    &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
-
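
UVERBS_METHOD_COUNTERS_READ above now fetches its flags through uverbs_get_flags32() with an explicit mask of allowed bits, and allocates the result buffer with uverbs_zalloc() so it is released with the attribute bundle. Below is a sketch of the validation idea only, under the assumption that unknown bits are rejected with -EINVAL; the helper name is hypothetical and the real implementation in uverbs_ioctl.c also handles missing optional attributes.

#include <linux/errno.h>
#include <linux/kernel.h>	/* U32_MAX */
#include <linux/types.h>

/* Hypothetical helper; illustrates the flag checking only */
static int sketch_get_flags32(u32 *to, u64 raw, u64 allowed_bits)
{
	if (raw & ~allowed_bits)
		return -EINVAL;	/* userspace set a bit the kernel does not know */
	if (raw > U32_MAX)
		return -EINVAL;	/* must fit the 32-bit kernel field */

	*to = raw;
	return 0;
}
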
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 3d293d0..5b5f205 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -44,21 +44,26 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
 	int ret;
 
 	ret = ib_destroy_cq(cq);
-	if (!ret || why != RDMA_REMOVE_DESTROY)
-		ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
-				      container_of(ev_queue,
-						   struct ib_uverbs_completion_event_file,
-						   ev_queue) : NULL,
-				      ucq);
+	if (ib_is_destroy_retryable(ret, why, uobject))
+		return ret;
+
+	ib_uverbs_release_ucq(
+		uobject->context->ufile,
+		ev_queue ? container_of(ev_queue,
+					struct ib_uverbs_completion_event_file,
+					ev_queue) :
+			   NULL,
+		ucq);
 	return ret;
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
-						   struct ib_uverbs_file *file,
-						   struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
-	struct ib_ucontext *ucontext = file->ucontext;
-	struct ib_ucq_object           *obj;
+	struct ib_ucq_object *obj = container_of(
+		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
+		typeof(*obj), uobject);
+	struct ib_device *ib_dev = obj->uobject.context->device;
 	struct ib_udata uhw;
 	int ret;
 	u64 user_handle;
@@ -67,7 +72,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 	struct ib_uverbs_completion_event_file    *ev_file = NULL;
 	struct ib_uobject *ev_file_uobj;
 
-	if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
+	if (!ib_dev->create_cq || !ib_dev->destroy_cq)
 		return -EOPNOTSUPP;
 
 	ret = uverbs_copy_from(&attr.comp_vector, attrs,
@@ -81,28 +86,26 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 	if (ret)
 		return ret;
 
-	/* Optional param, if it doesn't exist, we get -ENOENT and skip it */
-	if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs,
-						UVERBS_ATTR_CREATE_CQ_FLAGS)))
-		return -EFAULT;
+	ret = uverbs_get_flags32(&attr.flags, attrs,
+				 UVERBS_ATTR_CREATE_CQ_FLAGS,
+				 IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION |
+					 IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN);
+	if (ret)
+		return ret;
 
 	ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
 	if (!IS_ERR(ev_file_uobj)) {
 		ev_file = container_of(ev_file_uobj,
 				       struct ib_uverbs_completion_event_file,
-				       uobj_file.uobj);
+				       uobj);
 		uverbs_uobject_get(ev_file_uobj);
 	}
 
-	if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
+	if (attr.comp_vector >= file->device->num_comp_vectors) {
 		ret = -EINVAL;
 		goto err_event_file;
 	}
 
-	obj = container_of(uverbs_attr_get_uobject(attrs,
-						   UVERBS_ATTR_CREATE_CQ_HANDLE),
-			   typeof(*obj), uobject);
-	obj->uverbs_file	   = ucontext->ufile;
 	obj->comp_events_reported  = 0;
 	obj->async_events_reported = 0;
 	INIT_LIST_HEAD(&obj->comp_list);
@@ -111,7 +114,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 	/* Temporary, only until drivers get the new uverbs_attr_bundle */
 	create_udata(attrs, &uhw);
 
-	cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
+	cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw);
 	if (IS_ERR(cq)) {
 		ret = PTR_ERR(cq);
 		goto err_event_file;
@@ -143,69 +146,64 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
 	return ret;
 };
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_CQ_CREATE,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE,
+			UVERBS_OBJECT_CQ,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_MANDATORY),
+	UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+		       UVERBS_OBJECT_COMP_CHANNEL,
+		       UVERBS_ACCESS_READ,
+		       UA_OPTIONAL),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_MANDATORY),
+	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_CREATE_CQ_FLAGS,
+			     enum ib_uverbs_ex_create_cq_flags),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
 			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
-			    UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
-			UVERBS_OBJECT_COMP_CHANNEL,
-			UVERBS_ACCESS_READ),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
+			    UA_MANDATORY),
+	UVERBS_ATTR_UHW());
 
-static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
-						    struct ib_uverbs_file *file,
-						    struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct ib_uobject *uobj =
 		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
-	struct ib_uverbs_destroy_cq_resp resp;
-	struct ib_ucq_object *obj;
-	int ret;
-
-	if (IS_ERR(uobj))
-		return PTR_ERR(uobj);
-
-	obj = container_of(uobj, struct ib_ucq_object, uobject);
-
-	if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
-		return -EOPNOTSUPP;
-
-	ret = rdma_explicit_destroy(uobj);
-	if (ret)
-		return ret;
-
-	resp.comp_events_reported  = obj->comp_events_reported;
-	resp.async_events_reported = obj->async_events_reported;
+	struct ib_ucq_object *obj =
+		container_of(uobj, struct ib_ucq_object, uobject);
+	struct ib_uverbs_destroy_cq_resp resp = {
+		.comp_events_reported = obj->comp_events_reported,
+		.async_events_reported = obj->async_events_reported
+	};
 
 	return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
 			      sizeof(resp));
 }
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
-			     UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_CQ_DESTROY,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE,
+			UVERBS_OBJECT_CQ,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+			    UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+			    UA_MANDATORY));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ,
-			    &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
-						      uverbs_free_cq),
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_CQ,
+	UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
+
 #if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
-			    &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
-			    &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+	&UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+	&UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
 #endif
-			   );
-
+);
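
The CQ handlers above illustrate the tree-wide signature change: method handlers no longer receive a struct ib_device argument and instead derive it from an attribute-bound uobject. A minimal hypothetical handler in the new style; example_handler() and EXAMPLE_ATTR_HANDLE are placeholders, not names from the patch.

static int example_handler(struct ib_uverbs_file *file,
			   struct uverbs_attr_bundle *attrs)
{
	/* EXAMPLE_ATTR_HANDLE stands in for the method's IDR attribute id */
	struct ib_uobject *uobj =
		uverbs_attr_get_uobject(attrs, EXAMPLE_ATTR_HANDLE);
	struct ib_device *ib_dev = uobj->context->device;

	/* The device is reached through the object, not passed in */
	if (!ib_dev->create_cq)
		return -EOPNOTSUPP;

	/* ... method body ... */
	return 0;
}
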
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index 8b68157..edc3ff7 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -37,20 +37,24 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
 			  enum rdma_remove_reason why)
 {
 	struct ib_dm *dm = uobject->object;
+	int ret;
 
-	if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt))
-		return -EBUSY;
+	ret = ib_destroy_usecnt(&dm->usecnt, why, uobject);
+	if (ret)
+		return ret;
 
 	return dm->device->dealloc_dm(dm);
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
-						  struct ib_uverbs_file *file,
-						  struct uverbs_attr_bundle *attrs)
+static int
+UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
+				       struct uverbs_attr_bundle *attrs)
 {
-	struct ib_ucontext *ucontext = file->ucontext;
 	struct ib_dm_alloc_attr attr = {};
-	struct ib_uobject *uobj;
+	struct ib_uobject *uobj =
+		uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)
+			->obj_attr.uobject;
+	struct ib_device *ib_dev = uobj->context->device;
 	struct ib_dm *dm;
 	int ret;
 
@@ -67,9 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
 	if (ret)
 		return ret;
 
-	uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject;
-
-	dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs);
+	dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs);
 	if (IS_ERR(dm))
 		return PTR_ERR(dm);
 
@@ -83,26 +85,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
 	return 0;
 }
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
-			    UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
-			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_DM_ALLOC,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE,
+			UVERBS_OBJECT_DM,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_MANDATORY));
 
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE,
-	uverbs_destroy_def_handler,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
-			 UVERBS_OBJECT_DM,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	UVERBS_METHOD_DM_FREE,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+			UVERBS_OBJECT_DM,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
 
 DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
-			    /* 1 is used in order to free the DM after MRs */
-			    &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm),
+			    UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
 			    &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
 			    &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
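
uverbs_free_dm() above (like the PD, XRCD, counters and flow_action cleanups) replaces the open-coded "why == RDMA_REMOVE_DESTROY && atomic_read(usecnt)" test with ib_destroy_usecnt(). A paraphrase of what that helper is expected to do, building on the retryable check sketched earlier; see include/rdma/ib_verbs.h for the real definition.

/* Paraphrased sketch, not the verbatim kernel helper */
static inline int sketch_destroy_usecnt(atomic_t *usecnt,
					enum rdma_remove_reason why,
					struct ib_uobject *uobj)
{
	/*
	 * A busy object only blocks a user-requested destroy (or a
	 * retryable cleanup); forced teardown proceeds regardless.
	 */
	if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
		return -EBUSY;
	return 0;
}
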
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index a7be51c..d8cfafe2 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -37,10 +37,11 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
 				   enum rdma_remove_reason why)
 {
 	struct ib_flow_action *action = uobject->object;
+	int ret;
 
-	if (why == RDMA_REMOVE_DESTROY &&
-	    atomic_read(&action->usecnt))
-		return -EBUSY;
+	ret = ib_destroy_usecnt(&action->usecnt, why, uobject);
+	if (ret)
+		return ret;
 
 	return action->device->destroy_flow_action(action);
 }
@@ -303,12 +304,13 @@ static int parse_flow_action_esp(struct ib_device *ib_dev,
 	return 0;
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev,
-								struct ib_uverbs_file *file,
-								struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
+	struct ib_device *ib_dev = uobj->context->device;
 	int				  ret;
-	struct ib_uobject		  *uobj;
 	struct ib_flow_action		  *action;
 	struct ib_flow_action_esp_attr	  esp_attr = {};
 
@@ -320,7 +322,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
 		return ret;
 
 	/* No need to check as this attribute is marked as MANDATORY */
-	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
 	action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
 	if (IS_ERR(action))
 		return PTR_ERR(action);
@@ -334,102 +335,109 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
 	return 0;
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev,
-								struct ib_uverbs_file *file,
-								struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
+	struct ib_flow_action *action = uobj->object;
 	int				  ret;
-	struct ib_uobject		  *uobj;
-	struct ib_flow_action		  *action;
 	struct ib_flow_action_esp_attr	  esp_attr = {};
 
-	if (!ib_dev->modify_flow_action_esp)
+	if (!action->device->modify_flow_action_esp)
 		return -EOPNOTSUPP;
 
-	ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true);
+	ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr,
+				    true);
 	if (ret)
 		return ret;
 
-	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
-	action = uobj->object;
-
 	if (action->type != IB_FLOW_ACTION_ESP)
 		return -EINVAL;
 
-	return ib_dev->modify_flow_action_esp(action,
-					      &esp_attr.hdr,
-					      attrs);
+	return action->device->modify_flow_action_esp(action, &esp_attr.hdr,
+						      attrs);
 }
 
 static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
 	[IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
-		{ .ptr = {
-			.type = UVERBS_ATTR_TYPE_PTR_IN,
-			UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
-			.flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
-		} },
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_STRUCT(
+			struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
+			aes_key),
 	},
 };
 
 static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
 	[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
-		{ .ptr = {
-			.type = UVERBS_ATTR_TYPE_PTR_IN,
-			/* No need to specify any data */
-			.len = 0,
-		} }
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_NO_DATA(),
 	},
 	[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
-		{ .ptr = {
-			.type = UVERBS_ATTR_TYPE_PTR_IN,
-			UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
-			.flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
-		} }
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
+				   size),
 	},
 };
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
-			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
-				     UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
-	&UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
-			     uverbs_flow_action_esp_keymat,
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
-			     uverbs_flow_action_esp_replay),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
-			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+			UVERBS_OBJECT_FLOW_ACTION,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+			   UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+					      hard_limit_pkts),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+			   UVERBS_ATTR_TYPE(__u32),
+			   UA_OPTIONAL),
+	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+			    uverbs_flow_action_esp_keymat,
+			    UA_MANDATORY),
+	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+			    uverbs_flow_action_esp_replay,
+			    UA_OPTIONAL),
+	UVERBS_ATTR_PTR_IN(
+		UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+		UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
+		UA_OPTIONAL));
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
-			 UVERBS_ACCESS_WRITE,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
-			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
-	&UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
-			     uverbs_flow_action_esp_keymat),
-	&UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
-			     uverbs_flow_action_esp_replay),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
-			    UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
+			UVERBS_OBJECT_FLOW_ACTION,
+			UVERBS_ACCESS_WRITE,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+			   UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+					      hard_limit_pkts),
+			   UA_OPTIONAL),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+			   UVERBS_ATTR_TYPE(__u32),
+			   UA_OPTIONAL),
+	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+			    uverbs_flow_action_esp_keymat,
+			    UA_OPTIONAL),
+	UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+			    uverbs_flow_action_esp_replay,
+			    UA_OPTIONAL),
+	UVERBS_ATTR_PTR_IN(
+		UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+		UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
+		UA_OPTIONAL));
 
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY,
-	uverbs_destroy_def_handler,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
-			 UVERBS_OBJECT_FLOW_ACTION,
-			 UVERBS_ACCESS_DESTROY,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	UVERBS_METHOD_FLOW_ACTION_DESTROY,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+			UVERBS_OBJECT_FLOW_ACTION,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION,
-			    &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action),
-			    &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
-			    &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
-			    &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
-
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_FLOW_ACTION,
+	UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
+	&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
+	&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
+	&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
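
The flow_action specs above show the new flat initializer style for enum-driven attributes: the value userspace passes for an ENUM_IN attribute selects one entry of a uverbs_attr_spec array, and that entry then validates the companion payload. A hypothetical two-entry example in the same style; the struct, array name and attribute id are made up for illustration.

/* Hypothetical payload carried when the enum value is 1 */
struct example_enum_payload {
	__u32	flags;
	__u32	reserved;
};

static const struct uverbs_attr_spec example_enum_specs[] = {
	[0] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_NO_DATA(),		/* value 0 carries no payload */
	},
	[1] = {
		.type = UVERBS_ATTR_TYPE_PTR_IN,
		UVERBS_ATTR_STRUCT(struct example_enum_payload, reserved),
	},
};

/* Referenced from a method declaration as:
 *	UVERBS_ATTR_ENUM_IN(EXAMPLE_ATTR_ENUM, example_enum_specs, UA_OPTIONAL)
 */
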
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 68f7cad..cf02e77 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -39,14 +39,18 @@ static int uverbs_free_mr(struct ib_uobject *uobject,
 	return ib_dereg_mr((struct ib_mr *)uobject->object);
 }
 
-static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
-						   struct ib_uverbs_file *file,
-						   struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
 {
 	struct ib_dm_mr_attr attr = {};
-	struct ib_uobject *uobj;
-	struct ib_dm *dm;
-	struct ib_pd *pd;
+	struct ib_uobject *uobj =
+		uverbs_attr_get_uobject(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE);
+	struct ib_dm *dm =
+		uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
+	struct ib_pd *pd =
+		uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
+	struct ib_device *ib_dev = pd->device;
+
 	struct ib_mr *mr;
 	int ret;
 
@@ -62,8 +66,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
 	if (ret)
 		return ret;
 
-	ret = uverbs_copy_from(&attr.access_flags, attrs,
-			       UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS);
+	ret = uverbs_get_flags32(&attr.access_flags, attrs,
+				 UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+				 IB_ACCESS_SUPPORTED);
 	if (ret)
 		return ret;
 
@@ -74,12 +79,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
 	if (ret)
 		return ret;
 
-	pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
-
-	dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
-
-	uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject;
-
 	if (attr.offset > dm->length || attr.length > dm->length ||
 	    attr.length > dm->length - attr.offset)
 		return -EINVAL;
@@ -115,33 +114,36 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
 	return ret;
 }
 
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR,
-			 UVERBS_ACCESS_NEW,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
-			    UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
-			    UVERBS_ATTR_TYPE(u64),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD,
-			 UVERBS_ACCESS_READ,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+DECLARE_UVERBS_NAMED_METHOD(
+	UVERBS_METHOD_DM_MR_REG,
+	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
+			UVERBS_OBJECT_MR,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_MANDATORY),
+	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
+			UVERBS_OBJECT_PD,
+			UVERBS_ACCESS_READ,
+			UA_MANDATORY),
+	UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+			     enum ib_access_flags),
+	UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
+			UVERBS_OBJECT_DM,
+			UVERBS_ACCESS_READ,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
 			    UVERBS_ATTR_TYPE(u32),
-			    UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM,
-			 UVERBS_ACCESS_READ,
-			 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
-			     UVERBS_ATTR_TYPE(u32),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-	&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
-			     UVERBS_ATTR_TYPE(u32),
-			     UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+			    UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+			    UVERBS_ATTR_TYPE(u32),
+			    UA_MANDATORY));
 
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
-			    /* 1 is used in order to free the MR after all the MWs */
-			    &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr),
-			    &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
+DECLARE_UVERBS_NAMED_OBJECT(
+	UVERBS_OBJECT_MR,
+	UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
+	&UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
new file mode 100644
index 0000000..73ea6f0
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc.  All rights reserved.
+ */
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/rdma_user_ioctl.h>
+#include <linux/bitops.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
+{
+	void *elm;
+	int rc;
+
+	if (key == UVERBS_API_KEY_ERR)
+		return ERR_PTR(-EOVERFLOW);
+
+	elm = kzalloc(alloc_size, GFP_KERNEL);
+	if (!elm)
+		return ERR_PTR(-ENOMEM);
+	rc = radix_tree_insert(&uapi->radix, key, elm);
+	if (rc) {
+		kfree(elm);
+		return ERR_PTR(rc);
+	}
+
+	return elm;
+}
+
+static int uapi_merge_method(struct uverbs_api *uapi,
+			     struct uverbs_api_object *obj_elm, u32 obj_key,
+			     const struct uverbs_method_def *method,
+			     bool is_driver)
+{
+	u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
+	struct uverbs_api_ioctl_method *method_elm;
+	unsigned int i;
+
+	if (!method->attrs)
+		return 0;
+
+	method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm));
+	if (IS_ERR(method_elm)) {
+		if (method_elm != ERR_PTR(-EEXIST))
+			return PTR_ERR(method_elm);
+
+		/*
+		 * This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
+		 */
+		if (WARN_ON(method->handler))
+			return -EINVAL;
+		method_elm = radix_tree_lookup(&uapi->radix, method_key);
+		if (WARN_ON(!method_elm))
+			return -EINVAL;
+	} else {
+		WARN_ON(!method->handler);
+		rcu_assign_pointer(method_elm->handler, method->handler);
+		if (method->handler != uverbs_destroy_def_handler)
+			method_elm->driver_method = is_driver;
+	}
+
+	for (i = 0; i != method->num_attrs; i++) {
+		const struct uverbs_attr_def *attr = (*method->attrs)[i];
+		struct uverbs_api_attr *attr_slot;
+
+		if (!attr)
+			continue;
+
+		/*
+		 * ENUM_IN contains the 'ids' pointer to the driver's .rodata,
+		 * so if it is specified by a driver then it always makes this
+		 * into a driver method.
+		 */
+		if (attr->attr.type == UVERBS_ATTR_TYPE_ENUM_IN)
+			method_elm->driver_method |= is_driver;
+
+		attr_slot =
+			uapi_add_elm(uapi, method_key | uapi_key_attr(attr->id),
+				     sizeof(*attr_slot));
+		/* Attributes are not allowed to be modified by drivers */
+		if (IS_ERR(attr_slot))
+			return PTR_ERR(attr_slot);
+
+		attr_slot->spec = attr->attr;
+	}
+
+	return 0;
+}
+
+static int uapi_merge_tree(struct uverbs_api *uapi,
+			   const struct uverbs_object_tree_def *tree,
+			   bool is_driver)
+{
+	unsigned int i, j;
+	int rc;
+
+	if (!tree->objects)
+		return 0;
+
+	for (i = 0; i != tree->num_objects; i++) {
+		const struct uverbs_object_def *obj = (*tree->objects)[i];
+		struct uverbs_api_object *obj_elm;
+		u32 obj_key;
+
+		if (!obj)
+			continue;
+
+		obj_key = uapi_key_obj(obj->id);
+		obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm));
+		if (IS_ERR(obj_elm)) {
+			if (obj_elm != ERR_PTR(-EEXIST))
+				return PTR_ERR(obj_elm);
+
+			/* This occurs when a driver uses ADD_UVERBS_METHODS */
+			if (WARN_ON(obj->type_attrs))
+				return -EINVAL;
+			obj_elm = radix_tree_lookup(&uapi->radix, obj_key);
+			if (WARN_ON(!obj_elm))
+				return -EINVAL;
+		} else {
+			obj_elm->type_attrs = obj->type_attrs;
+			if (obj->type_attrs) {
+				obj_elm->type_class =
+					obj->type_attrs->type_class;
+				/*
+				 * Today drivers are only permitted to use
+				 * idr_class types. They cannot use FD types
+				 * because we currently have no way to revoke
+				 * the fops pointer after device
+				 * disassociation.
+				 */
+				if (WARN_ON(is_driver &&
+					    obj->type_attrs->type_class !=
+						    &uverbs_idr_class))
+					return -EINVAL;
+			}
+		}
+
+		if (!obj->methods)
+			continue;
+
+		for (j = 0; j != obj->num_methods; j++) {
+			const struct uverbs_method_def *method =
+				(*obj->methods)[j];
+			if (!method)
+				continue;
+
+			rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
+					       is_driver);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+static int
+uapi_finalize_ioctl_method(struct uverbs_api *uapi,
+			   struct uverbs_api_ioctl_method *method_elm,
+			   u32 method_key)
+{
+	struct radix_tree_iter iter;
+	unsigned int num_attrs = 0;
+	unsigned int max_bkey = 0;
+	bool single_uobj = false;
+	void __rcu **slot;
+
+	method_elm->destroy_bkey = UVERBS_API_ATTR_BKEY_LEN;
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter,
+				  uapi_key_attrs_start(method_key)) {
+		struct uverbs_api_attr *elm =
+			rcu_dereference_protected(*slot, true);
+		u32 attr_key = iter.index & UVERBS_API_ATTR_KEY_MASK;
+		u32 attr_bkey = uapi_bkey_attr(attr_key);
+		u8 type = elm->spec.type;
+
+		if (uapi_key_attr_to_method(iter.index) !=
+		    uapi_key_attr_to_method(method_key))
+			break;
+
+		if (elm->spec.mandatory)
+			__set_bit(attr_bkey, method_elm->attr_mandatory);
+
+		if (type == UVERBS_ATTR_TYPE_IDR ||
+		    type == UVERBS_ATTR_TYPE_FD) {
+			u8 access = elm->spec.u.obj.access;
+
+			/*
+			 * Verbs specs may only have one NEW/DESTROY; we don't
+			 * have the infrastructure to abort multiple NEWs or
+			 * cope with multiple DESTROY failures.
+			 */
+			if (access == UVERBS_ACCESS_NEW ||
+			    access == UVERBS_ACCESS_DESTROY) {
+				if (WARN_ON(single_uobj))
+					return -EINVAL;
+
+				single_uobj = true;
+				if (WARN_ON(!elm->spec.mandatory))
+					return -EINVAL;
+			}
+
+			if (access == UVERBS_ACCESS_DESTROY)
+				method_elm->destroy_bkey = attr_bkey;
+		}
+
+		max_bkey = max(max_bkey, attr_bkey);
+		num_attrs++;
+	}
+
+	method_elm->key_bitmap_len = max_bkey + 1;
+	WARN_ON(method_elm->key_bitmap_len > UVERBS_API_ATTR_BKEY_LEN);
+
+	uapi_compute_bundle_size(method_elm, num_attrs);
+	return 0;
+}
+
+static int uapi_finalize(struct uverbs_api *uapi)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+	int rc;
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		struct uverbs_api_ioctl_method *method_elm =
+			rcu_dereference_protected(*slot, true);
+
+		if (uapi_key_is_ioctl_method(iter.index)) {
+			rc = uapi_finalize_ioctl_method(uapi, method_elm,
+							iter.index);
+			if (rc)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+void uverbs_destroy_api(struct uverbs_api *uapi)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	if (!uapi)
+		return;
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		kfree(rcu_dereference_protected(*slot, true));
+		radix_tree_iter_delete(&uapi->radix, &iter, slot);
+	}
+}
+
+struct uverbs_api *uverbs_alloc_api(
+	const struct uverbs_object_tree_def *const *driver_specs,
+	enum rdma_driver_id driver_id)
+{
+	struct uverbs_api *uapi;
+	int rc;
+
+	uapi = kzalloc(sizeof(*uapi), GFP_KERNEL);
+	if (!uapi)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
+	uapi->driver_id = driver_id;
+
+	rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false);
+	if (rc)
+		goto err;
+
+	for (; driver_specs && *driver_specs; driver_specs++) {
+		rc = uapi_merge_tree(uapi, *driver_specs, true);
+		if (rc)
+			goto err;
+	}
+
+	rc = uapi_finalize(uapi);
+	if (rc)
+		goto err;
+
+	return uapi;
+err:
+	if (rc != -ENOMEM)
+		pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
+		       rc);
+
+	uverbs_destroy_api(uapi);
+	return ERR_PTR(rc);
+}
+
+/*
+ * The pre version is done before destroying the HW objects; it only blocks
+ * off method access. All methods that require the ib_dev or the module data
+ * must test one of these assignments prior to continuing.
+ */
+void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev)
+{
+	struct uverbs_api *uapi = uverbs_dev->uapi;
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		if (uapi_key_is_ioctl_method(iter.index)) {
+			struct uverbs_api_ioctl_method *method_elm =
+				rcu_dereference_protected(*slot, true);
+
+			if (method_elm->driver_method)
+				rcu_assign_pointer(method_elm->handler, NULL);
+		}
+	}
+
+	synchronize_srcu(&uverbs_dev->disassociate_srcu);
+}
+
+/*
+ * Called when a driver disassociates from the ib_uverbs_device. The
+ * assumption is that the driver module will unload after. Replace everything
+ * related to the driver with NULL as a safety measure.
+ */
+void uverbs_disassociate_api(struct uverbs_api *uapi)
+{
+	struct radix_tree_iter iter;
+	void __rcu **slot;
+
+	radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+		if (uapi_key_is_object(iter.index)) {
+			struct uverbs_api_object *object_elm =
+				rcu_dereference_protected(*slot, true);
+
+			/*
+			 * Some type_attrs are in the driver module. We don't
+			 * bother to keep track of which since there should be
+			 * no use of this after disassociate.
+			 */
+			object_elm->type_attrs = NULL;
+		} else if (uapi_key_is_attr(iter.index)) {
+			struct uverbs_api_attr *elm =
+				rcu_dereference_protected(*slot, true);
+
+			if (elm->spec.type == UVERBS_ATTR_TYPE_ENUM_IN)
+				elm->spec.u2.enum_def.ids = NULL;
+		}
+	}
+}
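
The new uverbs_uapi.c keeps the whole parsing tree in one radix tree: object, method and attribute identifiers are packed into a single key (via uapi_key_obj(), uapi_key_ioctl_method() and uapi_key_attr()), so all attributes of a method occupy a contiguous key range and uapi_finalize_ioctl_method() can cover them with one ranged walk. Below is a rough user-space sketch of that packing idea; the bit widths and helper names are illustrative assumptions, not the kernel's actual UVERBS_API_* layout.

/* Illustrative only: made-up bit layout, not the kernel's UVERBS_API_*
 * encoding.  Shows how packing object/method/attribute ids into one
 * integer key keeps a method's attributes contiguous in a structure
 * ordered by key, which is what the radix-tree walks above rely on. */
#include <stdint.h>
#include <stdio.h>

#define OBJ_SHIFT	20	/* assumed: object id in bits 20..31 */
#define METHOD_SHIFT	8	/* assumed: method id in bits 8..19 */
#define ATTR_MASK	0xffu	/* assumed: attribute id in bits 0..7 */

static uint32_t key_obj(uint32_t obj)     { return obj << OBJ_SHIFT; }
static uint32_t key_method(uint32_t meth) { return meth << METHOD_SHIFT; }
static uint32_t key_attr(uint32_t attr)   { return attr & ATTR_MASK; }

int main(void)
{
	uint32_t method_key = key_obj(3) | key_method(1);
	unsigned int a;

	/* All attributes of (object 3, method 1) fall in one contiguous
	 * key range, so a ranged walk starting at method_key visits them
	 * before reaching the next method's key. */
	for (a = 0; a < 4; a++)
		printf("attr %u -> key 0x%08x\n", a,
		       (unsigned int)(method_key | key_attr(a)));
	return 0;
}
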
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 9d6beb9..6ee03d6 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -326,12 +326,162 @@ EXPORT_SYMBOL(ib_dealloc_pd);
 
 /* Address handles */
 
+/**
+ * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
+ * @dest:       Pointer to destination ah_attr. Contents of the destination
+ *              pointer are assumed to be invalid and the attributes are overwritten.
+ * @src:        Pointer to source ah_attr.
+ */
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+		       const struct rdma_ah_attr *src)
+{
+	*dest = *src;
+	if (dest->grh.sgid_attr)
+		rdma_hold_gid_attr(dest->grh.sgid_attr);
+}
+EXPORT_SYMBOL(rdma_copy_ah_attr);
+
+/**
+ * rdma_replace_ah_attr - Replace valid ah_attr with a new one.
+ * @old:        Pointer to existing ah_attr which needs to be replaced.
+ *              old is assumed to be valid or zero'd
+ * @new:        Pointer to the new ah_attr.
+ *
+ * rdma_replace_ah_attr() first releases any reference in the old ah_attr if
+ * the old ah_attr is valid; after that it copies the new attribute and holds
+ * a reference to the newly copied sgid_attr.
+ */
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+			  const struct rdma_ah_attr *new)
+{
+	rdma_destroy_ah_attr(old);
+	*old = *new;
+	if (old->grh.sgid_attr)
+		rdma_hold_gid_attr(old->grh.sgid_attr);
+}
+EXPORT_SYMBOL(rdma_replace_ah_attr);
+
+/**
+ * rdma_move_ah_attr - Move ah_attr pointed to by source to destination.
+ * @dest:       Pointer to destination ah_attr to copy to.
+ *              dest is assumed to be valid or zero'd
+ * @src:        Pointer to the source ah_attr.
+ *
+ * rdma_move_ah_attr() first releases any reference in the destination ah_attr
+ * if it is valid. This also transfers ownership of internal references from
+ * src to dest, making src invalid in the process. No new reference to the src
+ * ah_attr is taken.
+ */
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
+{
+	rdma_destroy_ah_attr(dest);
+	*dest = *src;
+	src->grh.sgid_attr = NULL;
+}
+EXPORT_SYMBOL(rdma_move_ah_attr);
+
+/*
+ * Validate that the rdma_ah_attr is valid for the device before passing it
+ * off to the driver.
+ */
+static int rdma_check_ah_attr(struct ib_device *device,
+			      struct rdma_ah_attr *ah_attr)
+{
+	if (!rdma_is_port_valid(device, ah_attr->port_num))
+		return -EINVAL;
+
+	if ((rdma_is_grh_required(device, ah_attr->port_num) ||
+	     ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) &&
+	    !(ah_attr->ah_flags & IB_AH_GRH))
+		return -EINVAL;
+
+	if (ah_attr->grh.sgid_attr) {
+		/*
+		 * Make sure the passed sgid_attr is consistent with the
+		 * parameters
+		 */
+		if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index ||
+		    ah_attr->grh.sgid_attr->port_num != ah_attr->port_num)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * If the ah requires a GRH then ensure that sgid_attr pointer is filled in.
+ * On success the caller is responsible to call rdma_unfill_sgid_attr().
+ */
+static int rdma_fill_sgid_attr(struct ib_device *device,
+			       struct rdma_ah_attr *ah_attr,
+			       const struct ib_gid_attr **old_sgid_attr)
+{
+	const struct ib_gid_attr *sgid_attr;
+	struct ib_global_route *grh;
+	int ret;
+
+	*old_sgid_attr = ah_attr->grh.sgid_attr;
+
+	ret = rdma_check_ah_attr(device, ah_attr);
+	if (ret)
+		return ret;
+
+	if (!(ah_attr->ah_flags & IB_AH_GRH))
+		return 0;
+
+	grh = rdma_ah_retrieve_grh(ah_attr);
+	if (grh->sgid_attr)
+		return 0;
+
+	sgid_attr =
+		rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
+	if (IS_ERR(sgid_attr))
+		return PTR_ERR(sgid_attr);
+
+	/* Move ownership of the kref into the ah_attr */
+	grh->sgid_attr = sgid_attr;
+	return 0;
+}
+
+static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
+				  const struct ib_gid_attr *old_sgid_attr)
+{
+	/*
+	 * Fill didn't change anything; the caller retains ownership of
+	 * whatever it passed.
+	 */
+	if (ah_attr->grh.sgid_attr == old_sgid_attr)
+		return;
+
+	/*
+	 * Otherwise, we need to undo what rdma_fill_sgid_attr() did so the
+	 * caller doesn't see any change in the rdma_ah_attr. If we get here,
+	 * old_sgid_attr is NULL.
+	 */
+	rdma_destroy_ah_attr(ah_attr);
+}
+
+static const struct ib_gid_attr *
+rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
+		      const struct ib_gid_attr *old_attr)
+{
+	if (old_attr)
+		rdma_put_gid_attr(old_attr);
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
+		return ah_attr->grh.sgid_attr;
+	}
+	return NULL;
+}
+
 static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
 				     struct rdma_ah_attr *ah_attr,
 				     struct ib_udata *udata)
 {
 	struct ib_ah *ah;
 
+	if (!pd->device->create_ah)
+		return ERR_PTR(-EOPNOTSUPP);
+
 	ah = pd->device->create_ah(pd, ah_attr, udata);
 
 	if (!IS_ERR(ah)) {
@@ -339,15 +489,38 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
 		ah->pd      = pd;
 		ah->uobject = NULL;
 		ah->type    = ah_attr->type;
+		ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
+
 		atomic_inc(&pd->usecnt);
 	}
 
 	return ah;
 }
 
+/**
+ * rdma_create_ah - Creates an address handle for the
+ * given address vector.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ *
+ * It returns a valid ib_ah pointer on success and an ERR_PTR on failure.
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
 struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
 {
-	return _rdma_create_ah(pd, ah_attr, NULL);
+	const struct ib_gid_attr *old_sgid_attr;
+	struct ib_ah *ah;
+	int ret;
+
+	ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ah = _rdma_create_ah(pd, ah_attr, NULL);
+
+	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+	return ah;
 }
 EXPORT_SYMBOL(rdma_create_ah);
 
@@ -368,15 +541,27 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
 				  struct rdma_ah_attr *ah_attr,
 				  struct ib_udata *udata)
 {
+	const struct ib_gid_attr *old_sgid_attr;
+	struct ib_ah *ah;
 	int err;
 
+	err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+	if (err)
+		return ERR_PTR(err);
+
 	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
 		err = ib_resolve_eth_dmac(pd->device, ah_attr);
-		if (err)
-			return ERR_PTR(err);
+		if (err) {
+			ah = ERR_PTR(err);
+			goto out;
+		}
 	}
 
-	return _rdma_create_ah(pd, ah_attr, udata);
+	ah = _rdma_create_ah(pd, ah_attr, udata);
+
+out:
+	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+	return ah;
 }
 EXPORT_SYMBOL(rdma_create_user_ah);
 
@@ -455,16 +640,16 @@ static bool find_gid_index(const union ib_gid *gid,
 	return true;
 }
 
-static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
-				   u16 vlan_id, const union ib_gid *sgid,
-				   enum ib_gid_type gid_type,
-				   u16 *gid_index)
+static const struct ib_gid_attr *
+get_sgid_attr_from_eth(struct ib_device *device, u8 port_num,
+		       u16 vlan_id, const union ib_gid *sgid,
+		       enum ib_gid_type gid_type)
 {
 	struct find_gid_index_context context = {.vlan_id = vlan_id,
 						 .gid_type = gid_type};
 
-	return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
-				     &context, gid_index);
+	return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+				       &context);
 }
 
 int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
@@ -508,39 +693,24 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
 static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
 				       struct rdma_ah_attr *ah_attr)
 {
-	struct ib_gid_attr sgid_attr;
-	struct ib_global_route *grh;
+	struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
+	const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
 	int hop_limit = 0xff;
-	union ib_gid sgid;
-	int ret;
-
-	grh = rdma_ah_retrieve_grh(ah_attr);
-
-	ret = ib_query_gid(device,
-			   rdma_ah_get_port_num(ah_attr),
-			   grh->sgid_index,
-			   &sgid, &sgid_attr);
-	if (ret || !sgid_attr.ndev) {
-		if (!ret)
-			ret = -ENXIO;
-		return ret;
-	}
+	int ret = 0;
 
 	/* If destination is link local and source GID is RoCEv1,
 	 * IP stack is not used.
 	 */
 	if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
-	    sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
+	    sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
 		rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
 				ah_attr->roce.dmac);
-		goto done;
+		return ret;
 	}
 
-	ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+	ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
 					   ah_attr->roce.dmac,
-					   sgid_attr.ndev, &hop_limit);
-done:
-	dev_put(sgid_attr.ndev);
+					   sgid_attr->ndev, &hop_limit);
 
 	grh->hop_limit = hop_limit;
 	return ret;
@@ -555,16 +725,18 @@ static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
  * as sgid, and sgid is used as dgid because sgid contains the destination
  * GID to respond to.
  *
+ * On success the caller is responsible to call rdma_destroy_ah_attr on the
+ * attr.
  */
 int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
 			    const struct ib_wc *wc, const struct ib_grh *grh,
 			    struct rdma_ah_attr *ah_attr)
 {
 	u32 flow_class;
-	u16 gid_index;
 	int ret;
 	enum rdma_network_type net_type = RDMA_NETWORK_IB;
 	enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+	const struct ib_gid_attr *sgid_attr;
 	int hoplimit = 0xff;
 	union ib_gid dgid;
 	union ib_gid sgid;
@@ -595,72 +767,141 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
 		if (!(wc->wc_flags & IB_WC_GRH))
 			return -EPROTOTYPE;
 
-		ret = get_sgid_index_from_eth(device, port_num,
-					      vlan_id, &dgid,
-					      gid_type, &gid_index);
-		if (ret)
-			return ret;
+		sgid_attr = get_sgid_attr_from_eth(device, port_num,
+						   vlan_id, &dgid,
+						   gid_type);
+		if (IS_ERR(sgid_attr))
+			return PTR_ERR(sgid_attr);
 
 		flow_class = be32_to_cpu(grh->version_tclass_flow);
-		rdma_ah_set_grh(ah_attr, &sgid,
-				flow_class & 0xFFFFF,
-				(u8)gid_index, hoplimit,
-				(flow_class >> 20) & 0xFF);
-		return ib_resolve_unicast_gid_dmac(device, ah_attr);
+		rdma_move_grh_sgid_attr(ah_attr,
+					&sgid,
+					flow_class & 0xFFFFF,
+					hoplimit,
+					(flow_class >> 20) & 0xFF,
+					sgid_attr);
+
+		ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+		if (ret)
+			rdma_destroy_ah_attr(ah_attr);
+
+		return ret;
 	} else {
 		rdma_ah_set_dlid(ah_attr, wc->slid);
 		rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
 
-		if (wc->wc_flags & IB_WC_GRH) {
-			if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
-				ret = ib_find_cached_gid_by_port(device, &dgid,
-								 IB_GID_TYPE_IB,
-								 port_num, NULL,
-								 &gid_index);
-				if (ret)
-					return ret;
-			} else {
-				gid_index = 0;
-			}
+		if ((wc->wc_flags & IB_WC_GRH) == 0)
+			return 0;
 
-			flow_class = be32_to_cpu(grh->version_tclass_flow);
-			rdma_ah_set_grh(ah_attr, &sgid,
+		if (dgid.global.interface_id !=
+					cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
+			sgid_attr = rdma_find_gid_by_port(
+				device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
+		} else
+			sgid_attr = rdma_get_gid_attr(device, port_num, 0);
+
+		if (IS_ERR(sgid_attr))
+			return PTR_ERR(sgid_attr);
+		flow_class = be32_to_cpu(grh->version_tclass_flow);
+		rdma_move_grh_sgid_attr(ah_attr,
+					&sgid,
 					flow_class & 0xFFFFF,
-					(u8)gid_index, hoplimit,
-					(flow_class >> 20) & 0xFF);
-		}
+					hoplimit,
+					(flow_class >> 20) & 0xFF,
+					sgid_attr);
+
 		return 0;
 	}
 }
 EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
 
+/**
+ * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
+ * of the reference
+ *
+ * @attr:	Pointer to AH attribute structure
+ * @dgid:	Destination GID
+ * @flow_label:	Flow label
+ * @hop_limit:	Hop limit
+ * @traffic_class: traffic class
+ * @sgid_attr:	Pointer to SGID attribute
+ *
+ * This takes ownership of the sgid_attr reference. The caller must ensure
+ * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after
+ * calling this function.
+ */
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+			     u32 flow_label, u8 hop_limit, u8 traffic_class,
+			     const struct ib_gid_attr *sgid_attr)
+{
+	rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
+			traffic_class);
+	attr->grh.sgid_attr = sgid_attr;
+}
+EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
+
+/**
+ * rdma_destroy_ah_attr - Release reference to SGID attribute of
+ * ah attribute.
+ * @ah_attr: Pointer to ah attribute
+ *
+ * Release reference to the SGID attribute of the ah attribute if it is
+ * non NULL. It is safe to call this multiple times, and safe to call it on
+ * a zero initialized ah_attr.
+ */
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
+{
+	if (ah_attr->grh.sgid_attr) {
+		rdma_put_gid_attr(ah_attr->grh.sgid_attr);
+		ah_attr->grh.sgid_attr = NULL;
+	}
+}
+EXPORT_SYMBOL(rdma_destroy_ah_attr);
+
 struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
 				   const struct ib_grh *grh, u8 port_num)
 {
 	struct rdma_ah_attr ah_attr;
+	struct ib_ah *ah;
 	int ret;
 
 	ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
 	if (ret)
 		return ERR_PTR(ret);
 
-	return rdma_create_ah(pd, &ah_attr);
+	ah = rdma_create_ah(pd, &ah_attr);
+
+	rdma_destroy_ah_attr(&ah_attr);
+	return ah;
 }
 EXPORT_SYMBOL(ib_create_ah_from_wc);
 
 int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
 {
+	const struct ib_gid_attr *old_sgid_attr;
+	int ret;
+
 	if (ah->type != ah_attr->type)
 		return -EINVAL;
 
-	return ah->device->modify_ah ?
+	ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
+	if (ret)
+		return ret;
+
+	ret = ah->device->modify_ah ?
 		ah->device->modify_ah(ah, ah_attr) :
 		-EOPNOTSUPP;
+
+	ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
+	rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+	return ret;
 }
 EXPORT_SYMBOL(rdma_modify_ah);
 
 int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
 {
+	ah_attr->grh.sgid_attr = NULL;
+
 	return ah->device->query_ah ?
 		ah->device->query_ah(ah, ah_attr) :
 		-EOPNOTSUPP;
@@ -669,13 +910,17 @@ EXPORT_SYMBOL(rdma_query_ah);
 
 int rdma_destroy_ah(struct ib_ah *ah)
 {
+	const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
 	struct ib_pd *pd;
 	int ret;
 
 	pd = ah->pd;
 	ret = ah->device->destroy_ah(ah);
-	if (!ret)
+	if (!ret) {
 		atomic_dec(&pd->usecnt);
+		if (sgid_attr)
+			rdma_put_gid_attr(sgid_attr);
+	}
 
 	return ret;
 }
@@ -1290,16 +1535,19 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
 }
 EXPORT_SYMBOL(ib_modify_qp_is_ok);
 
+/**
+ * ib_resolve_eth_dmac - Resolve destination mac address
+ * @device:		Device to consider
+ * @ah_attr:		address handle attribute which describes the
+ *			source and destination parameters
+ * ib_resolve_eth_dmac() resolves the destination MAC address and L3 hop limit.
+ * It returns 0 on success or an appropriate error code. It initializes the
+ * necessary ah_attr fields when the call is successful.
+ */
 static int ib_resolve_eth_dmac(struct ib_device *device,
 			       struct rdma_ah_attr *ah_attr)
 {
-	int           ret = 0;
-	struct ib_global_route *grh;
-
-	if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
-		return -EINVAL;
-
-	grh = rdma_ah_retrieve_grh(ah_attr);
+	int ret = 0;
 
 	if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
 		if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
@@ -1317,6 +1565,14 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
 	return ret;
 }
 
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+	return (qp->qp_type == IB_QPT_UC ||
+		qp->qp_type == IB_QPT_RC ||
+		qp->qp_type == IB_QPT_XRC_INI ||
+		qp->qp_type == IB_QPT_XRC_TGT);
+}
+
 /**
  * IB core internal function to perform QP attributes modification.
  */
@@ -1324,8 +1580,53 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 			 int attr_mask, struct ib_udata *udata)
 {
 	u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+	const struct ib_gid_attr *old_sgid_attr_av;
+	const struct ib_gid_attr *old_sgid_attr_alt_av;
 	int ret;
 
+	if (attr_mask & IB_QP_AV) {
+		ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
+					  &old_sgid_attr_av);
+		if (ret)
+			return ret;
+	}
+	if (attr_mask & IB_QP_ALT_PATH) {
+		/*
+		 * FIXME: This does not track the migration state, so if the
+		 * user loads a new alternate path after the HW has migrated
+		 * from primary->alternate we will keep the wrong
+		 * references. This is OK for IB because the reference
+		 * counting does not serve any functional purpose.
+		 */
+		ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
+					  &old_sgid_attr_alt_av);
+		if (ret)
+			goto out_av;
+
+		/*
+		 * Today the core code can only handle alternate paths and APM
+		 * for IB. Ban them in RoCE mode.
+		 */
+		if (!(rdma_protocol_ib(qp->device,
+				       attr->alt_ah_attr.port_num) &&
+		      rdma_protocol_ib(qp->device, port))) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	/*
+	 * If the user provided the qp_attr then we have to resolve it. Kernel
+	 * users have to provide already resolved rdma_ah_attrs.
+	 */
+	if (udata && (attr_mask & IB_QP_AV) &&
+	    attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+	    is_qp_type_connected(qp)) {
+		ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+		if (ret)
+			goto out;
+	}
+
 	if (rdma_ib_or_roce(qp->device, port)) {
 		if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
 			pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
@@ -1341,20 +1642,27 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 	}
 
 	ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
-	if (!ret && (attr_mask & IB_QP_PORT))
+	if (ret)
+		goto out;
+
+	if (attr_mask & IB_QP_PORT)
 		qp->port = attr->port_num;
+	if (attr_mask & IB_QP_AV)
+		qp->av_sgid_attr =
+			rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
+	if (attr_mask & IB_QP_ALT_PATH)
+		qp->alt_path_sgid_attr = rdma_update_sgid_attr(
+			&attr->alt_ah_attr, qp->alt_path_sgid_attr);
 
+out:
+	if (attr_mask & IB_QP_ALT_PATH)
+		rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
+out_av:
+	if (attr_mask & IB_QP_AV)
+		rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
 	return ret;
 }
 
-static bool is_qp_type_connected(const struct ib_qp *qp)
-{
-	return (qp->qp_type == IB_QPT_UC ||
-		qp->qp_type == IB_QPT_RC ||
-		qp->qp_type == IB_QPT_XRC_INI ||
-		qp->qp_type == IB_QPT_XRC_TGT);
-}
-
 /**
  * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
  * @ib_qp: The QP to modify.
@@ -1369,17 +1677,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp)
 int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
 			    int attr_mask, struct ib_udata *udata)
 {
-	struct ib_qp *qp = ib_qp->real_qp;
-	int ret;
-
-	if (attr_mask & IB_QP_AV &&
-	    attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
-	    is_qp_type_connected(qp)) {
-		ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
-		if (ret)
-			return ret;
-	}
-	return _ib_modify_qp(qp, attr, attr_mask, udata);
+	return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
 }
 EXPORT_SYMBOL(ib_modify_qp_with_udata);
 
@@ -1451,6 +1749,9 @@ int ib_query_qp(struct ib_qp *qp,
 		int qp_attr_mask,
 		struct ib_qp_init_attr *qp_init_attr)
 {
+	qp_attr->ah_attr.grh.sgid_attr = NULL;
+	qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
+
 	return qp->device->query_qp ?
 		qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
 		-EOPNOTSUPP;
@@ -1509,6 +1810,8 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp)
 
 int ib_destroy_qp(struct ib_qp *qp)
 {
+	const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
+	const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
 	struct ib_pd *pd;
 	struct ib_cq *scq, *rcq;
 	struct ib_srq *srq;
@@ -1539,6 +1842,10 @@ int ib_destroy_qp(struct ib_qp *qp)
 	rdma_restrack_del(&qp->res);
 	ret = qp->device->destroy_qp(qp);
 	if (!ret) {
+		if (alt_path_sgid_attr)
+			rdma_put_gid_attr(alt_path_sgid_attr);
+		if (av_sgid_attr)
+			rdma_put_gid_attr(av_sgid_attr);
 		if (pd)
 			atomic_dec(&pd->usecnt);
 		if (scq)
@@ -1977,35 +2284,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
 }
 EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
 
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
-			       struct ib_flow_attr *flow_attr,
-			       int domain)
-{
-	struct ib_flow *flow_id;
-	if (!qp->device->create_flow)
-		return ERR_PTR(-EOPNOTSUPP);
-
-	flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
-	if (!IS_ERR(flow_id)) {
-		atomic_inc(&qp->usecnt);
-		flow_id->qp = qp;
-	}
-	return flow_id;
-}
-EXPORT_SYMBOL(ib_create_flow);
-
-int ib_destroy_flow(struct ib_flow *flow_id)
-{
-	int err;
-	struct ib_qp *qp = flow_id->qp;
-
-	err = qp->device->destroy_flow(flow_id);
-	if (!err)
-		atomic_dec(&qp->usecnt);
-	return err;
-}
-EXPORT_SYMBOL(ib_destroy_flow);
-
 int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
 		       struct ib_mr_status *mr_status)
 {
@@ -2200,7 +2478,6 @@ static void __ib_drain_sq(struct ib_qp *qp)
 	struct ib_cq *cq = qp->send_cq;
 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 	struct ib_drain_cqe sdrain;
-	struct ib_send_wr *bad_swr;
 	struct ib_rdma_wr swr = {
 		.wr = {
 			.next = NULL,
@@ -2219,7 +2496,7 @@ static void __ib_drain_sq(struct ib_qp *qp)
 	sdrain.cqe.done = ib_drain_qp_done;
 	init_completion(&sdrain.done);
 
-	ret = ib_post_send(qp, &swr.wr, &bad_swr);
+	ret = ib_post_send(qp, &swr.wr, NULL);
 	if (ret) {
 		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
 		return;
@@ -2240,7 +2517,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
 	struct ib_cq *cq = qp->recv_cq;
 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
 	struct ib_drain_cqe rdrain;
-	struct ib_recv_wr rwr = {}, *bad_rwr;
+	struct ib_recv_wr rwr = {};
 	int ret;
 
 	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
@@ -2253,7 +2530,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
 	rdrain.cqe.done = ib_drain_qp_done;
 	init_completion(&rdrain.done);
 
-	ret = ib_post_recv(qp, &rwr, &bad_rwr);
+	ret = ib_post_recv(qp, &rwr, NULL);
 	if (ret) {
 		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
 		return;
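
The verbs.c changes above establish a single ownership rule for the sgid_attr reference held inside an rdma_ah_attr: copy and replace take a new reference, move transfers the existing one, and destroy drops it. Here is a minimal user-space sketch of those rules using a toy refcounted object; the types and helpers below are simplified stand-ins, not the kernel structures.

/* Minimal sketch of the copy/move/replace/destroy ownership rules the
 * new ah_attr helpers document, using a toy refcounted object instead
 * of the kernel's ib_gid_attr.  Names and types are illustrative. */
#include <assert.h>

struct gid_ref { int refcount; };
struct toy_ah_attr { struct gid_ref *sgid; };

static void hold(struct gid_ref *g) { if (g) g->refcount++; }
static void put(struct gid_ref *g)  { if (g) g->refcount--; }

static void attr_destroy(struct toy_ah_attr *a)
{
	put(a->sgid);
	a->sgid = NULL;
}

static void attr_copy(struct toy_ah_attr *dst, const struct toy_ah_attr *src)
{
	*dst = *src;          /* like rdma_copy_ah_attr: dest contents invalid */
	hold(dst->sgid);
}

static void attr_replace(struct toy_ah_attr *old, const struct toy_ah_attr *new)
{
	attr_destroy(old);    /* drop the old reference first */
	*old = *new;
	hold(old->sgid);
}

static void attr_move(struct toy_ah_attr *dst, struct toy_ah_attr *src)
{
	attr_destroy(dst);
	*dst = *src;          /* ownership transfers, no new reference */
	src->sgid = NULL;
}

int main(void)
{
	struct gid_ref g = { .refcount = 1 };
	struct toy_ah_attr a = { .sgid = &g }, b = { 0 }, c = { 0 };

	attr_copy(&b, &a);    /* refcount 2 */
	attr_move(&c, &b);    /* still 2, b no longer owns anything */
	attr_replace(&c, &a); /* drop c's ref, take a new one: still 2 */
	attr_destroy(&a);
	attr_destroy(&c);
	assert(g.refcount == 0);
	return 0;
}
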
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index a76e206..bbfb86e 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -166,7 +166,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
 				    | IB_DEVICE_MEM_WINDOW
 				    | IB_DEVICE_MEM_WINDOW_TYPE_2B
 				    | IB_DEVICE_MEM_MGT_EXTENSIONS;
-	ib_attr->max_sge = dev_attr->max_qp_sges;
+	ib_attr->max_send_sge = dev_attr->max_qp_sges;
+	ib_attr->max_recv_sge = dev_attr->max_qp_sges;
 	ib_attr->max_sge_rd = dev_attr->max_qp_sges;
 	ib_attr->max_cq = dev_attr->max_cq;
 	ib_attr->max_cqe = dev_attr->max_cq_wqes;
@@ -243,8 +244,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
 	port_attr->gid_tbl_len = dev_attr->max_sgid;
 	port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
 				    IB_PORT_DEVICE_MGMT_SUP |
-				    IB_PORT_VENDOR_CLASS_SUP |
-				    IB_PORT_IP_BASED_GIDS;
+				    IB_PORT_VENDOR_CLASS_SUP;
+	port_attr->ip_gids = true;
 
 	port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW;
 	port_attr->bad_pkey_cntr = 0;
@@ -364,8 +365,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
 	return rc;
 }
 
-int bnxt_re_add_gid(const union ib_gid *gid,
-		    const struct ib_gid_attr *attr, void **context)
+int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
 {
 	int rc;
 	u32 tbl_idx = 0;
@@ -377,7 +377,7 @@ int bnxt_re_add_gid(const union ib_gid *gid,
 	if ((attr->ndev) && is_vlan_dev(attr->ndev))
 		vlan_id = vlan_dev_vlan_id(attr->ndev);
 
-	rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid,
+	rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid,
 				 rdev->qplib_res.netdev->dev_addr,
 				 vlan_id, true, &tbl_idx);
 	if (rc == -EALREADY) {
@@ -673,8 +673,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
 	int rc;
 	u8 nw_type;
 
-	struct ib_gid_attr sgid_attr;
-
 	if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
 		dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
 		return ERR_PTR(-EINVAL);
@@ -705,20 +703,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
 				    grh->dgid.raw) &&
 	    !rdma_link_local_addr((struct in6_addr *)
 				  grh->dgid.raw)) {
-		union ib_gid sgid;
+		const struct ib_gid_attr *sgid_attr;
 
-		rc = ib_get_cached_gid(&rdev->ibdev, 1,
-				       grh->sgid_index, &sgid,
-				       &sgid_attr);
-		if (rc) {
-			dev_err(rdev_to_dev(rdev),
-				"Failed to query gid at index %d",
-				grh->sgid_index);
-			goto fail;
-		}
-		dev_put(sgid_attr.ndev);
+		sgid_attr = grh->sgid_attr;
 		/* Get network header type for this GID */
-		nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+		nw_type = rdma_gid_attr_network_type(sgid_attr);
 		switch (nw_type) {
 		case RDMA_NETWORK_IPV4:
 			ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
@@ -1408,7 +1397,7 @@ struct ib_srq *bnxt_re_create_srq(struct ib_pd *ib_pd,
 	}
 
 	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-		rc = -ENOTSUPP;
+		rc = -EOPNOTSUPP;
 		goto exit;
 	}
 
@@ -1530,8 +1519,8 @@ int bnxt_re_query_srq(struct ib_srq *ib_srq, struct ib_srq_attr *srq_attr)
 	return 0;
 }
 
-int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+int bnxt_re_post_srq_recv(struct ib_srq *ib_srq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr)
 {
 	struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq,
 					       ib_srq);
@@ -1599,9 +1588,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
 	enum ib_qp_state curr_qp_state, new_qp_state;
 	int rc, entries;
-	int status;
-	union ib_gid sgid;
-	struct ib_gid_attr sgid_attr;
 	unsigned int flags;
 	u8 nw_type;
 
@@ -1668,6 +1654,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	if (qp_attr_mask & IB_QP_AV) {
 		const struct ib_global_route *grh =
 			rdma_ah_read_grh(&qp_attr->ah_attr);
+		const struct ib_gid_attr *sgid_attr;
 
 		qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
 				     CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
@@ -1691,29 +1678,23 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 		ether_addr_copy(qp->qplib_qp.ah.dmac,
 				qp_attr->ah_attr.roce.dmac);
 
-		status = ib_get_cached_gid(&rdev->ibdev, 1,
-					   grh->sgid_index,
-					   &sgid, &sgid_attr);
-		if (!status) {
-			memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr,
-			       ETH_ALEN);
-			dev_put(sgid_attr.ndev);
-			nw_type = ib_gid_to_network_type(sgid_attr.gid_type,
-							 &sgid);
-			switch (nw_type) {
-			case RDMA_NETWORK_IPV4:
-				qp->qplib_qp.nw_type =
-					CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
-				break;
-			case RDMA_NETWORK_IPV6:
-				qp->qplib_qp.nw_type =
-					CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
-				break;
-			default:
-				qp->qplib_qp.nw_type =
-					CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
-				break;
-			}
+		sgid_attr = qp_attr->ah_attr.grh.sgid_attr;
+		memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr,
+		       ETH_ALEN);
+		nw_type = rdma_gid_attr_network_type(sgid_attr);
+		switch (nw_type) {
+		case RDMA_NETWORK_IPV4:
+			qp->qplib_qp.nw_type =
+				CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
+			break;
+		case RDMA_NETWORK_IPV6:
+			qp->qplib_qp.nw_type =
+				CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
+			break;
+		default:
+			qp->qplib_qp.nw_type =
+				CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
+			break;
 		}
 	}
 
@@ -1895,19 +1876,17 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 /* Routine for sending QP1 packets for RoCE V1 and V2
  */
 static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
-				     struct ib_send_wr *wr,
+				     const struct ib_send_wr *wr,
 				     struct bnxt_qplib_swqe *wqe,
 				     int payload_size)
 {
-	struct ib_device *ibdev = &qp->rdev->ibdev;
 	struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah,
 					     ib_ah);
 	struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah;
+	const struct ib_gid_attr *sgid_attr = ah->ib_ah.sgid_attr;
 	struct bnxt_qplib_sge sge;
-	union ib_gid sgid;
 	u8 nw_type;
 	u16 ether_type;
-	struct ib_gid_attr sgid_attr;
 	union ib_gid dgid;
 	bool is_eth = false;
 	bool is_vlan = false;
@@ -1920,22 +1899,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
 
 	memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
 
-	rc = ib_get_cached_gid(ibdev, 1,
-			       qplib_ah->host_sgid_index, &sgid,
-			       &sgid_attr);
-	if (rc) {
-		dev_err(rdev_to_dev(qp->rdev),
-			"Failed to query gid at index %d",
-			qplib_ah->host_sgid_index);
-		return rc;
-	}
-	if (sgid_attr.ndev) {
-		if (is_vlan_dev(sgid_attr.ndev))
-			vlan_id = vlan_dev_vlan_id(sgid_attr.ndev);
-		dev_put(sgid_attr.ndev);
-	}
+	if (is_vlan_dev(sgid_attr->ndev))
+		vlan_id = vlan_dev_vlan_id(sgid_attr->ndev);
 	/* Get network header type for this GID */
-	nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+	nw_type = rdma_gid_attr_network_type(sgid_attr);
 	switch (nw_type) {
 	case RDMA_NETWORK_IPV4:
 		nw_type = BNXT_RE_ROCEV2_IPV4_PACKET;
@@ -1948,9 +1915,9 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
 		break;
 	}
 	memcpy(&dgid.raw, &qplib_ah->dgid, 16);
-	is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+	is_udp = sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
 	if (is_udp) {
-		if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
+		if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) {
 			ip_version = 4;
 			ether_type = ETH_P_IP;
 		} else {
@@ -1983,9 +1950,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
 	}
 
 	if (is_grh || (ip_version == 6)) {
-		memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid));
+		memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid_attr->gid.raw,
+		       sizeof(sgid_attr->gid));
 		memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data,
-		       sizeof(sgid));
+		       sizeof(sgid_attr->gid));
 		qp->qp1_hdr.grh.hop_limit     = qplib_ah->hop_limit;
 	}
 
@@ -1995,7 +1963,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
 		qp->qp1_hdr.ip4.frag_off = htons(IP_DF);
 		qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit;
 
-		memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4);
+		memcpy(&qp->qp1_hdr.ip4.saddr, sgid_attr->gid.raw + 12, 4);
 		memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4);
 		qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr);
 	}
@@ -2080,7 +2048,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
  * and the MAD datagram out to the provided SGE.
  */
 static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
-					    struct ib_recv_wr *wr,
+					    const struct ib_recv_wr *wr,
 					    struct bnxt_qplib_swqe *wqe,
 					    int payload_size)
 {
@@ -2125,7 +2093,7 @@ static int is_ud_qp(struct bnxt_re_qp *qp)
 }
 
 static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
-				  struct ib_send_wr *wr,
+				  const struct ib_send_wr *wr,
 				  struct bnxt_qplib_swqe *wqe)
 {
 	struct bnxt_re_ah *ah = NULL;
@@ -2163,7 +2131,7 @@ static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
 	return 0;
 }
 
-static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr,
+static int bnxt_re_build_rdma_wqe(const struct ib_send_wr *wr,
 				  struct bnxt_qplib_swqe *wqe)
 {
 	switch (wr->opcode) {
@@ -2195,7 +2163,7 @@ static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr,
 	return 0;
 }
 
-static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
+static int bnxt_re_build_atomic_wqe(const struct ib_send_wr *wr,
 				    struct bnxt_qplib_swqe *wqe)
 {
 	switch (wr->opcode) {
@@ -2222,7 +2190,7 @@ static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
 	return 0;
 }
 
-static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
+static int bnxt_re_build_inv_wqe(const struct ib_send_wr *wr,
 				 struct bnxt_qplib_swqe *wqe)
 {
 	wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
@@ -2241,7 +2209,7 @@ static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
 	return 0;
 }
 
-static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
+static int bnxt_re_build_reg_wqe(const struct ib_reg_wr *wr,
 				 struct bnxt_qplib_swqe *wqe)
 {
 	struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr);
@@ -2283,7 +2251,7 @@ static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
 }
 
 static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
-				    struct ib_send_wr *wr,
+				    const struct ib_send_wr *wr,
 				    struct bnxt_qplib_swqe *wqe)
 {
 	/*  Copy the inline data to the data  field */
@@ -2313,7 +2281,7 @@ static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
 }
 
 static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
-				   struct ib_send_wr *wr,
+				   const struct ib_send_wr *wr,
 				   struct bnxt_qplib_swqe *wqe)
 {
 	int payload_sz = 0;
@@ -2345,7 +2313,7 @@ static void bnxt_ud_qp_hw_stall_workaround(struct bnxt_re_qp *qp)
 
 static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
 				       struct bnxt_re_qp *qp,
-				struct ib_send_wr *wr)
+				       const struct ib_send_wr *wr)
 {
 	struct bnxt_qplib_swqe wqe;
 	int rc = 0, payload_sz = 0;
@@ -2393,8 +2361,8 @@ static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
 	return rc;
 }
 
-int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+int bnxt_re_post_send(struct ib_qp *ib_qp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr)
 {
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_qplib_swqe wqe;
@@ -2441,7 +2409,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
 			default:
 				break;
 			}
-			/* Fall thru to build the wqe */
+			/* fall through */
 		case IB_WR_SEND_WITH_INV:
 			rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
 			break;
@@ -2493,7 +2461,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
 
 static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
 				       struct bnxt_re_qp *qp,
-				       struct ib_recv_wr *wr)
+				       const struct ib_recv_wr *wr)
 {
 	struct bnxt_qplib_swqe wqe;
 	int rc = 0;
@@ -2526,8 +2494,8 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
 	return rc;
 }
 
-int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
 	struct bnxt_qplib_swqe wqe;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 5c6414c..aa33e7b 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -158,8 +158,7 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
 int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
 		       u16 index, u16 *pkey);
 int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context);
-int bnxt_re_add_gid(const union ib_gid *gid,
-		    const struct ib_gid_attr *attr, void **context);
+int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context);
 int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
 		      int index, union ib_gid *gid);
 enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
@@ -182,8 +181,8 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr,
 		       struct ib_udata *udata);
 int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int bnxt_re_destroy_srq(struct ib_srq *srq);
-int bnxt_re_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *recv_wr,
-			  struct ib_recv_wr **bad_recv_wr);
+int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr,
+			  const struct ib_recv_wr **bad_recv_wr);
 struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *qp_init_attr,
 				struct ib_udata *udata);
@@ -192,10 +191,10 @@ int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 		     int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
 int bnxt_re_destroy_qp(struct ib_qp *qp);
-int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
-		      struct ib_send_wr **bad_send_wr);
-int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
-		      struct ib_recv_wr **bad_recv_wr);
+int bnxt_re_post_send(struct ib_qp *qp, const struct ib_send_wr *send_wr,
+		      const struct ib_send_wr **bad_send_wr);
+int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+		      const struct ib_recv_wr **bad_recv_wr);
 struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 				const struct ib_cq_init_attr *attr,
 				struct ib_ucontext *context,
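
The prototypes above follow the series-wide convention: work request lists are passed as const pointers, and the bad_wr out-pointer may be NULL when the caller has no use for it (as the __ib_drain_sq()/__ib_drain_rq() hunks earlier do). A small stand-alone sketch of that calling convention, with toy types rather than the RDMA verbs structures:

/* Toy model of the post-verb convention after this series: the work
 * request list is const, and the bad_wr out-pointer may be NULL when
 * the caller does not need to know which WR failed.  Types here are
 * simplified stand-ins. */
#include <stddef.h>
#include <stdio.h>

struct toy_wr { int id; const struct toy_wr *next; };

static int toy_post(const struct toy_wr *wr, const struct toy_wr **bad_wr)
{
	for (; wr; wr = wr->next) {
		if (wr->id < 0) {           /* pretend this WR is malformed */
			if (bad_wr)
				*bad_wr = wr;
			return -22;         /* -EINVAL-style failure */
		}
	}
	return 0;
}

int main(void)
{
	struct toy_wr w2 = { .id = -1, .next = NULL };
	struct toy_wr w1 = { .id = 1, .next = &w2 };
	const struct toy_wr *bad;

	/* Caller that wants the failing WR back. */
	if (toy_post(&w1, &bad))
		printf("failed at wr %d\n", bad->id);

	/* Caller that doesn't care, as in the drain helpers. */
	toy_post(&w1, NULL);
	return 0;
}
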
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 50d8f1f..e426b99 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -2354,7 +2354,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
 		srq = qp->srq;
 		if (!srq)
 			return -EINVAL;
-		if (wr_id_idx > srq->hwq.max_elements) {
+		if (wr_id_idx >= srq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: FP: CQ Process RC ");
 			dev_err(&cq->hwq.pdev->dev,
@@ -2369,7 +2369,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
 		*pcqe = cqe;
 	} else {
 		rq = &qp->rq;
-		if (wr_id_idx > rq->hwq.max_elements) {
+		if (wr_id_idx >= rq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: FP: CQ Process RC ");
 			dev_err(&cq->hwq.pdev->dev,
@@ -2437,7 +2437,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 		if (!srq)
 			return -EINVAL;
 
-		if (wr_id_idx > srq->hwq.max_elements) {
+		if (wr_id_idx >= srq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: FP: CQ Process UD ");
 			dev_err(&cq->hwq.pdev->dev,
@@ -2452,7 +2452,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
 		*pcqe = cqe;
 	} else {
 		rq = &qp->rq;
-		if (wr_id_idx > rq->hwq.max_elements) {
+		if (wr_id_idx >= rq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: FP: CQ Process UD ");
 			dev_err(&cq->hwq.pdev->dev,
@@ -2546,7 +2546,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
 				"QPLIB: FP: SRQ used but not defined??");
 			return -EINVAL;
 		}
-		if (wr_id_idx > srq->hwq.max_elements) {
+		if (wr_id_idx >= srq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: FP: CQ Process Raw/QP1 ");
 			dev_err(&cq->hwq.pdev->dev,
@@ -2561,7 +2561,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
 		*pcqe = cqe;
 	} else {
 		rq = &qp->rq;
-		if (wr_id_idx > rq->hwq.max_elements) {
+		if (wr_id_idx >= rq->hwq.max_elements) {
 			dev_err(&cq->hwq.pdev->dev,
 				"QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
 			dev_err(&cq->hwq.pdev->dev,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 2f3f32ea..4097f3fa 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -197,7 +197,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
 			struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
 			struct bnxt_qplib_gid *gid)
 {
-	if (index > sgid_tbl->max) {
+	if (index >= sgid_tbl->max) {
 		dev_err(&res->pdev->dev,
 			"QPLIB: Index %d exceeded SGID table max (%d)",
 			index, sgid_tbl->max);
@@ -402,7 +402,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
 		*pkey = 0xFFFF;
 		return 0;
 	}
-	if (index > pkey_tbl->max) {
+	if (index >= pkey_tbl->max) {
 		dev_err(&res->pdev->dev,
 			"QPLIB: Index %d exceeded PKEY table max (%d)",
 			index, pkey_tbl->max);
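
The qplib hunks above tighten the bounds checks from '>' to '>=' because a table sized 'max' only has valid indices 0..max-1. A tiny sketch of the corrected check (toy helper, not the qplib code):

/* Why the qplib checks switch from '>' to '>=': for a table of 'max'
 * entries the valid indices are 0..max-1, so index == max is already
 * out of range. */
#include <assert.h>
#include <stdbool.h>

static bool index_ok(unsigned int index, unsigned int max)
{
	return index < max;          /* equivalent to rejecting index >= max */
}

int main(void)
{
	assert(index_ok(0, 16));
	assert(index_ok(15, 16));
	assert(!index_ok(16, 16));   /* the case the old '>' check let through */
	return 0;
}
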
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 0a8542c..a098c01 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -32,38 +32,16 @@
 #include "iwch_provider.h"
 #include "iwch.h"
 
-/*
- * Get one cq entry from cxio and map it to openib.
- *
- * Returns:
- *	0			EMPTY;
- *	1			cqe returned
- *	-EAGAIN		caller must try again
- *	any other -errno	fatal error
- */
-static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
-			    struct ib_wc *wc)
+static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
+			      struct iwch_qp *qhp, struct ib_wc *wc)
 {
-	struct iwch_qp *qhp = NULL;
-	struct t3_cqe cqe, *rd_cqe;
-	struct t3_wq *wq;
+	struct t3_wq *wq = qhp ? &qhp->wq : NULL;
+	struct t3_cqe cqe;
 	u32 credit = 0;
 	u8 cqe_flushed;
 	u64 cookie;
 	int ret = 1;
 
-	rd_cqe = cxio_next_cqe(&chp->cq);
-
-	if (!rd_cqe)
-		return 0;
-
-	qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
-	if (!qhp)
-		wq = NULL;
-	else {
-		spin_lock(&qhp->lock);
-		wq = &(qhp->wq);
-	}
 	ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
 				   &credit);
 	if (t3a_device(chp->rhp) && credit) {
@@ -79,7 +57,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
 	ret = 1;
 
 	wc->wr_id = cookie;
-	wc->qp = &qhp->ibqp;
+	wc->qp = qhp ? &qhp->ibqp : NULL;
 	wc->vendor_err = CQE_STATUS(cqe);
 	wc->wc_flags = 0;
 
@@ -182,8 +160,38 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
 		}
 	}
 out:
-	if (wq)
+	return ret;
+}
+
+/*
+ * Get one cq entry from cxio and map it to openib.
+ *
+ * Returns:
+ *	0			EMPTY;
+ *	1			cqe returned
+ *	-EAGAIN		caller must try again
+ *	any other -errno	fatal error
+ */
+static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
+			    struct ib_wc *wc)
+{
+	struct iwch_qp *qhp;
+	struct t3_cqe *rd_cqe;
+	int ret;
+
+	rd_cqe = cxio_next_cqe(&chp->cq);
+
+	if (!rd_cqe)
+		return 0;
+
+	qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
+	if (qhp) {
+		spin_lock(&qhp->lock);
+		ret = __iwch_poll_cq_one(rhp, chp, qhp, wc);
 		spin_unlock(&qhp->lock);
+	} else {
+		ret = __iwch_poll_cq_one(rhp, chp, NULL, wc);
+	}
 	return ret;
 }
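
The reworked iwch_poll_cq_one() takes the QP lock only when a QP was found and pushes the CQE handling into __iwch_poll_cq_one(), so lock and unlock now sit next to each other in a single function. A compact sketch of that shape, using a pthread mutex as a stand-in for the spinlock and placeholder types:

/* Sketch of the "lock around a helper" shape used by the new
 * iwch_poll_cq_one(): take the lock only when an object exists, call
 * one helper either way, and unlock in a single spot. */
#include <pthread.h>
#include <stdio.h>

struct toy_qp { pthread_mutex_t lock; int id; };

static int poll_one_locked(struct toy_qp *qp)
{
	/* Work that may or may not have a QP associated with the CQE. */
	return qp ? qp->id : 0;
}

static int poll_one(struct toy_qp *qp)
{
	int ret;

	if (qp) {
		pthread_mutex_lock(&qp->lock);
		ret = poll_one_locked(qp);
		pthread_mutex_unlock(&qp->lock);
	} else {
		ret = poll_one_locked(NULL);
	}
	return ret;
}

int main(void)
{
	struct toy_qp qp = { .lock = PTHREAD_MUTEX_INITIALIZER, .id = 7 };

	printf("%d %d\n", poll_one(&qp), poll_one(NULL));
	return 0;
}
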
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index be097c6..1b9ff21 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -61,42 +61,6 @@
 #include <rdma/cxgb3-abi.h>
 #include "common.h"
 
-static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
-				    struct rdma_ah_attr *ah_attr,
-				    struct ib_udata *udata)
-{
-	return ERR_PTR(-ENOSYS);
-}
-
-static int iwch_ah_destroy(struct ib_ah *ah)
-{
-	return -ENOSYS;
-}
-
-static int iwch_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	return -ENOSYS;
-}
-
-static int iwch_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	return -ENOSYS;
-}
-
-static int iwch_process_mad(struct ib_device *ibdev,
-			    int mad_flags,
-			    u8 port_num,
-			    const struct ib_wc *in_wc,
-			    const struct ib_grh *in_grh,
-			    const struct ib_mad_hdr *in_mad,
-			    size_t in_mad_size,
-			    struct ib_mad_hdr *out_mad,
-			    size_t *out_mad_size,
-			    u16 *out_mad_pkey_index)
-{
-	return -ENOSYS;
-}
-
 static int iwch_dealloc_ucontext(struct ib_ucontext *context)
 {
 	struct iwch_dev *rhp = to_iwch_dev(context->device);
@@ -1103,7 +1067,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
 	props->max_mr_size = dev->attr.max_mr_size;
 	props->max_qp = dev->attr.max_qps;
 	props->max_qp_wr = dev->attr.max_wrs;
-	props->max_sge = dev->attr.max_sge_per_wr;
+	props->max_send_sge = dev->attr.max_sge_per_wr;
+	props->max_recv_sge = dev->attr.max_sge_per_wr;
 	props->max_sge_rd = 1;
 	props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
 	props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
@@ -1398,8 +1363,6 @@ int iwch_register_device(struct iwch_dev *dev)
 	dev->ibdev.mmap = iwch_mmap;
 	dev->ibdev.alloc_pd = iwch_allocate_pd;
 	dev->ibdev.dealloc_pd = iwch_deallocate_pd;
-	dev->ibdev.create_ah = iwch_ah_create;
-	dev->ibdev.destroy_ah = iwch_ah_destroy;
 	dev->ibdev.create_qp = iwch_create_qp;
 	dev->ibdev.modify_qp = iwch_ib_modify_qp;
 	dev->ibdev.destroy_qp = iwch_destroy_qp;
@@ -1414,9 +1377,6 @@ int iwch_register_device(struct iwch_dev *dev)
 	dev->ibdev.dealloc_mw = iwch_dealloc_mw;
 	dev->ibdev.alloc_mr = iwch_alloc_mr;
 	dev->ibdev.map_mr_sg = iwch_map_mr_sg;
-	dev->ibdev.attach_mcast = iwch_multicast_attach;
-	dev->ibdev.detach_mcast = iwch_multicast_detach;
-	dev->ibdev.process_mad = iwch_process_mad;
 	dev->ibdev.req_notify_cq = iwch_arm_cq;
 	dev->ibdev.post_send = iwch_post_send;
 	dev->ibdev.post_recv = iwch_post_receive;
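
With the -ENOSYS stubs removed here, unsupported verbs are simply left unset and the core reports -EOPNOTSUPP itself, as the new "if (!pd->device->create_ah)" check in verbs.c does. A toy sketch of that optional-callback dispatch; the names are placeholders, not the ib_device ops:

/* Toy version of optional-callback dispatch: a driver leaves
 * unsupported ops NULL instead of providing -ENOSYS stubs, and the
 * core returns a "not supported" error on its behalf. */
#include <stddef.h>
#include <stdio.h>

#define TOY_EOPNOTSUPP 95

struct toy_ops {
	int (*create_thing)(int arg);   /* may be NULL if unsupported */
};

static int core_create_thing(const struct toy_ops *ops, int arg)
{
	if (!ops->create_thing)
		return -TOY_EOPNOTSUPP;  /* core-level "not supported" */
	return ops->create_thing(arg);
}

static int hw_create_thing(int arg) { return arg * 2; }

int main(void)
{
	struct toy_ops with = { .create_thing = hw_create_thing };
	struct toy_ops without = { .create_thing = NULL };

	printf("%d %d\n", core_create_thing(&with, 3),
	       core_create_thing(&without, 3));
	return 0;
}
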
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 2e38dde..8adbe96 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -326,10 +326,10 @@ enum iwch_qp_query_flags {
 };
 
 u16 iwch_rqes_posted(struct iwch_qp *qhp);
-int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr);
-int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr);
+int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 int iwch_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg);
 int iwch_post_zb_read(struct iwch_ep *ep);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 3871e1f..c649faa 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -39,8 +39,8 @@
 
 #define NO_SUPPORT -1
 
-static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
-				u8 * flit_cnt)
+static int build_rdma_send(union t3_wr *wqe, const struct ib_send_wr *wr,
+			   u8 *flit_cnt)
 {
 	int i;
 	u32 plen;
@@ -84,8 +84,8 @@ static int build_rdma_send(union t3_wr *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
-static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
-				 u8 *flit_cnt)
+static int build_rdma_write(union t3_wr *wqe, const struct ib_send_wr *wr,
+			    u8 *flit_cnt)
 {
 	int i;
 	u32 plen;
@@ -125,8 +125,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
-static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
-				u8 *flit_cnt)
+static int build_rdma_read(union t3_wr *wqe, const struct ib_send_wr *wr,
+			   u8 *flit_cnt)
 {
 	if (wr->num_sge > 1)
 		return -EINVAL;
@@ -146,8 +146,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
 	return 0;
 }
 
-static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
-			  u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
+static int build_memreg(union t3_wr *wqe, const struct ib_reg_wr *wr,
+			u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
 {
 	struct iwch_mr *mhp = to_iwch_mr(wr->mr);
 	int i;
@@ -189,8 +189,8 @@ static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
 	return 0;
 }
 
-static int build_inv_stag(union t3_wr *wqe, struct ib_send_wr *wr,
-				u8 *flit_cnt)
+static int build_inv_stag(union t3_wr *wqe, const struct ib_send_wr *wr,
+			  u8 *flit_cnt)
 {
 	wqe->local_inv.stag = cpu_to_be32(wr->ex.invalidate_rkey);
 	wqe->local_inv.reserved = 0;
@@ -246,7 +246,7 @@ static int iwch_sgl2pbl_map(struct iwch_dev *rhp, struct ib_sge *sg_list,
 }
 
 static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
-				struct ib_recv_wr *wr)
+			   const struct ib_recv_wr *wr)
 {
 	int i, err = 0;
 	u32 pbl_addr[T3_MAX_SGE];
@@ -286,7 +286,7 @@ static int build_rdma_recv(struct iwch_qp *qhp, union t3_wr *wqe,
 }
 
 static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
-				struct ib_recv_wr *wr)
+				const struct ib_recv_wr *wr)
 {
 	int i;
 	u32 pbl_addr;
@@ -348,8 +348,8 @@ static int build_zero_stag_recv(struct iwch_qp *qhp, union t3_wr *wqe,
 	return 0;
 }
 
-int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+int iwch_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr)
 {
 	int err = 0;
 	u8 uninitialized_var(t3_wr_flit_cnt);
@@ -463,8 +463,8 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return err;
 }
 
-int iwch_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int iwch_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	int err = 0;
 	struct iwch_qp *qhp;
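
The cxgb3 changes above are purely mechanical: with the const-qualified verbs API,
post_send()/post_recv() and their WQE builders may read but never modify the caller's
WR chain, and *bad_wr likewise points at const. A minimal sketch of the shape every
provider post hook now takes; my_queue_one_recv() is a hypothetical stand-in for the
driver's real WQE builder, not iwch code:

    /*
     * Sketch only: my_queue_one_recv() is a made-up placeholder; the
     * provider must treat *wr as read-only and report the failing WR.
     */
    static int my_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
                            const struct ib_recv_wr **bad_wr)
    {
            int err = 0;

            for (; wr; wr = wr->next) {
                    err = my_queue_one_recv(ibqp, wr);
                    if (err) {
                            *bad_wr = wr;
                            break;
                    }
            }
            return err;
    }
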
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0912fa0..0f83cbe 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -587,24 +587,29 @@ static int send_flowc(struct c4iw_ep *ep)
 {
 	struct fw_flowc_wr *flowc;
 	struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
-	int i;
 	u16 vlan = ep->l2t->vlan;
 	int nparams;
+	int flowclen, flowclen16;
 
 	if (WARN_ON(!skb))
 		return -ENOMEM;
 
 	if (vlan == CPL_L2T_VLAN_NONE)
-		nparams = 8;
-	else
 		nparams = 9;
+	else
+		nparams = 10;
 
-	flowc = __skb_put(skb, FLOWC_LEN);
+	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
+	flowclen16 = DIV_ROUND_UP(flowclen, 16);
+	flowclen = flowclen16 * 16;
+
+	flowc = __skb_put(skb, flowclen);
+	memset(flowc, 0, flowclen);
 
 	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
 					   FW_FLOWC_WR_NPARAMS_V(nparams));
-	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(FLOWC_LEN,
-					  16)) | FW_WR_FLOWID_V(ep->hwtid));
+	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
+					  FW_WR_FLOWID_V(ep->hwtid));
 
 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
 	flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
@@ -623,21 +628,13 @@ static int send_flowc(struct c4iw_ep *ep)
 	flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
 	flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
 	flowc->mnemval[7].val = cpu_to_be32(ep->emss);
-	if (nparams == 9) {
+	flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
+	flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale);
+	if (nparams == 10) {
 		u16 pri;
-
 		pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-		flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
-		flowc->mnemval[8].val = cpu_to_be32(pri);
-	} else {
-		/* Pad WR to 16 byte boundary */
-		flowc->mnemval[8].mnemonic = 0;
-		flowc->mnemval[8].val = 0;
-	}
-	for (i = 0; i < 9; i++) {
-		flowc->mnemval[i].r4[0] = 0;
-		flowc->mnemval[i].r4[1] = 0;
-		flowc->mnemval[i].r4[2] = 0;
+		flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
+		flowc->mnemval[9].val = cpu_to_be32(pri);
 	}
 
 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
@@ -1176,6 +1173,7 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct c4iw_ep *ep;
 	struct cpl_act_establish *req = cplhdr(skb);
+	unsigned short tcp_opt = ntohs(req->tcp_opt);
 	unsigned int tid = GET_TID(req);
 	unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
 	struct tid_info *t = dev->rdev.lldi.tids;
@@ -1196,8 +1194,9 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	ep->snd_seq = be32_to_cpu(req->snd_isn);
 	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
+	ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
 
-	set_emss(ep, ntohs(req->tcp_opt));
+	set_emss(ep, tcp_opt);
 
 	/* dealloc the atid */
 	remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
@@ -1853,10 +1852,33 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
 	return 0;
 }
 
+static void complete_cached_srq_buffers(struct c4iw_ep *ep,
+					__be32 srqidx_status)
+{
+	enum chip_type adapter_type;
+	u32 srqidx;
+
+	adapter_type = ep->com.dev->rdev.lldi.adapter_type;
+	srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(srqidx_status));
+
+	/*
+	 * If this TCB had a srq buffer cached, then we must complete
+	 * it. For user mode, that means saving the srqidx in the
+	 * user/kernel status page for this qp.  For kernel mode, just
+	 * synthesize the CQE now.
+	 */
+	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
+		if (ep->com.qp->ibqp.uobject)
+			t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
+		else
+			c4iw_flush_srqidx(ep->com.qp, srqidx);
+	}
+}
+
 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 {
 	struct c4iw_ep *ep;
-	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
+	struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
 	int release = 0;
 	unsigned int tid = GET_TID(rpl);
 
@@ -1865,6 +1887,9 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 		pr_warn("Abort rpl to freed endpoint\n");
 		return 0;
 	}
+
+	complete_cached_srq_buffers(ep, rpl->srqidx_status);
+
 	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 	mutex_lock(&ep->com.mutex);
 	switch (ep->com.state) {
@@ -2603,16 +2628,17 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
 	struct cpl_pass_establish *req = cplhdr(skb);
 	unsigned int tid = GET_TID(req);
 	int ret;
+	u16 tcp_opt = ntohs(req->tcp_opt);
 
 	ep = get_ep_from_tid(dev, tid);
 	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
 	ep->snd_seq = be32_to_cpu(req->snd_isn);
 	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
+	ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
 
-	pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid,
-		 ntohs(req->tcp_opt));
+	pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt);
 
-	set_emss(ep, ntohs(req->tcp_opt));
+	set_emss(ep, tcp_opt);
 
 	dst_confirm(ep->dst);
 	mutex_lock(&ep->com.mutex);
@@ -2719,28 +2745,35 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
 
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
-	struct cpl_abort_req_rss *req = cplhdr(skb);
+	struct cpl_abort_req_rss6 *req = cplhdr(skb);
 	struct c4iw_ep *ep;
 	struct sk_buff *rpl_skb;
 	struct c4iw_qp_attributes attrs;
 	int ret;
 	int release = 0;
 	unsigned int tid = GET_TID(req);
+	u8 status;
+
 	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
 
 	ep = get_ep_from_tid(dev, tid);
 	if (!ep)
 		return 0;
 
-	if (cxgb_is_neg_adv(req->status)) {
+	status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
+
+	if (cxgb_is_neg_adv(status)) {
 		pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
-			 ep->hwtid, req->status, neg_adv_str(req->status));
+			 ep->hwtid, status, neg_adv_str(status));
 		ep->stats.abort_neg_adv++;
 		mutex_lock(&dev->rdev.stats.lock);
 		dev->rdev.stats.neg_adv++;
 		mutex_unlock(&dev->rdev.stats.lock);
 		goto deref_ep;
 	}
+
+	complete_cached_srq_buffers(ep, req->srqidx_status);
+
 	pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
 		 ep->com.state);
 	set_bit(PEER_ABORT, &ep->com.history);
@@ -3444,9 +3477,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 	}
 	insert_handle(dev, &dev->stid_idr, ep, ep->stid);
 
-	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
-	       sizeof(ep->com.local_addr));
-
 	state_set(&ep->com, LISTEN);
 	if (ep->com.local_addr.ss_family == AF_INET)
 		err = create_server4(dev, ep);
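
send_flowc() now sizes the flowc work request from nparams instead of the fixed
FLOWC_LEN and rounds it up to whole 16-byte units as the firmware requires, because
the new FW_FLOWC_MNEM_RCV_SCALE entry pushes the optional SCHEDCLASS entry to index 9.
Rough arithmetic, assuming the 8-byte fw_flowc_wr header and 8-byte mnemval entries
implied by the old FLOWC_LEN of 80:

    nparams = 9  (no VLAN priority):  8 + 9*8  = 80 -> DIV_ROUND_UP(80, 16) = 5 -> 80 bytes
    nparams = 10 (VLAN priority):     8 + 10*8 = 88 -> DIV_ROUND_UP(88, 16) = 6 -> 96 bytes

The memset() of the rounded-up length replaces the old explicit zeroing of the r4[]
pad bytes and of the unused ninth entry.
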
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 2be2e1a..6d30427 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -77,6 +77,10 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	int user = (uctx != &rdev->uctx);
 	int ret;
 	struct sk_buff *skb;
+	struct c4iw_ucontext *ucontext = NULL;
+
+	if (user)
+		ucontext = container_of(uctx, struct c4iw_ucontext, uctx);
 
 	cq->cqid = c4iw_get_cqid(rdev, uctx);
 	if (!cq->cqid) {
@@ -100,6 +104,16 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	dma_unmap_addr_set(cq, mapping, cq->dma_addr);
 	memset(cq->queue, 0, cq->memsize);
 
+	if (user && ucontext->is_32b_cqe) {
+		cq->qp_errp = &((struct t4_status_page *)
+		((u8 *)cq->queue + (cq->size - 1) *
+		 (sizeof(*cq->queue) / 2)))->qp_err;
+	} else {
+		cq->qp_errp = &((struct t4_status_page *)
+		((u8 *)cq->queue + (cq->size - 1) *
+		 sizeof(*cq->queue)))->qp_err;
+	}
+
 	/* build fw_ri_res_wr */
 	wr_len = sizeof *res_wr + sizeof *res;
 
@@ -132,7 +146,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 			FW_RI_RES_WR_IQPCIECH_V(2) |
 			FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
 			FW_RI_RES_WR_IQO_F |
-			FW_RI_RES_WR_IQESIZE_V(1));
+			((user && ucontext->is_32b_cqe) ?
+			 FW_RI_RES_WR_IQESIZE_V(1) :
+			 FW_RI_RES_WR_IQESIZE_V(2)));
 	res->u.cq.iqsize = cpu_to_be16(cq->size);
 	res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
 
@@ -166,7 +182,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
 	return ret;
 }
 
-static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
+static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq, u32 srqidx)
 {
 	struct t4_cqe cqe;
 
@@ -179,6 +195,8 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
 				 CQE_SWCQE_V(1) |
 				 CQE_QPID_V(wq->sq.qid));
 	cqe.bits_type_ts = cpu_to_be64(CQE_GENBIT_V((u64)cq->gen));
+	if (srqidx)
+		cqe.u.srcqe.abs_rqe_idx = cpu_to_be32(srqidx);
 	cq->sw_queue[cq->sw_pidx] = cqe;
 	t4_swcq_produce(cq);
 }
@@ -191,7 +209,7 @@ int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count)
 	pr_debug("wq %p cq %p rq.in_use %u skip count %u\n",
 		 wq, cq, wq->rq.in_use, count);
 	while (in_use--) {
-		insert_recv_cqe(wq, cq);
+		insert_recv_cqe(wq, cq, 0);
 		flushed++;
 	}
 	return flushed;
@@ -442,6 +460,72 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
 	pr_debug("cq %p count %d\n", cq, *count);
 }
 
+static void post_pending_srq_wrs(struct t4_srq *srq)
+{
+	struct t4_srq_pending_wr *pwr;
+	u16 idx = 0;
+
+	while (srq->pending_in_use) {
+		pwr = &srq->pending_wrs[srq->pending_cidx];
+		srq->sw_rq[srq->pidx].wr_id = pwr->wr_id;
+		srq->sw_rq[srq->pidx].valid = 1;
+
+		pr_debug("%s posting pending cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+			 __func__,
+			 srq->cidx, srq->pidx, srq->wq_pidx,
+			 srq->in_use, srq->size,
+			 (unsigned long long)pwr->wr_id);
+
+		c4iw_copy_wr_to_srq(srq, &pwr->wqe, pwr->len16);
+		t4_srq_consume_pending_wr(srq);
+		t4_srq_produce(srq, pwr->len16);
+		idx += DIV_ROUND_UP(pwr->len16 * 16, T4_EQ_ENTRY_SIZE);
+	}
+
+	if (idx) {
+		t4_ring_srq_db(srq, idx, pwr->len16, &pwr->wqe);
+		srq->queue[srq->size].status.host_wq_pidx =
+			srq->wq_pidx;
+	}
+}
+
+static u64 reap_srq_cqe(struct t4_cqe *hw_cqe, struct t4_srq *srq)
+{
+	int rel_idx = CQE_ABS_RQE_IDX(hw_cqe) - srq->rqt_abs_idx;
+	u64 wr_id;
+
+	srq->sw_rq[rel_idx].valid = 0;
+	wr_id = srq->sw_rq[rel_idx].wr_id;
+
+	if (rel_idx == srq->cidx) {
+		pr_debug("%s in order cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u wr_id %llx\n",
+			 __func__, rel_idx, srq->cidx, srq->pidx,
+			 srq->wq_pidx, srq->in_use, srq->size,
+			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+		t4_srq_consume(srq);
+		while (srq->ooo_count && !srq->sw_rq[srq->cidx].valid) {
+			pr_debug("%s eat ooo cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+				 __func__, srq->cidx, srq->pidx,
+				 srq->wq_pidx, srq->in_use,
+				 srq->size, srq->ooo_count,
+				 (unsigned long long)
+				 srq->sw_rq[srq->cidx].wr_id);
+			t4_srq_consume_ooo(srq);
+		}
+		if (srq->ooo_count == 0 && srq->pending_in_use)
+			post_pending_srq_wrs(srq);
+	} else {
+		pr_debug("%s ooo cqe rel_idx %u cidx %u pidx %u wq_pidx %u in_use %u rq_size %u ooo_count %u wr_id %llx\n",
+			 __func__, rel_idx, srq->cidx,
+			 srq->pidx, srq->wq_pidx,
+			 srq->in_use, srq->size,
+			 srq->ooo_count,
+			 (unsigned long long)srq->sw_rq[rel_idx].wr_id);
+		t4_srq_produce_ooo(srq);
+	}
+	return wr_id;
+}
+
 /*
  * poll_cq
  *
@@ -459,7 +543,8 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
  *    -EOVERFLOW    CQ overflow detected.
  */
 static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
-		   u8 *cqe_flushed, u64 *cookie, u32 *credit)
+		   u8 *cqe_flushed, u64 *cookie, u32 *credit,
+		   struct t4_srq *srq)
 {
 	int ret = 0;
 	struct t4_cqe *hw_cqe, read_cqe;
@@ -524,7 +609,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		 */
 		if (CQE_TYPE(hw_cqe) == 1) {
 			if (CQE_STATUS(hw_cqe))
-				t4_set_wq_in_error(wq);
+				t4_set_wq_in_error(wq, 0);
 			ret = -EAGAIN;
 			goto skip_cqe;
 		}
@@ -535,7 +620,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		 */
 		if (CQE_WRID_STAG(hw_cqe) == 1) {
 			if (CQE_STATUS(hw_cqe))
-				t4_set_wq_in_error(wq);
+				t4_set_wq_in_error(wq, 0);
 			ret = -EAGAIN;
 			goto skip_cqe;
 		}
@@ -560,7 +645,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 
 	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
 		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
-		t4_set_wq_in_error(wq);
+		t4_set_wq_in_error(wq, 0);
 	}
 
 	/*
@@ -574,15 +659,9 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 		 * then we complete this with T4_ERR_MSN and mark the wq in
 		 * error.
 		 */
-
-		if (t4_rq_empty(wq)) {
-			t4_set_wq_in_error(wq);
-			ret = -EAGAIN;
-			goto skip_cqe;
-		}
 		if (unlikely(!CQE_STATUS(hw_cqe) &&
 			     CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
-			t4_set_wq_in_error(wq);
+			t4_set_wq_in_error(wq, 0);
 			hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
 		}
 		goto proc_cqe;
@@ -641,11 +720,16 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 			c4iw_log_wr_stats(wq, hw_cqe);
 		t4_sq_consume(wq);
 	} else {
-		pr_debug("completing rq idx %u\n", wq->rq.cidx);
-		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
-		if (c4iw_wr_log)
-			c4iw_log_wr_stats(wq, hw_cqe);
-		t4_rq_consume(wq);
+		if (!srq) {
+			pr_debug("completing rq idx %u\n", wq->rq.cidx);
+			*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
+			if (c4iw_wr_log)
+				c4iw_log_wr_stats(wq, hw_cqe);
+			t4_rq_consume(wq);
+		} else {
+			*cookie = reap_srq_cqe(hw_cqe, srq);
+		}
+		wq->rq.msn++;
 		goto skip_cqe;
 	}
 
@@ -668,46 +752,33 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
 	return ret;
 }
 
-/*
- * Get one cq entry from c4iw and map it to openib.
- *
- * Returns:
- *	0			cqe returned
- *	-ENODATA		EMPTY;
- *	-EAGAIN			caller must try again
- *	any other -errno	fatal error
- */
-static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
+static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
+			      struct ib_wc *wc, struct c4iw_srq *srq)
 {
-	struct c4iw_qp *qhp = NULL;
-	struct t4_cqe uninitialized_var(cqe), *rd_cqe;
-	struct t4_wq *wq;
+	struct t4_cqe uninitialized_var(cqe);
+	struct t4_wq *wq = qhp ? &qhp->wq : NULL;
 	u32 credit = 0;
 	u8 cqe_flushed;
 	u64 cookie = 0;
 	int ret;
 
-	ret = t4_next_cqe(&chp->cq, &rd_cqe);
-
-	if (ret)
-		return ret;
-
-	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
-	if (!qhp)
-		wq = NULL;
-	else {
-		spin_lock(&qhp->lock);
-		wq = &(qhp->wq);
-	}
-	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
+	ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit,
+		      srq ? &srq->wq : NULL);
 	if (ret)
 		goto out;
 
 	wc->wr_id = cookie;
-	wc->qp = &qhp->ibqp;
+	wc->qp = qhp ? &qhp->ibqp : NULL;
 	wc->vendor_err = CQE_STATUS(&cqe);
 	wc->wc_flags = 0;
 
+	/*
+	 * Simulate a SRQ_LIMIT_REACHED HW notification if required.
+	 */
+	if (srq && !(srq->flags & T4_SRQ_LIMIT_SUPPORT) && srq->armed &&
+	    srq->wq.in_use < srq->srq_limit)
+		c4iw_dispatch_srq_limit_reached_event(srq);
+
 	pr_debug("qpid 0x%x type %d opcode %d status 0x%x len %u wrid hi 0x%x lo 0x%x cookie 0x%llx\n",
 		 CQE_QPID(&cqe),
 		 CQE_TYPE(&cqe), CQE_OPCODE(&cqe),
@@ -720,15 +791,32 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 			wc->byte_len = CQE_LEN(&cqe);
 		else
 			wc->byte_len = 0;
-		wc->opcode = IB_WC_RECV;
-		if (CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_INV ||
-		    CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) {
+
+		switch (CQE_OPCODE(&cqe)) {
+		case FW_RI_SEND:
+			wc->opcode = IB_WC_RECV;
+			break;
+		case FW_RI_SEND_WITH_INV:
+		case FW_RI_SEND_WITH_SE_INV:
+			wc->opcode = IB_WC_RECV;
 			wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe);
 			wc->wc_flags |= IB_WC_WITH_INVALIDATE;
 			c4iw_invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey);
+			break;
+		case FW_RI_WRITE_IMMEDIATE:
+			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+			wc->ex.imm_data = CQE_IMM_DATA(&cqe);
+			wc->wc_flags |= IB_WC_WITH_IMM;
+			break;
+		default:
+			pr_err("Unexpected opcode %d in the CQE received for QPID=0x%0x\n",
+			       CQE_OPCODE(&cqe), CQE_QPID(&cqe));
+			ret = -EINVAL;
+			goto out;
 		}
 	} else {
 		switch (CQE_OPCODE(&cqe)) {
+		case FW_RI_WRITE_IMMEDIATE:
 		case FW_RI_RDMA_WRITE:
 			wc->opcode = IB_WC_RDMA_WRITE;
 			break;
@@ -819,8 +907,43 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 		}
 	}
 out:
-	if (wq)
+	return ret;
+}
+
+/*
+ * Get one cq entry from c4iw and map it to openib.
+ *
+ * Returns:
+ *	0			cqe returned
+ *	-ENODATA		EMPTY;
+ *	-EAGAIN			caller must try again
+ *	any other -errno	fatal error
+ */
+static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
+{
+	struct c4iw_srq *srq = NULL;
+	struct c4iw_qp *qhp = NULL;
+	struct t4_cqe *rd_cqe;
+	int ret;
+
+	ret = t4_next_cqe(&chp->cq, &rd_cqe);
+
+	if (ret)
+		return ret;
+
+	qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
+	if (qhp) {
+		spin_lock(&qhp->lock);
+		srq = qhp->srq;
+		if (srq)
+			spin_lock(&srq->lock);
+		ret = __c4iw_poll_cq_one(chp, qhp, wc, srq);
 		spin_unlock(&qhp->lock);
+		if (srq)
+			spin_unlock(&srq->lock);
+	} else {
+		ret = __c4iw_poll_cq_one(chp, NULL, wc, NULL);
+	}
 	return ret;
 }
 
@@ -876,6 +999,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 	int vector = attr->comp_vector;
 	struct c4iw_dev *rhp;
 	struct c4iw_cq *chp;
+	struct c4iw_create_cq ucmd;
 	struct c4iw_create_cq_resp uresp;
 	struct c4iw_ucontext *ucontext = NULL;
 	int ret, wr_len;
@@ -891,9 +1015,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 	if (vector >= rhp->rdev.lldi.nciq)
 		return ERR_PTR(-EINVAL);
 
+	if (ib_context) {
+		ucontext = to_c4iw_ucontext(ib_context);
+		if (udata->inlen < sizeof(ucmd))
+			ucontext->is_32b_cqe = 1;
+	}
+
 	chp = kzalloc(sizeof(*chp), GFP_KERNEL);
 	if (!chp)
 		return ERR_PTR(-ENOMEM);
+
 	chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
 	if (!chp->wr_waitp) {
 		ret = -ENOMEM;
@@ -908,9 +1039,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 		goto err_free_wr_wait;
 	}
 
-	if (ib_context)
-		ucontext = to_c4iw_ucontext(ib_context);
-
 	/* account for the status page. */
 	entries++;
 
@@ -934,13 +1062,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 	if (hwentries < 64)
 		hwentries = 64;
 
-	memsize = hwentries * sizeof *chp->cq.queue;
+	memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ?
+			(sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue));
 
 	/*
 	 * memsize must be a multiple of the page size if its a user cq.
 	 */
 	if (ucontext)
 		memsize = roundup(memsize, PAGE_SIZE);
+
 	chp->cq.size = hwentries;
 	chp->cq.memsize = memsize;
 	chp->cq.vector = vector;
@@ -971,6 +1101,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 		if (!mm2)
 			goto err_free_mm;
 
+		memset(&uresp, 0, sizeof(uresp));
 		uresp.qid_mask = rhp->rdev.cqmask;
 		uresp.cqid = chp->cq.cqid;
 		uresp.size = chp->cq.size;
@@ -980,9 +1111,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 		ucontext->key += PAGE_SIZE;
 		uresp.gts_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
+		/* Communicate to userspace that the kernel driver
+		 * supports 64B CQEs.
+		 */
+		uresp.flags |= C4IW_64B_CQE;
+
 		spin_unlock(&ucontext->mmap_lock);
 		ret = ib_copy_to_udata(udata, &uresp,
-				       sizeof(uresp) - sizeof(uresp.reserved));
+				       ucontext->is_32b_cqe ?
+				       sizeof(uresp) - sizeof(uresp.flags) :
+				       sizeof(uresp));
 		if (ret)
 			goto err_free_mm2;
 
@@ -1019,11 +1157,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 	return ERR_PTR(ret);
 }
 
-int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata)
-{
-	return -ENOSYS;
-}
-
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
 	struct c4iw_cq *chp;
@@ -1039,3 +1172,19 @@ int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 	spin_unlock_irqrestore(&chp->lock, flag);
 	return ret;
 }
+
+void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx)
+{
+	struct c4iw_cq *rchp = to_c4iw_cq(qhp->ibqp.recv_cq);
+	unsigned long flag;
+
+	/* locking hierarchy: cq lock first, then qp lock. */
+	spin_lock_irqsave(&rchp->lock, flag);
+	spin_lock(&qhp->lock);
+
+	/* create a SRQ RECV CQE for srqidx */
+	insert_recv_cqe(&qhp->wq, &rchp->cq, srqidx);
+
+	spin_unlock(&qhp->lock);
+	spin_unlock_irqrestore(&rchp->lock, flag);
+}
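
create_cq() now keys the per-entry size off the negotiated CQE format: userspace that
passes the new c4iw_create_cq command gets full-size CQEs, while an old library (short
or missing ucmd) is flagged is_32b_cqe and keeps half-size entries. The status page
still occupies the last CQE slot, which is why qp_errp is taken at (cq->size - 1)
entries into the queue. A rough sizing example, assuming sizeof(struct t4_cqe) is
64 bytes (consistent with dump_err_cqe() below now printing eight quadwords) and
4 KiB pages:

    hwentries = 64 (the enforced minimum, one entry reserved for the status page)
        64B CQEs: memsize = 64 * 64 = 4096 -> already one page for a user CQ
        32B CQEs: memsize = 64 * 32 = 2048 -> rounded up to PAGE_SIZE = 4096
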
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index a3c3418..c13c0ba 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -275,10 +275,11 @@ static int dump_qp(int id, void *p, void *data)
 
 			set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
 			cc = snprintf(qpd->buf + qpd->pos, space,
-				      "rc qp sq id %u rq id %u state %u "
+				      "rc qp sq id %u %s id %u state %u "
 				      "onchip %u ep tid %u state %u "
 				      "%pI4:%u/%u->%pI4:%u/%u\n",
-				      qp->wq.sq.qid, qp->wq.rq.qid,
+				      qp->wq.sq.qid, qp->srq ? "srq" : "rq",
+				      qp->srq ? qp->srq->idx : qp->wq.rq.qid,
 				      (int)qp->attr.state,
 				      qp->wq.sq.flags & T4_SQ_ONCHIP,
 				      ep->hwtid, (int)ep->com.state,
@@ -480,6 +481,9 @@ static int stats_show(struct seq_file *seq, void *v)
 	seq_printf(seq, "      QID: %10llu %10llu %10llu %10llu\n",
 			dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
 			dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
+	seq_printf(seq, "     SRQS: %10llu %10llu %10llu %10llu\n",
+		   dev->rdev.stats.srqt.total, dev->rdev.stats.srqt.cur,
+		   dev->rdev.stats.srqt.max, dev->rdev.stats.srqt.fail);
 	seq_printf(seq, "   TPTMEM: %10llu %10llu %10llu %10llu\n",
 			dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
 			dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
@@ -530,6 +534,8 @@ static ssize_t stats_clear(struct file *file, const char __user *buf,
 	dev->rdev.stats.pbl.fail = 0;
 	dev->rdev.stats.rqt.max = 0;
 	dev->rdev.stats.rqt.fail = 0;
+	dev->rdev.stats.srqt.max = 0;
+	dev->rdev.stats.srqt.fail = 0;
 	dev->rdev.stats.ocqp.max = 0;
 	dev->rdev.stats.ocqp.fail = 0;
 	dev->rdev.stats.db_full = 0;
@@ -802,7 +808,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 
 	rdev->qpmask = rdev->lldi.udb_density - 1;
 	rdev->cqmask = rdev->lldi.ucq_density - 1;
-	pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u\n",
+	pr_debug("dev %s stag start 0x%0x size 0x%0x num stags %d pbl start 0x%0x size 0x%0x rq start 0x%0x size 0x%0x qp qid start %u size %u cq qid start %u size %u srq size %u\n",
 		 pci_name(rdev->lldi.pdev), rdev->lldi.vr->stag.start,
 		 rdev->lldi.vr->stag.size, c4iw_num_stags(rdev),
 		 rdev->lldi.vr->pbl.start,
@@ -811,7 +817,8 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 		 rdev->lldi.vr->qp.start,
 		 rdev->lldi.vr->qp.size,
 		 rdev->lldi.vr->cq.start,
-		 rdev->lldi.vr->cq.size);
+		 rdev->lldi.vr->cq.size,
+		 rdev->lldi.vr->srq.size);
 	pr_debug("udb %pR db_reg %p gts_reg %p qpmask 0x%x cqmask 0x%x\n",
 		 &rdev->lldi.pdev->resource[2],
 		 rdev->lldi.db_reg, rdev->lldi.gts_reg,
@@ -824,10 +831,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	rdev->stats.stag.total = rdev->lldi.vr->stag.size;
 	rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
 	rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
+	rdev->stats.srqt.total = rdev->lldi.vr->srq.size;
 	rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
 	rdev->stats.qid.total = rdev->lldi.vr->qp.size;
 
-	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
+	err = c4iw_init_resource(rdev, c4iw_num_stags(rdev),
+				 T4_MAX_NUM_PD, rdev->lldi.vr->srq.size);
 	if (err) {
 		pr_err("error %d initializing resources\n", err);
 		return err;
@@ -857,6 +866,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
 	rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
 	rdev->status_page->cq_size = rdev->lldi.vr->cq.size;
+	rdev->status_page->write_cmpl_supported = rdev->lldi.write_cmpl_support;
 
 	if (c4iw_wr_log) {
 		rdev->wr_log = kcalloc(1 << c4iw_wr_log_size_order,
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index 3e9d8b2..8741d23 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -70,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
 		CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len),
 		CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
 
-	pr_debug("%016llx %016llx %016llx %016llx\n",
+	pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n",
 		 be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
-		 be64_to_cpu(p[3]));
+		 be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]),
+		 be64_to_cpu(p[6]), be64_to_cpu(p[7]));
 
 	/*
 	 * Ingress WRITE and READ_RESP errors provide
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 870649f..f0fcead 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -97,6 +97,7 @@ struct c4iw_resource {
 	struct c4iw_id_table tpt_table;
 	struct c4iw_id_table qid_table;
 	struct c4iw_id_table pdid_table;
+	struct c4iw_id_table srq_table;
 };
 
 struct c4iw_qid_list {
@@ -130,6 +131,8 @@ struct c4iw_stats {
 	struct c4iw_stat stag;
 	struct c4iw_stat pbl;
 	struct c4iw_stat rqt;
+	struct c4iw_stat srqt;
+	struct c4iw_stat srq;
 	struct c4iw_stat ocqp;
 	u64  db_full;
 	u64  db_empty;
@@ -549,6 +552,7 @@ struct c4iw_qp {
 	struct kref kref;
 	wait_queue_head_t wait;
 	int sq_sig_all;
+	struct c4iw_srq *srq;
 	struct work_struct free_work;
 	struct c4iw_ucontext *ucontext;
 	struct c4iw_wr_wait *wr_waitp;
@@ -559,6 +563,26 @@ static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
 	return container_of(ibqp, struct c4iw_qp, ibqp);
 }
 
+struct c4iw_srq {
+	struct ib_srq ibsrq;
+	struct list_head db_fc_entry;
+	struct c4iw_dev *rhp;
+	struct t4_srq wq;
+	struct sk_buff *destroy_skb;
+	u32 srq_limit;
+	u32 pdid;
+	int idx;
+	u32 flags;
+	spinlock_t lock; /* protects srq */
+	struct c4iw_wr_wait *wr_waitp;
+	bool armed;
+};
+
+static inline struct c4iw_srq *to_c4iw_srq(struct ib_srq *ibsrq)
+{
+	return container_of(ibsrq, struct c4iw_srq, ibsrq);
+}
+
 struct c4iw_ucontext {
 	struct ib_ucontext ibucontext;
 	struct c4iw_dev_ucontext uctx;
@@ -566,6 +590,7 @@ struct c4iw_ucontext {
 	spinlock_t mmap_lock;
 	struct list_head mmaps;
 	struct kref kref;
+	bool is_32b_cqe;
 };
 
 static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
@@ -885,7 +910,10 @@ enum conn_pre_alloc_buffers {
 	CN_MAX_CON_BUF
 };
 
-#define FLOWC_LEN 80
+enum {
+	FLOWC_LEN = offsetof(struct fw_flowc_wr, mnemval[FW_FLOWC_MNEM_MAX])
+};
+
 union cpl_wr_size {
 	struct cpl_abort_req abrt_req;
 	struct cpl_abort_rpl abrt_rpl;
@@ -952,6 +980,7 @@ struct c4iw_ep {
 	unsigned int retry_count;
 	int snd_win;
 	int rcv_win;
+	u32 snd_wscale;
 	struct c4iw_ep_stats stats;
 };
 
@@ -988,7 +1017,8 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
 		   struct c4iw_dev_ucontext *uctx);
 u32 c4iw_get_resource(struct c4iw_id_table *id_table);
 void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry);
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid);
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+		       u32 nr_pdid, u32 nr_srqt);
 int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
 int c4iw_pblpool_create(struct c4iw_rdev *rdev);
 int c4iw_rqtpool_create(struct c4iw_rdev *rdev);
@@ -1007,10 +1037,10 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
 void c4iw_init_dev_ucontext(struct c4iw_rdev *rdev,
 			    struct c4iw_dev_ucontext *uctx);
 int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr);
-int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr);
+int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog);
 int c4iw_destroy_listen(struct iw_cm_id *cm_id);
@@ -1037,8 +1067,14 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
 			     const struct ib_cq_init_attr *attr,
 			     struct ib_ucontext *ib_context,
 			     struct ib_udata *udata);
-int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata);
 int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
+		    enum ib_srq_attr_mask srq_attr_mask,
+		    struct ib_udata *udata);
+int c4iw_destroy_srq(struct ib_srq *ib_srq);
+struct ib_srq *c4iw_create_srq(struct ib_pd *pd,
+			       struct ib_srq_init_attr *attrs,
+			       struct ib_udata *udata);
 int c4iw_destroy_qp(struct ib_qp *ib_qp);
 struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
 			     struct ib_qp_init_attr *attrs,
@@ -1075,12 +1111,19 @@ extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
 void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
 			      enum cxgb4_bar2_qtype qtype,
 			      unsigned int *pbar2_qid, u64 *pbar2_pa);
+int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev);
+void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx);
 extern void c4iw_log_wr_stats(struct t4_wq *wq, struct t4_cqe *cqe);
 extern int c4iw_wr_log;
 extern int db_fc_threshold;
 extern int db_coalescing_threshold;
 extern int use_dsgl;
 void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
+void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq);
+void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16);
+void c4iw_flush_srqidx(struct c4iw_qp *qhp, u32 srqidx);
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr);
 struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp);
 
 typedef int c4iw_restrack_func(struct sk_buff *msg,
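
With c4iw_create_srq(), c4iw_modify_srq(), c4iw_destroy_srq() and c4iw_post_srq_recv()
declared here and wired up in provider.c below, kernel ULPs can drive the SRQ through
the usual verbs. A hedged consumer-side sketch (not part of this patch; DMA mapping,
SRQ teardown and error unwinding are omitted, and the 128/1 sizes are arbitrary
placeholders bounded by max_srq_wr/max_srq_sge):

    #include <rdma/ib_verbs.h>

    static int post_one_srq_buffer(struct ib_pd *pd, u64 dma_addr, u32 len)
    {
            struct ib_srq_init_attr init = {
                    .attr = {
                            .max_wr  = 128, /* bounded by props->max_srq_wr  */
                            .max_sge = 1,   /* bounded by props->max_srq_sge */
                    },
            };
            struct ib_sge sge = {
                    .addr   = dma_addr,
                    .length = len,
                    .lkey   = pd->local_dma_lkey,
            };
            struct ib_recv_wr wr = {
                    .wr_id   = dma_addr,
                    .sg_list = &sge,
                    .num_sge = 1,
            };
            const struct ib_recv_wr *bad_wr;
            struct ib_srq *srq;

            srq = ib_create_srq(pd, &init);
            if (IS_ERR(srq))
                    return PTR_ERR(srq);

            return ib_post_srq_recv(srq, &wr, &bad_wr);
    }
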
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 1feade8..4eda687 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -58,41 +58,6 @@ static int fastreg_support = 1;
 module_param(fastreg_support, int, 0644);
 MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
 
-static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
-				    struct rdma_ah_attr *ah_attr,
-				    struct ib_udata *udata)
-
-{
-	return ERR_PTR(-ENOSYS);
-}
-
-static int c4iw_ah_destroy(struct ib_ah *ah)
-{
-	return -ENOSYS;
-}
-
-static int c4iw_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	return -ENOSYS;
-}
-
-static int c4iw_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	return -ENOSYS;
-}
-
-static int c4iw_process_mad(struct ib_device *ibdev, int mad_flags,
-			    u8 port_num, const struct ib_wc *in_wc,
-			    const struct ib_grh *in_grh,
-			    const struct ib_mad_hdr *in_mad,
-			    size_t in_mad_size,
-			    struct ib_mad_hdr *out_mad,
-			    size_t *out_mad_size,
-			    u16 *out_mad_pkey_index)
-{
-	return -ENOSYS;
-}
-
 void _c4iw_free_ucontext(struct kref *kref)
 {
 	struct c4iw_ucontext *ucontext;
@@ -342,8 +307,12 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
 	props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
 	props->max_mr_size = T4_MAX_MR_SIZE;
 	props->max_qp = dev->rdev.lldi.vr->qp.size / 2;
+	props->max_srq = dev->rdev.lldi.vr->srq.size;
 	props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth;
-	props->max_sge = T4_MAX_RECV_SGE;
+	props->max_srq_wr = dev->rdev.hw_queue.t4_max_qp_depth;
+	props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE);
+	props->max_recv_sge = T4_MAX_RECV_SGE;
+	props->max_srq_sge = T4_MAX_RECV_SGE;
 	props->max_sge_rd = 1;
 	props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter;
 	props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp,
@@ -592,7 +561,10 @@ void c4iw_register_device(struct work_struct *work)
 	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
 	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
 	    (1ull << IB_USER_VERBS_CMD_POST_SEND) |
-	    (1ull << IB_USER_VERBS_CMD_POST_RECV);
+	    (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+	    (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+	    (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 	dev->ibdev.node_type = RDMA_NODE_RNIC;
 	BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);
 	memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
@@ -608,15 +580,15 @@ void c4iw_register_device(struct work_struct *work)
 	dev->ibdev.mmap = c4iw_mmap;
 	dev->ibdev.alloc_pd = c4iw_allocate_pd;
 	dev->ibdev.dealloc_pd = c4iw_deallocate_pd;
-	dev->ibdev.create_ah = c4iw_ah_create;
-	dev->ibdev.destroy_ah = c4iw_ah_destroy;
 	dev->ibdev.create_qp = c4iw_create_qp;
 	dev->ibdev.modify_qp = c4iw_ib_modify_qp;
 	dev->ibdev.query_qp = c4iw_ib_query_qp;
 	dev->ibdev.destroy_qp = c4iw_destroy_qp;
+	dev->ibdev.create_srq = c4iw_create_srq;
+	dev->ibdev.modify_srq = c4iw_modify_srq;
+	dev->ibdev.destroy_srq = c4iw_destroy_srq;
 	dev->ibdev.create_cq = c4iw_create_cq;
 	dev->ibdev.destroy_cq = c4iw_destroy_cq;
-	dev->ibdev.resize_cq = c4iw_resize_cq;
 	dev->ibdev.poll_cq = c4iw_poll_cq;
 	dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
 	dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
@@ -625,12 +597,10 @@ void c4iw_register_device(struct work_struct *work)
 	dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
 	dev->ibdev.alloc_mr = c4iw_alloc_mr;
 	dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
-	dev->ibdev.attach_mcast = c4iw_multicast_attach;
-	dev->ibdev.detach_mcast = c4iw_multicast_detach;
-	dev->ibdev.process_mad = c4iw_process_mad;
 	dev->ibdev.req_notify_cq = c4iw_arm_cq;
 	dev->ibdev.post_send = c4iw_post_send;
 	dev->ibdev.post_recv = c4iw_post_receive;
+	dev->ibdev.post_srq_recv = c4iw_post_srq_recv;
 	dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
 	dev->ibdev.get_hw_stats = c4iw_get_mib;
 	dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
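
c4iw_query_device() now fills the split max_send_sge/max_recv_sge attributes (the
single max_sge field is gone), advertising min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE) on
the send side. A small sketch of how a ULP would clamp its QP caps against the split
limits; clamp_qp_caps() and the want_sge value of 4 are illustrative, not from this
patch:

    #include <rdma/ib_verbs.h>

    static void clamp_qp_caps(struct ib_device *ibdev,
                              struct ib_qp_init_attr *attr)
    {
            const u32 want_sge = 4; /* per-ULP choice, not a driver limit */

            attr->cap.max_send_sge = min_t(u32, want_sge,
                                           ibdev->attrs.max_send_sge);
            attr->cap.max_recv_sge = min_t(u32, want_sge,
                                           ibdev->attrs.max_recv_sge);
    }
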
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index aef5330..b3203af 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -147,21 +147,24 @@ static int alloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq, int user)
 }
 
 static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
-		      struct c4iw_dev_ucontext *uctx)
+		      struct c4iw_dev_ucontext *uctx, int has_rq)
 {
 	/*
 	 * uP clears EQ contexts when the connection exits rdma mode,
 	 * so no need to post a RESET WR for these EQs.
 	 */
-	dma_free_coherent(&(rdev->lldi.pdev->dev),
-			  wq->rq.memsize, wq->rq.queue,
-			  dma_unmap_addr(&wq->rq, mapping));
 	dealloc_sq(rdev, &wq->sq);
-	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
-	kfree(wq->rq.sw_rq);
 	kfree(wq->sq.sw_sq);
-	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
+
+	if (has_rq) {
+		dma_free_coherent(&rdev->lldi.pdev->dev,
+				  wq->rq.memsize, wq->rq.queue,
+				  dma_unmap_addr(&wq->rq, mapping));
+		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+		kfree(wq->rq.sw_rq);
+		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+	}
 	return 0;
 }
 
@@ -195,7 +198,8 @@ void __iomem *c4iw_bar2_addrs(struct c4iw_rdev *rdev, unsigned int qid,
 static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		     struct t4_cq *rcq, struct t4_cq *scq,
 		     struct c4iw_dev_ucontext *uctx,
-		     struct c4iw_wr_wait *wr_waitp)
+		     struct c4iw_wr_wait *wr_waitp,
+		     int need_rq)
 {
 	int user = (uctx != &rdev->uctx);
 	struct fw_ri_res_wr *res_wr;
@@ -209,10 +213,12 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	if (!wq->sq.qid)
 		return -ENOMEM;
 
-	wq->rq.qid = c4iw_get_qpid(rdev, uctx);
-	if (!wq->rq.qid) {
-		ret = -ENOMEM;
-		goto free_sq_qid;
+	if (need_rq) {
+		wq->rq.qid = c4iw_get_qpid(rdev, uctx);
+		if (!wq->rq.qid) {
+			ret = -ENOMEM;
+			goto free_sq_qid;
+		}
 	}
 
 	if (!user) {
@@ -220,25 +226,31 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 				       GFP_KERNEL);
 		if (!wq->sq.sw_sq) {
 			ret = -ENOMEM;
-			goto free_rq_qid;
+			goto free_rq_qid; /* FIXME */
 		}
 
-		wq->rq.sw_rq = kcalloc(wq->rq.size, sizeof(*wq->rq.sw_rq),
-				       GFP_KERNEL);
-		if (!wq->rq.sw_rq) {
-			ret = -ENOMEM;
-			goto free_sw_sq;
+		if (need_rq) {
+			wq->rq.sw_rq = kcalloc(wq->rq.size,
+					       sizeof(*wq->rq.sw_rq),
+					       GFP_KERNEL);
+			if (!wq->rq.sw_rq) {
+				ret = -ENOMEM;
+				goto free_sw_sq;
+			}
 		}
 	}
 
-	/*
-	 * RQT must be a power of 2 and at least 16 deep.
-	 */
-	wq->rq.rqt_size = roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
-	wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
-	if (!wq->rq.rqt_hwaddr) {
-		ret = -ENOMEM;
-		goto free_sw_rq;
+	if (need_rq) {
+		/*
+		 * RQT must be a power of 2 and at least 16 deep.
+		 */
+		wq->rq.rqt_size =
+			roundup_pow_of_two(max_t(u16, wq->rq.size, 16));
+		wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
+		if (!wq->rq.rqt_hwaddr) {
+			ret = -ENOMEM;
+			goto free_sw_rq;
+		}
 	}
 
 	ret = alloc_sq(rdev, &wq->sq, user);
@@ -247,34 +259,39 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	memset(wq->sq.queue, 0, wq->sq.memsize);
 	dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
 
-	wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev),
-					  wq->rq.memsize, &(wq->rq.dma_addr),
-					  GFP_KERNEL);
-	if (!wq->rq.queue) {
-		ret = -ENOMEM;
-		goto free_sq;
+	if (need_rq) {
+		wq->rq.queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+						  wq->rq.memsize,
+						  &wq->rq.dma_addr,
+						  GFP_KERNEL);
+		if (!wq->rq.queue) {
+			ret = -ENOMEM;
+			goto free_sq;
+		}
+		pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
+			 wq->sq.queue,
+			 (unsigned long long)virt_to_phys(wq->sq.queue),
+			 wq->rq.queue,
+			 (unsigned long long)virt_to_phys(wq->rq.queue));
+		memset(wq->rq.queue, 0, wq->rq.memsize);
+		dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
 	}
-	pr_debug("sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
-		 wq->sq.queue,
-		 (unsigned long long)virt_to_phys(wq->sq.queue),
-		 wq->rq.queue,
-		 (unsigned long long)virt_to_phys(wq->rq.queue));
-	memset(wq->rq.queue, 0, wq->rq.memsize);
-	dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr);
 
 	wq->db = rdev->lldi.db_reg;
 
 	wq->sq.bar2_va = c4iw_bar2_addrs(rdev, wq->sq.qid, T4_BAR2_QTYPE_EGRESS,
 					 &wq->sq.bar2_qid,
 					 user ? &wq->sq.bar2_pa : NULL);
-	wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid, T4_BAR2_QTYPE_EGRESS,
-					 &wq->rq.bar2_qid,
-					 user ? &wq->rq.bar2_pa : NULL);
+	if (need_rq)
+		wq->rq.bar2_va = c4iw_bar2_addrs(rdev, wq->rq.qid,
+						 T4_BAR2_QTYPE_EGRESS,
+						 &wq->rq.bar2_qid,
+						 user ? &wq->rq.bar2_pa : NULL);
 
 	/*
 	 * User mode must have bar2 access.
 	 */
-	if (user && (!wq->sq.bar2_pa || !wq->rq.bar2_pa)) {
+	if (user && (!wq->sq.bar2_pa || (need_rq && !wq->rq.bar2_pa))) {
 		pr_warn("%s: sqid %u or rqid %u not in BAR2 range\n",
 			pci_name(rdev->lldi.pdev), wq->sq.qid, wq->rq.qid);
 		goto free_dma;
@@ -285,7 +302,8 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
 	/* build fw_ri_res_wr */
 	wr_len = sizeof *res_wr + 2 * sizeof *res;
-
+	if (need_rq)
+		wr_len += sizeof(*res);
 	skb = alloc_skb(wr_len, GFP_KERNEL);
 	if (!skb) {
 		ret = -ENOMEM;
@@ -296,7 +314,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 	res_wr = __skb_put_zero(skb, wr_len);
 	res_wr->op_nres = cpu_to_be32(
 			FW_WR_OP_V(FW_RI_RES_WR) |
-			FW_RI_RES_WR_NRES_V(2) |
+			FW_RI_RES_WR_NRES_V(need_rq ? 2 : 1) |
 			FW_WR_COMPL_F);
 	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
 	res_wr->cookie = (uintptr_t)wr_waitp;
@@ -327,30 +345,36 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 		FW_RI_RES_WR_EQSIZE_V(eqsize));
 	res->u.sqrq.eqid = cpu_to_be32(wq->sq.qid);
 	res->u.sqrq.eqaddr = cpu_to_be64(wq->sq.dma_addr);
-	res++;
-	res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
-	res->u.sqrq.op = FW_RI_RES_OP_WRITE;
 
-	/*
-	 * eqsize is the number of 64B entries plus the status page size.
-	 */
-	eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
-		rdev->hw_queue.t4_eq_status_entries;
-	res->u.sqrq.fetchszm_to_iqid = cpu_to_be32(
-		FW_RI_RES_WR_HOSTFCMODE_V(0) |	/* no host cidx updates */
-		FW_RI_RES_WR_CPRIO_V(0) |	/* don't keep in chip cache */
-		FW_RI_RES_WR_PCIECHN_V(0) |	/* set by uP at ri_init time */
-		FW_RI_RES_WR_IQID_V(rcq->cqid));
-	res->u.sqrq.dcaen_to_eqsize = cpu_to_be32(
-		FW_RI_RES_WR_DCAEN_V(0) |
-		FW_RI_RES_WR_DCACPU_V(0) |
-		FW_RI_RES_WR_FBMIN_V(2) |
-		FW_RI_RES_WR_FBMAX_V(3) |
-		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
-		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
-		FW_RI_RES_WR_EQSIZE_V(eqsize));
-	res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
-	res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+	if (need_rq) {
+		res++;
+		res->u.sqrq.restype = FW_RI_RES_TYPE_RQ;
+		res->u.sqrq.op = FW_RI_RES_OP_WRITE;
+
+		/*
+		 * eqsize is the number of 64B entries plus the status page size
+		 */
+		eqsize = wq->rq.size * T4_RQ_NUM_SLOTS +
+			rdev->hw_queue.t4_eq_status_entries;
+		res->u.sqrq.fetchszm_to_iqid =
+			/* no host cidx updates */
+			cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+			/* don't keep in chip cache */
+			FW_RI_RES_WR_CPRIO_V(0) |
+			/* set by uP at ri_init time */
+			FW_RI_RES_WR_PCIECHN_V(0) |
+			FW_RI_RES_WR_IQID_V(rcq->cqid));
+		res->u.sqrq.dcaen_to_eqsize =
+			cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+			FW_RI_RES_WR_DCACPU_V(0) |
+			FW_RI_RES_WR_FBMIN_V(2) |
+			FW_RI_RES_WR_FBMAX_V(3) |
+			FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+			FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+			FW_RI_RES_WR_EQSIZE_V(eqsize));
+		res->u.sqrq.eqid = cpu_to_be32(wq->rq.qid);
+		res->u.sqrq.eqaddr = cpu_to_be64(wq->rq.dma_addr);
+	}
 
 	c4iw_init_wr_wait(wr_waitp);
 	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->sq.qid, __func__);
@@ -363,26 +387,30 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
 
 	return 0;
 free_dma:
-	dma_free_coherent(&(rdev->lldi.pdev->dev),
-			  wq->rq.memsize, wq->rq.queue,
-			  dma_unmap_addr(&wq->rq, mapping));
+	if (need_rq)
+		dma_free_coherent(&rdev->lldi.pdev->dev,
+				  wq->rq.memsize, wq->rq.queue,
+				  dma_unmap_addr(&wq->rq, mapping));
 free_sq:
 	dealloc_sq(rdev, &wq->sq);
 free_hwaddr:
-	c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
+	if (need_rq)
+		c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
 free_sw_rq:
-	kfree(wq->rq.sw_rq);
+	if (need_rq)
+		kfree(wq->rq.sw_rq);
 free_sw_sq:
 	kfree(wq->sq.sw_sq);
 free_rq_qid:
-	c4iw_put_qpid(rdev, wq->rq.qid, uctx);
+	if (need_rq)
+		c4iw_put_qpid(rdev, wq->rq.qid, uctx);
 free_sq_qid:
 	c4iw_put_qpid(rdev, wq->sq.qid, uctx);
 	return ret;
 }
 
 static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
-		      struct ib_send_wr *wr, int max, u32 *plenp)
+		      const struct ib_send_wr *wr, int max, u32 *plenp)
 {
 	u8 *dstp, *srcp;
 	u32 plen = 0;
@@ -427,7 +455,12 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end,
 {
 	int i;
 	u32 plen = 0;
-	__be64 *flitp = (__be64 *)isglp->sge;
+	__be64 *flitp;
+
+	if ((__be64 *)isglp == queue_end)
+		isglp = (struct fw_ri_isgl *)queue_start;
+
+	flitp = (__be64 *)isglp->sge;
 
 	for (i = 0; i < num_sge; i++) {
 		if ((plen + sg_list[i].length) < plen)
@@ -452,7 +485,7 @@ static int build_isgl(__be64 *queue_start, __be64 *queue_end,
 }
 
 static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
-			   struct ib_send_wr *wr, u8 *len16)
+			   const struct ib_send_wr *wr, u8 *len16)
 {
 	u32 plen;
 	int size;
@@ -519,7 +552,7 @@ static int build_rdma_send(struct t4_sq *sq, union t4_wr *wqe,
 }
 
 static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
-			    struct ib_send_wr *wr, u8 *len16)
+			    const struct ib_send_wr *wr, u8 *len16)
 {
 	u32 plen;
 	int size;
@@ -527,7 +560,15 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
 
 	if (wr->num_sge > T4_MAX_SEND_SGE)
 		return -EINVAL;
-	wqe->write.r2 = 0;
+
+	/*
+	 * The iWARP protocol supports 64-bit immediate data, but the
+	 * RDMA API limits it to 32 bits.
+	 */
+	if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = wr->ex.imm_data;
+	else
+		wqe->write.iw_imm_data.ib_imm_data.imm_data32 = 0;
 	wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
 	wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
 	if (wr->num_sge) {
@@ -561,7 +602,58 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
 	return 0;
 }
 
-static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static void build_immd_cmpl(struct t4_sq *sq, struct fw_ri_immd_cmpl *immdp,
+			    struct ib_send_wr *wr)
+{
+	memcpy((u8 *)immdp->data, (u8 *)(uintptr_t)wr->sg_list->addr, 16);
+	memset(immdp->r1, 0, 6);
+	immdp->op = FW_RI_DATA_IMMD;
+	immdp->immdlen = 16;
+}
+
+static void build_rdma_write_cmpl(struct t4_sq *sq,
+				  struct fw_ri_rdma_write_cmpl_wr *wcwr,
+				  const struct ib_send_wr *wr, u8 *len16)
+{
+	u32 plen;
+	int size;
+
+	/*
+	 * This code assumes the struct fields preceding the write isgl
+	 * fit in one 64B WR slot.  This is because the WQE is built
+	 * directly in the dma queue, and wrapping is only handled
+	 * by the code building sgls.  That is, the "fixed part" of the wr
+	 * structs must all fit in 64B.  The WQE build code should probably be
+	 * redesigned to avoid this restriction, but for now just add
+	 * the BUILD_BUG_ON() to catch if this WQE struct gets too big.
+	 */
+	BUILD_BUG_ON(offsetof(struct fw_ri_rdma_write_cmpl_wr, u) > 64);
+
+	wcwr->stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
+	wcwr->to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
+	wcwr->stag_inv = cpu_to_be32(wr->next->ex.invalidate_rkey);
+	wcwr->r2 = 0;
+	wcwr->r3 = 0;
+
+	/* SEND_INV SGL */
+	if (wr->next->send_flags & IB_SEND_INLINE)
+		build_immd_cmpl(sq, &wcwr->u_cmpl.immd_src, wr->next);
+	else
+		build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
+			   &wcwr->u_cmpl.isgl_src, wr->next->sg_list, 1, NULL);
+
+	/* WRITE SGL */
+	build_isgl((__be64 *)sq->queue, (__be64 *)&sq->queue[sq->size],
+		   wcwr->u.isgl_src, wr->sg_list, wr->num_sge, &plen);
+
+	size = sizeof(*wcwr) + sizeof(struct fw_ri_isgl) +
+		wr->num_sge * sizeof(struct fw_ri_sge);
+	wcwr->plen = cpu_to_be32(plen);
+	*len16 = DIV_ROUND_UP(size, 16);
+}
+
+static int build_rdma_read(union t4_wr *wqe, const struct ib_send_wr *wr,
+			   u8 *len16)
 {
 	if (wr->num_sge > 1)
 		return -EINVAL;
@@ -590,8 +682,74 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
 	return 0;
 }
 
+static void post_write_cmpl(struct c4iw_qp *qhp, const struct ib_send_wr *wr)
+{
+	bool send_signaled = (wr->next->send_flags & IB_SEND_SIGNALED) ||
+			     qhp->sq_sig_all;
+	bool write_signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
+			      qhp->sq_sig_all;
+	struct t4_swsqe *swsqe;
+	union t4_wr *wqe;
+	u16 write_wrid;
+	u8 len16;
+	u16 idx;
+
+	/*
+	 * The sw_sq entries still look like a WRITE and a SEND and consume
+	 * 2 slots. The FW WR, however, will be a single uber-WR.
+	 */
+	wqe = (union t4_wr *)((u8 *)qhp->wq.sq.queue +
+	       qhp->wq.sq.wq_pidx * T4_EQ_ENTRY_SIZE);
+	build_rdma_write_cmpl(&qhp->wq.sq, &wqe->write_cmpl, wr, &len16);
+
+	/* WRITE swsqe */
+	swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
+	swsqe->opcode = FW_RI_RDMA_WRITE;
+	swsqe->idx = qhp->wq.sq.pidx;
+	swsqe->complete = 0;
+	swsqe->signaled = write_signaled;
+	swsqe->flushed = 0;
+	swsqe->wr_id = wr->wr_id;
+	if (c4iw_wr_log) {
+		swsqe->sge_ts =
+			cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
+		swsqe->host_time = ktime_get();
+	}
+
+	write_wrid = qhp->wq.sq.pidx;
+
+	/* just bump the sw_sq */
+	qhp->wq.sq.in_use++;
+	if (++qhp->wq.sq.pidx == qhp->wq.sq.size)
+		qhp->wq.sq.pidx = 0;
+
+	/* SEND_WITH_INV swsqe */
+	swsqe = &qhp->wq.sq.sw_sq[qhp->wq.sq.pidx];
+	swsqe->opcode = FW_RI_SEND_WITH_INV;
+	swsqe->idx = qhp->wq.sq.pidx;
+	swsqe->complete = 0;
+	swsqe->signaled = send_signaled;
+	swsqe->flushed = 0;
+	swsqe->wr_id = wr->next->wr_id;
+	if (c4iw_wr_log) {
+		swsqe->sge_ts =
+			cxgb4_read_sge_timestamp(qhp->rhp->rdev.lldi.ports[0]);
+		swsqe->host_time = ktime_get();
+	}
+
+	wqe->write_cmpl.flags_send = send_signaled ? FW_RI_COMPLETION_FLAG : 0;
+	wqe->write_cmpl.wrid_send = qhp->wq.sq.pidx;
+
+	init_wr_hdr(wqe, write_wrid, FW_RI_RDMA_WRITE_CMPL_WR,
+		    write_signaled ? FW_RI_COMPLETION_FLAG : 0, len16);
+	t4_sq_produce(&qhp->wq, len16);
+	idx = DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+
+	t4_ring_sq_db(&qhp->wq, idx, wqe);
+}
+
 static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
-			   struct ib_recv_wr *wr, u8 *len16)
+			   const struct ib_recv_wr *wr, u8 *len16)
 {
 	int ret;
 
@@ -605,8 +763,22 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
 	return 0;
 }
 
+static int build_srq_recv(union t4_recv_wr *wqe, const struct ib_recv_wr *wr,
+			  u8 *len16)
+{
+	int ret;
+
+	ret = build_isgl((__be64 *)wqe, (__be64 *)(wqe + 1),
+			 &wqe->recv.isgl, wr->sg_list, wr->num_sge, NULL);
+	if (ret)
+		return ret;
+	*len16 = DIV_ROUND_UP(sizeof(wqe->recv) +
+			      wr->num_sge * sizeof(struct fw_ri_sge), 16);
+	return 0;
+}
+
 static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
-			      struct ib_reg_wr *wr, struct c4iw_mr *mhp,
+			      const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
 			      u8 *len16)
 {
 	__be64 *p = (__be64 *)fr->pbl;
@@ -638,8 +810,8 @@ static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr,
 }
 
 static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
-			struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16,
-			bool dsgl_supported)
+			const struct ib_reg_wr *wr, struct c4iw_mr *mhp,
+			u8 *len16, bool dsgl_supported)
 {
 	struct fw_ri_immd *imdp;
 	__be64 *p;
@@ -701,7 +873,8 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
 	return 0;
 }
 
-static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
+static int build_inv_stag(union t4_wr *wqe, const struct ib_send_wr *wr,
+			  u8 *len16)
 {
 	wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey);
 	wqe->inv.r2 = 0;
@@ -721,7 +894,7 @@ static void free_qp_work(struct work_struct *work)
 
 	pr_debug("qhp %p ucontext %p\n", qhp, ucontext);
 	destroy_qp(&rhp->rdev, &qhp->wq,
-		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !qhp->srq);
 
 	if (ucontext)
 		c4iw_put_ucontext(ucontext);
@@ -804,6 +977,9 @@ static int ib_to_fw_opcode(int ib_opcode)
 	case IB_WR_RDMA_WRITE:
 		opcode = FW_RI_RDMA_WRITE;
 		break;
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		opcode = FW_RI_WRITE_IMMEDIATE;
+		break;
 	case IB_WR_RDMA_READ:
 	case IB_WR_RDMA_READ_WITH_INV:
 		opcode = FW_RI_READ_REQ;
@@ -820,7 +996,8 @@ static int ib_to_fw_opcode(int ib_opcode)
 	return opcode;
 }
 
-static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
+static int complete_sq_drain_wr(struct c4iw_qp *qhp,
+				const struct ib_send_wr *wr)
 {
 	struct t4_cqe cqe = {};
 	struct c4iw_cq *schp;
@@ -858,8 +1035,9 @@ static int complete_sq_drain_wr(struct c4iw_qp *qhp, struct ib_send_wr *wr)
 	return 0;
 }
 
-static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
-				struct ib_send_wr **bad_wr)
+static int complete_sq_drain_wrs(struct c4iw_qp *qhp,
+				 const struct ib_send_wr *wr,
+				 const struct ib_send_wr **bad_wr)
 {
 	int ret = 0;
 
@@ -874,7 +1052,8 @@ static int complete_sq_drain_wrs(struct c4iw_qp *qhp, struct ib_send_wr *wr,
 	return ret;
 }
 
-static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
+static void complete_rq_drain_wr(struct c4iw_qp *qhp,
+				 const struct ib_recv_wr *wr)
 {
 	struct t4_cqe cqe = {};
 	struct c4iw_cq *rchp;
@@ -906,7 +1085,8 @@ static void complete_rq_drain_wr(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
 	}
 }
 
-static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
+static void complete_rq_drain_wrs(struct c4iw_qp *qhp,
+				  const struct ib_recv_wr *wr)
 {
 	while (wr) {
 		complete_rq_drain_wr(qhp, wr);
@@ -914,14 +1094,15 @@ static void complete_rq_drain_wrs(struct c4iw_qp *qhp, struct ib_recv_wr *wr)
 	}
 }
 
-int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		   struct ib_send_wr **bad_wr)
+int c4iw_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr)
 {
 	int err = 0;
 	u8 len16 = 0;
 	enum fw_wr_opcodes fw_opcode = 0;
 	enum fw_ri_wr_flags fw_flags;
 	struct c4iw_qp *qhp;
+	struct c4iw_dev *rhp;
 	union t4_wr *wqe = NULL;
 	u32 num_wrs;
 	struct t4_swsqe *swsqe;
@@ -929,6 +1110,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	u16 idx = 0;
 
 	qhp = to_c4iw_qp(ibqp);
+	rhp = qhp->rhp;
 	spin_lock_irqsave(&qhp->lock, flag);
 
 	/*
@@ -946,6 +1128,30 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		*bad_wr = wr;
 		return -ENOMEM;
 	}
+
+	/*
+	 * Fastpath for the NVMe-oF target WRITE + SEND_WITH_INV wr chain that
+	 * forms the response for small NVMe-oF READ requests.  If the chain is
+	 * exactly a WRITE->SEND_WITH_INV and the sgl depths and lengths
+	 * meet the requirements of the fw_ri_write_cmpl_wr work request,
+	 * then build and post the write_cmpl WR.  If any of the tests
+	 * below are not true, then we continue on with the traditional WRITE
+	 * and SEND WRs.
+	 */
+	if (qhp->rhp->rdev.lldi.write_cmpl_support &&
+	    CHELSIO_CHIP_VERSION(qhp->rhp->rdev.lldi.adapter_type) >=
+	    CHELSIO_T5 &&
+	    wr && wr->next && !wr->next->next &&
+	    wr->opcode == IB_WR_RDMA_WRITE &&
+	    wr->sg_list[0].length && wr->num_sge <= T4_WRITE_CMPL_MAX_SGL &&
+	    wr->next->opcode == IB_WR_SEND_WITH_INV &&
+	    wr->next->sg_list[0].length == T4_WRITE_CMPL_MAX_CQE &&
+	    wr->next->num_sge == 1 && num_wrs >= 2) {
+		post_write_cmpl(qhp, wr);
+		spin_unlock_irqrestore(&qhp->lock, flag);
+		return 0;
+	}
+
 	while (wr) {
 		if (num_wrs == 0) {
 			err = -ENOMEM;
@@ -973,6 +1179,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				swsqe->opcode = FW_RI_SEND_WITH_INV;
 			err = build_rdma_send(&qhp->wq.sq, wqe, wr, &len16);
 			break;
+		case IB_WR_RDMA_WRITE_WITH_IMM:
+			if (unlikely(!rhp->rdev.lldi.write_w_imm_support)) {
+				err = -EINVAL;
+				break;
+			}
+			fw_flags |= FW_RI_RDMA_WRITE_WITH_IMMEDIATE;
+			/*FALLTHROUGH*/
 		case IB_WR_RDMA_WRITE:
 			fw_opcode = FW_RI_RDMA_WRITE_WR;
 			swsqe->opcode = FW_RI_RDMA_WRITE;
@@ -983,8 +1196,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			fw_opcode = FW_RI_RDMA_READ_WR;
 			swsqe->opcode = FW_RI_READ_REQ;
 			if (wr->opcode == IB_WR_RDMA_READ_WITH_INV) {
-				c4iw_invalidate_mr(qhp->rhp,
-						   wr->sg_list[0].lkey);
+				c4iw_invalidate_mr(rhp, wr->sg_list[0].lkey);
 				fw_flags = FW_RI_RDMA_READ_INVALIDATE;
 			} else {
 				fw_flags = 0;
@@ -1000,7 +1212,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr);
 
 			swsqe->opcode = FW_RI_FAST_REGISTER;
-			if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
+			if (rhp->rdev.lldi.fr_nsmr_tpte_wr_support &&
 			    !mhp->attr.state && mhp->mpl_len <= 2) {
 				fw_opcode = FW_RI_FR_NSMR_TPTE_WR;
 				build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr),
@@ -1009,7 +1221,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				fw_opcode = FW_RI_FR_NSMR_WR;
 				err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr),
 				       mhp, &len16,
-				       qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl);
+				       rhp->rdev.lldi.ulptx_memwrite_dsgl);
 				if (err)
 					break;
 			}
@@ -1022,7 +1234,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			fw_opcode = FW_RI_INV_LSTAG_WR;
 			swsqe->opcode = FW_RI_LOCAL_INV;
 			err = build_inv_stag(wqe, wr, &len16);
-			c4iw_invalidate_mr(qhp->rhp, wr->ex.invalidate_rkey);
+			c4iw_invalidate_mr(rhp, wr->ex.invalidate_rkey);
 			break;
 		default:
 			pr_warn("%s post of type=%d TBD!\n", __func__,
@@ -1041,7 +1253,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		swsqe->wr_id = wr->wr_id;
 		if (c4iw_wr_log) {
 			swsqe->sge_ts = cxgb4_read_sge_timestamp(
-					qhp->rhp->rdev.lldi.ports[0]);
+					rhp->rdev.lldi.ports[0]);
 			swsqe->host_time = ktime_get();
 		}
 
@@ -1055,7 +1267,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		t4_sq_produce(&qhp->wq, len16);
 		idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
 	}
-	if (!qhp->rhp->rdev.status_page->db_off) {
+	if (!rhp->rdev.status_page->db_off) {
 		t4_ring_sq_db(&qhp->wq, idx, wqe);
 		spin_unlock_irqrestore(&qhp->lock, flag);
 	} else {
@@ -1065,8 +1277,8 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return err;
 }
 
-int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int c4iw_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	int err = 0;
 	struct c4iw_qp *qhp;
@@ -1145,6 +1357,89 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	return err;
 }
 
+static void defer_srq_wr(struct t4_srq *srq, union t4_recv_wr *wqe,
+			 u64 wr_id, u8 len16)
+{
+	struct t4_srq_pending_wr *pwr = &srq->pending_wrs[srq->pending_pidx];
+
+	pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u ooo_count %u wr_id 0x%llx pending_cidx %u pending_pidx %u pending_in_use %u\n",
+		 __func__, srq->cidx, srq->pidx, srq->wq_pidx,
+		 srq->in_use, srq->ooo_count,
+		 (unsigned long long)wr_id, srq->pending_cidx,
+		 srq->pending_pidx, srq->pending_in_use);
+	pwr->wr_id = wr_id;
+	pwr->len16 = len16;
+	memcpy(&pwr->wqe, wqe, len16 * 16);
+	t4_srq_produce_pending_wr(srq);
+}
+
+int c4iw_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr)
+{
+	union t4_recv_wr *wqe, lwqe;
+	struct c4iw_srq *srq;
+	unsigned long flag;
+	u8 len16 = 0;
+	u16 idx = 0;
+	int err = 0;
+	u32 num_wrs;
+
+	srq = to_c4iw_srq(ibsrq);
+	spin_lock_irqsave(&srq->lock, flag);
+	num_wrs = t4_srq_avail(&srq->wq);
+	if (num_wrs == 0) {
+		spin_unlock_irqrestore(&srq->lock, flag);
+		return -ENOMEM;
+	}
+	while (wr) {
+		if (wr->num_sge > T4_MAX_RECV_SGE) {
+			err = -EINVAL;
+			*bad_wr = wr;
+			break;
+		}
+		wqe = &lwqe;
+		if (num_wrs)
+			err = build_srq_recv(wqe, wr, &len16);
+		else
+			err = -ENOMEM;
+		if (err) {
+			*bad_wr = wr;
+			break;
+		}
+
+		wqe->recv.opcode = FW_RI_RECV_WR;
+		wqe->recv.r1 = 0;
+		wqe->recv.wrid = srq->wq.pidx;
+		wqe->recv.r2[0] = 0;
+		wqe->recv.r2[1] = 0;
+		wqe->recv.r2[2] = 0;
+		wqe->recv.len16 = len16;
+
+		if (srq->wq.ooo_count ||
+		    srq->wq.pending_in_use ||
+		    srq->wq.sw_rq[srq->wq.pidx].valid) {
+			defer_srq_wr(&srq->wq, wqe, wr->wr_id, len16);
+		} else {
+			srq->wq.sw_rq[srq->wq.pidx].wr_id = wr->wr_id;
+			srq->wq.sw_rq[srq->wq.pidx].valid = 1;
+			c4iw_copy_wr_to_srq(&srq->wq, wqe, len16);
+			pr_debug("%s cidx %u pidx %u wq_pidx %u in_use %u wr_id 0x%llx\n",
+				 __func__, srq->wq.cidx,
+				 srq->wq.pidx, srq->wq.wq_pidx,
+				 srq->wq.in_use,
+				 (unsigned long long)wr->wr_id);
+			t4_srq_produce(&srq->wq, len16);
+			idx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+		}
+		wr = wr->next;
+		num_wrs--;
+	}
+	if (idx)
+		t4_ring_srq_db(&srq->wq, idx, len16, wqe);
+	spin_unlock_irqrestore(&srq->lock, flag);
+	return err;
+}
+
 static inline void build_term_codes(struct t4_cqe *err_cqe, u8 *layer_type,
 				    u8 *ecode)
 {
@@ -1321,7 +1616,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 		       struct c4iw_cq *schp)
 {
 	int count;
-	int rq_flushed, sq_flushed;
+	int rq_flushed = 0, sq_flushed;
 	unsigned long flag;
 
 	pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp);
@@ -1340,11 +1635,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
 		return;
 	}
 	qhp->wq.flushed = 1;
-	t4_set_wq_in_error(&qhp->wq);
+	t4_set_wq_in_error(&qhp->wq, 0);
 
 	c4iw_flush_hw_cq(rchp, qhp);
-	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
-	rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+	if (!qhp->srq) {
+		c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
+		rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
+	}
 
 	if (schp != rchp)
 		c4iw_flush_hw_cq(schp, qhp);
@@ -1388,7 +1685,7 @@ static void flush_qp(struct c4iw_qp *qhp)
 	schp = to_c4iw_cq(qhp->ibqp.send_cq);
 
 	if (qhp->ibqp.uobject) {
-		t4_set_wq_in_error(&qhp->wq);
+		t4_set_wq_in_error(&qhp->wq, 0);
 		t4_set_cq_in_error(&rchp->cq);
 		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1517,16 +1814,21 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
 	wqe->u.init.pdid = cpu_to_be32(qhp->attr.pd);
 	wqe->u.init.qpid = cpu_to_be32(qhp->wq.sq.qid);
 	wqe->u.init.sq_eqid = cpu_to_be32(qhp->wq.sq.qid);
-	wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+	if (qhp->srq) {
+		wqe->u.init.rq_eqid = cpu_to_be32(FW_RI_INIT_RQEQID_SRQ |
+						  qhp->srq->idx);
+	} else {
+		wqe->u.init.rq_eqid = cpu_to_be32(qhp->wq.rq.qid);
+		wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
+		wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
+						   rhp->rdev.lldi.vr->rq.start);
+	}
 	wqe->u.init.scqid = cpu_to_be32(qhp->attr.scq);
 	wqe->u.init.rcqid = cpu_to_be32(qhp->attr.rcq);
 	wqe->u.init.ord_max = cpu_to_be32(qhp->attr.max_ord);
 	wqe->u.init.ird_max = cpu_to_be32(qhp->attr.max_ird);
 	wqe->u.init.iss = cpu_to_be32(qhp->ep->snd_seq);
 	wqe->u.init.irs = cpu_to_be32(qhp->ep->rcv_seq);
-	wqe->u.init.hwrqsize = cpu_to_be32(qhp->wq.rq.rqt_size);
-	wqe->u.init.hwrqaddr = cpu_to_be32(qhp->wq.rq.rqt_hwaddr -
-					 rhp->rdev.lldi.vr->rq.start);
 	if (qhp->attr.mpa_attr.initiator)
 		build_rtr_msg(qhp->attr.mpa_attr.p2p_type, &wqe->u.init);
 
@@ -1643,7 +1945,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 	case C4IW_QP_STATE_RTS:
 		switch (attrs->next_state) {
 		case C4IW_QP_STATE_CLOSING:
-			t4_set_wq_in_error(&qhp->wq);
+			t4_set_wq_in_error(&qhp->wq, 0);
 			set_state(qhp, C4IW_QP_STATE_CLOSING);
 			ep = qhp->ep;
 			if (!internal) {
@@ -1656,7 +1958,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 				goto err;
 			break;
 		case C4IW_QP_STATE_TERMINATE:
-			t4_set_wq_in_error(&qhp->wq);
+			t4_set_wq_in_error(&qhp->wq, 0);
 			set_state(qhp, C4IW_QP_STATE_TERMINATE);
 			qhp->attr.layer_etype = attrs->layer_etype;
 			qhp->attr.ecode = attrs->ecode;
@@ -1673,7 +1975,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
 			}
 			break;
 		case C4IW_QP_STATE_ERROR:
-			t4_set_wq_in_error(&qhp->wq);
+			t4_set_wq_in_error(&qhp->wq, 0);
 			set_state(qhp, C4IW_QP_STATE_ERROR);
 			if (!internal) {
 				abort = 1;
@@ -1819,7 +2121,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	struct c4iw_cq *schp;
 	struct c4iw_cq *rchp;
 	struct c4iw_create_qp_resp uresp;
-	unsigned int sqsize, rqsize;
+	unsigned int sqsize, rqsize = 0;
 	struct c4iw_ucontext *ucontext;
 	int ret;
 	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
@@ -1840,11 +2142,13 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	if (attrs->cap.max_inline_data > T4_MAX_SEND_INLINE)
 		return ERR_PTR(-EINVAL);
 
-	if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
-		return ERR_PTR(-E2BIG);
-	rqsize = attrs->cap.max_recv_wr + 1;
-	if (rqsize < 8)
-		rqsize = 8;
+	if (!attrs->srq) {
+		if (attrs->cap.max_recv_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+			return ERR_PTR(-E2BIG);
+		rqsize = attrs->cap.max_recv_wr + 1;
+		if (rqsize < 8)
+			rqsize = 8;
+	}
 
 	if (attrs->cap.max_send_wr > rhp->rdev.hw_queue.t4_max_sq_size)
 		return ERR_PTR(-E2BIG);
@@ -1869,19 +2173,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		(sqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
 		sizeof(*qhp->wq.sq.queue) + 16 * sizeof(__be64);
 	qhp->wq.sq.flush_cidx = -1;
-	qhp->wq.rq.size = rqsize;
-	qhp->wq.rq.memsize =
-		(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
-		sizeof(*qhp->wq.rq.queue);
+	if (!attrs->srq) {
+		qhp->wq.rq.size = rqsize;
+		qhp->wq.rq.memsize =
+			(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+			sizeof(*qhp->wq.rq.queue);
+	}
 
 	if (ucontext) {
 		qhp->wq.sq.memsize = roundup(qhp->wq.sq.memsize, PAGE_SIZE);
-		qhp->wq.rq.memsize = roundup(qhp->wq.rq.memsize, PAGE_SIZE);
+		if (!attrs->srq)
+			qhp->wq.rq.memsize =
+				roundup(qhp->wq.rq.memsize, PAGE_SIZE);
 	}
 
 	ret = create_qp(&rhp->rdev, &qhp->wq, &schp->cq, &rchp->cq,
 			ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
-			qhp->wr_waitp);
+			qhp->wr_waitp, !attrs->srq);
 	if (ret)
 		goto err_free_wr_wait;
 
@@ -1894,10 +2202,12 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	qhp->attr.scq = ((struct c4iw_cq *) attrs->send_cq)->cq.cqid;
 	qhp->attr.rcq = ((struct c4iw_cq *) attrs->recv_cq)->cq.cqid;
 	qhp->attr.sq_num_entries = attrs->cap.max_send_wr;
-	qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
 	qhp->attr.sq_max_sges = attrs->cap.max_send_sge;
 	qhp->attr.sq_max_sges_rdma_write = attrs->cap.max_send_sge;
-	qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+	if (!attrs->srq) {
+		qhp->attr.rq_num_entries = attrs->cap.max_recv_wr;
+		qhp->attr.rq_max_sges = attrs->cap.max_recv_sge;
+	}
 	qhp->attr.state = C4IW_QP_STATE_IDLE;
 	qhp->attr.next_state = C4IW_QP_STATE_IDLE;
 	qhp->attr.enable_rdma_read = 1;
@@ -1922,21 +2232,27 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 			ret = -ENOMEM;
 			goto err_remove_handle;
 		}
-		rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
-		if (!rq_key_mm) {
-			ret = -ENOMEM;
-			goto err_free_sq_key;
+		if (!attrs->srq) {
+			rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
+			if (!rq_key_mm) {
+				ret = -ENOMEM;
+				goto err_free_sq_key;
+			}
 		}
 		sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
 		if (!sq_db_key_mm) {
 			ret = -ENOMEM;
 			goto err_free_rq_key;
 		}
-		rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
-		if (!rq_db_key_mm) {
-			ret = -ENOMEM;
-			goto err_free_sq_db_key;
+		if (!attrs->srq) {
+			rq_db_key_mm =
+				kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
+			if (!rq_db_key_mm) {
+				ret = -ENOMEM;
+				goto err_free_sq_db_key;
+			}
 		}
+		memset(&uresp, 0, sizeof(uresp));
 		if (t4_sq_onchip(&qhp->wq.sq)) {
 			ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
 						 GFP_KERNEL);
@@ -1945,30 +2261,35 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 				goto err_free_rq_db_key;
 			}
 			uresp.flags = C4IW_QPF_ONCHIP;
-		} else
-			uresp.flags = 0;
+		}
+		if (rhp->rdev.lldi.write_w_imm_support)
+			uresp.flags |= C4IW_QPF_WRITE_W_IMM;
 		uresp.qid_mask = rhp->rdev.qpmask;
 		uresp.sqid = qhp->wq.sq.qid;
 		uresp.sq_size = qhp->wq.sq.size;
 		uresp.sq_memsize = qhp->wq.sq.memsize;
-		uresp.rqid = qhp->wq.rq.qid;
-		uresp.rq_size = qhp->wq.rq.size;
-		uresp.rq_memsize = qhp->wq.rq.memsize;
+		if (!attrs->srq) {
+			uresp.rqid = qhp->wq.rq.qid;
+			uresp.rq_size = qhp->wq.rq.size;
+			uresp.rq_memsize = qhp->wq.rq.memsize;
+		}
 		spin_lock(&ucontext->mmap_lock);
 		if (ma_sync_key_mm) {
 			uresp.ma_sync_key = ucontext->key;
 			ucontext->key += PAGE_SIZE;
-		} else {
-			uresp.ma_sync_key =  0;
 		}
 		uresp.sq_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
-		uresp.rq_key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
+		if (!attrs->srq) {
+			uresp.rq_key = ucontext->key;
+			ucontext->key += PAGE_SIZE;
+		}
 		uresp.sq_db_gts_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
-		uresp.rq_db_gts_key = ucontext->key;
-		ucontext->key += PAGE_SIZE;
+		if (!attrs->srq) {
+			uresp.rq_db_gts_key = ucontext->key;
+			ucontext->key += PAGE_SIZE;
+		}
 		spin_unlock(&ucontext->mmap_lock);
 		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
 		if (ret)
@@ -1977,18 +2298,23 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		sq_key_mm->addr = qhp->wq.sq.phys_addr;
 		sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
 		insert_mmap(ucontext, sq_key_mm);
-		rq_key_mm->key = uresp.rq_key;
-		rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
-		rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
-		insert_mmap(ucontext, rq_key_mm);
+		if (!attrs->srq) {
+			rq_key_mm->key = uresp.rq_key;
+			rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
+			rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+			insert_mmap(ucontext, rq_key_mm);
+		}
 		sq_db_key_mm->key = uresp.sq_db_gts_key;
 		sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
 		sq_db_key_mm->len = PAGE_SIZE;
 		insert_mmap(ucontext, sq_db_key_mm);
-		rq_db_key_mm->key = uresp.rq_db_gts_key;
-		rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa;
-		rq_db_key_mm->len = PAGE_SIZE;
-		insert_mmap(ucontext, rq_db_key_mm);
+		if (!attrs->srq) {
+			rq_db_key_mm->key = uresp.rq_db_gts_key;
+			rq_db_key_mm->addr =
+				(u64)(unsigned long)qhp->wq.rq.bar2_pa;
+			rq_db_key_mm->len = PAGE_SIZE;
+			insert_mmap(ucontext, rq_db_key_mm);
+		}
 		if (ma_sync_key_mm) {
 			ma_sync_key_mm->key = uresp.ma_sync_key;
 			ma_sync_key_mm->addr =
@@ -2001,7 +2327,19 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		c4iw_get_ucontext(ucontext);
 		qhp->ucontext = ucontext;
 	}
+	if (!attrs->srq) {
+		qhp->wq.qp_errp =
+			&qhp->wq.rq.queue[qhp->wq.rq.size].status.qp_err;
+	} else {
+		qhp->wq.qp_errp =
+			&qhp->wq.sq.queue[qhp->wq.sq.size].status.qp_err;
+		qhp->wq.srqidxp =
+			&qhp->wq.sq.queue[qhp->wq.sq.size].status.srqidx;
+	}
+
 	qhp->ibqp.qp_num = qhp->wq.sq.qid;
+	if (attrs->srq)
+		qhp->srq = to_c4iw_srq(attrs->srq);
 	INIT_LIST_HEAD(&qhp->db_fc_entry);
 	pr_debug("sq id %u size %u memsize %zu num_entries %u rq id %u size %u memsize %zu num_entries %u\n",
 		 qhp->wq.sq.qid, qhp->wq.sq.size, qhp->wq.sq.memsize,
@@ -2011,18 +2349,20 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 err_free_ma_sync_key:
 	kfree(ma_sync_key_mm);
 err_free_rq_db_key:
-	kfree(rq_db_key_mm);
+	if (!attrs->srq)
+		kfree(rq_db_key_mm);
 err_free_sq_db_key:
 	kfree(sq_db_key_mm);
 err_free_rq_key:
-	kfree(rq_key_mm);
+	if (!attrs->srq)
+		kfree(rq_key_mm);
 err_free_sq_key:
 	kfree(sq_key_mm);
 err_remove_handle:
 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
 err_destroy_qp:
 	destroy_qp(&rhp->rdev, &qhp->wq,
-		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx);
+		   ucontext ? &ucontext->uctx : &rhp->rdev.uctx, !attrs->srq);
 err_free_wr_wait:
 	c4iw_put_wr_wait(qhp->wr_waitp);
 err_free_qhp:
@@ -2088,6 +2428,45 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
 	return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
 }
 
+void c4iw_dispatch_srq_limit_reached_event(struct c4iw_srq *srq)
+{
+	struct ib_event event = {};
+
+	event.device = &srq->rhp->ibdev;
+	event.element.srq = &srq->ibsrq;
+	event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+	ib_dispatch_event(&event);
+}
+
+int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr,
+		    enum ib_srq_attr_mask srq_attr_mask,
+		    struct ib_udata *udata)
+{
+	struct c4iw_srq *srq = to_c4iw_srq(ib_srq);
+	int ret = 0;
+
+	/*
+	 * XXX 0 mask == a SW interrupt for srq_limit reached...
+	 */
+	if (udata && !srq_attr_mask) {
+		c4iw_dispatch_srq_limit_reached_event(srq);
+		goto out;
+	}
+
+	/* no support for this yet */
+	if (srq_attr_mask & IB_SRQ_MAX_WR) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!udata && (srq_attr_mask & IB_SRQ_LIMIT)) {
+		srq->armed = true;
+		srq->srq_limit = attr->srq_limit;
+	}
+out:
+	return ret;
+}
+
 int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		     int attr_mask, struct ib_qp_init_attr *init_attr)
 {
@@ -2104,3 +2483,359 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
 	return 0;
 }
+
+static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+			   struct c4iw_wr_wait *wr_waitp)
+{
+	struct c4iw_rdev *rdev = &srq->rhp->rdev;
+	struct sk_buff *skb = srq->destroy_skb;
+	struct t4_srq *wq = &srq->wq;
+	struct fw_ri_res_wr *res_wr;
+	struct fw_ri_res *res;
+	int wr_len;
+
+	wr_len = sizeof(*res_wr) + sizeof(*res);
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+	memset(res_wr, 0, wr_len);
+	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+			FW_RI_RES_WR_NRES_V(1) |
+			FW_WR_COMPL_F);
+	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+	res_wr->cookie = (uintptr_t)wr_waitp;
+	res = res_wr->res;
+	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+	res->u.srq.op = FW_RI_RES_OP_RESET;
+	res->u.srq.srqid = cpu_to_be32(srq->idx);
+	res->u.srq.eqid = cpu_to_be32(wq->qid);
+
+	c4iw_init_wr_wait(wr_waitp);
+	c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, 0, __func__);
+
+	dma_free_coherent(&rdev->lldi.pdev->dev,
+			  wq->memsize, wq->queue,
+			pci_unmap_addr(wq, mapping));
+	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+	kfree(wq->sw_rq);
+	c4iw_put_qpid(rdev, wq->qid, uctx);
+}
+
+static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
+			   struct c4iw_wr_wait *wr_waitp)
+{
+	struct c4iw_rdev *rdev = &srq->rhp->rdev;
+	int user = (uctx != &rdev->uctx);
+	struct t4_srq *wq = &srq->wq;
+	struct fw_ri_res_wr *res_wr;
+	struct fw_ri_res *res;
+	struct sk_buff *skb;
+	int wr_len;
+	int eqsize;
+	int ret = -ENOMEM;
+
+	wq->qid = c4iw_get_qpid(rdev, uctx);
+	if (!wq->qid)
+		goto err;
+
+	if (!user) {
+		wq->sw_rq = kcalloc(wq->size, sizeof(*wq->sw_rq),
+				    GFP_KERNEL);
+		if (!wq->sw_rq)
+			goto err_put_qpid;
+		wq->pending_wrs = kcalloc(srq->wq.size,
+					  sizeof(*srq->wq.pending_wrs),
+					  GFP_KERNEL);
+		if (!wq->pending_wrs)
+			goto err_free_sw_rq;
+	}
+
+	wq->rqt_size = wq->size;
+	wq->rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rqt_size);
+	if (!wq->rqt_hwaddr)
+		goto err_free_pending_wrs;
+	wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
+		T4_RQT_ENTRY_SHIFT;
+
+	wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+				       wq->memsize, &wq->dma_addr,
+			GFP_KERNEL);
+	if (!wq->queue)
+		goto err_free_rqtpool;
+
+	memset(wq->queue, 0, wq->memsize);
+	pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+
+	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
+				      &wq->bar2_qid,
+			user ? &wq->bar2_pa : NULL);
+
+	/*
+	 * User mode must have bar2 access.
+	 */
+
+	if (user && !wq->bar2_va) {
+		pr_warn(MOD "%s: srqid %u not in BAR2 range.\n",
+			pci_name(rdev->lldi.pdev), wq->qid);
+		ret = -EINVAL;
+		goto err_free_queue;
+	}
+
+	/* build fw_ri_res_wr */
+	wr_len = sizeof(*res_wr) + sizeof(*res);
+
+	skb = alloc_skb(wr_len, GFP_KERNEL | __GFP_NOFAIL);
+	if (!skb)
+		goto err_free_queue;
+	set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
+
+	res_wr = (struct fw_ri_res_wr *)__skb_put(skb, wr_len);
+	memset(res_wr, 0, wr_len);
+	res_wr->op_nres = cpu_to_be32(FW_WR_OP_V(FW_RI_RES_WR) |
+			FW_RI_RES_WR_NRES_V(1) |
+			FW_WR_COMPL_F);
+	res_wr->len16_pkd = cpu_to_be32(DIV_ROUND_UP(wr_len, 16));
+	res_wr->cookie = (uintptr_t)wr_waitp;
+	res = res_wr->res;
+	res->u.srq.restype = FW_RI_RES_TYPE_SRQ;
+	res->u.srq.op = FW_RI_RES_OP_WRITE;
+
+	/*
+	 * eqsize is the number of 64B entries plus the status page size.
+	 */
+	eqsize = wq->size * T4_RQ_NUM_SLOTS +
+		rdev->hw_queue.t4_eq_status_entries;
+	res->u.srq.eqid = cpu_to_be32(wq->qid);
+	res->u.srq.fetchszm_to_iqid =
+						/* no host cidx updates */
+		cpu_to_be32(FW_RI_RES_WR_HOSTFCMODE_V(0) |
+		FW_RI_RES_WR_CPRIO_V(0) |       /* don't keep in chip cache */
+		FW_RI_RES_WR_PCIECHN_V(0) |     /* set by uP at ri_init time */
+		FW_RI_RES_WR_FETCHRO_V(0));     /* relaxed_ordering */
+	res->u.srq.dcaen_to_eqsize =
+		cpu_to_be32(FW_RI_RES_WR_DCAEN_V(0) |
+		FW_RI_RES_WR_DCACPU_V(0) |
+		FW_RI_RES_WR_FBMIN_V(2) |
+		FW_RI_RES_WR_FBMAX_V(3) |
+		FW_RI_RES_WR_CIDXFTHRESHO_V(0) |
+		FW_RI_RES_WR_CIDXFTHRESH_V(0) |
+		FW_RI_RES_WR_EQSIZE_V(eqsize));
+	res->u.srq.eqaddr = cpu_to_be64(wq->dma_addr);
+	res->u.srq.srqid = cpu_to_be32(srq->idx);
+	res->u.srq.pdid = cpu_to_be32(srq->pdid);
+	res->u.srq.hwsrqsize = cpu_to_be32(wq->rqt_size);
+	res->u.srq.hwsrqaddr = cpu_to_be32(wq->rqt_hwaddr -
+			rdev->lldi.vr->rq.start);
+
+	c4iw_init_wr_wait(wr_waitp);
+
+	ret = c4iw_ref_send_wait(rdev, skb, wr_waitp, 0, wq->qid, __func__);
+	if (ret)
+		goto err_free_queue;
+
+	pr_debug("%s srq %u eqid %u pdid %u queue va %p pa 0x%llx\n"
+			" bar2_addr %p rqt addr 0x%x size %d\n",
+			__func__, srq->idx, wq->qid, srq->pdid, wq->queue,
+			(u64)virt_to_phys(wq->queue), wq->bar2_va,
+			wq->rqt_hwaddr, wq->rqt_size);
+
+	return 0;
+err_free_queue:
+	dma_free_coherent(&rdev->lldi.pdev->dev,
+			  wq->memsize, wq->queue,
+			pci_unmap_addr(wq, mapping));
+err_free_rqtpool:
+	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
+err_free_pending_wrs:
+	if (!user)
+		kfree(wq->pending_wrs);
+err_free_sw_rq:
+	if (!user)
+		kfree(wq->sw_rq);
+err_put_qpid:
+	c4iw_put_qpid(rdev, wq->qid, uctx);
+err:
+	return ret;
+}
+
+void c4iw_copy_wr_to_srq(struct t4_srq *srq, union t4_recv_wr *wqe, u8 len16)
+{
+	u64 *src, *dst;
+
+	src = (u64 *)wqe;
+	dst = (u64 *)((u8 *)srq->queue + srq->wq_pidx * T4_EQ_ENTRY_SIZE);
+	while (len16) {
+		*dst++ = *src++;
+		if (dst >= (u64 *)&srq->queue[srq->size])
+			dst = (u64 *)srq->queue;
+		*dst++ = *src++;
+		if (dst >= (u64 *)&srq->queue[srq->size])
+			dst = (u64 *)srq->queue;
+		len16--;
+	}
+}
+
+struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
+			       struct ib_udata *udata)
+{
+	struct c4iw_dev *rhp;
+	struct c4iw_srq *srq;
+	struct c4iw_pd *php;
+	struct c4iw_create_srq_resp uresp;
+	struct c4iw_ucontext *ucontext;
+	struct c4iw_mm_entry *srq_key_mm, *srq_db_key_mm;
+	int rqsize;
+	int ret;
+	int wr_len;
+
+	pr_debug("%s ib_pd %p\n", __func__, pd);
+
+	php = to_c4iw_pd(pd);
+	rhp = php->rhp;
+
+	if (!rhp->rdev.lldi.vr->srq.size)
+		return ERR_PTR(-EINVAL);
+	if (attrs->attr.max_wr > rhp->rdev.hw_queue.t4_max_rq_size)
+		return ERR_PTR(-E2BIG);
+	if (attrs->attr.max_sge > T4_MAX_RECV_SGE)
+		return ERR_PTR(-E2BIG);
+
+	/*
+	 * SRQ RQT and RQ must be a power of 2 and at least 16 deep.
+	 */
+	rqsize = attrs->attr.max_wr + 1;
+	rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
+
+	ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+
+	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+	if (!srq)
+		return ERR_PTR(-ENOMEM);
+
+	srq->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
+	if (!srq->wr_waitp) {
+		ret = -ENOMEM;
+		goto err_free_srq;
+	}
+
+	srq->idx = c4iw_alloc_srq_idx(&rhp->rdev);
+	if (srq->idx < 0) {
+		ret = -ENOMEM;
+		goto err_free_wr_wait;
+	}
+
+	wr_len = sizeof(struct fw_ri_res_wr) + sizeof(struct fw_ri_res);
+	srq->destroy_skb = alloc_skb(wr_len, GFP_KERNEL);
+	if (!srq->destroy_skb) {
+		ret = -ENOMEM;
+		goto err_free_srq_idx;
+	}
+
+	srq->rhp = rhp;
+	srq->pdid = php->pdid;
+
+	srq->wq.size = rqsize;
+	srq->wq.memsize =
+		(rqsize + rhp->rdev.hw_queue.t4_eq_status_entries) *
+		sizeof(*srq->wq.queue);
+	if (ucontext)
+		srq->wq.memsize = roundup(srq->wq.memsize, PAGE_SIZE);
+
+	ret = alloc_srq_queue(srq, ucontext ? &ucontext->uctx :
+			&rhp->rdev.uctx, srq->wr_waitp);
+	if (ret)
+		goto err_free_skb;
+	attrs->attr.max_wr = rqsize - 1;
+
+	if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6)
+		srq->flags = T4_SRQ_LIMIT_SUPPORT;
+
+	ret = insert_handle(rhp, &rhp->qpidr, srq, srq->wq.qid);
+	if (ret)
+		goto err_free_queue;
+
+	if (udata) {
+		srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL);
+		if (!srq_key_mm) {
+			ret = -ENOMEM;
+			goto err_remove_handle;
+		}
+		srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL);
+		if (!srq_db_key_mm) {
+			ret = -ENOMEM;
+			goto err_free_srq_key_mm;
+		}
+		memset(&uresp, 0, sizeof(uresp));
+		uresp.flags = srq->flags;
+		uresp.qid_mask = rhp->rdev.qpmask;
+		uresp.srqid = srq->wq.qid;
+		uresp.srq_size = srq->wq.size;
+		uresp.srq_memsize = srq->wq.memsize;
+		uresp.rqt_abs_idx = srq->wq.rqt_abs_idx;
+		spin_lock(&ucontext->mmap_lock);
+		uresp.srq_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		uresp.srq_db_gts_key = ucontext->key;
+		ucontext->key += PAGE_SIZE;
+		spin_unlock(&ucontext->mmap_lock);
+		ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+		if (ret)
+			goto err_free_srq_db_key_mm;
+		srq_key_mm->key = uresp.srq_key;
+		srq_key_mm->addr = virt_to_phys(srq->wq.queue);
+		srq_key_mm->len = PAGE_ALIGN(srq->wq.memsize);
+		insert_mmap(ucontext, srq_key_mm);
+		srq_db_key_mm->key = uresp.srq_db_gts_key;
+		srq_db_key_mm->addr = (u64)(unsigned long)srq->wq.bar2_pa;
+		srq_db_key_mm->len = PAGE_SIZE;
+		insert_mmap(ucontext, srq_db_key_mm);
+	}
+
+	pr_debug("%s srq qid %u idx %u size %u memsize %lu num_entries %u\n",
+		 __func__, srq->wq.qid, srq->idx, srq->wq.size,
+			(unsigned long)srq->wq.memsize, attrs->attr.max_wr);
+
+	spin_lock_init(&srq->lock);
+	return &srq->ibsrq;
+err_free_srq_db_key_mm:
+	kfree(srq_db_key_mm);
+err_free_srq_key_mm:
+	kfree(srq_key_mm);
+err_remove_handle:
+	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
+err_free_queue:
+	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+		       srq->wr_waitp);
+err_free_skb:
+	if (srq->destroy_skb)
+		kfree_skb(srq->destroy_skb);
+err_free_srq_idx:
+	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+err_free_wr_wait:
+	c4iw_put_wr_wait(srq->wr_waitp);
+err_free_srq:
+	kfree(srq);
+	return ERR_PTR(ret);
+}
+
+int c4iw_destroy_srq(struct ib_srq *ibsrq)
+{
+	struct c4iw_dev *rhp;
+	struct c4iw_srq *srq;
+	struct c4iw_ucontext *ucontext;
+
+	srq = to_c4iw_srq(ibsrq);
+	rhp = srq->rhp;
+
+	pr_debug("%s id %d\n", __func__, srq->wq.qid);
+
+	remove_handle(rhp, &rhp->qpidr, srq->wq.qid);
+	ucontext = ibsrq->uobject ?
+		to_c4iw_ucontext(ibsrq->uobject->context) : NULL;
+	free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx,
+		       srq->wr_waitp);
+	c4iw_free_srq_idx(&rhp->rdev, srq->idx);
+	c4iw_put_wr_wait(srq->wr_waitp);
+	kfree(srq);
+	return 0;
+}
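
For readers skimming the SRQ post path above, here is a minimal, self-contained sketch of the wrap-around copy that c4iw_copy_wr_to_srq() performs; the ring geometry, the demo payload and every name below are illustrative assumptions, not driver code.

/* Sketch only: mirrors just the wrap-around pattern of c4iw_copy_wr_to_srq(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SLOT_SIZE  64			/* bytes per slot, like T4_EQ_ENTRY_SIZE */
#define SLOT_U64S  (SLOT_SIZE / 8)
#define RING_SLOTS 8			/* hypothetical queue depth */

static uint64_t ring[RING_SLOTS * SLOT_U64S];

/* Copy "len16" 16-byte units into the ring starting at slot "pidx". */
static void copy_to_ring(unsigned int pidx, const uint64_t *src, uint8_t len16)
{
	uint64_t *d = ring + pidx * SLOT_U64S;
	uint64_t *end = ring + RING_SLOTS * SLOT_U64S;

	while (len16--) {
		/* each 16-byte unit is two 64-bit stores, wrapping after each */
		*d++ = *src++;
		if (d >= end)
			d = ring;
		*d++ = *src++;
		if (d >= end)
			d = ring;
	}
}

int main(void)
{
	uint64_t wqe[10];		/* 5 * 16 bytes of payload */

	memset(wqe, 0xab, sizeof(wqe));
	/* 80 bytes starting in the last slot wrap around into slot 0 */
	copy_to_ring(RING_SLOTS - 1, wqe, 5);
	printf("last ring byte 0x%02x, first ring byte 0x%02x\n",
	       ((uint8_t *)ring)[sizeof(ring) - 1], ((uint8_t *)ring)[0]);
	return 0;
}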
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index 0ef25ae..57ed26b 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -53,7 +53,8 @@ static int c4iw_init_qid_table(struct c4iw_rdev *rdev)
 }
 
 /* nr_* must be power of 2 */
-int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
+int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt,
+		       u32 nr_pdid, u32 nr_srqt)
 {
 	int err = 0;
 	err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1,
@@ -67,7 +68,17 @@ int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
 					nr_pdid, 1, 0);
 	if (err)
 		goto pdid_err;
+	if (!nr_srqt)
+		err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+					  1, 1, 0);
+	else
+		err = c4iw_id_table_alloc(&rdev->resource.srq_table, 0,
+					  nr_srqt, 0, 0);
+	if (err)
+		goto srq_err;
 	return 0;
+ srq_err:
+	c4iw_id_table_free(&rdev->resource.pdid_table);
  pdid_err:
 	c4iw_id_table_free(&rdev->resource.qid_table);
  qid_err:
@@ -371,13 +382,21 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
 int c4iw_rqtpool_create(struct c4iw_rdev *rdev)
 {
 	unsigned rqt_start, rqt_chunk, rqt_top;
+	int skip = 0;
 
 	rdev->rqt_pool = gen_pool_create(MIN_RQT_SHIFT, -1);
 	if (!rdev->rqt_pool)
 		return -ENOMEM;
 
-	rqt_start = rdev->lldi.vr->rq.start;
-	rqt_chunk = rdev->lldi.vr->rq.size;
+	/*
+	 * If SRQs are supported, then never use the first RQE from
+	 * the RQT region. This is because HW uses RQT index 0 as NULL.
+	 */
+	if (rdev->lldi.vr->srq.size)
+		skip = T4_RQT_ENTRY_SIZE;
+
+	rqt_start = rdev->lldi.vr->rq.start + skip;
+	rqt_chunk = rdev->lldi.vr->rq.size - skip;
 	rqt_top = rqt_start + rqt_chunk;
 
 	while (rqt_start < rqt_top) {
@@ -405,6 +424,32 @@ void c4iw_rqtpool_destroy(struct c4iw_rdev *rdev)
 	kref_put(&rdev->rqt_kref, destroy_rqtpool);
 }
 
+int c4iw_alloc_srq_idx(struct c4iw_rdev *rdev)
+{
+	int idx;
+
+	idx = c4iw_id_alloc(&rdev->resource.srq_table);
+	mutex_lock(&rdev->stats.lock);
+	if (idx == -1) {
+		rdev->stats.srqt.fail++;
+		mutex_unlock(&rdev->stats.lock);
+		return -ENOMEM;
+	}
+	rdev->stats.srqt.cur++;
+	if (rdev->stats.srqt.cur > rdev->stats.srqt.max)
+		rdev->stats.srqt.max = rdev->stats.srqt.cur;
+	mutex_unlock(&rdev->stats.lock);
+	return idx;
+}
+
+void c4iw_free_srq_idx(struct c4iw_rdev *rdev, int idx)
+{
+	c4iw_id_free(&rdev->resource.srq_table, idx);
+	mutex_lock(&rdev->stats.lock);
+	rdev->stats.srqt.cur--;
+	mutex_unlock(&rdev->stats.lock);
+}
+
 /*
  * On-Chip QP Memory.
  */
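
A hedged sketch of the cur/max/fail bookkeeping that c4iw_alloc_srq_idx()/c4iw_free_srq_idx() add above: the bitmap allocator stands in for c4iw_id_alloc()/c4iw_id_free(), locking is omitted (the driver holds rdev->stats.lock around the counters), and all names are invented for the demo.

/* Sketch only: trivial stand-in for c4iw_id_alloc()/c4iw_id_free(). */
#include <stdint.h>
#include <stdio.h>

#define NR_IDS 4

static uint32_t id_bitmap;				/* bit set => id in use */
static struct { int cur, max, fail; } srq_stats;	/* mirrors rdev->stats.srqt */

static int demo_alloc_idx(void)
{
	int i;

	for (i = 0; i < NR_IDS; i++) {
		if (!(id_bitmap & (1u << i))) {
			id_bitmap |= 1u << i;
			/* the driver updates these under rdev->stats.lock */
			srq_stats.cur++;
			if (srq_stats.cur > srq_stats.max)
				srq_stats.max = srq_stats.cur;
			return i;
		}
	}
	srq_stats.fail++;
	return -1;					/* -ENOMEM in the driver */
}

static void demo_free_idx(int idx)
{
	id_bitmap &= ~(1u << idx);
	srq_stats.cur--;
}

int main(void)
{
	int a = demo_alloc_idx();
	int b = demo_alloc_idx();

	demo_free_idx(a);
	printf("got ids %d and %d; cur=%d max=%d fail=%d\n",
	       a, b, srq_stats.cur, srq_stats.max, srq_stats.fail);
	return 0;
}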
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 8369c7c..e42021f 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -52,12 +52,16 @@ struct t4_status_page {
 	__be16 pidx;
 	u8 qp_err;	/* flit 1 - sw owns */
 	u8 db_off;
-	u8 pad;
+	u8 pad[2];
 	u16 host_wq_pidx;
 	u16 host_cidx;
 	u16 host_pidx;
+	u16 pad2;
+	u32 srqidx;
 };
 
+#define T4_RQT_ENTRY_SHIFT 6
+#define T4_RQT_ENTRY_SIZE  BIT(T4_RQT_ENTRY_SHIFT)
 #define T4_EQ_ENTRY_SIZE 64
 
 #define T4_SQ_NUM_SLOTS 5
@@ -87,6 +91,9 @@ static inline int t4_max_fr_depth(int use_dsgl)
 #define T4_RQ_NUM_BYTES (T4_EQ_ENTRY_SIZE * T4_RQ_NUM_SLOTS)
 #define T4_MAX_RECV_SGE 4
 
+#define T4_WRITE_CMPL_MAX_SGL 4
+#define T4_WRITE_CMPL_MAX_CQE 16
+
 union t4_wr {
 	struct fw_ri_res_wr res;
 	struct fw_ri_wr ri;
@@ -97,6 +104,7 @@ union t4_wr {
 	struct fw_ri_fr_nsmr_wr fr;
 	struct fw_ri_fr_nsmr_tpte_wr fr_tpte;
 	struct fw_ri_inv_lstag_wr inv;
+	struct fw_ri_rdma_write_cmpl_wr write_cmpl;
 	struct t4_status_page status;
 	__be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS];
 };
@@ -179,9 +187,32 @@ struct t4_cqe {
 			__be32 wrid_hi;
 			__be32 wrid_low;
 		} gen;
+		struct {
+			__be32 stag;
+			__be32 msn;
+			__be32 reserved;
+			__be32 abs_rqe_idx;
+		} srcqe;
+		struct {
+			__be32 mo;
+			__be32 msn;
+			/*
+			 * Use union for immediate data to be consistent with
+			 * stack's 32 bit data and iWARP spec's 64 bit data.
+			 */
+			union {
+				struct {
+					__be32 imm_data32;
+					u32 reserved;
+				} ib_imm_data;
+				__be64 imm_data64;
+			} iw_imm_data;
+		} imm_data_rcqe;
+
 		u64 drain_cookie;
+		__be64 flits[3];
 	} u;
-	__be64 reserved;
+	__be64 reserved[3];
 	__be64 bits_type_ts;
 };
 
@@ -237,6 +268,9 @@ struct t4_cqe {
 /* used for RQ completion processing */
 #define CQE_WRID_STAG(x)  (be32_to_cpu((x)->u.rcqe.stag))
 #define CQE_WRID_MSN(x)   (be32_to_cpu((x)->u.rcqe.msn))
+#define CQE_ABS_RQE_IDX(x) (be32_to_cpu((x)->u.srcqe.abs_rqe_idx))
+#define CQE_IMM_DATA(x) ( \
+	(x)->u.imm_data_rcqe.iw_imm_data.ib_imm_data.imm_data32)
 
 /* used for SQ completion processing */
 #define CQE_WRID_SQ_IDX(x)	((x)->u.scqe.cidx)
@@ -320,6 +354,7 @@ struct t4_swrqe {
 	u64 wr_id;
 	ktime_t	host_time;
 	u64 sge_ts;
+	int valid;
 };
 
 struct t4_rq {
@@ -349,8 +384,98 @@ struct t4_wq {
 	void __iomem *db;
 	struct c4iw_rdev *rdev;
 	int flushed;
+	u8 *qp_errp;
+	u32 *srqidxp;
 };
 
+struct t4_srq_pending_wr {
+	u64 wr_id;
+	union t4_recv_wr wqe;
+	u8 len16;
+};
+
+struct t4_srq {
+	union t4_recv_wr *queue;
+	dma_addr_t dma_addr;
+	DECLARE_PCI_UNMAP_ADDR(mapping);
+	struct t4_swrqe *sw_rq;
+	void __iomem *bar2_va;
+	u64 bar2_pa;
+	size_t memsize;
+	u32 bar2_qid;
+	u32 qid;
+	u32 msn;
+	u32 rqt_hwaddr;
+	u32 rqt_abs_idx;
+	u16 rqt_size;
+	u16 size;
+	u16 cidx;
+	u16 pidx;
+	u16 wq_pidx;
+	u16 wq_pidx_inc;
+	u16 in_use;
+	struct t4_srq_pending_wr *pending_wrs;
+	u16 pending_cidx;
+	u16 pending_pidx;
+	u16 pending_in_use;
+	u16 ooo_count;
+};
+
+static inline u32 t4_srq_avail(struct t4_srq *srq)
+{
+	return srq->size - 1 - srq->in_use;
+}
+
+static inline void t4_srq_produce(struct t4_srq *srq, u8 len16)
+{
+	srq->in_use++;
+	if (++srq->pidx == srq->size)
+		srq->pidx = 0;
+	srq->wq_pidx += DIV_ROUND_UP(len16 * 16, T4_EQ_ENTRY_SIZE);
+	if (srq->wq_pidx >= srq->size * T4_RQ_NUM_SLOTS)
+		srq->wq_pidx %= srq->size * T4_RQ_NUM_SLOTS;
+	srq->queue[srq->size].status.host_pidx = srq->pidx;
+}
+
+static inline void t4_srq_produce_pending_wr(struct t4_srq *srq)
+{
+	srq->pending_in_use++;
+	srq->in_use++;
+	if (++srq->pending_pidx == srq->size)
+		srq->pending_pidx = 0;
+}
+
+static inline void t4_srq_consume_pending_wr(struct t4_srq *srq)
+{
+	srq->pending_in_use--;
+	srq->in_use--;
+	if (++srq->pending_cidx == srq->size)
+		srq->pending_cidx = 0;
+}
+
+static inline void t4_srq_produce_ooo(struct t4_srq *srq)
+{
+	srq->in_use--;
+	srq->ooo_count++;
+}
+
+static inline void t4_srq_consume_ooo(struct t4_srq *srq)
+{
+	srq->cidx++;
+	if (srq->cidx == srq->size)
+		srq->cidx = 0;
+	srq->queue[srq->size].status.host_cidx = srq->cidx;
+	srq->ooo_count--;
+}
+
+static inline void t4_srq_consume(struct t4_srq *srq)
+{
+	srq->in_use--;
+	if (++srq->cidx == srq->size)
+		srq->cidx = 0;
+	srq->queue[srq->size].status.host_cidx = srq->cidx;
+}
+
 static inline int t4_rqes_posted(struct t4_wq *wq)
 {
 	return wq->rq.in_use;
@@ -384,7 +509,6 @@ static inline void t4_rq_produce(struct t4_wq *wq, u8 len16)
 static inline void t4_rq_consume(struct t4_wq *wq)
 {
 	wq->rq.in_use--;
-	wq->rq.msn++;
 	if (++wq->rq.cidx == wq->rq.size)
 		wq->rq.cidx = 0;
 }
@@ -464,6 +588,25 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src)
 	}
 }
 
+static inline void t4_ring_srq_db(struct t4_srq *srq, u16 inc, u8 len16,
+				  union t4_recv_wr *wqe)
+{
+	/* Flush host queue memory writes. */
+	wmb();
+	if (inc == 1 && srq->bar2_qid == 0 && wqe) {
+		pr_debug("%s : WC srq->pidx = %d; len16=%d\n",
+			 __func__, srq->pidx, len16);
+		pio_copy(srq->bar2_va + SGE_UDB_WCDOORBELL, (u64 *)wqe);
+	} else {
+		pr_debug("%s: DB srq->pidx = %d; len16=%d\n",
+			 __func__, srq->pidx, len16);
+		writel(PIDX_T5_V(inc) | QID_V(srq->bar2_qid),
+		       srq->bar2_va + SGE_UDB_KDOORBELL);
+	}
+	/* Flush user doorbell area writes. */
+	wmb();
+}
+
 static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
 {
 
@@ -515,12 +658,14 @@ static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
 
 static inline int t4_wq_in_error(struct t4_wq *wq)
 {
-	return wq->rq.queue[wq->rq.size].status.qp_err;
+	return *wq->qp_errp;
 }
 
-static inline void t4_set_wq_in_error(struct t4_wq *wq)
+static inline void t4_set_wq_in_error(struct t4_wq *wq, u32 srqidx)
 {
-	wq->rq.queue[wq->rq.size].status.qp_err = 1;
+	if (srqidx)
+		*wq->srqidxp = srqidx;
+	*wq->qp_errp = 1;
 }
 
 static inline void t4_disable_wq_db(struct t4_wq *wq)
@@ -565,6 +710,7 @@ struct t4_cq {
 	u16 cidx_inc;
 	u8 gen;
 	u8 error;
+	u8 *qp_errp;
 	unsigned long flags;
 };
 
@@ -698,18 +844,18 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
 
 static inline int t4_cq_in_error(struct t4_cq *cq)
 {
-	return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err;
+	return *cq->qp_errp;
 }
 
 static inline void t4_set_cq_in_error(struct t4_cq *cq)
 {
-	((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1;
+	*cq->qp_errp = 1;
 }
 #endif
 
 struct t4_dev_status_page {
 	u8 db_off;
-	u8 pad1;
+	u8 write_cmpl_supported;
 	u16 pad2;
 	u32 pad3;
 	u64 qp_start;
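
The t4_srq helpers above juggle three counters (pidx, cidx, in_use); the standalone sketch below shows just that accounting under the same "size - 1 - in_use" availability rule, with every name and the queue depth chosen only for the demo.

/* Sketch only: names, depth and the demo sequence are invented. */
#include <stdint.h>
#include <stdio.h>

struct demo_srq {
	uint16_t size;		/* ring depth in entries */
	uint16_t cidx;		/* consumer index */
	uint16_t pidx;		/* producer index */
	uint16_t in_use;	/* entries currently posted */
};

static uint32_t demo_avail(const struct demo_srq *srq)
{
	/* same rule as t4_srq_avail(): keep one entry in reserve */
	return srq->size - 1 - srq->in_use;
}

static void demo_produce(struct demo_srq *srq)
{
	srq->in_use++;
	if (++srq->pidx == srq->size)
		srq->pidx = 0;
}

static void demo_consume(struct demo_srq *srq)
{
	srq->in_use--;
	if (++srq->cidx == srq->size)
		srq->cidx = 0;
}

int main(void)
{
	struct demo_srq srq = { .size = 4 };
	int i;

	for (i = 0; i < 3; i++)
		demo_produce(&srq);
	printf("after 3 posts: pidx=%u cidx=%u in_use=%u avail=%u\n",
	       srq.pidx, srq.cidx, srq.in_use, demo_avail(&srq));

	demo_consume(&srq);
	demo_consume(&srq);
	printf("after 2 completions: pidx=%u cidx=%u in_use=%u avail=%u\n",
	       srq.pidx, srq.cidx, srq.in_use, demo_avail(&srq));
	return 0;
}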
diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
index 58c531d..cbdb300 100644
--- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
+++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h
@@ -50,7 +50,8 @@ enum fw_ri_wr_opcode {
 	FW_RI_BYPASS			= 0xd,
 	FW_RI_RECEIVE			= 0xe,
 
-	FW_RI_SGE_EC_CR_RETURN		= 0xf
+	FW_RI_SGE_EC_CR_RETURN		= 0xf,
+	FW_RI_WRITE_IMMEDIATE           = FW_RI_RDMA_INIT
 };
 
 enum fw_ri_wr_flags {
@@ -59,7 +60,8 @@ enum fw_ri_wr_flags {
 	FW_RI_SOLICITED_EVENT_FLAG	= 0x04,
 	FW_RI_READ_FENCE_FLAG		= 0x08,
 	FW_RI_LOCAL_FENCE_FLAG		= 0x10,
-	FW_RI_RDMA_READ_INVALIDATE	= 0x20
+	FW_RI_RDMA_READ_INVALIDATE	= 0x20,
+	FW_RI_RDMA_WRITE_WITH_IMMEDIATE = 0x40
 };
 
 enum fw_ri_mpa_attrs {
@@ -263,6 +265,7 @@ enum fw_ri_res_type {
 	FW_RI_RES_TYPE_SQ,
 	FW_RI_RES_TYPE_RQ,
 	FW_RI_RES_TYPE_CQ,
+	FW_RI_RES_TYPE_SRQ,
 };
 
 enum fw_ri_res_op {
@@ -296,6 +299,20 @@ struct fw_ri_res {
 			__be32 r6_lo;
 			__be64 r7;
 		} cq;
+		struct fw_ri_res_srq {
+			__u8   restype;
+			__u8   op;
+			__be16 r3;
+			__be32 eqid;
+			__be32 r4[2];
+			__be32 fetchszm_to_iqid;
+			__be32 dcaen_to_eqsize;
+			__be64 eqaddr;
+			__be32 srqid;
+			__be32 pdid;
+			__be32 hwsrqsize;
+			__be32 hwsrqaddr;
+		} srq;
 	} u;
 };
 
@@ -531,7 +548,17 @@ struct fw_ri_rdma_write_wr {
 	__u16  wrid;
 	__u8   r1[3];
 	__u8   len16;
-	__be64 r2;
+	/*
+	 * Use union for immediate data to be consistent with stack's 32 bit
+	 * data and iWARP spec's 64 bit data.
+	 */
+	union {
+		struct {
+			__be32 imm_data32;
+			u32 reserved;
+		} ib_imm_data;
+		__be64 imm_data64;
+	} iw_imm_data;
 	__be32 plen;
 	__be32 stag_sink;
 	__be64 to_sink;
@@ -568,6 +595,37 @@ struct fw_ri_send_wr {
 #define FW_RI_SEND_WR_SENDOP_G(x)	\
 	(((x) >> FW_RI_SEND_WR_SENDOP_S) & FW_RI_SEND_WR_SENDOP_M)
 
+struct fw_ri_rdma_write_cmpl_wr {
+	__u8   opcode;
+	__u8   flags;
+	__u16  wrid;
+	__u8   r1[3];
+	__u8   len16;
+	__u8   r2;
+	__u8   flags_send;
+	__u16  wrid_send;
+	__be32 stag_inv;
+	__be32 plen;
+	__be32 stag_sink;
+	__be64 to_sink;
+	union fw_ri_cmpl {
+		struct fw_ri_immd_cmpl {
+			__u8   op;
+			__u8   r1[6];
+			__u8   immdlen;
+			__u8   data[16];
+		} immd_src;
+		struct fw_ri_isgl isgl_src;
+	} u_cmpl;
+	__be64 r3;
+#ifndef C99_NOT_SUPPORTED
+	union fw_ri_write {
+		struct fw_ri_immd immd_src[0];
+		struct fw_ri_isgl isgl_src[0];
+	} u;
+#endif
+};
+
 struct fw_ri_rdma_read_wr {
 	__u8   opcode;
 	__u8   flags;
@@ -707,6 +765,10 @@ enum fw_ri_init_p2ptype {
 	FW_RI_INIT_P2PTYPE_DISABLED		= 0xf,
 };
 
+enum fw_ri_init_rqeqid_srq {
+	FW_RI_INIT_RQEQID_SRQ			= 1 << 31,
+};
+
 struct fw_ri_wr {
 	__be32 op_compl;
 	__be32 flowid_len16;
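
The immediate-data union added above lets the stack's 32-bit IB immediate and the 64-bit immediate defined by the iWARP spec share the same eight bytes of the WQE. The sketch below shows that aliasing with plain host types; htonl() is a rough stand-in for the driver's cpu_to_be32() and the field names only loosely mirror the header.

/* Sketch only: layout demo, not the driver's wire format handling. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

union iw_imm_data {
	struct {
		uint32_t imm_data32;	/* 32-bit IB immediate (big endian) */
		uint32_t reserved;
	} ib_imm_data;
	uint64_t imm_data64;		/* 64-bit iWARP immediate */
};

int main(void)
{
	union iw_imm_data imm = { 0 };

	/* store a 32-bit immediate the way the IB stack would hand it over */
	imm.ib_imm_data.imm_data32 = htonl(0x12345678);

	printf("sizeof(union) = %zu bytes\n", sizeof(imm));
	printf("same bytes read as the 64-bit iWARP immediate: 0x%016llx\n",
	       (unsigned long long)imm.imm_data64);
	return 0;
}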
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 6deb101..2c19bf7 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -8143,8 +8143,15 @@ static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
 	}
 }
 
-/*
+/**
+ * is_rcv_avail_int() - User receive context available IRQ handler
+ * @dd: valid dd
+ * @source: logical IRQ source (offset from IS_RCVAVAIL_START)
+ *
  * RX block receive available interrupt.  Source is < 160.
+ *
+ * This is the general interrupt handler for user (PSM) receive contexts,
+ * and can only be used for non-threaded IRQs.
  */
 static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
 {
@@ -8154,12 +8161,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
 	if (likely(source < dd->num_rcv_contexts)) {
 		rcd = hfi1_rcd_get_by_index(dd, source);
 		if (rcd) {
-			/* Check for non-user contexts, including vnic */
-			if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
-				rcd->do_interrupt(rcd, 0);
-			else
-				handle_user_interrupt(rcd);
-
+			handle_user_interrupt(rcd);
 			hfi1_rcd_put(rcd);
 			return;	/* OK */
 		}
@@ -8173,8 +8175,14 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
 		   err_detail, source);
 }
 
-/*
+/**
+ * is_rcv_urgent_int() - User receive context urgent IRQ handler
+ * @dd: valid dd
+ * @source: logical IRQ source (offset from IS_RCVURGENT_START)
+ *
  * RX block receive urgent interrupt.  Source is < 160.
+ *
+ * NOTE: kernel receive contexts specifically do NOT enable this IRQ.
  */
 static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
 {
@@ -8184,11 +8192,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
 	if (likely(source < dd->num_rcv_contexts)) {
 		rcd = hfi1_rcd_get_by_index(dd, source);
 		if (rcd) {
-			/* only pay attention to user urgent interrupts */
-			if (source >= dd->first_dyn_alloc_ctxt &&
-			    !rcd->is_vnic)
-				handle_user_interrupt(rcd);
-
+			handle_user_interrupt(rcd);
 			hfi1_rcd_put(rcd);
 			return;	/* OK */
 		}
@@ -8260,9 +8264,14 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
 	dd_dev_err(dd, "invalid interrupt source %u\n", source);
 }
 
-/*
- * General interrupt handler.  This is able to correctly handle
- * all interrupts in case INTx is used.
+/**
+ * general_interrupt() - General interrupt handler
+ * @irq: MSI-X IRQ vector
+ * @data: hfi1 devdata
+ *
+ * This is able to correctly handle all non-threaded interrupts.  Receive
+ * context DATA IRQs are threaded and are not supported by this handler.
+ *
  */
 static irqreturn_t general_interrupt(int irq, void *data)
 {
@@ -10130,7 +10139,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
 	       (((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
 			SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
 
-	for (i = 0; i < dd->chip_send_contexts; i++) {
+	for (i = 0; i < chip_send_contexts(dd); i++) {
 		hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
 			  i, (u32)sreg);
 		write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
@@ -11857,7 +11866,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
 		 * sequence numbers could land exactly on the same spot.
 		 * E.g. a rcd restart before the receive header wrapped.
 		 */
-		memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
+		memset(rcd->rcvhdrq, 0, rcvhdrq_size(rcd));
 
 		/* starting timeout */
 		rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
@@ -11952,9 +11961,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
 		rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
 	if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
 		rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
-	rcd->rcvctrl = rcvctrl;
 	hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
-	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
+	write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
 
 	/* work around sticky RcvCtxtStatus.BlockedRHQFull */
 	if (did_enable &&
@@ -12042,7 +12050,7 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
 				} else if (entry->flags & CNTR_SDMA) {
 					hfi1_cdbg(CNTR,
 						  "\t Per SDMA Engine\n");
-					for (j = 0; j < dd->chip_sdma_engines;
+					for (j = 0; j < chip_sdma_engines(dd);
 					     j++) {
 						val =
 						entry->rw_cntr(entry, dd, j,
@@ -12418,6 +12426,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
 	struct hfi1_pportdata *ppd;
 	const char *bit_type_32 = ",32";
 	const int bit_type_32_sz = strlen(bit_type_32);
+	u32 sdma_engines = chip_sdma_engines(dd);
 
 	/* set up the stats timer; the add_timer is done at the end */
 	timer_setup(&dd->synth_stats_timer, update_synth_timer, 0);
@@ -12450,7 +12459,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
 			}
 		} else if (dev_cntrs[i].flags & CNTR_SDMA) {
 			dev_cntrs[i].offset = dd->ndevcntrs;
-			for (j = 0; j < dd->chip_sdma_engines; j++) {
+			for (j = 0; j < sdma_engines; j++) {
 				snprintf(name, C_MAX_NAME, "%s%d",
 					 dev_cntrs[i].name, j);
 				sz += strlen(name);
@@ -12507,7 +12516,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
 				*p++ = '\n';
 			}
 		} else if (dev_cntrs[i].flags & CNTR_SDMA) {
-			for (j = 0; j < dd->chip_sdma_engines; j++) {
+			for (j = 0; j < sdma_engines; j++) {
 				snprintf(name, C_MAX_NAME, "%s%d",
 					 dev_cntrs[i].name, j);
 				memcpy(p, name, strlen(name));
@@ -13020,9 +13029,9 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
 	write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
 	write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
 	write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
-	for (i = 0; i < dd->chip_send_contexts; i++)
+	for (i = 0; i < chip_send_contexts(dd); i++)
 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
-	for (i = 0; i < dd->chip_sdma_engines; i++)
+	for (i = 0; i < chip_sdma_engines(dd); i++)
 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
 
 	write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
@@ -13030,48 +13039,30 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
 	write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
 }
 
-/* Move to pcie.c? */
-static void disable_intx(struct pci_dev *pdev)
-{
-	pci_intx(pdev, 0);
-}
-
 /**
  * hfi1_clean_up_interrupts() - Free all IRQ resources
  * @dd: valid device data data structure
  *
- * Free the MSI or INTx IRQs and assoicated PCI resources,
- * if they have been allocated.
+ * Free the MSI-X and associated PCI resources, if they have been allocated.
  */
 void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
 {
 	int i;
+	struct hfi1_msix_entry *me = dd->msix_entries;
 
 	/* remove irqs - must happen before disabling/turning off */
-	if (dd->num_msix_entries) {
-		/* MSI-X */
-		struct hfi1_msix_entry *me = dd->msix_entries;
-
-		for (i = 0; i < dd->num_msix_entries; i++, me++) {
-			if (!me->arg) /* => no irq, no affinity */
-				continue;
-			hfi1_put_irq_affinity(dd, me);
-			pci_free_irq(dd->pcidev, i, me->arg);
-		}
-
-		/* clean structures */
-		kfree(dd->msix_entries);
-		dd->msix_entries = NULL;
-		dd->num_msix_entries = 0;
-	} else {
-		/* INTx */
-		if (dd->requested_intx_irq) {
-			pci_free_irq(dd->pcidev, 0, dd);
-			dd->requested_intx_irq = 0;
-		}
-		disable_intx(dd->pcidev);
+	for (i = 0; i < dd->num_msix_entries; i++, me++) {
+		if (!me->arg) /* => no irq, no affinity */
+			continue;
+		hfi1_put_irq_affinity(dd, me);
+		pci_free_irq(dd->pcidev, i, me->arg);
 	}
 
+	/* clean structures */
+	kfree(dd->msix_entries);
+	dd->msix_entries = NULL;
+	dd->num_msix_entries = 0;
+
 	pci_free_irq_vectors(dd->pcidev);
 }
 
@@ -13121,20 +13112,6 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
 		   msix_intr);
 }
 
-static int request_intx_irq(struct hfi1_devdata *dd)
-{
-	int ret;
-
-	ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd,
-			      DRIVER_NAME "_%d", dd->unit);
-	if (ret)
-		dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
-			   ret);
-	else
-		dd->requested_intx_irq = 1;
-	return ret;
-}
-
 static int request_msix_irqs(struct hfi1_devdata *dd)
 {
 	int first_general, last_general;
@@ -13253,11 +13230,6 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
 {
 	int i;
 
-	if (!dd->num_msix_entries) {
-		synchronize_irq(pci_irq_vector(dd->pcidev, 0));
-		return;
-	}
-
 	for (i = 0; i < dd->vnic.num_ctxt; i++) {
 		struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
 		struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
@@ -13346,7 +13318,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
 {
 	u32 total;
 	int ret, request;
-	int single_interrupt = 0; /* we expect to have all the interrupts */
 
 	/*
 	 * Interrupt count:
@@ -13363,17 +13334,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
 	if (request < 0) {
 		ret = request;
 		goto fail;
-	} else if (request == 0) {
-		/* using INTx */
-		/* dd->num_msix_entries already zero */
-		single_interrupt = 1;
-		dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
-	} else if (request < total) {
-		/* using MSI-X, with reduced interrupts */
-		dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n",
-			   total, request);
-		ret = -EINVAL;
-		goto fail;
 	} else {
 		dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
 					   GFP_KERNEL);
@@ -13394,10 +13354,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
 	/* reset general handler mask, chip MSI-X mappings */
 	reset_interrupts(dd);
 
-	if (single_interrupt)
-		ret = request_intx_irq(dd);
-	else
-		ret = request_msix_irqs(dd);
+	ret = request_msix_irqs(dd);
 	if (ret)
 		goto fail;
 
@@ -13429,6 +13386,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 	int qos_rmt_count;
 	int user_rmt_reduced;
 	u32 n_usr_ctxts;
+	u32 send_contexts = chip_send_contexts(dd);
+	u32 rcv_contexts = chip_rcv_contexts(dd);
 
 	/*
 	 * Kernel receive contexts:
@@ -13450,16 +13409,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 	 * Every kernel receive context needs an ACK send context.
 	 * one send context is allocated for each VL{0-7} and VL15
 	 */
-	if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
+	if (num_kernel_contexts > (send_contexts - num_vls - 1)) {
 		dd_dev_err(dd,
 			   "Reducing # kernel rcv contexts to: %d, from %lu\n",
-			   (int)(dd->chip_send_contexts - num_vls - 1),
+			   send_contexts - num_vls - 1,
 			   num_kernel_contexts);
-		num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
+		num_kernel_contexts = send_contexts - num_vls - 1;
 	}
 
 	/* Accommodate VNIC contexts if possible */
-	if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) {
+	if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) {
 		dd_dev_err(dd, "No receive contexts available for VNIC\n");
 		num_vnic_contexts = 0;
 	}
@@ -13477,13 +13436,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 	/*
 	 * Adjust the counts given a global max.
 	 */
-	if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) {
+	if (total_contexts + n_usr_ctxts > rcv_contexts) {
 		dd_dev_err(dd,
 			   "Reducing # user receive contexts to: %d, from %u\n",
-			   (int)(dd->chip_rcv_contexts - total_contexts),
+			   rcv_contexts - total_contexts,
 			   n_usr_ctxts);
 		/* recalculate */
-		n_usr_ctxts = dd->chip_rcv_contexts - total_contexts;
+		n_usr_ctxts = rcv_contexts - total_contexts;
 	}
 
 	/* each user context requires an entry in the RMT */
@@ -13509,7 +13468,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 	dd->freectxts = n_usr_ctxts;
 	dd_dev_info(dd,
 		    "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
-		    (int)dd->chip_rcv_contexts,
+		    rcv_contexts,
 		    (int)dd->num_rcv_contexts,
 		    (int)dd->n_krcv_queues,
 		    dd->num_vnic_contexts,
@@ -13527,7 +13486,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 	 *   contexts.
 	 */
 	dd->rcv_entries.group_size = RCV_INCREMENT;
-	ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
+	ngroups = chip_rcv_array_count(dd) / dd->rcv_entries.group_size;
 	dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
 	dd->rcv_entries.nctxt_extra = ngroups -
 		(dd->num_rcv_contexts * dd->rcv_entries.ngroups);
@@ -13552,7 +13511,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
 		dd_dev_info(
 			dd,
 			"send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n",
-			dd->chip_send_contexts,
+			send_contexts,
 			dd->num_send_contexts,
 			dd->sc_sizes[SC_KERNEL].count,
 			dd->sc_sizes[SC_ACK].count,
@@ -13610,7 +13569,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
 		write_csr(dd, CCE_INT_MAP + (8 * i), 0);
 
 	/* SendCtxtCreditReturnAddr */
-	for (i = 0; i < dd->chip_send_contexts; i++)
+	for (i = 0; i < chip_send_contexts(dd); i++)
 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
 
 	/* PIO Send buffers */
@@ -13623,7 +13582,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
 	/* RcvHdrAddr */
 	/* RcvHdrTailAddr */
 	/* RcvTidFlowTable */
-	for (i = 0; i < dd->chip_rcv_contexts; i++) {
+	for (i = 0; i < chip_rcv_contexts(dd); i++) {
 		write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
 		write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
 		for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
@@ -13631,7 +13590,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
 	}
 
 	/* RcvArray */
-	for (i = 0; i < dd->chip_rcv_array_count; i++)
+	for (i = 0; i < chip_rcv_array_count(dd); i++)
 		hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0);
 
 	/* RcvQPMapTable */
@@ -13789,7 +13748,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
 		write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0);
 	for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
 		write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0);
-	for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++)
+	for (i = 0; i < chip_send_contexts(dd) / NUM_CONTEXTS_PER_SET; i++)
 		write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0);
 	for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
 		write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0);
@@ -13817,7 +13776,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
 	/*
 	 * TXE Per-Context CSRs
 	 */
-	for (i = 0; i < dd->chip_send_contexts; i++) {
+	for (i = 0; i < chip_send_contexts(dd); i++) {
 		write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
 		write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
@@ -13835,7 +13794,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
 	/*
 	 * TXE Per-SDMA CSRs
 	 */
-	for (i = 0; i < dd->chip_sdma_engines; i++) {
+	for (i = 0; i < chip_sdma_engines(dd); i++) {
 		write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
 		/* SEND_DMA_STATUS read-only */
 		write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
@@ -13968,7 +13927,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
 	/*
 	 * RXE Kernel and User Per-Context CSRs
 	 */
-	for (i = 0; i < dd->chip_rcv_contexts; i++) {
+	for (i = 0; i < chip_rcv_contexts(dd); i++) {
 		/* kernel */
 		write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
 		/* RCV_CTXT_STATUS read-only */
@@ -14084,13 +14043,13 @@ static int init_chip(struct hfi1_devdata *dd)
 
 	/* disable send contexts and SDMA engines */
 	write_csr(dd, SEND_CTRL, 0);
-	for (i = 0; i < dd->chip_send_contexts; i++)
+	for (i = 0; i < chip_send_contexts(dd); i++)
 		write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
-	for (i = 0; i < dd->chip_sdma_engines; i++)
+	for (i = 0; i < chip_sdma_engines(dd); i++)
 		write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
 	/* disable port (turn off RXE inbound traffic) and contexts */
 	write_csr(dd, RCV_CTRL, 0);
-	for (i = 0; i < dd->chip_rcv_contexts; i++)
+	for (i = 0; i < chip_rcv_contexts(dd); i++)
 		write_csr(dd, RCV_CTXT_CTRL, 0);
 	/* mask all interrupt sources */
 	for (i = 0; i < CCE_NUM_INT_CSRS; i++)
@@ -14709,9 +14668,9 @@ static void init_txe(struct hfi1_devdata *dd)
 	write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
 
 	/* enable all per-context and per-SDMA engine errors */
-	for (i = 0; i < dd->chip_send_contexts; i++)
+	for (i = 0; i < chip_send_contexts(dd); i++)
 		write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
-	for (i = 0; i < dd->chip_sdma_engines; i++)
+	for (i = 0; i < chip_sdma_engines(dd); i++)
 		write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
 
 	/* set the local CU to AU mapping */
@@ -14979,11 +14938,13 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 		"Functional simulator"
 	};
 	struct pci_dev *parent = pdev->bus->self;
+	u32 sdma_engines;
 
 	dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
 				sizeof(struct hfi1_pportdata));
 	if (IS_ERR(dd))
 		goto bail;
+	sdma_engines = chip_sdma_engines(dd);
 	ppd = dd->pport;
 	for (i = 0; i < dd->num_pports; i++, ppd++) {
 		int vl;
@@ -15081,11 +15042,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	/* give a reasonable active value, will be set on link up */
 	dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
 
-	dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
-	dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
-	dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
-	dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
-	dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
 	/* fix up link widths for emulation _p */
 	ppd = dd->pport;
 	if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
@@ -15096,11 +15052,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 				OPA_LINK_WIDTH_1X;
 	}
 	/* insure num_vls isn't larger than number of sdma engines */
-	if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
+	if (HFI1_CAP_IS_KSET(SDMA) && num_vls > sdma_engines) {
 		dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
-			   num_vls, dd->chip_sdma_engines);
-		num_vls = dd->chip_sdma_engines;
-		ppd->vls_supported = dd->chip_sdma_engines;
+			   num_vls, sdma_engines);
+		num_vls = sdma_engines;
+		ppd->vls_supported = sdma_engines;
 		ppd->vls_operational = ppd->vls_supported;
 	}
 
@@ -15216,13 +15172,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
 	 */
 	aspm_init(dd);
 
-	dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
-	/*
-	 * rcd[0] is guaranteed to be valid by this point. Also, all
-	 * context are using the same value, as per the module parameter.
-	 */
-	dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
-
 	ret = init_pervl_scs(dd);
 	if (ret)
 		goto bail_cleanup;
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index fdf389e..36b04d6 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -656,6 +656,36 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt,
 	write_csr(dd, offset0 + (0x1000 * ctxt), value);
 }
 
+static inline u32 chip_rcv_contexts(struct hfi1_devdata *dd)
+{
+	return read_csr(dd, RCV_CONTEXTS);
+}
+
+static inline u32 chip_send_contexts(struct hfi1_devdata *dd)
+{
+	return read_csr(dd, SEND_CONTEXTS);
+}
+
+static inline u32 chip_sdma_engines(struct hfi1_devdata *dd)
+{
+	return read_csr(dd, SEND_DMA_ENGINES);
+}
+
+static inline u32 chip_pio_mem_size(struct hfi1_devdata *dd)
+{
+	return read_csr(dd, SEND_PIO_MEM_SIZE);
+}
+
+static inline u32 chip_sdma_mem_size(struct hfi1_devdata *dd)
+{
+	return read_csr(dd, SEND_DMA_MEM_SIZE);
+}
+
+static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd)
+{
+	return read_csr(dd, RCV_ARRAY_CNT);
+}
+
 u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
 	       u32 dw_len);
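
Aside (not part of the patch): the new chip_*() accessors above replace the chip_* counters that used to be cached in hfi1_devdata; callers now read the CSR each time they need the value, which shrinks the hot devdata structure at the cost of an occasional extra register read on slow paths. A minimal sketch of the same pattern follows; struct my_dev, the 0x100 offset and init_engine() are hypothetical stand-ins, not hfi1 symbols.

    #include <linux/io.h>
    #include <linux/types.h>

    struct my_dev {
    	void __iomem *regs;			/* mapped CSR space */
    };

    static void init_engine(struct my_dev *dev, u32 idx);	/* hypothetical */

    /* read the count on demand instead of caching it at probe time */
    static inline u32 my_engine_count(struct my_dev *dev)
    {
    	return readl(dev->regs + 0x100);	/* hypothetical CSR offset */
    }

    static void my_init_engines(struct my_dev *dev)
    {
    	u32 i, n = my_engine_count(dev);	/* hoist for tight loops */

    	for (i = 0; i < n; i++)
    		init_engine(dev, i);
    }
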
 
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 94dca95..a41f855 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -208,25 +208,25 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
 			(offset * RCV_BUF_BLOCK_SIZE));
 }
 
-static inline void *hfi1_get_header(struct hfi1_devdata *dd,
+static inline void *hfi1_get_header(struct hfi1_ctxtdata *rcd,
 				    __le32 *rhf_addr)
 {
 	u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
 
-	return (void *)(rhf_addr - dd->rhf_offset + offset);
+	return (void *)(rhf_addr - rcd->rhf_offset + offset);
 }
 
-static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd,
+static inline struct ib_header *hfi1_get_msgheader(struct hfi1_ctxtdata *rcd,
 						   __le32 *rhf_addr)
 {
-	return (struct ib_header *)hfi1_get_header(dd, rhf_addr);
+	return (struct ib_header *)hfi1_get_header(rcd, rhf_addr);
 }
 
 static inline struct hfi1_16b_header
-		*hfi1_get_16B_header(struct hfi1_devdata *dd,
+		*hfi1_get_16B_header(struct hfi1_ctxtdata *rcd,
 				     __le32 *rhf_addr)
 {
-	return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr);
+	return (struct hfi1_16b_header *)hfi1_get_header(rcd, rhf_addr);
 }
 
 /*
@@ -591,13 +591,12 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 	init_ps_mdata(&mdata, packet);
 
 	while (1) {
-		struct hfi1_devdata *dd = rcd->dd;
 		struct hfi1_ibport *ibp = rcd_to_iport(rcd);
 		__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
-					 dd->rhf_offset;
+					 packet->rcd->rhf_offset;
 		struct rvt_qp *qp;
 		struct ib_header *hdr;
-		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+		struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi;
 		u64 rhf = rhf_to_cpu(rhf_addr);
 		u32 etype = rhf_rcv_type(rhf), qpn, bth1;
 		int is_ecn = 0;
@@ -612,7 +611,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
 		if (etype != RHF_RCV_TYPE_IB)
 			goto next;
 
-		packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
+		packet->hdr = hfi1_get_msgheader(packet->rcd, rhf_addr);
 		hdr = packet->hdr;
 		lnh = ib_get_lnh(hdr);
 
@@ -718,7 +717,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
 	ret = check_max_packet(packet, thread);
 
 	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
-				     packet->rcd->dd->rhf_offset;
+				     packet->rcd->rhf_offset;
 	packet->rhf = rhf_to_cpu(packet->rhf_addr);
 
 	return ret;
@@ -757,7 +756,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 	 * crashing down. There is no need to eat another
 	 * comparison in this performance critical code.
 	 */
-	packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet);
+	packet->rcd->rhf_rcv_function_map[packet->etype](packet);
 	packet->numpkt++;
 
 	/* Set up for the next packet */
@@ -768,7 +767,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
 	ret = check_max_packet(packet, thread);
 
 	packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
-				      packet->rcd->dd->rhf_offset;
+				      packet->rcd->rhf_offset;
 	packet->rhf = rhf_to_cpu(packet->rhf_addr);
 
 	return ret;
@@ -949,12 +948,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
 	u8 sc = SC15_PACKET;
 
 	if (etype == RHF_RCV_TYPE_IB) {
-		struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+		struct ib_header *hdr = hfi1_get_msgheader(packet->rcd,
 							   packet->rhf_addr);
 		sc = hfi1_9B_get_sc5(hdr, packet->rhf);
 	} else if (etype == RHF_RCV_TYPE_BYPASS) {
 		struct hfi1_16b_header *hdr = hfi1_get_16B_header(
-						packet->rcd->dd,
+						packet->rcd,
 						packet->rhf_addr);
 		sc = hfi1_16B_get_sc(hdr);
 	}
@@ -1034,7 +1033,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
 			packet.rhqoff += packet.rsize;
 			packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
 					  packet.rhqoff +
-					  dd->rhf_offset;
+					  rcd->rhf_offset;
 			packet.rhf = rhf_to_cpu(packet.rhf_addr);
 
 		} else if (skip_pkt) {
@@ -1384,7 +1383,7 @@ int hfi1_reset_device(int unit)
 static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
 {
 	packet->hdr = (struct hfi1_ib_message_header *)
-			hfi1_get_msgheader(packet->rcd->dd,
+			hfi1_get_msgheader(packet->rcd,
 					   packet->rhf_addr);
 	packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
 }
@@ -1485,7 +1484,7 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
 	u8 l4;
 
 	packet->hdr = (struct hfi1_16b_header *)
-			hfi1_get_16B_header(packet->rcd->dd,
+			hfi1_get_16B_header(packet->rcd,
 					    packet->rhf_addr);
 	l4 = hfi1_16B_get_l4(packet->hdr);
 	if (l4 == OPA_16B_L4_IB_LOCAL) {
@@ -1575,7 +1574,7 @@ void handle_eflags(struct hfi1_packet *packet)
  * The following functions are called by the interrupt handler. They are type
  * specific handlers for each packet type.
  */
-int process_receive_ib(struct hfi1_packet *packet)
+static int process_receive_ib(struct hfi1_packet *packet)
 {
 	if (hfi1_setup_9B_packet(packet))
 		return RHF_RCV_CONTINUE;
@@ -1607,7 +1606,7 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
 	return false;
 }
 
-int process_receive_bypass(struct hfi1_packet *packet)
+static int process_receive_bypass(struct hfi1_packet *packet)
 {
 	struct hfi1_devdata *dd = packet->rcd->dd;
 
@@ -1649,7 +1648,7 @@ int process_receive_bypass(struct hfi1_packet *packet)
 	return RHF_RCV_CONTINUE;
 }
 
-int process_receive_error(struct hfi1_packet *packet)
+static int process_receive_error(struct hfi1_packet *packet)
 {
 	/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
 	if (unlikely(
@@ -1668,7 +1667,7 @@ int process_receive_error(struct hfi1_packet *packet)
 	return RHF_RCV_CONTINUE;
 }
 
-int kdeth_process_expected(struct hfi1_packet *packet)
+static int kdeth_process_expected(struct hfi1_packet *packet)
 {
 	hfi1_setup_9B_packet(packet);
 	if (unlikely(hfi1_dbg_should_fault_rx(packet)))
@@ -1682,7 +1681,7 @@ int kdeth_process_expected(struct hfi1_packet *packet)
 	return RHF_RCV_CONTINUE;
 }
 
-int kdeth_process_eager(struct hfi1_packet *packet)
+static int kdeth_process_eager(struct hfi1_packet *packet)
 {
 	hfi1_setup_9B_packet(packet);
 	if (unlikely(hfi1_dbg_should_fault_rx(packet)))
@@ -1695,7 +1694,7 @@ int kdeth_process_eager(struct hfi1_packet *packet)
 	return RHF_RCV_CONTINUE;
 }
 
-int process_receive_invalid(struct hfi1_packet *packet)
+static int process_receive_invalid(struct hfi1_packet *packet)
 {
 	dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
 		   rhf_rcv_type(packet->rhf));
@@ -1719,9 +1718,8 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
 	init_ps_mdata(&mdata, &packet);
 
 	while (1) {
-		struct hfi1_devdata *dd = rcd->dd;
 		__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
-					 dd->rhf_offset;
+					 rcd->rhf_offset;
 		struct ib_header *hdr;
 		u64 rhf = rhf_to_cpu(rhf_addr);
 		u32 etype = rhf_rcv_type(rhf), qpn;
@@ -1738,7 +1736,7 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
 		if (etype > RHF_RCV_TYPE_IB)
 			goto next;
 
-		packet.hdr = hfi1_get_msgheader(dd, rhf_addr);
+		packet.hdr = hfi1_get_msgheader(rcd, rhf_addr);
 		hdr = packet.hdr;
 
 		lnh = be16_to_cpu(hdr->lrh[0]) & 3;
@@ -1760,3 +1758,14 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
 		update_ps_mdata(&mdata, rcd);
 	}
 }
+
+const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
+	[RHF_RCV_TYPE_EXPECTED] = kdeth_process_expected,
+	[RHF_RCV_TYPE_EAGER] = kdeth_process_eager,
+	[RHF_RCV_TYPE_IB] = process_receive_ib,
+	[RHF_RCV_TYPE_ERROR] = process_receive_error,
+	[RHF_RCV_TYPE_BYPASS] = process_receive_bypass,
+	[RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
+	[RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
+	[RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
+};
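
Aside (not part of the patch): compared with the old per-device normal_rhf_rcv_functions[] that hfi1_init() filled in by hand, the table above is a single const, file-scope array built with designated initializers, and the handlers it points at can become static. A short sketch of the idiom, using hypothetical pkt_* names rather than the real RHF types:

    struct my_packet;				/* opaque for the sketch */
    typedef int (*pkt_handler)(struct my_packet *pkt);

    enum pkt_type { PKT_DATA, PKT_ERROR, PKT_TYPE_MAX };

    static int handle_data(struct my_packet *pkt)    { return 0; }
    static int handle_invalid(struct my_packet *pkt) { return 0; }

    /* one read-only table shared by every context and device */
    static const pkt_handler pkt_handlers[PKT_TYPE_MAX] = {
    	[PKT_DATA]  = handle_data,
    	[PKT_ERROR] = handle_invalid,
    };

    static int dispatch(struct my_packet *pkt, enum pkt_type type)
    {
    	return pkt_handlers[type](pkt);
    }
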
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 0fc4aa9..1fc7564 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -411,7 +411,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
 		mapio = 1;
 		break;
 	case RCV_HDRQ:
-		memlen = uctxt->rcvhdrq_size;
+		memlen = rcvhdrq_size(uctxt);
 		memvirt = uctxt->rcvhdrq;
 		break;
 	case RCV_EGRBUF: {
@@ -521,7 +521,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
 		break;
 	case SUBCTXT_RCV_HDRQ:
 		memaddr = (u64)uctxt->subctxt_rcvhdr_base;
-		memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
+		memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt;
 		flags |= VM_IO | VM_DONTEXPAND;
 		vmf = 1;
 		break;
@@ -985,7 +985,11 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
 	 * sub contexts.
 	 * This has to be done here so the rest of the sub-contexts find the
 	 * proper base context.
+	 * NOTE: _set_bit() can be used here because the context creation is
+	 * protected by the mutex (rather than the spin_lock), and will be the
+	 * very first instance of this context.
 	 */
+	__set_bit(0, uctxt->in_use_ctxts);
 	if (uinfo->subctxt_cnt)
 		init_subctxts(uctxt, uinfo);
 	uctxt->userversion = uinfo->userversion;
@@ -1040,7 +1044,7 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
 		return -ENOMEM;
 
 	/* We can take the size of the RcvHdr Queue from the master */
-	uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
+	uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) *
 						  num_subctxts);
 	if (!uctxt->subctxt_rcvhdr_base) {
 		ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4ab8b5b..d947031 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -169,12 +169,6 @@ extern const struct pci_error_handlers hfi1_pci_err_handler;
 struct hfi1_opcode_stats_perctx;
 
 struct ctxt_eager_bufs {
-	ssize_t size;            /* total size of eager buffers */
-	u32 count;               /* size of buffers array */
-	u32 numbufs;             /* number of buffers allocated */
-	u32 alloced;             /* number of rcvarray entries used */
-	u32 rcvtid_size;         /* size of each eager rcv tid */
-	u32 threshold;           /* head update threshold */
 	struct eager_buffer {
 		void *addr;
 		dma_addr_t dma;
@@ -184,6 +178,12 @@ struct ctxt_eager_bufs {
 		void *addr;
 		dma_addr_t dma;
 	} *rcvtids;
+	u32 size;                /* total size of eager buffers */
+	u32 rcvtid_size;         /* size of each eager rcv tid */
+	u16 count;               /* size of buffers array */
+	u16 numbufs;             /* number of buffers allocated */
+	u16 alloced;             /* number of rcvarray entries used */
+	u16 threshold;           /* head update threshold */
 };
 
 struct exp_tid_set {
@@ -191,43 +191,84 @@ struct exp_tid_set {
 	u32 count;
 };
 
+typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
 struct hfi1_ctxtdata {
-	/* shadow the ctxt's RcvCtrl register */
-	u64 rcvctrl;
 	/* rcvhdrq base, needs mmap before useful */
 	void *rcvhdrq;
 	/* kernel virtual address where hdrqtail is updated */
 	volatile __le64 *rcvhdrtail_kvaddr;
-	/* when waiting for rcv or pioavail */
-	wait_queue_head_t wait;
-	/* rcvhdrq size (for freeing) */
-	size_t rcvhdrq_size;
+	/* so functions that need physical port can get it easily */
+	struct hfi1_pportdata *ppd;
+	/* so file ops can get at unit */
+	struct hfi1_devdata *dd;
+	/* this receive context's assigned PIO ACK send context */
+	struct send_context *sc;
+	/* per context recv functions */
+	const rhf_rcv_function_ptr *rhf_rcv_function_map;
+	/*
+	 * The interrupt handler for a particular receive context can vary
+	 * throughout it's lifetime. This is not a lock protected data member so
+	 * it must be updated atomically and the prev and new value must always
+	 * be valid. Worst case is we process an extra interrupt and up to 64
+	 * packets with the wrong interrupt handler.
+	 */
+	int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+	/* verbs rx_stats per rcd */
+	struct hfi1_opcode_stats_perctx *opstats;
+	/* clear interrupt mask */
+	u64 imask;
+	/* ctxt rcvhdrq head offset */
+	u32 head;
 	/* number of rcvhdrq entries */
 	u16 rcvhdrq_cnt;
+	u8 ireg;	/* clear interrupt register */
+	/* receive packet sequence counter */
+	u8 seq_cnt;
 	/* size of each of the rcvhdrq entries */
-	u16 rcvhdrqentsize;
+	u8 rcvhdrqentsize;
+	/* offset of RHF within receive header entry */
+	u8 rhf_offset;
+	/* dynamic receive available interrupt timeout */
+	u8 rcvavail_timeout;
+	/* Indicates that this is vnic context */
+	bool is_vnic;
+	/* vnic queue index this context is mapped to */
+	u8 vnic_q_idx;
+	/* Is ASPM interrupt supported for this context */
+	bool aspm_intr_supported;
+	/* ASPM state (enabled/disabled) for this context */
+	bool aspm_enabled;
+	/* Is ASPM processing enabled for this context (in intr context) */
+	bool aspm_intr_enable;
+	struct ctxt_eager_bufs egrbufs;
+	/* QPs waiting for context processing */
+	struct list_head qp_wait_list;
+	/* tid allocation lists */
+	struct exp_tid_set tid_group_list;
+	struct exp_tid_set tid_used_list;
+	struct exp_tid_set tid_full_list;
+
+	/* Timer for re-enabling ASPM if interrupt activity quiets down */
+	struct timer_list aspm_timer;
+	/* per-context configuration flags */
+	unsigned long flags;
+	/* array of tid_groups */
+	struct tid_group  *groups;
 	/* mmap of hdrq, must fit in 44 bits */
 	dma_addr_t rcvhdrq_dma;
 	dma_addr_t rcvhdrqtailaddr_dma;
-	struct ctxt_eager_bufs egrbufs;
-	/* this receive context's assigned PIO ACK send context */
-	struct send_context *sc;
-
-	/* dynamic receive available interrupt timeout */
-	u32 rcvavail_timeout;
+	/* Last interrupt timestamp */
+	ktime_t aspm_ts_last_intr;
+	/* Last timestamp at which we scheduled a timer for this context */
+	ktime_t aspm_ts_timer_sched;
+	/* Lock to serialize between intr, timer intr and user threads */
+	spinlock_t aspm_lock;
 	/* Reference count the base context usage */
 	struct kref kref;
-
-	/* Device context index */
-	u16 ctxt;
-	/*
-	 * non-zero if ctxt can be shared, and defines the maximum number of
-	 * sub-contexts for this device context.
-	 */
-	u16 subctxt_cnt;
-	/* non-zero if ctxt is being shared. */
-	u16 subctxt_id;
-	u8 uuid[16];
+	/* numa node of this context */
+	int numa_id;
+	/* associated msix interrupt. */
+	s16 msix_intr;
 	/* job key */
 	u16 jkey;
 	/* number of RcvArray groups for this context. */
@@ -238,87 +279,59 @@ struct hfi1_ctxtdata {
 	u16 expected_count;
 	/* index of first expected TID entry. */
 	u16 expected_base;
-	/* array of tid_groups */
-	struct tid_group  *groups;
+	/* Device context index */
+	u8 ctxt;
 
-	struct exp_tid_set tid_group_list;
-	struct exp_tid_set tid_used_list;
-	struct exp_tid_set tid_full_list;
-
-	/* lock protecting all Expected TID data of user contexts */
+	/* PSM Specific fields */
+	/* lock protecting all Expected TID data */
 	struct mutex exp_mutex;
-	/* per-context configuration flags */
-	unsigned long flags;
-	/* per-context event flags for fileops/intr communication */
-	unsigned long event_flags;
-	/* total number of polled urgent packets */
-	u32 urgent;
-	/* saved total number of polled urgent packets for poll edge trigger */
-	u32 urgent_poll;
+	/* when waiting for rcv or pioavail */
+	wait_queue_head_t wait;
+	/* uuid from PSM */
+	u8 uuid[16];
 	/* same size as task_struct .comm[], command that opened context */
 	char comm[TASK_COMM_LEN];
-	/* so file ops can get at unit */
-	struct hfi1_devdata *dd;
-	/* so functions that need physical port can get it easily */
-	struct hfi1_pportdata *ppd;
-	/* associated msix interrupt */
-	u32 msix_intr;
+	/* Bitmask of in use context(s) */
+	DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
+	/* per-context event flags for fileops/intr communication */
+	unsigned long event_flags;
 	/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
 	void *subctxt_uregbase;
 	/* An array of pages for the eager receive buffers * N */
 	void *subctxt_rcvegrbuf;
 	/* An array of pages for the eager header queue entries * N */
 	void *subctxt_rcvhdr_base;
-	/* Bitmask of in use context(s) */
-	DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
-	/* The version of the library which opened this ctxt */
-	u32 userversion;
+	/* total number of polled urgent packets */
+	u32 urgent;
+	/* saved total number of polled urgent packets for poll edge trigger */
+	u32 urgent_poll;
 	/* Type of packets or conditions we want to poll for */
 	u16 poll_type;
-	/* receive packet sequence counter */
-	u8 seq_cnt;
-	/* ctxt rcvhdrq head offset */
-	u32 head;
-	/* QPs waiting for context processing */
-	struct list_head qp_wait_list;
-	/* interrupt handling */
-	u64 imask;	/* clear interrupt mask */
-	int ireg;	/* clear interrupt register */
-	int numa_id; /* numa node of this context */
-	/* verbs rx_stats per rcd */
-	struct hfi1_opcode_stats_perctx *opstats;
-
-	/* Is ASPM interrupt supported for this context */
-	bool aspm_intr_supported;
-	/* ASPM state (enabled/disabled) for this context */
-	bool aspm_enabled;
-	/* Timer for re-enabling ASPM if interrupt activity quietens down */
-	struct timer_list aspm_timer;
-	/* Lock to serialize between intr, timer intr and user threads */
-	spinlock_t aspm_lock;
-	/* Is ASPM processing enabled for this context (in intr context) */
-	bool aspm_intr_enable;
-	/* Last interrupt timestamp */
-	ktime_t aspm_ts_last_intr;
-	/* Last timestamp at which we scheduled a timer for this context */
-	ktime_t aspm_ts_timer_sched;
-
+	/* non-zero if ctxt is being shared. */
+	u16 subctxt_id;
+	/* The version of the library which opened this ctxt */
+	u32 userversion;
 	/*
-	 * The interrupt handler for a particular receive context can vary
-	 * throughout it's lifetime. This is not a lock protected data member so
-	 * it must be updated atomically and the prev and new value must always
-	 * be valid. Worst case is we process an extra interrupt and up to 64
-	 * packets with the wrong interrupt handler.
+	 * non-zero if ctxt can be shared, and defines the maximum number of
+	 * sub-contexts for this device context.
 	 */
-	int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+	u8 subctxt_cnt;
 
-	/* Indicates that this is vnic context */
-	bool is_vnic;
-
-	/* vnic queue index this context is mapped to */
-	u8 vnic_q_idx;
 };
 
+/**
+ * rcvhdrq_size - return total size in bytes for header queue
+ * @rcd: the receive context
+ *
+ * rcvhdrqentsize is in DWs, so we have to convert to bytes
+ *
+ */
+static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd)
+{
+	return PAGE_ALIGN(rcd->rcvhdrq_cnt *
+			  rcd->rcvhdrqentsize * sizeof(u32));
+}
+
 /*
  * Represents a single packet at a high level. Put commonly computed things in
  * here so we do not have to keep doing them over and over. The rule of thumb is
@@ -897,12 +910,11 @@ struct hfi1_pportdata {
 	u64 vl_xmit_flit_cnt[C_VL_COUNT + 1];
 };
 
-typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
-
 typedef void (*opcode_handler)(struct hfi1_packet *packet);
 typedef void (*hfi1_make_req)(struct rvt_qp *qp,
 			      struct hfi1_pkt_state *ps,
 			      struct rvt_swqe *wqe);
+extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
 
 
 /* return values for the RHF receive functions */
@@ -1046,8 +1058,6 @@ struct hfi1_devdata {
 	dma_addr_t                          sdma_pad_phys;
 	/* for deallocation */
 	size_t                              sdma_heads_size;
-	/* number from the chip */
-	u32                                 chip_sdma_engines;
 	/* num used */
 	u32                                 num_sdma;
 	/* array of engines sized by num_sdma */
@@ -1102,8 +1112,6 @@ struct hfi1_devdata {
 	/* base receive interrupt timeout, in CSR units */
 	u32 rcv_intr_timeout_csr;
 
-	u32 freezelen; /* max length of freezemsg */
-	u64 __iomem *egrtidbase;
 	spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
 	spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
 	spinlock_t uctxt_lock; /* protect rcd changes */
@@ -1130,25 +1138,6 @@ struct hfi1_devdata {
 	/* Base GUID for device (network order) */
 	u64 base_guid;
 
-	/* these are the "32 bit" regs */
-
-	/* value we put in kr_rcvhdrsize */
-	u32 rcvhdrsize;
-	/* number of receive contexts the chip supports */
-	u32 chip_rcv_contexts;
-	/* number of receive array entries */
-	u32 chip_rcv_array_count;
-	/* number of PIO send contexts the chip supports */
-	u32 chip_send_contexts;
-	/* number of bytes in the PIO memory buffer */
-	u32 chip_pio_mem_size;
-	/* number of bytes in the SDMA memory buffer */
-	u32 chip_sdma_mem_size;
-
-	/* size of each rcvegrbuffer */
-	u32 rcvegrbufsize;
-	/* log2 of above */
-	u16 rcvegrbufsize_shift;
 	/* both sides of the PCIe link are gen3 capable */
 	u8 link_gen3_capable;
 	u8 dc_shutdown;
@@ -1221,9 +1210,6 @@ struct hfi1_devdata {
 	u32 num_msix_entries;
 	u32 first_dyn_msix_idx;
 
-	/* INTx information */
-	u32 requested_intx_irq;		/* did we request one? */
-
 	/* general interrupt: mask of handled interrupts */
 	u64 gi_mask[CCE_NUM_INT_CSRS];
 
@@ -1289,8 +1275,6 @@ struct hfi1_devdata {
 	u64 sw_cce_err_status_aggregate;
 	/* Software counter that aggregates all bypass packet rcv errors */
 	u64 sw_rcv_bypass_packet_errors;
-	/* receive interrupt function */
-	rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
 
 	/* Save the enabled LCB error bits */
 	u64 lcb_err_en;
@@ -1329,10 +1313,7 @@ struct hfi1_devdata {
 	/* seqlock for sc2vl */
 	seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
 	u64 sc2vl[4];
-	/* receive interrupt functions */
-	rhf_rcv_function_ptr *rhf_rcv_function_map;
 	u64 __percpu *rcv_limit;
-	u16 rhf_offset; /* offset of RHF within receive header entry */
 	/* adding a new field here would make it part of this cacheline */
 
 	/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
@@ -1471,7 +1452,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp,
 /* calculate the current RHF address */
 static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
 {
-	return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset;
+	return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
 }
 
 int hfi1_reset_device(int);
@@ -2021,12 +2002,6 @@ static inline void flush_wc(void)
 }
 
 void handle_eflags(struct hfi1_packet *packet);
-int process_receive_ib(struct hfi1_packet *packet);
-int process_receive_bypass(struct hfi1_packet *packet);
-int process_receive_error(struct hfi1_packet *packet);
-int kdeth_process_expected(struct hfi1_packet *packet);
-int kdeth_process_eager(struct hfi1_packet *packet);
-int process_receive_invalid(struct hfi1_packet *packet);
 void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd);
 
 /* global module parameter variables */
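
Aside (not part of the patch): the rcvhdrq_size() helper added above replaces the rcvhdrq_size field that used to be stored per context; the size is cheap to recompute because rcvhdrqentsize counts 32-bit dwords. A worked example with illustrative numbers (not taken from the patch):

    rcvhdrq_cnt    = 2048 entries             (illustrative)
    rcvhdrqentsize = 32 DWs = 128 bytes       (illustrative)
    rcvhdrq_size   = PAGE_ALIGN(2048 * 32 * sizeof(u32))
                   = PAGE_ALIGN(262144) = 262144 bytes (256 KiB)
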
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index f110842..758d273 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -364,9 +364,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
 		hfi1_exp_tid_group_init(rcd);
 		rcd->ppd = ppd;
 		rcd->dd = dd;
-		__set_bit(0, rcd->in_use_ctxts);
 		rcd->numa_id = numa;
 		rcd->rcv_array_groups = dd->rcv_entries.ngroups;
+		rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
 
 		mutex_init(&rcd->exp_mutex);
 
@@ -404,6 +404,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
 
 		rcd->rcvhdrq_cnt = rcvhdrcnt;
 		rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
+		rcd->rhf_offset =
+			rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
 		/*
 		 * Simple Eager buffer allocation: we have already pre-allocated
 		 * the number of RcvArray entry groups. Each ctxtdata structure
@@ -853,24 +855,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 	struct hfi1_ctxtdata *rcd;
 	struct hfi1_pportdata *ppd;
 
-	/* Set up recv low level handlers */
-	dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EXPECTED] =
-						kdeth_process_expected;
-	dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EAGER] =
-						kdeth_process_eager;
-	dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_IB] = process_receive_ib;
-	dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_ERROR] =
-						process_receive_error;
-	dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_BYPASS] =
-						process_receive_bypass;
-	dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID5] =
-						process_receive_invalid;
-	dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID6] =
-						process_receive_invalid;
-	dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID7] =
-						process_receive_invalid;
-	dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions;
-
 	/* Set up send low level handlers */
 	dd->process_pio_send = hfi1_verbs_send_pio;
 	dd->process_dma_send = hfi1_verbs_send_dma;
@@ -936,7 +920,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 	}
 
 	/* Allocate enough memory for user event notification. */
-	len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
+	len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS *
 			 sizeof(*dd->events));
 	dd->events = vmalloc_user(len);
 	if (!dd->events)
@@ -948,9 +932,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
 	dd->status = vmalloc_user(PAGE_SIZE);
 	if (!dd->status)
 		dd_dev_err(dd, "Failed to allocate dev status page\n");
-	else
-		dd->freezelen = PAGE_SIZE - (sizeof(*dd->status) -
-					     sizeof(dd->status->freezemsg));
 	for (pidx = 0; pidx < dd->num_pports; ++pidx) {
 		ppd = dd->pport + pidx;
 		if (dd->status)
@@ -1144,7 +1125,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 		return;
 
 	if (rcd->rcvhdrq) {
-		dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
+		dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
 				  rcd->rcvhdrq, rcd->rcvhdrq_dma);
 		rcd->rcvhdrq = NULL;
 		if (rcd->rcvhdrtail_kvaddr) {
@@ -1855,12 +1836,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 	if (!rcd->rcvhdrq) {
 		gfp_t gfp_flags;
 
-		/*
-		 * rcvhdrqentsize is in DWs, so we have to convert to bytes
-		 * (* sizeof(u32)).
-		 */
-		amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
-				 sizeof(u32));
+		amt = rcvhdrq_size(rcd);
 
 		if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
 			gfp_flags = GFP_KERNEL;
@@ -1885,8 +1861,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 			if (!rcd->rcvhdrtail_kvaddr)
 				goto bail_free;
 		}
-
-		rcd->rcvhdrq_size = amt;
 	}
 	/*
 	 * These values are per-context:
@@ -1902,7 +1876,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 			& RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
 		<< RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
 	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
-	reg = (dd->rcvhdrsize & RCV_HDR_SIZE_HDR_SIZE_MASK)
+	reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
 		<< RCV_HDR_SIZE_HDR_SIZE_SHIFT;
 	write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
 
@@ -1938,9 +1912,9 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_devdata *dd = rcd->dd;
-	u32 max_entries, egrtop, alloced_bytes = 0, idx = 0;
+	u32 max_entries, egrtop, alloced_bytes = 0;
 	gfp_t gfp_flags;
-	u16 order;
+	u16 order, idx = 0;
 	int ret = 0;
 	u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
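
Aside (not part of the patch): one detail worth calling out in the init.c hunks above is that rhf_offset is now derived per context as rcvhdrqentsize - sizeof(u64) / sizeof(u32), i.e. the RHF occupies the last two dwords (8 bytes) of each header-queue entry. With an illustrative 32-DW entry size:

    rhf_offset = 32 - sizeof(u64) / sizeof(u32) = 32 - 2 = 30 DWs

so get_rhf_addr() in hfi.h resolves to rcvhdrq + head + 30, in __le32 units.
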
 
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index baf7c32..eec8375 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -157,6 +157,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 	unsigned long len;
 	resource_size_t addr;
 	int ret = 0;
+	u32 rcv_array_count;
 
 	addr = pci_resource_start(pdev, 0);
 	len = pci_resource_len(pdev, 0);
@@ -186,9 +187,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 		goto nomem;
 	}
 
-	dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
-	dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
-	dd->base2_start  = RCV_ARRAY + dd->chip_rcv_array_count * 8;
+	rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
+	dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count);
+	dd->base2_start  = RCV_ARRAY + rcv_array_count * 8;
 
 	dd->kregbase2 = ioremap_nocache(
 		addr + dd->base2_start,
@@ -214,13 +215,13 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
 	 * to write an entire cacheline worth of entries in one shot.
 	 */
 	dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY,
-				     dd->chip_rcv_array_count * 8);
+				     rcv_array_count * 8);
 	if (!dd->rcvarray_wc) {
 		dd_dev_err(dd, "WC mapping of receive array failed\n");
 		goto nomem;
 	}
 	dd_dev_info(dd, "WC RcvArray: %p for %x\n",
-		    dd->rcvarray_wc, dd->chip_rcv_array_count * 8);
+		    dd->rcvarray_wc, rcv_array_count * 8);
 
 	dd->flags |= HFI1_PRESENT;	/* chip.c CSR routines now work */
 	return 0;
@@ -346,15 +347,13 @@ int pcie_speeds(struct hfi1_devdata *dd)
 /*
  * Returns:
  *	- actual number of interrupts allocated or
- *	- 0 if fell back to INTx.
  *      - error
  */
 int request_msix(struct hfi1_devdata *dd, u32 msireq)
 {
 	int nvec;
 
-	nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
-				     PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
+	nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
 	if (nvec < 0) {
 		dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
 		return nvec;
@@ -362,10 +361,6 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq)
 
 	tune_pcie_caps(dd);
 
-	/* check for legacy IRQ */
-	if (nvec == 1 && !dd->pcidev->msix_enabled)
-		return 0;
-
 	return nvec;
 }
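
Aside (not part of the patch): with the PCI_IRQ_LEGACY fallback gone, request_msix() now asks for exactly msireq MSI-X vectors and either gets them all or fails. A minimal sketch of that allocation pattern in a generic driver (my_request_msix is a hypothetical name, not an hfi1 function):

    #include <linux/pci.h>

    static int my_request_msix(struct pci_dev *pdev, unsigned int nvec)
    {
    	int ret;

    	/* min == max == nvec and MSI-X only: no INTx/MSI fallback */
    	ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_MSIX);
    	if (ret < 0)
    		return ret;		/* nothing was allocated */

    	/* ret == nvec here; release with pci_free_irq_vectors(pdev) */
    	return ret;
    }
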
 
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 9cac15d..c2c1cba 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -226,7 +226,7 @@ static const char *sc_type_name(int index)
 int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
 {
 	struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } };
-	int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1;
+	int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1;
 	int total_contexts = 0;
 	int fixed_blocks;
 	int pool_blocks;
@@ -343,8 +343,8 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
 				sc_type_name(i), count);
 			return -EINVAL;
 		}
-		if (total_contexts + count > dd->chip_send_contexts)
-			count = dd->chip_send_contexts - total_contexts;
+		if (total_contexts + count > chip_send_contexts(dd))
+			count = chip_send_contexts(dd) - total_contexts;
 
 		total_contexts += count;
 
@@ -507,7 +507,7 @@ static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index,
 		if (sci->type == type && sci->allocated == 0) {
 			sci->allocated = 1;
 			/* use a 1:1 mapping, but make them non-equal */
-			context = dd->chip_send_contexts - index - 1;
+			context = chip_send_contexts(dd) - index - 1;
 			dd->hw_to_sw[context] = index;
 			*sw_index = index;
 			*hw_context = context;
@@ -1618,11 +1618,11 @@ static void sc_piobufavail(struct send_context *sc)
 	/* Wake up the most starved one first */
 	if (n)
 		hfi1_qp_wakeup(qps[max_idx],
-			       RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
+			       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
 	for (i = 0; i < n; i++)
 		if (i != max_idx)
 			hfi1_qp_wakeup(qps[i],
-				       RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
+				       RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
 }
 
 /* translate a send credit update to a bit code of reasons */
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 1697d96..9b1e84a 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -273,7 +273,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_PATH_MIG_STATE &&
 	    attr->path_mig_state == IB_MIG_MIGRATED &&
 	    qp->s_mig_state == IB_MIG_ARMED) {
-		qp->s_flags |= RVT_S_AHG_CLEAR;
+		qp->s_flags |= HFI1_S_AHG_CLEAR;
 		priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
 		priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
 		priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
@@ -717,7 +717,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp)
 	qp->remote_ah_attr = qp->alt_ah_attr;
 	qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
 	qp->s_pkey_index = qp->s_alt_pkey_index;
-	qp->s_flags |= RVT_S_AHG_CLEAR;
+	qp->s_flags |= HFI1_S_AHG_CLEAR;
 	priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
 	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
 	qp_set_16b(qp);
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b2d4cba..078cff7 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -1,7 +1,7 @@
 #ifndef _QP_H
 #define _QP_H
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -70,6 +70,26 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
 }
 
 /*
+ * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
+ *
+ * HFI1_S_AHG_VALID - ahg header valid on chip
+ * HFI1_S_AHG_CLEAR - have send engine clear ahg state
+ * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
+ * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
+ */
+#define HFI1_S_AHG_VALID         0x80000000
+#define HFI1_S_AHG_CLEAR         0x40000000
+#define HFI1_S_WAIT_PIO_DRAIN    0x20000000
+#define HFI1_S_MIN_BIT_MASK      0x01000000
+
+/*
+ * overload wait defines
+ */
+
+#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
+#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
+
+/*
  * free_ahg - clear ahg from QP
  */
 static inline void clear_ahg(struct rvt_qp *qp)
@@ -77,7 +97,7 @@ static inline void clear_ahg(struct rvt_qp *qp)
 	struct hfi1_qp_priv *priv = qp->priv;
 
 	priv->s_ahg->ahgcount = 0;
-	qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
+	qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
 	if (priv->s_sde && qp->s_ahgidx >= 0)
 		sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
 	qp->s_ahgidx = -1;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index f15c931..9bd63ab 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -241,7 +241,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
 	smp_wmb();
 	qp->s_flags &= ~(RVT_S_RESP_PENDING
 				| RVT_S_ACK_PENDING
-				| RVT_S_AHG_VALID);
+				| HFI1_S_AHG_VALID);
 	return 0;
 }
 
@@ -1024,7 +1024,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
 	if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 	    (cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
 		qp->s_flags |= RVT_S_WAIT_PSN;
-	qp->s_flags &= ~RVT_S_AHG_VALID;
+	qp->s_flags &= ~HFI1_S_AHG_VALID;
 }
 
 /*
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index ef4c566..5f56f3c 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -194,7 +194,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
 	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+	if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
 	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		goto unlock;
 
@@ -533,9 +533,9 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
 {
 	struct hfi1_qp_priv *priv = qp->priv;
 
-	if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
+	if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
 		clear_ahg(qp);
-	if (!(qp->s_flags & RVT_S_AHG_VALID)) {
+	if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
 		/* first middle that needs copy  */
 		if (qp->s_ahgidx < 0)
 			qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
@@ -544,7 +544,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
 			priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
 			/* save to protect a change in another thread */
 			priv->s_ahg->ahgidx = qp->s_ahgidx;
-			qp->s_flags |= RVT_S_AHG_VALID;
+			qp->s_flags |= HFI1_S_AHG_VALID;
 		}
 	} else {
 		/* subsequent middle after valid */
@@ -650,7 +650,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
 	if (middle)
 		build_ahg(qp, bth2);
 	else
-		qp->s_flags &= ~RVT_S_AHG_VALID;
+		qp->s_flags &= ~HFI1_S_AHG_VALID;
 
 	bth0 |= pkey;
 	bth0 |= extra_bytes << 20;
@@ -727,7 +727,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
 	if (middle)
 		build_ahg(qp, bth2);
 	else
-		qp->s_flags &= ~RVT_S_AHG_VALID;
+		qp->s_flags &= ~HFI1_S_AHG_VALID;
 
 	bth0 |= pkey;
 	bth0 |= extra_bytes << 20;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 7fb350b..88e326d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1351,7 +1351,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 	struct hfi1_pportdata *ppd = dd->pport + port;
 	u32 per_sdma_credits;
 	uint idle_cnt = sdma_idle_cnt;
-	size_t num_engines = dd->chip_sdma_engines;
+	size_t num_engines = chip_sdma_engines(dd);
 	int ret = -ENOMEM;
 
 	if (!HFI1_CAP_IS_KSET(SDMA)) {
@@ -1360,18 +1360,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
 	}
 	if (mod_num_sdma &&
 	    /* can't exceed chip support */
-	    mod_num_sdma <= dd->chip_sdma_engines &&
+	    mod_num_sdma <= chip_sdma_engines(dd) &&
 	    /* count must be >= vls */
 	    mod_num_sdma >= num_vls)
 		num_engines = mod_num_sdma;
 
 	dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
-	dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
+	dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd));
 	dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
-		    dd->chip_sdma_mem_size);
+		    chip_sdma_mem_size(dd));
 
 	per_sdma_credits =
-		dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);
+		chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE);
 
 	/* set up freeze waitqueue */
 	init_waitqueue_head(&dd->sdma_unfreeze_wq);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 0899187..13374c7 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1007,7 +1007,7 @@ static int pio_wait(struct rvt_qp *qp,
 			int was_empty;
 
 			dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
-			dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
+			dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
 			qp->s_flags |= flag;
 			was_empty = list_empty(&sc->piowait);
 			iowait_queue(ps->pkts_sent, &priv->s_iowait,
@@ -1376,7 +1376,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
 		return pio_wait(qp,
 				ps->s_txreq->psc,
 				ps,
-				RVT_S_WAIT_PIO_DRAIN);
+				HFI1_S_WAIT_PIO_DRAIN);
 	return sr(qp, ps, 0);
 }
 
@@ -1410,7 +1410,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
 	rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
 	rdi->dparms.props.max_qp = hfi1_max_qps;
 	rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
-	rdi->dparms.props.max_sge = hfi1_max_sges;
+	rdi->dparms.props.max_send_sge = hfi1_max_sges;
+	rdi->dparms.props.max_recv_sge = hfi1_max_sges;
 	rdi->dparms.props.max_sge_rd = hfi1_max_sges;
 	rdi->dparms.props.max_cq = hfi1_max_cqs;
 	rdi->dparms.props.max_ah = hfi1_max_ahs;
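
Aside (not part of the patch): hfi1 reports the same SGE limit for both directions, but the device attribute itself is now split into max_send_sge and max_recv_sge, so consumers are expected to clamp each direction separately. A hedged sketch of a ULP-side caller (foo_clamp_sges and wanted_sge are hypothetical names):

    #include <rdma/ib_verbs.h>

    static void foo_clamp_sges(struct ib_device *ibdev,
    			   struct ib_qp_init_attr *init_attr,
    			   u32 wanted_sge)
    {
    	init_attr->cap.max_send_sge =
    		min_t(u32, wanted_sge, ibdev->attrs.max_send_sge);
    	init_attr->cap.max_recv_sge =
    		min_t(u32, wanted_sge, ibdev->attrs.max_recv_sge);
    }
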
@@ -1497,15 +1498,6 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
 	props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
 		mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
 
-	/*
-	 * sm_lid of 0xFFFF needs special handling so that it can
-	 * be differentiated from a permissve LID of 0xFFFF.
-	 * We set the grh_required flag here so the SA can program
-	 * the DGID in the address handle appropriately
-	 */
-	if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))
-		props->grh_required = true;
-
 	return 0;
 }
 
@@ -1892,7 +1884,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 	ibdev->process_mad = hfi1_process_mad;
 	ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
 
-	strncpy(ibdev->node_desc, init_utsname()->nodename,
+	strlcpy(ibdev->node_desc, init_utsname()->nodename,
 		sizeof(ibdev->node_desc));
 
 	/*
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 616fc9b..c643d80 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -120,8 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
 	uctxt->seq_cnt = 1;
 	uctxt->is_vnic = true;
 
-	if (dd->num_msix_entries)
-		hfi1_set_vnic_msix_info(uctxt);
+	hfi1_set_vnic_msix_info(uctxt);
 
 	hfi1_stats.sps_ctxts++;
 	dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@@ -136,8 +135,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
 	dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
 	flush_wc();
 
-	if (dd->num_msix_entries)
-		hfi1_reset_vnic_msix_info(uctxt);
+	hfi1_reset_vnic_msix_info(uctxt);
 
 	/*
 	 * Disable receive context and interrupt available, reset all
@@ -818,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
 
 	size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
 	netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
-				  dd->chip_sdma_engines, dd->num_vnic_contexts);
+				  chip_sdma_engines(dd), dd->num_vnic_contexts);
 	if (!netdev)
 		return ERR_PTR(-ENOMEM);
 
 	rn = netdev_priv(netdev);
 	vinfo = opa_vnic_dev_priv(netdev);
 	vinfo->dd = dd;
-	vinfo->num_tx_q = dd->chip_sdma_engines;
+	vinfo->num_tx_q = chip_sdma_engines(dd);
 	vinfo->num_rx_q = dd->num_vnic_contexts;
 	vinfo->netdev = netdev;
 	rn->free_rdma_netdev = hfi1_vnic_free_rn;
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index d749286..0d96c5b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -44,13 +44,11 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
 				 struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
+	const struct ib_gid_attr *gid_attr;
 	struct device *dev = hr_dev->dev;
-	struct ib_gid_attr gid_attr;
 	struct hns_roce_ah *ah;
 	u16 vlan_tag = 0xffff;
 	const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
-	union ib_gid sgid;
-	int ret;
 
 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
 	if (!ah)
@@ -59,18 +57,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
 	/* Get mac address */
 	memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
 
-	/* Get source gid */
-	ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr),
-				grh->sgid_index, &sgid, &gid_attr);
-	if (ret) {
-		dev_err(dev, "get sgid failed! ret = %d\n", ret);
-		kfree(ah);
-		return ERR_PTR(ret);
-	}
-
-	if (is_vlan_dev(gid_attr.ndev))
-		vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
-	dev_put(gid_attr.ndev);
+	gid_attr = ah_attr->grh.sgid_attr;
+	if (is_vlan_dev(gid_attr->ndev))
+		vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
 
 	if (vlan_tag < 0x1000)
 		vlan_tag |= (rdma_ah_get_sl(ah_attr) &
@@ -108,7 +97,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
 	rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate);
 	rdma_ah_set_grh(ah_attr, NULL,
 			(le32_to_cpu(ah->av.sl_tclass_flowlabel) &
-			 HNS_ROCE_FLOW_LABLE_MASK), ah->av.gid_index,
+			 HNS_ROCE_FLOW_LABEL_MASK), ah->av.gid_index,
 			ah->av.hop_limit,
 			(le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
 			 HNS_ROCE_TCLASS_SHIFT));
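
Aside (not part of the patch): the rewritten hns_roce_create_ah() no longer calls ib_get_cached_gid(); by the time the driver sees the rdma_ah_attr, the core has already resolved and reference-counted the source GID, so its attributes (including the netdev used for the VLAN lookup) can be read straight from ah_attr->grh.sgid_attr without the old get/dev_put() dance. A sketch of that access pattern, assuming only a resolved ah_attr:

    const struct ib_gid_attr *gid_attr = ah_attr->grh.sgid_attr;
    u16 vlan_tag = 0xffff;			/* "no VLAN" sentinel */

    if (is_vlan_dev(gid_attr->ndev))
    	vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
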
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 319cb74..93d4b4e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -382,15 +382,6 @@
 #define ROCEE_VF_EQ_DB_CFG0_REG			0x238
 #define ROCEE_VF_EQ_DB_CFG1_REG			0x23C
 
-#define ROCEE_VF_SMAC_CFG0_REG			0x12000
-#define ROCEE_VF_SMAC_CFG1_REG			0x12004
-
-#define ROCEE_VF_SGID_CFG0_REG			0x10000
-#define ROCEE_VF_SGID_CFG1_REG			0x10004
-#define ROCEE_VF_SGID_CFG2_REG			0x10008
-#define ROCEE_VF_SGID_CFG3_REG			0x1000c
-#define ROCEE_VF_SGID_CFG4_REG			0x10010
-
 #define ROCEE_VF_ABN_INT_CFG_REG		0x13000
 #define ROCEE_VF_ABN_INT_ST_REG			0x13004
 #define ROCEE_VF_ABN_INT_EN_REG			0x13008
diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c
index ebee278..e2f93c1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_db.c
+++ b/drivers/infiniband/hw/hns/hns_roce_db.c
@@ -41,6 +41,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt,
 found:
 	db->dma = sg_dma_address(page->umem->sg_head.sgl) +
 		  (virt & ~PAGE_MASK);
+	page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK;
+	db->virt_addr = sg_virt(page->umem->sg_head.sgl);
 	db->u.user_page = page;
 	refcount_inc(&page->refcount);
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 31221d5..9a24fd0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -76,7 +76,7 @@
 /* 4G/4K = 1M */
 #define HNS_ROCE_SL_SHIFT			28
 #define HNS_ROCE_TCLASS_SHIFT			20
-#define HNS_ROCE_FLOW_LABLE_MASK		0xfffff
+#define HNS_ROCE_FLOW_LABEL_MASK		0xfffff
 
 #define HNS_ROCE_MAX_PORTS			6
 #define HNS_ROCE_MAX_GID_NUM			16
@@ -110,6 +110,7 @@
 
 enum {
 	HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
+	HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
 };
 
 enum {
@@ -190,7 +191,8 @@ enum {
 	HNS_ROCE_CAP_FLAG_REREG_MR		= BIT(0),
 	HNS_ROCE_CAP_FLAG_ROCE_V1_V2		= BIT(1),
 	HNS_ROCE_CAP_FLAG_RQ_INLINE		= BIT(2),
-	HNS_ROCE_CAP_FLAG_RECORD_DB		= BIT(3)
+	HNS_ROCE_CAP_FLAG_RECORD_DB		= BIT(3),
+	HNS_ROCE_CAP_FLAG_SQ_RECORD_DB		= BIT(4),
 };
 
 enum hns_roce_mtt_type {
@@ -385,6 +387,7 @@ struct hns_roce_db {
 		struct hns_roce_user_db_page *user_page;
 	} u;
 	dma_addr_t	dma;
+	void		*virt_addr;
 	int		index;
 	int		order;
 };
@@ -524,7 +527,9 @@ struct hns_roce_qp {
 	struct hns_roce_buf	hr_buf;
 	struct hns_roce_wq	rq;
 	struct hns_roce_db	rdb;
+	struct hns_roce_db	sdb;
 	u8			rdb_en;
+	u8			sdb_en;
 	u32			doorbell_qpn;
 	__le32			sq_signal_bits;
 	u32			sq_next_wqe;
@@ -579,22 +584,22 @@ struct hns_roce_ceqe {
 };
 
 struct hns_roce_aeqe {
-	u32 asyn;
+	__le32 asyn;
 	union {
 		struct {
-			u32 qp;
+			__le32 qp;
 			u32 rsv0;
 			u32 rsv1;
 		} qp_event;
 
 		struct {
-			u32 cq;
+			__le32 cq;
 			u32 rsv0;
 			u32 rsv1;
 		} cq_event;
 
 		struct {
-			u32 ceqe;
+			__le32 ceqe;
 			u32 rsv0;
 			u32 rsv1;
 		} ce_event;
@@ -641,6 +646,8 @@ struct hns_roce_eq {
 	int				shift;
 	dma_addr_t			cur_eqe_ba;
 	dma_addr_t			nxt_eqe_ba;
+	int				event_type;
+	int				sub_type;
 };
 
 struct hns_roce_eq_table {
@@ -720,10 +727,21 @@ struct hns_roce_caps {
 	u32		eqe_ba_pg_sz;
 	u32		eqe_buf_pg_sz;
 	u32		eqe_hop_num;
+	u32		sl_num;
+	u32		tsq_buf_pg_sz;
+	u32		tpq_buf_pg_sz;
 	u32		chunk_sz;	/* chunk size in non multihop mode*/
 	u64		flags;
 };
 
+struct hns_roce_work {
+	struct hns_roce_dev *hr_dev;
+	struct work_struct work;
+	u32 qpn;
+	int event_type;
+	int sub_type;
+};
+
 struct hns_roce_hw {
 	int (*reset)(struct hns_roce_dev *hr_dev, bool enable);
 	int (*cmq_init)(struct hns_roce_dev *hr_dev);
@@ -736,7 +754,7 @@ struct hns_roce_hw {
 			 u16 token, int event);
 	int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout);
 	int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
-		       union ib_gid *gid, const struct ib_gid_attr *attr);
+		       const union ib_gid *gid, const struct ib_gid_attr *attr);
 	int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
 	void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
 			enum ib_mtu mtu);
@@ -760,10 +778,10 @@ struct hns_roce_hw {
 			 int attr_mask, enum ib_qp_state cur_state,
 			 enum ib_qp_state new_state);
 	int (*destroy_qp)(struct ib_qp *ibqp);
-	int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			 struct ib_send_wr **bad_wr);
-	int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
-			 struct ib_recv_wr **bad_recv_wr);
+	int (*post_send)(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			 const struct ib_send_wr **bad_wr);
+	int (*post_recv)(struct ib_qp *qp, const struct ib_recv_wr *recv_wr,
+			 const struct ib_recv_wr **bad_recv_wr);
 	int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
 	int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 	int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr);
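
Aside (not part of the patch): the post_send/post_recv members now take const WR pointers, matching the 4.19-wide verbs API change; the provider must treat the caller's WR chain as read-only and report the first failing WR through *bad_wr. A hedged sketch of a conforming hook (foo_post_send and FOO_MAX_SGE are hypothetical):

    #define FOO_MAX_SGE 32			/* hypothetical HW limit */

    static int foo_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
    			 const struct ib_send_wr **bad_wr)
    {
    	for (; wr; wr = wr->next) {
    		if (wr->num_sge > FOO_MAX_SGE) {
    			*bad_wr = wr;
    			return -EINVAL;
    		}
    		/* build the hardware WQE from the (read-only) *wr here */
    	}
    	return 0;
    }
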
@@ -816,6 +834,7 @@ struct hns_roce_dev {
 	u32			tptr_size; /*only for hw v1*/
 	const struct hns_roce_hw *hw;
 	void			*priv;
+	struct workqueue_struct *irq_workq;
 };
 
 static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
@@ -864,7 +883,7 @@ static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp)
 	return container_of(hr_qp, struct hns_roce_sqp, hr_qp);
 }
 
-static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest)
+static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
 {
 	__raw_writeq(*(u64 *) val, dest);
 }
@@ -982,7 +1001,7 @@ void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
 void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp);
 void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
 			       int cnt);
-__be32 send_ieth(struct ib_send_wr *wr);
+__be32 send_ieth(const struct ib_send_wr *wr);
 int to_hr_qp_type(int qp_type);
 
 struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 63b5b3e..f6faefe 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -170,7 +170,7 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
 	case 3:
 		mhop->l2_idx = table_idx & (chunk_ba_num - 1);
 		mhop->l1_idx = table_idx / chunk_ba_num & (chunk_ba_num - 1);
-		mhop->l0_idx = table_idx / chunk_ba_num / chunk_ba_num;
+		mhop->l0_idx = (table_idx / chunk_ba_num) / chunk_ba_num;
 		break;
 	case 2:
 		mhop->l1_idx = table_idx & (chunk_ba_num - 1);
@@ -342,7 +342,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev,
 			} else {
 				break;
 			}
-			msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
+			mdelay(HW_SYNC_SLEEP_TIME_INTERVAL);
 		}
 
 		bt_cmd_l = (u32)bt_ba;
@@ -494,6 +494,9 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev,
 			step_idx = 1;
 		} else if (hop_num == HNS_ROCE_HOP_NUM_0) {
 			step_idx = 0;
+		} else {
+			ret = -EINVAL;
+			goto err_dma_alloc_l1;
 		}
 
 		/* set HEM base address to hardware */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 8444234..081aa91 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -58,8 +58,9 @@ static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr,
 	rseg->len   = 0;
 }
 
-static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-				 struct ib_send_wr **bad_wr)
+static int hns_roce_v1_post_send(struct ib_qp *ibqp,
+				 const struct ib_send_wr *wr,
+				 const struct ib_send_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
@@ -173,12 +174,14 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 			roce_set_field(ud_sq_wqe->u32_36,
 				       UD_SEND_WQE_U32_36_FLOW_LABEL_M,
-				       UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0);
+				       UD_SEND_WQE_U32_36_FLOW_LABEL_S,
+				       ah->av.sl_tclass_flowlabel &
+				       HNS_ROCE_FLOW_LABEL_MASK);
 			roce_set_field(ud_sq_wqe->u32_36,
-				       UD_SEND_WQE_U32_36_PRIORITY_M,
-				       UD_SEND_WQE_U32_36_PRIORITY_S,
-				       ah->av.sl_tclass_flowlabel >>
-				       HNS_ROCE_SL_SHIFT);
+				      UD_SEND_WQE_U32_36_PRIORITY_M,
+				      UD_SEND_WQE_U32_36_PRIORITY_S,
+				      le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
+				      HNS_ROCE_SL_SHIFT);
 			roce_set_field(ud_sq_wqe->u32_36,
 				       UD_SEND_WQE_U32_36_SGID_INDEX_M,
 				       UD_SEND_WQE_U32_36_SGID_INDEX_S,
@@ -191,7 +194,9 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				       ah->av.hop_limit);
 			roce_set_field(ud_sq_wqe->u32_40,
 				       UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M,
-				       UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, 0);
+				       UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S,
+				       ah->av.sl_tclass_flowlabel >>
+				       HNS_ROCE_TCLASS_SHIFT);
 
 			memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN);
 
@@ -333,7 +338,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		doorbell[0] = le32_to_cpu(sq_db.u32_4);
 		doorbell[1] = le32_to_cpu(sq_db.u32_8);
 
-		hns_roce_write64_k(doorbell, qp->sq.db_reg_l);
+		hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l);
 		qp->sq_next_wqe = ind;
 	}
 
@@ -342,14 +347,15 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return ret;
 }
 
-static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-				 struct ib_recv_wr **bad_wr)
+static int hns_roce_v1_post_recv(struct ib_qp *ibqp,
+				 const struct ib_recv_wr *wr,
+				 const struct ib_recv_wr **bad_wr)
 {
 	int ret = 0;
 	int nreq = 0;
 	int ind = 0;
 	int i = 0;
-	u32 reg_val = 0;
+	u32 reg_val;
 	unsigned long flags = 0;
 	struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
 	struct hns_roce_wqe_data_seg *scat = NULL;
@@ -402,14 +408,18 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		wmb();
 
 		if (ibqp->qp_type == IB_QPT_GSI) {
+			__le32 tmp;
+
 			/* SW update GSI rq header */
 			reg_val = roce_read(to_hr_dev(ibqp->device),
 					    ROCEE_QP1C_CFG3_0_REG +
 					    QP1C_CFGN_OFFSET * hr_qp->phy_port);
-			roce_set_field(reg_val,
+			tmp = cpu_to_le32(reg_val);
+			roce_set_field(tmp,
 				       ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
 				       ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
 				       hr_qp->rq.head);
+			reg_val = le32_to_cpu(tmp);
 			roce_write(to_hr_dev(ibqp->device),
 				   ROCEE_QP1C_CFG3_0_REG +
 				   QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val);
@@ -430,7 +440,8 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 			doorbell[0] = le32_to_cpu(rq_db.u32_4);
 			doorbell[1] = le32_to_cpu(rq_db.u32_8);
 
-			hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
+			hns_roce_write64_k((__le32 *)doorbell,
+					   hr_qp->rq.db_reg_l);
 		}
 	}
 	spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
@@ -441,51 +452,63 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev,
 				       int sdb_mode, int odb_mode)
 {
+	__le32 tmp;
 	u32 val;
 
 	val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
-	roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode);
-	roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode);
+	tmp = cpu_to_le32(val);
+	roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode);
+	roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
 }
 
 static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode,
 				     u32 odb_mode)
 {
+	__le32 tmp;
 	u32 val;
 
 	/* Configure SDB/ODB extend mode */
 	val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
-	roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode);
-	roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode);
+	tmp = cpu_to_le32(val);
+	roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode);
+	roce_set_bit(tmp, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
 }
 
 static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept,
 			     u32 sdb_alful)
 {
+	__le32 tmp;
 	u32 val;
 
 	/* Configure SDB */
 	val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG);
-	roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M,
 		       ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful);
-	roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M,
+	roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M,
 		       ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val);
 }
 
 static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept,
 			     u32 odb_alful)
 {
+	__le32 tmp;
 	u32 val;
 
 	/* Configure ODB */
 	val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG);
-	roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M,
 		       ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful);
-	roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M,
+	roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M,
 		       ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val);
 }
 
@@ -496,6 +519,7 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
 	struct hns_roce_v1_priv *priv;
 	struct hns_roce_db_table *db;
 	dma_addr_t sdb_dma_addr;
+	__le32 tmp;
 	u32 val;
 
 	priv = (struct hns_roce_v1_priv *)hr_dev->priv;
@@ -511,7 +535,8 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
 
 	/* Configure extend SDB depth */
 	val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG);
-	roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M,
 		       ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S,
 		       db->ext_db->esdb_dep);
 	/*
@@ -519,8 +544,9 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
 	 * using 4K page, and shift more 32 because of
 	 * calculating the high 32 bit value evaluated to hardware.
 	 */
-	roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M,
+	roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M,
 		       ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val);
 
 	dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep);
@@ -535,6 +561,7 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept,
 	struct hns_roce_v1_priv *priv;
 	struct hns_roce_db_table *db;
 	dma_addr_t odb_dma_addr;
+	__le32 tmp;
 	u32 val;
 
 	priv = (struct hns_roce_v1_priv *)hr_dev->priv;
@@ -550,12 +577,14 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept,
 
 	/* Configure extend ODB depth */
 	val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG);
-	roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M,
 		       ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S,
 		       db->ext_db->eodb_dep);
-	roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M,
+	roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M,
 		       ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S,
 		       db->ext_db->eodb_dep);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val);
 
 	dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep);
@@ -762,6 +791,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 		free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
 		if (!free_mr->mr_free_qp[i]) {
 			dev_err(dev, "Create loop qp failed!\n");
+			ret = -ENOMEM;
 			goto create_lp_qp_failed;
 		}
 		hr_qp = free_mr->mr_free_qp[i];
@@ -831,7 +861,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
 	if (hns_roce_ib_destroy_cq(cq))
 		dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
 
-	return -EINVAL;
+	return ret;
 }
 
 static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
@@ -969,7 +999,8 @@ static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
 	struct device *dev = &hr_dev->pdev->dev;
-	struct ib_send_wr send_wr, *bad_wr;
+	struct ib_send_wr send_wr;
+	const struct ib_send_wr *bad_wr;
 	int ret;
 
 	memset(&send_wr, 0, sizeof(send_wr));
@@ -1161,9 +1192,10 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev)
 static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
 {
 	int ret;
+	u32 val;
+	__le32 tmp;
 	int raq_shift = 0;
 	dma_addr_t addr;
-	u32 val;
 	struct hns_roce_v1_priv *priv;
 	struct hns_roce_raq_table *raq;
 	struct device *dev = &hr_dev->pdev->dev;
@@ -1189,46 +1221,54 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
 	/* Configure raq_shift */
 	raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY);
 	val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG);
-	roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M,
 		       ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift);
 	/*
 	 * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
 	 * using 4K page, and shift more 32 because of
 	 * calculating the high 32 bit value evaluated to hardware.
 	 */
-	roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M,
+	roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M,
 		       ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S,
 		       raq->e_raq_buf->map >> 44);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val);
 	dev_dbg(dev, "Configure raq_shift 0x%x.\n", val);
 
 	/* Configure raq threshold */
 	val = roce_read(hr_dev, ROCEE_RAQ_WL_REG);
-	roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M,
 		       ROCEE_RAQ_WL_ROCEE_RAQ_WL_S,
 		       HNS_ROCE_V1_EXT_RAQ_WF);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_RAQ_WL_REG, val);
 	dev_dbg(dev, "Configure raq_wl 0x%x.\n", val);
 
 	/* Enable extend raq */
 	val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG);
-	roce_set_field(val,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp,
 		       ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M,
 		       ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S,
 		       POL_TIME_INTERVAL_VAL);
-	roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1);
-	roce_set_field(val,
+	roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1);
+	roce_set_field(tmp,
 		       ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M,
 		       ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S,
 		       2);
-	roce_set_bit(val,
+	roce_set_bit(tmp,
 		     ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val);
 	dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val);
 
 	/* Enable raq drop */
 	val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
-	roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1);
+	tmp = cpu_to_le32(val);
+	roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
 	dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val);
 
@@ -1255,20 +1295,25 @@ static void hns_roce_raq_free(struct hns_roce_dev *hr_dev)
 
 static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag)
 {
+	__le32 tmp;
 	u32 val;
 
 	if (enable_flag) {
 		val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
 		 /* Open all ports */
-		roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
+		tmp = cpu_to_le32(val);
+		roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
 			       ROCEE_GLB_CFG_ROCEE_PORT_ST_S,
 			       ALL_PORT_VAL_OPEN);
+		val = le32_to_cpu(tmp);
 		roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
 	} else {
 		val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
 		/* Close all ports */
-		roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
+		tmp = cpu_to_le32(val);
+		roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
 			       ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0);
+		val = le32_to_cpu(tmp);
 		roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
 	}
 }
@@ -1498,13 +1543,11 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
 	int i = 0;
 	struct hns_roce_caps *caps = &hr_dev->caps;
 
-	hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG));
-	hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev,
-					     ROCEE_VENDOR_PART_ID_REG));
-	hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev,
-					     ROCEE_SYS_IMAGE_GUID_L_REG)) |
-				((u64)le32_to_cpu(roce_read(hr_dev,
-					    ROCEE_SYS_IMAGE_GUID_H_REG)) << 32);
+	hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG);
+	hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG);
+	hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) |
+				((u64)roce_read(hr_dev,
+					    ROCEE_SYS_IMAGE_GUID_H_REG) << 32);
 	hr_dev->hw_rev		= HNS_ROCE_HW_VER1;
 
 	caps->num_qps		= HNS_ROCE_V1_MAX_QP_NUM;
@@ -1557,8 +1600,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
 
 	caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM;
 	caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM;
-	caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev,
-							 ROCEE_ACK_DELAY_REG));
+	caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG);
 	caps->max_mtu = IB_MTU_2048;
 
 	return 0;
@@ -1568,21 +1610,25 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
 {
 	int ret;
 	u32 val;
+	__le32 tmp;
 	struct device *dev = &hr_dev->pdev->dev;
 
 	/* DMAE user config */
 	val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG);
-	roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M,
 		       ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf);
-	roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M,
+	roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M,
 		       ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S,
 		       1 << PAGES_SHIFT_16);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val);
 
 	val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG);
-	roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M,
 		       ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf);
-	roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M,
+	roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M,
 		       ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S,
 		       1 << PAGES_SHIFT_16);
 
@@ -1668,6 +1714,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
 	u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG);
 	unsigned long end;
 	u32 val = 0;
+	__le32 tmp;
 
 	end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies;
 	while (hns_roce_v1_cmd_pending(hr_dev)) {
@@ -1679,15 +1726,17 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
 		cond_resched();
 	}
 
-	roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S,
 		       op);
-	roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M,
+	roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M,
 		       ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier);
-	roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event);
-	roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1);
-	roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M,
+	roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event);
+	roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1);
+	roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M,
 		       ROCEE_MB6_ROCEE_MB_TOKEN_S, token);
 
+	val = le32_to_cpu(tmp);
 	writeq(in_param, hcr + 0);
 	writeq(out_param, hcr + 2);
 	writel(in_modifier, hcr + 4);
@@ -1717,7 +1766,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
 		return -ETIMEDOUT;
 	}
 
-	status = le32_to_cpu((__force __be32)
+	status = le32_to_cpu((__force __le32)
 			      __raw_readl(hcr + HCR_STATUS_OFFSET));
 	if ((status & STATUS_MASK) != 0x1) {
 		dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status);
@@ -1728,7 +1777,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
 }
 
 static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port,
-			       int gid_index, union ib_gid *gid,
+			       int gid_index, const union ib_gid *gid,
 			       const struct ib_gid_attr *attr)
 {
 	u32 *p = NULL;
@@ -1760,6 +1809,7 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
 {
 	u32 reg_smac_l;
 	u16 reg_smac_h;
+	__le32 tmp;
 	u16 *p_h;
 	u32 *p;
 	u32 val;
@@ -1784,10 +1834,12 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
 
 	val = roce_read(hr_dev,
 			ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
+	tmp = cpu_to_le32(val);
 	p_h = (u16 *)(&addr[4]);
 	reg_smac_h  = *p_h;
-	roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M,
+	roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M,
 		       ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
 		   val);
 
@@ -1797,12 +1849,15 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
 static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
 				enum ib_mtu mtu)
 {
+	__le32 tmp;
 	u32 val;
 
 	val = roce_read(hr_dev,
 			ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
-	roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M,
+	tmp = cpu_to_le32(val);
+	roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M,
 		       ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu);
+	val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
 		   val);
 }
@@ -1848,9 +1903,9 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 	roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M,
 		       MPT_BYTE_12_MW_BIND_COUNTER_S, 0);
 
-	mpt_entry->virt_addr_l = (u32)mr->iova;
-	mpt_entry->virt_addr_h = (u32)(mr->iova >> 32);
-	mpt_entry->length = (u32)mr->size;
+	mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova);
+	mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32));
+	mpt_entry->length = cpu_to_le32((u32)mr->size);
 
 	roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M,
 		       MPT_BYTE_28_PD_S, mr->pd);
@@ -1885,64 +1940,59 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 			roce_set_field(mpt_entry->mpt_byte_36,
 				MPT_BYTE_36_PA0_H_M,
 				MPT_BYTE_36_PA0_H_S,
-				cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32)));
+				(u32)(pages[i] >> PAGES_SHIFT_32));
 			break;
 		case 1:
 			roce_set_field(mpt_entry->mpt_byte_36,
 				       MPT_BYTE_36_PA1_L_M,
-				       MPT_BYTE_36_PA1_L_S,
-				       cpu_to_le32((u32)(pages[i])));
+				       MPT_BYTE_36_PA1_L_S, (u32)(pages[i]));
 			roce_set_field(mpt_entry->mpt_byte_40,
 				MPT_BYTE_40_PA1_H_M,
 				MPT_BYTE_40_PA1_H_S,
-				cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24)));
+				(u32)(pages[i] >> PAGES_SHIFT_24));
 			break;
 		case 2:
 			roce_set_field(mpt_entry->mpt_byte_40,
 				       MPT_BYTE_40_PA2_L_M,
-				       MPT_BYTE_40_PA2_L_S,
-				       cpu_to_le32((u32)(pages[i])));
+				       MPT_BYTE_40_PA2_L_S, (u32)(pages[i]));
 			roce_set_field(mpt_entry->mpt_byte_44,
 				MPT_BYTE_44_PA2_H_M,
 				MPT_BYTE_44_PA2_H_S,
-				cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16)));
+				(u32)(pages[i] >> PAGES_SHIFT_16));
 			break;
 		case 3:
 			roce_set_field(mpt_entry->mpt_byte_44,
 				       MPT_BYTE_44_PA3_L_M,
-				       MPT_BYTE_44_PA3_L_S,
-				       cpu_to_le32((u32)(pages[i])));
+				       MPT_BYTE_44_PA3_L_S, (u32)(pages[i]));
 			roce_set_field(mpt_entry->mpt_byte_48,
 				MPT_BYTE_48_PA3_H_M,
 				MPT_BYTE_48_PA3_H_S,
-				cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8)));
+				(u32)(pages[i] >> PAGES_SHIFT_8));
 			break;
 		case 4:
 			mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i]));
 			roce_set_field(mpt_entry->mpt_byte_56,
 				MPT_BYTE_56_PA4_H_M,
 				MPT_BYTE_56_PA4_H_S,
-				cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32)));
+				(u32)(pages[i] >> PAGES_SHIFT_32));
 			break;
 		case 5:
 			roce_set_field(mpt_entry->mpt_byte_56,
 				       MPT_BYTE_56_PA5_L_M,
-				       MPT_BYTE_56_PA5_L_S,
-				       cpu_to_le32((u32)(pages[i])));
+				       MPT_BYTE_56_PA5_L_S, (u32)(pages[i]));
 			roce_set_field(mpt_entry->mpt_byte_60,
 				MPT_BYTE_60_PA5_H_M,
 				MPT_BYTE_60_PA5_H_S,
-				cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24)));
+				(u32)(pages[i] >> PAGES_SHIFT_24));
 			break;
 		case 6:
 			roce_set_field(mpt_entry->mpt_byte_60,
 				       MPT_BYTE_60_PA6_L_M,
-				       MPT_BYTE_60_PA6_L_S,
-				       cpu_to_le32((u32)(pages[i])));
+				       MPT_BYTE_60_PA6_L_S, (u32)(pages[i]));
 			roce_set_field(mpt_entry->mpt_byte_64,
 				MPT_BYTE_64_PA6_H_M,
 				MPT_BYTE_64_PA6_H_S,
-				cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16)));
+				(u32)(pages[i] >> PAGES_SHIFT_16));
 			break;
 		default:
 			break;
@@ -1951,7 +2001,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
 
 	free_page((unsigned long) pages);
 
-	mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr);
+	mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr));
 
 	roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
 		       MPT_BYTE_12_PBL_ADDR_H_S,
@@ -1982,9 +2032,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
 
 static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
 {
-	u32 doorbell[2];
+	__le32 doorbell[2];
 
-	doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1);
+	doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth << 1) - 1));
 	doorbell[1] = 0;
 	roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
 	roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
@@ -2081,10 +2131,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 		       CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID);
 	roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M,
 		       CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn);
-	cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4);
 
-	cq_context->cq_bt_l = (u32)dma_handle;
-	cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l);
+	cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle);
 
 	roce_set_field(cq_context->cqc_byte_12,
 		       CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M,
@@ -2096,15 +2144,12 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 		       ilog2((unsigned int)nent));
 	roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M,
 		       CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector);
-	cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12);
 
-	cq_context->cur_cqe_ba0_l = (u32)(mtts[0]);
-	cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l);
+	cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0]));
 
 	roce_set_field(cq_context->cqc_byte_20,
 		       CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M,
-		       CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S,
-		       cpu_to_le32((mtts[0]) >> 32));
+		       CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32);
 	/* Dedicated hardware, directly set 0 */
 	roce_set_field(cq_context->cqc_byte_20,
 		       CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M,
@@ -2118,9 +2163,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 		       CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
 		       CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
 		       tptr_dma_addr >> 44);
-	cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
 
-	cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12);
+	cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12));
 
 	roce_set_field(cq_context->cqc_byte_32,
 		       CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
@@ -2138,7 +2182,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
 	roce_set_field(cq_context->cqc_byte_32,
 		       CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
 		       CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
-	cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32);
 }
 
 static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
@@ -2151,7 +2194,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
 {
 	struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
 	u32 notification_flag;
-	u32 doorbell[2];
+	__le32 doorbell[2];
 
 	notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
 			    IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
@@ -2159,7 +2202,8 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
 	 * flags = 0; Notification Flag = 1, next
 	 * flags = 1; Notification Flag = 0, solicited
 	 */
-	doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1);
+	doorbell[0] =
+		cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1));
 	roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
 	roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
 		       ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
@@ -2416,7 +2460,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
 	struct device *dev = &hr_dev->pdev->dev;
 	struct hns_roce_v1_priv *priv;
 	unsigned long end = 0, flags = 0;
-	uint32_t bt_cmd_val[2] = {0};
+	__le32 bt_cmd_val[2] = {0};
 	void __iomem *bt_cmd;
 	u64 bt_ba = 0;
 
@@ -2468,7 +2512,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
 		msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
 	}
 
-	bt_cmd_val[0] = (uint32_t)bt_ba;
+	bt_cmd_val[0] = (__le32)bt_ba;
 	roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
 		ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32);
 	hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
@@ -2569,10 +2613,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 	struct hns_roce_sqp_context *context;
 	struct device *dev = &hr_dev->pdev->dev;
 	dma_addr_t dma_handle = 0;
+	u32 __iomem *addr;
 	int rq_pa_start;
+	__le32 tmp;
 	u32 reg_val;
 	u64 *mtts;
-	u32 __iomem *addr;
 
 	context = kzalloc(sizeof(*context), GFP_KERNEL);
 	if (!context)
@@ -2598,7 +2643,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 		roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M,
 			       QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
 
-		context->sq_rq_bt_l = (u32)(dma_handle);
+		context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
 		roce_set_field(context->qp1c_bytes_12,
 			       QP1C_BYTES_12_SQ_RQ_BT_H_M,
 			       QP1C_BYTES_12_SQ_RQ_BT_H_S,
@@ -2610,7 +2655,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			       QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port);
 		roce_set_bit(context->qp1c_bytes_16,
 			     QP1C_BYTES_16_SIGNALING_TYPE_S,
-			     hr_qp->sq_signal_bits);
+			     le32_to_cpu(hr_qp->sq_signal_bits));
 		roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
 			     1);
 		roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
@@ -2624,7 +2669,8 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			       QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
 
 		rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
-		context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]);
+		context->cur_rq_wqe_ba_l =
+				cpu_to_le32((u32)(mtts[rq_pa_start]));
 
 		roce_set_field(context->qp1c_bytes_28,
 			       QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M,
@@ -2643,7 +2689,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			       QP1C_BYTES_32_TX_CQ_NUM_S,
 			       to_hr_cq(ibqp->send_cq)->cqn);
 
-		context->cur_sq_wqe_ba_l  = (u32)mtts[0];
+		context->cur_sq_wqe_ba_l  = cpu_to_le32((u32)mtts[0]);
 
 		roce_set_field(context->qp1c_bytes_40,
 			       QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M,
@@ -2658,23 +2704,25 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 				       ROCEE_QP1C_CFG0_0_REG +
 				       hr_qp->phy_port * sizeof(*context));
 
-		writel(context->qp1c_bytes_4, addr);
-		writel(context->sq_rq_bt_l, addr + 1);
-		writel(context->qp1c_bytes_12, addr + 2);
-		writel(context->qp1c_bytes_16, addr + 3);
-		writel(context->qp1c_bytes_20, addr + 4);
-		writel(context->cur_rq_wqe_ba_l, addr + 5);
-		writel(context->qp1c_bytes_28, addr + 6);
-		writel(context->qp1c_bytes_32, addr + 7);
-		writel(context->cur_sq_wqe_ba_l, addr + 8);
-		writel(context->qp1c_bytes_40, addr + 9);
+		writel(le32_to_cpu(context->qp1c_bytes_4), addr);
+		writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1);
+		writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2);
+		writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3);
+		writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4);
+		writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5);
+		writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6);
+		writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7);
+		writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8);
+		writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9);
 	}
 
 	/* Modify QP1C status */
 	reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG +
 			    hr_qp->phy_port * sizeof(*context));
-	roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
+	tmp = cpu_to_le32(reg_val);
+	roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
 		       ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state);
+	reg_val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG +
 		    hr_qp->phy_port * sizeof(*context), reg_val);
 
@@ -2712,7 +2760,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
 	dma_addr_t dma_handle_2 = 0;
 	dma_addr_t dma_handle = 0;
-	uint32_t doorbell[2] = {0};
+	__le32 doorbell[2] = {0};
 	int rq_pa_start = 0;
 	u64 *mtts_2 = NULL;
 	int ret = -EINVAL;
@@ -2887,7 +2935,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 
 		dmac = (u8 *)attr->ah_attr.roce.dmac;
 
-		context->sq_rq_bt_l = (u32)(dma_handle);
+		context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
 		roce_set_field(context->qpc_bytes_24,
 			       QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M,
 			       QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S,
@@ -2899,7 +2947,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			       QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
 			       QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S,
 			       attr->min_rnr_timer);
-		context->irrl_ba_l = (u32)(dma_handle_2);
+		context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2));
 		roce_set_field(context->qpc_bytes_32,
 			       QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M,
 			       QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S,
@@ -2913,7 +2961,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			     1);
 		roce_set_bit(context->qpc_bytes_32,
 			     QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
-			     hr_qp->sq_signal_bits);
+			     le32_to_cpu(hr_qp->sq_signal_bits));
 
 		port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
 			hr_qp->port;
@@ -2991,7 +3039,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			       QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
 
 		rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
-		context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]);
+		context->cur_rq_wqe_ba_l =
+				cpu_to_le32((u32)(mtts[rq_pa_start]));
 
 		roce_set_field(context->qpc_bytes_76,
 			QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M,
@@ -3071,7 +3120,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			goto out;
 		}
 
-		context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]);
+		context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
 
 		roce_set_field(context->qpc_bytes_120,
 			       QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M,
@@ -3219,7 +3268,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
 			       QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M,
 			       QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
 
-		context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]);
+		context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
 
 		roce_set_field(context->qpc_bytes_188,
 			       QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M,
@@ -3386,16 +3435,16 @@ static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 
 	addr = ROCEE_QP1C_CFG0_0_REG +
 		hr_qp->port * sizeof(struct hns_roce_sqp_context);
-	context.qp1c_bytes_4 = roce_read(hr_dev, addr);
-	context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
-	context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
-	context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
-	context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
-	context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
-	context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
-	context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
-	context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
-	context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
+	context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr));
+	context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1));
+	context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2));
+	context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3));
+	context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4));
+	context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5));
+	context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6));
+	context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7));
+	context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8));
+	context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9));
 
 	hr_qp->state = roce_get_field(context.qp1c_bytes_4,
 				      QP1C_BYTES_4_QP_STATE_M,
@@ -3557,7 +3606,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148,
 			     QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
 			     QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S);
-	qp_attr->rnr_retry = context->rnr_retry;
+	qp_attr->rnr_retry = (u8)context->rnr_retry;
 
 done:
 	qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -3595,42 +3644,47 @@ static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev,
 				      u32 *old_send, u32 *old_retry,
 				      u32 *tsp_st, u32 *success_flags)
 {
+	__le32 *old_send_tmp, *old_retry_tmp;
 	u32 sdb_retry_cnt;
 	u32 sdb_send_ptr;
 	u32 cur_cnt, old_cnt;
+	__le32 tmp, tmp1;
 	u32 send_ptr;
 
 	sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
 	sdb_retry_cnt =	roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG);
-	cur_cnt = roce_get_field(sdb_send_ptr,
-				 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+	tmp = cpu_to_le32(sdb_send_ptr);
+	tmp1 = cpu_to_le32(sdb_retry_cnt);
+	cur_cnt = roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
 				 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
-		  roce_get_field(sdb_retry_cnt,
-				 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+		  roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
 				 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+
+	old_send_tmp = (__le32 *)old_send;
+	old_retry_tmp = (__le32 *)old_retry;
 	if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) {
-		old_cnt = roce_get_field(*old_send,
+		old_cnt = roce_get_field(*old_send_tmp,
 					 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
 					 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
-			  roce_get_field(*old_retry,
+			  roce_get_field(*old_retry_tmp,
 					 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
 					 ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
 		if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
 			*success_flags = 1;
 	} else {
-		old_cnt = roce_get_field(*old_send,
+		old_cnt = roce_get_field(*old_send_tmp,
 					 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
 					 ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S);
 		if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) {
 			*success_flags = 1;
 		} else {
-			send_ptr = roce_get_field(*old_send,
+			send_ptr = roce_get_field(*old_send_tmp,
 					    ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
 					    ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
-				   roce_get_field(sdb_retry_cnt,
+				   roce_get_field(tmp1,
 					    ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
 					    ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
-			roce_set_field(*old_send,
+			roce_set_field(*old_send_tmp,
 				       ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
 				       ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S,
 				       send_ptr);
@@ -3646,11 +3700,14 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
 {
 	struct device *dev = &hr_dev->pdev->dev;
 	u32 sdb_send_ptr, old_send;
+	__le32 sdb_issue_ptr_tmp;
+	__le32 sdb_send_ptr_tmp;
 	u32 success_flags = 0;
 	unsigned long end;
 	u32 old_retry;
 	u32 inv_cnt;
 	u32 tsp_st;
+	__le32 tmp;
 
 	if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 ||
 	    *wait_stage < HNS_ROCE_V1_DB_STAGE1) {
@@ -3679,10 +3736,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
 						 ROCEE_SDB_SEND_PTR_REG);
 		}
 
-		if (roce_get_field(sdb_issue_ptr,
+		sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr);
+		sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr);
+		if (roce_get_field(sdb_issue_ptr_tmp,
 				   ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
 				   ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) ==
-		    roce_get_field(sdb_send_ptr,
+		    roce_get_field(sdb_send_ptr_tmp,
 				   ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
 				   ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) {
 			old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
@@ -3690,7 +3749,8 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
 
 			do {
 				tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG);
-				if (roce_get_bit(tsp_st,
+				tmp = cpu_to_le32(tsp_st);
+				if (roce_get_bit(tmp,
 					ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) {
 					*wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
 					return 0;
@@ -3699,8 +3759,9 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
 				if (!time_before(jiffies, end)) {
 					dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n"
 						     "issue 0x%x send 0x%x.\n",
-						hr_qp->qpn, sdb_issue_ptr,
-						sdb_send_ptr);
+						hr_qp->qpn,
+						le32_to_cpu(sdb_issue_ptr_tmp),
+						le32_to_cpu(sdb_send_ptr_tmp));
 					return 0;
 				}
 
@@ -4102,9 +4163,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev,
 	struct device *dev = &hr_dev->pdev->dev;
 	u32 cqn;
 
-	cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
+	cqn = roce_get_field(aeqe->event.cq_event.cq,
 			  HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
-			  HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
+			  HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S);
 
 	switch (event_type) {
 	case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
@@ -4340,6 +4401,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
 	u32 aeshift_val;
 	u32 ceshift_val;
 	u32 cemask_val;
+	__le32 tmp;
 	int i;
 
 	/*
@@ -4348,30 +4410,34 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
 	 * interrupt, mask irq, clear irq, cancel mask operation
 	 */
 	aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
+	tmp = cpu_to_le32(aeshift_val);
 
 	/* AEQE overflow */
-	if (roce_get_bit(aeshift_val,
+	if (roce_get_bit(tmp,
 		ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
 		dev_warn(dev, "AEQ overflow!\n");
 
 		/* Set mask */
 		caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
-		roce_set_bit(caepaemask_val,
-			     ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
+		tmp = cpu_to_le32(caepaemask_val);
+		roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
 			     HNS_ROCE_INT_MASK_ENABLE);
+		caepaemask_val = le32_to_cpu(tmp);
 		roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
 
 		/* Clear int state(INT_WC : write 1 clear) */
 		caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
-		roce_set_bit(caepaest_val,
-			     ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
+		tmp = cpu_to_le32(caepaest_val);
+		roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
+		caepaest_val = le32_to_cpu(tmp);
 		roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
 
 		/* Clear mask */
 		caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
-		roce_set_bit(caepaemask_val,
-			     ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
+		tmp = cpu_to_le32(caepaemask_val);
+		roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
 			     HNS_ROCE_INT_MASK_DISABLE);
+		caepaemask_val = le32_to_cpu(tmp);
 		roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
 	}
 
@@ -4379,8 +4445,9 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
 	for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
 		ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
 					i * CEQ_REG_OFFSET);
+		tmp = cpu_to_le32(ceshift_val);
 
-		if (roce_get_bit(ceshift_val,
+		if (roce_get_bit(tmp,
 			ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
 			dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
 			int_work++;
@@ -4389,9 +4456,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
 			cemask_val = roce_read(hr_dev,
 					       ROCEE_CAEP_CE_IRQ_MASK_0_REG +
 					       i * CEQ_REG_OFFSET);
-			roce_set_bit(cemask_val,
+			tmp = cpu_to_le32(cemask_val);
+			roce_set_bit(tmp,
 				ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
 				HNS_ROCE_INT_MASK_ENABLE);
+			cemask_val = le32_to_cpu(tmp);
 			roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
 				   i * CEQ_REG_OFFSET, cemask_val);
 
@@ -4399,9 +4468,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
 			cealmovf_val = roce_read(hr_dev,
 				       ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
 				       i * CEQ_REG_OFFSET);
-			roce_set_bit(cealmovf_val,
+			tmp = cpu_to_le32(cealmovf_val);
+			roce_set_bit(tmp,
 				     ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
 				     1);
+			cealmovf_val = le32_to_cpu(tmp);
 			roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
 				   i * CEQ_REG_OFFSET, cealmovf_val);
 
@@ -4409,9 +4480,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
 			cemask_val = roce_read(hr_dev,
 				     ROCEE_CAEP_CE_IRQ_MASK_0_REG +
 				     i * CEQ_REG_OFFSET);
-			roce_set_bit(cemask_val,
+			tmp = cpu_to_le32(cemask_val);
+			roce_set_bit(tmp,
 			       ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
 			       HNS_ROCE_INT_MASK_DISABLE);
+			cemask_val = le32_to_cpu(tmp);
 			roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
 				   i * CEQ_REG_OFFSET, cemask_val);
 		}
@@ -4435,13 +4508,16 @@ static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev)
 {
 	u32 aemask_val;
 	int masken = 0;
+	__le32 tmp;
 	int i;
 
 	/* AEQ INT */
 	aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
-	roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
+	tmp = cpu_to_le32(aemask_val);
+	roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
 		     masken);
-	roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
+	roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
+	aemask_val = le32_to_cpu(tmp);
 	roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
 
 	/* CEQ INT */
@@ -4473,20 +4549,24 @@ static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
 				  int enable_flag)
 {
 	void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
+	__le32 tmp;
 	u32 val;
 
 	val = readl(eqc);
+	tmp = cpu_to_le32(val);
 
 	if (enable_flag)
-		roce_set_field(val,
+		roce_set_field(tmp,
 			       ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
 			       ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
 			       HNS_ROCE_EQ_STAT_VALID);
 	else
-		roce_set_field(val,
+		roce_set_field(tmp,
 			       ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
 			       ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
 			       HNS_ROCE_EQ_STAT_INVALID);
+
+	val = le32_to_cpu(tmp);
 	writel(val, eqc);
 }
 
@@ -4499,6 +4579,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
 	u32 eqconsindx_val = 0;
 	u32 eqcuridx_val = 0;
 	u32 eqshift_val = 0;
+	__le32 tmp2 = 0;
+	__le32 tmp1 = 0;
+	__le32 tmp = 0;
 	int num_bas;
 	int ret;
 	int i;
@@ -4530,14 +4613,13 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
 		memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
 	}
 	eq->cons_index = 0;
-	roce_set_field(eqshift_val,
-		       ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
+	roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
 		       ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
 		       HNS_ROCE_EQ_STAT_INVALID);
-	roce_set_field(eqshift_val,
-		       ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
+	roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
 		       ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
 		       eq->log_entries);
+	eqshift_val = le32_to_cpu(tmp);
 	writel(eqshift_val, eqc);
 
 	/* Configure eq extended address 12~44bit */
@@ -4549,18 +4631,18 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
 	 * using 4K page, and shift more 32 because of
 	 * calculating the high 32 bit value evaluated to hardware.
 	 */
-	roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
+	roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
 		       ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
 		       eq->buf_list[0].map >> 44);
-	roce_set_field(eqcuridx_val,
-		       ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
+	roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
 		       ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
+	eqcuridx_val = le32_to_cpu(tmp1);
 	writel(eqcuridx_val, eqc + 8);
 
 	/* Configure eq consumer index */
-	roce_set_field(eqconsindx_val,
-		       ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
+	roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
 		       ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
+	eqconsindx_val = le32_to_cpu(tmp2);
 	writel(eqconsindx_val, eqc + 0xc);
 
 	return 0;
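
Nearly every hunk in hns_roce_hw_v1.c above follows the same shape: roce_read() returns a CPU-order u32, the value is converted into a __le32 scratch variable, edited with roce_set_field()/roce_set_bit(), converted back with le32_to_cpu(), and written out with roce_write(). On little-endian hosts both conversions are no-ops, so this looks like endianness-annotation hygiene rather than a behavioural change. A stand-alone, user-space sketch of that read-modify-write round trip follows; the helper, mask and shift are made up for illustration, and htole32()/le32toh() from <endian.h> stand in for the kernel's cpu_to_le32()/le32_to_cpu():

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define FIELD_M 0x0000fff0u	/* hypothetical field mask */
#define FIELD_S 4		/* hypothetical field shift */

/* Edit a field inside a little-endian register image, endian-safely. */
static uint32_t set_field_le(uint32_t le_img, uint32_t mask, int shift,
			     uint32_t val)
{
	uint32_t host = le32toh(le_img);

	host = (host & ~mask) | ((val << shift) & mask);
	return htole32(host);
}

int main(void)
{
	uint32_t val = 0xdeadbeef;	/* as if returned by roce_read() */
	uint32_t tmp = htole32(val);	/* CPU order -> LE register image */

	tmp = set_field_le(tmp, FIELD_M, FIELD_S, 0x123);
	val = le32toh(tmp);		/* LE register image -> CPU order */
	printf("value to write back: 0x%08x\n", (unsigned int)val);
	return 0;
}
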
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index e9a2717..6644014 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -260,7 +260,7 @@ struct hns_roce_cqe {
 	__le32 cqe_byte_4;
 	union {
 		__le32 r_key;
-		__be32 immediate_data;
+		__le32 immediate_data;
 	};
 	__le32 byte_cnt;
 	__le32 cqe_byte_16;
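
The hns_roce_hw_v1.h change above retypes the CQE immediate data as __le32, and the hns_roce_hw_v2.c hunks below store it via cpu_to_le32(be32_to_cpu(wr->ex.imm_data)): the verbs layer carries immediate data in network byte order (__be32), while the hardware descriptor field is little-endian, so the conversion is now explicit instead of the field being annotated big-endian. A small user-space sketch of that conversion follows; the struct and field name are hypothetical stand-ins, and htobe32()/be32toh()/htole32()/le32toh() from <endian.h> replace the kernel byte-order helpers:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical WQE layout: the device expects little-endian fields. */
struct demo_wqe {
	uint32_t immtdata;
};

int main(void)
{
	/* Immediate data as the verbs API hands it over: big-endian. */
	uint32_t imm_be = htobe32(0x11223344);
	struct demo_wqe wqe;

	/* Big-endian wire value -> CPU order -> little-endian WQE field. */
	wqe.immtdata = htole32(be32toh(imm_be));

	printf("immtdata (decoded): 0x%08x\n",
	       (unsigned int)le32toh(wqe.immtdata));
	return 0;
}
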
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a6e11be..0218c0f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -36,6 +36,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <net/addrconf.h>
+#include <rdma/ib_addr.h>
 #include <rdma/ib_umem.h>
 
 #include "hnae3.h"
@@ -53,7 +54,7 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
 	dseg->len  = cpu_to_le32(sg->length);
 }
 
-static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
+static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
 			   unsigned int *sge_ind)
 {
 	struct hns_roce_v2_wqe_data_seg *dseg;
@@ -100,10 +101,10 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
 	}
 }
 
-static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
+static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 			     struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
 			     void *wqe, unsigned int *sge_ind,
-			     struct ib_send_wr **bad_wr)
+			     const struct ib_send_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_v2_wqe_data_seg *dseg = wqe;
@@ -164,23 +165,30 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return 0;
 }
 
-static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-				 struct ib_send_wr **bad_wr)
+static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
+				 const struct ib_qp_attr *attr,
+				 int attr_mask, enum ib_qp_state cur_state,
+				 enum ib_qp_state new_state);
+
+static int hns_roce_v2_post_send(struct ib_qp *ibqp,
+				 const struct ib_send_wr *wr,
+				 const struct ib_send_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah);
 	struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
 	struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
 	struct hns_roce_qp *qp = to_hr_qp(ibqp);
-	struct hns_roce_v2_wqe_data_seg *dseg;
 	struct device *dev = hr_dev->dev;
 	struct hns_roce_v2_db sq_db;
+	struct ib_qp_attr attr;
 	unsigned int sge_ind = 0;
 	unsigned int owner_bit;
 	unsigned long flags;
 	unsigned int ind;
 	void *wqe = NULL;
 	bool loopback;
+	int attr_mask;
 	u32 tmp_len;
 	int ret = 0;
 	u8 *smac;
@@ -273,7 +281,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			switch (wr->opcode) {
 			case IB_WR_SEND_WITH_IMM:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
-				ud_sq_wqe->immtdata = wr->ex.imm_data;
+				ud_sq_wqe->immtdata =
+				      cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
 				break;
 			default:
 				ud_sq_wqe->immtdata = 0;
@@ -330,14 +339,13 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			roce_set_field(ud_sq_wqe->byte_36,
 				       V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
 				       V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
-				       0);
-			roce_set_field(ud_sq_wqe->byte_36,
-				       V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
-				       V2_UD_SEND_WQE_BYTE_36_TCLASS_S,
-				       0);
+				       ah->av.sl_tclass_flowlabel >>
+				       HNS_ROCE_TCLASS_SHIFT);
 			roce_set_field(ud_sq_wqe->byte_40,
 				       V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
-				       V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, 0);
+				       V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S,
+				       ah->av.sl_tclass_flowlabel &
+				       HNS_ROCE_FLOW_LABEL_MASK);
 			roce_set_field(ud_sq_wqe->byte_40,
 				       V2_UD_SEND_WQE_BYTE_40_SL_M,
 				       V2_UD_SEND_WQE_BYTE_40_SL_S,
@@ -371,7 +379,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			switch (wr->opcode) {
 			case IB_WR_SEND_WITH_IMM:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
-				rc_sq_wqe->immtdata = wr->ex.imm_data;
+				rc_sq_wqe->immtdata =
+				      cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
 				break;
 			case IB_WR_SEND_WITH_INV:
 				rc_sq_wqe->inv_key =
@@ -485,7 +494,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			}
 
 			wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
-			dseg = wqe;
 
 			ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
 						&sge_ind, bad_wr);
@@ -523,6 +531,19 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 		qp->sq_next_wqe = ind;
 		qp->next_sge = sge_ind;
+
+		if (qp->state == IB_QPS_ERR) {
+			attr_mask = IB_QP_STATE;
+			attr.qp_state = IB_QPS_ERR;
+
+			ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask,
+						    qp->state, IB_QPS_ERR);
+			if (ret) {
+				spin_unlock_irqrestore(&qp->sq.lock, flags);
+				*bad_wr = wr;
+				return ret;
+			}
+		}
 	}
 
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -530,16 +551,19 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return ret;
 }
 
-static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-				 struct ib_recv_wr **bad_wr)
+static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
+				 const struct ib_recv_wr *wr,
+				 const struct ib_recv_wr **bad_wr)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 	struct hns_roce_v2_wqe_data_seg *dseg;
 	struct hns_roce_rinl_sge *sge_list;
 	struct device *dev = hr_dev->dev;
+	struct ib_qp_attr attr;
 	unsigned long flags;
 	void *wqe = NULL;
+	int attr_mask;
 	int ret = 0;
 	int nreq;
 	int ind;
@@ -608,6 +632,20 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		wmb();
 
 		*hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff;
+
+		if (hr_qp->state == IB_QPS_ERR) {
+			attr_mask = IB_QP_STATE;
+			attr.qp_state = IB_QPS_ERR;
+
+			ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr,
+						    attr_mask, hr_qp->state,
+						    IB_QPS_ERR);
+			if (ret) {
+				spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
+				*bad_wr = wr;
+				return ret;
+			}
+		}
 	}
 	spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
 
@@ -702,8 +740,8 @@ static int hns_roce_v2_cmq_init(struct hns_roce_dev *hr_dev)
 	int ret;
 
 	/* Setup the queue entries for command queue */
-	priv->cmq.csq.desc_num = 1024;
-	priv->cmq.crq.desc_num = 1024;
+	priv->cmq.csq.desc_num = CMD_CSQ_DESC_NUM;
+	priv->cmq.crq.desc_num = CMD_CRQ_DESC_NUM;
 
 	/* Setup the lock for command queue */
 	spin_lock_init(&priv->cmq.csq.lock);
@@ -925,7 +963,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
 static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_cmq_desc desc[2];
-	struct hns_roce_pf_res *res;
+	struct hns_roce_pf_res_a *req_a;
+	struct hns_roce_pf_res_b *req_b;
 	int ret;
 	int i;
 
@@ -943,21 +982,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
 	if (ret)
 		return ret;
 
-	res = (struct hns_roce_pf_res *)desc[0].data;
+	req_a = (struct hns_roce_pf_res_a *)desc[0].data;
+	req_b = (struct hns_roce_pf_res_b *)desc[1].data;
 
-	hr_dev->caps.qpc_bt_num = roce_get_field(res->qpc_bt_idx_num,
+	hr_dev->caps.qpc_bt_num = roce_get_field(req_a->qpc_bt_idx_num,
 						 PF_RES_DATA_1_PF_QPC_BT_NUM_M,
 						 PF_RES_DATA_1_PF_QPC_BT_NUM_S);
-	hr_dev->caps.srqc_bt_num = roce_get_field(res->srqc_bt_idx_num,
+	hr_dev->caps.srqc_bt_num = roce_get_field(req_a->srqc_bt_idx_num,
 						PF_RES_DATA_2_PF_SRQC_BT_NUM_M,
 						PF_RES_DATA_2_PF_SRQC_BT_NUM_S);
-	hr_dev->caps.cqc_bt_num = roce_get_field(res->cqc_bt_idx_num,
+	hr_dev->caps.cqc_bt_num = roce_get_field(req_a->cqc_bt_idx_num,
 						 PF_RES_DATA_3_PF_CQC_BT_NUM_M,
 						 PF_RES_DATA_3_PF_CQC_BT_NUM_S);
-	hr_dev->caps.mpt_bt_num = roce_get_field(res->mpt_bt_idx_num,
+	hr_dev->caps.mpt_bt_num = roce_get_field(req_a->mpt_bt_idx_num,
 						 PF_RES_DATA_4_PF_MPT_BT_NUM_M,
 						 PF_RES_DATA_4_PF_MPT_BT_NUM_S);
 
+	hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num,
+					     PF_RES_DATA_3_PF_SL_NUM_M,
+					     PF_RES_DATA_3_PF_SL_NUM_S);
+
 	return 0;
 }
 
@@ -1203,12 +1247,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	caps->eqe_ba_pg_sz	= 0;
 	caps->eqe_buf_pg_sz	= 0;
 	caps->eqe_hop_num	= HNS_ROCE_EQE_HOP_NUM;
+	caps->tsq_buf_pg_sz	= 0;
 	caps->chunk_sz		= HNS_ROCE_V2_TABLE_CHUNK_SIZE;
 
 	caps->flags		= HNS_ROCE_CAP_FLAG_REREG_MR |
 				  HNS_ROCE_CAP_FLAG_ROCE_V1_V2 |
 				  HNS_ROCE_CAP_FLAG_RQ_INLINE |
-				  HNS_ROCE_CAP_FLAG_RECORD_DB;
+				  HNS_ROCE_CAP_FLAG_RECORD_DB |
+				  HNS_ROCE_CAP_FLAG_SQ_RECORD_DB;
 	caps->pkey_table_len[0] = 1;
 	caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
 	caps->ceqe_depth	= HNS_ROCE_V2_COMP_EQE_NUM;
@@ -1224,6 +1270,228 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 	return ret;
 }
 
+static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
+				      enum hns_roce_link_table_type type)
+{
+	struct hns_roce_cmq_desc desc[2];
+	struct hns_roce_cfg_llm_a *req_a =
+				(struct hns_roce_cfg_llm_a *)desc[0].data;
+	struct hns_roce_cfg_llm_b *req_b =
+				(struct hns_roce_cfg_llm_b *)desc[1].data;
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_link_table *link_tbl;
+	struct hns_roce_link_table_entry *entry;
+	enum hns_roce_opcode_type opcode;
+	u32 page_num;
+	int i;
+
+	switch (type) {
+	case TSQ_LINK_TABLE:
+		link_tbl = &priv->tsq;
+		opcode = HNS_ROCE_OPC_CFG_EXT_LLM;
+		break;
+	case TPQ_LINK_TABLE:
+		link_tbl = &priv->tpq;
+		opcode = HNS_ROCE_OPC_CFG_TMOUT_LLM;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	page_num = link_tbl->npages;
+	entry = link_tbl->table.buf;
+	memset(req_a, 0, sizeof(*req_a));
+	memset(req_b, 0, sizeof(*req_b));
+
+	for (i = 0; i < 2; i++) {
+		hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false);
+
+		if (i == 0)
+			desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+		else
+			desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+		if (i == 0) {
+			req_a->base_addr_l = link_tbl->table.map & 0xffffffff;
+			req_a->base_addr_h = (link_tbl->table.map >> 32) &
+					     0xffffffff;
+			roce_set_field(req_a->depth_pgsz_init_en,
+				       CFG_LLM_QUE_DEPTH_M,
+				       CFG_LLM_QUE_DEPTH_S,
+				       link_tbl->npages);
+			roce_set_field(req_a->depth_pgsz_init_en,
+				       CFG_LLM_QUE_PGSZ_M,
+				       CFG_LLM_QUE_PGSZ_S,
+				       link_tbl->pg_sz);
+			req_a->head_ba_l = entry[0].blk_ba0;
+			req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr;
+			roce_set_field(req_a->head_ptr,
+				       CFG_LLM_HEAD_PTR_M,
+				       CFG_LLM_HEAD_PTR_S, 0);
+		} else {
+			req_b->tail_ba_l = entry[page_num - 1].blk_ba0;
+			roce_set_field(req_b->tail_ba_h,
+				       CFG_LLM_TAIL_BA_H_M,
+				       CFG_LLM_TAIL_BA_H_S,
+				       entry[page_num - 1].blk_ba1_nxt_ptr &
+				       HNS_ROCE_LINK_TABLE_BA1_M);
+			roce_set_field(req_b->tail_ptr,
+				       CFG_LLM_TAIL_PTR_M,
+				       CFG_LLM_TAIL_PTR_S,
+				       (entry[page_num - 2].blk_ba1_nxt_ptr &
+				       HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
+				       HNS_ROCE_LINK_TABLE_NXT_PTR_S);
+		}
+	}
+	roce_set_field(req_a->depth_pgsz_init_en,
+		       CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1);
+
+	return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
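+/*
+ * Allocate the link table: a DMA-coherent array of link table entries plus
+ * one data buffer per entry, chained together through the next-pointer
+ * field, then hand the table to hardware via hns_roce_config_link_table().
+ */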
+static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev,
+				    enum hns_roce_link_table_type type)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_link_table *link_tbl;
+	struct hns_roce_link_table_entry *entry;
+	struct device *dev = hr_dev->dev;
+	u32 buf_chk_sz;
+	dma_addr_t t;
+	int func_num = 1;
+	int pg_num_a;
+	int pg_num_b;
+	int pg_num;
+	int size;
+	int i;
+
+	switch (type) {
+	case TSQ_LINK_TABLE:
+		link_tbl = &priv->tsq;
+		buf_chk_sz = 1 << (hr_dev->caps.tsq_buf_pg_sz + PAGE_SHIFT);
+		pg_num_a = hr_dev->caps.num_qps * 8 / buf_chk_sz;
+		pg_num_b = hr_dev->caps.sl_num * 4 + 2;
+		break;
+	case TPQ_LINK_TABLE:
+		link_tbl = &priv->tpq;
+		buf_chk_sz = 1 << (hr_dev->caps.tpq_buf_pg_sz + PAGE_SHIFT);
+		pg_num_a = hr_dev->caps.num_cqs * 4 / buf_chk_sz;
+		pg_num_b = 2 * 4 * func_num + 2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	pg_num = max(pg_num_a, pg_num_b);
+	size = pg_num * sizeof(struct hns_roce_link_table_entry);
+
+	link_tbl->table.buf = dma_alloc_coherent(dev, size,
+						 &link_tbl->table.map,
+						 GFP_KERNEL);
+	if (!link_tbl->table.buf)
+		goto out;
+
+	link_tbl->pg_list = kcalloc(pg_num, sizeof(*link_tbl->pg_list),
+				    GFP_KERNEL);
+	if (!link_tbl->pg_list)
+		goto err_kcalloc_failed;
+
+	entry = link_tbl->table.buf;
+	for (i = 0; i < pg_num; ++i) {
+		link_tbl->pg_list[i].buf = dma_alloc_coherent(dev, buf_chk_sz,
+							      &t, GFP_KERNEL);
+		if (!link_tbl->pg_list[i].buf)
+			goto err_alloc_buf_failed;
+
+		link_tbl->pg_list[i].map = t;
+		memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz);
+
+		entry[i].blk_ba0 = (t >> 12) & 0xffffffff;
+		roce_set_field(entry[i].blk_ba1_nxt_ptr,
+			       HNS_ROCE_LINK_TABLE_BA1_M,
+			       HNS_ROCE_LINK_TABLE_BA1_S,
+			       t >> 44);
+
+		if (i < (pg_num - 1))
+			roce_set_field(entry[i].blk_ba1_nxt_ptr,
+				       HNS_ROCE_LINK_TABLE_NXT_PTR_M,
+				       HNS_ROCE_LINK_TABLE_NXT_PTR_S,
+				       i + 1);
+	}
+	link_tbl->npages = pg_num;
+	link_tbl->pg_sz = buf_chk_sz;
+
+	return hns_roce_config_link_table(hr_dev, type);
+
+err_alloc_buf_failed:
+	for (i -= 1; i >= 0; i--)
+		dma_free_coherent(dev, buf_chk_sz,
+				  link_tbl->pg_list[i].buf,
+				  link_tbl->pg_list[i].map);
+	kfree(link_tbl->pg_list);
+
+err_kcalloc_failed:
+	dma_free_coherent(dev, size, link_tbl->table.buf,
+			  link_tbl->table.map);
+
+out:
+	return -ENOMEM;
+}
+
+static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
+				     struct hns_roce_link_table *link_tbl)
+{
+	struct device *dev = hr_dev->dev;
+	int size;
+	int i;
+
+	size = link_tbl->npages * sizeof(struct hns_roce_link_table_entry);
+
+	for (i = 0; i < link_tbl->npages; ++i)
+		if (link_tbl->pg_list[i].buf)
+			dma_free_coherent(dev, link_tbl->pg_sz,
+					  link_tbl->pg_list[i].buf,
+					  link_tbl->pg_list[i].map);
+	kfree(link_tbl->pg_list);
+
+	dma_free_coherent(dev, size, link_tbl->table.buf,
+			  link_tbl->table.map);
+}
+
+static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	int ret;
+
+	/* TSQ includes SQ doorbell and ack doorbell */
+	ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE);
+	if (ret) {
+		dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret);
+		return ret;
+	}
+
+	ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE);
+	if (ret) {
+		dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret);
+		goto err_tpq_init_failed;
+	}
+
+	return 0;
+
+err_tpq_init_failed:
+	hns_roce_free_link_table(hr_dev, &priv->tsq);
+
+	return ret;
+}
+
+static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+	hns_roce_free_link_table(hr_dev, &priv->tpq);
+	hns_roce_free_link_table(hr_dev, &priv->tsq);
+}
+
 static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
 {
 	u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
@@ -1307,13 +1575,45 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
 	return 0;
 }
 
+static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev,
+				      int gid_index, const union ib_gid *gid,
+				      enum hns_roce_sgid_type sgid_type)
+{
+	struct hns_roce_cmq_desc desc;
+	struct hns_roce_cfg_sgid_tb *sgid_tb =
+				    (struct hns_roce_cfg_sgid_tb *)desc.data;
+	u32 *p;
+
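+	/* A single CFG_SGID_TB command carries the full 128-bit GID, the
+	 * table index and the GID type.
+	 */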
+	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
+
+	roce_set_field(sgid_tb->table_idx_rsv,
+		       CFG_SGID_TB_TABLE_IDX_M,
+		       CFG_SGID_TB_TABLE_IDX_S, gid_index);
+	roce_set_field(sgid_tb->vf_sgid_type_rsv,
+		       CFG_SGID_TB_VF_SGID_TYPE_M,
+		       CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
+
+	p = (u32 *)&gid->raw[0];
+	sgid_tb->vf_sgid_l = cpu_to_le32(*p);
+
+	p = (u32 *)&gid->raw[4];
+	sgid_tb->vf_sgid_ml = cpu_to_le32(*p);
+
+	p = (u32 *)&gid->raw[8];
+	sgid_tb->vf_sgid_mh = cpu_to_le32(*p);
+
+	p = (u32 *)&gid->raw[0xc];
+	sgid_tb->vf_sgid_h = cpu_to_le32(*p);
+
+	return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
 static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
-			       int gid_index, union ib_gid *gid,
+			       int gid_index, const union ib_gid *gid,
 			       const struct ib_gid_attr *attr)
 {
 	enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1;
-	u32 *p;
-	u32 val;
+	int ret;
 
 	if (!gid || !attr)
 		return -EINVAL;
@@ -1328,49 +1628,37 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
 			sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6;
 	}
 
-	p = (u32 *)&gid->raw[0];
-	roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG0_REG +
-		       0x20 * gid_index);
+	ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type);
+	if (ret)
+		dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret);
 
-	p = (u32 *)&gid->raw[4];
-	roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG1_REG +
-		       0x20 * gid_index);
-
-	p = (u32 *)&gid->raw[8];
-	roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG2_REG +
-		       0x20 * gid_index);
-
-	p = (u32 *)&gid->raw[0xc];
-	roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG3_REG +
-		       0x20 * gid_index);
-
-	val = roce_read(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index);
-	roce_set_field(val, ROCEE_VF_SGID_CFG4_SGID_TYPE_M,
-		       ROCEE_VF_SGID_CFG4_SGID_TYPE_S, sgid_type);
-
-	roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val);
-
-	return 0;
+	return ret;
 }
 
 static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
 			       u8 *addr)
 {
+	struct hns_roce_cmq_desc desc;
+	struct hns_roce_cfg_smac_tb *smac_tb =
+				    (struct hns_roce_cfg_smac_tb *)desc.data;
 	u16 reg_smac_h;
 	u32 reg_smac_l;
-	u32 val;
+
+	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false);
 
 	reg_smac_l = *(u32 *)(&addr[0]);
-	roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_VF_SMAC_CFG0_REG +
-		       0x08 * phy_port);
-	val = roce_read(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port);
+	reg_smac_h = *(u16 *)(&addr[4]);
 
-	reg_smac_h  = *(u16 *)(&addr[4]);
-	roce_set_field(val, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M,
-		       ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S, reg_smac_h);
-	roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val);
+	memset(smac_tb, 0, sizeof(*smac_tb));
+	roce_set_field(smac_tb->tb_idx_rsv,
+		       CFG_SMAC_TB_IDX_M,
+		       CFG_SMAC_TB_IDX_S, phy_port);
+	roce_set_field(smac_tb->vf_smac_h_rsv,
+		       CFG_SMAC_TB_VF_SMAC_H_M,
+		       CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
+	smac_tb->vf_smac_l = reg_smac_l;
 
-	return 0;
+	return hns_roce_cmq_send(hr_dev, &desc, 1);
 }
 
 static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
@@ -1758,6 +2046,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 	struct hns_roce_v2_cqe *cqe;
 	struct hns_roce_qp *hr_qp;
 	struct hns_roce_wq *wq;
+	struct ib_qp_attr attr;
+	int attr_mask;
 	int is_send;
 	u16 wqe_ctr;
 	u32 opcode;
@@ -1844,8 +2134,17 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 		break;
 	}
 
-	/* CQE status error, directly return */
-	if (wc->status != IB_WC_SUCCESS)
+	/* flush cqe if wc status is error, excluding flush error */
+	if ((wc->status != IB_WC_SUCCESS) &&
+	    (wc->status != IB_WC_WR_FLUSH_ERR)) {
+		attr_mask = IB_QP_STATE;
+		attr.qp_state = IB_QPS_ERR;
+		return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp,
+					     &attr, attr_mask,
+					     (*cur_qp)->state, IB_QPS_ERR);
+	}
+
+	if (wc->status == IB_WC_WR_FLUSH_ERR)
 		return 0;
 
 	if (is_send) {
@@ -1931,7 +2230,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 		case HNS_ROCE_V2_OPCODE_RDMA_WRITE_IMM:
 			wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
 			wc->wc_flags = IB_WC_WITH_IMM;
-			wc->ex.imm_data = cqe->immtdata;
+			wc->ex.imm_data =
+				cpu_to_be32(le32_to_cpu(cqe->immtdata));
 			break;
 		case HNS_ROCE_V2_OPCODE_SEND:
 			wc->opcode = IB_WC_RECV;
@@ -1940,7 +2240,8 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
 		case HNS_ROCE_V2_OPCODE_SEND_WITH_IMM:
 			wc->opcode = IB_WC_RECV;
 			wc->wc_flags = IB_WC_WITH_IMM;
-			wc->ex.imm_data = cqe->immtdata;
+			wc->ex.imm_data =
+				cpu_to_be32(le32_to_cpu(cqe->immtdata));
 			break;
 		case HNS_ROCE_V2_OPCODE_SEND_WITH_INV:
 			wc->opcode = IB_WC_RECV;
@@ -2273,10 +2574,10 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
 		       V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
 
 	/* No VLAN need to set 0xFFF */
-	roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M,
-		       V2_QPC_BYTE_24_VLAN_IDX_S, 0xfff);
-	roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_IDX_M,
-		       V2_QPC_BYTE_24_VLAN_IDX_S, 0);
+	roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
+		       V2_QPC_BYTE_24_VLAN_ID_S, 0xfff);
+	roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M,
+		       V2_QPC_BYTE_24_VLAN_ID_S, 0);
 
 	/*
 	 * Set some fields in context to zero, Because the default values
@@ -2886,21 +3187,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
 	roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M,
 		       V2_QPC_BYTE_56_LP_PKTN_INI_S, 0);
 
-	roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
-		       V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
-	roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M,
-		       V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
-
-	roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
-		       V2_QPC_BYTE_28_FL_S, grh->flow_label);
-	roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
-		       V2_QPC_BYTE_28_FL_S, 0);
-
-	roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
-		       V2_QPC_BYTE_24_TC_S, grh->traffic_class);
-	roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
-		       V2_QPC_BYTE_24_TC_S, 0);
-
 	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_UD)
 		roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
 			       V2_QPC_BYTE_24_MTU_S, IB_MTU_4096);
@@ -2911,9 +3197,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
 	roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M,
 		       V2_QPC_BYTE_24_MTU_S, 0);
 
-	memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
-	memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
-
 	roce_set_field(context->byte_84_rq_ci_pi,
 		       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
 		       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, hr_qp->rq.head);
@@ -2952,12 +3235,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
 		       V2_QPC_BYTE_168_LP_SGEN_INI_M,
 		       V2_QPC_BYTE_168_LP_SGEN_INI_S, 0);
 
-	roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
-		       V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr));
-	roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
-		       V2_QPC_BYTE_28_SL_S, 0);
-	hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
-
 	return 0;
 }
 
@@ -3135,13 +3412,6 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
 			      V2_QPC_BYTE_28_AT_S, 0);
 	}
 
-	roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
-		       V2_QPC_BYTE_28_SL_S,
-		       rdma_ah_get_sl(&attr->ah_attr));
-	roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
-		       V2_QPC_BYTE_28_SL_S, 0);
-	hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
-
 	roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M,
 		       V2_QPC_BYTE_172_SQ_CUR_PSN_S, attr->sq_psn);
 	roce_set_field(qpc_mask->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M,
@@ -3224,9 +3494,114 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
 		;
 	} else {
 		dev_err(dev, "Illegal state for QP!\n");
+		ret = -EINVAL;
 		goto out;
 	}
 
+	/* When QP state is err, SQ and RQ WQE should be flushed */
+	if (new_state == IB_QPS_ERR) {
+		roce_set_field(context->byte_160_sq_ci_pi,
+			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S,
+			       hr_qp->sq.head);
+		roce_set_field(qpc_mask->byte_160_sq_ci_pi,
+			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M,
+			       V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0);
+		roce_set_field(context->byte_84_rq_ci_pi,
+			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S,
+			       hr_qp->rq.head);
+		roce_set_field(qpc_mask->byte_84_rq_ci_pi,
+			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M,
+			       V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0);
+	}
+
+	if (attr_mask & IB_QP_AV) {
+		const struct ib_global_route *grh =
+					    rdma_ah_read_grh(&attr->ah_attr);
+		const struct ib_gid_attr *gid_attr = NULL;
+		u8 src_mac[ETH_ALEN];
+		int is_roce_protocol;
+		u16 vlan = 0xffff;
+		u8 ib_port;
+		u8 hr_port;
+
+		ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num :
+			   hr_qp->port + 1;
+		hr_port = ib_port - 1;
+		is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) &&
+			       rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
+
+		if (is_roce_protocol) {
+			gid_attr = attr->ah_attr.grh.sgid_attr;
+			vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev);
+			memcpy(src_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
+		}
+
+		roce_set_field(context->byte_24_mtu_tc,
+			       V2_QPC_BYTE_24_VLAN_ID_M,
+			       V2_QPC_BYTE_24_VLAN_ID_S, vlan);
+		roce_set_field(qpc_mask->byte_24_mtu_tc,
+			       V2_QPC_BYTE_24_VLAN_ID_M,
+			       V2_QPC_BYTE_24_VLAN_ID_S, 0);
+
+		if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) {
+			dev_err(hr_dev->dev,
+				"sgid_index(%u) too large. max is %d\n",
+				grh->sgid_index,
+				hr_dev->caps.gid_table_len[hr_port]);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) {
+			dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		roce_set_field(context->byte_52_udpspn_dmac,
+			   V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S,
+			   (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+			   0 : 0x12b7);
+
+		roce_set_field(qpc_mask->byte_52_udpspn_dmac,
+			       V2_QPC_BYTE_52_UDPSPN_M,
+			       V2_QPC_BYTE_52_UDPSPN_S, 0);
+
+		roce_set_field(context->byte_20_smac_sgid_idx,
+			       V2_QPC_BYTE_20_SGID_IDX_M,
+			       V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index);
+
+		roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
+			       V2_QPC_BYTE_20_SGID_IDX_M,
+			       V2_QPC_BYTE_20_SGID_IDX_S, 0);
+
+		roce_set_field(context->byte_24_mtu_tc,
+			       V2_QPC_BYTE_24_HOP_LIMIT_M,
+			       V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit);
+		roce_set_field(qpc_mask->byte_24_mtu_tc,
+			       V2_QPC_BYTE_24_HOP_LIMIT_M,
+			       V2_QPC_BYTE_24_HOP_LIMIT_S, 0);
+
+		roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+			       V2_QPC_BYTE_24_TC_S, grh->traffic_class);
+		roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M,
+			       V2_QPC_BYTE_24_TC_S, 0);
+		roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
+			       V2_QPC_BYTE_28_FL_S, grh->flow_label);
+		roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M,
+			       V2_QPC_BYTE_28_FL_S, 0);
+		memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw));
+		memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw));
+		roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
+			       V2_QPC_BYTE_28_SL_S,
+			       rdma_ah_get_sl(&attr->ah_attr));
+		roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M,
+			       V2_QPC_BYTE_28_SL_S, 0);
+		hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr);
+	}
+
 	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
 		set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
 
@@ -3497,6 +3872,11 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 	hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
 
 	if (is_user) {
+		if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1))
+			hns_roce_db_unmap_user(
+				to_hr_ucontext(hr_qp->ibqp.uobject->context),
+				&hr_qp->sdb);
+
 		if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1))
 			hns_roce_db_unmap_user(
 				to_hr_ucontext(hr_qp->ibqp.uobject->context),
@@ -3579,6 +3959,74 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 	return ret;
 }
 
+static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn)
+{
+	struct hns_roce_qp *hr_qp;
+	struct ib_qp_attr attr;
+	int attr_mask;
+	int ret;
+
+	hr_qp = __hns_roce_qp_lookup(hr_dev, qpn);
+	if (!hr_qp) {
+		dev_warn(hr_dev->dev, "no hr_qp can be found!\n");
+		return;
+	}
+
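+	/* For user QPs, pick up the current SQ/RQ head from the record
+	 * doorbell pages so the flush below uses valid indices.
+	 */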
+	if (hr_qp->ibqp.uobject) {
+		if (hr_qp->sdb_en == 1) {
+			hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
+			hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
+		} else {
+			dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n");
+			return;
+		}
+	}
+
+	attr_mask = IB_QP_STATE;
+	attr.qp_state = IB_QPS_ERR;
+	ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+				    hr_qp->state, IB_QPS_ERR);
+	if (ret)
+		dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n",
+			qpn);
+}
+
+static void hns_roce_irq_work_handle(struct work_struct *work)
+{
+	struct hns_roce_work *irq_work =
+				container_of(work, struct hns_roce_work, work);
+	u32 qpn = irq_work->qpn;
+
+	switch (irq_work->event_type) {
+	case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+	case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+	case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+		hns_roce_set_qps_to_err(irq_work->hr_dev, qpn);
+		break;
+	default:
+		break;
+	}
+
+	kfree(irq_work);
+}
+
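+/*
+ * Runs in the AEQ interrupt path, so the work item is allocated with
+ * GFP_ATOMIC and the actual QP error handling is deferred to the
+ * hns_roce_irq_workqueue.
+ */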
+static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev,
+				      struct hns_roce_eq *eq, u32 qpn)
+{
+	struct hns_roce_work *irq_work;
+
+	irq_work = kzalloc(sizeof(struct hns_roce_work), GFP_ATOMIC);
+	if (!irq_work)
+		return;
+
+	INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle);
+	irq_work->hr_dev = hr_dev;
+	irq_work->qpn = qpn;
+	irq_work->event_type = eq->event_type;
+	irq_work->sub_type = eq->sub_type;
+	queue_work(hr_dev->irq_workq, &(irq_work->work));
+}
+
 static void set_eq_cons_index_v2(struct hns_roce_eq *eq)
 {
 	u32 doorbell[2];
@@ -3681,14 +4129,9 @@ static void hns_roce_v2_local_wq_access_err_handle(struct hns_roce_dev *hr_dev,
 
 static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
 				      struct hns_roce_aeqe *aeqe,
-				      int event_type)
+				      int event_type, u32 qpn)
 {
 	struct device *dev = hr_dev->dev;
-	u32 qpn;
-
-	qpn = roce_get_field(aeqe->event.qp_event.qp,
-			     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
-			     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
 
 	switch (event_type) {
 	case HNS_ROCE_EVENT_TYPE_COMM_EST:
@@ -3715,14 +4158,9 @@ static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev,
 
 static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev,
 				      struct hns_roce_aeqe *aeqe,
-				      int event_type)
+				      int event_type, u32 cqn)
 {
 	struct device *dev = hr_dev->dev;
-	u32 cqn;
-
-	cqn = roce_get_field(aeqe->event.cq_event.cq,
-			     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
-			     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
 
 	switch (event_type) {
 	case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
@@ -3787,6 +4225,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 	struct hns_roce_aeqe *aeqe;
 	int aeqe_found = 0;
 	int event_type;
+	int sub_type;
+	u32 qpn;
+	u32 cqn;
 
 	while ((aeqe = next_aeqe_sw_v2(eq))) {
 
@@ -3798,6 +4239,15 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		event_type = roce_get_field(aeqe->asyn,
 					    HNS_ROCE_V2_AEQE_EVENT_TYPE_M,
 					    HNS_ROCE_V2_AEQE_EVENT_TYPE_S);
+		sub_type = roce_get_field(aeqe->asyn,
+					  HNS_ROCE_V2_AEQE_SUB_TYPE_M,
+					  HNS_ROCE_V2_AEQE_SUB_TYPE_S);
+		qpn = roce_get_field(aeqe->event.qp_event.qp,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+		cqn = roce_get_field(aeqe->event.cq_event.cq,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+				     HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
 
 		switch (event_type) {
 		case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -3811,7 +4261,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
 		case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
 		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
-			hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type);
+			hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type,
+						  qpn);
 			break;
 		case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
 		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
@@ -3820,7 +4271,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 			break;
 		case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
 		case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
-			hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type);
+			hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type,
+						  cqn);
 			break;
 		case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
 			dev_warn(dev, "DB overflow.\n");
@@ -3843,6 +4295,8 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 			break;
 		};
 
+		eq->event_type = event_type;
+		eq->sub_type = sub_type;
 		++eq->cons_index;
 		aeqe_found = 1;
 
@@ -3850,6 +4304,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 			dev_warn(dev, "cons_index overflow, set back to 0.\n");
 			eq->cons_index = 0;
 		}
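+		/* Hand the event off to the irq workqueue for deferred
+		 * handling (e.g. flushing QPs on WQ errors).
+		 */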
+		hns_roce_v2_init_irq_work(hr_dev, eq, qpn);
 	}
 
 	set_eq_cons_index_v2(eq);
@@ -4052,15 +4507,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
 	u32 bt_chk_sz;
 	u32 mhop_num;
 	int eqe_alloc;
-	int ba_num;
 	int i = 0;
 	int j = 0;
 
 	mhop_num = hr_dev->caps.eqe_hop_num;
 	buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
 	bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
-	ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) /
-		 buf_chk_sz;
 
 	/* hop_num = 0 */
 	if (mhop_num == HNS_ROCE_HOP_NUM_0) {
@@ -4669,6 +5121,13 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 		}
 	}
 
+	hr_dev->irq_workq =
+		create_singlethread_workqueue("hns_roce_irq_workqueue");
+	if (!hr_dev->irq_workq) {
+		dev_err(dev, "Create irq workqueue failed!\n");
+		goto err_request_irq_fail;
+	}
+
 	return 0;
 
 err_request_irq_fail:
@@ -4719,12 +5178,17 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
 		kfree(hr_dev->irq_names[i]);
 
 	kfree(eq_table->eq);
+
+	flush_workqueue(hr_dev->irq_workq);
+	destroy_workqueue(hr_dev->irq_workq);
 }
 
 static const struct hns_roce_hw hns_roce_hw_v2 = {
 	.cmq_init = hns_roce_v2_cmq_init,
 	.cmq_exit = hns_roce_v2_cmq_exit,
 	.hw_profile = hns_roce_v2_profile,
+	.hw_init = hns_roce_v2_init,
+	.hw_exit = hns_roce_v2_exit,
 	.post_mbox = hns_roce_v2_post_mbox,
 	.chk_mbox = hns_roce_v2_chk_mbox,
 	.set_gid = hns_roce_v2_set_gid,
@@ -4749,6 +5213,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
 static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE_RDMA_MACSEC), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA), 0},
+	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_50GE_RDMA_MACSEC), 0},
 	{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_MACSEC), 0},
 	/* required last entry */
 	{0, }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index d47675f..14aa308 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -112,6 +112,9 @@
 	(step_idx == 1 && hop_num == 1) || \
 	(step_idx == 2 && hop_num == 2))
 
+#define CMD_CSQ_DESC_NUM		1024
+#define CMD_CRQ_DESC_NUM		1024
+
 enum {
 	NO_ARMED = 0x0,
 	REG_NXT_CEQE = 0x2,
@@ -203,6 +206,10 @@ enum hns_roce_opcode_type {
 	HNS_ROCE_OPC_ALLOC_PF_RES			= 0x8004,
 	HNS_ROCE_OPC_QUERY_PF_RES			= 0x8400,
 	HNS_ROCE_OPC_ALLOC_VF_RES			= 0x8401,
+	HNS_ROCE_OPC_CFG_EXT_LLM			= 0x8403,
+	HNS_ROCE_OPC_CFG_TMOUT_LLM			= 0x8404,
+	HNS_ROCE_OPC_CFG_SGID_TB			= 0x8500,
+	HNS_ROCE_OPC_CFG_SMAC_TB			= 0x8501,
 	HNS_ROCE_OPC_CFG_BT_ATTR			= 0x8506,
 };
 
@@ -447,8 +454,8 @@ struct hns_roce_v2_qp_context {
 #define	V2_QPC_BYTE_24_TC_S 8
 #define V2_QPC_BYTE_24_TC_M GENMASK(15, 8)
 
-#define	V2_QPC_BYTE_24_VLAN_IDX_S 16
-#define V2_QPC_BYTE_24_VLAN_IDX_M GENMASK(27, 16)
+#define	V2_QPC_BYTE_24_VLAN_ID_S 16
+#define V2_QPC_BYTE_24_VLAN_ID_M GENMASK(27, 16)
 
 #define	V2_QPC_BYTE_24_MTU_S 28
 #define V2_QPC_BYTE_24_MTU_M GENMASK(31, 28)
@@ -768,7 +775,7 @@ struct hns_roce_v2_cqe {
 	__le32	byte_4;
 	union {
 		__le32 rkey;
-		__be32 immtdata;
+		__le32 immtdata;
 	};
 	__le32	byte_12;
 	__le32	byte_16;
@@ -926,7 +933,7 @@ struct hns_roce_v2_cq_db {
 struct hns_roce_v2_ud_send_wqe {
 	__le32	byte_4;
 	__le32	msg_len;
-	__be32	immtdata;
+	__le32	immtdata;
 	__le32	byte_16;
 	__le32	byte_20;
 	__le32	byte_24;
@@ -1012,7 +1019,7 @@ struct hns_roce_v2_rc_send_wqe {
 	__le32		msg_len;
 	union {
 		__le32  inv_key;
-		__be32  immtdata;
+		__le32  immtdata;
 	};
 	__le32		byte_16;
 	__le32		byte_20;
@@ -1061,6 +1068,40 @@ struct hns_roce_query_version {
 	__le32 rsv[5];
 };
 
+struct hns_roce_cfg_llm_a {
+	__le32 base_addr_l;
+	__le32 base_addr_h;
+	__le32 depth_pgsz_init_en;
+	__le32 head_ba_l;
+	__le32 head_ba_h_nxtptr;
+	__le32 head_ptr;
+};
+
+#define CFG_LLM_QUE_DEPTH_S 0
+#define CFG_LLM_QUE_DEPTH_M GENMASK(12, 0)
+
+#define CFG_LLM_QUE_PGSZ_S 16
+#define CFG_LLM_QUE_PGSZ_M GENMASK(19, 16)
+
+#define CFG_LLM_INIT_EN_S 20
+#define CFG_LLM_INIT_EN_M GENMASK(20, 20)
+
+#define CFG_LLM_HEAD_PTR_S 0
+#define CFG_LLM_HEAD_PTR_M GENMASK(11, 0)
+
+struct hns_roce_cfg_llm_b {
+	__le32 tail_ba_l;
+	__le32 tail_ba_h;
+	__le32 tail_ptr;
+	__le32 rsv[3];
+};
+
+#define CFG_LLM_TAIL_BA_H_S 0
+#define CFG_LLM_TAIL_BA_H_M GENMASK(19, 0)
+
+#define CFG_LLM_TAIL_PTR_S 0
+#define CFG_LLM_TAIL_PTR_M GENMASK(11, 0)
+
 struct hns_roce_cfg_global_param {
 	__le32 time_cfg_udp_port;
 	__le32 rsv[5];
@@ -1072,7 +1113,7 @@ struct hns_roce_cfg_global_param {
 #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16
 #define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16)
 
-struct hns_roce_pf_res {
+struct hns_roce_pf_res_a {
 	__le32	rsv;
 	__le32	qpc_bt_idx_num;
 	__le32	srqc_bt_idx_num;
@@ -1111,6 +1152,32 @@ struct hns_roce_pf_res {
 #define PF_RES_DATA_5_PF_EQC_BT_NUM_S 16
 #define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16)
 
+struct hns_roce_pf_res_b {
+	__le32	rsv0;
+	__le32	smac_idx_num;
+	__le32	sgid_idx_num;
+	__le32	qid_idx_sl_num;
+	__le32	rsv[2];
+};
+
+#define PF_RES_DATA_1_PF_SMAC_IDX_S 0
+#define PF_RES_DATA_1_PF_SMAC_IDX_M GENMASK(7, 0)
+
+#define PF_RES_DATA_1_PF_SMAC_NUM_S 8
+#define PF_RES_DATA_1_PF_SMAC_NUM_M GENMASK(16, 8)
+
+#define PF_RES_DATA_2_PF_SGID_IDX_S 0
+#define PF_RES_DATA_2_PF_SGID_IDX_M GENMASK(7, 0)
+
+#define PF_RES_DATA_2_PF_SGID_NUM_S 8
+#define PF_RES_DATA_2_PF_SGID_NUM_M GENMASK(16, 8)
+
+#define PF_RES_DATA_3_PF_QID_IDX_S 0
+#define PF_RES_DATA_3_PF_QID_IDX_M GENMASK(9, 0)
+
+#define PF_RES_DATA_3_PF_SL_NUM_S 16
+#define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16)
+
 struct hns_roce_vf_res_a {
 	__le32 vf_id;
 	__le32 vf_qpc_bt_idx_num;
@@ -1179,13 +1246,6 @@ struct hns_roce_vf_res_b {
 #define VF_RES_B_DATA_3_VF_SL_NUM_S 16
 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16)
 
-/* Reg field definition */
-#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S 0
-#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M GENMASK(15, 0)
-
-#define ROCEE_VF_SGID_CFG4_SGID_TYPE_S 0
-#define ROCEE_VF_SGID_CFG4_SGID_TYPE_M GENMASK(1, 0)
-
 struct hns_roce_cfg_bt_attr {
 	__le32 vf_qpc_cfg;
 	__le32 vf_srqc_cfg;
@@ -1230,6 +1290,32 @@ struct hns_roce_cfg_bt_attr {
 #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8
 #define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8)
 
+struct hns_roce_cfg_sgid_tb {
+	__le32	table_idx_rsv;
+	__le32	vf_sgid_l;
+	__le32	vf_sgid_ml;
+	__le32	vf_sgid_mh;
+	__le32	vf_sgid_h;
+	__le32	vf_sgid_type_rsv;
+};
+#define CFG_SGID_TB_TABLE_IDX_S 0
+#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
+
+#define CFG_SGID_TB_VF_SGID_TYPE_S 0
+#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0)
+
+struct hns_roce_cfg_smac_tb {
+	__le32	tb_idx_rsv;
+	__le32	vf_smac_l;
+	__le32	vf_smac_h_rsv;
+	__le32	rsv[3];
+};
+#define CFG_SMAC_TB_IDX_S 0
+#define CFG_SMAC_TB_IDX_M GENMASK(7, 0)
+
+#define CFG_SMAC_TB_VF_SMAC_H_S 0
+#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
+
 struct hns_roce_cmq_desc {
 	__le16 opcode;
 	__le16 flag;
@@ -1276,8 +1362,32 @@ struct hns_roce_v2_cmq {
 	u16 last_status;
 };
 
+enum hns_roce_link_table_type {
+	TSQ_LINK_TABLE,
+	TPQ_LINK_TABLE,
+};
+
+struct hns_roce_link_table {
+	struct hns_roce_buf_list table;
+	struct hns_roce_buf_list *pg_list;
+	u32 npages;
+	u32 pg_sz;
+};
+
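+/* blk_ba0 holds bits 43:12 of a block base address; blk_ba1_nxt_ptr packs
+ * bits 63:44 of the address and the index of the next entry.
+ */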
+struct hns_roce_link_table_entry {
+	u32 blk_ba0;
+	u32 blk_ba1_nxt_ptr;
+};
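+	/* desc[0] carries the table base address, depth, page size and head
+	 * entry; desc[1] carries the tail entry.
+	 */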
+#define HNS_ROCE_LINK_TABLE_BA1_S 0
+#define HNS_ROCE_LINK_TABLE_BA1_M GENMASK(19, 0)
+
+#define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20
+#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20)
+
 struct hns_roce_v2_priv {
 	struct hns_roce_v2_cmq cmq;
+	struct hns_roce_link_table tsq;
+	struct hns_roce_link_table tpq;
 };
 
 struct hns_roce_eq_context {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 21b901c..c5cae9a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -74,8 +74,7 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
 	return hr_dev->hw->set_mac(hr_dev, phy_port, addr);
 }
 
-static int hns_roce_add_gid(const union ib_gid *gid,
-			    const struct ib_gid_attr *attr, void **context)
+static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
 	u8 port = attr->port_num - 1;
@@ -87,8 +86,7 @@ static int hns_roce_add_gid(const union ib_gid *gid,
 
 	spin_lock_irqsave(&hr_dev->iboe.lock, flags);
 
-	ret = hr_dev->hw->set_gid(hr_dev, port, attr->index,
-				  (union ib_gid *)gid, attr);
+	ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr);
 
 	spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
 
@@ -208,7 +206,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
 	props->max_qp_wr = hr_dev->caps.max_wqes;
 	props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
 				  IB_DEVICE_RC_RNR_NAK_GEN;
-	props->max_sge = max(hr_dev->caps.max_sq_sg, hr_dev->caps.max_rq_sg);
+	props->max_send_sge = hr_dev->caps.max_sq_sg;
+	props->max_recv_sge = hr_dev->caps.max_rq_sg;
 	props->max_sge_rd = 1;
 	props->max_cq = hr_dev->caps.num_cqs;
 	props->max_cqe = hr_dev->caps.max_cqes;
@@ -535,6 +534,9 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
 		(1ULL << IB_USER_VERBS_CMD_QUERY_QP) |
 		(1ULL << IB_USER_VERBS_CMD_DESTROY_QP);
 
+	ib_dev->uverbs_ex_cmd_mask |=
+		(1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
+
 	/* HCA||device||port */
 	ib_dev->modify_device		= hns_roce_modify_device;
 	ib_dev->query_device		= hns_roce_query_device;
@@ -887,8 +889,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
 
 error_failed_cmq_init:
 	if (hr_dev->hw->reset) {
-		ret = hr_dev->hw->reset(hr_dev, false);
-		if (ret)
+		if (hr_dev->hw->reset(hr_dev, false))
 			dev_err(dev, "Dereset RoCE engine failed!\n");
 	}
 
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index b9f2c87..e11c149 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -37,7 +37,7 @@
 
 static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn)
 {
-	return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn);
+	return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn) ? -ENOMEM : 0;
 }
 
 static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index baaf906..efb7e96 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -115,7 +115,10 @@ static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt,
 {
 	struct hns_roce_qp_table *qp_table = &hr_dev->qp_table;
 
-	return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, base);
+	return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align,
+					   base) ? -ENOMEM : 0;
 }
 
 enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
@@ -489,6 +492,14 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev,
 	return 0;
 }
 
+static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr)
+{
+	if (attr->qp_type == IB_QPT_XRC_TGT)
+		return 0;
+
+	return 1;
+}
+
 static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
 {
 	if (attr->qp_type == IB_QPT_XRC_INI ||
@@ -613,6 +624,23 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 			goto err_mtt;
 		}
 
+		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
+		    (udata->inlen >= sizeof(ucmd)) &&
+		    (udata->outlen >= sizeof(resp)) &&
+		    hns_roce_qp_has_sq(init_attr)) {
+			ret = hns_roce_db_map_user(
+					to_hr_ucontext(ib_pd->uobject->context),
+					ucmd.sdb_addr, &hr_qp->sdb);
+			if (ret) {
+				dev_err(dev, "sq record doorbell map failed!\n");
+				goto err_mtt;
+			}
+
+			/* indicate kernel supports sq record db */
+			resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB;
+			hr_qp->sdb_en = 1;
+		}
+
 		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
 		    (udata->outlen >= sizeof(resp)) &&
 		    hns_roce_qp_has_rq(init_attr)) {
@@ -621,7 +649,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 					ucmd.db_addr, &hr_qp->rdb);
 			if (ret) {
 				dev_err(dev, "rq record doorbell map failed!\n");
-				goto err_mtt;
+				goto err_sq_dbmap;
 			}
 		}
 	} else {
@@ -734,7 +762,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) &&
 		(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) {
 
-		/* indicate kernel supports record db */
+		/* indicate kernel supports rq record db */
 		resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB;
 		ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
 		if (ret)
@@ -770,6 +798,16 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 		kfree(hr_qp->rq.wrid);
 	}
 
+err_sq_dbmap:
+	if (ib_pd->uobject)
+		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
+		    (udata->inlen >= sizeof(ucmd)) &&
+		    (udata->outlen >= sizeof(resp)) &&
+		    hns_roce_qp_has_sq(init_attr))
+			hns_roce_db_unmap_user(
+					to_hr_ucontext(ib_pd->uobject->context),
+					&hr_qp->sdb);
+
 err_mtt:
 	hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
 
@@ -903,6 +941,17 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	new_state = attr_mask & IB_QP_STATE ?
 		    attr->qp_state : cur_state;
 
+	if (ibqp->uobject &&
+	    (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) {
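+		/* Latch the SQ/RQ head recorded by userspace so the WQEs can
+		 * be flushed; without SQ record doorbell support this cannot
+		 * be done from the kernel.
+		 */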
+		if (hr_qp->sdb_en == 1) {
+			hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr);
+			hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr);
+		} else {
+			dev_warn(dev, "flush cqe is not supported in userspace!\n");
+			goto out;
+		}
+	}
+
 	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
 				IB_LINK_LAYER_ETHERNET)) {
 		dev_err(dev, "ib_modify_qp_is_ok failed\n");
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
index 2962979..d867ef1 100644
--- a/drivers/infiniband/hw/i40iw/Kconfig
+++ b/drivers/infiniband/hw/i40iw/Kconfig
@@ -1,6 +1,7 @@
 config INFINIBAND_I40IW
 	tristate "Intel(R) Ethernet X722 iWARP Driver"
 	depends on INET && I40E
+	depends on IPV6 || !IPV6
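+	# (IPV6 || !IPV6) prevents a built-in driver with modular IPV6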
 	depends on PCI
 	select GENERIC_ALLOCATOR
 	---help---
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 7b26551..423818a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -57,6 +57,7 @@
 #include <net/addrconf.h>
 #include <net/ip6_route.h>
 #include <net/ip_fib.h>
+#include <net/secure_seq.h>
 #include <net/tcp.h>
 #include <asm/checksum.h>
 
@@ -2164,7 +2165,6 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
 				   struct i40iw_cm_listener *listener)
 {
 	struct i40iw_cm_node *cm_node;
-	struct timespec ts;
 	int oldarpindex;
 	int arpindex;
 	struct net_device *netdev = iwdev->netdev;
@@ -2214,10 +2214,26 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
 	cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
 	cm_node->tcp_cntxt.rcv_wnd =
 			I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
-	ts = current_kernel_time();
-	cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
-	cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4) :
-				 (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6);
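+	/* Derive the initial sequence number from the secure ISN helpers
+	 * rather than the wall clock, and size the MSS per address family.
+	 */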
+	if (cm_node->ipv4) {
+		cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]),
+							htonl(cm_node->rem_addr[0]),
+							htons(cm_node->loc_port),
+							htons(cm_node->rem_port));
+		cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4;
+	} else if (IS_ENABLED(CONFIG_IPV6)) {
+		__be32 loc[4] = {
+			htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]),
+			htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3])
+		};
+		__be32 rem[4] = {
+			htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]),
+			htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3])
+		};
+		cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem,
+							htons(cm_node->loc_port),
+							htons(cm_node->rem_port));
+		cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6;
+	}
 
 	cm_node->iwdev = iwdev;
 	cm_node->dev = &iwdev->sc_dev;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 2836c54..55a1fbf 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -435,45 +435,24 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
 }
 
 /**
- * i40iw_manage_apbvt - add or delete tcp port
+ * i40iw_cqp_manage_abvpt_cmd - send cqp command to manage apbvt
  * @iwdev: iwarp device
  * @accel_local_port: port for apbvt
  * @add_port: add or delete port
  */
-int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port)
+static enum i40iw_status_code
+i40iw_cqp_manage_abvpt_cmd(struct i40iw_device *iwdev,
+			   u16 accel_local_port,
+			   bool add_port)
 {
 	struct i40iw_apbvt_info *info;
 	struct i40iw_cqp_request *cqp_request;
 	struct cqp_commands_info *cqp_info;
-	unsigned long flags;
-	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
-	enum i40iw_status_code status = 0;
-	bool in_use;
-
-	/* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
-	 * protect against race where add APBVT CQP can race ahead of the delete
-	 * APBVT for same port.
-	 */
-	spin_lock_irqsave(&cm_core->apbvt_lock, flags);
-
-	if (!add_port) {
-		in_use = i40iw_port_in_use(cm_core, accel_local_port);
-		if (in_use)
-			goto exit;
-		clear_bit(accel_local_port, cm_core->ports_in_use);
-	} else {
-		in_use = test_and_set_bit(accel_local_port,
-					  cm_core->ports_in_use);
-		spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
-		if (in_use)
-			return 0;
-	}
+	enum i40iw_status_code status;
 
 	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port);
-	if (!cqp_request) {
-		status = -ENOMEM;
-		goto exit;
-	}
+	if (!cqp_request)
+		return I40IW_ERR_NO_MEMORY;
 
 	cqp_info = &cqp_request->info;
 	info = &cqp_info->in.u.manage_apbvt_entry.info;
@@ -489,14 +468,54 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad
 	status = i40iw_handle_cqp_op(iwdev, cqp_request);
 	if (status)
 		i40iw_pr_err("CQP-OP Manage APBVT entry fail");
-exit:
-	if (!add_port)
-		spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
 
 	return status;
 }
 
 /**
+ * i40iw_manage_apbvt - add or delete tcp port
+ * @iwdev: iwarp device
+ * @accel_local_port: port for apbvt
+ * @add_port: add or delete port
+ */
+enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev,
+					  u16 accel_local_port,
+					  bool add_port)
+{
+	struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+	enum i40iw_status_code status;
+	unsigned long flags;
+	bool in_use;
+
+	/* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
+	 * protect against race where add APBVT CQP can race ahead of the delete
+	 * APBVT for same port.
+	 */
+	if (add_port) {
+		spin_lock_irqsave(&cm_core->apbvt_lock, flags);
+		in_use = __test_and_set_bit(accel_local_port,
+					    cm_core->ports_in_use);
+		spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+		if (in_use)
+			return 0;
+		return i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port,
+						  true);
+	} else {
+		spin_lock_irqsave(&cm_core->apbvt_lock, flags);
+		in_use = i40iw_port_in_use(cm_core, accel_local_port);
+		if (in_use) {
+			spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+			return 0;
+		}
+		__clear_bit(accel_local_port, cm_core->ports_in_use);
+		status = i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port,
+						    false);
+		spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+		return status;
+	}
+}
+
+/**
  * i40iw_manage_arp_cache - manage hw arp cache
  * @iwdev: iwarp device
  * @mac_addr: mac address ptr
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 68679ad..e2e6c74 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -71,7 +71,8 @@ static int i40iw_query_device(struct ib_device *ibdev,
 	props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
 	props->max_qp = iwdev->max_qp - iwdev->used_qps;
 	props->max_qp_wr = I40IW_MAX_QP_WRS;
-	props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+	props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+	props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
 	props->max_cq = iwdev->max_cq - iwdev->used_cqs;
 	props->max_cqe = iwdev->max_cqe;
 	props->max_mr = iwdev->max_mr - iwdev->used_mrs;
@@ -1409,6 +1410,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
 	struct vm_area_struct *vma;
 	struct hstate *h;
 
+	down_read(&current->mm->mmap_sem);
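+	/* find_vma() must be called with mmap_sem held for read */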
 	vma = find_vma(current->mm, addr);
 	if (vma && is_vm_hugetlb_page(vma)) {
 		h = hstate_vma(vma);
@@ -1417,6 +1419,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
 			iwmr->page_msk = huge_page_mask(h);
 		}
 	}
+	up_read(&current->mm->mmap_sem);
 }
 
 /**
@@ -2198,8 +2201,8 @@ static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, in
  * @bad_wr: return of bad wr if err
  */
 static int i40iw_post_send(struct ib_qp *ibqp,
-			   struct ib_send_wr *ib_wr,
-			   struct ib_send_wr **bad_wr)
+			   const struct ib_send_wr *ib_wr,
+			   const struct ib_send_wr **bad_wr)
 {
 	struct i40iw_qp *iwqp;
 	struct i40iw_qp_uk *ukqp;
@@ -2374,9 +2377,8 @@ static int i40iw_post_send(struct ib_qp *ibqp,
  * @ib_wr: work request for receive
  * @bad_wr: bad wr caused an error
  */
-static int i40iw_post_recv(struct ib_qp *ibqp,
-			   struct ib_recv_wr *ib_wr,
-			   struct ib_recv_wr **bad_wr)
+static int i40iw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr,
+			   const struct ib_recv_wr **bad_wr)
 {
 	struct i40iw_qp *iwqp;
 	struct i40iw_qp_uk *ukqp;
@@ -2701,21 +2703,6 @@ static int i40iw_query_gid(struct ib_device *ibdev,
 }
 
 /**
- * i40iw_modify_port  Modify port properties
- * @ibdev: device pointer from stack
- * @port: port number
- * @port_modify_mask: mask for port modifications
- * @props: port properties
- */
-static int i40iw_modify_port(struct ib_device *ibdev,
-			     u8 port,
-			     int port_modify_mask,
-			     struct ib_port_modify *props)
-{
-	return -ENOSYS;
-}
-
-/**
  * i40iw_query_pkey - Query partition key
  * @ibdev: device pointer from stack
  * @port: port number
@@ -2732,28 +2719,6 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
 }
 
 /**
- * i40iw_create_ah - create address handle
- * @ibpd: ptr of pd
- * @ah_attr: address handle attributes
- */
-static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
-				     struct rdma_ah_attr *attr,
-				     struct ib_udata *udata)
-
-{
-	return ERR_PTR(-ENOSYS);
-}
-
-/**
- * i40iw_destroy_ah - Destroy address handle
- * @ah: pointer to address handle
- */
-static int i40iw_destroy_ah(struct ib_ah *ah)
-{
-	return -ENOSYS;
-}
-
-/**
  * i40iw_get_vector_affinity - report IRQ affinity mask
  * @ibdev: IB device
  * @comp_vector: completion vector index
@@ -2820,7 +2785,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
 	iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
 	iwibdev->ibdev.dev.parent = &pcidev->dev;
 	iwibdev->ibdev.query_port = i40iw_query_port;
-	iwibdev->ibdev.modify_port = i40iw_modify_port;
 	iwibdev->ibdev.query_pkey = i40iw_query_pkey;
 	iwibdev->ibdev.query_gid = i40iw_query_gid;
 	iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext;
@@ -2840,8 +2804,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
 	iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats;
 	iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats;
 	iwibdev->ibdev.query_device = i40iw_query_device;
-	iwibdev->ibdev.create_ah = i40iw_create_ah;
-	iwibdev->ibdev.destroy_ah = i40iw_destroy_ah;
 	iwibdev->ibdev.drain_sq = i40iw_drain_sq;
 	iwibdev->ibdev.drain_rq = i40iw_drain_rq;
 	iwibdev->ibdev.alloc_mr = i40iw_alloc_mr;
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 9345d5b..e9e3a6f 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -82,12 +82,11 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
 				    struct mlx4_ib_ah *ah)
 {
 	struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+	const struct ib_gid_attr *gid_attr;
 	struct mlx4_dev *dev = ibdev->dev;
 	int is_mcast = 0;
 	struct in6_addr in6;
 	u16 vlan_tag = 0xffff;
-	union ib_gid sgid;
-	struct ib_gid_attr gid_attr;
 	const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
 	int ret;
 
@@ -96,25 +95,30 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
 		is_mcast = 1;
 
 	memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN);
-	ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr),
-				grh->sgid_index, &sgid, &gid_attr);
-	if (ret)
-		return ERR_PTR(ret);
 	eth_zero_addr(ah->av.eth.s_mac);
-	if (is_vlan_dev(gid_attr.ndev))
-		vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
-	memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
-	dev_put(gid_attr.ndev);
+
+	/*
+	 * If sgid_attr is NULL we are being called by mlx4_ib_create_ah_slave
+	 * and we are directly creating an AV for a slave's gid_index.
+	 */
+	gid_attr = ah_attr->grh.sgid_attr;
+	if (gid_attr) {
+		if (is_vlan_dev(gid_attr->ndev))
+			vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
+		memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
+		ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr);
+		if (ret < 0)
+			return ERR_PTR(ret);
+		ah->av.eth.gid_index = ret;
+	} else {
+		/* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */
+		ah->av.eth.gid_index = ah_attr->grh.sgid_index;
+	}
+
 	if (vlan_tag < 0x1000)
 		vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
 	ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
 					 (rdma_ah_get_port_num(ah_attr) << 24));
-	ret = mlx4_ib_gid_index_to_real_index(ibdev,
-					      rdma_ah_get_port_num(ah_attr),
-					      grh->sgid_index);
-	if (ret < 0)
-		return ERR_PTR(ret);
-	ah->av.eth.gid_index = ret;
 	ah->av.eth.vlan = cpu_to_be16(vlan_tag);
 	ah->av.eth.hop_limit = grh->hop_limit;
 	if (rdma_ah_get_static_rate(ah_attr)) {
@@ -173,6 +177,40 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
 		return create_ib_ah(pd, ah_attr, ah); /* never fails */
 }
 
+/* AHs created via this call must be freed by mlx4_ib_destroy_ah. */
+struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
+				      struct rdma_ah_attr *ah_attr,
+				      int slave_sgid_index, u8 *s_mac,
+				      u16 vlan_tag)
+{
+	struct rdma_ah_attr slave_attr = *ah_attr;
+	struct mlx4_ib_ah *mah;
+	struct ib_ah *ah;
+
+	slave_attr.grh.sgid_attr = NULL;
+	slave_attr.grh.sgid_index = slave_sgid_index;
+	ah = mlx4_ib_create_ah(pd, &slave_attr, NULL);
+	if (IS_ERR(ah))
+		return ah;
+
+	ah->device = pd->device;
+	ah->pd = pd;
+	ah->type = ah_attr->type;
+	mah = to_mah(ah);
+
+	/* get rid of force-loopback bit */
+	mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+
+	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
+		memcpy(mah->av.eth.s_mac, s_mac, 6);
+
+	if (vlan_tag < 0x1000)
+		vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
+	mah->av.eth.vlan = cpu_to_be16(vlan_tag);
+
+	return ah;
+}
+
 int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
 {
 	struct mlx4_ib_ah *ah = to_mah(ibah);
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 90a3e2642..e5466d7 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -506,7 +506,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 {
 	struct ib_sge list;
 	struct ib_ud_wr wr;
-	struct ib_send_wr *bad_wr;
+	const struct ib_send_wr *bad_wr;
 	struct mlx4_ib_demux_pv_ctx *tun_ctx;
 	struct mlx4_ib_demux_pv_qp *tun_qp;
 	struct mlx4_rcv_tunnel_mad *tun_mad;
@@ -1310,7 +1310,8 @@ static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
 				  int index)
 {
 	struct ib_sge sg_list;
-	struct ib_recv_wr recv_wr, *bad_recv_wr;
+	struct ib_recv_wr recv_wr;
+	const struct ib_recv_wr *bad_recv_wr;
 	int size;
 
 	size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
@@ -1361,19 +1362,16 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 {
 	struct ib_sge list;
 	struct ib_ud_wr wr;
-	struct ib_send_wr *bad_wr;
+	const struct ib_send_wr *bad_wr;
 	struct mlx4_ib_demux_pv_ctx *sqp_ctx;
 	struct mlx4_ib_demux_pv_qp *sqp;
 	struct mlx4_mad_snd_buf *sqp_mad;
 	struct ib_ah *ah;
 	struct ib_qp *send_qp = NULL;
-	struct ib_global_route *grh;
 	unsigned wire_tx_ix = 0;
 	int ret = 0;
 	u16 wire_pkey_ix;
 	int src_qpnum;
-	u8 sgid_index;
-
 
 	sqp_ctx = dev->sriov.sqps[port-1];
 
@@ -1394,16 +1392,11 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 	send_qp = sqp->qp;
 
 	/* create ah */
-	grh = rdma_ah_retrieve_grh(attr);
-	sgid_index = grh->sgid_index;
-	grh->sgid_index = 0;
-	ah = rdma_create_ah(sqp_ctx->pd, attr);
+	ah = mlx4_ib_create_ah_slave(sqp_ctx->pd, attr,
+				     rdma_ah_retrieve_grh(attr)->sgid_index,
+				     s_mac, vlan_id);
 	if (IS_ERR(ah))
 		return -ENOMEM;
-	grh->sgid_index = sgid_index;
-	to_mah(ah)->av.ib.gid_index = sgid_index;
-	/* get rid of force-loopback bit */
-	to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
 	spin_lock(&sqp->tx_lock);
 	if (sqp->tx_ix_head - sqp->tx_ix_tail >=
 	    (MLX4_NUM_TUNNEL_BUFS - 1))
@@ -1445,12 +1438,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 	wr.wr.num_sge = 1;
 	wr.wr.opcode = IB_WR_SEND;
 	wr.wr.send_flags = IB_SEND_SIGNALED;
-	if (s_mac)
-		memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
-	if (vlan_id < 0x1000)
-		vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13;
-	to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
-
 
 	ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
 	if (!ret)
@@ -1461,7 +1448,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 	spin_unlock(&sqp->tx_lock);
 	sqp->tx_ring[wire_tx_ix].ah = NULL;
 out:
-	rdma_destroy_ah(ah);
+	mlx4_ib_destroy_ah(ah);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4ec519a..ca0f1ee 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -246,9 +246,7 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
 	return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
 }
 
-static int mlx4_ib_add_gid(const union ib_gid *gid,
-			   const struct ib_gid_attr *attr,
-			   void **context)
+static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
 {
 	struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
 	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
@@ -271,8 +269,9 @@ static int mlx4_ib_add_gid(const union ib_gid *gid,
 	port_gid_table = &iboe->gids[attr->port_num - 1];
 	spin_lock_bh(&iboe->lock);
 	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
-		if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
-		    (port_gid_table->gids[i].gid_type == attr->gid_type))  {
+		if (!memcmp(&port_gid_table->gids[i].gid,
+			    &attr->gid, sizeof(attr->gid)) &&
+		    port_gid_table->gids[i].gid_type == attr->gid_type)  {
 			found = i;
 			break;
 		}
@@ -289,7 +288,8 @@ static int mlx4_ib_add_gid(const union ib_gid *gid,
 				ret = -ENOMEM;
 			} else {
 				*context = port_gid_table->gids[free].ctx;
-				memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+				memcpy(&port_gid_table->gids[free].gid,
+				       &attr->gid, sizeof(attr->gid));
 				port_gid_table->gids[free].gid_type = attr->gid_type;
 				port_gid_table->gids[free].ctx->real_index = free;
 				port_gid_table->gids[free].ctx->refcount = 1;
@@ -380,17 +380,15 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
 }
 
 int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
-				    u8 port_num, int index)
+				    const struct ib_gid_attr *attr)
 {
 	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
 	struct gid_cache_context *ctx = NULL;
-	union ib_gid gid;
 	struct mlx4_port_gid_table   *port_gid_table;
 	int real_index = -EINVAL;
 	int i;
-	int ret;
 	unsigned long flags;
-	struct ib_gid_attr attr;
+	u8 port_num = attr->port_num;
 
 	if (port_num > MLX4_MAX_PORTS)
 		return -EINVAL;
@@ -399,21 +397,15 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
 		port_num = 1;
 
 	if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
-		return index;
-
-	ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
-	if (ret)
-		return ret;
-
-	if (attr.ndev)
-		dev_put(attr.ndev);
+		return attr->index;
 
 	spin_lock_irqsave(&iboe->lock, flags);
 	port_gid_table = &iboe->gids[port_num - 1];
 
 	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
-		if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
-		    attr.gid_type == port_gid_table->gids[i].gid_type) {
+		if (!memcmp(&port_gid_table->gids[i].gid,
+			    &attr->gid, sizeof(attr->gid)) &&
+		    attr->gid_type == port_gid_table->gids[i].gid_type) {
 			ctx = port_gid_table->gids[i].ctx;
 			break;
 		}
@@ -525,8 +517,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->page_size_cap	   = dev->dev->caps.page_size_cap;
 	props->max_qp		   = dev->dev->quotas.qp;
 	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
-	props->max_sge		   = min(dev->dev->caps.max_sq_sg,
-					 dev->dev->caps.max_rq_sg);
+	props->max_send_sge	   = dev->dev->caps.max_sq_sg;
+	props->max_recv_sge	   = dev->dev->caps.max_rq_sg;
 	props->max_sge_rd	   = MLX4_MAX_SGE_RD;
 	props->max_cq		   = dev->dev->quotas.cq;
 	props->max_cqe		   = dev->dev->caps.max_cqes;
@@ -770,7 +762,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
 					   IB_WIDTH_4X : IB_WIDTH_1X;
 	props->active_speed	=  (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
 					   IB_SPEED_FDR : IB_SPEED_QDR;
-	props->port_cap_flags	= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
+	props->port_cap_flags	= IB_PORT_CM_SUP;
+	props->ip_gids = true;
 	props->gid_tbl_len	= mdev->dev->caps.gid_table_len[port];
 	props->max_msg_sz	= mdev->dev->caps.max_msg_sz;
 	props->pkey_tbl_len	= 1;
@@ -2709,6 +2702,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.modify_qp		= mlx4_ib_modify_qp;
 	ibdev->ib_dev.query_qp		= mlx4_ib_query_qp;
 	ibdev->ib_dev.destroy_qp	= mlx4_ib_destroy_qp;
+	ibdev->ib_dev.drain_sq		= mlx4_ib_drain_sq;
+	ibdev->ib_dev.drain_rq		= mlx4_ib_drain_rq;
 	ibdev->ib_dev.post_send		= mlx4_ib_post_send;
 	ibdev->ib_dev.post_recv		= mlx4_ib_post_recv;
 	ibdev->ib_dev.create_cq		= mlx4_ib_create_cq;
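Note on the hunk above: the device attributes now report send and receive SGE limits separately (max_send_sge/max_recv_sge) instead of a single max_sge, so consumers size the two directions independently. A minimal caller-side sketch, assuming a hypothetical ULP constant MY_ULP_MAX_SGES and helper name:

	/* Illustrative only: clamp a ULP's requested SGEs against the split limits. */
	static void my_ulp_clamp_sges(struct ib_device *ibdev,
				      struct ib_qp_init_attr *init_attr)
	{
		const struct ib_device_attr *attr = &ibdev->attrs;

		/* MY_ULP_MAX_SGES is a hypothetical per-ULP constant. */
		init_attr->cap.max_send_sge = min_t(u32, MY_ULP_MAX_SGES,
						    attr->max_send_sge);
		init_attr->cap.max_recv_sge = min_t(u32, MY_ULP_MAX_SGES,
						    attr->max_recv_sge);
	}

Consumers that previously took min(max_sq_sg, max_rq_sg) themselves can now rely on the driver-reported per-direction values shown in the hunk.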
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7b14299..e10dccc 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -322,7 +322,6 @@ struct mlx4_ib_qp {
 	u32			doorbell_qpn;
 	__be32			sq_signal_bits;
 	unsigned		sq_next_wqe;
-	int			sq_max_wqes_per_wr;
 	int			sq_spare_wqes;
 	struct mlx4_ib_wq	sq;
 
@@ -760,6 +759,10 @@ void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
 
 struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
 				struct ib_udata *udata);
+struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
+				      struct rdma_ah_attr *ah_attr,
+				      int slave_sgid_index, u8 *s_mac,
+				      u16 vlan_tag);
 int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
 int mlx4_ib_destroy_ah(struct ib_ah *ah);
 
@@ -771,21 +774,23 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int mlx4_ib_destroy_srq(struct ib_srq *srq);
 void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
-int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr);
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr);
 
 struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *init_attr,
 				struct ib_udata *udata);
 int mlx4_ib_destroy_qp(struct ib_qp *qp);
+void mlx4_ib_drain_sq(struct ib_qp *qp);
+void mlx4_ib_drain_rq(struct ib_qp *qp);
 int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		      int attr_mask, struct ib_udata *udata);
 int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
 		     struct ib_qp_init_attr *qp_init_attr);
-int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr);
-int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr);
+int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 
 int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
 		 int port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
@@ -900,7 +905,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
 			  int mr_access_flags, struct ib_pd *pd,
 			  struct ib_udata *udata);
 int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
-				    u8 port_num, int index);
+				    const struct ib_gid_attr *attr);
 
 void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
 				     int port);
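The prototypes above reflect the tree-wide const-ification of posted work requests: providers receive a const WR chain and report the failing entry through a const bad_wr pointer. A hedged caller-side sketch, assuming the core ib_post_send() wrapper takes the same const-qualified arguments as the provider hooks above (my_post_one is a hypothetical helper and the WR is assumed to be fully built elsewhere):

	/* Illustrative only: post one pre-built send WR and log the failing entry. */
	static int my_post_one(struct ib_qp *qp, struct ib_rdma_wr *wr)
	{
		const struct ib_send_wr *bad_wr;
		int ret;

		ret = ib_post_send(qp, &wr->wr, &bad_wr);
		if (ret)
			pr_err("post_send failed at wr %p: %d\n", bad_wr, ret);
		return ret;
	}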
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 3b8045f..6dd3cd2 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -204,91 +204,26 @@ static void *get_send_wqe(struct mlx4_ib_qp *qp, int n)
 
 /*
  * Stamp a SQ WQE so that it is invalid if prefetched by marking the
- * first four bytes of every 64 byte chunk with
- *     0x7FFFFFF | (invalid_ownership_value << 31).
- *
- * When the max work request size is less than or equal to the WQE
- * basic block size, as an optimization, we can stamp all WQEs with
- * 0xffffffff, and skip the very first chunk of each WQE.
+ * first four bytes of every 64 byte chunk with 0xffffffff, except for
+ * the very first chunk of the WQE.
  */
-static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size)
+static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n)
 {
 	__be32 *wqe;
 	int i;
 	int s;
-	int ind;
 	void *buf;
-	__be32 stamp;
 	struct mlx4_wqe_ctrl_seg *ctrl;
 
-	if (qp->sq_max_wqes_per_wr > 1) {
-		s = roundup(size, 1U << qp->sq.wqe_shift);
-		for (i = 0; i < s; i += 64) {
-			ind = (i >> qp->sq.wqe_shift) + n;
-			stamp = ind & qp->sq.wqe_cnt ? cpu_to_be32(0x7fffffff) :
-						       cpu_to_be32(0xffffffff);
-			buf = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1));
-			wqe = buf + (i & ((1 << qp->sq.wqe_shift) - 1));
-			*wqe = stamp;
-		}
-	} else {
-		ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
-		s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4;
-		for (i = 64; i < s; i += 64) {
-			wqe = buf + i;
-			*wqe = cpu_to_be32(0xffffffff);
-		}
+	buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
+	ctrl = (struct mlx4_wqe_ctrl_seg *)buf;
+	s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4;
+	for (i = 64; i < s; i += 64) {
+		wqe = buf + i;
+		*wqe = cpu_to_be32(0xffffffff);
 	}
 }
 
-static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size)
-{
-	struct mlx4_wqe_ctrl_seg *ctrl;
-	struct mlx4_wqe_inline_seg *inl;
-	void *wqe;
-	int s;
-
-	ctrl = wqe = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1));
-	s = sizeof(struct mlx4_wqe_ctrl_seg);
-
-	if (qp->ibqp.qp_type == IB_QPT_UD) {
-		struct mlx4_wqe_datagram_seg *dgram = wqe + sizeof *ctrl;
-		struct mlx4_av *av = (struct mlx4_av *)dgram->av;
-		memset(dgram, 0, sizeof *dgram);
-		av->port_pd = cpu_to_be32((qp->port << 24) | to_mpd(qp->ibqp.pd)->pdn);
-		s += sizeof(struct mlx4_wqe_datagram_seg);
-	}
-
-	/* Pad the remainder of the WQE with an inline data segment. */
-	if (size > s) {
-		inl = wqe + s;
-		inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl));
-	}
-	ctrl->srcrb_flags = 0;
-	ctrl->qpn_vlan.fence_size = size / 16;
-	/*
-	 * Make sure descriptor is fully written before setting ownership bit
-	 * (because HW can start executing as soon as we do).
-	 */
-	wmb();
-
-	ctrl->owner_opcode = cpu_to_be32(MLX4_OPCODE_NOP | MLX4_WQE_CTRL_NEC) |
-		(n & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
-
-	stamp_send_wqe(qp, n + qp->sq_spare_wqes, size);
-}
-
-/* Post NOP WQE to prevent wrap-around in the middle of WR */
-static inline unsigned pad_wraparound(struct mlx4_ib_qp *qp, int ind)
-{
-	unsigned s = qp->sq.wqe_cnt - (ind & (qp->sq.wqe_cnt - 1));
-	if (unlikely(s < qp->sq_max_wqes_per_wr)) {
-		post_nop_wqe(qp, ind, s << qp->sq.wqe_shift);
-		ind += s;
-	}
-	return ind;
-}
-
 static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
 {
 	struct ib_event event;
@@ -433,8 +368,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 }
 
 static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
-			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp,
-			      bool shrink_wqe)
+			      enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp)
 {
 	int s;
 
@@ -461,70 +395,20 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	if (s > dev->dev->caps.max_sq_desc_sz)
 		return -EINVAL;
 
+	qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
+
 	/*
-	 * Hermon supports shrinking WQEs, such that a single work
-	 * request can include multiple units of 1 << wqe_shift.  This
-	 * way, work requests can differ in size, and do not have to
-	 * be a power of 2 in size, saving memory and speeding up send
-	 * WR posting.  Unfortunately, if we do this then the
-	 * wqe_index field in CQEs can't be used to look up the WR ID
-	 * anymore, so we do this only if selective signaling is off.
-	 *
-	 * Further, on 32-bit platforms, we can't use vmap() to make
-	 * the QP buffer virtually contiguous.  Thus we have to use
-	 * constant-sized WRs to make sure a WR is always fully within
-	 * a single page-sized chunk.
-	 *
-	 * Finally, we use NOP work requests to pad the end of the
-	 * work queue, to avoid wrap-around in the middle of WR.  We
-	 * set NEC bit to avoid getting completions with error for
-	 * these NOP WRs, but since NEC is only supported starting
-	 * with firmware 2.2.232, we use constant-sized WRs for older
-	 * firmware.
-	 *
-	 * And, since MLX QPs only support SEND, we use constant-sized
-	 * WRs in this case.
-	 *
-	 * We look for the smallest value of wqe_shift such that the
-	 * resulting number of wqes does not exceed device
-	 * capabilities.
-	 *
-	 * We set WQE size to at least 64 bytes, this way stamping
-	 * invalidates each WQE.
+	 * We need to leave 2 KB + 1 WR of headroom in the SQ to
+	 * allow HW to prefetch.
 	 */
-	if (shrink_wqe && dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
-	    qp->sq_signal_bits && BITS_PER_LONG == 64 &&
-	    type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI &&
-	    !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
-		      MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
-		qp->sq.wqe_shift = ilog2(64);
-	else
-		qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
+	qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
+	qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr +
+					    qp->sq_spare_wqes);
 
-	for (;;) {
-		qp->sq_max_wqes_per_wr = DIV_ROUND_UP(s, 1U << qp->sq.wqe_shift);
-
-		/*
-		 * We need to leave 2 KB + 1 WR of headroom in the SQ to
-		 * allow HW to prefetch.
-		 */
-		qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + qp->sq_max_wqes_per_wr;
-		qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr *
-						    qp->sq_max_wqes_per_wr +
-						    qp->sq_spare_wqes);
-
-		if (qp->sq.wqe_cnt <= dev->dev->caps.max_wqes)
-			break;
-
-		if (qp->sq_max_wqes_per_wr <= 1)
-			return -EINVAL;
-
-		++qp->sq.wqe_shift;
-	}
-
-	qp->sq.max_gs = (min(dev->dev->caps.max_sq_desc_sz,
-			     (qp->sq_max_wqes_per_wr << qp->sq.wqe_shift)) -
-			 send_wqe_overhead(type, qp->flags)) /
+	qp->sq.max_gs =
+		(min(dev->dev->caps.max_sq_desc_sz,
+		     (1 << qp->sq.wqe_shift)) -
+		 send_wqe_overhead(type, qp->flags)) /
 		sizeof (struct mlx4_wqe_data_seg);
 
 	qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
@@ -538,7 +422,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
 	}
 
 	cap->max_send_wr  = qp->sq.max_post =
-		(qp->sq.wqe_cnt - qp->sq_spare_wqes) / qp->sq_max_wqes_per_wr;
+		qp->sq.wqe_cnt - qp->sq_spare_wqes;
 	cap->max_send_sge = min(qp->sq.max_gs,
 				min(dev->dev->caps.max_sq_sg,
 				    dev->dev->caps.max_rq_sg));
@@ -977,7 +861,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 {
 	int qpn;
 	int err;
-	struct ib_qp_cap backup_cap;
 	struct mlx4_ib_sqp *sqp = NULL;
 	struct mlx4_ib_qp *qp;
 	enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
@@ -1178,9 +1061,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 				goto err;
 		}
 
-		memcpy(&backup_cap, &init_attr->cap, sizeof(backup_cap));
-		err = set_kernel_sq_size(dev, &init_attr->cap,
-					 qp_type, qp, true);
+		err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
 		if (err)
 			goto err;
 
@@ -1192,20 +1073,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 			*qp->db.db = 0;
 		}
 
-		if (mlx4_buf_alloc(dev->dev, qp->buf_size, qp->buf_size,
+		if (mlx4_buf_alloc(dev->dev, qp->buf_size,  PAGE_SIZE * 2,
 				   &qp->buf)) {
-			memcpy(&init_attr->cap, &backup_cap,
-			       sizeof(backup_cap));
-			err = set_kernel_sq_size(dev, &init_attr->cap, qp_type,
-						 qp, false);
-			if (err)
-				goto err_db;
-
-			if (mlx4_buf_alloc(dev->dev, qp->buf_size,
-					   PAGE_SIZE * 2, &qp->buf)) {
-				err = -ENOMEM;
-				goto err_db;
-			}
+			err = -ENOMEM;
+			goto err_db;
 		}
 
 		err = mlx4_mtt_init(dev->dev, qp->buf.npages, qp->buf.page_shift,
@@ -1859,8 +1730,7 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev,
 	if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
 		const struct ib_global_route *grh = rdma_ah_read_grh(ah);
 		int real_sgid_index =
-			mlx4_ib_gid_index_to_real_index(dev, port,
-							grh->sgid_index);
+			mlx4_ib_gid_index_to_real_index(dev, grh->sgid_attr);
 
 		if (real_sgid_index < 0)
 			return real_sgid_index;
@@ -2176,6 +2046,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 {
 	struct ib_uobject *ibuobject;
 	struct ib_srq  *ibsrq;
+	const struct ib_gid_attr *gid_attr = NULL;
 	struct ib_rwq_ind_table *rwq_ind_tbl;
 	enum ib_qp_type qp_type;
 	struct mlx4_ib_dev *dev;
@@ -2356,29 +2227,17 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 	if (attr_mask & IB_QP_AV) {
 		u8 port_num = mlx4_is_bonded(dev->dev) ? 1 :
 			attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
-		union ib_gid gid;
-		struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
 		u16 vlan = 0xffff;
 		u8 smac[ETH_ALEN];
-		int status = 0;
 		int is_eth =
 			rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
 			rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
 
 		if (is_eth) {
-			int index =
-				rdma_ah_read_grh(&attr->ah_attr)->sgid_index;
-
-			status = ib_get_cached_gid(&dev->ib_dev, port_num,
-						   index, &gid, &gid_attr);
-			if (!status) {
-				vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
-				memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
-				dev_put(gid_attr.ndev);
-			}
+			gid_attr = attr->ah_attr.grh.sgid_attr;
+			vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev);
+			memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN);
 		}
-		if (status)
-			goto out;
 
 		if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
 				  port_num, vlan, smac))
@@ -2389,7 +2248,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 
 		if (is_eth &&
 		    (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
-			u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+			u8 qpc_roce_mode = gid_type_to_qpc(gid_attr->gid_type);
 
 			if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
 				err = -EINVAL;
@@ -2594,11 +2453,9 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
 		for (i = 0; i < qp->sq.wqe_cnt; ++i) {
 			ctrl = get_send_wqe(qp, i);
 			ctrl->owner_opcode = cpu_to_be32(1 << 31);
-			if (qp->sq_max_wqes_per_wr == 1)
-				ctrl->qpn_vlan.fence_size =
-						1 << (qp->sq.wqe_shift - 4);
-
-			stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift);
+			ctrl->qpn_vlan.fence_size =
+				1 << (qp->sq.wqe_shift - 4);
+			stamp_send_wqe(qp, i);
 		}
 	}
 
@@ -2937,7 +2794,7 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
 }
 
 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
-				  struct ib_ud_wr *wr,
+				  const struct ib_ud_wr *wr,
 				  void *wqe, unsigned *mlx_seg_len)
 {
 	struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
@@ -3085,7 +2942,7 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
 }
 
 #define MLX4_ROCEV2_QP1_SPORT 0xC000
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
+static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr,
 			    void *wqe, unsigned *mlx_seg_len)
 {
 	struct ib_device *ib_dev = sqp->qp.ibqp.device;
@@ -3181,10 +3038,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
 					to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
 						       guid_cache[ah->av.ib.gid_index];
 			} else {
-				ib_get_cached_gid(ib_dev,
-						  be32_to_cpu(ah->av.ib.port_pd) >> 24,
-						  ah->av.ib.gid_index,
-						  &sqp->ud_header.grh.source_gid, NULL);
+				sqp->ud_header.grh.source_gid =
+					ah->ibah.sgid_attr->gid;
 			}
 		}
 		memcpy(sqp->ud_header.grh.destination_gid.raw,
@@ -3369,7 +3224,7 @@ static __be32 convert_access(int acc)
 }
 
 static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
-			struct ib_reg_wr *wr)
+			const struct ib_reg_wr *wr)
 {
 	struct mlx4_ib_mr *mr = to_mmr(wr->mr);
 
@@ -3399,7 +3254,7 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
 }
 
 static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
-		struct ib_atomic_wr *wr)
+			   const struct ib_atomic_wr *wr)
 {
 	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 		aseg->swap_add = cpu_to_be64(wr->swap);
@@ -3415,7 +3270,7 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
 }
 
 static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
-				  struct ib_atomic_wr *wr)
+				  const struct ib_atomic_wr *wr)
 {
 	aseg->swap_add		= cpu_to_be64(wr->swap);
 	aseg->swap_add_mask	= cpu_to_be64(wr->swap_mask);
@@ -3424,7 +3279,7 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
 }
 
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
-			     struct ib_ud_wr *wr)
+			     const struct ib_ud_wr *wr)
 {
 	memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
 	dseg->dqpn = cpu_to_be32(wr->remote_qpn);
@@ -3435,7 +3290,7 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 
 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 				    struct mlx4_wqe_datagram_seg *dseg,
-				    struct ib_ud_wr *wr,
+				    const struct ib_ud_wr *wr,
 				    enum mlx4_ib_qp_type qpt)
 {
 	union mlx4_ext_av *av = &to_mah(wr->ah)->av;
@@ -3457,7 +3312,8 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
 	dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }
 
-static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len)
+static void build_tunnel_header(const struct ib_ud_wr *wr, void *wqe,
+				unsigned *mlx_seg_len)
 {
 	struct mlx4_wqe_inline_seg *inl = wqe;
 	struct mlx4_ib_tunnel_header hdr;
@@ -3540,9 +3396,9 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
 	dseg->addr       = cpu_to_be64(sg->addr);
 }
 
-static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
-			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
-			 __be32 *lso_hdr_sz, __be32 *blh)
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe,
+			 const struct ib_ud_wr *wr, struct mlx4_ib_qp *qp,
+			 unsigned *lso_seg_len, __be32 *lso_hdr_sz, __be32 *blh)
 {
 	unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
 
@@ -3560,7 +3416,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
 	return 0;
 }
 
-static __be32 send_ieth(struct ib_send_wr *wr)
+static __be32 send_ieth(const struct ib_send_wr *wr)
 {
 	switch (wr->opcode) {
 	case IB_WR_SEND_WITH_IMM:
@@ -3582,8 +3438,8 @@ static void add_zero_len_inline(void *wqe)
 	inl->byte_count = cpu_to_be32(1 << 31);
 }
 
-int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			      const struct ib_send_wr **bad_wr, bool drain)
 {
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	void *wqe;
@@ -3593,7 +3449,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	int nreq;
 	int err = 0;
 	unsigned ind;
-	int uninitialized_var(stamp);
 	int uninitialized_var(size);
 	unsigned uninitialized_var(seglen);
 	__be32 dummy;
@@ -3623,7 +3478,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	}
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
-	if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+	if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
+	    !drain) {
 		err = -EIO;
 		*bad_wr = wr;
 		nreq = 0;
@@ -3865,22 +3721,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
 			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
 
-		stamp = ind + qp->sq_spare_wqes;
-		ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
-
 		/*
 		 * We can improve latency by not stamping the last
 		 * send queue WQE until after ringing the doorbell, so
 		 * only stamp here if there are still more WQEs to post.
-		 *
-		 * Same optimization applies to padding with NOP wqe
-		 * in case of WQE shrinking (used to prevent wrap-around
-		 * in the middle of WR).
 		 */
-		if (wr->next) {
-			stamp_send_wqe(qp, stamp, size * 16);
-			ind = pad_wraparound(qp, ind);
-		}
+		if (wr->next)
+			stamp_send_wqe(qp, ind + qp->sq_spare_wqes);
+		ind++;
 	}
 
 out:
@@ -3902,9 +3750,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		 */
 		mmiowb();
 
-		stamp_send_wqe(qp, stamp, size * 16);
+		stamp_send_wqe(qp, ind + qp->sq_spare_wqes - 1);
 
-		ind = pad_wraparound(qp, ind);
 		qp->sq_next_wqe = ind;
 	}
 
@@ -3913,8 +3760,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return err;
 }
 
-int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr)
+{
+	return _mlx4_ib_post_send(ibqp, wr, bad_wr, false);
+}
+
+static int _mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr, bool drain)
 {
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	struct mlx4_wqe_data_seg *scat;
@@ -3929,7 +3782,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	max_gs = qp->rq.max_gs;
 	spin_lock_irqsave(&qp->rq.lock, flags);
 
-	if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+	if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
+	    !drain) {
 		err = -EIO;
 		*bad_wr = wr;
 		nreq = 0;
@@ -4000,6 +3854,12 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	return err;
 }
 
+int mlx4_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
+{
+	return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false);
+}
+
 static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
 {
 	switch (mlx4_state) {
@@ -4047,9 +3907,9 @@ static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev,
 	u8 port_num = path->sched_queue & 0x40 ? 2 : 1;
 
 	memset(ah_attr, 0, sizeof(*ah_attr));
-	ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num);
 	if (port_num == 0 || port_num > dev->caps.num_ports)
 		return;
+	ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num);
 
 	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
 		rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) |
@@ -4465,3 +4325,132 @@ int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
 	kfree(ib_rwq_ind_tbl);
 	return 0;
 }
+
+struct mlx4_ib_drain_cqe {
+	struct ib_cqe cqe;
+	struct completion done;
+};
+
+static void mlx4_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct mlx4_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
+						     struct mlx4_ib_drain_cqe,
+						     cqe);
+
+	complete(&cqe->done);
+}
+
+/* This function returns only once the drained WR has completed */
+static void handle_drain_completion(struct ib_cq *cq,
+				    struct mlx4_ib_drain_cqe *sdrain,
+				    struct mlx4_ib_dev *dev)
+{
+	struct mlx4_dev *mdev = dev->dev;
+
+	if (cq->poll_ctx == IB_POLL_DIRECT) {
+		while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
+			ib_process_cq_direct(cq, -1);
+		return;
+	}
+
+	if (mdev->persist->state == MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+		struct mlx4_ib_cq *mcq = to_mcq(cq);
+		bool triggered = false;
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+		/* Make sure that the CQ handler won't run if it wasn't run yet */
+		if (!mcq->mcq.reset_notify_added)
+			mcq->mcq.reset_notify_added = 1;
+		else
+			triggered = true;
+		spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+		if (triggered) {
+			/* Wait for any scheduled/running task to be ended */
+			switch (cq->poll_ctx) {
+			case IB_POLL_SOFTIRQ:
+				irq_poll_disable(&cq->iop);
+				irq_poll_enable(&cq->iop);
+				break;
+			case IB_POLL_WORKQUEUE:
+				cancel_work_sync(&cq->work);
+				break;
+			default:
+				WARN_ON_ONCE(1);
+			}
+		}
+
+		/* Run the CQ handler - this makes sure that the drain WR will
+		 * be processed if it wasn't processed yet.
+		 */
+		mcq->mcq.comp(&mcq->mcq);
+	}
+
+	wait_for_completion(&sdrain->done);
+}
+
+void mlx4_ib_drain_sq(struct ib_qp *qp)
+{
+	struct ib_cq *cq = qp->send_cq;
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct mlx4_ib_drain_cqe sdrain;
+	const struct ib_send_wr *bad_swr;
+	struct ib_rdma_wr swr = {
+		.wr = {
+			.next = NULL,
+			{ .wr_cqe	= &sdrain.cqe, },
+			.opcode	= IB_WR_RDMA_WRITE,
+		},
+	};
+	int ret;
+	struct mlx4_ib_dev *dev = to_mdev(qp->device);
+	struct mlx4_dev *mdev = dev->dev;
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	sdrain.cqe.done = mlx4_ib_drain_qp_done;
+	init_completion(&sdrain.done);
+
+	ret = _mlx4_ib_post_send(qp, &swr.wr, &bad_swr, true);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	handle_drain_completion(cq, &sdrain, dev);
+}
+
+void mlx4_ib_drain_rq(struct ib_qp *qp)
+{
+	struct ib_cq *cq = qp->recv_cq;
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct mlx4_ib_drain_cqe rdrain;
+	struct ib_recv_wr rwr = {};
+	const struct ib_recv_wr *bad_rwr;
+	int ret;
+	struct mlx4_ib_dev *dev = to_mdev(qp->device);
+	struct mlx4_dev *mdev = dev->dev;
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	rwr.wr_cqe = &rdrain.cqe;
+	rdrain.cqe.done = mlx4_ib_drain_qp_done;
+	init_completion(&rdrain.done);
+
+	ret = _mlx4_ib_post_recv(qp, &rwr, &bad_rwr, true);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	handle_drain_completion(cq, &rdrain, dev);
+}
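The drain helpers above follow the usual pattern: move the QP to the error state, post a sentinel WR whose CQE completes a completion object, then wait for it, with special handling for the internal-error and direct-polling cases. From a kernel consumer's point of view the entry point is normally the core drain API; a minimal sketch, assuming a hypothetical teardown helper:

	/* Illustrative only: flush all outstanding work before destroying a QP. */
	static void my_teardown_qp(struct ib_qp *qp)
	{
		ib_drain_qp(qp);	/* drains both the SQ and the RQ */
		ib_destroy_qp(qp);	/* return value ignored in this sketch */
	}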
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index ebee56c..3731b31 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -307,8 +307,8 @@ void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index)
 	spin_unlock(&srq->lock);
 }
 
-int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr)
 {
 	struct mlx4_ib_srq *srq = to_msrq(ibsrq);
 	struct mlx4_wqe_srq_next_seg *next;
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index d42b922..b8e4b15 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -3,3 +3,5 @@
 mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
 mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
+mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
+mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index e6bde32a..ffd03bf 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -37,7 +37,6 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
 				  struct rdma_ah_attr *ah_attr)
 {
 	enum ib_gid_type gid_type;
-	int err;
 
 	if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
 		const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -53,18 +52,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
 	ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
 
 	if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
-		err = mlx5_get_roce_gid_type(dev, ah_attr->port_num,
-					     ah_attr->grh.sgid_index,
-					     &gid_type);
-		if (err)
-			return ERR_PTR(err);
+		gid_type = ah_attr->grh.sgid_attr->gid_type;
 
 		memcpy(ah->av.rmac, ah_attr->roce.dmac,
 		       sizeof(ah_attr->roce.dmac));
 		ah->av.udp_sport =
-		mlx5_get_roce_udp_sport(dev,
-					rdma_ah_get_port_num(ah_attr),
-					rdma_ah_read_grh(ah_attr)->sgid_index);
+			mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr);
 		ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
 #define MLX5_ECN_ENABLED BIT(1)
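With the resolved sgid_attr carried inside rdma_ah_attr, the driver reads the GID type and source GID directly instead of going through the cached-GID lookup removed above. A small hedged sketch of that access pattern (my_ah_is_roce_v2 is a hypothetical helper; it assumes grh.sgid_attr has already been resolved by the core, as it has by the time create_ah runs):

	/* Illustrative only: classify a RoCE AH by its resolved source GID type. */
	static bool my_ah_is_roce_v2(const struct rdma_ah_attr *ah_attr)
	{
		return ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE &&
		       ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
	}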
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index ccc0b5d..c84fef9 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -185,3 +185,15 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length)
 
 	return err;
 }
+
+int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
+{
+	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+	MLX5_SET(ppcnt_reg, in, local_port, 1);
+
+	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+	return  mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
+				     0, 0);
+}
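mlx5_cmd_query_ext_ppcnt_counters() fills a reply the size of the PPCNT register layout, so callers must supply a MLX5_ST_SZ_BYTES(ppcnt_reg) buffer. A minimal hedged sketch of a caller (my_query_ext_ppcnt is hypothetical; counter parsing is omitted):

	/* Illustrative only: fetch the extended PPCNT counters for local port 1. */
	static int my_query_ext_ppcnt(struct mlx5_ib_dev *dev)
	{
		int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
		void *out;
		int err;

		out = kvzalloc(sz, GFP_KERNEL);
		if (!out)
			return -ENOMEM;

		err = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
		/* On success, individual counters are read from the reply with the
		 * MLX5_GET()/MLX5_GET64() accessors; that part is omitted here.
		 */
		kvfree(out);
		return err;
	}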
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 98ea464..88cbb1c 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -41,6 +41,7 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
 int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
 int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
 			       void *out, int out_size);
+int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out);
 int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
 				void *in, int in_size);
 int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index 985fa26..7e4e358 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -359,9 +359,6 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
 	int ret;
 	char lbuf[11];
 
-	if (*pos)
-		return 0;
-
 	ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
 	if (ret)
 		return ret;
@@ -370,11 +367,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
 	if (ret < 0)
 		return ret;
 
-	if (copy_to_user(buf, lbuf, ret))
-		return -EFAULT;
-
-	*pos += ret;
-	return ret;
+	return simple_read_from_buffer(buf, count, pos, lbuf, ret);
 }
 
 static const struct file_operations dbg_cc_fops = {
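Switching to simple_read_from_buffer() gets the *pos/count bookkeeping right for partial and repeated reads instead of the open-coded copy removed above. The general idiom for a debugfs read handler, as a hedged sketch (my_read and the formatted value are hypothetical):

	/* Illustrative only: format a value into a stack buffer and let the
	 * helper handle offset and length accounting.
	 */
	static ssize_t my_read(struct file *filp, char __user *ubuf,
			       size_t count, loff_t *pos)
	{
		char kbuf[16];
		int len;

		len = scnprintf(kbuf, sizeof(kbuf), "%u\n", 42U);
		return simple_read_from_buffer(ubuf, count, pos, kbuf, len);
	}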
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index ad39d64..088205d 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -1184,7 +1184,7 @@ int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 	int err;
 
 	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
-		return -ENOSYS;
+		return -EOPNOTSUPP;
 
 	if (cq_period > MLX5_MAX_CQ_PERIOD)
 		return -EINVAL;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
new file mode 100644
index 0000000..ac116d6
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -0,0 +1,1119 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_umem.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
+struct devx_obj {
+	struct mlx5_core_dev	*mdev;
+	u32			obj_id;
+	u32			dinlen; /* destroy inbox length */
+	u32			dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+};
+
+struct devx_umem {
+	struct mlx5_core_dev		*mdev;
+	struct ib_umem			*umem;
+	u32				page_offset;
+	int				page_shift;
+	int				ncont;
+	u32				dinlen;
+	u32				dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
+};
+
+struct devx_umem_reg_cmd {
+	void				*in;
+	u32				inlen;
+	u32				out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+};
+
+static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
+{
+	return to_mucontext(ib_uverbs_get_ucontext(file));
+}
+
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+	u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+	u64 general_obj_types;
+	void *hdr;
+	int err;
+
+	hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
+
+	general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types);
+	if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) ||
+	    !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
+		return -EINVAL;
+
+	if (!capable(CAP_NET_RAW))
+		return -EPERM;
+
+	MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
+
+	err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	return 0;
+}
+
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
+			  struct mlx5_ib_ucontext *context)
+{
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid);
+
+	mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
+{
+	struct devx_obj *devx_obj = obj;
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_DESTROY_TIR:
+		*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+		*dest_id = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox,
+				    obj_id);
+		return true;
+
+	case MLX5_CMD_OP_DESTROY_FLOW_TABLE:
+		*dest_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+		*dest_id = MLX5_GET(destroy_flow_table_in, devx_obj->dinbox,
+				    table_id);
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+	u32 obj_id;
+
+	switch (opcode) {
+	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+		obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+		break;
+	case MLX5_CMD_OP_QUERY_MKEY:
+		obj_id = MLX5_GET(query_mkey_in, in, mkey_index);
+		break;
+	case MLX5_CMD_OP_QUERY_CQ:
+		obj_id = MLX5_GET(query_cq_in, in, cqn);
+		break;
+	case MLX5_CMD_OP_MODIFY_CQ:
+		obj_id = MLX5_GET(modify_cq_in, in, cqn);
+		break;
+	case MLX5_CMD_OP_QUERY_SQ:
+		obj_id = MLX5_GET(query_sq_in, in, sqn);
+		break;
+	case MLX5_CMD_OP_MODIFY_SQ:
+		obj_id = MLX5_GET(modify_sq_in, in, sqn);
+		break;
+	case MLX5_CMD_OP_QUERY_RQ:
+		obj_id = MLX5_GET(query_rq_in, in, rqn);
+		break;
+	case MLX5_CMD_OP_MODIFY_RQ:
+		obj_id = MLX5_GET(modify_rq_in, in, rqn);
+		break;
+	case MLX5_CMD_OP_QUERY_RMP:
+		obj_id = MLX5_GET(query_rmp_in, in, rmpn);
+		break;
+	case MLX5_CMD_OP_MODIFY_RMP:
+		obj_id = MLX5_GET(modify_rmp_in, in, rmpn);
+		break;
+	case MLX5_CMD_OP_QUERY_RQT:
+		obj_id = MLX5_GET(query_rqt_in, in, rqtn);
+		break;
+	case MLX5_CMD_OP_MODIFY_RQT:
+		obj_id = MLX5_GET(modify_rqt_in, in, rqtn);
+		break;
+	case MLX5_CMD_OP_QUERY_TIR:
+		obj_id = MLX5_GET(query_tir_in, in, tirn);
+		break;
+	case MLX5_CMD_OP_MODIFY_TIR:
+		obj_id = MLX5_GET(modify_tir_in, in, tirn);
+		break;
+	case MLX5_CMD_OP_QUERY_TIS:
+		obj_id = MLX5_GET(query_tis_in, in, tisn);
+		break;
+	case MLX5_CMD_OP_MODIFY_TIS:
+		obj_id = MLX5_GET(modify_tis_in, in, tisn);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+		obj_id = MLX5_GET(query_flow_table_in, in, table_id);
+		break;
+	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+		obj_id = MLX5_GET(modify_flow_table_in, in, table_id);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+		obj_id = MLX5_GET(query_flow_group_in, in, group_id);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+		obj_id = MLX5_GET(query_fte_in, in, flow_index);
+		break;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+		obj_id = MLX5_GET(set_fte_in, in, flow_index);
+		break;
+	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+		obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id);
+		break;
+	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+		obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id);
+		break;
+	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+		obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+		break;
+	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+		obj_id = MLX5_GET(query_scheduling_element_in, in,
+				  scheduling_element_id);
+		break;
+	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+		obj_id = MLX5_GET(modify_scheduling_element_in, in,
+				  scheduling_element_id);
+		break;
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+		obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+		break;
+	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+		obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index);
+		break;
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+		obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+		break;
+	case MLX5_CMD_OP_QUERY_QP:
+		obj_id = MLX5_GET(query_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_RST2INIT_QP:
+		obj_id = MLX5_GET(rst2init_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_INIT2RTR_QP:
+		obj_id = MLX5_GET(init2rtr_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_RTR2RTS_QP:
+		obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_RTS2RTS_QP:
+		obj_id = MLX5_GET(rts2rts_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+		obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_2ERR_QP:
+		obj_id = MLX5_GET(qp_2err_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_2RST_QP:
+		obj_id = MLX5_GET(qp_2rst_in, in, qpn);
+		break;
+	case MLX5_CMD_OP_QUERY_DCT:
+		obj_id = MLX5_GET(query_dct_in, in, dctn);
+		break;
+	case MLX5_CMD_OP_QUERY_XRQ:
+		obj_id = MLX5_GET(query_xrq_in, in, xrqn);
+		break;
+	case MLX5_CMD_OP_QUERY_XRC_SRQ:
+		obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn);
+		break;
+	case MLX5_CMD_OP_ARM_XRC_SRQ:
+		obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn);
+		break;
+	case MLX5_CMD_OP_QUERY_SRQ:
+		obj_id = MLX5_GET(query_srq_in, in, srqn);
+		break;
+	case MLX5_CMD_OP_ARM_RQ:
+		obj_id = MLX5_GET(arm_rq_in, in, srq_number);
+		break;
+	case MLX5_CMD_OP_DRAIN_DCT:
+	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+		obj_id = MLX5_GET(drain_dct_in, in, dctn);
+		break;
+	case MLX5_CMD_OP_ARM_XRQ:
+		obj_id = MLX5_GET(arm_xrq_in, in, xrqn);
+		break;
+	default:
+		return false;
+	}
+
+	if (obj_id == obj->obj_id)
+		return true;
+
+	return false;
+}
+
+static bool devx_is_obj_create_cmd(const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+	case MLX5_CMD_OP_CREATE_MKEY:
+	case MLX5_CMD_OP_CREATE_CQ:
+	case MLX5_CMD_OP_ALLOC_PD:
+	case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+	case MLX5_CMD_OP_CREATE_RMP:
+	case MLX5_CMD_OP_CREATE_SQ:
+	case MLX5_CMD_OP_CREATE_RQ:
+	case MLX5_CMD_OP_CREATE_RQT:
+	case MLX5_CMD_OP_CREATE_TIR:
+	case MLX5_CMD_OP_CREATE_TIS:
+	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_CREATE_QP:
+	case MLX5_CMD_OP_CREATE_SRQ:
+	case MLX5_CMD_OP_CREATE_XRC_SRQ:
+	case MLX5_CMD_OP_CREATE_DCT:
+	case MLX5_CMD_OP_CREATE_XRQ:
+	case MLX5_CMD_OP_ATTACH_TO_MCG:
+	case MLX5_CMD_OP_ALLOC_XRCD:
+		return true;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+	{
+		u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+		if (op_mod == 0)
+			return true;
+		return false;
+	}
+	default:
+		return false;
+	}
+}
+
+static bool devx_is_obj_modify_cmd(const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_MODIFY_CQ:
+	case MLX5_CMD_OP_MODIFY_RMP:
+	case MLX5_CMD_OP_MODIFY_SQ:
+	case MLX5_CMD_OP_MODIFY_RQ:
+	case MLX5_CMD_OP_MODIFY_RQT:
+	case MLX5_CMD_OP_MODIFY_TIR:
+	case MLX5_CMD_OP_MODIFY_TIS:
+	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_RST2INIT_QP:
+	case MLX5_CMD_OP_INIT2RTR_QP:
+	case MLX5_CMD_OP_RTR2RTS_QP:
+	case MLX5_CMD_OP_RTS2RTS_QP:
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+	case MLX5_CMD_OP_2ERR_QP:
+	case MLX5_CMD_OP_2RST_QP:
+	case MLX5_CMD_OP_ARM_XRC_SRQ:
+	case MLX5_CMD_OP_ARM_RQ:
+	case MLX5_CMD_OP_DRAIN_DCT:
+	case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+	case MLX5_CMD_OP_ARM_XRQ:
+		return true;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+	{
+		u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+
+		if (op_mod == 1)
+			return true;
+		return false;
+	}
+	default:
+		return false;
+	}
+}
+
+static bool devx_is_obj_query_cmd(const void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_MKEY:
+	case MLX5_CMD_OP_QUERY_CQ:
+	case MLX5_CMD_OP_QUERY_RMP:
+	case MLX5_CMD_OP_QUERY_SQ:
+	case MLX5_CMD_OP_QUERY_RQ:
+	case MLX5_CMD_OP_QUERY_RQT:
+	case MLX5_CMD_OP_QUERY_TIR:
+	case MLX5_CMD_OP_QUERY_TIS:
+	case MLX5_CMD_OP_QUERY_Q_COUNTER:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+	case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+	case MLX5_CMD_OP_QUERY_QP:
+	case MLX5_CMD_OP_QUERY_SRQ:
+	case MLX5_CMD_OP_QUERY_XRC_SRQ:
+	case MLX5_CMD_OP_QUERY_DCT:
+	case MLX5_CMD_OP_QUERY_XRQ:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static bool devx_is_general_cmd(void *in)
+{
+	u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+	switch (opcode) {
+	case MLX5_CMD_OP_QUERY_HCA_CAP:
+	case MLX5_CMD_OP_QUERY_VPORT_STATE:
+	case MLX5_CMD_OP_QUERY_ADAPTER:
+	case MLX5_CMD_OP_QUERY_ISSI:
+	case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+	case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+	case MLX5_CMD_OP_QUERY_VNIC_ENV:
+	case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+	case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+	case MLX5_CMD_OP_NOP:
+	case MLX5_CMD_OP_QUERY_CONG_STATUS:
+	case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+	case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c;
+	struct mlx5_ib_dev *dev;
+	int user_vector;
+	int dev_eqn;
+	unsigned int irqn;
+	int err;
+
+	if (uverbs_copy_from(&user_vector, attrs,
+			     MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
+		return -EFAULT;
+
+	c = devx_ufile2uctx(file);
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+	dev = to_mdev(c->ibucontext.device);
+
+	err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
+	if (err < 0)
+		return err;
+
+	if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+			   &dev_eqn, sizeof(dev_eqn)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Security note:
+ * The hardware protection mechanism works like this: each device object that
+ * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
+ * the device specification manual) upon its creation. Then, upon a doorbell,
+ * hardware fetches the object context for which the doorbell was rung and
+ * validates that the UAR through which the DB was rung matches the UAR ID
+ * of the object.
+ * If there is no match, the doorbell is silently ignored by the hardware. Of
+ * course, the user cannot ring a doorbell on a UAR that was not mapped to it.
+ * Now in devx, since the devx kernel code does not manipulate the QP/SQ/CQ
+ * command mailboxes (except tagging them with UID), we expose the user's UAR
+ * ID to it, so it can embed that ID in these objects in the expected
+ * specification format. So the only thing a user can do is hurt itself by
+ * creating a QP/SQ/CQ with a UAR ID other than its own; in that case other
+ * users may ring a doorbell on its objects.
+ * The consequence is that another user can schedule a QP/SQ of the buggy
+ * user for execution (just insert it into the hardware schedule queue or
+ * arm its CQ for event generation); no further harm is expected.
+ */
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c;
+	struct mlx5_ib_dev *dev;
+	u32 user_idx;
+	s32 dev_idx;
+
+	c = devx_ufile2uctx(file);
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+	dev = to_mdev(c->ibucontext.device);
+
+	if (uverbs_copy_from(&user_idx, attrs,
+			     MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
+		return -EFAULT;
+
+	dev_idx = bfregn_to_uar_index(dev, &c->bfregi, user_idx, true);
+	if (dev_idx < 0)
+		return dev_idx;
+
+	if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+			   &dev_idx, sizeof(dev_idx)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_ucontext *c;
+	struct mlx5_ib_dev *dev;
+	void *cmd_in = uverbs_attr_get_alloced_ptr(
+		attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
+	int cmd_out_len = uverbs_attr_get_len(attrs,
+					MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
+	void *cmd_out;
+	int err;
+
+	c = devx_ufile2uctx(file);
+	if (IS_ERR(c))
+		return PTR_ERR(c);
+	dev = to_mdev(c->ibucontext.device);
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	/* Only a white list of general HCA commands is allowed for this method. */
+	if (!devx_is_general_cmd(cmd_in))
+		return -EINVAL;
+
+	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+	if (IS_ERR(cmd_out))
+		return PTR_ERR(cmd_out);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		return err;
+
+	return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out,
+			      cmd_out_len);
+}
+
+static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
+				       u32 *dinlen,
+				       u32 *obj_id)
+{
+	u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
+	u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
+
+	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+	*dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
+
+	MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
+	MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
+
+	switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
+	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+		MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
+		break;
+
+	case MLX5_CMD_OP_CREATE_MKEY:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+		break;
+	case MLX5_CMD_OP_CREATE_CQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+		break;
+	case MLX5_CMD_OP_ALLOC_PD:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+		break;
+	case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+		break;
+	case MLX5_CMD_OP_CREATE_RMP:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+		break;
+	case MLX5_CMD_OP_CREATE_SQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+		break;
+	case MLX5_CMD_OP_CREATE_RQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+		break;
+	case MLX5_CMD_OP_CREATE_RQT:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+		break;
+	case MLX5_CMD_OP_CREATE_TIR:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+		break;
+	case MLX5_CMD_OP_CREATE_TIS:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+		break;
+	case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+		break;
+	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+		*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
+		*obj_id = MLX5_GET(create_flow_table_out, out, table_id);
+		MLX5_SET(destroy_flow_table_in, din, other_vport,
+			 MLX5_GET(create_flow_table_in,  in, other_vport));
+		MLX5_SET(destroy_flow_table_in, din, vport_number,
+			 MLX5_GET(create_flow_table_in,  in, vport_number));
+		MLX5_SET(destroy_flow_table_in, din, table_type,
+			 MLX5_GET(create_flow_table_in,  in, table_type));
+		MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+		break;
+	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+		*dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
+		*obj_id = MLX5_GET(create_flow_group_out, out, group_id);
+		MLX5_SET(destroy_flow_group_in, din, other_vport,
+			 MLX5_GET(create_flow_group_in, in, other_vport));
+		MLX5_SET(destroy_flow_group_in, din, vport_number,
+			 MLX5_GET(create_flow_group_in, in, vport_number));
+		MLX5_SET(destroy_flow_group_in, din, table_type,
+			 MLX5_GET(create_flow_group_in, in, table_type));
+		MLX5_SET(destroy_flow_group_in, din, table_id,
+			 MLX5_GET(create_flow_group_in, in, table_id));
+		MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+		break;
+	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+		*dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
+		*obj_id = MLX5_GET(set_fte_in, in, flow_index);
+		MLX5_SET(delete_fte_in, din, other_vport,
+			 MLX5_GET(set_fte_in,  in, other_vport));
+		MLX5_SET(delete_fte_in, din, vport_number,
+			 MLX5_GET(set_fte_in, in, vport_number));
+		MLX5_SET(delete_fte_in, din, table_type,
+			 MLX5_GET(set_fte_in, in, table_type));
+		MLX5_SET(delete_fte_in, din, table_id,
+			 MLX5_GET(set_fte_in, in, table_id));
+		MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+		break;
+	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+		break;
+	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+		break;
+	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+		break;
+	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+		*dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
+		*obj_id = MLX5_GET(create_scheduling_element_out, out,
+				   scheduling_element_id);
+		MLX5_SET(destroy_scheduling_element_in, din,
+			 scheduling_hierarchy,
+			 MLX5_GET(create_scheduling_element_in, in,
+				  scheduling_hierarchy));
+		MLX5_SET(destroy_scheduling_element_in, din,
+			 scheduling_element_id, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+		break;
+	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+		*dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
+		*obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+		MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+		break;
+	case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+		*dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
+		*obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+		MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+		break;
+	case MLX5_CMD_OP_CREATE_QP:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+		break;
+	case MLX5_CMD_OP_CREATE_SRQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+		break;
+	case MLX5_CMD_OP_CREATE_XRC_SRQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+			 MLX5_CMD_OP_DESTROY_XRC_SRQ);
+		break;
+	case MLX5_CMD_OP_CREATE_DCT:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+		break;
+	case MLX5_CMD_OP_CREATE_XRQ:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+		break;
+	case MLX5_CMD_OP_ATTACH_TO_MCG:
+		*dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
+		MLX5_SET(detach_from_mcg_in, din, qpn,
+			 MLX5_GET(attach_to_mcg_in, in, qpn));
+		memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
+		       MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
+		       MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+		break;
+	case MLX5_CMD_OP_ALLOC_XRCD:
+		MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+		break;
+	default:
+		/* The entry must match one of the devx_is_obj_create_cmd() cases */
+		WARN_ON(true);
+		break;
+	}
+}
+
+static int devx_obj_cleanup(struct ib_uobject *uobject,
+			    enum rdma_remove_reason why)
+{
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	struct devx_obj *obj = uobject->object;
+	int ret;
+
+	ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+	if (ib_is_destroy_retryable(ret, why, uobject))
+		return ret;
+
+	kfree(obj);
+	return ret;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+{
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
+	int cmd_out_len =  uverbs_attr_get_len(attrs,
+					MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
+	void *cmd_out;
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
+	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+	struct devx_obj *obj;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	if (!devx_is_obj_create_cmd(cmd_in))
+		return -EINVAL;
+
+	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+	if (IS_ERR(cmd_out))
+		return PTR_ERR(cmd_out);
+
+	obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		goto obj_free;
+
+	uobj->object = obj;
+	obj->mdev = dev->mdev;
+	devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id);
+	WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
+
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
+	if (err)
+		goto obj_free;
+
+	return 0;
+
+obj_free:
+	kfree(obj);
+	return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+{
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
+	int cmd_out_len = uverbs_attr_get_len(attrs,
+					MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+							  MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
+	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct devx_obj *obj = uobj->object;
+	void *cmd_out;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	if (!devx_is_obj_modify_cmd(cmd_in))
+		return -EINVAL;
+
+	if (!devx_is_valid_obj_id(obj, cmd_in))
+		return -EINVAL;
+
+	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+	if (IS_ERR(cmd_out))
+		return PTR_ERR(cmd_out);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(obj->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		return err;
+
+	return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+			      cmd_out, cmd_out_len);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+{
+	void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
+	int cmd_out_len = uverbs_attr_get_len(attrs,
+					      MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+							  MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
+	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct devx_obj *obj = uobj->object;
+	void *cmd_out;
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	if (!devx_is_obj_query_cmd(cmd_in))
+		return -EINVAL;
+
+	if (!devx_is_valid_obj_id(obj, cmd_in))
+		return -EINVAL;
+
+	cmd_out = uverbs_zalloc(attrs, cmd_out_len);
+	if (IS_ERR(cmd_out))
+		return PTR_ERR(cmd_out);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(obj->mdev, cmd_in,
+			    uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
+			    cmd_out, cmd_out_len);
+	if (err)
+		return err;
+
+	return uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+			      cmd_out, cmd_out_len);
+}
+
+static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
+			 struct uverbs_attr_bundle *attrs,
+			 struct devx_umem *obj)
+{
+	u64 addr;
+	size_t size;
+	u32 access;
+	int npages;
+	int err;
+	u32 page_mask;
+
+	if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
+	    uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN))
+		return -EFAULT;
+
+	err = uverbs_get_flags32(&access, attrs,
+				 MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+				 IB_ACCESS_SUPPORTED);
+	if (err)
+		return err;
+
+	err = ib_check_mr_access(access);
+	if (err)
+		return err;
+
+	obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
+	if (IS_ERR(obj->umem))
+		return PTR_ERR(obj->umem);
+
+	mlx5_ib_cont_pages(obj->umem, obj->umem->address,
+			   MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
+			   &obj->page_shift, &obj->ncont, NULL);
+
+	if (!npages) {
+		ib_umem_release(obj->umem);
+		return -EINVAL;
+	}
+
+	page_mask = (1 << obj->page_shift) - 1;
+	obj->page_offset = obj->umem->address & page_mask;
+
+	return 0;
+}
+
+static int devx_umem_reg_cmd_alloc(struct uverbs_attr_bundle *attrs,
+				   struct devx_umem *obj,
+				   struct devx_umem_reg_cmd *cmd)
+{
+	cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
+		    (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
+	cmd->in = uverbs_zalloc(attrs, cmd->inlen);
+	return PTR_ERR_OR_ZERO(cmd->in);
+}
+
+static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
+				    struct devx_umem *obj,
+				    struct devx_umem_reg_cmd *cmd)
+{
+	void *umem;
+	__be64 *mtt;
+
+	umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
+	mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM);
+	MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
+	MLX5_SET(umem, umem, log_page_size, obj->page_shift -
+					    MLX5_ADAPTER_PAGE_SHIFT);
+	MLX5_SET(umem, umem, page_offset, obj->page_offset);
+	mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
+			     (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
+			     MLX5_IB_MTT_READ);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+{
+	struct devx_umem_reg_cmd cmd;
+	struct devx_umem *obj;
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
+	u32 obj_id;
+	struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
+	struct mlx5_ib_dev *dev = to_mdev(c->ibucontext.device);
+	int err;
+
+	if (!c->devx_uid)
+		return -EPERM;
+
+	obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
+	if (err)
+		goto err_obj_free;
+
+	err = devx_umem_reg_cmd_alloc(attrs, obj, &cmd);
+	if (err)
+		goto err_umem_release;
+
+	devx_umem_reg_cmd_build(dev, obj, &cmd);
+
+	MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid);
+	err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
+			    sizeof(cmd.out));
+	if (err)
+		goto err_umem_release;
+
+	obj->mdev = dev->mdev;
+	uobj->object = obj;
+	devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
+	err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
+	if (err)
+		goto err_umem_destroy;
+
+	return 0;
+
+err_umem_destroy:
+	mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
+err_umem_release:
+	ib_umem_release(obj->umem);
+err_obj_free:
+	kfree(obj);
+	return err;
+}
+
+static int devx_umem_cleanup(struct ib_uobject *uobject,
+			     enum rdma_remove_reason why)
+{
+	struct devx_umem *obj = uobject->object;
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+	int err;
+
+	err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+	if (ib_is_destroy_retryable(err, why, uobject))
+		return err;
+
+	ib_umem_release(obj->umem);
+	kfree(obj);
+	return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_UMEM_REG,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
+			MLX5_IB_OBJECT_DEVX_UMEM,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+			   UVERBS_ATTR_TYPE(u64),
+			   UA_MANDATORY),
+	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+			     enum ib_access_flags),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+			    UVERBS_ATTR_TYPE(u32),
+			    UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
+			MLX5_IB_OBJECT_DEVX_UMEM,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_QUERY_EQN,
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+			    UVERBS_ATTR_TYPE(u32),
+			    UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_QUERY_UAR,
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
+			   UVERBS_ATTR_TYPE(u32),
+			   UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+			    UVERBS_ATTR_TYPE(u32),
+			    UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OTHER,
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+		UA_MANDATORY,
+		UA_ALLOC_AND_COPY),
+	UVERBS_ATTR_PTR_OUT(
+		MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+		UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OBJ_CREATE,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+		UA_MANDATORY,
+		UA_ALLOC_AND_COPY),
+	UVERBS_ATTR_PTR_OUT(
+		MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+		UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_WRITE,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+		UA_MANDATORY,
+		UA_ALLOC_AND_COPY),
+	UVERBS_ATTR_PTR_OUT(
+		MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+		UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_READ,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+		UA_MANDATORY,
+		UA_ALLOC_AND_COPY),
+	UVERBS_ATTR_PTR_OUT(
+		MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+		UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+		UA_MANDATORY));
+
+DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
+			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
+			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
+			      &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
+			    UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
+			    UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
+
+DECLARE_UVERBS_OBJECT_TREE(devx_objects,
+			   &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
+			   &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
+			   &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
+
+const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void)
+{
+	return &devx_objects;
+}
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
new file mode 100644
index 0000000..1a29f47
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_umem.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
+	[MLX5_IB_FLOW_TYPE_NORMAL] = {
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		.u.ptr = {
+			.len = sizeof(u16), /* data is priority */
+			.min_len = sizeof(u16),
+		}
+	},
+	[MLX5_IB_FLOW_TYPE_SNIFFER] = {
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_NO_DATA(),
+	},
+	[MLX5_IB_FLOW_TYPE_ALL_DEFAULT] = {
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_NO_DATA(),
+	},
+	[MLX5_IB_FLOW_TYPE_MC_DEFAULT] = {
+		.type = UVERBS_ATTR_TYPE_PTR_IN,
+		UVERBS_ATTR_NO_DATA(),
+	},
+};
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_flow_handler *flow_handler;
+	struct mlx5_ib_flow_matcher *fs_matcher;
+	void *devx_obj;
+	int dest_id, dest_type;
+	void *cmd_in;
+	int inlen;
+	bool dest_devx, dest_qp;
+	struct ib_qp *qp = NULL;
+	struct ib_uobject *uobj =
+		uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
+	struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+
+	if (!capable(CAP_NET_RAW))
+		return -EPERM;
+
+	dest_devx =
+		uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+	dest_qp = uverbs_attr_is_valid(attrs,
+				       MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+
+	if ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))
+		return -EINVAL;
+
+	if (dest_devx) {
+		devx_obj = uverbs_attr_get_obj(
+			attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
+		if (IS_ERR(devx_obj))
+			return PTR_ERR(devx_obj);
+
+		/* Verify that the given DEVX object is a flow
+		 * steering destination.
+		 */
+		if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type))
+			return -EINVAL;
+	} else {
+		struct mlx5_ib_qp *mqp;
+
+		qp = uverbs_attr_get_obj(attrs,
+					 MLX5_IB_ATTR_CREATE_FLOW_DEST_QP);
+		if (IS_ERR(qp))
+			return PTR_ERR(qp);
+
+		if (qp->qp_type != IB_QPT_RAW_PACKET)
+			return -EINVAL;
+
+		mqp = to_mqp(qp);
+		if (mqp->flags & MLX5_IB_QP_RSS)
+			dest_id = mqp->rss_qp.tirn;
+		else
+			dest_id = mqp->raw_packet_qp.rq.tirn;
+		dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+	}
+
+	if (dev->rep)
+		return -ENOTSUPP;
+
+	cmd_in = uverbs_attr_get_alloced_ptr(
+		attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+	inlen = uverbs_attr_get_len(attrs,
+				    MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
+	fs_matcher = uverbs_attr_get_obj(attrs,
+					 MLX5_IB_ATTR_CREATE_FLOW_MATCHER);
+	flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, cmd_in, inlen,
+					       dest_id, dest_type);
+	if (IS_ERR(flow_handler))
+		return PTR_ERR(flow_handler);
+
+	ib_set_flow(uobj, &flow_handler->ibflow, qp, &dev->ib_dev);
+
+	return 0;
+}
+
+static int flow_matcher_cleanup(struct ib_uobject *uobject,
+				enum rdma_remove_reason why)
+{
+	struct mlx5_ib_flow_matcher *obj = uobject->object;
+	int ret;
+
+	ret = ib_destroy_usecnt(&obj->usecnt, why, uobject);
+	if (ret)
+		return ret;
+
+	kfree(obj);
+	return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
+	struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+{
+	struct ib_uobject *uobj = uverbs_attr_get_uobject(
+		attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
+	struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+	struct mlx5_ib_flow_matcher *obj;
+	int err;
+
+	obj = kzalloc(sizeof(struct mlx5_ib_flow_matcher), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+
+	obj->mask_len = uverbs_attr_get_len(
+		attrs, MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+	err = uverbs_copy_from(&obj->matcher_mask,
+			       attrs,
+			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK);
+	if (err)
+		goto end;
+
+	obj->flow_type = uverbs_attr_get_enum_id(
+		attrs, MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+
+	if (obj->flow_type == MLX5_IB_FLOW_TYPE_NORMAL) {
+		err = uverbs_copy_from(&obj->priority,
+				       attrs,
+				       MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE);
+		if (err)
+			goto end;
+	}
+
+	err = uverbs_copy_from(&obj->match_criteria_enable,
+			       attrs,
+			       MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA);
+	if (err)
+		goto end;
+
+	uobj->object = obj;
+	obj->mdev = dev->mdev;
+	atomic_set(&obj->usecnt, 0);
+	return 0;
+
+end:
+	kfree(obj);
+	return err;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_CREATE_FLOW,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+			UVERBS_OBJECT_FLOW,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+		UA_MANDATORY,
+		UA_ALLOC_AND_COPY),
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+			MLX5_IB_OBJECT_FLOW_MATCHER,
+			UVERBS_ACCESS_READ,
+			UA_MANDATORY),
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+			UVERBS_OBJECT_QP,
+			UVERBS_ACCESS_READ),
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+			MLX5_IB_OBJECT_DEVX_OBJ,
+			UVERBS_ACCESS_READ));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	MLX5_IB_METHOD_DESTROY_FLOW,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_CREATE_FLOW_HANDLE,
+			UVERBS_OBJECT_FLOW,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
+
+ADD_UVERBS_METHODS(mlx5_ib_fs,
+		   UVERBS_OBJECT_FLOW,
+		   &UVERBS_METHOD(MLX5_IB_METHOD_CREATE_FLOW),
+		   &UVERBS_METHOD(MLX5_IB_METHOD_DESTROY_FLOW));
+
+DECLARE_UVERBS_NAMED_METHOD(
+	MLX5_IB_METHOD_FLOW_MATCHER_CREATE,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE,
+			MLX5_IB_OBJECT_FLOW_MATCHER,
+			UVERBS_ACCESS_NEW,
+			UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(
+		MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+		UVERBS_ATTR_SIZE(1, sizeof(struct mlx5_ib_match_params)),
+		UA_MANDATORY),
+	UVERBS_ATTR_ENUM_IN(MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+			    mlx5_ib_flow_type,
+			    UA_MANDATORY),
+	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+			   UVERBS_ATTR_TYPE(u8),
+			   UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+	UVERBS_ATTR_IDR(MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE,
+			MLX5_IB_OBJECT_FLOW_MATCHER,
+			UVERBS_ACCESS_DESTROY,
+			UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
+			    UVERBS_TYPE_ALLOC_IDR(flow_matcher_cleanup),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
+			    &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
+
+DECLARE_UVERBS_OBJECT_TREE(flow_objects,
+			   &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER));
+
+int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
+{
+	int i = 0;
+
+	root[i++] = &flow_objects;
+	root[i++] = &mlx5_ib_fs;
+
+	return i;
+}
diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c
index 79e6309..4950df3 100644
--- a/drivers/infiniband/hw/mlx5/gsi.c
+++ b/drivers/infiniband/hw/mlx5/gsi.c
@@ -477,8 +477,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
 	return gsi->tx_qps[qp_index];
 }
 
-int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr)
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr)
 {
 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
 	struct ib_qp *tx_qp;
@@ -522,8 +522,8 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
 	return ret;
 }
 
-int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr)
 {
 	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
 
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b3ba9a2..c414f38 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -419,8 +419,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
 	translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
 				 &props->active_width);
 
-	props->port_cap_flags  |= IB_PORT_CM_SUP;
-	props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
+	props->port_cap_flags |= IB_PORT_CM_SUP;
+	props->ip_gids = true;
 
 	props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
 						roce_address_table_size);
@@ -510,12 +510,11 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
 				      vlan_id, port_num);
 }
 
-static int mlx5_ib_add_gid(const union ib_gid *gid,
-			   const struct ib_gid_attr *attr,
+static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
 			   __always_unused void **context)
 {
 	return set_roce_addr(to_mdev(attr->device), attr->port_num,
-			     attr->index, gid, attr);
+			     attr->index, &attr->gid, attr);
 }
 
 static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
@@ -525,41 +524,15 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
 			     attr->index, NULL, NULL);
 }
 
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
-			       int index)
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
+			       const struct ib_gid_attr *attr)
 {
-	struct ib_gid_attr attr;
-	union ib_gid gid;
-
-	if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
-		return 0;
-
-	dev_put(attr.ndev);
-
-	if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+	if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
 		return 0;
 
 	return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
 }
 
-int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
-			   int index, enum ib_gid_type *gid_type)
-{
-	struct ib_gid_attr attr;
-	union ib_gid gid;
-	int ret;
-
-	ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
-	if (ret)
-		return ret;
-
-	dev_put(attr.ndev);
-
-	*gid_type = attr.gid_type;
-
-	return 0;
-}
-
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
 	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@ -915,7 +888,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
 		     sizeof(struct mlx5_wqe_raddr_seg)) /
 		sizeof(struct mlx5_wqe_data_seg);
-	props->max_sge = min(max_rq_sg, max_sq_sg);
+	props->max_send_sge = max_sq_sg;
+	props->max_recv_sge = max_rq_sg;
 	props->max_sge_rd	   = MLX5_MAX_SGE_RD;
 	props->max_cq		   = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
 	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
@@ -1246,7 +1220,6 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
 	props->qkey_viol_cntr	= rep->qkey_violation_counter;
 	props->subnet_timeout	= rep->subnet_timeout;
 	props->init_type_reply	= rep->init_type_reply;
-	props->grh_required	= rep->grh_required;
 
 	err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
 	if (err)
@@ -1585,31 +1558,26 @@ static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *conte
 	return err;
 }
 
-static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+static void deallocate_uars(struct mlx5_ib_dev *dev,
+			    struct mlx5_ib_ucontext *context)
 {
 	struct mlx5_bfreg_info *bfregi;
-	int err;
 	int i;
 
 	bfregi = &context->bfregi;
-	for (i = 0; i < bfregi->num_sys_pages; i++) {
+	for (i = 0; i < bfregi->num_sys_pages; i++)
 		if (i < bfregi->num_static_sys_pages ||
-		    bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) {
-			err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
-			if (err) {
-				mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err);
-				return err;
-			}
-		}
-	}
-
-	return 0;
+		    bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
+			mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
 }
 
 static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
 {
 	int err;
 
+	if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+		return 0;
+
 	err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
 	if (err)
 		return err;
@@ -1631,6 +1599,9 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
 
 static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
 {
+	if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+		return;
+
 	mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
 
 	if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
@@ -1660,6 +1631,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	int err;
 	size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
 				     max_cqe_version);
+	u32 dump_fill_mkey;
 	bool lib_uar_4k;
 
 	if (!dev->ib_active)
@@ -1676,8 +1648,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	if (err)
 		return ERR_PTR(err);
 
-	if (req.flags)
-		return ERR_PTR(-EINVAL);
+	if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
+		return ERR_PTR(-EOPNOTSUPP);
 
 	if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
 		return ERR_PTR(-EOPNOTSUPP);
@@ -1755,10 +1727,26 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
 #endif
 
-	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
-		err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
+	err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
+	if (err)
+		goto out_uars;
+
+	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
+		/* Block DEVX on InfiniBand because of SELinux */
+		if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
+			err = -EPERM;
+			goto out_td;
+		}
+
+		err = mlx5_ib_devx_create(dev, context);
 		if (err)
-			goto out_uars;
+			goto out_td;
+	}
+
+	if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+		err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
+		if (err)
+			goto out_mdev;
 	}
 
 	INIT_LIST_HEAD(&context->vma_private_list);
@@ -1819,9 +1807,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 		resp.response_length += sizeof(resp.num_dyn_bfregs);
 	}
 
+	if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) {
+		if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+			resp.dump_fill_mkey = dump_fill_mkey;
+			resp.comp_mask |=
+				MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+		}
+		resp.response_length += sizeof(resp.dump_fill_mkey);
+	}
+
 	err = ib_copy_to_udata(udata, &resp, resp.response_length);
 	if (err)
-		goto out_td;
+		goto out_mdev;
 
 	bfregi->ver = ver;
 	bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
@@ -1831,9 +1828,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 
 	return &context->ibucontext;
 
+out_mdev:
+	if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
+		mlx5_ib_devx_destroy(dev, context);
 out_td:
-	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
-		mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+	mlx5_ib_dealloc_transport_domain(dev, context->tdn);
 
 out_uars:
 	deallocate_uars(dev, context);
@@ -1856,9 +1855,11 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
 	struct mlx5_bfreg_info *bfregi;
 
+	if (context->devx_uid)
+		mlx5_ib_devx_destroy(dev, context);
+
 	bfregi = &context->bfregi;
-	if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
-		mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+	mlx5_ib_dealloc_transport_domain(dev, context->tdn);
 
 	deallocate_uars(dev, context);
 	kfree(bfregi->sys_pages);
@@ -2040,7 +2041,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 	struct mlx5_bfreg_info *bfregi = &context->bfregi;
 	int err;
 	unsigned long idx;
-	phys_addr_t pfn, pa;
+	phys_addr_t pfn;
 	pgprot_t prot;
 	u32 bfreg_dyn_idx = 0;
 	u32 uar_index;
@@ -2131,8 +2132,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 		goto err;
 	}
 
-	pa = pfn << PAGE_SHIFT;
-
 	err = mlx5_ib_set_vma_data(vma, context);
 	if (err)
 		goto err;
@@ -2699,7 +2698,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 			 IPPROTO_GRE);
 
 		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
-			 0xffff);
+			 ntohs(ib_spec->gre.mask.protocol));
 		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
 			 ntohs(ib_spec->gre.val.protocol));
 
@@ -2979,11 +2978,11 @@ static void counters_clear_description(struct ib_counters *counters)
 
 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 {
-	struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
 	struct mlx5_ib_flow_handler *handler = container_of(flow_id,
 							  struct mlx5_ib_flow_handler,
 							  ibflow);
 	struct mlx5_ib_flow_handler *iter, *tmp;
+	struct mlx5_ib_dev *dev = handler->dev;
 
 	mutex_lock(&dev->flow_db->lock);
 
@@ -3001,6 +3000,8 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 		counters_clear_description(handler->ibcounters);
 
 	mutex_unlock(&dev->flow_db->lock);
+	if (handler->flow_matcher)
+		atomic_dec(&handler->flow_matcher->usecnt);
 	kfree(handler);
 
 	return 0;
@@ -3021,6 +3022,26 @@ enum flow_table_type {
 
 #define MLX5_FS_MAX_TYPES	 6
 #define MLX5_FS_MAX_ENTRIES	 BIT(16)
+
+static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
+					   struct mlx5_ib_flow_prio *prio,
+					   int priority,
+					   int num_entries, int num_groups)
+{
+	struct mlx5_flow_table *ft;
+
+	ft = mlx5_create_auto_grouped_flow_table(ns, priority,
+						 num_entries,
+						 num_groups,
+						 0, 0);
+	if (IS_ERR(ft))
+		return ERR_CAST(ft);
+
+	prio->flow_table = ft;
+	prio->refcount = 0;
+	return prio;
+}
+
 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 						struct ib_flow_attr *flow_attr,
 						enum flow_table_type ft_type)
@@ -3033,7 +3054,6 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 	int num_entries;
 	int num_groups;
 	int priority;
-	int err = 0;
 
 	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 						       log_max_ft_size));
@@ -3083,21 +3103,10 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 		return ERR_PTR(-ENOMEM);
 
 	ft = prio->flow_table;
-	if (!ft) {
-		ft = mlx5_create_auto_grouped_flow_table(ns, priority,
-							 num_entries,
-							 num_groups,
-							 0, 0);
+	if (!ft)
+		return _get_prio(ns, prio, priority, num_entries, num_groups);
 
-		if (!IS_ERR(ft)) {
-			prio->refcount = 0;
-			prio->flow_table = ft;
-		} else {
-			err = PTR_ERR(ft);
-		}
-	}
-
-	return err ? ERR_PTR(err) : prio;
+	return prio;
 }
 
 static void set_underlay_qp(struct mlx5_ib_dev *dev,
@@ -3356,6 +3365,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 
 	ft_prio->refcount++;
 	handler->prio = ft_prio;
+	handler->dev = dev;
 
 	ft_prio->flow_table = ft;
 free:
@@ -3648,6 +3658,189 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 	return ERR_PTR(err);
 }
 
+static struct mlx5_ib_flow_prio *_get_flow_table(struct mlx5_ib_dev *dev,
+						 int priority, bool mcast)
+{
+	int max_table_size;
+	struct mlx5_flow_namespace *ns = NULL;
+	struct mlx5_ib_flow_prio *prio;
+
+	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
+			     log_max_ft_size));
+	if (max_table_size < MLX5_FS_MAX_ENTRIES)
+		return ERR_PTR(-ENOMEM);
+
+	if (mcast)
+		priority = MLX5_IB_FLOW_MCAST_PRIO;
+	else
+		priority = ib_prio_to_core_prio(priority, false);
+
+	ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS);
+	if (!ns)
+		return ERR_PTR(-ENOTSUPP);
+
+	prio = &dev->flow_db->prios[priority];
+
+	if (prio->flow_table)
+		return prio;
+
+	return _get_prio(ns, prio, priority, MLX5_FS_MAX_ENTRIES,
+			 MLX5_FS_MAX_TYPES);
+}
+
+static struct mlx5_ib_flow_handler *
+_create_raw_flow_rule(struct mlx5_ib_dev *dev,
+		      struct mlx5_ib_flow_prio *ft_prio,
+		      struct mlx5_flow_destination *dst,
+		      struct mlx5_ib_flow_matcher  *fs_matcher,
+		      void *cmd_in, int inlen)
+{
+	struct mlx5_ib_flow_handler *handler;
+	struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
+	struct mlx5_flow_spec *spec;
+	struct mlx5_flow_table *ft = ft_prio->flow_table;
+	int err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
+	if (!handler || !spec) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	INIT_LIST_HEAD(&handler->list);
+
+	memcpy(spec->match_value, cmd_in, inlen);
+	memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params,
+	       fs_matcher->mask_len);
+	spec->match_criteria_enable = fs_matcher->match_criteria_enable;
+
+	flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	handler->rule = mlx5_add_flow_rules(ft, spec,
+					    &flow_act, dst, 1);
+
+	if (IS_ERR(handler->rule)) {
+		err = PTR_ERR(handler->rule);
+		goto free;
+	}
+
+	ft_prio->refcount++;
+	handler->prio = ft_prio;
+	handler->dev = dev;
+	ft_prio->flow_table = ft;
+
+free:
+	if (err)
+		kfree(handler);
+	kvfree(spec);
+	return err ? ERR_PTR(err) : handler;
+}
+
+static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
+				void *match_v)
+{
+	void *match_c;
+	void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4;
+	void *dmac, *dmac_mask;
+	void *ipv4, *ipv4_mask;
+
+	if (!(fs_matcher->match_criteria_enable &
+	      (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT)))
+		return false;
+
+	match_c = fs_matcher->matcher_mask.match_params;
+	match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v,
+					   outer_headers);
+	match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c,
+					   outer_headers);
+
+	dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+			    dmac_47_16);
+	dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+				 dmac_47_16);
+
+	if (is_multicast_ether_addr(dmac) &&
+	    is_multicast_ether_addr(dmac_mask))
+		return true;
+
+	ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4,
+			    dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+	ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4,
+				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+
+	if (ipv4_is_multicast(*(__be32 *)(ipv4)) &&
+	    ipv4_is_multicast(*(__be32 *)(ipv4_mask)))
+		return true;
+
+	return false;
+}
+
+struct mlx5_ib_flow_handler *
+mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
+			struct mlx5_ib_flow_matcher *fs_matcher,
+			void *cmd_in, int inlen, int dest_id,
+			int dest_type)
+{
+	struct mlx5_flow_destination *dst;
+	struct mlx5_ib_flow_prio *ft_prio;
+	int priority = fs_matcher->priority;
+	struct mlx5_ib_flow_handler *handler;
+	bool mcast;
+	int err;
+
+	if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
+		return ERR_PTR(-ENOMEM);
+
+	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+	if (!dst)
+		return ERR_PTR(-ENOMEM);
+
+	mcast = raw_fs_is_multicast(fs_matcher, cmd_in);
+	mutex_lock(&dev->flow_db->lock);
+
+	ft_prio = _get_flow_table(dev, priority, mcast);
+	if (IS_ERR(ft_prio)) {
+		err = PTR_ERR(ft_prio);
+		goto unlock;
+	}
+
+	if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
+		dst->type = dest_type;
+		dst->tir_num = dest_id;
+	} else {
+		dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+		dst->ft_num = dest_id;
+	}
+
+	handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, cmd_in,
+					inlen);
+
+	if (IS_ERR(handler)) {
+		err = PTR_ERR(handler);
+		goto destroy_ft;
+	}
+
+	mutex_unlock(&dev->flow_db->lock);
+	atomic_inc(&fs_matcher->usecnt);
+	handler->flow_matcher = fs_matcher;
+
+	kfree(dst);
+
+	return handler;
+
+destroy_ft:
+	put_flow_table(dev, ft_prio, false);
+unlock:
+	mutex_unlock(&dev->flow_db->lock);
+	kfree(dst);
+
+	return ERR_PTR(err);
+}
+
 static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
 {
 	u32 flags = 0;
@@ -3672,12 +3865,11 @@ mlx5_ib_create_flow_action_esp(struct ib_device *device,
 	u64 flags;
 	int err = 0;
 
-	if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&action_flags, attrs,
-						MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS)))
-		return ERR_PTR(-EFAULT);
-
-	if (action_flags >= (MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1))
-		return ERR_PTR(-EOPNOTSUPP);
+	err = uverbs_get_flags64(
+		&action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+		((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
+	if (err)
+		return ERR_PTR(err);
 
 	flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
 
@@ -4466,7 +4658,8 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
 		cancel_work_sync(&devr->ports[port].pkey_change_work);
 }
 
-static u32 get_core_cap_flags(struct ib_device *ibdev)
+static u32 get_core_cap_flags(struct ib_device *ibdev,
+			      struct mlx5_hca_vport_context *rep)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
@@ -4475,11 +4668,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
 	bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
 	u32 ret = 0;
 
+	if (rep->grh_required)
+		ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
+
 	if (ll == IB_LINK_LAYER_INFINIBAND)
-		return RDMA_CORE_PORT_IBA_IB;
+		return ret | RDMA_CORE_PORT_IBA_IB;
 
 	if (raw_support)
-		ret = RDMA_CORE_PORT_RAW_PACKET;
+		ret |= RDMA_CORE_PORT_RAW_PACKET;
 
 	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
 		return ret;
@@ -4502,17 +4698,23 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
 	struct ib_port_attr attr;
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
+	struct mlx5_hca_vport_context rep = {0};
 	int err;
 
-	immutable->core_cap_flags = get_core_cap_flags(ibdev);
-
 	err = ib_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
 
+	if (ll == IB_LINK_LAYER_INFINIBAND) {
+		err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
+						   &rep);
+		if (err)
+			return err;
+	}
+
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = get_core_cap_flags(ibdev);
+	immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
 	if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
 		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
@@ -4610,7 +4812,7 @@ static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
 	}
 }
 
-static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
+static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
 {
 	int err;
 
@@ -4689,12 +4891,21 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
 	INIT_Q_COUNTER(req_cqe_flush_error),
 };
 
+#define INIT_EXT_PPCNT_COUNTER(_name)		\
+	{ .name = #_name, .offset =	\
+	MLX5_BYTE_OFF(ppcnt_reg, \
+		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
+
+static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
+	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
+};
+
 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 {
 	int i;
 
 	for (i = 0; i < dev->num_ports; i++) {
-		if (dev->port[i].cnts.set_id)
+		if (dev->port[i].cnts.set_id_valid)
 			mlx5_core_dealloc_q_counter(dev->mdev,
 						    dev->port[i].cnts.set_id);
 		kfree(dev->port[i].cnts.names);
@@ -4724,7 +4935,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
 		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
 		num_counters += ARRAY_SIZE(cong_cnts);
 	}
-
+	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
+		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
+	}
 	cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
 	if (!cnts->names)
 		return -ENOMEM;
@@ -4781,6 +4995,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 			offsets[j] = cong_cnts[i].offset;
 		}
 	}
+
+	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
+			names[j] = ext_ppcnt_cnts[i].name;
+			offsets[j] = ext_ppcnt_cnts[i].offset;
+		}
+	}
 }
 
 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
@@ -4826,7 +5047,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
 
 	return rdma_alloc_hw_stats_struct(port->cnts.names,
 					  port->cnts.num_q_counters +
-					  port->cnts.num_cong_counters,
+					  port->cnts.num_cong_counters +
+					  port->cnts.num_ext_ppcnt_counters,
 					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
@@ -4859,6 +5081,34 @@ static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
 	return ret;
 }
 
+static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
+					  struct mlx5_ib_port *port,
+					  struct rdma_hw_stats *stats)
+{
+	int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+	int ret, i;
+	void *out;
+
+	out = kvzalloc(sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
+	if (ret)
+		goto free;
+
+	for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
+		stats->value[i + offset] =
+			be64_to_cpup((__be64 *)(out +
+				    port->cnts.offsets[i + offset]));
+	}
+
+free:
+	kvfree(out);
+	return ret;
+}
+
 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 				struct rdma_hw_stats *stats,
 				u8 port_num, int index)
@@ -4872,13 +5122,21 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 	if (!stats)
 		return -EINVAL;
 
-	num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+	num_counters = port->cnts.num_q_counters +
+		       port->cnts.num_cong_counters +
+		       port->cnts.num_ext_ppcnt_counters;
 
 	/* q_counters are per IB device, query the master mdev */
 	ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
 	if (ret)
 		return ret;
 
+	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+		ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
+		if (ret)
+			return ret;
+	}
+
 	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
 		mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
 						    &mdev_port_num);
@@ -4905,11 +5163,6 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
 	return num_counters;
 }
 
-static void mlx5_ib_free_rdma_netdev(struct net_device *netdev)
-{
-	return mlx5_rdma_netdev_free(netdev);
-}
-
 static struct net_device*
 mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
 			  u8 port_num,
@@ -4919,17 +5172,12 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
 			  void (*setup)(struct net_device *))
 {
 	struct net_device *netdev;
-	struct rdma_netdev *rn;
 
 	if (type != RDMA_NETDEV_IPOIB)
 		return ERR_PTR(-EOPNOTSUPP);
 
 	netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
 					name, setup);
-	if (likely(!IS_ERR_OR_NULL(netdev))) {
-		rn = netdev_priv(netdev);
-		rn->free_rdma_netdev = mlx5_ib_free_rdma_netdev;
-	}
 	return netdev;
 }
 
@@ -5127,8 +5375,8 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
 
 	spin_lock(&ibdev->port[port_num].mp.mpi_lock);
 	if (ibdev->port[port_num].mp.mpi) {
-		mlx5_ib_warn(ibdev, "port %d already affiliated.\n",
-			     port_num + 1);
+		mlx5_ib_dbg(ibdev, "port %d already affiliated.\n",
+			    port_num + 1);
 		spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
 		return false;
 	}
@@ -5263,45 +5511,47 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
 	mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
-ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM,
-			     UVERBS_METHOD_DM_ALLOC,
-			     &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
-						  UVERBS_ATTR_TYPE(u64),
-						  UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-			     &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
-						  UVERBS_ATTR_TYPE(u16),
-						  UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+	mlx5_ib_dm,
+	UVERBS_OBJECT_DM,
+	UVERBS_METHOD_DM_ALLOC,
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+			    UVERBS_ATTR_TYPE(u64),
+			    UA_MANDATORY),
+	UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+			    UVERBS_ATTR_TYPE(u16),
+			    UA_MANDATORY));
 
-ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION,
-			     UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
-			     &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
-						 UVERBS_ATTR_TYPE(u64),
-						 UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+	mlx5_ib_flow_action,
+	UVERBS_OBJECT_FLOW_ACTION,
+	UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+			     enum mlx5_ib_uapi_flow_action_flags));
 
-#define NUM_TREES	2
 static int populate_specs_root(struct mlx5_ib_dev *dev)
 {
-	const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = {
-		uverbs_default_get_objects()};
-	size_t num_trees = 1;
+	const struct uverbs_object_tree_def **trees = dev->driver_trees;
+	size_t num_trees = 0;
 
-	if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
-	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
-		default_root[num_trees++] = &mlx5_ib_flow_action;
+	if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+	    MLX5_ACCEL_IPSEC_CAP_DEVICE)
+		trees[num_trees++] = &mlx5_ib_flow_action;
 
-	if (MLX5_CAP_DEV_MEM(dev->mdev, memic) &&
-	    !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
-		default_root[num_trees++] = &mlx5_ib_dm;
+	if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
+		trees[num_trees++] = &mlx5_ib_dm;
 
-	dev->ib_dev.specs_root =
-		uverbs_alloc_spec_tree(num_trees, default_root);
+	if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+	    MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
+		trees[num_trees++] = mlx5_ib_get_devx_tree();
 
-	return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root);
-}
+	num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
 
-static void depopulate_specs_root(struct mlx5_ib_dev *dev)
-{
-	uverbs_free_spec_tree(dev->ib_dev.specs_root);
+	WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
+	trees[num_trees] = NULL;
+	dev->ib_dev.driver_specs = trees;
+
+	return 0;
 }
 
 static int mlx5_ib_read_counters(struct ib_counters *counters,
@@ -5552,6 +5802,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	dev->ib_dev.modify_qp		= mlx5_ib_modify_qp;
 	dev->ib_dev.query_qp		= mlx5_ib_query_qp;
 	dev->ib_dev.destroy_qp		= mlx5_ib_destroy_qp;
+	dev->ib_dev.drain_sq		= mlx5_ib_drain_sq;
+	dev->ib_dev.drain_rq		= mlx5_ib_drain_rq;
 	dev->ib_dev.post_send		= mlx5_ib_post_send;
 	dev->ib_dev.post_recv		= mlx5_ib_post_recv;
 	dev->ib_dev.create_cq		= mlx5_ib_create_cq;
@@ -5649,9 +5901,9 @@ int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
 	return 0;
 }
 
-static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
-					  u8 port_num)
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
 {
+	u8 port_num;
 	int i;
 
 	for (i = 0; i < dev->num_ports; i++) {
@@ -5674,6 +5926,8 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
 			(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
 			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
 
+	port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
 	return mlx5_add_netdev_notifier(dev, port_num);
 }
 
@@ -5690,14 +5944,12 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
 	enum rdma_link_layer ll;
 	int port_type_cap;
 	int err = 0;
-	u8 port_num;
 
-	port_num = mlx5_core_native_port_num(dev->mdev) - 1;
 	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
 	if (ll == IB_LINK_LAYER_ETHERNET)
-		err = mlx5_ib_stage_common_roce_init(dev, port_num);
+		err = mlx5_ib_stage_common_roce_init(dev);
 
 	return err;
 }
@@ -5712,19 +5964,17 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
 	struct mlx5_core_dev *mdev = dev->mdev;
 	enum rdma_link_layer ll;
 	int port_type_cap;
-	u8 port_num;
 	int err;
 
-	port_num = mlx5_core_native_port_num(dev->mdev) - 1;
 	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
 	if (ll == IB_LINK_LAYER_ETHERNET) {
-		err = mlx5_ib_stage_common_roce_init(dev, port_num);
+		err = mlx5_ib_stage_common_roce_init(dev);
 		if (err)
 			return err;
 
-		err = mlx5_enable_eth(dev, port_num);
+		err = mlx5_enable_eth(dev);
 		if (err)
 			goto cleanup;
 	}
@@ -5741,9 +5991,7 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
 	struct mlx5_core_dev *mdev = dev->mdev;
 	enum rdma_link_layer ll;
 	int port_type_cap;
-	u8 port_num;
 
-	port_num = mlx5_core_native_port_num(dev->mdev) - 1;
 	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
@@ -5842,11 +6090,6 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 	return ib_register_device(&dev->ib_dev, NULL);
 }
 
-static void mlx5_ib_stage_depopulate_specs(struct mlx5_ib_dev *dev)
-{
-	depopulate_specs_root(dev);
-}
-
 void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
 {
 	destroy_umrc_res(dev);
@@ -5915,8 +6158,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 	ib_dealloc_device((struct ib_device *)dev);
 }
 
-static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
-
 void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
 		    const struct mlx5_ib_profile *profile)
 {
@@ -5983,7 +6224,7 @@ static const struct mlx5_ib_profile pf_profile = {
 		     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_SPECS,
 		     mlx5_ib_stage_populate_specs,
-		     mlx5_ib_stage_depopulate_specs),
+		     NULL),
 	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
 		     mlx5_ib_stage_ib_reg_init,
 		     mlx5_ib_stage_ib_reg_cleanup),
@@ -6031,7 +6272,7 @@ static const struct mlx5_ib_profile nic_rep_profile = {
 		     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_SPECS,
 		     mlx5_ib_stage_populate_specs,
-		     mlx5_ib_stage_depopulate_specs),
+		     NULL),
 	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
 		     mlx5_ib_stage_ib_reg_init,
 		     mlx5_ib_stage_ib_reg_cleanup),
@@ -6046,7 +6287,7 @@ static const struct mlx5_ib_profile nic_rep_profile = {
 		     mlx5_ib_stage_rep_reg_cleanup),
 };
 
-static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
+static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
 {
 	struct mlx5_ib_multiport_info *mpi;
 	struct mlx5_ib_dev *dev;
@@ -6080,8 +6321,6 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
 	if (!bound) {
 		list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
 		dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n");
-	} else {
-		mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1);
 	}
 	mutex_unlock(&mlx5_ib_multiport_mutex);
 
@@ -6099,11 +6338,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
-	if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) {
-		u8 port_num = mlx5_core_native_port_num(mdev) - 1;
-
-		return mlx5_ib_add_slave_port(mdev, port_num);
-	}
+	if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
+		return mlx5_ib_add_slave_port(mdev);
 
 	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
 	if (!dev)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d89c8fe..320d4df 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -46,6 +46,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <rdma/mlx5-abi.h>
 #include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
 
 #define mlx5_ib_dbg(dev, format, arg...)				\
 pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
@@ -78,12 +79,6 @@ enum {
 	MLX5_REQ_SCAT_DATA64_CQE	= 0x22,
 };
 
-enum mlx5_ib_latency_class {
-	MLX5_IB_LATENCY_CLASS_LOW,
-	MLX5_IB_LATENCY_CLASS_MEDIUM,
-	MLX5_IB_LATENCY_CLASS_HIGH,
-};
-
 enum mlx5_ib_mad_ifc_flags {
 	MLX5_MAD_IFC_IGNORE_MKEY	= 1,
 	MLX5_MAD_IFC_IGNORE_BKEY	= 2,
@@ -143,6 +138,7 @@ struct mlx5_ib_ucontext {
 
 	u64			lib_caps;
 	DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
+	u16			devx_uid;
 };
 
 static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -176,6 +172,18 @@ struct mlx5_ib_flow_handler {
 	struct mlx5_ib_flow_prio	*prio;
 	struct mlx5_flow_handle		*rule;
 	struct ib_counters		*ibcounters;
+	struct mlx5_ib_dev		*dev;
+	struct mlx5_ib_flow_matcher	*flow_matcher;
+};
+
+struct mlx5_ib_flow_matcher {
+	struct mlx5_ib_match_params matcher_mask;
+	int			mask_len;
+	enum mlx5_ib_flow_type	flow_type;
+	u16			priority;
+	struct mlx5_core_dev	*mdev;
+	atomic_t		usecnt;
+	u8			match_criteria_enable;
 };
 
 struct mlx5_ib_flow_db {
@@ -461,7 +469,7 @@ struct mlx5_umr_wr {
 	u32				mkey;
 };
 
-static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr)
+static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct mlx5_umr_wr, wr);
 }
@@ -665,6 +673,7 @@ struct mlx5_ib_counters {
 	size_t *offsets;
 	u32 num_q_counters;
 	u32 num_cong_counters;
+	u32 num_ext_ppcnt_counters;
 	u16 set_id;
 	bool set_id_valid;
 };
@@ -851,6 +860,7 @@ to_mcounters(struct ib_counters *ibcntrs)
 
 struct mlx5_ib_dev {
 	struct ib_device		ib_dev;
+	const struct uverbs_object_tree_def *driver_trees[6];
 	struct mlx5_core_dev		*mdev;
 	struct mlx5_roce		roce[MLX5_MAX_PORTS];
 	int				num_ports;
@@ -1004,8 +1014,8 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr);
 int mlx5_ib_destroy_srq(struct ib_srq *srq);
-int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr);
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr);
 struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 				struct ib_qp_init_attr *init_attr,
 				struct ib_udata *udata);
@@ -1014,10 +1024,12 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
 		     struct ib_qp_init_attr *qp_init_attr);
 int mlx5_ib_destroy_qp(struct ib_qp *qp);
-int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr);
-int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+void mlx5_ib_drain_sq(struct ib_qp *qp);
+void mlx5_ib_drain_rq(struct ib_qp *qp);
+int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr);
+int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
 int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
 			  void *buffer, u32 length,
@@ -1183,10 +1195,8 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
 int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
 			u64 guid, int type);
 
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
-			       int index);
-int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
-			   int index, enum ib_gid_type *gid_type);
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
+			       const struct ib_gid_attr *attr);
 
 void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
 int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
@@ -1200,10 +1210,10 @@ int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
 int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 			 int qp_attr_mask,
 			 struct ib_qp_init_attr *qp_init_attr);
-int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr);
-int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr);
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr);
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr);
 void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
 
 int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
@@ -1217,6 +1227,36 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
 void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
 				  u8 port_num);
 
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+			struct mlx5_ib_ucontext *context);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
+			  struct mlx5_ib_ucontext *context);
+const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
+struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
+	struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
+	void *cmd_in, int inlen, int dest_id, int dest_type);
+bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
+int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
+#else
+static inline int
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+		    struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; };
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
+					struct mlx5_ib_ucontext *context) {}
+static inline const struct uverbs_object_tree_def *
+mlx5_ib_get_devx_tree(void) { return NULL; }
+static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
+					     int *dest_type)
+{
+	return false;
+}
+static inline int
+mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
+{
+	return 0;
+}
+#endif
 static inline void init_query_mad(struct ib_smp *mad)
 {
 	mad->base_version  = 1;
@@ -1318,4 +1358,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
 unsigned long mlx5_ib_get_xlt_emergency_page(void);
 void mlx5_ib_put_xlt_emergency_page(void);
 
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+			struct mlx5_bfreg_info *bfregi, u32 bfregn,
+			bool dyn_bfreg);
 #endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 90a9c46..9fb1d9c 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -271,16 +271,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
 	struct mlx5_ib_dev *dev = ent->dev;
-	char lbuf[20];
+	char lbuf[20] = {0};
 	u32 var;
 	int err;
 	int c;
 
-	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+	count = min(count, sizeof(lbuf) - 1);
+	if (copy_from_user(lbuf, buf, count))
 		return -EFAULT;
 
 	c = order2idx(dev, ent->order);
-	lbuf[sizeof(lbuf) - 1] = 0;
 
 	if (sscanf(lbuf, "%u", &var) != 1)
 		return -EINVAL;
@@ -310,19 +310,11 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
 	char lbuf[20];
 	int err;
 
-	if (*pos)
-		return 0;
-
 	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
 	if (err < 0)
 		return err;
 
-	if (copy_to_user(buf, lbuf, err))
-		return -EFAULT;
-
-	*pos += err;
-
-	return err;
+	return simple_read_from_buffer(buf, count, pos, lbuf, err);
 }
 
 static const struct file_operations size_fops = {
@@ -337,16 +329,16 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
 {
 	struct mlx5_cache_ent *ent = filp->private_data;
 	struct mlx5_ib_dev *dev = ent->dev;
-	char lbuf[20];
+	char lbuf[20] = {0};
 	u32 var;
 	int err;
 	int c;
 
-	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+	count = min(count, sizeof(lbuf) - 1);
+	if (copy_from_user(lbuf, buf, count))
 		return -EFAULT;
 
 	c = order2idx(dev, ent->order);
-	lbuf[sizeof(lbuf) - 1] = 0;
 
 	if (sscanf(lbuf, "%u", &var) != 1)
 		return -EINVAL;
@@ -372,19 +364,11 @@ static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
 	char lbuf[20];
 	int err;
 
-	if (*pos)
-		return 0;
-
 	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
 	if (err < 0)
 		return err;
 
-	if (copy_to_user(buf, lbuf, err))
-		return -EFAULT;
-
-	*pos += err;
-
-	return err;
+	return simple_read_from_buffer(buf, count, pos, lbuf, err);
 }
 
 static const struct file_operations limit_fops = {
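
The four debugfs hunks above converge on one pattern: reads hand the *pos/count bookkeeping to simple_read_from_buffer(), and writes bound the user copy so the on-stack buffer stays NUL-terminated before parsing. A condensed sketch of that pattern, detached from the MR cache (the backing variable and names are made up; wiring into a file_operations is omitted):

	#include <linux/fs.h>
	#include <linux/kernel.h>
	#include <linux/uaccess.h>

	static unsigned int example_val;	/* hypothetical attribute */

	static ssize_t example_read(struct file *filp, char __user *buf,
				    size_t count, loff_t *pos)
	{
		char lbuf[20];
		int len;

		len = scnprintf(lbuf, sizeof(lbuf), "%u\n", example_val);
		/* Handles short reads and advances *pos. */
		return simple_read_from_buffer(buf, count, pos, lbuf, len);
	}

	static ssize_t example_write(struct file *filp, const char __user *buf,
				     size_t count, loff_t *pos)
	{
		char lbuf[20] = {0};
		unsigned int var;

		/* Copy at most sizeof(lbuf) - 1 so the NUL terminator survives. */
		count = min(count, sizeof(lbuf) - 1);
		if (copy_from_user(lbuf, buf, count))
			return -EFAULT;
		if (kstrtouint(lbuf, 0, &var))
			return -EINVAL;

		example_val = var;
		return count;
	}
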
@@ -914,7 +898,7 @@ static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 				  struct mlx5_umr_wr *umrwr)
 {
 	struct umr_common *umrc = &dev->umrc;
-	struct ib_send_wr *bad;
+	const struct ib_send_wr *bad;
 	int err;
 	struct mlx5_ib_umr_context umr_context;
 
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a4f1f63..6cba2a0 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -563,32 +563,21 @@ static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
 }
 
 static int alloc_bfreg(struct mlx5_ib_dev *dev,
-		       struct mlx5_bfreg_info *bfregi,
-		       enum mlx5_ib_latency_class lat)
+		       struct mlx5_bfreg_info *bfregi)
 {
-	int bfregn = -EINVAL;
+	int bfregn = -ENOMEM;
 
 	mutex_lock(&bfregi->lock);
-	switch (lat) {
-	case MLX5_IB_LATENCY_CLASS_LOW:
+	if (bfregi->ver >= 2) {
+		bfregn = alloc_high_class_bfreg(dev, bfregi);
+		if (bfregn < 0)
+			bfregn = alloc_med_class_bfreg(dev, bfregi);
+	}
+
+	if (bfregn < 0) {
 		BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
 		bfregn = 0;
 		bfregi->count[bfregn]++;
-		break;
-
-	case MLX5_IB_LATENCY_CLASS_MEDIUM:
-		if (bfregi->ver < 2)
-			bfregn = -ENOMEM;
-		else
-			bfregn = alloc_med_class_bfreg(dev, bfregi);
-		break;
-
-	case MLX5_IB_LATENCY_CLASS_HIGH:
-		if (bfregi->ver < 2)
-			bfregn = -ENOMEM;
-		else
-			bfregn = alloc_high_class_bfreg(dev, bfregi);
-		break;
 	}
 	mutex_unlock(&bfregi->lock);
 
@@ -641,13 +630,13 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
 static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
 			       struct mlx5_ib_cq *recv_cq);
 
-static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
-			       struct mlx5_bfreg_info *bfregi, int bfregn,
-			       bool dyn_bfreg)
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+			struct mlx5_bfreg_info *bfregi, u32 bfregn,
+			bool dyn_bfreg)
 {
-	int bfregs_per_sys_page;
-	int index_of_sys_page;
-	int offset;
+	unsigned int bfregs_per_sys_page;
+	u32 index_of_sys_page;
+	u32 offset;
 
 	bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
 				MLX5_NON_FP_BFREGS_PER_UAR;
@@ -655,6 +644,10 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
 
 	if (dyn_bfreg) {
 		index_of_sys_page += bfregi->num_static_sys_pages;
+
+		if (index_of_sys_page >= bfregi->num_sys_pages)
+			return -EINVAL;
+
 		if (bfregn > bfregi->num_dyn_bfregs ||
 		    bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
 			mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
@@ -819,21 +812,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		bfregn = MLX5_CROSS_CHANNEL_BFREG;
 	}
 	else {
-		bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH);
-		if (bfregn < 0) {
-			mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n");
-			mlx5_ib_dbg(dev, "reverting to medium latency\n");
-			bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM);
-			if (bfregn < 0) {
-				mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n");
-				mlx5_ib_dbg(dev, "reverting to high latency\n");
-				bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW);
-				if (bfregn < 0) {
-					mlx5_ib_warn(dev, "bfreg allocation failed\n");
-					return bfregn;
-				}
-			}
-		}
+		bfregn = alloc_bfreg(dev, &context->bfregi);
+		if (bfregn < 0)
+			return bfregn;
 	}
 
 	mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
@@ -1626,7 +1607,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	struct mlx5_ib_resources *devr = &dev->devr;
 	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
 	struct mlx5_core_dev *mdev = dev->mdev;
-	struct mlx5_ib_create_qp_resp resp;
+	struct mlx5_ib_create_qp_resp resp = {};
 	struct mlx5_ib_cq *send_cq;
 	struct mlx5_ib_cq *recv_cq;
 	unsigned long flags;
@@ -2555,18 +2536,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
 		if (!(ah_flags & IB_AH_GRH))
 			return -EINVAL;
-		err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index,
-					     &gid_type);
-		if (err)
-			return err;
+
 		memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
 		if (qp->ibqp.qp_type == IB_QPT_RC ||
 		    qp->ibqp.qp_type == IB_QPT_UC ||
 		    qp->ibqp.qp_type == IB_QPT_XRC_INI ||
 		    qp->ibqp.qp_type == IB_QPT_XRC_TGT)
-			path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
-								  grh->sgid_index);
+			path->udp_sport =
+				mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr);
 		path->dci_cfi_prio_sl = (sl & 0x7) << 4;
+		gid_type = ah->grh.sgid_attr->gid_type;
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
 			path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f;
 	} else {
@@ -3529,7 +3508,7 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
 }
 
 static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
-			 struct ib_send_wr *wr, void *qend,
+			 const struct ib_send_wr *wr, void *qend,
 			 struct mlx5_ib_qp *qp, int *size)
 {
 	void *seg = eseg;
@@ -3582,7 +3561,7 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
 }
 
 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
-			     struct ib_send_wr *wr)
+			     const struct ib_send_wr *wr)
 {
 	memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
 	dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
@@ -3730,9 +3709,9 @@ static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
 
 static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
 			       struct mlx5_wqe_umr_ctrl_seg *umr,
-			       struct ib_send_wr *wr, int atomic)
+			       const struct ib_send_wr *wr, int atomic)
 {
-	struct mlx5_umr_wr *umrwr = umr_wr(wr);
+	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
 	memset(umr, 0, sizeof(*umr));
 
@@ -3803,9 +3782,10 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
 	seg->status = MLX5_MKEY_STATUS_FREE;
 }
 
-static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
+static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg,
+				 const struct ib_send_wr *wr)
 {
-	struct mlx5_umr_wr *umrwr = umr_wr(wr);
+	const struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
 	memset(seg, 0, sizeof(*seg));
 	if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
@@ -3854,7 +3834,7 @@ static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
 	seg += mr_list_size;
 }
 
-static __be32 send_ieth(struct ib_send_wr *wr)
+static __be32 send_ieth(const struct ib_send_wr *wr)
 {
 	switch (wr->opcode) {
 	case IB_WR_SEND_WITH_IMM:
@@ -3886,7 +3866,7 @@ static u8 wq_sig(void *wqe)
 	return calc_sig(wqe, (*((u8 *)wqe + 8) & 0x3f) << 4);
 }
 
-static int set_data_inl_seg(struct mlx5_ib_qp *qp, struct ib_send_wr *wr,
+static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
 			    void *wqe, int *sz)
 {
 	struct mlx5_wqe_inline_seg *seg;
@@ -4032,7 +4012,7 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
 	return 0;
 }
 
-static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
+static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
 				struct mlx5_ib_qp *qp, void **seg, int *size)
 {
 	struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
@@ -4134,7 +4114,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
 }
 
 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
-				 struct ib_sig_handover_wr *wr, u32 size,
+				 const struct ib_sig_handover_wr *wr, u32 size,
 				 u32 length, u32 pdn)
 {
 	struct ib_mr *sig_mr = wr->sig_mr;
@@ -4165,10 +4145,10 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 }
 
 
-static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
-			  void **seg, int *size)
+static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
+			  struct mlx5_ib_qp *qp, void **seg, int *size)
 {
-	struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
+	const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
 	struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
 	u32 pdn = get_pd(qp)->pdn;
 	u32 xlt_size;
@@ -4243,7 +4223,7 @@ static int set_psv_wr(struct ib_sig_domain *domain,
 }
 
 static int set_reg_wr(struct mlx5_ib_qp *qp,
-		      struct ib_reg_wr *wr,
+		      const struct ib_reg_wr *wr,
 		      void **seg, int *size)
 {
 	struct mlx5_ib_mr *mr = to_mmr(wr->mr);
@@ -4314,10 +4294,10 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 	}
 }
 
-static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 		     struct mlx5_wqe_ctrl_seg **ctrl,
-		     struct ib_send_wr *wr, unsigned *idx,
-		     int *size, int nreq)
+		     const struct ib_send_wr *wr, unsigned *idx,
+		     int *size, int nreq, bool send_signaled, bool solicited)
 {
 	if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
 		return -ENOMEM;
@@ -4328,10 +4308,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 	*(uint32_t *)(*seg + 8) = 0;
 	(*ctrl)->imm = send_ieth(wr);
 	(*ctrl)->fm_ce_se = qp->sq_signal_bits |
-		(wr->send_flags & IB_SEND_SIGNALED ?
-		 MLX5_WQE_CTRL_CQ_UPDATE : 0) |
-		(wr->send_flags & IB_SEND_SOLICITED ?
-		 MLX5_WQE_CTRL_SOLICITED : 0);
+		(send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
+		(solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
 
 	*seg += sizeof(**ctrl);
 	*size = sizeof(**ctrl) / 16;
@@ -4339,6 +4317,16 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
 	return 0;
 }
 
+static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
+		     struct mlx5_wqe_ctrl_seg **ctrl,
+		     const struct ib_send_wr *wr, unsigned *idx,
+		     int *size, int nreq)
+{
+	return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq,
+			   wr->send_flags & IB_SEND_SIGNALED,
+			   wr->send_flags & IB_SEND_SOLICITED);
+}
+
 static void finish_wqe(struct mlx5_ib_qp *qp,
 		       struct mlx5_wqe_ctrl_seg *ctrl,
 		       u8 size, unsigned idx, u64 wr_id,
@@ -4360,9 +4348,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
 	qp->sq.w_list[idx].next = qp->sq.cur_post;
 }
 
-
-int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		      struct ib_send_wr **bad_wr)
+static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			      const struct ib_send_wr **bad_wr, bool drain)
 {
 	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4393,7 +4380,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
-	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
 		err = -EIO;
 		*bad_wr = wr;
 		nreq = 0;
@@ -4498,10 +4485,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 				 * SET_PSV WQEs are not signaled and solicited
 				 * on error
 				 */
-				wr->send_flags &= ~IB_SEND_SIGNALED;
-				wr->send_flags |= IB_SEND_SOLICITED;
-				err = begin_wqe(qp, &seg, &ctrl, wr,
-						&idx, &size, nreq);
+				err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
+						  &size, nreq, false, true);
 				if (err) {
 					mlx5_ib_warn(dev, "\n");
 					err = -ENOMEM;
@@ -4520,8 +4505,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 				finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
 					   fence, MLX5_OPCODE_SET_PSV);
-				err = begin_wqe(qp, &seg, &ctrl, wr,
-						&idx, &size, nreq);
+				err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
+						  &size, nreq, false, true);
 				if (err) {
 					mlx5_ib_warn(dev, "\n");
 					err = -ENOMEM;
@@ -4690,13 +4675,19 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return err;
 }
 
+int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		      const struct ib_send_wr **bad_wr)
+{
+	return _mlx5_ib_post_send(ibqp, wr, bad_wr, false);
+}
+
 static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
 {
 	sig->signature = calc_sig(sig, size);
 }
 
-int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr, bool drain)
 {
 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
 	struct mlx5_wqe_data_seg *scat;
@@ -4714,7 +4705,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 
 	spin_lock_irqsave(&qp->rq.lock, flags);
 
-	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
 		err = -EIO;
 		*bad_wr = wr;
 		nreq = 0;
@@ -4776,6 +4767,12 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	return err;
 }
 
+int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
+{
+	return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
+}
+
 static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
 {
 	switch (mlx5_state) {
@@ -5365,7 +5362,9 @@ static int set_user_rq_size(struct mlx5_ib_dev *dev,
 
 	rwq->wqe_count = ucmd->rq_wqe_count;
 	rwq->wqe_shift = ucmd->rq_wqe_shift;
-	rwq->buf_size = (rwq->wqe_count << rwq->wqe_shift);
+	if (check_shl_overflow(rwq->wqe_count, rwq->wqe_shift, &rwq->buf_size))
+		return -EINVAL;
+
 	rwq->log_rq_stride = rwq->wqe_shift;
 	rwq->log_rq_size = ilog2(rwq->wqe_count);
 	return 0;
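
check_shl_overflow() (from <linux/overflow.h>) evaluates the shift into the destination and returns true if the result did not fit, so the user-supplied wqe_count/wqe_shift pair above can no longer wrap rwq->buf_size. The guard in isolation, as a sketch with a hypothetical helper name:

	#include <linux/errno.h>
	#include <linux/overflow.h>
	#include <linux/types.h>

	static int example_rq_buf_size(u32 wqe_count, u32 wqe_shift, u32 *buf_size)
	{
		/* True means wqe_count << wqe_shift overflowed *buf_size;
		 * the output must not be trusted in that case.
		 */
		if (check_shl_overflow(wqe_count, wqe_shift, buf_size))
			return -EINVAL;

		return 0;
	}
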
@@ -5697,3 +5696,132 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 	kvfree(in);
 	return err;
 }
+
+struct mlx5_ib_drain_cqe {
+	struct ib_cqe cqe;
+	struct completion done;
+};
+
+static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
+						     struct mlx5_ib_drain_cqe,
+						     cqe);
+
+	complete(&cqe->done);
+}
+
+/* This function returns only once the drained WR has completed */
+static void handle_drain_completion(struct ib_cq *cq,
+				    struct mlx5_ib_drain_cqe *sdrain,
+				    struct mlx5_ib_dev *dev)
+{
+	struct mlx5_core_dev *mdev = dev->mdev;
+
+	if (cq->poll_ctx == IB_POLL_DIRECT) {
+		while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
+			ib_process_cq_direct(cq, -1);
+		return;
+	}
+
+	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		struct mlx5_ib_cq *mcq = to_mcq(cq);
+		bool triggered = false;
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+		/* Make sure that the CQ handler won't run if it wasn't run yet */
+		if (!mcq->mcq.reset_notify_added)
+			mcq->mcq.reset_notify_added = 1;
+		else
+			triggered = true;
+		spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+		if (triggered) {
+			/* Wait for any scheduled/running task to be ended */
+			switch (cq->poll_ctx) {
+			case IB_POLL_SOFTIRQ:
+				irq_poll_disable(&cq->iop);
+				irq_poll_enable(&cq->iop);
+				break;
+			case IB_POLL_WORKQUEUE:
+				cancel_work_sync(&cq->work);
+				break;
+			default:
+				WARN_ON_ONCE(1);
+			}
+		}
+
+		/* Run the CQ handler - this makes sure that the drain WR will
+		 * be processed if it wasn't processed yet.
+		 */
+		mcq->mcq.comp(&mcq->mcq);
+	}
+
+	wait_for_completion(&sdrain->done);
+}
+
+void mlx5_ib_drain_sq(struct ib_qp *qp)
+{
+	struct ib_cq *cq = qp->send_cq;
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct mlx5_ib_drain_cqe sdrain;
+	const struct ib_send_wr *bad_swr;
+	struct ib_rdma_wr swr = {
+		.wr = {
+			.next = NULL,
+			{ .wr_cqe	= &sdrain.cqe, },
+			.opcode	= IB_WR_RDMA_WRITE,
+		},
+	};
+	int ret;
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	sdrain.cqe.done = mlx5_ib_drain_qp_done;
+	init_completion(&sdrain.done);
+
+	ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	handle_drain_completion(cq, &sdrain, dev);
+}
+
+void mlx5_ib_drain_rq(struct ib_qp *qp)
+{
+	struct ib_cq *cq = qp->recv_cq;
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct mlx5_ib_drain_cqe rdrain;
+	struct ib_recv_wr rwr = {};
+	const struct ib_recv_wr *bad_rwr;
+	int ret;
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	rwr.wr_cqe = &rdrain.cqe;
+	rdrain.cqe.done = mlx5_ib_drain_qp_done;
+	init_completion(&rdrain.done);
+
+	ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	handle_drain_completion(cq, &rdrain, dev);
+}
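
The two drain helpers above implement the generic verbs drain scheme: move the QP to the error state, post a single marker WR whose completion is guaranteed to flush through, and block until that completion surfaces, with extra handling for a device stuck in internal error. Kernel consumers normally reach them through the core ib_drain_*() helpers rather than calling driver entry points directly; a minimal teardown sketch under that assumption (the driver's drain callbacks are wired up outside this hunk):

	#include <rdma/ib_verbs.h>

	/* Hypothetical ULP teardown path for a kernel-owned QP. */
	static void example_teardown_qp(struct ib_qp *qp)
	{
		/* Flushes every outstanding send and receive WR to the CQs
		 * before the QP goes away; drivers such as mlx5 supply their
		 * own drain implementation behind this helper.
		 */
		ib_drain_qp(qp);
		ib_destroy_qp(qp);
	}
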
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index f5de5ad..d359fec 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -446,8 +446,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
 	spin_unlock(&srq->lock);
 }
 
-int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			  struct ib_recv_wr **bad_wr)
+int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			  const struct ib_recv_wr **bad_wr)
 {
 	struct mlx5_ib_srq *srq = to_msrq(ibsrq);
 	struct mlx5_wqe_srq_next_seg *next;
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index e7f6223..0823c0b 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -281,10 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
 		header->grh.flow_label    =
 			ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
 		header->grh.hop_limit     = ah->av->hop_limit;
-		ib_get_cached_gid(&dev->ib_dev,
-				  be32_to_cpu(ah->av->port_pd) >> 24,
-				  ah->av->gid_index % dev->limits.gid_table_len,
-				  &header->grh.source_gid, NULL);
+		header->grh.source_gid = ah->ibah.sgid_attr->gid;
 		memcpy(header->grh.destination_gid.raw,
 		       ah->av->dgid, 16);
 	}
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 5508afb..220a3e4 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -519,10 +519,10 @@ int mthca_max_srq_sge(struct mthca_dev *dev);
 void mthca_srq_event(struct mthca_dev *dev, u32 srqn,
 		     enum ib_event_type event_type);
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr);
-int mthca_tavor_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr);
-int mthca_arbel_post_srq_recv(struct ib_srq *srq, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr);
+int mthca_tavor_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr);
 
 void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
 		    enum ib_event_type event_type);
@@ -530,14 +530,14 @@ int mthca_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_m
 		   struct ib_qp_init_attr *qp_init_attr);
 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 		    struct ib_udata *udata);
-int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr);
-int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr);
-int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr);
-int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr);
+int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr);
+int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr);
+int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr);
+int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr);
 void mthca_free_err_wqe(struct mthca_dev *dev, struct mthca_qp *qp, int is_send,
 			int index, int *dbd, __be32 *new_wqe);
 int mthca_alloc_qp(struct mthca_dev *dev,
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 541f237..0d3473b 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -96,8 +96,9 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
 	props->page_size_cap       = mdev->limits.page_size_cap;
 	props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
 	props->max_qp_wr           = mdev->limits.max_wqes;
-	props->max_sge             = mdev->limits.max_sg;
-	props->max_sge_rd          = props->max_sge;
+	props->max_send_sge        = mdev->limits.max_sg;
+	props->max_recv_sge        = mdev->limits.max_sg;
+	props->max_sge_rd          = mdev->limits.max_sg;
 	props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
 	props->max_cqe             = mdev->limits.max_cqes;
 	props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
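
Device attributes now advertise send and receive SGE limits separately instead of a single max_sge; mthca simply mirrors the same hardware limit into all three fields. A sketch of how a kernel consumer might size a QP against the split caps (the helper and the clamp of 16 are illustrative only):

	#include <linux/kernel.h>
	#include <rdma/ib_verbs.h>

	static void example_fill_qp_caps(struct ib_device *ibdev,
					 struct ib_qp_init_attr *init_attr)
	{
		const struct ib_device_attr *attrs = &ibdev->attrs;

		/* Send and receive queues may report different limits now. */
		init_attr->cap.max_send_sge = min_t(u32, attrs->max_send_sge, 16);
		init_attr->cap.max_recv_sge = min_t(u32, attrs->max_recv_sge, 16);
	}
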
@@ -448,7 +449,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
 	int err;
 
 	if (init_attr->srq_type != IB_SRQT_BASIC)
-		return ERR_PTR(-ENOSYS);
+		return ERR_PTR(-EOPNOTSUPP);
 
 	srq = kmalloc(sizeof *srq, GFP_KERNEL);
 	if (!srq)
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index af1c49d..3d37f23 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1488,7 +1488,7 @@ void mthca_free_qp(struct mthca_dev *dev,
 
 /* Create UD header for an MLX send and build a data segment for it */
 static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
-			    int ind, struct ib_ud_wr *wr,
+			    int ind, const struct ib_ud_wr *wr,
 			    struct mthca_mlx_seg *mlx,
 			    struct mthca_data_seg *data)
 {
@@ -1581,7 +1581,7 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
 }
 
 static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
-					   struct ib_atomic_wr *wr)
+					   const struct ib_atomic_wr *wr)
 {
 	if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 		aseg->swap_add = cpu_to_be64(wr->swap);
@@ -1594,7 +1594,7 @@ static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
 }
 
 static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
-			     struct ib_ud_wr *wr)
+			     const struct ib_ud_wr *wr)
 {
 	useg->lkey    = cpu_to_be32(to_mah(wr->ah)->key);
 	useg->av_addr =	cpu_to_be64(to_mah(wr->ah)->avdma);
@@ -1604,15 +1604,15 @@ static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
 }
 
 static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
-			     struct ib_ud_wr *wr)
+			     const struct ib_ud_wr *wr)
 {
 	memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE);
 	useg->dqpn = cpu_to_be32(wr->remote_qpn);
 	useg->qkey = cpu_to_be32(wr->remote_qkey);
 }
 
-int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr)
+int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -1814,8 +1814,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return err;
 }
 
-int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr)
+int mthca_tavor_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -1925,8 +1925,8 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	return err;
 }
 
-int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			  struct ib_send_wr **bad_wr)
+int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			  const struct ib_send_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
@@ -2165,8 +2165,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return err;
 }
 
-int mthca_arbel_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr)
+int mthca_arbel_post_receive(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibqp->device);
 	struct mthca_qp *qp = to_mqp(ibqp);
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index f79732b..9a3fc6f 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -472,8 +472,8 @@ void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
 	spin_unlock(&srq->lock);
 }
 
-int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr)
+int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
@@ -572,8 +572,8 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 	return err;
 }
 
-int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			      struct ib_recv_wr **bad_wr)
+int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			      const struct ib_recv_wr **bad_wr)
 {
 	struct mthca_dev *dev = to_mdev(ibsrq->device);
 	struct mthca_srq *srq = to_msrq(ibsrq);
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 00c27291..bedaa02 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -159,7 +159,7 @@ do { \
 
 #define NES_EVENT_TIMEOUT   1200000
 #else
-#define nes_debug(level, fmt, args...)
+#define nes_debug(level, fmt, args...) no_printk(fmt, ##args)
 #define assert(expr)          do {} while (0)
 
 #define NES_EVENT_TIMEOUT   100000
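
Defining the disabled nes_debug() as no_printk() instead of an empty macro keeps printf-style format checking (and argument evaluation), so format/argument mismatches cannot hide in builds without CONFIG_INFINIBAND_NES_DEBUG. The idiom by itself, with a made-up macro and CONFIG symbol:

	#include <linux/printk.h>

	#ifdef CONFIG_EXAMPLE_DEBUG
	#define example_dbg(fmt, args...) \
		printk(KERN_DEBUG "example: " fmt, ##args)
	#else
	/* Never prints, but still type-checks fmt against args. */
	#define example_dbg(fmt, args...) no_printk(fmt, ##args)
	#endif
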
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 6cdfbf8..2b67ace 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -58,6 +58,7 @@
 #include <net/neighbour.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
+#include <net/secure_seq.h>
 #include <net/tcp.h>
 #include <linux/fcntl.h>
 
@@ -1445,7 +1446,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 					struct nes_cm_listener *listener)
 {
 	struct nes_cm_node *cm_node;
-	struct timespec ts;
 	int oldarpindex = 0;
 	int arpindex = 0;
 	struct nes_device *nesdev;
@@ -1496,8 +1496,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 	cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
 	cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
 				     NES_CM_DEFAULT_RCV_WND_SCALE;
-	ts = current_kernel_time();
-	cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+	cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr),
+							htonl(cm_node->rem_addr),
+							htons(cm_node->loc_port),
+							htons(cm_node->rem_port));
 	cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
 				 sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
 	cm_node->tcp_cntxt.rcv_nxt = 0;
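
Seeding the initial sequence number from secure_tcp_seq() ties it to the connection 4-tuple through the same keyed hash the TCP stack uses, instead of the easily guessed wall-clock nanoseconds. Reduced to the call itself (a hypothetical wrapper; the addresses and ports are host-order values, as in the hunk above):

	#include <asm/byteorder.h>
	#include <linux/types.h>
	#include <net/secure_seq.h>

	static u32 example_iwarp_isn(u32 laddr, u32 raddr, u16 lport, u16 rport)
	{
		/* secure_tcp_seq() expects network byte order. */
		return secure_tcp_seq(htonl(laddr), htonl(raddr),
				      htons(lport), htons(rport));
	}
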
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 18a7de1..bd0675d 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -70,8 +70,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
 static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
 static void nes_terminate_start_timer(struct nes_qp *nesqp);
 
-#ifdef CONFIG_INFINIBAND_NES_DEBUG
-static unsigned char *nes_iwarp_state_str[] = {
+static const char *const nes_iwarp_state_str[] = {
 	"Non-Existent",
 	"Idle",
 	"RTS",
@@ -82,7 +81,7 @@ static unsigned char *nes_iwarp_state_str[] = {
 	"RSVD2",
 };
 
-static unsigned char *nes_tcp_state_str[] = {
+static const char *const nes_tcp_state_str[] = {
 	"Non-Existent",
 	"Closed",
 	"Listen",
@@ -100,7 +99,6 @@ static unsigned char *nes_tcp_state_str[] = {
 	"RSVD3",
 	"RSVD4",
 };
-#endif
 
 static inline void print_ip(struct nes_cm_node *cm_node)
 {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 32f2655..6940c72 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -436,7 +436,8 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
 	props->max_mr_size = 0x80000000;
 	props->max_qp = nesibdev->max_qp;
 	props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
-	props->max_sge = nesdev->nesadapter->max_sge;
+	props->max_send_sge = nesdev->nesadapter->max_sge;
+	props->max_recv_sge = nesdev->nesadapter->max_sge;
 	props->max_cq = nesibdev->max_cq;
 	props->max_cqe = nesdev->nesadapter->max_cqe;
 	props->max_mr = nesibdev->max_mr;
@@ -754,26 +755,6 @@ static int nes_dealloc_pd(struct ib_pd *ibpd)
 
 
 /**
- * nes_create_ah
- */
-static struct ib_ah *nes_create_ah(struct ib_pd *pd,
-				   struct rdma_ah_attr *ah_attr,
-				   struct ib_udata *udata)
-{
-	return ERR_PTR(-ENOSYS);
-}
-
-
-/**
- * nes_destroy_ah
- */
-static int nes_destroy_ah(struct ib_ah *ah)
-{
-	return -ENOSYS;
-}
-
-
-/**
  * nes_get_encoded_size
  */
 static inline u8 nes_get_encoded_size(int *size)
@@ -3004,42 +2985,9 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	return err;
 }
 
-
-/**
- * nes_muticast_attach
- */
-static int nes_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	nes_debug(NES_DBG_INIT, "\n");
-	return -ENOSYS;
-}
-
-
-/**
- * nes_multicast_detach
- */
-static int nes_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	nes_debug(NES_DBG_INIT, "\n");
-	return -ENOSYS;
-}
-
-
-/**
- * nes_process_mad
- */
-static int nes_process_mad(struct ib_device *ibdev, int mad_flags,
-		u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-		const struct ib_mad_hdr *in, size_t in_mad_size,
-		struct ib_mad_hdr *out, size_t *out_mad_size,
-		u16 *out_mad_pkey_index)
-{
-	nes_debug(NES_DBG_INIT, "\n");
-	return -ENOSYS;
-}
-
 static inline void
-fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselkey)
+fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, const struct ib_send_wr *ib_wr,
+		 u32 uselkey)
 {
 	int sge_index;
 	int total_payload_length = 0;
@@ -3065,8 +3013,8 @@ fill_wqe_sg_send(struct nes_hw_qp_wqe *wqe, struct ib_send_wr *ib_wr, u32 uselke
 /**
  * nes_post_send
  */
-static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
-		struct ib_send_wr **bad_wr)
+static int nes_post_send(struct ib_qp *ibqp, const struct ib_send_wr *ib_wr,
+			 const struct ib_send_wr **bad_wr)
 {
 	u64 u64temp;
 	unsigned long flags = 0;
@@ -3327,8 +3275,8 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
 /**
  * nes_post_recv
  */
-static int nes_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
-		struct ib_recv_wr **bad_wr)
+static int nes_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr,
+			 const struct ib_recv_wr **bad_wr)
 {
 	u64 u64temp;
 	unsigned long flags = 0;
@@ -3735,8 +3683,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
 	nesibdev->ibdev.mmap = nes_mmap;
 	nesibdev->ibdev.alloc_pd = nes_alloc_pd;
 	nesibdev->ibdev.dealloc_pd = nes_dealloc_pd;
-	nesibdev->ibdev.create_ah = nes_create_ah;
-	nesibdev->ibdev.destroy_ah = nes_destroy_ah;
 	nesibdev->ibdev.create_qp = nes_create_qp;
 	nesibdev->ibdev.modify_qp = nes_modify_qp;
 	nesibdev->ibdev.query_qp = nes_query_qp;
@@ -3753,10 +3699,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
 	nesibdev->ibdev.alloc_mr = nes_alloc_mr;
 	nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
 
-	nesibdev->ibdev.attach_mcast = nes_multicast_attach;
-	nesibdev->ibdev.detach_mcast = nes_multicast_detach;
-	nesibdev->ibdev.process_mad = nes_process_mad;
-
 	nesibdev->ibdev.req_notify_cq = nes_req_notify_cq;
 	nesibdev->ibdev.post_send = nes_post_send;
 	nesibdev->ibdev.post_recv = nes_post_recv;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 3897b645..58188fe 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
 }
 
 static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
-			struct rdma_ah_attr *attr, union ib_gid *sgid,
+			struct rdma_ah_attr *attr, const union ib_gid *sgid,
 			int pdid, bool *isvlan, u16 vlan_tag)
 {
 	int status;
@@ -164,17 +164,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
 	struct ocrdma_ah *ah;
 	bool isvlan = false;
 	u16 vlan_tag = 0xffff;
-	struct ib_gid_attr sgid_attr;
+	const struct ib_gid_attr *sgid_attr;
 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
-	const struct ib_global_route *grh;
-	union ib_gid sgid;
 
 	if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
 	    !(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))
 		return ERR_PTR(-EINVAL);
 
-	grh = rdma_ah_read_grh(attr);
 	if (atomic_cmpxchg(&dev->update_sl, 1, 0))
 		ocrdma_init_service_level(dev);
 
@@ -186,20 +183,15 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
 	if (status)
 		goto av_err;
 
-	status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid,
-				   &sgid_attr);
-	if (status) {
-		pr_err("%s(): Failed to query sgid, status = %d\n",
-		      __func__, status);
-		goto av_conf_err;
-	}
-	if (is_vlan_dev(sgid_attr.ndev))
-		vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
-	dev_put(sgid_attr.ndev);
-	/* Get network header type for this GID */
-	ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+	sgid_attr = attr->grh.sgid_attr;
+	if (is_vlan_dev(sgid_attr->ndev))
+		vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev);
 
-	status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag);
+	/* Get network header type for this GID */
+	ah->hdr_type = rdma_gid_attr_network_type(sgid_attr);
+
+	status = set_av_attr(dev, ah, attr, &sgid_attr->gid, pd->id,
+			     &isvlan, vlan_tag);
 	if (status)
 		goto av_conf_err;
 
@@ -262,12 +254,6 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 	return 0;
 }
 
-int ocrdma_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
-{
-	/* modify_ah is unsupported */
-	return -ENOSYS;
-}
-
 int ocrdma_process_mad(struct ib_device *ibdev,
 		       int process_mad_flags,
 		       u8 port_num,
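
ocrdma_create_ah() now reads the source GID, its type and its netdev straight from the sgid_attr carried in the address handle attributes, rather than re-querying the cached GID table and dropping the netdev reference by hand; the ocrdma and qedr QP/GSI paths later in this section make the same switch. A condensed sketch of the consumer side of that pattern (hypothetical helper, error handling trimmed):

	#include <linux/if_vlan.h>
	#include <rdma/ib_verbs.h>

	static void example_read_av(struct rdma_ah_attr *attr, u16 *vlan_tag,
				    enum rdma_network_type *net_type)
	{
		const struct ib_global_route *grh = rdma_ah_read_grh(attr);
		const struct ib_gid_attr *sgid_attr = grh->sgid_attr;

		/* The core holds the GID/netdev reference for the lifetime of
		 * the attr, so no ib_get_cached_gid()/dev_put() pair is needed.
		 */
		*vlan_tag = 0xffff;
		if (is_vlan_dev(sgid_attr->ndev))
			*vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev);

		*net_type = rdma_gid_attr_network_type(sgid_attr);
	}
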
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 1a65c47..c0c32c9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -55,7 +55,6 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
 			       struct ib_udata *udata);
 int ocrdma_destroy_ah(struct ib_ah *ah);
 int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-int ocrdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
 
 int ocrdma_process_mad(struct ib_device *,
 		       int process_mad_flags,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 6c136e5..e578281 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1365,8 +1365,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
 		dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
 					OCRDMA_HBA_ATTRB_PTNUM_MASK)
 					>> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
-		strncpy(dev->model_number,
-			hba_attribs->controller_model_number, 31);
+		strlcpy(dev->model_number,
+			hba_attribs->controller_model_number,
+			sizeof(dev->model_number));
 	}
 	dma_free_coherent(&dev->nic_info.pdev->dev, dma.size, dma.va, dma.pa);
 free_mqe:
@@ -2494,8 +2495,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 {
 	int status;
 	struct rdma_ah_attr *ah_attr = &attrs->ah_attr;
-	union ib_gid sgid;
-	struct ib_gid_attr sgid_attr;
+	const struct ib_gid_attr *sgid_attr;
 	u32 vlan_id = 0xFFFF;
 	u8 mac_addr[6], hdr_type;
 	union {
@@ -2525,25 +2525,23 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 	memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0],
 	       sizeof(cmd->params.dgid));
 
-	status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index,
-				   &sgid, &sgid_attr);
-	if (!status) {
-		vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
-		memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
-		dev_put(sgid_attr.ndev);
-	}
+	sgid_attr = ah_attr->grh.sgid_attr;
+	vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev);
+	memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN);
 
 	qp->sgid_idx = grh->sgid_index;
-	memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
+	memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0],
+	       sizeof(cmd->params.sgid));
 	status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]);
 	if (status)
 		return status;
+
 	cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
 				(mac_addr[2] << 16) | (mac_addr[3] << 24);
 
-	hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+	hdr_type = rdma_gid_attr_network_type(sgid_attr);
 	if (hdr_type == RDMA_NETWORK_IPV4) {
-		rdma_gid2ip(&sgid_addr._sockaddr, &sgid);
+		rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid);
 		rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid);
 		memcpy(&cmd->params.dgid[0],
 		       &dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 5962c0e..7832ee3 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -176,7 +176,6 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
 	dev->ibdev.create_ah = ocrdma_create_ah;
 	dev->ibdev.destroy_ah = ocrdma_destroy_ah;
 	dev->ibdev.query_ah = ocrdma_query_ah;
-	dev->ibdev.modify_ah = ocrdma_modify_ah;
 
 	dev->ibdev.poll_cq = ocrdma_poll_cq;
 	dev->ibdev.post_send = ocrdma_post_send;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 82e20fc..c158ca9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -89,7 +89,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
 					IB_DEVICE_SYS_IMAGE_GUID |
 					IB_DEVICE_LOCAL_DMA_LKEY |
 					IB_DEVICE_MEM_MGT_EXTENSIONS;
-	attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge);
+	attr->max_send_sge = dev->attr.max_send_sge;
+	attr->max_recv_sge = dev->attr.max_recv_sge;
 	attr->max_sge_rd = dev->attr.max_rdma_sge;
 	attr->max_cq = dev->attr.max_cq;
 	attr->max_cqe = dev->attr.max_cqe;
@@ -196,11 +197,10 @@ int ocrdma_query_port(struct ib_device *ibdev,
 	props->sm_lid = 0;
 	props->sm_sl = 0;
 	props->state = port_state;
-	props->port_cap_flags =
-	    IB_PORT_CM_SUP |
-	    IB_PORT_REINIT_SUP |
-	    IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP |
-	    IB_PORT_IP_BASED_GIDS;
+	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+				IB_PORT_DEVICE_MGMT_SUP |
+				IB_PORT_VENDOR_CLASS_SUP;
+	props->ip_gids = true;
 	props->gid_tbl_len = OCRDMA_MAX_SGID;
 	props->pkey_tbl_len = 1;
 	props->bad_pkey_cntr = 0;
@@ -1774,13 +1774,13 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
 	 * protect against processing in-flight CQEs for this QP.
 	 */
 	spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
-	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) {
 		spin_lock(&qp->rq_cq->cq_lock);
-
-	ocrdma_del_qpn_map(dev, qp);
-
-	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+		ocrdma_del_qpn_map(dev, qp);
 		spin_unlock(&qp->rq_cq->cq_lock);
+	} else {
+		ocrdma_del_qpn_map(dev, qp);
+	}
 	spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
 
 	if (!pd->uctx) {
@@ -1953,7 +1953,7 @@ int ocrdma_destroy_srq(struct ib_srq *ibsrq)
 /* unprivileged verbs and their support functions. */
 static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
 				struct ocrdma_hdr_wqe *hdr,
-				struct ib_send_wr *wr)
+				const struct ib_send_wr *wr)
 {
 	struct ocrdma_ewqe_ud_hdr *ud_hdr =
 		(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
@@ -2000,7 +2000,7 @@ static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
 static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
 				    struct ocrdma_hdr_wqe *hdr,
 				    struct ocrdma_sge *sge,
-				    struct ib_send_wr *wr, u32 wqe_size)
+				    const struct ib_send_wr *wr, u32 wqe_size)
 {
 	int i;
 	char *dpp_addr;
@@ -2038,7 +2038,7 @@ static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
 }
 
 static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
-			     struct ib_send_wr *wr)
+			     const struct ib_send_wr *wr)
 {
 	int status;
 	struct ocrdma_sge *sge;
@@ -2057,7 +2057,7 @@ static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
 }
 
 static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
-			      struct ib_send_wr *wr)
+			      const struct ib_send_wr *wr)
 {
 	int status;
 	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
@@ -2075,7 +2075,7 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
 }
 
 static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
-			      struct ib_send_wr *wr)
+			      const struct ib_send_wr *wr)
 {
 	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
 	struct ocrdma_sge *sge = ext_rw + 1;
@@ -2105,7 +2105,7 @@ static int get_encoded_page_size(int pg_sz)
 
 static int ocrdma_build_reg(struct ocrdma_qp *qp,
 			    struct ocrdma_hdr_wqe *hdr,
-			    struct ib_reg_wr *wr)
+			    const struct ib_reg_wr *wr)
 {
 	u64 fbo;
 	struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
@@ -2166,8 +2166,8 @@ static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
 	iowrite32(val, qp->sq_db);
 }
 
-int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr)
+int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		     const struct ib_send_wr **bad_wr)
 {
 	int status = 0;
 	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
@@ -2278,8 +2278,8 @@ static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
 	iowrite32(val, qp->rq_db);
 }
 
-static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
-			     u16 tag)
+static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe,
+			     const struct ib_recv_wr *wr, u16 tag)
 {
 	u32 wqe_size = 0;
 	struct ocrdma_sge *sge;
@@ -2299,8 +2299,8 @@ static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
 	ocrdma_cpu_to_le32(rqe, wqe_size);
 }
 
-int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		     struct ib_recv_wr **bad_wr)
+int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		     const struct ib_recv_wr **bad_wr)
 {
 	int status = 0;
 	unsigned long flags;
@@ -2369,8 +2369,8 @@ static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
 	iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
 }
 
-int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr)
+int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			 const struct ib_recv_wr **bad_wr)
 {
 	int status = 0;
 	unsigned long flags;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
index 9a99717..b69cfdce 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
@@ -43,10 +43,10 @@
 #ifndef __OCRDMA_VERBS_H__
 #define __OCRDMA_VERBS_H__
 
-int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *,
-		     struct ib_send_wr **bad_wr);
-int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *,
-		     struct ib_recv_wr **bad_wr);
+int ocrdma_post_send(struct ib_qp *, const struct ib_send_wr *,
+		     const struct ib_send_wr **bad_wr);
+int ocrdma_post_recv(struct ib_qp *, const struct ib_recv_wr *,
+		     const struct ib_recv_wr **bad_wr);
 
 int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
 int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags);
@@ -100,8 +100,8 @@ int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
 		      enum ib_srq_attr_mask, struct ib_udata *);
 int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
 int ocrdma_destroy_srq(struct ib_srq *);
-int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
-			 struct ib_recv_wr **bad_recv_wr);
+int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *,
+			 const struct ib_recv_wr **bad_recv_wr);
 
 int ocrdma_dereg_mr(struct ib_mr *);
 struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index ad22b32..a0af6d4 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -191,6 +191,11 @@ static int qedr_register_device(struct qedr_dev *dev)
 				     QEDR_UVERBS(MODIFY_QP) |
 				     QEDR_UVERBS(QUERY_QP) |
 				     QEDR_UVERBS(DESTROY_QP) |
+				     QEDR_UVERBS(CREATE_SRQ) |
+				     QEDR_UVERBS(DESTROY_SRQ) |
+				     QEDR_UVERBS(QUERY_SRQ) |
+				     QEDR_UVERBS(MODIFY_SRQ) |
+				     QEDR_UVERBS(POST_SRQ_RECV) |
 				     QEDR_UVERBS(REG_MR) |
 				     QEDR_UVERBS(DEREG_MR) |
 				     QEDR_UVERBS(POLL_CQ) |
@@ -229,6 +234,11 @@ static int qedr_register_device(struct qedr_dev *dev)
 	dev->ibdev.query_qp = qedr_query_qp;
 	dev->ibdev.destroy_qp = qedr_destroy_qp;
 
+	dev->ibdev.create_srq = qedr_create_srq;
+	dev->ibdev.destroy_srq = qedr_destroy_srq;
+	dev->ibdev.modify_srq = qedr_modify_srq;
+	dev->ibdev.query_srq = qedr_query_srq;
+	dev->ibdev.post_srq_recv = qedr_post_srq_recv;
 	dev->ibdev.query_pkey = qedr_query_pkey;
 
 	dev->ibdev.create_ah = qedr_create_ah;
@@ -325,8 +335,8 @@ static int qedr_alloc_resources(struct qedr_dev *dev)
 	spin_lock_init(&dev->sgid_lock);
 
 	if (IS_IWARP(dev)) {
-		spin_lock_init(&dev->idr_lock);
-		idr_init(&dev->qpidr);
+		spin_lock_init(&dev->qpidr.idr_lock);
+		idr_init(&dev->qpidr.idr);
 		dev->iwarp_wq = create_singlethread_workqueue("qedr_iwarpq");
 	}
 
@@ -653,42 +663,70 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
 #define EVENT_TYPE_NOT_DEFINED	0
 #define EVENT_TYPE_CQ		1
 #define EVENT_TYPE_QP		2
+#define EVENT_TYPE_SRQ		3
 	struct qedr_dev *dev = (struct qedr_dev *)context;
 	struct regpair *async_handle = (struct regpair *)fw_handle;
 	u64 roce_handle64 = ((u64) async_handle->hi << 32) + async_handle->lo;
 	u8 event_type = EVENT_TYPE_NOT_DEFINED;
 	struct ib_event event;
+	struct ib_srq *ibsrq;
+	struct qedr_srq *srq;
+	unsigned long flags;
 	struct ib_cq *ibcq;
 	struct ib_qp *ibqp;
 	struct qedr_cq *cq;
 	struct qedr_qp *qp;
+	u16 srq_id;
 
-	switch (e_code) {
-	case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR:
-		event.event = IB_EVENT_CQ_ERR;
-		event_type = EVENT_TYPE_CQ;
-		break;
-	case ROCE_ASYNC_EVENT_SQ_DRAINED:
-		event.event = IB_EVENT_SQ_DRAINED;
-		event_type = EVENT_TYPE_QP;
-		break;
-	case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR:
-		event.event = IB_EVENT_QP_FATAL;
-		event_type = EVENT_TYPE_QP;
-		break;
-	case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR:
-		event.event = IB_EVENT_QP_REQ_ERR;
-		event_type = EVENT_TYPE_QP;
-		break;
-	case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR:
-		event.event = IB_EVENT_QP_ACCESS_ERR;
-		event_type = EVENT_TYPE_QP;
-		break;
-	default:
+	if (IS_ROCE(dev)) {
+		switch (e_code) {
+		case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR:
+			event.event = IB_EVENT_CQ_ERR;
+			event_type = EVENT_TYPE_CQ;
+			break;
+		case ROCE_ASYNC_EVENT_SQ_DRAINED:
+			event.event = IB_EVENT_SQ_DRAINED;
+			event_type = EVENT_TYPE_QP;
+			break;
+		case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR:
+			event.event = IB_EVENT_QP_FATAL;
+			event_type = EVENT_TYPE_QP;
+			break;
+		case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR:
+			event.event = IB_EVENT_QP_REQ_ERR;
+			event_type = EVENT_TYPE_QP;
+			break;
+		case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR:
+			event.event = IB_EVENT_QP_ACCESS_ERR;
+			event_type = EVENT_TYPE_QP;
+			break;
+		case ROCE_ASYNC_EVENT_SRQ_LIMIT:
+			event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+			event_type = EVENT_TYPE_SRQ;
+			break;
+		case ROCE_ASYNC_EVENT_SRQ_EMPTY:
+			event.event = IB_EVENT_SRQ_ERR;
+			event_type = EVENT_TYPE_SRQ;
+			break;
+		default:
+			DP_ERR(dev, "unsupported event %d on handle=%llx\n",
+			       e_code, roce_handle64);
+		}
+	} else {
+		switch (e_code) {
+		case QED_IWARP_EVENT_SRQ_LIMIT:
+			event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+			event_type = EVENT_TYPE_SRQ;
+			break;
+		case QED_IWARP_EVENT_SRQ_EMPTY:
+			event.event = IB_EVENT_SRQ_ERR;
+			event_type = EVENT_TYPE_SRQ;
+			break;
+		default:
 		DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code,
 		       roce_handle64);
+		}
 	}
-
 	switch (event_type) {
 	case EVENT_TYPE_CQ:
 		cq = (struct qedr_cq *)(uintptr_t)roce_handle64;
@@ -722,6 +760,25 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle)
 		}
 		DP_ERR(dev, "QP event %d on handle %p\n", e_code, qp);
 		break;
+	case EVENT_TYPE_SRQ:
+		srq_id = (u16)roce_handle64;
+		spin_lock_irqsave(&dev->srqidr.idr_lock, flags);
+		srq = idr_find(&dev->srqidr.idr, srq_id);
+		if (srq) {
+			ibsrq = &srq->ibsrq;
+			if (ibsrq->event_handler) {
+				event.device = ibsrq->device;
+				event.element.srq = ibsrq;
+				ibsrq->event_handler(&event,
+						     ibsrq->srq_context);
+			}
+		} else {
+			DP_NOTICE(dev,
+				  "SRQ event with NULL pointer ibsrq. Handle=%llx\n",
+				  roce_handle64);
+		}
+		spin_unlock_irqrestore(&dev->srqidr.idr_lock, flags);
+		DP_NOTICE(dev, "SRQ event %d on handle %p\n", e_code, srq);
 	default:
 		break;
 	}
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 86d4511..a2d708d 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -58,6 +58,7 @@
 #define QEDR_MSG_RQ   "  RQ"
 #define QEDR_MSG_SQ   "  SQ"
 #define QEDR_MSG_QP   "  QP"
+#define QEDR_MSG_SRQ  " SRQ"
 #define QEDR_MSG_GSI  " GSI"
 #define QEDR_MSG_IWARP  " IW"
 
@@ -122,6 +123,11 @@ struct qedr_device_attr {
 
 #define QEDR_ENET_STATE_BIT	(0)
 
+struct qedr_idr {
+	spinlock_t idr_lock; /* Protect idr data-structure */
+	struct idr idr;
+};
+
 struct qedr_dev {
 	struct ib_device	ibdev;
 	struct qed_dev		*cdev;
@@ -165,8 +171,8 @@ struct qedr_dev {
 	struct qedr_cq		*gsi_rqcq;
 	struct qedr_qp		*gsi_qp;
 	enum qed_rdma_type	rdma_type;
-	spinlock_t		idr_lock; /* Protect qpidr data-structure */
-	struct idr		qpidr;
+	struct qedr_idr		qpidr;
+	struct qedr_idr		srqidr;
 	struct workqueue_struct *iwarp_wq;
 	u16			iwarp_max_mtu;
 
@@ -337,6 +343,34 @@ struct qedr_qp_hwq_info {
 				qed_chain_get_capacity(p_info->pbl)	\
 	} while (0)
 
+struct qedr_srq_hwq_info {
+	u32 max_sges;
+	u32 max_wr;
+	struct qed_chain pbl;
+	u64 p_phys_addr_tbl;
+	u32 wqe_prod;
+	u32 sge_prod;
+	u32 wr_prod_cnt;
+	u32 wr_cons_cnt;
+	u32 num_elems;
+
+	u32 *virt_prod_pair_addr;
+	dma_addr_t phy_prod_pair_addr;
+};
+
+struct qedr_srq {
+	struct ib_srq ibsrq;
+	struct qedr_dev *dev;
+
+	struct qedr_userq	usrq;
+	struct qedr_srq_hwq_info hw_srq;
+	struct ib_umem *prod_umem;
+	u16 srq_id;
+	u32 srq_limit;
+	/* lock to protect srq recv post */
+	spinlock_t lock;
+};
+
 enum qedr_qp_err_bitmap {
 	QEDR_QP_ERR_SQ_FULL = 1,
 	QEDR_QP_ERR_RQ_FULL = 2,
@@ -538,4 +572,9 @@ static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr)
 {
 	return container_of(ibmr, struct qedr_mr, ibmr);
 }
+
+static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq)
+{
+	return container_of(ibsrq, struct qedr_srq, ibsrq);
+}
 #endif
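
The new qedr_idr wrapper pairs an IDR with the spinlock that guards it, which is what the SRQ async-event handler earlier in this diff relies on when it looks up a qedr_srq by its firmware id. The insertion side is not shown in this section; the usual pattern preloads the IDR and then allocates a fixed id under the lock, sketched here with made-up names:

	#include <linux/gfp.h>
	#include <linux/idr.h>
	#include <linux/spinlock.h>

	struct example_idr {
		spinlock_t idr_lock;	/* protects idr */
		struct idr idr;
	};

	/* Hypothetical registration of an object under a known, stable id. */
	static int example_idr_add(struct example_idr *map, void *obj, u32 id)
	{
		unsigned long flags;
		int rc;

		idr_preload(GFP_KERNEL);
		spin_lock_irqsave(&map->idr_lock, flags);
		/* end is exclusive, so [id, id + 1) pins exactly this id. */
		rc = idr_alloc(&map->idr, obj, id, id + 1, GFP_NOWAIT);
		spin_unlock_irqrestore(&map->idr_lock, flags);
		idr_preload_end();

		return rc < 0 ? rc : 0;
	}
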
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
index 7e1f702..228dd7d 100644
--- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
+++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
@@ -161,12 +161,23 @@ struct rdma_rq_sge {
 #define RDMA_RQ_SGE_L_KEY_HI_SHIFT  29
 };
 
+struct rdma_srq_wqe_header {
+	struct regpair wr_id;
+	u8 num_sges /* number of SGEs in WQE */;
+	u8 reserved2[7];
+};
+
 struct rdma_srq_sge {
 	struct regpair addr;
 	__le32 length;
 	__le32 l_key;
 };
 
+union rdma_srq_elm {
+	struct rdma_srq_wqe_header header;
+	struct rdma_srq_sge sge;
+};
+
 /* Rdma doorbell data for flags update */
 struct rdma_pwm_flags_data {
 	__le16 icid; /* internal CID */
diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
index 26dc374..505fa36 100644
--- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c
@@ -491,7 +491,7 @@ int qedr_iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	int rc = 0;
 	int i;
 
-	qp = idr_find(&dev->qpidr, conn_param->qpn);
+	qp = idr_find(&dev->qpidr.idr, conn_param->qpn);
 
 	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
 	raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
@@ -679,7 +679,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
 	DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn);
 
-	qp = idr_find(&dev->qpidr, conn_param->qpn);
+	qp = idr_find(&dev->qpidr.idr, conn_param->qpn);
 	if (!qp) {
 		DP_ERR(dev, "Invalid QP number %d\n", conn_param->qpn);
 		return -EINVAL;
@@ -737,9 +737,9 @@ void qedr_iw_qp_rem_ref(struct ib_qp *ibqp)
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
 
 	if (atomic_dec_and_test(&qp->refcnt)) {
-		spin_lock_irq(&qp->dev->idr_lock);
-		idr_remove(&qp->dev->qpidr, qp->qp_id);
-		spin_unlock_irq(&qp->dev->idr_lock);
+		spin_lock_irq(&qp->dev->qpidr.idr_lock);
+		idr_remove(&qp->dev->qpidr.idr, qp->qp_id);
+		spin_unlock_irq(&qp->dev->qpidr.idr_lock);
 		kfree(qp);
 	}
 }
@@ -748,5 +748,5 @@ struct ib_qp *qedr_iw_get_qp(struct ib_device *ibdev, int qpn)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibdev);
 
-	return idr_find(&dev->qpidr, qpn);
+	return idr_find(&dev->qpidr.idr, qpn);
 }
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 0f14e68..8557888 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -380,18 +380,17 @@ int qedr_destroy_gsi_qp(struct qedr_dev *dev)
 #define QEDR_GSI_QPN		(1)
 static inline int qedr_gsi_build_header(struct qedr_dev *dev,
 					struct qedr_qp *qp,
-					struct ib_send_wr *swr,
+					const struct ib_send_wr *swr,
 					struct ib_ud_header *udh,
 					int *roce_mode)
 {
 	bool has_vlan = false, has_grh_ipv6 = true;
 	struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
 	const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
-	union ib_gid sgid;
+	const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
 	int send_size = 0;
 	u16 vlan_id = 0;
 	u16 ether_type;
-	struct ib_gid_attr sgid_attr;
 	int rc;
 	int ip_ver = 0;
 
@@ -402,28 +401,16 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
 	for (i = 0; i < swr->num_sge; ++i)
 		send_size += swr->sg_list[i].length;
 
-	rc = ib_get_cached_gid(qp->ibqp.device, rdma_ah_get_port_num(ah_attr),
-			       grh->sgid_index, &sgid, &sgid_attr);
-	if (rc) {
-		DP_ERR(dev,
-		       "gsi post send: failed to get cached GID (port=%d, ix=%d)\n",
-		       rdma_ah_get_port_num(ah_attr),
-		       grh->sgid_index);
-		return rc;
-	}
-
-	vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+	vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev);
 	if (vlan_id < VLAN_CFI_MASK)
 		has_vlan = true;
 
-	dev_put(sgid_attr.ndev);
-
-	has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
+	has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
 	if (!has_udp) {
 		/* RoCE v1 */
 		ether_type = ETH_P_IBOE;
 		*roce_mode = ROCE_V1;
-	} else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
+	} else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) {
 		/* RoCE v2 IPv4 */
 		ip_ver = 4;
 		ether_type = ETH_P_IP;
@@ -471,7 +458,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
 		udh->grh.flow_label = grh->flow_label;
 		udh->grh.hop_limit = grh->hop_limit;
 		udh->grh.destination_gid = grh->dgid;
-		memcpy(&udh->grh.source_gid.raw, &sgid.raw,
+		memcpy(&udh->grh.source_gid.raw, sgid_attr->gid.raw,
 		       sizeof(udh->grh.source_gid.raw));
 	} else {
 		/* IPv4 header */
@@ -482,7 +469,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
 		udh->ip4.frag_off = htons(IP_DF);
 		udh->ip4.ttl = grh->hop_limit;
 
-		ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw);
+		ipv4_addr = qedr_get_ipv4_from_gid(sgid_attr->gid.raw);
 		udh->ip4.saddr = ipv4_addr;
 		ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw);
 		udh->ip4.daddr = ipv4_addr;
@@ -501,7 +488,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
 
 static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
 					struct qedr_qp *qp,
-					struct ib_send_wr *swr,
+					const struct ib_send_wr *swr,
 					struct qed_roce_ll2_packet **p_packet)
 {
 	u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE];
@@ -550,8 +537,8 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
 	return 0;
 }
 
-int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		       struct ib_send_wr **bad_wr)
+int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		       const struct ib_send_wr **bad_wr)
 {
 	struct qed_roce_ll2_packet *pkt = NULL;
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -620,8 +607,8 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return rc;
 }
 
-int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		       struct ib_recv_wr **bad_wr)
+int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.h b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
index a559163..d46dcd3 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.h
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.h
@@ -46,10 +46,10 @@ static inline u32 qedr_get_ipv4_from_gid(const u8 *gid)
 
 /* RDMA CM */
 int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
-int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		       struct ib_recv_wr **bad_wr);
-int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		       struct ib_send_wr **bad_wr);
+int qedr_gsi_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr);
+int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		       const struct ib_send_wr **bad_wr);
 struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev,
 				 struct ib_qp_init_attr *attrs,
 				 struct qedr_qp *qp);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index f07b8df..8cc3df2 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -51,6 +51,10 @@
 #include <rdma/qedr-abi.h>
 #include "qedr_roce_cm.h"
 
+#define QEDR_SRQ_WQE_ELEM_SIZE	sizeof(union rdma_srq_elm)
+#define RDMA_MAX_SGE_PER_SRQ	(4)
+#define RDMA_MAX_SRQ_WQE_SIZE	(RDMA_MAX_SGE_PER_SRQ + 1)
+
 #define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
 
 static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
@@ -84,6 +88,19 @@ int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
 	return 0;
 }
 
+int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
+	struct qedr_device_attr *qattr = &dev->attr;
+	struct qedr_srq *srq = get_qedr_srq(ibsrq);
+
+	srq_attr->srq_limit = srq->srq_limit;
+	srq_attr->max_wr = qattr->max_srq_wr;
+	srq_attr->max_sge = qattr->max_sge;
+
+	return 0;
+}
+
 int qedr_query_device(struct ib_device *ibdev,
 		      struct ib_device_attr *attr, struct ib_udata *udata)
 {
@@ -112,7 +129,8 @@ int qedr_query_device(struct ib_device *ibdev,
 	    IB_DEVICE_RC_RNR_NAK_GEN |
 	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
 
-	attr->max_sge = qattr->max_sge;
+	attr->max_send_sge = qattr->max_sge;
+	attr->max_recv_sge = qattr->max_sge;
 	attr->max_sge_rd = qattr->max_sge;
 	attr->max_cq = qattr->max_cq;
 	attr->max_cqe = qattr->max_cqe;
@@ -224,7 +242,7 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
 	attr->lmc = 0;
 	attr->sm_lid = 0;
 	attr->sm_sl = 0;
-	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
+	attr->ip_gids = true;
 	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
 		attr->gid_tbl_len = 1;
 		attr->pkey_tbl_len = 1;
@@ -1075,27 +1093,19 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
 					  struct qed_rdma_modify_qp_in_params
 					  *qp_params)
 {
+	const struct ib_gid_attr *gid_attr;
 	enum rdma_network_type nw_type;
-	struct ib_gid_attr gid_attr;
 	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
-	union ib_gid gid;
 	u32 ipv4_addr;
-	int rc = 0;
 	int i;
 
-	rc = ib_get_cached_gid(ibqp->device,
-			       rdma_ah_get_port_num(&attr->ah_attr),
-			       grh->sgid_index, &gid, &gid_attr);
-	if (rc)
-		return rc;
+	gid_attr = grh->sgid_attr;
+	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev);
 
-	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
-
-	dev_put(gid_attr.ndev);
-	nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
+	nw_type = rdma_gid_attr_network_type(gid_attr);
 	switch (nw_type) {
 	case RDMA_NETWORK_IPV6:
-		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
 		       sizeof(qp_params->sgid));
 		memcpy(&qp_params->dgid.bytes[0],
 		       &grh->dgid,
@@ -1105,7 +1115,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
 			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
 		break;
 	case RDMA_NETWORK_IB:
-		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+		memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
 		       sizeof(qp_params->sgid));
 		memcpy(&qp_params->dgid.bytes[0],
 		       &grh->dgid,
@@ -1115,7 +1125,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
 	case RDMA_NETWORK_IPV4:
 		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
 		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
-		ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
+		ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
 		qp_params->sgid.ipv4_addr = ipv4_addr;
 		ipv4_addr =
 		    qedr_get_ipv4_from_gid(grh->dgid.raw);
@@ -1189,6 +1199,21 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
 	return 0;
 }
 
+static int qedr_copy_srq_uresp(struct qedr_dev *dev,
+			       struct qedr_srq *srq, struct ib_udata *udata)
+{
+	struct qedr_create_srq_uresp uresp = {};
+	int rc;
+
+	uresp.srq_id = srq->srq_id;
+
+	rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+	if (rc)
+		DP_ERR(dev, "create srq: problem copying data to user space\n");
+
+	return rc;
+}
+
 static void qedr_copy_rq_uresp(struct qedr_dev *dev,
 			       struct qedr_create_qp_uresp *uresp,
 			       struct qedr_qp *qp)
@@ -1255,13 +1280,18 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev,
 	qp->state = QED_ROCE_QP_STATE_RESET;
 	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
 	qp->sq_cq = get_qedr_cq(attrs->send_cq);
-	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
 	qp->dev = dev;
-	qp->rq.max_sges = attrs->cap.max_recv_sge;
 
-	DP_DEBUG(dev, QEDR_MSG_QP,
-		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
-		 qp->rq.max_sges, qp->rq_cq->icid);
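+	/* A QP attached to an SRQ has no receive queue of its own. */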
+	if (attrs->srq) {
+		qp->srq = get_qedr_srq(attrs->srq);
+	} else {
+		qp->rq_cq = get_qedr_cq(attrs->recv_cq);
+		qp->rq.max_sges = attrs->cap.max_recv_sge;
+		DP_DEBUG(dev, QEDR_MSG_QP,
+			 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
+			 qp->rq.max_sges, qp->rq_cq->icid);
+	}
+
 	DP_DEBUG(dev, QEDR_MSG_QP,
 		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
 		 pd->pd_id, qp->qp_type, qp->max_inline_data,
@@ -1276,9 +1306,303 @@ static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
 	qp->sq.db = dev->db_addr +
 		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
 	qp->sq.db_data.data.icid = qp->icid + 1;
-	qp->rq.db = dev->db_addr +
-		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
-	qp->rq.db_data.data.icid = qp->icid;
+	if (!qp->srq) {
+		qp->rq.db = dev->db_addr +
+			    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+		qp->rq.db_data.data.icid = qp->icid;
+	}
+}
+
+static int qedr_check_srq_params(struct ib_pd *ibpd, struct qedr_dev *dev,
+				 struct ib_srq_init_attr *attrs,
+				 struct ib_udata *udata)
+{
+	struct qedr_device_attr *qattr = &dev->attr;
+
+	if (attrs->attr.max_wr > qattr->max_srq_wr) {
+		DP_ERR(dev,
+		       "create srq: unsupported srq_wr=0x%x requested (max_srq_wr=0x%x)\n",
+		       attrs->attr.max_wr, qattr->max_srq_wr);
+		return -EINVAL;
+	}
+
+	if (attrs->attr.max_sge > qattr->max_sge) {
+		DP_ERR(dev,
+		       "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n",
+		       attrs->attr.max_sge, qattr->max_sge);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void qedr_free_srq_user_params(struct qedr_srq *srq)
+{
+	qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
+	ib_umem_release(srq->usrq.umem);
+	ib_umem_release(srq->prod_umem);
+}
+
+static void qedr_free_srq_kernel_params(struct qedr_srq *srq)
+{
+	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
+	struct qedr_dev *dev = srq->dev;
+
+	dev->ops->common->chain_free(dev->cdev, &hw_srq->pbl);
+
+	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
+			  hw_srq->virt_prod_pair_addr,
+			  hw_srq->phy_prod_pair_addr);
+}
+
+static int qedr_init_srq_user_params(struct ib_ucontext *ib_ctx,
+				     struct qedr_srq *srq,
+				     struct qedr_create_srq_ureq *ureq,
+				     int access, int dmasync)
+{
+	struct scatterlist *sg;
+	int rc;
+
+	rc = qedr_init_user_queue(ib_ctx, srq->dev, &srq->usrq, ureq->srq_addr,
+				  ureq->srq_len, access, dmasync, 1);
+	if (rc)
+		return rc;
+
+	srq->prod_umem = ib_umem_get(ib_ctx, ureq->prod_pair_addr,
+				     sizeof(struct rdma_srq_producers),
+				     access, dmasync);
+	if (IS_ERR(srq->prod_umem)) {
+		qedr_free_pbl(srq->dev, &srq->usrq.pbl_info, srq->usrq.pbl_tbl);
+		ib_umem_release(srq->usrq.umem);
+		DP_ERR(srq->dev,
+		       "create srq: failed ib_umem_get for producer, got %ld\n",
+		       PTR_ERR(srq->prod_umem));
+		return PTR_ERR(srq->prod_umem);
+	}
+
+	sg = srq->prod_umem->sg_head.sgl;
+	srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg);
+
+	return 0;
+}
+
+static int qedr_alloc_srq_kernel_params(struct qedr_srq *srq,
+					struct qedr_dev *dev,
+					struct ib_srq_init_attr *init_attr)
+{
+	struct qedr_srq_hwq_info *hw_srq = &srq->hw_srq;
+	dma_addr_t phy_prod_pair_addr;
+	u32 num_elems;
+	void *va;
+	int rc;
+
+	va = dma_alloc_coherent(&dev->pdev->dev,
+				sizeof(struct rdma_srq_producers),
+				&phy_prod_pair_addr, GFP_KERNEL);
+	if (!va) {
+		DP_ERR(dev,
+		       "create srq: failed to allocate dma memory for producer\n");
+		return -ENOMEM;
+	}
+
+	hw_srq->phy_prod_pair_addr = phy_prod_pair_addr;
+	hw_srq->virt_prod_pair_addr = va;
+
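+	/* Each WR consumes one header element plus room for the maximum
+	 * number of SGEs, hence the RDMA_MAX_SRQ_WQE_SIZE multiplier.
+	 */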
+	num_elems = init_attr->attr.max_wr * RDMA_MAX_SRQ_WQE_SIZE;
+	rc = dev->ops->common->chain_alloc(dev->cdev,
+					   QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+					   QED_CHAIN_MODE_PBL,
+					   QED_CHAIN_CNT_TYPE_U32,
+					   num_elems,
+					   QEDR_SRQ_WQE_ELEM_SIZE,
+					   &hw_srq->pbl, NULL);
+	if (rc)
+		goto err0;
+
+	hw_srq->num_elems = num_elems;
+
+	return 0;
+
+err0:
+	dma_free_coherent(&dev->pdev->dev, sizeof(struct rdma_srq_producers),
+			  va, phy_prod_pair_addr);
+	return rc;
+}
+
+static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
+			void *ptr, u32 id);
+static void qedr_idr_remove(struct qedr_dev *dev,
+			    struct qedr_idr *qidr, u32 id);
+
+struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
+			       struct ib_srq_init_attr *init_attr,
+			       struct ib_udata *udata)
+{
+	struct qed_rdma_destroy_srq_in_params destroy_in_params;
+	struct qed_rdma_create_srq_in_params in_params = {};
+	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+	struct qed_rdma_create_srq_out_params out_params;
+	struct qedr_pd *pd = get_qedr_pd(ibpd);
+	struct qedr_create_srq_ureq ureq = {};
+	u64 pbl_base_addr, phy_prod_pair_addr;
+	struct ib_ucontext *ib_ctx = NULL;
+	struct qedr_srq_hwq_info *hw_srq;
+	struct qedr_ucontext *ctx = NULL;
+	u32 page_cnt, page_size;
+	struct qedr_srq *srq;
+	int rc = 0;
+
+	DP_DEBUG(dev, QEDR_MSG_QP,
+		 "create SRQ called from %s (pd %p)\n",
+		 (udata) ? "User lib" : "kernel", pd);
+
+	rc = qedr_check_srq_params(ibpd, dev, init_attr, udata);
+	if (rc)
+		return ERR_PTR(-EINVAL);
+
+	srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+	if (!srq)
+		return ERR_PTR(-ENOMEM);
+
+	srq->dev = dev;
+	hw_srq = &srq->hw_srq;
+	spin_lock_init(&srq->lock);
+
+	hw_srq->max_wr = init_attr->attr.max_wr;
+	hw_srq->max_sges = init_attr->attr.max_sge;
+
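+	/* User SRQs are described by umem-backed PBLs supplied from
+	 * userspace; kernel SRQs allocate a qed chain instead.
+	 */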
+	if (udata && ibpd->uobject && ibpd->uobject->context) {
+		ib_ctx = ibpd->uobject->context;
+		ctx = get_qedr_ucontext(ib_ctx);
+
+		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+			DP_ERR(dev,
+			       "create srq: problem copying data from user space\n");
+			goto err0;
+		}
+
+		rc = qedr_init_srq_user_params(ib_ctx, srq, &ureq, 0, 0);
+		if (rc)
+			goto err0;
+
+		page_cnt = srq->usrq.pbl_info.num_pbes;
+		pbl_base_addr = srq->usrq.pbl_tbl->pa;
+		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
+		page_size = BIT(srq->usrq.umem->page_shift);
+	} else {
+		struct qed_chain *pbl;
+
+		rc = qedr_alloc_srq_kernel_params(srq, dev, init_attr);
+		if (rc)
+			goto err0;
+
+		pbl = &hw_srq->pbl;
+		page_cnt = qed_chain_get_page_cnt(pbl);
+		pbl_base_addr = qed_chain_get_pbl_phys(pbl);
+		phy_prod_pair_addr = hw_srq->phy_prod_pair_addr;
+		page_size = QED_CHAIN_PAGE_SIZE;
+	}
+
+	in_params.pd_id = pd->pd_id;
+	in_params.pbl_base_addr = pbl_base_addr;
+	in_params.prod_pair_addr = phy_prod_pair_addr;
+	in_params.num_pages = page_cnt;
+	in_params.page_size = page_size;
+
+	rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params);
+	if (rc)
+		goto err1;
+
+	srq->srq_id = out_params.srq_id;
+
+	if (udata) {
+		rc = qedr_copy_srq_uresp(dev, srq, udata);
+		if (rc)
+			goto err2;
+	}
+
+	rc = qedr_idr_add(dev, &dev->srqidr, srq, srq->srq_id);
+	if (rc)
+		goto err2;
+
+	DP_DEBUG(dev, QEDR_MSG_SRQ,
+		 "create srq: created srq with srq_id=0x%0x\n", srq->srq_id);
+	return &srq->ibsrq;
+
+err2:
+	destroy_in_params.srq_id = srq->srq_id;
+
+	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &destroy_in_params);
+err1:
+	if (udata)
+		qedr_free_srq_user_params(srq);
+	else
+		qedr_free_srq_kernel_params(srq);
+err0:
+	kfree(srq);
+
+	return ERR_PTR(-EFAULT);
+}
+
+int qedr_destroy_srq(struct ib_srq *ibsrq)
+{
+	struct qed_rdma_destroy_srq_in_params in_params = {};
+	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
+	struct qedr_srq *srq = get_qedr_srq(ibsrq);
+
+	qedr_idr_remove(dev, &dev->srqidr, srq->srq_id);
+	in_params.srq_id = srq->srq_id;
+	dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
+
+	if (ibsrq->pd->uobject)
+		qedr_free_srq_user_params(srq);
+	else
+		qedr_free_srq_kernel_params(srq);
+
+	DP_DEBUG(dev, QEDR_MSG_SRQ,
+		 "destroy srq: destroyed srq with srq_id=0x%0x\n",
+		 srq->srq_id);
+	kfree(srq);
+
+	return 0;
+}
+
+int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
+{
+	struct qed_rdma_modify_srq_in_params in_params = {};
+	struct qedr_dev *dev = get_qedr_dev(ibsrq->device);
+	struct qedr_srq *srq = get_qedr_srq(ibsrq);
+	int rc;
+
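+	/* Resizing the SRQ (IB_SRQ_MAX_WR) is not supported. */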
+	if (attr_mask & IB_SRQ_MAX_WR) {
+		DP_ERR(dev,
+		       "modify srq: invalid attribute mask=0x%x specified for %p\n",
+		       attr_mask, srq);
+		return -EINVAL;
+	}
+
+	if (attr_mask & IB_SRQ_LIMIT) {
+		if (attr->srq_limit >= srq->hw_srq.max_wr) {
+			DP_ERR(dev,
+			       "modify srq: invalid srq_limit=0x%x (max_srq_limit=0x%x)\n",
+			       attr->srq_limit, srq->hw_srq.max_wr);
+			return -EINVAL;
+		}
+
+		in_params.srq_id = srq->srq_id;
+		in_params.wqe_limit = attr->srq_limit;
+		rc = dev->ops->rdma_modify_srq(dev->rdma_ctx, &in_params);
+		if (rc)
+			return rc;
+	}
+
+	srq->srq_limit = attr->srq_limit;
+
+	DP_DEBUG(dev, QEDR_MSG_SRQ,
+		 "modify srq: modified srq with srq_id=0x%0x\n", srq->srq_id);
+
+	return 0;
 }
 
 static inline void
@@ -1299,9 +1623,17 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev,
 	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
 	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
 	params->stats_queue = 0;
-	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
 	params->srq_id = 0;
 	params->use_srq = false;
+
+	if (!qp->srq) {
+		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
+	} else {
+		params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
+		params->srq_id = qp->srq->srq_id;
+		params->use_srq = true;
+	}
 }
 
 static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
@@ -1318,32 +1650,27 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
 		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
 }
 
-static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
+static int qedr_idr_add(struct qedr_dev *dev, struct qedr_idr *qidr,
+			void *ptr, u32 id)
 {
 	int rc;
 
-	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
-		return 0;
-
 	idr_preload(GFP_KERNEL);
-	spin_lock_irq(&dev->idr_lock);
+	spin_lock_irq(&qidr->idr_lock);
 
-	rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);
+	rc = idr_alloc(&qidr->idr, ptr, id, id + 1, GFP_ATOMIC);
 
-	spin_unlock_irq(&dev->idr_lock);
+	spin_unlock_irq(&qidr->idr_lock);
 	idr_preload_end();
 
 	return rc < 0 ? rc : 0;
 }
 
-static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
+static void qedr_idr_remove(struct qedr_dev *dev, struct qedr_idr *qidr, u32 id)
 {
-	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
-		return;
-
-	spin_lock_irq(&dev->idr_lock);
-	idr_remove(&dev->qpidr, id);
-	spin_unlock_irq(&dev->idr_lock);
+	spin_lock_irq(&qidr->idr_lock);
+	idr_remove(&qidr->idr, id);
+	spin_unlock_irq(&qidr->idr_lock);
 }
 
 static inline void
@@ -1356,9 +1683,10 @@ qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
 
 	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
 			   &qp->usq.pbl_info, FW_PAGE_SHIFT);
-
-	qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
-	qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
+	if (!qp->srq) {
+		qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
+		qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;
+	}
 
 	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
 			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
@@ -1404,11 +1732,13 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 	if (rc)
 		return rc;
 
-	/* RQ - read access only (0), dma sync not required (0) */
-	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
-				  ureq.rq_len, 0, 0, alloc_and_init);
-	if (rc)
-		return rc;
+	if (!qp->srq) {
+		/* RQ - read access only (0), dma sync not required (0) */
+		rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
+					  ureq.rq_len, 0, 0, alloc_and_init);
+		if (rc)
+			return rc;
+	}
 
 	memset(&in_params, 0, sizeof(in_params));
 	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
@@ -1416,8 +1746,10 @@ static int qedr_create_user_qp(struct qedr_dev *dev,
 	in_params.qp_handle_hi = ureq.qp_handle_hi;
 	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
 	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
-	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
-	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+	if (!qp->srq) {
+		in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
+		in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+	}
 
 	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
 					      &in_params, &out_params);
@@ -1679,16 +2011,13 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
 	if (rc)
 		return ERR_PTR(rc);
 
-	if (attrs->srq)
-		return ERR_PTR(-EINVAL);
-
 	DP_DEBUG(dev, QEDR_MSG_QP,
 		 "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
 		 udata ? "user library" : "kernel", attrs->event_handler, pd,
 		 get_qedr_cq(attrs->send_cq),
 		 get_qedr_cq(attrs->send_cq)->icid,
 		 get_qedr_cq(attrs->recv_cq),
-		 get_qedr_cq(attrs->recv_cq)->icid);
+		 attrs->recv_cq ? get_qedr_cq(attrs->recv_cq)->icid : 0);
 
 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 	if (!qp) {
@@ -1715,9 +2044,11 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
 
 	qp->ibqp.qp_num = qp->qp_id;
 
-	rc = qedr_idr_add(dev, qp, qp->qp_id);
-	if (rc)
-		goto err;
+	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
+		rc = qedr_idr_add(dev, &dev->qpidr, qp, qp->qp_id);
+		if (rc)
+			goto err;
+	}
 
 	return &qp->ibqp;
 
@@ -2289,8 +2620,9 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
 
 	qedr_free_qp_resources(dev, qp);
 
-	if (atomic_dec_and_test(&qp->refcnt)) {
-		qedr_idr_remove(dev, qp->qp_id);
+	if (atomic_dec_and_test(&qp->refcnt) &&
+	    rdma_protocol_iwarp(&dev->ibdev, 1)) {
+		qedr_idr_remove(dev, &dev->qpidr, qp->qp_id);
 		kfree(qp);
 	}
 	return rc;
@@ -2305,7 +2637,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
 	if (!ah)
 		return ERR_PTR(-ENOMEM);
 
-	ah->attr = *attr;
+	rdma_copy_ah_attr(&ah->attr, attr);
 
 	return &ah->ibah;
 }
@@ -2314,6 +2646,7 @@ int qedr_destroy_ah(struct ib_ah *ibah)
 {
 	struct qedr_ah *ah = get_qedr_ah(ibah);
 
+	rdma_destroy_ah_attr(&ah->attr);
 	kfree(ah);
 	return 0;
 }
@@ -2705,9 +3038,9 @@ static void swap_wqe_data64(u64 *p)
 
 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
 				       struct qedr_qp *qp, u8 *wqe_size,
-				       struct ib_send_wr *wr,
-				       struct ib_send_wr **bad_wr, u8 *bits,
-				       u8 bit)
+				       const struct ib_send_wr *wr,
+				       const struct ib_send_wr **bad_wr,
+				       u8 *bits, u8 bit)
 {
 	u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
 	char *seg_prt, *wqe;
@@ -2790,7 +3123,7 @@ static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
 	} while (0)
 
 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
-				struct ib_send_wr *wr)
+				const struct ib_send_wr *wr)
 {
 	u32 data_size = 0;
 	int i;
@@ -2814,8 +3147,8 @@ static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
 				     struct qedr_qp *qp,
 				     struct rdma_sq_rdma_wqe_1st *rwqe,
 				     struct rdma_sq_rdma_wqe_2nd *rwqe2,
-				     struct ib_send_wr *wr,
-				     struct ib_send_wr **bad_wr)
+				     const struct ib_send_wr *wr,
+				     const struct ib_send_wr **bad_wr)
 {
 	rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
 	DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
@@ -2837,8 +3170,8 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
 				     struct qedr_qp *qp,
 				     struct rdma_sq_send_wqe_1st *swqe,
 				     struct rdma_sq_send_wqe_2st *swqe2,
-				     struct ib_send_wr *wr,
-				     struct ib_send_wr **bad_wr)
+				     const struct ib_send_wr *wr,
+				     const struct ib_send_wr **bad_wr)
 {
 	memset(swqe2, 0, sizeof(*swqe2));
 	if (wr->send_flags & IB_SEND_INLINE) {
@@ -2854,7 +3187,7 @@ static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
 
 static int qedr_prepare_reg(struct qedr_qp *qp,
 			    struct rdma_sq_fmr_wqe_1st *fwqe1,
-			    struct ib_reg_wr *wr)
+			    const struct ib_reg_wr *wr)
 {
 	struct qedr_mr *mr = get_qedr_mr(wr->mr);
 	struct rdma_sq_fmr_wqe_2nd *fwqe2;
@@ -2916,7 +3249,8 @@ static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
 	}
 }
 
-static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
+static inline bool qedr_can_post_send(struct qedr_qp *qp,
+				      const struct ib_send_wr *wr)
 {
 	int wq_is_full, err_wr, pbl_is_full;
 	struct qedr_dev *dev = qp->dev;
@@ -2953,8 +3287,8 @@ static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
 	return true;
 }
 
-static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr)
+static int __qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			    const struct ib_send_wr **bad_wr)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -3168,8 +3502,8 @@ static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return rc;
 }
 
-int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		   struct ib_send_wr **bad_wr)
+int qedr_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		   const struct ib_send_wr **bad_wr)
 {
 	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -3234,8 +3568,104 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	return rc;
 }
 
-int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		   struct ib_recv_wr **bad_wr)
+static u32 qedr_srq_elem_left(struct qedr_srq_hwq_info *hw_srq)
+{
+	u32 used;
+
+	/* The number of WRs in use is the difference between the producer
+	 * and consumer counts; subtracting it from the maximum supported
+	 * WRs gives the number of free elements.
+	 */
+	used = hw_srq->wr_prod_cnt - hw_srq->wr_cons_cnt;
+
+	return hw_srq->max_wr - used;
+}
+
+int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr)
+{
+	struct qedr_srq *srq = get_qedr_srq(ibsrq);
+	struct qedr_srq_hwq_info *hw_srq;
+	struct qedr_dev *dev = srq->dev;
+	struct qed_chain *pbl;
+	unsigned long flags;
+	int status = 0;
+	u32 num_sge;
+	u32 offset;
+
+	spin_lock_irqsave(&srq->lock, flags);
+
+	hw_srq = &srq->hw_srq;
+	pbl = &srq->hw_srq.pbl;
+	while (wr) {
+		struct rdma_srq_wqe_header *hdr;
+		int i;
+
+		if (!qedr_srq_elem_left(hw_srq) ||
+		    wr->num_sge > srq->hw_srq.max_sges) {
+			DP_ERR(dev, "Can't post WR (%d,%d) || (%d > %d)\n",
+			       hw_srq->wr_prod_cnt, hw_srq->wr_cons_cnt,
+			       wr->num_sge, srq->hw_srq.max_sges);
+			status = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+
+		hdr = qed_chain_produce(pbl);
+		num_sge = wr->num_sge;
+		/* Set number of sge and work request id in header */
+		SRQ_HDR_SET(hdr, wr->wr_id, num_sge);
+
+		srq->hw_srq.wr_prod_cnt++;
+		hw_srq->wqe_prod++;
+		hw_srq->sge_prod++;
+
+		DP_DEBUG(dev, QEDR_MSG_SRQ,
+			 "SRQ WR: SGEs: %d with wr_id[%d] = %llx\n",
+			 wr->num_sge, hw_srq->wqe_prod, wr->wr_id);
+
+		for (i = 0; i < wr->num_sge; i++) {
+			struct rdma_srq_sge *srq_sge = qed_chain_produce(pbl);
+
+			/* Set SGE length, lkey and address */
+			SRQ_SGE_SET(srq_sge, wr->sg_list[i].addr,
+				    wr->sg_list[i].length, wr->sg_list[i].lkey);
+
+			DP_DEBUG(dev, QEDR_MSG_SRQ,
+				 "[%d]: len %d key %x addr %x:%x\n",
+				 i, srq_sge->length, srq_sge->l_key,
+				 srq_sge->addr.hi, srq_sge->addr.lo);
+			hw_srq->sge_prod++;
+		}
+
+		/* Flush WQE and SGE information before
+		 * updating producer.
+		 */
+		wmb();
+
+		/* The SRQ producer pair is 8 bytes: the SGE producer index
+		 * occupies the first 4 bytes and the WQE producer index the
+		 * next 4 bytes.
+		 */
+		*srq->hw_srq.virt_prod_pair_addr = hw_srq->sge_prod;
+		offset = offsetof(struct rdma_srq_producers, wqe_prod);
+		*((u32 *)((u8 *)srq->hw_srq.virt_prod_pair_addr + offset)) =
+			hw_srq->wqe_prod;
+
+		/* Flush producer after updating it. */
+		wmb();
+		wr = wr->next;
+	}
+
+	DP_DEBUG(dev, QEDR_MSG_SRQ, "POST: Elements in S-RQ: %d\n",
+		 qed_chain_get_elem_left(pbl));
+	spin_unlock_irqrestore(&srq->lock, flags);
+
+	return status;
+}
+
+int qedr_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		   const struct ib_recv_wr **bad_wr)
 {
 	struct qedr_qp *qp = get_qedr_qp(ibqp);
 	struct qedr_dev *dev = qp->dev;
@@ -3625,6 +4055,31 @@ static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
 	wc->wr_id = wr_id;
 }
 
+static int process_resp_one_srq(struct qedr_dev *dev, struct qedr_qp *qp,
+				struct qedr_cq *cq, struct ib_wc *wc,
+				struct rdma_cqe_responder *resp)
+{
+	struct qedr_srq *srq = qp->srq;
+	u64 wr_id;
+
+	wr_id = HILO_GEN(le32_to_cpu(resp->srq_wr_id.hi),
+			 le32_to_cpu(resp->srq_wr_id.lo), u64);
+
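+	/* Flushed WRs complete with IB_WC_WR_FLUSH_ERR and no data. */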
+	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
+		wc->status = IB_WC_WR_FLUSH_ERR;
+		wc->vendor_err = 0;
+		wc->wr_id = wr_id;
+		wc->byte_len = 0;
+		wc->src_qp = qp->id;
+		wc->qp = &qp->ibqp;
+	} else {
+		__process_resp_one(dev, qp, cq, wc, resp, wr_id);
+	}
+	srq->hw_srq.wr_cons_cnt++;
+
+	return 1;
+}
+
 static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp,
 			    struct qedr_cq *cq, struct ib_wc *wc,
 			    struct rdma_cqe_responder *resp)
@@ -3674,6 +4129,19 @@ static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp,
 	}
 }
 
+static int qedr_poll_cq_resp_srq(struct qedr_dev *dev, struct qedr_qp *qp,
+				 struct qedr_cq *cq, int num_entries,
+				 struct ib_wc *wc,
+				 struct rdma_cqe_responder *resp)
+{
+	int cnt;
+
+	cnt = process_resp_one_srq(dev, qp, cq, wc, resp);
+	consume_cqe(cq);
+
+	return cnt;
+}
+
 static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp,
 			     struct qedr_cq *cq, int num_entries,
 			     struct ib_wc *wc, struct rdma_cqe_responder *resp,
@@ -3751,6 +4219,11 @@ int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 			cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc,
 						&cqe->resp, &update);
 			break;
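+		/* SRQ completions carry the consumed wr_id in the CQE. */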
+		case RDMA_CQE_TYPE_RESPONDER_SRQ:
+			cnt = qedr_poll_cq_resp_srq(dev, qp, cq, num_entries,
+						    wc, &cqe->resp);
+			update = 1;
+			break;
 		case RDMA_CQE_TYPE_INVALID:
 		default:
 			DP_ERR(dev, "Error: invalid CQE type = %d\n",
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 2c57e4c..0b7d012 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -66,6 +66,15 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
 		  int qp_attr_mask, struct ib_qp_init_attr *);
 int qedr_destroy_qp(struct ib_qp *ibqp);
 
+struct ib_srq *qedr_create_srq(struct ib_pd *ibpd,
+			       struct ib_srq_init_attr *attr,
+			       struct ib_udata *udata);
+int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+int qedr_destroy_srq(struct ib_srq *ibsrq);
+int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_recv_wr);
 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
 			     struct ib_udata *udata);
 int qedr_destroy_ah(struct ib_ah *ibah);
@@ -82,10 +91,10 @@ int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
 			    u32 max_num_sg);
 int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
-int qedr_post_send(struct ib_qp *, struct ib_send_wr *,
-		   struct ib_send_wr **bad_wr);
-int qedr_post_recv(struct ib_qp *, struct ib_recv_wr *,
-		   struct ib_recv_wr **bad_wr);
+int qedr_post_send(struct ib_qp *, const struct ib_send_wr *,
+		   const struct ib_send_wr **bad_wr);
+int qedr_post_recv(struct ib_qp *, const struct ib_recv_wr *,
+		   const struct ib_recv_wr **bad_wr);
 int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags,
 		     u8 port_num, const struct ib_wc *in_wc,
 		     const struct ib_grh *in_grh,
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 14b4057..41babbc 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1489,7 +1489,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
 	rdi->dparms.props.max_mr_size = ~0ULL;
 	rdi->dparms.props.max_qp = ib_qib_max_qps;
 	rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
-	rdi->dparms.props.max_sge = ib_qib_max_sges;
+	rdi->dparms.props.max_send_sge = ib_qib_max_sges;
+	rdi->dparms.props.max_recv_sge = ib_qib_max_sges;
 	rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
 	rdi->dparms.props.max_cq = ib_qib_max_cqs;
 	rdi->dparms.props.max_cqe = ib_qib_max_cqes;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index f9a46768..666613e 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -78,9 +78,6 @@ struct qib_verbs_txreq;
 
 #define QIB_VENDOR_IPG		cpu_to_be16(0xFFA0)
 
-/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
-#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
-
 #define IB_DEFAULT_GID_PREFIX	cpu_to_be64(0xfe80000000000000ULL)
 
 /* Values for set/get portinfo VLCap OperationalVLs */
@@ -314,7 +311,7 @@ void qib_rc_rnr_retry(unsigned long arg);
 
 void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
 
-int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
+int qib_post_ud_send(struct rvt_qp *qp, const struct ib_send_wr *wr);
 
 void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
 		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
diff --git a/drivers/infiniband/hw/usnic/Kconfig b/drivers/infiniband/hw/usnic/Kconfig
index 29ab11c..d1dae2a 100644
--- a/drivers/infiniband/hw/usnic/Kconfig
+++ b/drivers/infiniband/hw/usnic/Kconfig
@@ -1,10 +1,10 @@
 config INFINIBAND_USNIC
 	tristate "Verbs support for Cisco VIC"
 	depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU
+	depends on INFINIBAND_USER_ACCESS
 	select ENIC
 	select NET_VENDOR_CISCO
 	select PCI_IOV
-	select INFINIBAND_USER_ACCESS
 	---help---
 	  This is a low-level driver for Cisco's Virtual Interface
 	  Cards (VICs), including the VIC 1240 and 1280 cards.
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.c b/drivers/infiniband/hw/usnic/usnic_fwd.c
index 995a26b..7875883 100644
--- a/drivers/infiniband/hw/usnic/usnic_fwd.c
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.c
@@ -92,8 +92,8 @@ struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev)
 	ufdev->pdev = pdev;
 	ufdev->netdev = pci_get_drvdata(pdev);
 	spin_lock_init(&ufdev->lock);
-	strncpy(ufdev->name, netdev_name(ufdev->netdev),
-			sizeof(ufdev->name) - 1);
+	BUILD_BUG_ON(sizeof(ufdev->name) != sizeof(ufdev->netdev->name));
+	strcpy(ufdev->name, ufdev->netdev->name);
 
 	return ufdev;
 }
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h
index 0b2cc4e..f0b71d5 100644
--- a/drivers/infiniband/hw/usnic/usnic_fwd.h
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.h
@@ -57,7 +57,7 @@ struct usnic_fwd_dev {
 	char				mac[ETH_ALEN];
 	unsigned int			mtu;
 	__be32				inaddr;
-	char				name[IFNAMSIZ+1];
+	char				name[IFNAMSIZ];
 };
 
 struct usnic_fwd_flow {
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index a688a56..9973ac893 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -666,7 +666,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr)
 
 	usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
 
-	usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing);
+	usnic_uiom_reg_release(mr->umem, ibmr->uobject->context);
 	kfree(mr);
 	return 0;
 }
@@ -771,15 +771,15 @@ int usnic_ib_destroy_ah(struct ib_ah *ah)
 	return -EINVAL;
 }
 
-int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-				struct ib_send_wr **bad_wr)
+int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		       const struct ib_send_wr **bad_wr)
 {
 	usnic_dbg("\n");
 	return -EINVAL;
 }
 
-int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-				struct ib_recv_wr **bad_wr)
+int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr)
 {
 	usnic_dbg("\n");
 	return -EINVAL;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 1fda944..2a2c9be 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -80,10 +80,10 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
 				 struct ib_udata *udata);
 
 int usnic_ib_destroy_ah(struct ib_ah *ah);
-int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			struct ib_send_wr **bad_wr);
-int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			struct ib_recv_wr **bad_wr);
+int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			const struct ib_send_wr **bad_wr);
+int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		       const struct ib_recv_wr **bad_wr);
 int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
 			struct ib_wc *wc);
 int usnic_ib_req_notify_cq(struct ib_cq *cq,
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 4381c0a..9dd39da 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -41,6 +41,7 @@
 #include <linux/workqueue.h>
 #include <linux/list.h>
 #include <linux/pci.h>
+#include <rdma/ib_verbs.h>
 
 #include "usnic_log.h"
 #include "usnic_uiom.h"
@@ -88,7 +89,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
 		for_each_sg(chunk->page_list, sg, chunk->nents, i) {
 			page = sg_page(sg);
 			pa = sg_phys(sg);
-			if (dirty)
+			if (!PageDirty(page) && dirty)
 				set_page_dirty_lock(page);
 			put_page(page);
 			usnic_dbg("pa: %pa\n", &pa);
@@ -114,6 +115,16 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	dma_addr_t pa;
 	unsigned int gup_flags;
 
+	/*
+	 * If the combination of the addr and size requested for this memory
+	 * region causes an integer overflow, return error.
+	 */
+	if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size))
+		return -EINVAL;
+
+	if (!size)
+		return -EINVAL;
+
 	if (!can_do_mlock())
 		return -EPERM;
 
@@ -127,7 +138,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 
 	down_write(&current->mm->mmap_sem);
 
-	locked = npages + current->mm->locked_vm;
+	locked = npages + current->mm->pinned_vm;
 	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -143,7 +154,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	ret = 0;
 
 	while (npages) {
-		ret = get_user_pages(cur_base,
+		ret = get_user_pages_longterm(cur_base,
 					min_t(unsigned long, npages,
 					PAGE_SIZE / sizeof(struct page *)),
 					gup_flags, page_list, NULL);
@@ -186,7 +197,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	if (ret < 0)
 		usnic_uiom_put_pages(chunk_list, 0);
 	else
-		current->mm->locked_vm = locked;
+		current->mm->pinned_vm = locked;
 
 	up_write(&current->mm->mmap_sem);
 	free_page((unsigned long) page_list);
@@ -420,18 +431,22 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
 	return ERR_PTR(err);
 }
 
-void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
+			    struct ib_ucontext *ucontext)
 {
+	struct task_struct *task;
 	struct mm_struct *mm;
 	unsigned long diff;
 
 	__usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
 
-	mm = get_task_mm(current);
-	if (!mm) {
-		kfree(uiomr);
-		return;
-	}
+	task = get_pid_task(ucontext->tgid, PIDTYPE_PID);
+	if (!task)
+		goto out;
+	mm = get_task_mm(task);
+	put_task_struct(task);
+	if (!mm)
+		goto out;
 
 	diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
 
@@ -443,7 +458,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
 	 * up here and not be able to take the mmap_sem.  In that case
 	 * we defer the vm_locked accounting to the system workqueue.
 	 */
-	if (closing) {
+	if (ucontext->closing) {
 		if (!down_write_trylock(&mm->mmap_sem)) {
 			INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
 			uiomr->mm = mm;
@@ -455,9 +470,10 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
 	} else
 		down_write(&mm->mmap_sem);
 
-	current->mm->locked_vm -= diff;
+	mm->pinned_vm -= diff;
 	up_write(&mm->mmap_sem);
 	mmput(mm);
+out:
 	kfree(uiomr);
 }
 
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index 431efe4..8c096ac 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -39,6 +39,8 @@
 
 #include "usnic_uiom_interval_tree.h"
 
+struct ib_ucontext;
+
 #define USNIC_UIOM_READ			(1)
 #define USNIC_UIOM_WRITE		(2)
 
@@ -89,7 +91,8 @@ void usnic_uiom_free_dev_list(struct device **devs);
 struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
 						unsigned long addr, size_t size,
 						int access, int dmasync);
-void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing);
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
+			    struct ib_ucontext *ucontext);
 int usnic_uiom_init(char *drv_name);
 void usnic_uiom_fini(void);
 #endif /* USNIC_UIOM_H_ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 44cb1cf..42b8685 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -378,11 +378,6 @@ static inline enum ib_port_speed pvrdma_port_speed_to_ib(
 	return (enum ib_port_speed)speed;
 }
 
-static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask)
-{
-	return attr_mask;
-}
-
 static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask)
 {
 	return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index f95b976..0f004c7 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -278,19 +278,6 @@ int pvrdma_destroy_cq(struct ib_cq *cq)
 	return ret;
 }
 
-/**
- * pvrdma_modify_cq - modify the CQ moderation parameters
- * @ibcq: the CQ to modify
- * @cq_count: number of CQEs that will trigger an event
- * @cq_period: max period of time in usec before triggering an event
- *
- * @return: -EOPNOTSUPP as CQ resize is not supported.
- */
-int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
-{
-	return -EOPNOTSUPP;
-}
-
 static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
 {
 	return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
@@ -428,16 +415,3 @@ int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 	/* Ensure we do not return errors from poll_cq */
 	return npolled;
 }
-
-/**
- * pvrdma_resize_cq - resize CQ
- * @ibcq: the completion queue
- * @entries: CQ entries
- * @udata: user data
- *
- * @return: -EOPNOTSUPP as CQ resize is not supported.
- */
-int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
-{
-	return -EOPNOTSUPP;
-}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 0be33a8..a571989 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -62,9 +62,7 @@ static DEFINE_MUTEX(pvrdma_device_list_lock);
 static LIST_HEAD(pvrdma_device_list);
 static struct workqueue_struct *event_wq;
 
-static int pvrdma_add_gid(const union ib_gid *gid,
-			  const struct ib_gid_attr *attr,
-			  void **context);
+static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
 static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
 
 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -216,8 +214,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
 	dev->ib_dev.post_send = pvrdma_post_send;
 	dev->ib_dev.post_recv = pvrdma_post_recv;
 	dev->ib_dev.create_cq = pvrdma_create_cq;
-	dev->ib_dev.modify_cq = pvrdma_modify_cq;
-	dev->ib_dev.resize_cq = pvrdma_resize_cq;
 	dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
 	dev->ib_dev.poll_cq = pvrdma_poll_cq;
 	dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
@@ -261,7 +257,6 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
 		dev->ib_dev.modify_srq = pvrdma_modify_srq;
 		dev->ib_dev.query_srq = pvrdma_query_srq;
 		dev->ib_dev.destroy_srq = pvrdma_destroy_srq;
-		dev->ib_dev.post_srq_recv = pvrdma_post_srq_recv;
 
 		dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
 				       sizeof(struct pvrdma_srq *),
@@ -650,13 +645,11 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
 	return 0;
 }
 
-static int pvrdma_add_gid(const union ib_gid *gid,
-			  const struct ib_gid_attr *attr,
-			  void **context)
+static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context)
 {
 	struct pvrdma_dev *dev = to_vdev(attr->device);
 
-	return pvrdma_add_gid_at_index(dev, gid,
+	return pvrdma_add_gid_at_index(dev, &attr->gid,
 				       ib_gid_type_to_pvrdma(attr->gid_type),
 				       attr->index);
 }
@@ -699,8 +692,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context)
 }
 
 static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
+					  struct net_device *ndev,
 					  unsigned long event)
 {
+	struct pci_dev *pdev_net;
+	unsigned int slot;
+
 	switch (event) {
 	case NETDEV_REBOOT:
 	case NETDEV_DOWN:
@@ -718,6 +715,24 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
 		else
 			pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
 		break;
+	case NETDEV_UNREGISTER:
+		dev_put(dev->netdev);
+		dev->netdev = NULL;
+		break;
+	case NETDEV_REGISTER:
+		/* vmxnet3 will have same bus, slot. But func will be 0 */
+		slot = PCI_SLOT(dev->pdev->devfn);
+		pdev_net = pci_get_slot(dev->pdev->bus,
+					PCI_DEVFN(slot, 0));
+		if ((dev->netdev == NULL) &&
+		    (pci_get_drvdata(pdev_net) == ndev)) {
+			/* this is our netdev */
+			dev->netdev = ndev;
+			dev_hold(ndev);
+		}
+		pci_dev_put(pdev_net);
+		break;
+
 	default:
 		dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
 			event, dev->ib_dev.name);
@@ -734,8 +749,11 @@ static void pvrdma_netdevice_event_work(struct work_struct *work)
 
 	mutex_lock(&pvrdma_device_list_lock);
 	list_for_each_entry(dev, &pvrdma_device_list, device_link) {
-		if (dev->netdev == netdev_work->event_netdev) {
-			pvrdma_netdevice_event_handle(dev, netdev_work->event);
+		if ((netdev_work->event == NETDEV_REGISTER) ||
+		    (dev->netdev == netdev_work->event_netdev)) {
+			pvrdma_netdevice_event_handle(dev,
+						      netdev_work->event_netdev,
+						      netdev_work->event);
 			break;
 		}
 	}
@@ -968,6 +986,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
 		ret = -ENODEV;
 		goto err_free_cq_ring;
 	}
+	dev_hold(dev->netdev);
 
 	dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);
 
@@ -1040,6 +1059,10 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
 	pvrdma_free_irq(dev);
 	pci_free_irq_vectors(pdev);
 err_free_cq_ring:
+	if (dev->netdev) {
+		dev_put(dev->netdev);
+		dev->netdev = NULL;
+	}
 	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
 err_free_async_ring:
 	pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
@@ -1079,6 +1102,11 @@ static void pvrdma_pci_remove(struct pci_dev *pdev)
 
 	flush_workqueue(event_wq);
 
+	if (dev->netdev) {
+		dev_put(dev->netdev);
+		dev->netdev = NULL;
+	}
+
 	/* Unregister ib device */
 	ib_unregister_device(&dev->ib_dev);
 
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index eb5b106..60083c0 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -599,7 +599,8 @@ static inline void *get_rq_wqe(struct pvrdma_qp *qp, unsigned int n)
 				       qp->rq.offset + n * qp->rq.wqe_size);
 }
 
-static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr)
+static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr,
+		       const struct ib_reg_wr *wr)
 {
 	struct pvrdma_user_mr *mr = to_vmr(wr->mr);
 
@@ -623,8 +624,8 @@ static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr)
  *
  * @return: 0 on success, otherwise errno returned.
  */
-int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr)
+int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		     const struct ib_send_wr **bad_wr)
 {
 	struct pvrdma_qp *qp = to_vqp(ibqp);
 	struct pvrdma_dev *dev = to_vdev(ibqp->device);
@@ -827,8 +828,8 @@ int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
  *
  * @return: 0 on success, otherwise errno returned.
  */
-int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		     struct ib_recv_wr **bad_wr)
+int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		     const struct ib_recv_wr **bad_wr)
 {
 	struct pvrdma_dev *dev = to_vdev(ibqp->device);
 	unsigned long flags;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index af23596..dc0ce87 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -52,13 +52,6 @@
 
 #include "pvrdma.h"
 
-int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr)
-{
-	/* No support for kernel clients. */
-	return -EOPNOTSUPP;
-}
-
 /**
  * pvrdma_query_srq - query shared receive queue
  * @ibsrq: the shared receive queue to query
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index a51463c..b65d10b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -82,7 +82,8 @@ int pvrdma_query_device(struct ib_device *ibdev,
 	props->max_qp = dev->dsr->caps.max_qp;
 	props->max_qp_wr = dev->dsr->caps.max_qp_wr;
 	props->device_cap_flags = dev->dsr->caps.device_cap_flags;
-	props->max_sge = dev->dsr->caps.max_sge;
+	props->max_send_sge = dev->dsr->caps.max_sge;
+	props->max_recv_sge = dev->dsr->caps.max_sge;
 	props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge,
 					   dev->dsr->caps.max_sge_rd);
 	props->max_srq = dev->dsr->caps.max_srq;
@@ -154,7 +155,8 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
 	props->gid_tbl_len = resp->attrs.gid_tbl_len;
 	props->port_cap_flags =
 		pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
-	props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
+	props->port_cap_flags |= IB_PORT_CM_SUP;
+	props->ip_gids = true;
 	props->max_msg_sz = resp->attrs.max_msg_sz;
 	props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
 	props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index b7b2572..b2e3ab5 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -412,15 +412,10 @@ struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
 			      u32 max_num_sg);
 int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
 		     int sg_nents, unsigned int *sg_offset);
-int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
-int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
-		     struct ib_udata *udata);
 struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
 			       const struct ib_cq_init_attr *attr,
 			       struct ib_ucontext *context,
 			       struct ib_udata *udata);
-int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
-		     struct ib_udata *udata);
 int pvrdma_destroy_cq(struct ib_cq *cq);
 int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
 int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
@@ -435,8 +430,6 @@ int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		      enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
 int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int pvrdma_destroy_srq(struct ib_srq *srq);
-int pvrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr);
 
 struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
 			       struct ib_qp_init_attr *init_attr,
@@ -446,9 +439,9 @@ int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 		    int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
 int pvrdma_destroy_qp(struct ib_qp *qp);
-int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		     struct ib_send_wr **bad_wr);
-int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		     struct ib_recv_wr **bad_wr);
+int pvrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		     const struct ib_send_wr **bad_wr);
+int pvrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		     const struct ib_recv_wr **bad_wr);
 
 #endif /* __PVRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index ba3639a..89ec0f6 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -120,7 +120,8 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd,
 	dev->n_ahs_allocated++;
 	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
-	ah->attr = *ah_attr;
+	rdma_copy_ah_attr(&ah->attr, ah_attr);
+
 	atomic_set(&ah->refcount, 0);
 
 	if (dev->driver_f.notify_new_ah)
@@ -148,6 +149,7 @@ int rvt_destroy_ah(struct ib_ah *ibah)
 	dev->n_ahs_allocated--;
 	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
 
+	rdma_destroy_ah_attr(&ah->attr);
 	kfree(ah);
 
 	return 0;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 41183bd..5ce403c 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -780,14 +780,15 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 	if (!rdi)
 		return ERR_PTR(-EINVAL);
 
-	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
+	if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge ||
 	    init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
 	    init_attr->create_flags)
 		return ERR_PTR(-EINVAL);
 
 	/* Check receive queue parameters if no SRQ is specified. */
 	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
+		if (init_attr->cap.max_recv_sge >
+		    rdi->dparms.props.max_recv_sge ||
 		    init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
 			return ERR_PTR(-EINVAL);
 
@@ -1336,13 +1337,13 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		qp->qp_access_flags = attr->qp_access_flags;
 
 	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
+		rdma_replace_ah_attr(&qp->remote_ah_attr, &attr->ah_attr);
 		qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
 		qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
 	}
 
 	if (attr_mask & IB_QP_ALT_PATH) {
-		qp->alt_ah_attr = attr->alt_ah_attr;
+		rdma_replace_ah_attr(&qp->alt_ah_attr, &attr->alt_ah_attr);
 		qp->s_alt_pkey_index = attr->alt_pkey_index;
 	}
 
@@ -1459,6 +1460,8 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
 	vfree(qp->s_wq);
 	rdi->driver_f.qp_priv_free(rdi, qp);
 	kfree(qp->s_ack_queue);
+	rdma_destroy_ah_attr(&qp->remote_ah_attr);
+	rdma_destroy_ah_attr(&qp->alt_ah_attr);
 	kfree(qp);
 	return 0;
 }
@@ -1535,8 +1538,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
  *
  * Return: 0 on success otherwise errno
  */
-int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		  struct ib_recv_wr **bad_wr)
+int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		  const struct ib_recv_wr **bad_wr)
 {
 	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
 	struct rvt_rwq *wq = qp->r_rq.wq;
@@ -1617,7 +1620,7 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 static inline int rvt_qp_valid_operation(
 	struct rvt_qp *qp,
 	const struct rvt_operation_params *post_parms,
-	struct ib_send_wr *wr)
+	const struct ib_send_wr *wr)
 {
 	int len;
 
@@ -1714,7 +1717,7 @@ static inline int rvt_qp_is_avail(
  * @wr: the work request to send
  */
 static int rvt_post_one_wr(struct rvt_qp *qp,
-			   struct ib_send_wr *wr,
+			   const struct ib_send_wr *wr,
 			   int *call_send)
 {
 	struct rvt_swqe *wqe;
@@ -1888,8 +1891,8 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
  *
  * Return: 0 on success else errno
  */
-int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		  struct ib_send_wr **bad_wr)
+int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		  const struct ib_send_wr **bad_wr)
 {
 	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
 	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
@@ -1945,8 +1948,8 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
  *
  * Return: 0 on success else errno
  */
-int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr)
+int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr)
 {
 	struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
 	struct rvt_rwq *wq;
diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h
index 8409f80..264811f 100644
--- a/drivers/infiniband/sw/rdmavt/qp.h
+++ b/drivers/infiniband/sw/rdmavt/qp.h
@@ -60,10 +60,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 int rvt_destroy_qp(struct ib_qp *ibqp);
 int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		 int attr_mask, struct ib_qp_init_attr *init_attr);
-int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-		  struct ib_recv_wr **bad_wr);
-int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-		  struct ib_send_wr **bad_wr);
-int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-		      struct ib_recv_wr **bad_wr);
+int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+		  const struct ib_recv_wr **bad_wr);
+int rvt_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+		  const struct ib_send_wr **bad_wr);
+int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+		      const struct ib_recv_wr **bad_wr);
 #endif          /* DEF_RVTQP_H */
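
For illustration, not part of the patch: with the constified prototypes above, a
post verb may no longer modify the caller's work-request chain; only the bad_wr
output pointer remains writable. A minimal sketch of a caller under the new
signatures (the function name is hypothetical):

	/* Sketch only -- illustrative caller, not from this patch. */
	#include <rdma/ib_verbs.h>

	static int example_post_one_recv(struct ib_qp *qp, struct ib_sge *sge)
	{
		struct ib_recv_wr wr = {
			.wr_id   = 1,
			.sg_list = sge,
			.num_sge = 1,
		};
		const struct ib_recv_wr *bad_wr;	/* chain is const to the core */

		return ib_post_recv(qp, &wr, &bad_wr);
	}
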
diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c
index 3707952b..78e06fc 100644
--- a/drivers/infiniband/sw/rdmavt/srq.c
+++ b/drivers/infiniband/sw/rdmavt/srq.c
@@ -82,7 +82,7 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd,
 	struct ib_srq *ret;
 
 	if (srq_init_attr->srq_type != IB_SRQT_BASIC)
-		return ERR_PTR(-ENOSYS);
+		return ERR_PTR(-EOPNOTSUPP);
 
 	if (srq_init_attr->attr.max_sge == 0 ||
 	    srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge ||
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 7121e1b..10999fa 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -91,7 +91,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
 	rxe->attr.max_qp			= RXE_MAX_QP;
 	rxe->attr.max_qp_wr			= RXE_MAX_QP_WR;
 	rxe->attr.device_cap_flags		= RXE_DEVICE_CAP_FLAGS;
-	rxe->attr.max_sge			= RXE_MAX_SGE;
+	rxe->attr.max_send_sge			= RXE_MAX_SGE;
+	rxe->attr.max_recv_sge			= RXE_MAX_SGE;
 	rxe->attr.max_sge_rd			= RXE_MAX_SGE_RD;
 	rxe->attr.max_cq			= RXE_MAX_CQ;
 	rxe->attr.max_cqe			= (1 << RXE_MAX_LOG_CQE) - 1;
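
For illustration, not part of the patch: device attributes now advertise
separate send and receive SGE limits, and the rvt/rxe hunks validate QP caps
against the matching direction. A minimal sketch of such a check (the function
name is hypothetical):

	/* Sketch only -- caps validated per direction, not from this patch. */
	#include <linux/errno.h>
	#include <rdma/ib_verbs.h>

	static int example_check_caps(const struct ib_device_attr *attr,
				      const struct ib_qp_cap *cap)
	{
		if (cap->max_send_sge > attr->max_send_sge ||
		    cap->max_recv_sge > attr->max_recv_sge)
			return -EINVAL;
		return 0;
	}
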
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 7f1ae364..26fe8d7 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -55,29 +55,41 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
 void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
 		     struct rdma_ah_attr *attr)
 {
+	const struct ib_global_route *grh = rdma_ah_read_grh(attr);
+
 	memset(av, 0, sizeof(*av));
-	memcpy(&av->grh, rdma_ah_read_grh(attr),
-	       sizeof(*rdma_ah_read_grh(attr)));
+	memcpy(av->grh.dgid.raw, grh->dgid.raw, sizeof(grh->dgid.raw));
+	av->grh.flow_label = grh->flow_label;
+	av->grh.sgid_index = grh->sgid_index;
+	av->grh.hop_limit = grh->hop_limit;
+	av->grh.traffic_class = grh->traffic_class;
 	av->port_num = port_num;
 }
 
 void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr)
 {
+	struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
+
 	attr->type = RDMA_AH_ATTR_TYPE_ROCE;
-	memcpy(rdma_ah_retrieve_grh(attr), &av->grh, sizeof(av->grh));
+
+	memcpy(grh->dgid.raw, av->grh.dgid.raw, sizeof(av->grh.dgid.raw));
+	grh->flow_label = av->grh.flow_label;
+	grh->sgid_index = av->grh.sgid_index;
+	grh->hop_limit = av->grh.hop_limit;
+	grh->traffic_class = av->grh.traffic_class;
+
 	rdma_ah_set_ah_flags(attr, IB_AH_GRH);
 	rdma_ah_set_port_num(attr, av->port_num);
 }
 
-void rxe_av_fill_ip_info(struct rxe_av *av,
-			struct rdma_ah_attr *attr,
-			struct ib_gid_attr *sgid_attr,
-			union ib_gid *sgid)
+void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr)
 {
-	rdma_gid2ip((struct sockaddr *)&av->sgid_addr, sgid);
+	const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr;
+
+	rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
 	rdma_gid2ip((struct sockaddr *)&av->dgid_addr,
 		    &rdma_ah_read_grh(attr)->dgid);
-	av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);
+	av->network_type = rdma_gid_attr_network_type(sgid_attr);
 }
 
 struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 98d470d..83311dd 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -276,6 +276,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
 		if (wqe->wr.opcode != IB_WR_RDMA_READ &&
 		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
+			wqe->status = IB_WC_FATAL_ERR;
 			return COMPST_ERROR;
 		}
 		reset_retry_counters(qp);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index a51ece59..87d14f7 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -43,10 +43,7 @@ void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
 
 void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
 
-void rxe_av_fill_ip_info(struct rxe_av *av,
-			struct rdma_ah_attr *attr,
-			struct ib_gid_attr *sgid_attr,
-			union ib_gid *sgid);
+void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
 
 struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 59ec6d9..8094cba 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -182,39 +182,19 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
 
 #endif
 
-/*
- * Derive the net_device from the av.
- * For physical devices, this will just return rxe->ndev.
- * But for VLAN devices, it will return the vlan dev.
- * Caller should dev_put() the returned net_device.
- */
-static struct net_device *rxe_netdev_from_av(struct rxe_dev *rxe,
-					     int port_num,
-					     struct rxe_av *av)
-{
-	union ib_gid gid;
-	struct ib_gid_attr attr;
-	struct net_device *ndev = rxe->ndev;
-
-	if (ib_get_cached_gid(&rxe->ib_dev, port_num, av->grh.sgid_index,
-			      &gid, &attr) == 0 &&
-	    attr.ndev && attr.ndev != ndev)
-		ndev = attr.ndev;
-	else
-		/* Only to ensure that caller may call dev_put() */
-		dev_hold(ndev);
-
-	return ndev;
-}
-
 static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
 					struct rxe_qp *qp,
 					struct rxe_av *av)
 {
+	const struct ib_gid_attr *attr;
 	struct dst_entry *dst = NULL;
 	struct net_device *ndev;
 
-	ndev = rxe_netdev_from_av(rxe, qp->attr.port_num, av);
+	attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num,
+				 av->grh.sgid_index);
+	if (IS_ERR(attr))
+		return NULL;
+	ndev = attr->ndev;
 
 	if (qp_type(qp) == IB_QPT_RC)
 		dst = sk_dst_get(qp->sk->sk);
@@ -243,9 +223,13 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
 					rt6_get_cookie((struct rt6_info *)dst);
 #endif
 		}
-	}
 
-	dev_put(ndev);
+		if (dst && (qp_type(qp) == IB_QPT_RC)) {
+			dst_hold(dst);
+			sk_dst_set(qp->sk->sk, dst);
+		}
+	}
+	rdma_put_gid_attr(attr);
 	return dst;
 }
 
@@ -418,11 +402,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 	prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
 			 av->grh.traffic_class, av->grh.hop_limit, df, xnet);
 
-	if (qp_type(qp) == IB_QPT_RC)
-		sk_dst_set(qp->sk->sk, dst);
-	else
-		dst_release(dst);
-
+	dst_release(dst);
 	return 0;
 }
 
@@ -450,11 +430,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
 			 av->grh.traffic_class,
 			 av->grh.hop_limit);
 
-	if (qp_type(qp) == IB_QPT_RC)
-		sk_dst_set(qp->sk->sk, dst);
-	else
-		dst_release(dst);
-
+	dst_release(dst);
 	return 0;
 }
 
@@ -536,9 +512,13 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 	unsigned int hdr_len;
 	struct sk_buff *skb;
 	struct net_device *ndev;
+	const struct ib_gid_attr *attr;
 	const int port_num = 1;
 
-	ndev = rxe_netdev_from_av(rxe, port_num, av);
+	attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
+	if (IS_ERR(attr))
+		return NULL;
+	ndev = attr->ndev;
 
 	if (av->network_type == RDMA_NETWORK_IPV4)
 		hdr_len = ETH_HLEN + sizeof(struct udphdr) +
@@ -550,10 +530,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 	skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev),
 			GFP_ATOMIC);
 
-	if (unlikely(!skb)) {
-		dev_put(ndev);
-		return NULL;
-	}
+	if (unlikely(!skb))
+		goto out;
 
 	skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));
 
@@ -568,7 +546,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
 	pkt->hdr	= skb_put_zero(skb, paylen);
 	pkt->mask	|= RXE_GRH_MASK;
 
-	dev_put(ndev);
+out:
+	rdma_put_gid_attr(attr);
 	return skb;
 }
 
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 1b596fb..4555510 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -83,7 +83,7 @@ enum rxe_device_param {
 	RXE_MAX_SGE_RD			= 32,
 	RXE_MAX_CQ			= 16384,
 	RXE_MAX_LOG_CQE			= 15,
-	RXE_MAX_MR			= 2 * 1024,
+	RXE_MAX_MR			= 256 * 1024,
 	RXE_MAX_PD			= 0x7ffc,
 	RXE_MAX_QP_RD_ATOM		= 128,
 	RXE_MAX_EE_RD_ATOM		= 0,
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b9f7aa1..c58452d 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -49,9 +49,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
 		goto err1;
 	}
 
-	if (cap->max_send_sge > rxe->attr.max_sge) {
+	if (cap->max_send_sge > rxe->attr.max_send_sge) {
 		pr_warn("invalid send sge = %d > %d\n",
-			cap->max_send_sge, rxe->attr.max_sge);
+			cap->max_send_sge, rxe->attr.max_send_sge);
 		goto err1;
 	}
 
@@ -62,9 +62,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
 			goto err1;
 		}
 
-		if (cap->max_recv_sge > rxe->attr.max_sge) {
+		if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
 			pr_warn("invalid recv sge = %d > %d\n",
-				cap->max_recv_sge, rxe->attr.max_sge);
+				cap->max_recv_sge, rxe->attr.max_recv_sge);
 			goto err1;
 		}
 	}
@@ -580,9 +580,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 		     struct ib_udata *udata)
 {
 	int err;
-	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
-	union ib_gid sgid;
-	struct ib_gid_attr sgid_attr;
 
 	if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
 		int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
@@ -623,30 +620,14 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 		qp->attr.qkey = attr->qkey;
 
 	if (mask & IB_QP_AV) {
-		ib_get_cached_gid(&rxe->ib_dev, 1,
-				  rdma_ah_read_grh(&attr->ah_attr)->sgid_index,
-				  &sgid, &sgid_attr);
 		rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr);
-		rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr,
-				    &sgid_attr, &sgid);
-		if (sgid_attr.ndev)
-			dev_put(sgid_attr.ndev);
+		rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr);
 	}
 
 	if (mask & IB_QP_ALT_PATH) {
-		u8 sgid_index =
-			rdma_ah_read_grh(&attr->alt_ah_attr)->sgid_index;
-
-		ib_get_cached_gid(&rxe->ib_dev, 1, sgid_index,
-				  &sgid, &sgid_attr);
-
 		rxe_av_from_attr(attr->alt_port_num, &qp->alt_av,
 				 &attr->alt_ah_attr);
-		rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr,
-				    &sgid_attr, &sgid);
-		if (sgid_attr.ndev)
-			dev_put(sgid_attr.ndev);
-
+		rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr);
 		qp->attr.alt_port_num = attr->alt_port_num;
 		qp->attr.alt_pkey_index = attr->alt_pkey_index;
 		qp->attr.alt_timeout = attr->alt_timeout;
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index dfba44a..d30dbac 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -225,9 +225,14 @@ static int hdr_check(struct rxe_pkt_info *pkt)
 		goto err1;
 	}
 
+	if (unlikely(qpn == 0)) {
+		pr_warn_once("QP 0 not supported");
+		goto err1;
+	}
+
 	if (qpn != IB_MULTICAST_QPN) {
-		index = (qpn == 0) ? port->qp_smi_index :
-			((qpn == 1) ? port->qp_gsi_index : qpn);
+		index = (qpn == 1) ? port->qp_gsi_index : qpn;
+
 		qp = rxe_pool_get_index(&rxe->qp_pool, index);
 		if (unlikely(!qp)) {
 			pr_warn_ratelimited("no qp matches qpn 0x%x\n", qpn);
@@ -256,8 +261,7 @@ static int hdr_check(struct rxe_pkt_info *pkt)
 	return 0;
 
 err2:
-	if (qp)
-		rxe_drop_ref(qp);
+	rxe_drop_ref(qp);
 err1:
 	return -EINVAL;
 }
@@ -328,6 +332,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
 
 static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
 {
+	const struct ib_gid_attr *gid_attr;
 	union ib_gid dgid;
 	union ib_gid *pdgid;
 
@@ -339,9 +344,14 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
 		pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;
 	}
 
-	return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,
-					  IB_GID_TYPE_ROCE_UDP_ENCAP,
-					  1, skb->dev, NULL);
+	gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid,
+					 IB_GID_TYPE_ROCE_UDP_ENCAP,
+					 1, skb->dev);
+	if (IS_ERR(gid_attr))
+		return PTR_ERR(gid_attr);
+
+	rdma_put_gid_attr(gid_attr);
+	return 0;
 }
 
 /* rxe_rcv is called from the interface driver */
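
For illustration, not part of the patch: the rxe hunks above replace the
copy-out GID cache calls with lookups that return a reference-counted
const struct ib_gid_attr; the attribute pins its ndev, so callers release the
attribute with rdma_put_gid_attr() rather than dev_put()-ing a net_device. A
minimal sketch of the lookup/release pattern (the function name is
hypothetical):

	/* Sketch only -- not from this patch. */
	#include <linux/err.h>
	#include <linux/netdevice.h>
	#include <rdma/ib_cache.h>

	static struct net_device *example_gid_ndev(struct ib_device *dev, u8 port,
						   int index)
	{
		const struct ib_gid_attr *attr;
		struct net_device *ndev = NULL;

		attr = rdma_get_gid_attr(dev, port, index);
		if (IS_ERR(attr))
			return NULL;

		if (attr->ndev) {
			ndev = attr->ndev;
			dev_hold(ndev);		/* keep ndev past the put below */
		}

		rdma_put_gid_attr(attr);
		return ndev;
	}
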
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 5b57de3..aa58333 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -884,6 +884,11 @@ static enum resp_states do_complete(struct rxe_qp *qp,
 			else
 				wc->network_hdr_type = RDMA_NETWORK_IPV6;
 
+			if (is_vlan_dev(skb->dev)) {
+				wc->wc_flags |= IB_WC_WITH_VLAN;
+				wc->vlan_id = vlan_dev_vlan_id(skb->dev);
+			}
+
 			if (pkt->mask & RXE_IMMDT_MASK) {
 				wc->wc_flags |= IB_WC_WITH_IMM;
 				wc->ex.imm_data = immdt_imm(pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9deafc3..f5b1e0a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -222,25 +222,11 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd)
 	return 0;
 }
 
-static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
-		       struct rxe_av *av)
+static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
+			struct rxe_av *av)
 {
-	int err;
-	union ib_gid sgid;
-	struct ib_gid_attr sgid_attr;
-
-	err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
-				rdma_ah_read_grh(attr)->sgid_index, &sgid,
-				&sgid_attr);
-	if (err) {
-		pr_err("Failed to query sgid. err = %d\n", err);
-		return err;
-	}
-
 	rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
-	rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid);
-	dev_put(sgid_attr.ndev);
-	return 0;
+	rxe_av_fill_ip_info(av, attr);
 }
 
 static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
@@ -255,28 +241,17 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
 
 	err = rxe_av_chk_attr(rxe, attr);
 	if (err)
-		goto err1;
+		return ERR_PTR(err);
 
 	ah = rxe_alloc(&rxe->ah_pool);
-	if (!ah) {
-		err = -ENOMEM;
-		goto err1;
-	}
+	if (!ah)
+		return ERR_PTR(-ENOMEM);
 
 	rxe_add_ref(pd);
 	ah->pd = pd;
 
-	err = rxe_init_av(rxe, attr, &ah->av);
-	if (err)
-		goto err2;
-
+	rxe_init_av(rxe, attr, &ah->av);
 	return &ah->ibah;
-
-err2:
-	rxe_drop_ref(pd);
-	rxe_drop_ref(ah);
-err1:
-	return ERR_PTR(err);
 }
 
 static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
@@ -289,10 +264,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 	if (err)
 		return err;
 
-	err = rxe_init_av(rxe, attr, &ah->av);
-	if (err)
-		return err;
-
+	rxe_init_av(rxe, attr, &ah->av);
 	return 0;
 }
 
@@ -315,7 +287,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah)
 	return 0;
 }
 
-static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
+static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
 {
 	int err;
 	int i;
@@ -466,8 +438,8 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq)
 	return 0;
 }
 
-static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			     struct ib_recv_wr **bad_wr)
+static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+			     const struct ib_recv_wr **bad_wr)
 {
 	int err = 0;
 	unsigned long flags;
@@ -582,7 +554,7 @@ static int rxe_destroy_qp(struct ib_qp *ibqp)
 	return 0;
 }
 
-static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
+static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 			    unsigned int mask, unsigned int length)
 {
 	int num_sge = ibwr->num_sge;
@@ -610,7 +582,7 @@ static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 }
 
 static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
-			 struct ib_send_wr *ibwr)
+			 const struct ib_send_wr *ibwr)
 {
 	wr->wr_id = ibwr->wr_id;
 	wr->num_sge = ibwr->num_sge;
@@ -665,7 +637,7 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
 	}
 }
 
-static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
+static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 			 unsigned int mask, unsigned int length,
 			 struct rxe_send_wqe *wqe)
 {
@@ -713,7 +685,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 	return 0;
 }
 
-static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
+static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 			 unsigned int mask, u32 length)
 {
 	int err;
@@ -754,8 +726,8 @@ static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
 	return err;
 }
 
-static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
-				struct ib_send_wr **bad_wr)
+static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
+				const struct ib_send_wr **bad_wr)
 {
 	int err = 0;
 	unsigned int mask;
@@ -797,8 +769,8 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
 	return err;
 }
 
-static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			 struct ib_send_wr **bad_wr)
+static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+			 const struct ib_send_wr **bad_wr)
 {
 	struct rxe_qp *qp = to_rqp(ibqp);
 
@@ -820,8 +792,8 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		return rxe_post_send_kernel(qp, wr, bad_wr);
 }
 
-static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr)
+static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+			 const struct ib_recv_wr **bad_wr)
 {
 	int err = 0;
 	struct rxe_qp *qp = to_rqp(ibqp);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index a50b062..1abe3c6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -91,11 +91,9 @@ enum {
 	IPOIB_STOP_REAPER	  = 7,
 	IPOIB_FLAG_ADMIN_CM	  = 9,
 	IPOIB_FLAG_UMCAST	  = 10,
-	IPOIB_STOP_NEIGH_GC	  = 11,
 	IPOIB_NEIGH_TBL_FLUSH	  = 12,
 	IPOIB_FLAG_DEV_ADDR_SET	  = 13,
 	IPOIB_FLAG_DEV_ADDR_CTRL  = 14,
-	IPOIB_FLAG_GOING_DOWN	  = 15,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -252,11 +250,11 @@ struct ipoib_cm_tx {
 	struct ipoib_neigh  *neigh;
 	struct ipoib_path   *path;
 	struct ipoib_tx_buf *tx_ring;
-	unsigned	     tx_head;
-	unsigned	     tx_tail;
+	unsigned int	     tx_head;
+	unsigned int	     tx_tail;
 	unsigned long	     flags;
 	u32		     mtu;
-	unsigned             max_send_sge;
+	unsigned int         max_send_sge;
 };
 
 struct ipoib_cm_rx_buf {
@@ -325,15 +323,22 @@ struct ipoib_dev_priv {
 	spinlock_t lock;
 
 	struct net_device *dev;
+	void (*next_priv_destructor)(struct net_device *dev);
 
 	struct napi_struct send_napi;
 	struct napi_struct recv_napi;
 
 	unsigned long flags;
 
+	/*
+	 * This protects access to the child_intfs list.
+	 * To READ from child_intfs the RTNL or vlan_rwsem read side must be
+	 * held.  To WRITE RTNL and the vlan_rwsem write side must be held (in
+	 * that order) This lock exists because we have a few contexts where
+	 * we need the child_intfs, but do not want to grab the RTNL.
+	 */
 	struct rw_semaphore vlan_rwsem;
 	struct mutex mcast_mutex;
-	struct mutex sysfs_mutex;
 
 	struct rb_root  path_tree;
 	struct list_head path_list;
@@ -373,8 +378,8 @@ struct ipoib_dev_priv {
 	struct ipoib_rx_buf *rx_ring;
 
 	struct ipoib_tx_buf *tx_ring;
-	unsigned	     tx_head;
-	unsigned	     tx_tail;
+	unsigned int	     tx_head;
+	unsigned int	     tx_tail;
 	struct ib_sge	     tx_sge[MAX_SKB_FRAGS + 1];
 	struct ib_ud_wr      tx_wr;
 	struct ib_wc	     send_wc[MAX_SEND_CQE];
@@ -404,7 +409,7 @@ struct ipoib_dev_priv {
 #endif
 	u64	hca_caps;
 	struct ipoib_ethtool_st ethtool;
-	unsigned max_send_sge;
+	unsigned int max_send_sge;
 	bool sm_fullmember_sendonly_support;
 	const struct net_device_ops	*rn_ops;
 };
@@ -414,7 +419,7 @@ struct ipoib_ah {
 	struct ib_ah	  *ah;
 	struct list_head   list;
 	struct kref	   ref;
-	unsigned	   last_send;
+	unsigned int	   last_send;
 	int  		   valid;
 };
 
@@ -483,6 +488,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
 	kref_put(&ah->ref, ipoib_free_ah);
 }
 int ipoib_open(struct net_device *dev);
+void ipoib_intf_free(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 int ipoib_add_umcast_attr(struct net_device *dev);
 
@@ -510,9 +516,6 @@ void ipoib_ib_dev_down(struct net_device *dev);
 int ipoib_ib_dev_stop_default(struct net_device *dev);
 void ipoib_pkey_dev_check_presence(struct net_device *dev);
 
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
-void ipoib_dev_cleanup(struct net_device *dev);
-
 void ipoib_mcast_join_task(struct work_struct *work);
 void ipoib_mcast_carrier_on_task(struct work_struct *work);
 void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
@@ -600,7 +603,6 @@ void ipoib_pkey_open(struct ipoib_dev_priv *priv);
 void ipoib_drain_cq(struct net_device *dev);
 
 void ipoib_set_ethtool_ops(struct net_device *dev);
-void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
 
 #define IPOIB_FLAGS_RC		0x80
 #define IPOIB_FLAGS_UC		0x40
@@ -729,7 +731,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
 static inline
 int ipoib_cm_dev_init(struct net_device *dev)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 6535d9b..ea01b8d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -78,7 +78,7 @@ static struct ib_send_wr ipoib_cm_rx_drain_wr = {
 };
 
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
-			       struct ib_cm_event *event);
+			       const struct ib_cm_event *event);
 
 static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
 				  u64 mapping[IPOIB_CM_RX_SG])
@@ -94,7 +94,6 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
 static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
-	struct ib_recv_wr *bad_wr;
 	int i, ret;
 
 	priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
@@ -102,7 +101,7 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
 	for (i = 0; i < priv->cm.num_frags; ++i)
 		priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
 
-	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+	ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
 		ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
@@ -120,7 +119,6 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
 					struct ib_sge *sge, int id)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
-	struct ib_recv_wr *bad_wr;
 	int i, ret;
 
 	wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
@@ -128,7 +126,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
 	for (i = 0; i < IPOIB_CM_RX_SG; ++i)
 		sge[i].addr = rx->rx_ring[id].mapping[i];
 
-	ret = ib_post_recv(rx->qp, wr, &bad_wr);
+	ret = ib_post_recv(rx->qp, wr, NULL);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
 		ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
@@ -212,7 +210,6 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev,
 
 static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
 {
-	struct ib_send_wr *bad_wr;
 	struct ipoib_cm_rx *p;
 
 	/* We only reserved 1 extra slot in CQ for drain WRs, so
@@ -227,7 +224,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
 	 */
 	p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
 	ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
-	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
+	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL))
 		ipoib_warn(priv, "failed to post drain wr\n");
 
 	list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
@@ -275,7 +272,7 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
 
 static int ipoib_cm_modify_rx_qp(struct net_device *dev,
 				 struct ib_cm_id *cm_id, struct ib_qp *qp,
-				 unsigned psn)
+				 unsigned int psn)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ib_qp_attr qp_attr;
@@ -363,7 +360,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
 	if (!rx->rx_ring)
 		return -ENOMEM;
 
-	t = kmalloc(sizeof *t, GFP_KERNEL);
+	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (!t) {
 		ret = -ENOMEM;
 		goto err_free_1;
@@ -421,8 +418,9 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
 }
 
 static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
-			     struct ib_qp *qp, struct ib_cm_req_event_param *req,
-			     unsigned psn)
+			     struct ib_qp *qp,
+			     const struct ib_cm_req_event_param *req,
+			     unsigned int psn)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_data data = {};
@@ -432,7 +430,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
 	data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
 
 	rep.private_data = &data;
-	rep.private_data_len = sizeof data;
+	rep.private_data_len = sizeof(data);
 	rep.flow_control = 0;
 	rep.rnr_retry_count = req->rnr_retry_count;
 	rep.srq = ipoib_cm_has_srq(dev);
@@ -441,16 +439,17 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
 	return ib_send_cm_rep(cm_id, &rep);
 }
 
-static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
+				const struct ib_cm_event *event)
 {
 	struct net_device *dev = cm_id->context;
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_rx *p;
-	unsigned psn;
+	unsigned int psn;
 	int ret;
 
 	ipoib_dbg(priv, "REQ arrived\n");
-	p = kzalloc(sizeof *p, GFP_KERNEL);
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 	p->dev = dev;
@@ -503,7 +502,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
 }
 
 static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
-			       struct ib_cm_event *event)
+			       const struct ib_cm_event *event)
 {
 	struct ipoib_cm_rx *p;
 	struct ipoib_dev_priv *priv;
@@ -547,7 +546,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
 					   0, PAGE_SIZE);
 			--skb_shinfo(skb)->nr_frags;
 		} else {
-			size = min(length, (unsigned) PAGE_SIZE);
+			size = min_t(unsigned int, length, PAGE_SIZE);
 
 			skb_frag_size_set(frag, size);
 			skb->data_len += size;
@@ -641,8 +640,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		}
 	}
 
-	frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
-					      (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
+	frags = PAGE_ALIGN(wc->byte_len -
+			   min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) /
+		PAGE_SIZE;
 
 	newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
 				       mapping, GFP_ATOMIC);
@@ -657,7 +657,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	}
 
 	ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
-	memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
+	memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));
 
 	ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
 		       wc->byte_len, wc->slid);
@@ -698,13 +698,11 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ipoib_tx_buf *tx_req)
 {
-	struct ib_send_wr *bad_wr;
-
 	ipoib_build_sge(priv, tx_req);
 
 	priv->tx_wr.wr.wr_id	= wr_id | IPOIB_OP_CM;
 
-	return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr);
+	return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL);
 }
 
 void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
@@ -712,7 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_tx_buf *tx_req;
 	int rc;
-	unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
+	unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb);
 
 	if (unlikely(skb->len > tx->mtu)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -982,7 +980,8 @@ void ipoib_cm_dev_stop(struct net_device *dev)
 	cancel_delayed_work(&priv->cm.stale_task);
 }
 
-static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
+				const struct ib_cm_event *event)
 {
 	struct ipoib_cm_tx *p = cm_id->context;
 	struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
@@ -1068,8 +1067,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
 	struct ib_qp *tx_qp;
 
 	if (dev->features & NETIF_F_SG)
-		attr.cap.max_send_sge =
-			min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
+		attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
+					      MAX_SKB_FRAGS + 1);
 
 	tx_qp = ib_create_qp(priv->pd, &attr);
 	tx->max_send_sge = attr.cap.max_send_sge;
@@ -1094,7 +1093,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
 	req.qp_num			= qp->qp_num;
 	req.qp_type			= qp->qp_type;
 	req.private_data		= &data;
-	req.private_data_len		= sizeof data;
+	req.private_data_len		= sizeof(data);
 	req.flow_control		= 0;
 
 	req.starting_psn		= 0; /* FIXME */
@@ -1152,7 +1151,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
 		ret = -ENOMEM;
 		goto err_tx;
 	}
-	memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
+	memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring));
 
 	p->qp = ipoib_cm_create_tx_qp(p->dev, p);
 	memalloc_noio_restore(noio_flag);
@@ -1248,7 +1247,7 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
 }
 
 static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
-			       struct ib_cm_event *event)
+			       const struct ib_cm_event *event)
 {
 	struct ipoib_cm_tx *tx = cm_id->context;
 	struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
@@ -1305,7 +1304,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	struct ipoib_cm_tx *tx;
 
-	tx = kzalloc(sizeof *tx, GFP_ATOMIC);
+	tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
 	if (!tx)
 		return NULL;
 
@@ -1370,7 +1369,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
 				neigh->daddr + QPN_AND_OPTIONS_OFFSET);
 			goto free_neigh;
 		}
-		memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
+		memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec));
 
 		spin_unlock_irqrestore(&priv->lock, flags);
 		netif_tx_unlock_bh(dev);
@@ -1428,7 +1427,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 	struct net_device *dev = priv->dev;
 	struct sk_buff *skb;
 	unsigned long flags;
-	unsigned mtu = priv->mcast_mtu;
+	unsigned int mtu = priv->mcast_mtu;
 
 	netif_tx_lock_bh(dev);
 	spin_lock_irqsave(&priv->lock, flags);
@@ -1518,19 +1517,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 {
 	struct net_device *dev = to_net_dev(d);
 	int ret;
-	struct ipoib_dev_priv *priv = ipoib_priv(dev);
-
-	if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
-		return -EPERM;
-
-	if (!mutex_trylock(&priv->sysfs_mutex))
-		return restart_syscall();
 
 	if (!rtnl_trylock()) {
-		mutex_unlock(&priv->sysfs_mutex);
 		return restart_syscall();
 	}
 
+	if (dev->reg_state != NETREG_REGISTERED) {
+		rtnl_unlock();
+		return -EPERM;
+	}
+
 	ret = ipoib_set_mode(dev, buf);
 
 	/* The assumption is that the function ipoib_set_mode returned
@@ -1539,7 +1535,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 	 */
 	if (ret != -EBUSY)
 		rtnl_unlock();
-	mutex_unlock(&priv->sysfs_mutex);
 
 	return (!ret || ret == -EBUSY) ? count : ret;
 }
@@ -1564,7 +1559,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
 
 	priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
 	if (IS_ERR(priv->cm.srq)) {
-		if (PTR_ERR(priv->cm.srq) != -ENOSYS)
+		if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP)
 			pr_warn("%s: failed to allocate SRQ, error %ld\n",
 			       priv->ca->name, PTR_ERR(priv->cm.srq));
 		priv->cm.srq = NULL;
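
For illustration, not part of the patch: the ipoib hunks above drop their local
bad_wr variables and pass NULL, which the core post verbs now accept; with a
single unchained WR the failing request is always the one just posted, so the
output pointer adds nothing. A minimal sketch (the function name is
hypothetical):

	/* Sketch only -- single-WR post where bad_wr carries no information. */
	#include <rdma/ib_verbs.h>

	static int example_repost_srq(struct ib_srq *srq,
				      const struct ib_recv_wr *wr)
	{
		/* NULL bad_wr: caller does not care which WR failed */
		return ib_post_srq_recv(srq, wr, NULL);
	}
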
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 2706bf26..8342992 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -102,7 +102,7 @@ static int ipoib_set_coalesce(struct net_device *dev,
 	ret = rdma_set_cq_moderation(priv->recv_cq,
 				     coal->rx_max_coalesced_frames,
 				     coal->rx_coalesce_usecs);
-	if (ret && ret != -ENOSYS) {
+	if (ret && ret != -EOPNOTSUPP) {
 		ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
 		return ret;
 	}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index ea302b0..17848802 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -262,15 +262,15 @@ static const struct file_operations ipoib_path_fops = {
 void ipoib_create_debug_files(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
-	char name[IFNAMSIZ + sizeof "_path"];
+	char name[IFNAMSIZ + sizeof("_path")];
 
-	snprintf(name, sizeof name, "%s_mcg", dev->name);
+	snprintf(name, sizeof(name), "%s_mcg", dev->name);
 	priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
 					       ipoib_root, dev, &ipoib_mcg_fops);
 	if (!priv->mcg_dentry)
 		ipoib_warn(priv, "failed to create mcg debug file\n");
 
-	snprintf(name, sizeof name, "%s_path", dev->name);
+	snprintf(name, sizeof(name), "%s_path", dev->name);
 	priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
 						ipoib_root, dev, &ipoib_path_fops);
 	if (!priv->path_dentry)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f47f9ac..9006a13 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -40,6 +40,7 @@
 
 #include <linux/ip.h>
 #include <linux/tcp.h>
+#include <rdma/ib_cache.h>
 
 #include "ipoib.h"
 
@@ -57,7 +58,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
 	struct ipoib_ah *ah;
 	struct ib_ah *vah;
 
-	ah = kmalloc(sizeof *ah, GFP_KERNEL);
+	ah = kmalloc(sizeof(*ah), GFP_KERNEL);
 	if (!ah)
 		return ERR_PTR(-ENOMEM);
 
@@ -100,7 +101,6 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
 static int ipoib_ib_post_receive(struct net_device *dev, int id)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
-	struct ib_recv_wr *bad_wr;
 	int ret;
 
 	priv->rx_wr.wr_id   = id | IPOIB_OP_RECV;
@@ -108,7 +108,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
 	priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];
 
 
-	ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
+	ret = ib_post_recv(priv->qp, &priv->rx_wr, NULL);
 	if (unlikely(ret)) {
 		ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
 		ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
@@ -202,7 +202,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	}
 
 	memcpy(mapping, priv->rx_ring[wr_id].mapping,
-	       IPOIB_UD_RX_SG * sizeof *mapping);
+	       IPOIB_UD_RX_SG * sizeof(*mapping));
 
 	/*
 	 * If we can't allocate a new RX buffer, dump
@@ -541,7 +541,6 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 			    struct ipoib_tx_buf *tx_req,
 			    void *head, int hlen)
 {
-	struct ib_send_wr *bad_wr;
 	struct sk_buff *skb = tx_req->skb;
 
 	ipoib_build_sge(priv, tx_req);
@@ -558,7 +557,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 	} else
 		priv->tx_wr.wr.opcode	= IB_WR_SEND;
 
-	return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
+	return ib_post_send(priv->qp, &priv->tx_wr.wr, NULL);
 }
 
 int ipoib_send(struct net_device *dev, struct sk_buff *skb,
@@ -568,7 +567,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	struct ipoib_tx_buf *tx_req;
 	int hlen, rc;
 	void *phead;
-	unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);
+	unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb);
 
 	if (skb_is_gso(skb)) {
 		hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -1069,7 +1068,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
 	bool ret = false;
 
 	netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
-	if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
+	if (rdma_query_gid(priv->ca, priv->port, 0, &gid0))
 		return false;
 
 	netif_addr_lock_bh(priv->dev);
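
For illustration, not part of the patch: ib_query_gid() with a NULL attribute
argument is replaced above by rdma_query_gid(), which only copies out the GID
and never hands back a gid_attr that would need releasing. A minimal sketch
(the function name is hypothetical):

	/* Sketch only -- not from this patch. */
	#include <linux/string.h>
	#include <rdma/ib_cache.h>

	static bool example_port_gid_matches(struct ib_device *dev, u8 port,
					     const union ib_gid *expected)
	{
		union ib_gid gid;

		/* cached lookup of GID index 0, no reference taken */
		if (rdma_query_gid(dev, port, 0, &gid))
			return false;

		return !memcmp(&gid, expected, sizeof(gid));
	}
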
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 26cde95..e3d28f9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -215,11 +215,6 @@ static int ipoib_stop(struct net_device *dev)
 	return 0;
 }
 
-static void ipoib_uninit(struct net_device *dev)
-{
-	ipoib_dev_cleanup(dev);
-}
-
 static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -634,7 +629,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
 {
 	struct ipoib_path_iter *iter;
 
-	iter = kmalloc(sizeof *iter, GFP_KERNEL);
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter)
 		return NULL;
 
@@ -770,8 +765,10 @@ static void path_rec_completion(int status,
 		struct rdma_ah_attr av;
 
 		if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
-					       pathrec, &av))
+					       pathrec, &av, NULL)) {
 			ah = ipoib_create_ah(dev, priv->pd, &av);
+			rdma_destroy_ah_attr(&av);
+		}
 	}
 
 	spin_lock_irqsave(&priv->lock, flags);
@@ -883,7 +880,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
 	if (!priv->broadcast)
 		return NULL;
 
-	path = kzalloc(sizeof *path, GFP_ATOMIC);
+	path = kzalloc(sizeof(*path), GFP_ATOMIC);
 	if (!path)
 		return NULL;
 
@@ -1199,11 +1196,13 @@ static void ipoib_timeout(struct net_device *dev)
 static int ipoib_hard_header(struct sk_buff *skb,
 			     struct net_device *dev,
 			     unsigned short type,
-			     const void *daddr, const void *saddr, unsigned len)
+			     const void *daddr,
+			     const void *saddr,
+			     unsigned int len)
 {
 	struct ipoib_header *header;
 
-	header = skb_push(skb, sizeof *header);
+	header = skb_push(skb, sizeof(*header));
 
 	header->proto = htons(type);
 	header->reserved = 0;
@@ -1306,9 +1305,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
 	int i;
 	LIST_HEAD(remove_list);
 
-	if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		return;
-
 	spin_lock_irqsave(&priv->lock, flags);
 
 	htbl = rcu_dereference_protected(ntbl->htbl,
@@ -1320,9 +1316,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
 	/* neigh is obsolete if it was idle for two GC periods */
 	dt = 2 * arp_tbl.gc_interval;
 	neigh_obsolete = jiffies - dt;
-	/* handle possible race condition */
-	if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		goto out_unlock;
 
 	for (i = 0; i < htbl->size; i++) {
 		struct ipoib_neigh *neigh;
@@ -1360,9 +1353,8 @@ static void ipoib_reap_neigh(struct work_struct *work)
 
 	__ipoib_reap_neigh(priv);
 
-	if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		queue_delayed_work(priv->wq, &priv->neigh_reap_task,
-				   arp_tbl.gc_interval);
+	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
+			   arp_tbl.gc_interval);
 }
 
 
@@ -1371,7 +1363,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
 {
 	struct ipoib_neigh *neigh;
 
-	neigh = kzalloc(sizeof *neigh, GFP_ATOMIC);
+	neigh = kzalloc(sizeof(*neigh), GFP_ATOMIC);
 	if (!neigh)
 		return NULL;
 
@@ -1524,9 +1516,8 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 	htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
 	if (!htbl)
 		return -ENOMEM;
-	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 	size = roundup_pow_of_two(arp_tbl.gc_thresh3);
-	buckets = kcalloc(size, sizeof(*buckets), GFP_KERNEL);
+	buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL);
 	if (!buckets) {
 		kfree(htbl);
 		return -ENOMEM;
@@ -1539,7 +1530,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 	atomic_set(&ntbl->entries, 0);
 
 	/* start garbage collection */
-	clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
 			   arp_tbl.gc_interval);
 
@@ -1554,7 +1544,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
 	struct ipoib_neigh __rcu **buckets = htbl->buckets;
 	struct ipoib_neigh_table *ntbl = htbl->ntbl;
 
-	kfree(buckets);
+	kvfree(buckets);
 	kfree(htbl);
 	complete(&ntbl->deleted);
 }
@@ -1649,15 +1639,11 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
 static void ipoib_neigh_hash_uninit(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
-	int stopped;
 
 	ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
 	init_completion(&priv->ntbl.deleted);
 
-	/* Stop GC if called at init fail need to cancel work */
-	stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-	if (!stopped)
-		cancel_delayed_work(&priv->neigh_reap_task);
+	cancel_delayed_work_sync(&priv->neigh_reap_task);
 
 	ipoib_flush_neighs(priv);
 
@@ -1755,13 +1741,11 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr,
 	return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd);
 }
 
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+static int ipoib_dev_init(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 	int ret = -ENOMEM;
 
-	priv->ca = ca;
-	priv->port = port;
 	priv->qp = NULL;
 
 	/*
@@ -1777,7 +1761,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	/* create pd, which used both for control and datapath*/
 	priv->pd = ib_alloc_pd(priv->ca, 0);
 	if (IS_ERR(priv->pd)) {
-		pr_warn("%s: failed to allocate PD\n", ca->name);
+		pr_warn("%s: failed to allocate PD\n", priv->ca->name);
 		goto clean_wq;
 	}
 
@@ -1787,7 +1771,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 		goto out_free_pd;
 	}
 
-	if (ipoib_neigh_hash_init(priv) < 0) {
+	ret = ipoib_neigh_hash_init(priv);
+	if (ret) {
 		pr_warn("%s failed to init neigh hash\n", dev->name);
 		goto out_dev_uninit;
 	}
@@ -1796,12 +1781,15 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 		if (ipoib_ib_dev_open(dev)) {
 			pr_warn("%s failed to open device\n", dev->name);
 			ret = -ENODEV;
-			goto out_dev_uninit;
+			goto out_hash_uninit;
 		}
 	}
 
 	return 0;
 
+out_hash_uninit:
+	ipoib_neigh_hash_uninit(dev);
+
 out_dev_uninit:
 	ipoib_ib_dev_cleanup(dev);
 
@@ -1821,21 +1809,151 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 	return ret;
 }
 
-void ipoib_dev_cleanup(struct net_device *dev)
+/*
+ * This must be called before doing an unregister_netdev on a parent device to
+ * shut down the IB event handler.
+ */
+static void ipoib_parent_unregister_pre(struct net_device *ndev)
 {
-	struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv;
-	LIST_HEAD(head);
+	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
+
+	/*
+	 * ipoib_set_mac checks netif_running before pushing work; clearing
+	 * running ensures it will not add more work.
+	 */
+	rtnl_lock();
+	dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
+	rtnl_unlock();
+
+	/* ipoib_event() cannot be running once this returns */
+	ib_unregister_event_handler(&priv->event_handler);
+
+	/*
+	 * Work on the queue grabs the rtnl lock, so this cannot be done while
+	 * also holding it.
+	 */
+	flush_workqueue(ipoib_workqueue);
+}
+
+static void ipoib_set_dev_features(struct ipoib_dev_priv *priv)
+{
+	priv->hca_caps = priv->ca->attrs.device_cap_flags;
+
+	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
+		priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+
+		if (priv->hca_caps & IB_DEVICE_UD_TSO)
+			priv->dev->hw_features |= NETIF_F_TSO;
+
+		priv->dev->features |= priv->dev->hw_features;
+	}
+}
+
+static int ipoib_parent_init(struct net_device *ndev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
+	struct ib_port_attr attr;
+	int result;
+
+	result = ib_query_port(priv->ca, priv->port, &attr);
+	if (result) {
+		pr_warn("%s: ib_query_port %d failed\n", priv->ca->name,
+			priv->port);
+		return result;
+	}
+	priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+
+	result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
+	if (result) {
+		pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
+			priv->ca->name, priv->port, result);
+		return result;
+	}
+
+	result = rdma_query_gid(priv->ca, priv->port, 0, &priv->local_gid);
+	if (result) {
+		pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n",
+			priv->ca->name, priv->port, result);
+		return result;
+	}
+	memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
+	       sizeof(union ib_gid));
+
+	SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent);
+	priv->dev->dev_id = priv->port - 1;
+
+	return 0;
+}
+
+static void ipoib_child_init(struct net_device *ndev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
+	struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
+
+	dev_hold(priv->parent);
+
+	down_write(&ppriv->vlan_rwsem);
+	list_add_tail(&priv->list, &ppriv->child_intfs);
+	up_write(&ppriv->vlan_rwsem);
+
+	priv->max_ib_mtu = ppriv->max_ib_mtu;
+	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
+	memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
+	memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
+}
+
+static int ipoib_ndo_init(struct net_device *ndev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
+	int rc;
+
+	if (priv->parent) {
+		ipoib_child_init(ndev);
+	} else {
+		rc = ipoib_parent_init(ndev);
+		if (rc)
+			return rc;
+	}
+
+	/* MTU will be reset when mcast join happens */
+	ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
+	priv->mcast_mtu = priv->admin_mtu = ndev->mtu;
+	ndev->max_mtu = IPOIB_CM_MTU;
+
+	ndev->neigh_priv_len = sizeof(struct ipoib_neigh);
+
+	/*
+	 * Set the full membership bit, so that we join the right
+	 * broadcast group, etc.
+	 */
+	priv->pkey |= 0x8000;
+
+	ndev->broadcast[8] = priv->pkey >> 8;
+	ndev->broadcast[9] = priv->pkey & 0xff;
+	set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
+
+	ipoib_set_dev_features(priv);
+
+	rc = ipoib_dev_init(ndev);
+	if (rc) {
+		pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n",
+			priv->ca->name, priv->dev->name, priv->port, rc);
+	}
+
+	return 0;
+}
+
+static void ipoib_ndo_uninit(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	ASSERT_RTNL();
 
-	/* Delete any child interfaces first */
-	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
-		/* Stop GC on child */
-		set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags);
-		cancel_delayed_work(&cpriv->neigh_reap_task);
-		unregister_netdevice_queue(cpriv->dev, &head);
-	}
-	unregister_netdevice_many(&head);
+	/*
+	 * ipoib_remove_one guarantees the children are removed before the
+	 * parent, and that is the only place where a parent can be removed.
+	 */
+	WARN_ON(!list_empty(&priv->child_intfs));
 
 	ipoib_neigh_hash_uninit(dev);
 
@@ -1847,6 +1965,16 @@ void ipoib_dev_cleanup(struct net_device *dev)
 		destroy_workqueue(priv->wq);
 		priv->wq = NULL;
 	}
+
+	if (priv->parent) {
+		struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
+
+		down_write(&ppriv->vlan_rwsem);
+		list_del(&priv->list);
+		up_write(&ppriv->vlan_rwsem);
+
+		dev_put(priv->parent);
+	}
 }
 
 static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
@@ -1894,7 +2022,8 @@ static const struct header_ops ipoib_header_ops = {
 };
 
 static const struct net_device_ops ipoib_netdev_ops_pf = {
-	.ndo_uninit		 = ipoib_uninit,
+	.ndo_init		 = ipoib_ndo_init,
+	.ndo_uninit		 = ipoib_ndo_uninit,
 	.ndo_open		 = ipoib_open,
 	.ndo_stop		 = ipoib_stop,
 	.ndo_change_mtu		 = ipoib_change_mtu,
@@ -1913,7 +2042,8 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
 };
 
 static const struct net_device_ops ipoib_netdev_ops_vf = {
-	.ndo_uninit		 = ipoib_uninit,
+	.ndo_init		 = ipoib_ndo_init,
+	.ndo_uninit		 = ipoib_ndo_uninit,
 	.ndo_open		 = ipoib_open,
 	.ndo_stop		 = ipoib_stop,
 	.ndo_change_mtu		 = ipoib_change_mtu,
@@ -1945,6 +2075,13 @@ void ipoib_setup_common(struct net_device *dev)
 	netif_keep_dst(dev);
 
 	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+
+	/*
+	 * unregister_netdev always frees the netdev; we use this mode
+	 * consistently to unify all the various unregister paths, including
+	 * those connected to rtnl_link_ops which require it.
+	 */
+	dev->needs_free_netdev = true;
 }
 
 static void ipoib_build_priv(struct net_device *dev)
@@ -1955,7 +2092,6 @@ static void ipoib_build_priv(struct net_device *dev)
 	spin_lock_init(&priv->lock);
 	init_rwsem(&priv->vlan_rwsem);
 	mutex_init(&priv->mcast_mutex);
-	mutex_init(&priv->sysfs_mutex);
 
 	INIT_LIST_HEAD(&priv->path_list);
 	INIT_LIST_HEAD(&priv->child_intfs);
@@ -1999,9 +2135,7 @@ static struct net_device
 	rn->send = ipoib_send;
 	rn->attach_mcast = ipoib_mcast_attach;
 	rn->detach_mcast = ipoib_mcast_detach;
-	rn->free_rdma_netdev = free_netdev;
 	rn->hca = hca;
-
 	dev->netdev_ops = &ipoib_netdev_default_pf;
 
 	return dev;
@@ -2039,6 +2173,9 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
 	if (!priv)
 		return NULL;
 
+	priv->ca = hca;
+	priv->port = port;
+
 	dev = ipoib_get_netdev(hca, port, name);
 	if (!dev)
 		goto free_priv;
@@ -2053,6 +2190,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
 
 	rn = netdev_priv(dev);
 	rn->clnt_priv = priv;
+
+	/*
+	 * Only the child register_netdev flows can handle priv_destructor
+	 * being set, so we force it to NULL here and handle manually until it
+	 * is safe to turn on.
+	 */
+	priv->next_priv_destructor = dev->priv_destructor;
+	dev->priv_destructor = NULL;
+
 	ipoib_build_priv(dev);
 
 	return priv;
@@ -2061,6 +2207,27 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
 	return NULL;
 }
 
+void ipoib_intf_free(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(dev);
+	struct rdma_netdev *rn = netdev_priv(dev);
+
+	dev->priv_destructor = priv->next_priv_destructor;
+	if (dev->priv_destructor)
+		dev->priv_destructor(dev);
+
+	/*
+	 * There are some error flows around register_netdev failing that may
+	 * attempt to call priv_destructor twice; prevent that from happening.
+	 */
+	dev->priv_destructor = NULL;
+
+	/* unregister/destroy is very complicated. Make bugs more obvious. */
+	rn->clnt_priv = NULL;
+
+	kfree(priv);
+}
+
 static ssize_t show_pkey(struct device *dev,
 			 struct device_attribute *attr, char *buf)
 {
@@ -2186,12 +2353,6 @@ static ssize_t create_child(struct device *dev,
 	if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000)
 		return -EINVAL;
 
-	/*
-	 * Set the full membership bit, so that we join the right
-	 * broadcast group, etc.
-	 */
-	pkey |= 0x8000;
-
 	ret = ipoib_vlan_add(to_net_dev(dev), pkey);
 
 	return ret ? ret : count;
@@ -2223,87 +2384,19 @@ int ipoib_add_pkey_attr(struct net_device *dev)
 	return device_create_file(&dev->dev, &dev_attr_pkey);
 }
 
-void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
-{
-	priv->hca_caps = hca->attrs.device_cap_flags;
-
-	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
-		priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
-
-		if (priv->hca_caps & IB_DEVICE_UD_TSO)
-			priv->dev->hw_features |= NETIF_F_TSO;
-
-		priv->dev->features |= priv->dev->hw_features;
-	}
-}
-
 static struct net_device *ipoib_add_port(const char *format,
 					 struct ib_device *hca, u8 port)
 {
 	struct ipoib_dev_priv *priv;
-	struct ib_port_attr attr;
-	struct rdma_netdev *rn;
-	int result = -ENOMEM;
+	struct net_device *ndev;
+	int result;
 
 	priv = ipoib_intf_alloc(hca, port, format);
 	if (!priv) {
 		pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
-		goto alloc_mem_failed;
+		return ERR_PTR(-ENOMEM);
 	}
-
-	SET_NETDEV_DEV(priv->dev, hca->dev.parent);
-	priv->dev->dev_id = port - 1;
-
-	result = ib_query_port(hca, port, &attr);
-	if (result) {
-		pr_warn("%s: ib_query_port %d failed\n", hca->name, port);
-		goto device_init_failed;
-	}
-
-	priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
-
-	/* MTU will be reset when mcast join happens */
-	priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
-	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
-	priv->dev->max_mtu = IPOIB_CM_MTU;
-
-	priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
-
-	result = ib_query_pkey(hca, port, 0, &priv->pkey);
-	if (result) {
-		pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
-			hca->name, port, result);
-		goto device_init_failed;
-	}
-
-	ipoib_set_dev_features(priv, hca);
-
-	/*
-	 * Set the full membership bit, so that we join the right
-	 * broadcast group, etc.
-	 */
-	priv->pkey |= 0x8000;
-
-	priv->dev->broadcast[8] = priv->pkey >> 8;
-	priv->dev->broadcast[9] = priv->pkey & 0xff;
-
-	result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
-	if (result) {
-		pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n",
-			hca->name, port, result);
-		goto device_init_failed;
-	}
-
-	memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
-	       sizeof(union ib_gid));
-	set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
-
-	result = ipoib_dev_init(priv->dev, hca, port);
-	if (result) {
-		pr_warn("%s: failed to initialize port %d (ret = %d)\n",
-			hca->name, port, result);
-		goto device_init_failed;
-	}
+	ndev = priv->dev;
 
 	INIT_IB_EVENT_HANDLER(&priv->event_handler,
 			      priv->ca, ipoib_event);
@@ -2312,46 +2405,43 @@ static struct net_device *ipoib_add_port(const char *format,
 	/* call event handler to ensure pkey in sync */
 	queue_work(ipoib_workqueue, &priv->flush_heavy);
 
-	result = register_netdev(priv->dev);
+	result = register_netdev(ndev);
 	if (result) {
 		pr_warn("%s: couldn't register ipoib port %d; error %d\n",
 			hca->name, port, result);
-		goto register_failed;
+
+		ipoib_parent_unregister_pre(ndev);
+		ipoib_intf_free(ndev);
+		free_netdev(ndev);
+
+		return ERR_PTR(result);
 	}
 
-	result = -ENOMEM;
-	if (ipoib_cm_add_mode_attr(priv->dev))
+	/*
+	 * We cannot set priv_destructor before register_netdev because we
+	 * need priv to always be valid during the error flow to execute
+	 * ipoib_parent_unregister_pre(). Instead handle it manually and only
+	 * enter priv_destructor mode once we are completely registered.
+	 */
+	ndev->priv_destructor = ipoib_intf_free;
+
+	if (ipoib_cm_add_mode_attr(ndev))
 		goto sysfs_failed;
-	if (ipoib_add_pkey_attr(priv->dev))
+	if (ipoib_add_pkey_attr(ndev))
 		goto sysfs_failed;
-	if (ipoib_add_umcast_attr(priv->dev))
+	if (ipoib_add_umcast_attr(ndev))
 		goto sysfs_failed;
-	if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
+	if (device_create_file(&ndev->dev, &dev_attr_create_child))
 		goto sysfs_failed;
-	if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
+	if (device_create_file(&ndev->dev, &dev_attr_delete_child))
 		goto sysfs_failed;
 
-	return priv->dev;
+	return ndev;
 
 sysfs_failed:
-	unregister_netdev(priv->dev);
-
-register_failed:
-	ib_unregister_event_handler(&priv->event_handler);
-	flush_workqueue(ipoib_workqueue);
-	/* Stop GC if started before flush */
-	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-	cancel_delayed_work(&priv->neigh_reap_task);
-	flush_workqueue(priv->wq);
-	ipoib_dev_cleanup(priv->dev);
-
-device_init_failed:
-	rn = netdev_priv(priv->dev);
-	rn->free_rdma_netdev(priv->dev);
-	kfree(priv);
-
-alloc_mem_failed:
-	return ERR_PTR(result);
+	ipoib_parent_unregister_pre(ndev);
+	unregister_netdev(ndev);
+	return ERR_PTR(-ENOMEM);
 }
 
 static void ipoib_add_one(struct ib_device *device)
@@ -2362,7 +2452,7 @@ static void ipoib_add_one(struct ib_device *device)
 	int p;
 	int count = 0;
 
-	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
+	dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
 	if (!dev_list)
 		return;
 
@@ -2396,39 +2486,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 		return;
 
 	list_for_each_entry_safe(priv, tmp, dev_list, list) {
-		struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
-
-		ib_unregister_event_handler(&priv->event_handler);
-		flush_workqueue(ipoib_workqueue);
-
-		/* mark interface in the middle of destruction */
-		set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags);
+		LIST_HEAD(head);
+		ipoib_parent_unregister_pre(priv->dev);
 
 		rtnl_lock();
-		dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
+
+		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs,
+					 list)
+			unregister_netdevice_queue(cpriv->dev, &head);
+		unregister_netdevice_queue(priv->dev, &head);
+		unregister_netdevice_many(&head);
+
 		rtnl_unlock();
-
-		/* Stop GC */
-		set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-		cancel_delayed_work(&priv->neigh_reap_task);
-		flush_workqueue(priv->wq);
-
-		/* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */
-		mutex_lock(&priv->sysfs_mutex);
-		unregister_netdev(priv->dev);
-		mutex_unlock(&priv->sysfs_mutex);
-
-		parent_rn->free_rdma_netdev(priv->dev);
-
-		list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
-			struct rdma_netdev *child_rn;
-
-			child_rn = netdev_priv(cpriv->dev);
-			child_rn->free_rdma_netdev(cpriv->dev);
-			kfree(cpriv);
-		}
-
-		kfree(priv);
 	}
 
 	kfree(dev_list);
@@ -2476,8 +2545,7 @@ static int __init ipoib_init_module(void)
 	 * its private workqueue, and we only queue up flush events
 	 * on our global flush workqueue.  This avoids the deadlocks.
 	 */
-	ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush",
-						  WQ_MEM_RECLAIM);
+	ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 0);
 	if (!ipoib_workqueue) {
 		ret = -ENOMEM;
 		goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 6709328..b9e9562 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -140,7 +140,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
 {
 	struct ipoib_mcast *mcast;
 
-	mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
+	mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
 	if (!mcast)
 		return NULL;
 
@@ -822,6 +822,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 			if (neigh && list_empty(&neigh->list)) {
 				kref_get(&mcast->ah->ref);
 				neigh->ah	= mcast->ah;
+				neigh->ah->valid = 1;
 				list_add_tail(&neigh->list, &mcast->neigh_list);
 			}
 		}
@@ -917,7 +918,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 		if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
 			continue;
 
-		memcpy(mgid.raw, ha->addr + 4, sizeof mgid);
+		memcpy(mgid.raw, ha->addr + 4, sizeof(mgid));
 
 		mcast = __ipoib_mcast_find(dev, &mgid);
 		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -997,7 +998,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
 {
 	struct ipoib_mcast_iter *iter;
 
-	iter = kmalloc(sizeof *iter, GFP_KERNEL);
+	iter = kmalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter)
 		return NULL;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index 3e44087..d4d553a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -122,15 +122,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
 	} else
 		child_pkey  = nla_get_u16(data[IFLA_IPOIB_PKEY]);
 
-	if (child_pkey == 0 || child_pkey == 0x8000)
-		return -EINVAL;
-
-	/*
-	 * Set the full membership bit, so that we join the right
-	 * broadcast group, etc.
-	 */
-	child_pkey |= 0x8000;
-
 	err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
 			       child_pkey, IPOIB_RTNL_CHILD);
 
@@ -139,19 +130,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
 	return err;
 }
 
-static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head)
-{
-	struct ipoib_dev_priv *priv, *ppriv;
-
-	priv = ipoib_priv(dev);
-	ppriv = ipoib_priv(priv->parent);
-
-	down_write(&ppriv->vlan_rwsem);
-	unregister_netdevice_queue(dev, head);
-	list_del(&priv->list);
-	up_write(&ppriv->vlan_rwsem);
-}
-
 static size_t ipoib_get_size(const struct net_device *dev)
 {
 	return nla_total_size(2) +	/* IFLA_IPOIB_PKEY   */
@@ -167,7 +145,6 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
 	.setup		= ipoib_setup_common,
 	.newlink	= ipoib_new_child_link,
 	.changelink	= ipoib_changelink,
-	.dellink	= ipoib_unregister_child_dev,
 	.get_size	= ipoib_get_size,
 	.fill_info	= ipoib_fill_info,
 };
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 984a880..9f36ca7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -52,7 +52,7 @@ int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
 
 	if (set_qkey) {
 		ret = -ENOMEM;
-		qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+		qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
 		if (!qp_attr)
 			goto out;
 
@@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 		.cap = {
 			.max_send_wr  = ipoib_sendq_size,
 			.max_recv_wr  = ipoib_recvq_size,
-			.max_send_sge = min_t(u32, priv->ca->attrs.max_sge,
+			.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
 					      MAX_SKB_FRAGS + 1),
 			.max_recv_sge = IPOIB_UD_RX_SG
 		},
@@ -168,8 +168,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 		else
 			size += ipoib_recvq_size * ipoib_max_conn_qp;
 	} else
-		if (ret != -ENOSYS)
-			return -ENODEV;
+		if (ret != -EOPNOTSUPP)
+			return ret;
 
 	req_vec = (priv->port - 1) * 2;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 55a9b71..341753f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -50,68 +50,112 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
 }
 static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
 
+static bool is_child_unique(struct ipoib_dev_priv *ppriv,
+			    struct ipoib_dev_priv *priv)
+{
+	struct ipoib_dev_priv *tpriv;
+
+	ASSERT_RTNL();
+
+	/*
+	 * Since the legacy sysfs interface uses the pkey for deletion it
+	 * cannot support more than one interface with the same pkey; that
+	 * would be ambiguous.  The RTNL interface deletes using the netdev,
+	 * so it has no problem supporting duplicated pkeys.
+	 */
+	if (priv->child_type != IPOIB_LEGACY_CHILD)
+		return true;
+
+	/*
+	 * First ensure this isn't a duplicate. We check the parent device and
+	 * then all of the legacy child interfaces to make sure the Pkey
+	 * doesn't match.
+	 */
+	if (ppriv->pkey == priv->pkey)
+		return false;
+
+	list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
+		if (tpriv->pkey == priv->pkey &&
+		    tpriv->child_type == IPOIB_LEGACY_CHILD)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * NOTE: If this function fails then priv->dev will remain valid; however,
+ * priv may have been freed and must not be touched by the caller in the
+ * error case.
+ *
+ * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to
+ * free the net_device (just as rtnl_newlink does) otherwise the net_device
+ * will be freed when the rtnl is unlocked.
+ */
 int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
 		     u16 pkey, int type)
 {
+	struct net_device *ndev = priv->dev;
 	int result;
 
-	priv->max_ib_mtu = ppriv->max_ib_mtu;
-	/* MTU will be reset when mcast join happens */
-	priv->dev->mtu   = IPOIB_UD_MTU(priv->max_ib_mtu);
-	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
-	priv->parent = ppriv->dev;
-	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
+	ASSERT_RTNL();
 
-	ipoib_set_dev_features(priv, ppriv->ca);
+	/*
+	 * Racing with unregister of the parent must be prevented by the
+	 * caller.
+	 */
+	WARN_ON(ppriv->dev->reg_state != NETREG_REGISTERED);
 
-	priv->pkey = pkey;
-
-	memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
-	memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
-	set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
-	priv->dev->broadcast[8] = pkey >> 8;
-	priv->dev->broadcast[9] = pkey & 0xff;
-
-	result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port);
-	if (result < 0) {
-		ipoib_warn(ppriv, "failed to initialize subinterface: "
-			   "device %s, port %d",
-			   ppriv->ca->name, ppriv->port);
-		goto err;
+	if (pkey == 0 || pkey == 0x8000) {
+		result = -EINVAL;
+		goto out_early;
 	}
 
-	result = register_netdevice(priv->dev);
+	priv->parent = ppriv->dev;
+	priv->pkey = pkey;
+	priv->child_type = type;
+
+	if (!is_child_unique(ppriv, priv)) {
+		result = -ENOTUNIQ;
+		goto out_early;
+	}
+
+	/* We do not need to touch priv if register_netdevice fails */
+	ndev->priv_destructor = ipoib_intf_free;
+
+	result = register_netdevice(ndev);
 	if (result) {
 		ipoib_warn(priv, "failed to initialize; error %i", result);
-		goto register_failed;
+
+		/*
+		 * register_netdevice sometimes calls priv_destructor,
+		 * sometimes not. Make sure it was done.
+		 */
+		goto out_early;
 	}
 
 	/* RTNL childs don't need proprietary sysfs entries */
 	if (type == IPOIB_LEGACY_CHILD) {
-		if (ipoib_cm_add_mode_attr(priv->dev))
+		if (ipoib_cm_add_mode_attr(ndev))
 			goto sysfs_failed;
-		if (ipoib_add_pkey_attr(priv->dev))
+		if (ipoib_add_pkey_attr(ndev))
 			goto sysfs_failed;
-		if (ipoib_add_umcast_attr(priv->dev))
+		if (ipoib_add_umcast_attr(ndev))
 			goto sysfs_failed;
 
-		if (device_create_file(&priv->dev->dev, &dev_attr_parent))
+		if (device_create_file(&ndev->dev, &dev_attr_parent))
 			goto sysfs_failed;
 	}
 
-	priv->child_type  = type;
-	list_add_tail(&priv->list, &ppriv->child_intfs);
-
 	return 0;
 
 sysfs_failed:
-	result = -ENOMEM;
 	unregister_netdevice(priv->dev);
+	return -ENOMEM;
 
-register_failed:
-	ipoib_dev_cleanup(priv->dev);
-
-err:
+out_early:
+	if (ndev->priv_destructor)
+		ndev->priv_destructor(ndev);
 	return result;
 }
 
@@ -119,129 +163,124 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 {
 	struct ipoib_dev_priv *ppriv, *priv;
 	char intf_name[IFNAMSIZ];
-	struct ipoib_dev_priv *tpriv;
+	struct net_device *ndev;
 	int result;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (pdev->reg_state != NETREG_REGISTERED) {
+		rtnl_unlock();
+		return -EPERM;
+	}
+
 	ppriv = ipoib_priv(pdev);
 
-	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
-		return -EPERM;
-
-	snprintf(intf_name, sizeof intf_name, "%s.%04x",
+	snprintf(intf_name, sizeof(intf_name), "%s.%04x",
 		 ppriv->dev->name, pkey);
 
-	if (!mutex_trylock(&ppriv->sysfs_mutex))
-		return restart_syscall();
-
-	if (!rtnl_trylock()) {
-		mutex_unlock(&ppriv->sysfs_mutex);
-		return restart_syscall();
-	}
-
-	if (!down_write_trylock(&ppriv->vlan_rwsem)) {
-		rtnl_unlock();
-		mutex_unlock(&ppriv->sysfs_mutex);
-		return restart_syscall();
-	}
-
 	priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
 	if (!priv) {
 		result = -ENOMEM;
 		goto out;
 	}
-
-	/*
-	 * First ensure this isn't a duplicate. We check the parent device and
-	 * then all of the legacy child interfaces to make sure the Pkey
-	 * doesn't match.
-	 */
-	if (ppriv->pkey == pkey) {
-		result = -ENOTUNIQ;
-		goto out;
-	}
-
-	list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
-		if (tpriv->pkey == pkey &&
-		    tpriv->child_type == IPOIB_LEGACY_CHILD) {
-			result = -ENOTUNIQ;
-			goto out;
-		}
-	}
+	ndev = priv->dev;
 
 	result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
 
+	if (result && ndev->reg_state == NETREG_UNINITIALIZED)
+		free_netdev(ndev);
+
 out:
-	up_write(&ppriv->vlan_rwsem);
 	rtnl_unlock();
-	mutex_unlock(&ppriv->sysfs_mutex);
-
-	if (result && priv) {
-		struct rdma_netdev *rn;
-
-		rn = netdev_priv(priv->dev);
-		rn->free_rdma_netdev(priv->dev);
-		kfree(priv);
-	}
 
 	return result;
 }
 
-int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
+struct ipoib_vlan_delete_work {
+	struct work_struct work;
+	struct net_device *dev;
+};
+
+/*
+ * sysfs callbacks of a netdevice cannot obtain the rtnl lock as
+ * unregister_netdev ultimately deletes the sysfs files while holding the rtnl
+ * lock. This deadlocks the system.
+ *
+ * A callback can use rtnl_trylock to avoid the deadlock but it cannot call
+ * unregister_netdev as that internally takes and releases the rtnl_lock.  So
+ * instead we find the netdev to unregister and then do the actual unregister
+ * from the global work queue where we can obtain the rtnl_lock safely.
+ */
+static void ipoib_vlan_delete_task(struct work_struct *work)
 {
-	struct ipoib_dev_priv *ppriv, *priv, *tpriv;
-	struct net_device *dev = NULL;
+	struct ipoib_vlan_delete_work *pwork =
+		container_of(work, struct ipoib_vlan_delete_work, work);
+	struct net_device *dev = pwork->dev;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
+	rtnl_lock();
 
-	ppriv = ipoib_priv(pdev);
+	/* Unregistering tasks can race with another task or parent removal */
+	if (dev->reg_state == NETREG_REGISTERED) {
+		struct ipoib_dev_priv *priv = ipoib_priv(dev);
+		struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
 
-	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
-		return -EPERM;
-
-	if (!mutex_trylock(&ppriv->sysfs_mutex))
-		return restart_syscall();
-
-	if (!rtnl_trylock()) {
-		mutex_unlock(&ppriv->sysfs_mutex);
-		return restart_syscall();
-	}
-
-	if (!down_write_trylock(&ppriv->vlan_rwsem)) {
-		rtnl_unlock();
-		mutex_unlock(&ppriv->sysfs_mutex);
-		return restart_syscall();
-	}
-
-	list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
-		if (priv->pkey == pkey &&
-		    priv->child_type == IPOIB_LEGACY_CHILD) {
-			list_del(&priv->list);
-			dev = priv->dev;
-			break;
-		}
-	}
-	up_write(&ppriv->vlan_rwsem);
-
-	if (dev) {
 		ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
 		unregister_netdevice(dev);
 	}
 
 	rtnl_unlock();
-	mutex_unlock(&ppriv->sysfs_mutex);
 
-	if (dev) {
-		struct rdma_netdev *rn;
+	kfree(pwork);
+}
 
-		rn = netdev_priv(dev);
-		rn->free_rdma_netdev(priv->dev);
-		kfree(priv);
-		return 0;
+int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
+{
+	struct ipoib_dev_priv *ppriv, *priv, *tpriv;
+	int rc;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (pdev->reg_state != NETREG_REGISTERED) {
+		rtnl_unlock();
+		return -EPERM;
 	}
 
-	return -ENODEV;
+	ppriv = ipoib_priv(pdev);
+
+	rc = -ENODEV;
+	list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
+		if (priv->pkey == pkey &&
+		    priv->child_type == IPOIB_LEGACY_CHILD) {
+			struct ipoib_vlan_delete_work *work;
+
+			work = kmalloc(sizeof(*work), GFP_KERNEL);
+			if (!work) {
+				rc = -ENOMEM;
+				goto out;
+			}
+
+			down_write(&ppriv->vlan_rwsem);
+			list_del_init(&priv->list);
+			up_write(&ppriv->vlan_rwsem);
+			work->dev = priv->dev;
+			INIT_WORK(&work->work, ipoib_vlan_delete_task);
+			queue_work(ipoib_workqueue, &work->work);
+
+			rc = 0;
+			break;
+		}
+	}
+
+out:
+	rtnl_unlock();
+
+	return rc;
 }
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9a6434c..3fecd87 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -610,12 +610,10 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 			  uint32_t initial_cmdsn)
 {
 	struct iscsi_cls_session *cls_session;
-	struct iscsi_session *session;
 	struct Scsi_Host *shost;
 	struct iser_conn *iser_conn = NULL;
 	struct ib_conn *ib_conn;
 	u32 max_fr_sectors;
-	u16 max_cmds;
 
 	shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
 	if (!shost)
@@ -633,8 +631,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 	 */
 	if (ep) {
 		iser_conn = ep->dd_data;
-		max_cmds = iser_conn->max_cmds;
 		shost->sg_tablesize = iser_conn->scsi_sg_tablesize;
+		shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds);
 
 		mutex_lock(&iser_conn->state_mutex);
 		if (iser_conn->state != ISER_CONN_UP) {
@@ -660,7 +658,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 		}
 		mutex_unlock(&iser_conn->state_mutex);
 	} else {
-		max_cmds = ISER_DEF_XMIT_CMDS_MAX;
+		shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX);
 		if (iscsi_host_add(shost, NULL))
 			goto free_host;
 	}
@@ -676,21 +674,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
 		iser_warn("max_sectors was reduced from %u to %u\n",
 			  iser_max_sectors, shost->max_sectors);
 
-	if (cmds_max > max_cmds) {
-		iser_info("cmds_max changed from %u to %u\n",
-			  cmds_max, max_cmds);
-		cmds_max = max_cmds;
-	}
-
 	cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
-					  cmds_max, 0,
+					  shost->can_queue, 0,
 					  sizeof(struct iscsi_iser_task),
 					  initial_cmdsn, 0);
 	if (!cls_session)
 		goto remove_host;
-	session = cls_session->dd_data;
 
-	shost->can_queue = session->scsi_cmds_max;
 	return cls_session;
 
 remove_host:
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 130bf16..009be88 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -405,7 +405,8 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 
 	ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 
-	wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
+	wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr,
+			  wr);
 	wr->wr.opcode = IB_WR_REG_SIG_MR;
 	wr->wr.wr_cqe = cqe;
 	wr->wr.sg_list = &data_reg->sge;
@@ -457,7 +458,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
 		return n < 0 ? n : -EINVAL;
 	}
 
-	wr = reg_wr(iser_tx_next_wr(tx_desc));
+	wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr);
 	wr->wr.opcode = IB_WR_REG_MR;
 	wr->wr.wr_cqe = cqe;
 	wr->wr.send_flags = 0;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 616d978..b686a4a 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -1022,7 +1022,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
 {
 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
 	struct iser_login_desc *desc = &iser_conn->login_desc;
-	struct ib_recv_wr wr, *wr_failed;
+	struct ib_recv_wr wr;
 	int ib_ret;
 
 	desc->sge.addr = desc->rsp_dma;
@@ -1036,7 +1036,7 @@ int iser_post_recvl(struct iser_conn *iser_conn)
 	wr.next = NULL;
 
 	ib_conn->post_recv_buf_count++;
-	ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed);
+	ib_ret = ib_post_recv(ib_conn->qp, &wr, NULL);
 	if (ib_ret) {
 		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
 		ib_conn->post_recv_buf_count--;
@@ -1050,7 +1050,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
 	unsigned int my_rx_head = iser_conn->rx_desc_head;
 	struct iser_rx_desc *rx_desc;
-	struct ib_recv_wr *wr, *wr_failed;
+	struct ib_recv_wr *wr;
 	int i, ib_ret;
 
 	for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) {
@@ -1067,7 +1067,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
 	wr->next = NULL; /* mark end of work requests list */
 
 	ib_conn->post_recv_buf_count += count;
-	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed);
+	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL);
 	if (ib_ret) {
 		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
 		ib_conn->post_recv_buf_count -= count;
@@ -1086,7 +1086,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count)
 int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
 		   bool signal)
 {
-	struct ib_send_wr *bad_wr, *wr = iser_tx_next_wr(tx_desc);
+	struct ib_send_wr *wr = iser_tx_next_wr(tx_desc);
 	int ib_ret;
 
 	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
@@ -1100,10 +1100,10 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
 	wr->opcode = IB_WR_SEND;
 	wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
 
-	ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr);
+	ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, NULL);
 	if (ib_ret)
 		iser_err("ib_post_send failed, ret:%d opcode:%d\n",
-			 ib_ret, bad_wr->opcode);
+			 ib_ret, wr->opcode);
 
 	return ib_ret;
 }
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index cccbcf0..f39670c 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -136,7 +136,7 @@ isert_create_qp(struct isert_conn *isert_conn,
 	attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
 	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
 	attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX;
-	attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+	attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge;
 	attr.cap.max_recv_sge = 1;
 	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	attr.qp_type = IB_QPT_RC;
@@ -299,7 +299,8 @@ isert_create_device_ib_res(struct isert_device *device)
 	struct ib_device *ib_dev = device->ib_device;
 	int ret;
 
-	isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+	isert_dbg("devattr->max_send_sge: %d devattr->max_recv_sge %d\n",
+		  ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge);
 	isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
 
 	ret = isert_alloc_comps(device);
@@ -809,7 +810,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 static int
 isert_post_recvm(struct isert_conn *isert_conn, u32 count)
 {
-	struct ib_recv_wr *rx_wr, *rx_wr_failed;
+	struct ib_recv_wr *rx_wr;
 	int i, ret;
 	struct iser_rx_desc *rx_desc;
 
@@ -825,8 +826,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
 	rx_wr--;
 	rx_wr->next = NULL; /* mark end of work requests list */
 
-	ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr,
-			   &rx_wr_failed);
+	ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, NULL);
 	if (ret)
 		isert_err("ib_post_recv() failed with ret: %d\n", ret);
 
@@ -836,7 +836,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count)
 static int
 isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
 {
-	struct ib_recv_wr *rx_wr_failed, rx_wr;
+	struct ib_recv_wr rx_wr;
 	int ret;
 
 	if (!rx_desc->in_use) {
@@ -853,7 +853,7 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc)
 	rx_wr.num_sge = 1;
 	rx_wr.next = NULL;
 
-	ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed);
+	ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL);
 	if (ret)
 		isert_err("ib_post_recv() failed with ret: %d\n", ret);
 
@@ -864,7 +864,7 @@ static int
 isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc)
 {
 	struct ib_device *ib_dev = isert_conn->cm_id->device;
-	struct ib_send_wr send_wr, *send_wr_failed;
+	struct ib_send_wr send_wr;
 	int ret;
 
 	ib_dma_sync_single_for_device(ib_dev, tx_desc->dma_addr,
@@ -879,7 +879,7 @@ isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_des
 	send_wr.opcode	= IB_WR_SEND;
 	send_wr.send_flags = IB_SEND_SIGNALED;
 
-	ret = ib_post_send(isert_conn->qp, &send_wr, &send_wr_failed);
+	ret = ib_post_send(isert_conn->qp, &send_wr, NULL);
 	if (ret)
 		isert_err("ib_post_send() failed, ret: %d\n", ret);
 
@@ -967,7 +967,7 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 static int
 isert_login_post_recv(struct isert_conn *isert_conn)
 {
-	struct ib_recv_wr rx_wr, *rx_wr_fail;
+	struct ib_recv_wr rx_wr;
 	struct ib_sge sge;
 	int ret;
 
@@ -986,7 +986,7 @@ isert_login_post_recv(struct isert_conn *isert_conn)
 	rx_wr.sg_list = &sge;
 	rx_wr.num_sge = 1;
 
-	ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail);
+	ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL);
 	if (ret)
 		isert_err("ib_post_recv() failed: %d\n", ret);
 
@@ -1829,7 +1829,6 @@ isert_send_done(struct ib_cq *cq, struct ib_wc *wc)
 static int
 isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
 {
-	struct ib_send_wr *wr_failed;
 	int ret;
 
 	ret = isert_post_recv(isert_conn, isert_cmd->rx_desc);
@@ -1838,8 +1837,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
 		return ret;
 	}
 
-	ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr,
-			   &wr_failed);
+	ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, NULL);
 	if (ret) {
 		isert_err("ib_post_send failed with %d\n", ret);
 		return ret;
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 9786b24..444d165 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -57,13 +57,10 @@
 
 #define DRV_NAME	"ib_srp"
 #define PFX		DRV_NAME ": "
-#define DRV_VERSION	"2.0"
-#define DRV_RELDATE	"July 26, 2015"
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_INFO(release_date, DRV_RELDATE);
 
 #if !defined(CONFIG_DYNAMIC_DEBUG)
 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
@@ -145,7 +142,8 @@ static void srp_remove_one(struct ib_device *device, void *client_data);
 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
 		const char *opname);
-static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
+			     const struct ib_cm_event *event);
 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
 			       struct rdma_cm_event *event);
 
@@ -1211,7 +1209,6 @@ static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
 		u32 rkey)
 {
-	struct ib_send_wr *bad_wr;
 	struct ib_send_wr wr = {
 		.opcode		    = IB_WR_LOCAL_INV,
 		.next		    = NULL,
@@ -1222,7 +1219,7 @@ static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
 
 	wr.wr_cqe = &req->reg_cqe;
 	req->reg_cqe.done = srp_inv_rkey_err_done;
-	return ib_post_send(ch->qp, &wr, &bad_wr);
+	return ib_post_send(ch->qp, &wr, NULL);
 }
 
 static void srp_unmap_data(struct scsi_cmnd *scmnd,
@@ -1503,7 +1500,6 @@ static int srp_map_finish_fr(struct srp_map_state *state,
 {
 	struct srp_target_port *target = ch->target;
 	struct srp_device *dev = target->srp_host->srp_dev;
-	struct ib_send_wr *bad_wr;
 	struct ib_reg_wr wr;
 	struct srp_fr_desc *desc;
 	u32 rkey;
@@ -1567,7 +1563,7 @@ static int srp_map_finish_fr(struct srp_map_state *state,
 	srp_map_desc(state, desc->mr->iova,
 		     desc->mr->length, desc->mr->rkey);
 
-	err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
+	err = ib_post_send(ch->qp, &wr.wr, NULL);
 	if (unlikely(err)) {
 		WARN_ON_ONCE(err == -ENOMEM);
 		return err;
@@ -2018,7 +2014,7 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
 {
 	struct srp_target_port *target = ch->target;
 	struct ib_sge list;
-	struct ib_send_wr wr, *bad_wr;
+	struct ib_send_wr wr;
 
 	list.addr   = iu->dma;
 	list.length = len;
@@ -2033,13 +2029,13 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
 	wr.opcode     = IB_WR_SEND;
 	wr.send_flags = IB_SEND_SIGNALED;
 
-	return ib_post_send(ch->qp, &wr, &bad_wr);
+	return ib_post_send(ch->qp, &wr, NULL);
 }
 
 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
 {
 	struct srp_target_port *target = ch->target;
-	struct ib_recv_wr wr, *bad_wr;
+	struct ib_recv_wr wr;
 	struct ib_sge list;
 
 	list.addr   = iu->dma;
@@ -2053,7 +2049,7 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
 	wr.sg_list  = &list;
 	wr.num_sge  = 1;
 
-	return ib_post_recv(ch->qp, &wr, &bad_wr);
+	return ib_post_recv(ch->qp, &wr, NULL);
 }
 
 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
@@ -2558,7 +2554,7 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
 }
 
 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
-				  struct ib_cm_event *event,
+				  const struct ib_cm_event *event,
 				  struct srp_rdma_ch *ch)
 {
 	struct srp_target_port *target = ch->target;
@@ -2643,7 +2639,8 @@ static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
 	}
 }
 
-static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
+			     const struct ib_cm_event *event)
 {
 	struct srp_rdma_ch *ch = cm_id->context;
 	struct srp_target_port *target = ch->target;
@@ -3843,7 +3840,7 @@ static ssize_t srp_create_target(struct device *dev,
 	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
 	INIT_WORK(&target->remove_work, srp_remove_work);
 	spin_lock_init(&target->lock);
-	ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
+	ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
 	if (ret)
 		goto out;
 
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 1ae638b..f37cbad 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -575,8 +575,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
 	sport->sm_lid = port_attr.sm_lid;
 	sport->lid = port_attr.lid;
 
-	ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid,
-			   NULL);
+	ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
 	if (ret)
 		goto err_query_port;
 
@@ -720,7 +719,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
 	WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx)
 		&& ioctx_size != sizeof(struct srpt_send_ioctx));
 
-	ring = kmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL);
+	ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL);
 	if (!ring)
 		goto out;
 	for (i = 0; i < ring_size; ++i) {
@@ -734,7 +733,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
 err:
 	while (--i >= 0)
 		srpt_free_ioctx(sdev, ring[i], dma_size, dir);
-	kfree(ring);
+	kvfree(ring);
 	ring = NULL;
 out:
 	return ring;
@@ -759,7 +758,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
 
 	for (i = 0; i < ring_size; ++i)
 		srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
-	kfree(ioctx_ring);
+	kvfree(ioctx_ring);
 }
 
 /**
@@ -817,7 +816,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
 			  struct srpt_recv_ioctx *ioctx)
 {
 	struct ib_sge list;
-	struct ib_recv_wr wr, *bad_wr;
+	struct ib_recv_wr wr;
 
 	BUG_ON(!sdev);
 	list.addr = ioctx->ioctx.dma;
@@ -831,9 +830,9 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
 	wr.num_sge = 1;
 
 	if (sdev->use_srq)
-		return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
+		return ib_post_srq_recv(sdev->srq, &wr, NULL);
 	else
-		return ib_post_recv(ch->qp, &wr, &bad_wr);
+		return ib_post_recv(ch->qp, &wr, NULL);
 }
 
 /**
@@ -847,7 +846,6 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
  */
 static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
 {
-	struct ib_send_wr *bad_wr;
 	struct ib_rdma_wr wr = {
 		.wr = {
 			.next		= NULL,
@@ -860,7 +858,7 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch)
 	pr_debug("%s-%d: queued zerolength write\n", ch->sess_name,
 		 ch->qp->qp_num);
 
-	return ib_post_send(ch->qp, &wr.wr, &bad_wr);
+	return ib_post_send(ch->qp, &wr.wr, NULL);
 }
 
 static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1754,13 +1752,15 @@ static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
 	 */
 	qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr);
 	qp_init->cap.max_rdma_ctxs = sq_size / 2;
-	qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
+	qp_init->cap.max_send_sge = min(attrs->max_send_sge,
+					SRPT_MAX_SG_PER_WQE);
 	qp_init->port_num = ch->sport->port;
 	if (sdev->use_srq) {
 		qp_init->srq = sdev->srq;
 	} else {
 		qp_init->cap.max_recv_wr = ch->rq_size;
-		qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge;
+		qp_init->cap.max_recv_sge = min(attrs->max_recv_sge,
+						SRPT_MAX_SG_PER_WQE);
 	}
 
 	if (ch->using_rdma_cm) {
@@ -1833,8 +1833,7 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch)
 	int ret;
 
 	if (!srpt_set_ch_state(ch, CH_DRAINING)) {
-		pr_debug("%s-%d: already closed\n", ch->sess_name,
-			 ch->qp->qp_num);
+		pr_debug("%s: already closed\n", ch->sess_name);
 		return false;
 	}
 
@@ -1940,8 +1939,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport)
 	list_for_each_entry(nexus, &sport->nexus_list, entry) {
 		list_for_each_entry(ch, &nexus->ch_list, list) {
 			if (srpt_disconnect_ch(ch) >= 0)
-				pr_info("Closing channel %s-%d because target %s_%d has been disabled\n",
-					ch->sess_name, ch->qp->qp_num,
+				pr_info("Closing channel %s because target %s_%d has been disabled\n",
+					ch->sess_name,
 					sport->sdev->device->name, sport->port);
 			srpt_close_ch(ch);
 		}
@@ -2086,7 +2085,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 		struct rdma_conn_param rdma_cm;
 		struct ib_cm_rep_param ib_cm;
 	} *rep_param = NULL;
-	struct srpt_rdma_ch *ch;
+	struct srpt_rdma_ch *ch = NULL;
 	char i_port_id[36];
 	u32 it_iu_len;
 	int i, ret;
@@ -2233,13 +2232,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 						TARGET_PROT_NORMAL,
 						i_port_id + 2, ch, NULL);
 	if (IS_ERR_OR_NULL(ch->sess)) {
+		WARN_ON_ONCE(ch->sess == NULL);
 		ret = PTR_ERR(ch->sess);
+		ch->sess = NULL;
 		pr_info("Rejected login for initiator %s: ret = %d.\n",
 			ch->sess_name, ret);
 		rej->reason = cpu_to_be32(ret == -ENOMEM ?
 				SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
 				SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
-		goto reject;
+		goto destroy_ib;
 	}
 
 	mutex_lock(&sport->mutex);
@@ -2278,7 +2279,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
 		pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n",
 		       ret);
-		goto destroy_ib;
+		goto reject;
 	}
 
 	pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess,
@@ -2357,8 +2358,11 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 	srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
 			     ch->sport->sdev, ch->rq_size,
 			     ch->max_rsp_size, DMA_TO_DEVICE);
+
 free_ch:
-	if (ib_cm_id)
+	if (rdma_cm_id)
+		rdma_cm_id->context = NULL;
+	else
 		ib_cm_id->context = NULL;
 	kfree(ch);
 	ch = NULL;
@@ -2378,6 +2382,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 		ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
 			       rej, sizeof(*rej));
 
+	if (ch && ch->sess) {
+		srpt_close_ch(ch);
+		/*
+		 * Tell the caller not to free cm_id since
+		 * srpt_release_channel_work() will do that.
+		 */
+		ret = 0;
+	}
+
 out:
 	kfree(rep_param);
 	kfree(rsp);
@@ -2387,7 +2400,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
 }
 
 static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id,
-			       struct ib_cm_req_event_param *param,
+			       const struct ib_cm_req_event_param *param,
 			       void *private_data)
 {
 	char sguid[40];
@@ -2499,7 +2512,8 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch)
  * a non-zero value in any other case will trigger a race with the
  * ib_destroy_cm_id() call in srpt_release_channel().
  */
-static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int srpt_cm_handler(struct ib_cm_id *cm_id,
+			   const struct ib_cm_event *event)
 {
 	struct srpt_rdma_ch *ch = cm_id->context;
 	int ret;
@@ -2609,7 +2623,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd)
 	struct srpt_send_ioctx *ioctx =
 		container_of(se_cmd, struct srpt_send_ioctx, cmd);
 	struct srpt_rdma_ch *ch = ioctx->ch;
-	struct ib_send_wr *first_wr = NULL, *bad_wr;
+	struct ib_send_wr *first_wr = NULL;
 	struct ib_cqe *cqe = &ioctx->rdma_cqe;
 	enum srpt_command_state new_state;
 	int ret, i;
@@ -2633,7 +2647,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd)
 		cqe = NULL;
 	}
 
-	ret = ib_post_send(ch->qp, first_wr, &bad_wr);
+	ret = ib_post_send(ch->qp, first_wr, NULL);
 	if (ret) {
 		pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n",
 			 __func__, ret, ioctx->n_rdma,
@@ -2671,7 +2685,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
 		container_of(cmd, struct srpt_send_ioctx, cmd);
 	struct srpt_rdma_ch *ch = ioctx->ch;
 	struct srpt_device *sdev = ch->sport->sdev;
-	struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr;
+	struct ib_send_wr send_wr, *first_wr = &send_wr;
 	struct ib_sge sge;
 	enum srpt_command_state state;
 	int resp_len, ret, i;
@@ -2744,7 +2758,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
 	send_wr.opcode = IB_WR_SEND;
 	send_wr.send_flags = IB_SEND_SIGNALED;
 
-	ret = ib_post_send(ch->qp, first_wr, &bad_wr);
+	ret = ib_post_send(ch->qp, first_wr, NULL);
 	if (ret < 0) {
 		pr_err("%s: sending cmd response failed for tag %llu (%d)\n",
 			__func__, ioctx->cmd.tag, ret);
@@ -2968,7 +2982,8 @@ static void srpt_add_one(struct ib_device *device)
 
 	pr_debug("device = %p\n", device);
 
-	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+	sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt),
+		       GFP_KERNEL);
 	if (!sdev)
 		goto err;
 
@@ -3022,8 +3037,6 @@ static void srpt_add_one(struct ib_device *device)
 			      srpt_event_handler);
 	ib_register_event_handler(&sdev->event_handler);
 
-	WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));
-
 	for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
 		sport = &sdev->port[i - 1];
 		INIT_LIST_HEAD(&sport->nexus_list);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 2361483..444dfd7 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -396,9 +396,9 @@ struct srpt_port {
  * @sdev_mutex:	   Serializes use_srq changes.
  * @use_srq:       Whether or not to use SRQ.
  * @ioctx_ring:    Per-HCA SRQ.
- * @port:          Information about the ports owned by this HCA.
  * @event_handler: Per-HCA asynchronous IB event handler.
  * @list:          Node in srpt_dev_list.
+ * @port:          Information about the ports owned by this HCA.
  */
 struct srpt_device {
 	struct ib_device	*device;
@@ -410,9 +410,9 @@ struct srpt_device {
 	struct mutex		sdev_mutex;
 	bool			use_srq;
 	struct srpt_recv_ioctx	**ioctx_ring;
-	struct srpt_port	port[2];
 	struct ib_event_handler	event_handler;
 	struct list_head	list;
+	struct srpt_port        port[];
 };
 
 #endif				/* IB_SRPT_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index 09e38f0..b8f75a2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -755,7 +755,7 @@ struct cpl_abort_req_rss {
 struct cpl_abort_req_rss6 {
 	WR_HDR;
 	union opcode_tid ot;
-	__u32 srqidx_status;
+	__be32 srqidx_status;
 };
 
 #define ABORT_RSS_STATUS_S    0
@@ -785,7 +785,7 @@ struct cpl_abort_rpl_rss {
 
 struct cpl_abort_rpl_rss6 {
 	union opcode_tid ot;
-	__u32 srqidx_status;
+	__be32 srqidx_status;
 };
 
 struct cpl_abort_rpl {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index fe4ac40..3ce14d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -433,6 +433,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_FPGA_QUERY_QP:
 	case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
 	case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+	case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+	case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -612,6 +614,9 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(ARM_XRQ);
 	MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
 	MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
+	MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
 	default: return "unknown command opcode";
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index b3820a3..0f11fff 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -240,6 +240,9 @@ const char *parse_fs_dst(struct trace_seq *p,
 	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
 		trace_seq_printf(p, "ft=%p\n", dst->ft);
 		break;
+	case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+		trace_seq_printf(p, "ft_num=%u\n", dst->ft_num);
+		break;
 	case MLX5_FLOW_DESTINATION_TYPE_TIR:
 		trace_seq_printf(p, "tir=%u\n", dst->tir_num);
 		break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 6a62b84..8e01f81 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -368,18 +368,20 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 		int list_size = 0;
 
 		list_for_each_entry(dst, &fte->node.children, node.list) {
-			unsigned int id;
+			unsigned int id, type = dst->dest_attr.type;
 
-			if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+			if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
 				continue;
 
-			MLX5_SET(dest_format_struct, in_dests, destination_type,
-				 dst->dest_attr.type);
-			if (dst->dest_attr.type ==
-			    MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
+			switch (type) {
+			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+				id = dst->dest_attr.ft_num;
+				type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+				break;
+			case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
 				id = dst->dest_attr.ft->id;
-			} else if (dst->dest_attr.type ==
-				   MLX5_FLOW_DESTINATION_TYPE_VPORT) {
+				break;
+			case MLX5_FLOW_DESTINATION_TYPE_VPORT:
 				id = dst->dest_attr.vport.num;
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id_valid,
@@ -387,9 +389,13 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 				MLX5_SET(dest_format_struct, in_dests,
 					 destination_eswitch_owner_vhca_id,
 					 dst->dest_attr.vport.vhca_id);
-			} else {
+				break;
+			default:
 				id = dst->dest_attr.tir_num;
 			}
+
+			MLX5_SET(dest_format_struct, in_dests, destination_type,
+				 type);
 			MLX5_SET(dest_format_struct, in_dests, destination_id, id);
 			in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
 			list_size++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 261cb6a..f418541 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -310,89 +310,17 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
 	return NULL;
 }
 
-static bool check_last_reserved(const u32 *match_criteria)
-{
-	char *match_criteria_reserved =
-		MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED);
-
-	return	!match_criteria_reserved[0] &&
-		!memcmp(match_criteria_reserved, match_criteria_reserved + 1,
-			MLX5_FLD_SZ_BYTES(fte_match_param,
-					  MLX5_FTE_MATCH_PARAM_RESERVED) - 1);
-}
-
-static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria)
-{
-	if (match_criteria_enable & ~(
-		(1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)   |
-		(1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) |
-		(1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) |
-		(1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)))
-		return false;
-
-	if (!(match_criteria_enable &
-	      1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) {
-		char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
-						  match_criteria, outer_headers);
-
-		if (fg_type_mask[0] ||
-		    memcmp(fg_type_mask, fg_type_mask + 1,
-			   MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
-			return false;
-	}
-
-	if (!(match_criteria_enable &
-	      1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) {
-		char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
-						  match_criteria, misc_parameters);
-
-		if (fg_type_mask[0] ||
-		    memcmp(fg_type_mask, fg_type_mask + 1,
-			   MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1))
-			return false;
-	}
-
-	if (!(match_criteria_enable &
-	      1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) {
-		char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
-						  match_criteria, inner_headers);
-
-		if (fg_type_mask[0] ||
-		    memcmp(fg_type_mask, fg_type_mask + 1,
-			   MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1))
-			return false;
-	}
-
-	if (!(match_criteria_enable &
-	      1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS_2)) {
-		char *fg_type_mask = MLX5_ADDR_OF(fte_match_param,
-						  match_criteria, misc_parameters_2);
-
-		if (fg_type_mask[0] ||
-		    memcmp(fg_type_mask, fg_type_mask + 1,
-			   MLX5_ST_SZ_BYTES(fte_match_set_misc2) - 1))
-			return false;
-	}
-
-	return check_last_reserved(match_criteria);
-}
-
 static bool check_valid_spec(const struct mlx5_flow_spec *spec)
 {
 	int i;
 
-	if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) {
-		pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n");
-		return false;
-	}
-
 	for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++)
 		if (spec->match_value[i] & ~spec->match_criteria[i]) {
 			pr_warn("mlx5_core: match_value differs from match_criteria\n");
 			return false;
 		}
 
-	return check_last_reserved(spec->match_value);
+	return true;
 }
 
 static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
@@ -1159,9 +1087,6 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
 	struct mlx5_flow_group *fg;
 	int err;
 
-	if (!check_valid_mask(match_criteria_enable, match_criteria))
-		return ERR_PTR(-EINVAL);
-
 	if (ft->autogroup.active)
 		return ERR_PTR(-EPERM);
 
@@ -1432,7 +1357,9 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
 		     d1->ft == d2->ft) ||
 		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
-		     d1->tir_num == d2->tir_num))
+		     d1->tir_num == d2->tir_num) ||
+		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM &&
+		     d1->ft_num == d2->ft_num))
 			return true;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index b7c21eb..e3797a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -584,6 +584,22 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 	return 0;
 }
 
+static void mlx5_rdma_netdev_free(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+	struct mlx5i_priv *ipriv = priv->ppriv;
+	const struct mlx5e_profile *profile = priv->profile;
+
+	mlx5e_detach_netdev(priv);
+	profile->cleanup(priv);
+	destroy_workqueue(priv->wq);
+
+	if (!ipriv->sub_interface) {
+		mlx5i_pkey_qpn_ht_cleanup(netdev);
+		mlx5e_destroy_mdev_resources(priv->mdev);
+	}
+}
+
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 					  struct ib_device *ibdev,
 					  const char *name,
@@ -657,6 +673,9 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 	rn->detach_mcast = mlx5i_detach_mcast;
 	rn->set_id = mlx5i_set_pkey_index;
 
+	netdev->priv_destructor = mlx5_rdma_netdev_free;
+	netdev->needs_free_netdev = 1;
+
 	return netdev;
 
 destroy_ht:
@@ -669,21 +688,3 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 	return NULL;
 }
 EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
-
-void mlx5_rdma_netdev_free(struct net_device *netdev)
-{
-	struct mlx5e_priv *priv = mlx5i_epriv(netdev);
-	struct mlx5i_priv *ipriv = priv->ppriv;
-	const struct mlx5e_profile *profile = priv->profile;
-
-	mlx5e_detach_netdev(priv);
-	profile->cleanup(priv);
-	destroy_workqueue(priv->wq);
-
-	if (!ipriv->sub_interface) {
-		mlx5i_pkey_qpn_ht_cleanup(netdev);
-		mlx5e_destroy_mdev_resources(priv->mdev);
-	}
-	free_netdev(netdev);
-}
-EXPORT_SYMBOL(mlx5_rdma_netdev_free);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 0805fa6..dc04201 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -378,7 +378,7 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id)
 	}
 
 	ndev->num_inline_segments = min(NVME_RDMA_MAX_INLINE_SEGMENTS,
-					ndev->dev->attrs.max_sge - 1);
+					ndev->dev->attrs.max_send_sge - 1);
 	list_add(&ndev->entry, &device_list);
 out_unlock:
 	mutex_unlock(&device_list_mutex);
@@ -1093,7 +1093,6 @@ static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
 static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_request *req)
 {
-	struct ib_send_wr *bad_wr;
 	struct ib_send_wr wr = {
 		.opcode		    = IB_WR_LOCAL_INV,
 		.next		    = NULL,
@@ -1105,7 +1104,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
 	req->reg_cqe.done = nvme_rdma_inv_rkey_done;
 	wr.wr_cqe = &req->reg_cqe;
 
-	return ib_post_send(queue->qp, &wr, &bad_wr);
+	return ib_post_send(queue->qp, &wr, NULL);
 }
 
 static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
@@ -1308,7 +1307,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
 		struct ib_send_wr *first)
 {
-	struct ib_send_wr wr, *bad_wr;
+	struct ib_send_wr wr;
 	int ret;
 
 	sge->addr   = qe->dma;
@@ -1327,7 +1326,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	else
 		first = &wr;
 
-	ret = ib_post_send(queue->qp, first, &bad_wr);
+	ret = ib_post_send(queue->qp, first, NULL);
 	if (unlikely(ret)) {
 		dev_err(queue->ctrl->ctrl.device,
 			     "%s failed with error code %d\n", __func__, ret);
@@ -1338,7 +1337,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe)
 {
-	struct ib_recv_wr wr, *bad_wr;
+	struct ib_recv_wr wr;
 	struct ib_sge list;
 	int ret;
 
@@ -1353,7 +1352,7 @@ static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue,
 	wr.sg_list  = &list;
 	wr.num_sge  = 1;
 
-	ret = ib_post_recv(queue->qp, &wr, &bad_wr);
+	ret = ib_post_recv(queue->qp, &wr, NULL);
 	if (unlikely(ret)) {
 		dev_err(queue->ctrl->ctrl.device,
 			"%s failed with error code %d\n", __func__, ret);
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index e7f43d1..3533e91 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -435,7 +435,6 @@ static void nvmet_rdma_free_rsps(struct nvmet_rdma_queue *queue)
 static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
 		struct nvmet_rdma_cmd *cmd)
 {
-	struct ib_recv_wr *bad_wr;
 	int ret;
 
 	ib_dma_sync_single_for_device(ndev->device,
@@ -443,9 +442,9 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
 		DMA_FROM_DEVICE);
 
 	if (ndev->srq)
-		ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr);
+		ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
 	else
-		ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr);
+		ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL);
 
 	if (unlikely(ret))
 		pr_err("post_recv cmd failed\n");
@@ -532,7 +531,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
 	struct nvmet_rdma_rsp *rsp =
 		container_of(req, struct nvmet_rdma_rsp, req);
 	struct rdma_cm_id *cm_id = rsp->queue->cm_id;
-	struct ib_send_wr *first_wr, *bad_wr;
+	struct ib_send_wr *first_wr;
 
 	if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) {
 		rsp->send_wr.opcode = IB_WR_SEND_WITH_INV;
@@ -553,7 +552,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
 		rsp->send_sge.addr, rsp->send_sge.length,
 		DMA_TO_DEVICE);
 
-	if (unlikely(ib_post_send(cm_id->qp, first_wr, &bad_wr))) {
+	if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
 		pr_err("sending cmd response failed\n");
 		nvmet_rdma_release_rsp(rsp);
 	}
@@ -892,7 +891,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
 
 	inline_page_count = num_pages(port->inline_data_size);
 	inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
-				cm_id->device->attrs.max_sge) - 1;
+				cm_id->device->attrs.max_recv_sge) - 1;
 	if (inline_page_count > inline_sge_count) {
 		pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
 			port->inline_data_size, cm_id->device->name,
@@ -969,7 +968,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
 	qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
 	qp_attr.cap.max_rdma_ctxs = queue->send_queue_size;
 	qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
-					ndev->device->attrs.max_sge);
+					ndev->device->attrs.max_send_sge);
 
 	if (ndev->srq) {
 		qp_attr.srq = ndev->srq;
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index c55ea4e..5fdb9a5 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -802,7 +802,7 @@ static int smbd_ia_open(
  */
 static int smbd_post_send_negotiate_req(struct smbd_connection *info)
 {
-	struct ib_send_wr send_wr, *send_wr_fail;
+	struct ib_send_wr send_wr;
 	int rc = -ENOMEM;
 	struct smbd_request *request;
 	struct smbd_negotiate_req *packet;
@@ -854,7 +854,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info)
 
 	request->has_payload = false;
 	atomic_inc(&info->send_pending);
-	rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
+	rc = ib_post_send(info->id->qp, &send_wr, NULL);
 	if (!rc)
 		return 0;
 
@@ -1024,7 +1024,7 @@ static void smbd_destroy_header(struct smbd_connection *info,
 static int smbd_post_send(struct smbd_connection *info,
 		struct smbd_request *request, bool has_payload)
 {
-	struct ib_send_wr send_wr, *send_wr_fail;
+	struct ib_send_wr send_wr;
 	int rc, i;
 
 	for (i = 0; i < request->num_sge; i++) {
@@ -1055,7 +1055,7 @@ static int smbd_post_send(struct smbd_connection *info,
 		atomic_inc(&info->send_pending);
 	}
 
-	rc = ib_post_send(info->id->qp, &send_wr, &send_wr_fail);
+	rc = ib_post_send(info->id->qp, &send_wr, NULL);
 	if (rc) {
 		log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
 		if (has_payload) {
@@ -1184,7 +1184,7 @@ static int smbd_post_send_data(
 static int smbd_post_recv(
 		struct smbd_connection *info, struct smbd_response *response)
 {
-	struct ib_recv_wr recv_wr, *recv_wr_fail = NULL;
+	struct ib_recv_wr recv_wr;
 	int rc = -EIO;
 
 	response->sge.addr = ib_dma_map_single(
@@ -1203,7 +1203,7 @@ static int smbd_post_recv(
 	recv_wr.sg_list = &response->sge;
 	recv_wr.num_sge = 1;
 
-	rc = ib_post_recv(info->id->qp, &recv_wr, &recv_wr_fail);
+	rc = ib_post_recv(info->id->qp, &recv_wr, NULL);
 	if (rc) {
 		ib_dma_unmap_single(info->id->device, response->sge.addr,
 				    response->sge.length, DMA_FROM_DEVICE);
@@ -1662,9 +1662,16 @@ static struct smbd_connection *_smbd_get_connection(
 	info->max_receive_size = smbd_max_receive_size;
 	info->keep_alive_interval = smbd_keep_alive_interval;
 
-	if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) {
-		log_rdma_event(ERR, "warning: device max_sge = %d too small\n",
-			info->id->device->attrs.max_sge);
+	if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) {
+		log_rdma_event(ERR,
+			"warning: device max_send_sge = %d too small\n",
+			info->id->device->attrs.max_send_sge);
+		log_rdma_event(ERR, "Queue Pair creation may fail\n");
+	}
+	if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
+		log_rdma_event(ERR,
+			"warning: device max_recv_sge = %d too small\n",
+			info->id->device->attrs.max_recv_sge);
 		log_rdma_event(ERR, "Queue Pair creation may fail\n");
 	}
 
@@ -2473,7 +2480,6 @@ struct smbd_mr *smbd_register_mr(
 	int rc, i;
 	enum dma_data_direction dir;
 	struct ib_reg_wr *reg_wr;
-	struct ib_send_wr *bad_wr;
 
 	if (num_pages > info->max_frmr_depth) {
 		log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n",
@@ -2547,7 +2553,7 @@ struct smbd_mr *smbd_register_mr(
 	 * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
 	 * on the next ib_post_send when we actually send I/O to remote peer
 	 */
-	rc = ib_post_send(info->id->qp, &reg_wr->wr, &bad_wr);
+	rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL);
 	if (!rc)
 		return smbdirect_mr;
 
@@ -2592,7 +2598,7 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
  */
 int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
 {
-	struct ib_send_wr *wr, *bad_wr;
+	struct ib_send_wr *wr;
 	struct smbd_connection *info = smbdirect_mr->conn;
 	int rc = 0;
 
@@ -2607,7 +2613,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
 		wr->send_flags = IB_SEND_SIGNALED;
 
 		init_completion(&smbdirect_mr->invalidate_done);
-		rc = ib_post_send(info->id->qp, wr, &bad_wr);
+		rc = ib_post_send(info->id->qp, wr, NULL);
 		if (rc) {
 			log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
 			smbd_disconnect_rdma_connection(info);
diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e..3e8215b 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -98,6 +98,17 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
  * period).
  */
 
+#define idr_lock(idr)		xa_lock(&(idr)->idr_rt)
+#define idr_unlock(idr)		xa_unlock(&(idr)->idr_rt)
+#define idr_lock_bh(idr)	xa_lock_bh(&(idr)->idr_rt)
+#define idr_unlock_bh(idr)	xa_unlock_bh(&(idr)->idr_rt)
+#define idr_lock_irq(idr)	xa_lock_irq(&(idr)->idr_rt)
+#define idr_unlock_irq(idr)	xa_unlock_irq(&(idr)->idr_rt)
+#define idr_lock_irqsave(idr, flags) \
+				xa_lock_irqsave(&(idr)->idr_rt, flags)
+#define idr_unlock_irqrestore(idr, flags) \
+				xa_unlock_irqrestore(&(idr)->idr_rt, flags)
+
 void idr_preload(gfp_t gfp_mask);
 
 int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t);
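The new idr_lock*()/idr_unlock*() helpers expose the xa_lock embedded in the IDR's radix tree, so callers no longer need a separate spinlock just to serialize IDR updates. A minimal usage sketch (hypothetical helper, assuming every writer of this IDR takes the same lock):

static void *example_idr_remove(struct idr *idr, int id)
{
	void *entry;

	/* Serialize against concurrent idr_alloc()/idr_remove() callers
	 * using the IDR's own lock instead of a caller-provided one.
	 */
	idr_lock(idr);
	entry = idr_remove(idr, id);
	idr_unlock(idr);

	return entry;
}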
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 54f385c..7a45271 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1235,14 +1235,11 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 {
 	return ERR_PTR(-EOPNOTSUPP);
 }
-
-static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {}
 #else
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 					  struct ib_device *ibdev,
 					  const char *name,
 					  void (*setup)(struct net_device *));
-void mlx5_rdma_netdev_free(struct net_device *netdev);
 #endif /* CONFIG_MLX5_CORE_IPOIB */
 
 struct mlx5_profile {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 71fb503..804516e 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -89,6 +89,7 @@ struct mlx5_flow_destination {
 	enum mlx5_flow_destination_type	type;
 	union {
 		u32			tir_num;
+		u32			ft_num;
 		struct mlx5_flow_table	*ft;
 		struct mlx5_fc		*counter;
 		struct {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6ead9c1..f043d65 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -82,6 +82,7 @@ enum {
 
 enum {
 	MLX5_OBJ_TYPE_UCTX = 0x0004,
+	MLX5_OBJ_TYPE_UMEM = 0x0005,
 };
 
 enum {
@@ -246,12 +247,15 @@ enum {
 	MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
 	MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
 	MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
+	MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
 	MLX5_CMD_OP_FPGA_CREATE_QP                = 0x960,
 	MLX5_CMD_OP_FPGA_MODIFY_QP                = 0x961,
 	MLX5_CMD_OP_FPGA_QUERY_QP                 = 0x962,
 	MLX5_CMD_OP_FPGA_DESTROY_QP               = 0x963,
 	MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS        = 0x964,
 	MLX5_CMD_OP_CREATE_GENERAL_OBJECT         = 0xa00,
+	MLX5_CMD_OP_MODIFY_GENERAL_OBJECT         = 0xa01,
+	MLX5_CMD_OP_QUERY_GENERAL_OBJECT          = 0xa02,
 	MLX5_CMD_OP_DESTROY_GENERAL_OBJECT        = 0xa03,
 	MLX5_CMD_OP_MAX
 };
@@ -1185,6 +1189,7 @@ enum mlx5_flow_destination_type {
 
 	MLX5_FLOW_DESTINATION_TYPE_PORT         = 0x99,
 	MLX5_FLOW_DESTINATION_TYPE_COUNTER      = 0x100,
+	MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM = 0x101,
 };
 
 struct mlx5_ifc_dest_format_struct_bits {
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 8712ff7..40b48e2 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -202,6 +202,37 @@
 
 #endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
 
+/** check_shl_overflow() - Calculate a left-shifted value and check overflow
+ *
+ * @a: Value to be shifted
+ * @s: How many bits left to shift
+ * @d: Pointer to where to store the result
+ *
+ * Computes *@d = (@a << @s)
+ *
+ * Returns true if '*d' cannot hold the result or when 'a << s' doesn't
+ * make sense. Example conditions:
+ * - 'a << s' causes bits to be lost when stored in *d.
+ * - 's' is garbage (e.g. negative) or so large that the result of
+ *   'a << s' is guaranteed to be 0.
+ * - 'a' is negative.
+ * - 'a << s' sets the sign bit, if any, in '*d'.
+ *
+ * '*d' will hold the results of the attempted shift, but is not
+ * considered "safe for use" if false is returned.
+ */
+#define check_shl_overflow(a, s, d) ({					\
+	typeof(a) _a = a;						\
+	typeof(s) _s = s;						\
+	typeof(d) _d = d;						\
+	u64 _a_full = _a;						\
+	unsigned int _to_shift =					\
+		_s >= 0 && _s < 8 * sizeof(*d) ? _s : 0;		\
+	*_d = (_a_full << _to_shift);					\
+	(_to_shift != _s || *_d < 0 || _a < 0 ||			\
+		(*_d >> _to_shift) != _a);				\
+})
+
 /**
  * array_size() - Calculate size of 2-dimensional array.
  *
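check_shl_overflow() is easiest to read from an example. The sketch below (a hypothetical helper, not from this patch) converts a count of 4KiB pages into bytes and rejects inputs whose shifted value would not fit in the destination:

static int example_pages_to_bytes(u32 npages, u32 *bytes)
{
	/* true means bits were lost or the shift itself was nonsensical */
	if (check_shl_overflow(npages, 12, bytes))
		return -EOVERFLOW;

	return 0;
}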
diff --git a/include/rdma/ib.h b/include/rdma/ib.h
index 66dbed0..4f385ec 100644
--- a/include/rdma/ib.h
+++ b/include/rdma/ib.h
@@ -53,12 +53,12 @@ struct ib_addr {
 #define sib_interface_id	ib_u.uib_addr64[1]
 };
 
-static inline int ib_addr_any(const struct ib_addr *a)
+static inline bool ib_addr_any(const struct ib_addr *a)
 {
 	return ((a->sib_addr64[0] | a->sib_addr64[1]) == 0);
 }
 
-static inline int ib_addr_loopback(const struct ib_addr *a)
+static inline bool ib_addr_loopback(const struct ib_addr *a)
 {
 	return ((a->sib_addr32[0] | a->sib_addr32[1] |
 		 a->sib_addr32[2] | (a->sib_addr32[3] ^ htonl(1))) == 0);
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index c2c8b1f..77c7908 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -58,6 +58,7 @@
  * @bound_dev_if:	An optional device interface index.
  * @transport:		The transport type used.
  * @net:		Network namespace containing the bound_dev_if net_dev.
+ * @sgid_attr:		GID attribute to use for identified SGID
  */
 struct rdma_dev_addr {
 	unsigned char src_dev_addr[MAX_ADDR_LEN];
@@ -67,6 +68,7 @@ struct rdma_dev_addr {
 	int bound_dev_if;
 	enum rdma_transport_type transport;
 	struct net *net;
+	const struct ib_gid_attr *sgid_attr;
 	enum rdma_network_type network;
 	int hoplimit;
 };
@@ -95,7 +97,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
  *   or been canceled.  A status of 0 indicates success.
  * @context: User-specified context associated with the call.
  */
-int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
+int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
 		    void (*callback)(int status, struct sockaddr *src_addr,
 				     struct rdma_dev_addr *addr, void *context),
@@ -107,7 +109,7 @@ void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
 		    const struct net_device *dev,
 		    const unsigned char *dst_dev_addr);
 
-int rdma_addr_size(struct sockaddr *addr);
+int rdma_addr_size(const struct sockaddr *addr);
 int rdma_addr_size_in6(struct sockaddr_in6 *addr);
 int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr);
 
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index a5f2498..3e11e7c 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -37,45 +37,23 @@
 
 #include <rdma/ib_verbs.h>
 
-/**
- * ib_get_cached_gid - Returns a cached GID table entry
- * @device: The device to query.
- * @port_num: The port number of the device to query.
- * @index: The index into the cached GID table to query.
- * @gid: The GID value found at the specified index.
- * @attr: The GID attribute found at the specified index (only in RoCE).
- *   NULL means ignore (output parameter).
- *
- * ib_get_cached_gid() fetches the specified GID table entry stored in
- * the local software cache.
- */
-int ib_get_cached_gid(struct ib_device    *device,
-		      u8                   port_num,
-		      int                  index,
-		      union ib_gid        *gid,
-		      struct ib_gid_attr  *attr);
+int rdma_query_gid(struct ib_device *device, u8 port_num, int index,
+		   union ib_gid *gid);
+const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
+					const union ib_gid *gid,
+					enum ib_gid_type gid_type,
+					struct net_device *ndev);
+const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev,
+						const union ib_gid *gid,
+						enum ib_gid_type gid_type,
+						u8 port,
+						struct net_device *ndev);
+const struct ib_gid_attr *rdma_find_gid_by_filter(
+	struct ib_device *device, const union ib_gid *gid, u8 port_num,
+	bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
+		       void *),
+	void *context);
 
-int ib_find_cached_gid(struct ib_device *device,
-		       const union ib_gid *gid,
-		       enum ib_gid_type gid_type,
-		       struct net_device *ndev,
-		       u8               *port_num,
-		       u16              *index);
-
-int ib_find_cached_gid_by_port(struct ib_device *device,
-			       const union ib_gid *gid,
-			       enum ib_gid_type gid_type,
-			       u8               port_num,
-			       struct net_device *ndev,
-			       u16              *index);
-
-int ib_find_gid_by_filter(struct ib_device *device,
-			  const union ib_gid *gid,
-			  u8 port_num,
-			  bool (*filter)(const union ib_gid *gid,
-					 const struct ib_gid_attr *,
-					 void *),
-			  void *context, u16 *index);
 /**
  * ib_get_cached_pkey - Returns a cached PKey table entry
  * @device: The device to query.
@@ -150,4 +128,33 @@ int ib_get_cached_port_state(struct ib_device *device,
 			      enum ib_port_state *port_active);
 
 bool rdma_is_zero_gid(const union ib_gid *gid);
+const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
+					    u8 port_num, int index);
+void rdma_put_gid_attr(const struct ib_gid_attr *attr);
+void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
+
+/*
+ * This is to be removed. It only exists to make merging rdma and smc simpler.
+ */
+static inline __deprecated int ib_query_gid(struct ib_device *device,
+					    u8 port_num, int index,
+					    union ib_gid *gid,
+					    struct ib_gid_attr *attr_out)
+{
+	const struct ib_gid_attr *attr;
+
+	memset(attr_out, 0, sizeof(*attr_out));
+	attr = rdma_get_gid_attr(device, port_num, index);
+	if (IS_ERR(attr))
+		return PTR_ERR(attr);
+
+	if (attr->ndev)
+		dev_hold(attr->ndev);
+	*attr_out = *attr;
+
+	rdma_put_gid_attr(attr);
+
+	return 0;
+}
+
 #endif /* _IB_CACHE_H */
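A short sketch of how the reference-counted GID attribute API that replaces ib_get_cached_gid()/ib_query_gid() is intended to be used; the helper name is illustrative only. The attribute returned by rdma_get_gid_attr() is reference counted and must be released with rdma_put_gid_attr():

static int example_show_gid(struct ib_device *dev, u8 port_num, int index)
{
	const struct ib_gid_attr *attr;

	attr = rdma_get_gid_attr(dev, port_num, index);
	if (IS_ERR(attr))
		return PTR_ERR(attr);

	pr_info("port %u gid index %u: gid_type %d\n",
		port_num, attr->index, attr->gid_type);

	rdma_put_gid_attr(attr);
	return 0;
}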
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 7979cb0..c10f4b5 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -120,6 +120,13 @@ struct ib_cm_req_event_param {
 	struct sa_path_rec	*primary_path;
 	struct sa_path_rec	*alternate_path;
 
+	/*
+	 * SGID attribute of the primary path. Currently only
+	 * useful for RoCE. Alternate path GID attributes
+	 * are not yet supported.
+	 */
+	const struct ib_gid_attr *ppath_sgid_attr;
+
 	__be64			remote_ca_guid;
 	u32			remote_qkey;
 	u32			remote_qpn;
@@ -226,6 +233,12 @@ struct ib_cm_apr_event_param {
 struct ib_cm_sidr_req_event_param {
 	struct ib_cm_id		*listen_id;
 	__be64			service_id;
+
+	/*
+	 * SGID attribute of the request. Currently only
+	 * useful for RoCE.
+	 */
+	const struct ib_gid_attr *sgid_attr;
 	/* P_Key that was used by the GMP's BTH header */
 	u16			bth_pkey;
 	u8			port;
@@ -246,6 +259,7 @@ struct ib_cm_sidr_rep_event_param {
 	u32			qkey;
 	u32			qpn;
 	void			*info;
+	const struct ib_gid_attr *sgid_attr;
 	u8			info_len;
 };
 
@@ -297,7 +311,7 @@ struct ib_cm_event {
  * destroy the @cm_id after the callback completes.
  */
 typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id,
-			     struct ib_cm_event *event);
+			     const struct ib_cm_event *event);
 
 struct ib_cm_id {
 	ib_cm_handler		cm_handler;
@@ -365,6 +379,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
 struct ib_cm_req_param {
 	struct sa_path_rec	*primary_path;
 	struct sa_path_rec	*alternate_path;
+	const struct ib_gid_attr *ppath_sgid_attr;
 	__be64			service_id;
 	u32			qp_num;
 	enum ib_qp_type		qp_type;
@@ -566,6 +581,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
 
 struct ib_cm_sidr_req_param {
 	struct sa_path_rec	*path;
+	const struct ib_gid_attr *sgid_attr;
 	__be64			service_id;
 	int			timeout_ms;
 	const void		*private_data;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 2f4f176..f6ba366 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -262,6 +262,39 @@ struct ib_class_port_info {
 	__be32			trap_qkey;
 };
 
+/* PortInfo CapabilityMask */
+enum ib_port_capability_mask_bits {
+	IB_PORT_SM = 1 << 1,
+	IB_PORT_NOTICE_SUP = 1 << 2,
+	IB_PORT_TRAP_SUP = 1 << 3,
+	IB_PORT_OPT_IPD_SUP = 1 << 4,
+	IB_PORT_AUTO_MIGR_SUP = 1 << 5,
+	IB_PORT_SL_MAP_SUP = 1 << 6,
+	IB_PORT_MKEY_NVRAM = 1 << 7,
+	IB_PORT_PKEY_NVRAM = 1 << 8,
+	IB_PORT_LED_INFO_SUP = 1 << 9,
+	IB_PORT_SM_DISABLED = 1 << 10,
+	IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
+	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+	IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+	IB_PORT_CM_SUP = 1 << 16,
+	IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
+	IB_PORT_REINIT_SUP = 1 << 18,
+	IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
+	IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
+	IB_PORT_DR_NOTICE_SUP = 1 << 21,
+	IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
+	IB_PORT_BOOT_MGMT_SUP = 1 << 23,
+	IB_PORT_LINK_LATENCY_SUP = 1 << 24,
+	IB_PORT_CLIENT_REG_SUP = 1 << 25,
+	IB_PORT_OTHER_LOCAL_CHANGES_SUP = 1 << 26,
+	IB_PORT_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
+	IB_PORT_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
+	IB_PORT_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
+	IB_PORT_MCAST_FDB_TOP_SUP = 1 << 30,
+	IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
+};
+
 #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
 
 struct opa_class_port_info {
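These enum values mirror the PortInfo CapabilityMask bits reported through ib_port_attr::port_cap_flags, so a consumer can test them directly. An illustrative sketch (hypothetical helper, not part of this patch):

static bool example_port_supports_cm(struct ib_device *dev, u8 port_num)
{
	struct ib_port_attr attr;

	if (ib_query_port(dev, port_num, &attr))
		return false;

	/* port_cap_flags carries the PortInfo CapabilityMask value */
	return !!(attr.port_cap_flags & IB_PORT_CM_SUP);
}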
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index bacb144..b6ddf2a 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -172,12 +172,7 @@ struct sa_path_rec_ib {
  */
 struct sa_path_rec_roce {
 	bool	route_resolved;
-	u8           dmac[ETH_ALEN];
-	/* ignored in IB */
-	int	     ifindex;
-	/* ignored in IB */
-	struct net  *net;
-
+	u8	dmac[ETH_ALEN];
 };
 
 struct sa_path_rec_opa {
@@ -556,13 +551,10 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
 			     enum ib_gid_type gid_type,
 			     struct rdma_ah_attr *ah_attr);
 
-/**
- * ib_init_ah_attr_from_path - Initialize address handle attributes based on
- *   an SA path record.
- */
 int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
 			      struct sa_path_rec *rec,
-			      struct rdma_ah_attr *ah_attr);
+			      struct rdma_ah_attr *ah_attr,
+			      const struct ib_gid_attr *sgid_attr);
 
 /**
  * ib_sa_pack_path - Convert a path record from struct ib_sa_path_rec
@@ -667,45 +659,10 @@ static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec)
 		eth_zero_addr(rec->roce.dmac);
 }
 
-static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex)
-{
-	if (sa_path_is_roce(rec))
-		rec->roce.ifindex = ifindex;
-}
-
-static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net)
-{
-	if (sa_path_is_roce(rec))
-		rec->roce.net = net;
-}
-
 static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec)
 {
 	if (sa_path_is_roce(rec))
 		return rec->roce.dmac;
 	return NULL;
 }
-
-static inline int sa_path_get_ifindex(struct sa_path_rec *rec)
-{
-	if (sa_path_is_roce(rec))
-		return rec->roce.ifindex;
-	return 0;
-}
-
-static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec)
-{
-	if (sa_path_is_roce(rec))
-		return rec->roce.net;
-	return NULL;
-}
-
-static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec)
-{
-	return sa_path_get_ndev(rec) ?
-		dev_get_by_index(sa_path_get_ndev(rec),
-				 sa_path_get_ifindex(rec))
-		: NULL;
-}
-
 #endif /* IB_SA_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6c00399..e950c2a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -94,6 +94,7 @@ enum ib_gid_type {
 struct ib_gid_attr {
 	struct net_device	*ndev;
 	struct ib_device	*device;
+	union ib_gid		gid;
 	enum ib_gid_type	gid_type;
 	u16			index;
 	u8			port_num;
@@ -148,13 +149,13 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net
 	return IB_GID_TYPE_IB;
 }
 
-static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
-							    union ib_gid *gid)
+static inline enum rdma_network_type
+rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
 {
-	if (gid_type == IB_GID_TYPE_IB)
+	if (attr->gid_type == IB_GID_TYPE_IB)
 		return RDMA_NETWORK_IB;
 
-	if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+	if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
 		return RDMA_NETWORK_IPV4;
 	else
 		return RDMA_NETWORK_IPV6;
@@ -344,7 +345,8 @@ struct ib_device_attr {
 	int			max_qp;
 	int			max_qp_wr;
 	u64			device_cap_flags;
-	int			max_sge;
+	int			max_send_sge;
+	int			max_recv_sge;
 	int			max_sge_rd;
 	int			max_cq;
 	int			max_cqe;
@@ -430,33 +432,6 @@ enum ib_port_state {
 	IB_PORT_ACTIVE_DEFER	= 5
 };
 
-enum ib_port_cap_flags {
-	IB_PORT_SM				= 1 <<  1,
-	IB_PORT_NOTICE_SUP			= 1 <<  2,
-	IB_PORT_TRAP_SUP			= 1 <<  3,
-	IB_PORT_OPT_IPD_SUP                     = 1 <<  4,
-	IB_PORT_AUTO_MIGR_SUP			= 1 <<  5,
-	IB_PORT_SL_MAP_SUP			= 1 <<  6,
-	IB_PORT_MKEY_NVRAM			= 1 <<  7,
-	IB_PORT_PKEY_NVRAM			= 1 <<  8,
-	IB_PORT_LED_INFO_SUP			= 1 <<  9,
-	IB_PORT_SM_DISABLED			= 1 << 10,
-	IB_PORT_SYS_IMAGE_GUID_SUP		= 1 << 11,
-	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP	= 1 << 12,
-	IB_PORT_EXTENDED_SPEEDS_SUP             = 1 << 14,
-	IB_PORT_CM_SUP				= 1 << 16,
-	IB_PORT_SNMP_TUNNEL_SUP			= 1 << 17,
-	IB_PORT_REINIT_SUP			= 1 << 18,
-	IB_PORT_DEVICE_MGMT_SUP			= 1 << 19,
-	IB_PORT_VENDOR_CLASS_SUP		= 1 << 20,
-	IB_PORT_DR_NOTICE_SUP			= 1 << 21,
-	IB_PORT_CAP_MASK_NOTICE_SUP		= 1 << 22,
-	IB_PORT_BOOT_MGMT_SUP			= 1 << 23,
-	IB_PORT_LINK_LATENCY_SUP		= 1 << 24,
-	IB_PORT_CLIENT_REG_SUP			= 1 << 25,
-	IB_PORT_IP_BASED_GIDS			= 1 << 26,
-};
-
 enum ib_port_width {
 	IB_WIDTH_1X	= 1,
 	IB_WIDTH_4X	= 2,
@@ -554,6 +529,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 #define RDMA_CORE_CAP_AF_IB             0x00001000
 #define RDMA_CORE_CAP_ETH_AH            0x00002000
 #define RDMA_CORE_CAP_OPA_AH            0x00004000
+#define RDMA_CORE_CAP_IB_GRH_REQUIRED   0x00008000
 
 /* Protocol                             0xFFF00000 */
 #define RDMA_CORE_CAP_PROT_IB           0x00100000
@@ -563,6 +539,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 #define RDMA_CORE_CAP_PROT_RAW_PACKET   0x01000000
 #define RDMA_CORE_CAP_PROT_USNIC        0x02000000
 
+#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
+					| RDMA_CORE_CAP_PROT_ROCE     \
+					| RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
+
 #define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
 					| RDMA_CORE_CAP_IB_MAD \
 					| RDMA_CORE_CAP_IB_SMI \
@@ -595,6 +575,8 @@ struct ib_port_attr {
 	enum ib_mtu		max_mtu;
 	enum ib_mtu		active_mtu;
 	int			gid_tbl_len;
+	unsigned int		ip_gids:1;
+	/* This is the value from PortInfo CapabilityMask, defined by IBA */
 	u32			port_cap_flags;
 	u32			max_msg_sz;
 	u32			bad_pkey_cntr;
@@ -610,7 +592,6 @@ struct ib_port_attr {
 	u8			active_width;
 	u8			active_speed;
 	u8                      phys_state;
-	bool			grh_required;
 };
 
 enum ib_device_modify_flags {
@@ -689,6 +670,7 @@ struct ib_event_handler {
 	} while (0)
 
 struct ib_global_route {
+	const struct ib_gid_attr *sgid_attr;
 	union ib_gid	dgid;
 	u32		flow_label;
 	u8		sgid_index;
@@ -1370,7 +1352,7 @@ struct ib_rdma_wr {
 	u32			rkey;
 };
 
-static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
+static inline const struct ib_rdma_wr *rdma_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_rdma_wr, wr);
 }
@@ -1385,7 +1367,7 @@ struct ib_atomic_wr {
 	u32			rkey;
 };
 
-static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr)
+static inline const struct ib_atomic_wr *atomic_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_atomic_wr, wr);
 }
@@ -1402,7 +1384,7 @@ struct ib_ud_wr {
 	u8			port_num;   /* valid for DR SMPs on switch only */
 };
 
-static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
+static inline const struct ib_ud_wr *ud_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_ud_wr, wr);
 }
@@ -1414,7 +1396,7 @@ struct ib_reg_wr {
 	int			access;
 };
 
-static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
+static inline const struct ib_reg_wr *reg_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_reg_wr, wr);
 }
@@ -1427,7 +1409,8 @@ struct ib_sig_handover_wr {
 	struct ib_sge	       *prot;
 };
 
-static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
+static inline const struct ib_sig_handover_wr *
+sig_handover_wr(const struct ib_send_wr *wr)
 {
 	return container_of(wr, struct ib_sig_handover_wr, wr);
 }
@@ -1443,14 +1426,16 @@ struct ib_recv_wr {
 };
 
 enum ib_access_flags {
-	IB_ACCESS_LOCAL_WRITE	= 1,
-	IB_ACCESS_REMOTE_WRITE	= (1<<1),
-	IB_ACCESS_REMOTE_READ	= (1<<2),
-	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
-	IB_ACCESS_MW_BIND	= (1<<4),
-	IB_ZERO_BASED		= (1<<5),
-	IB_ACCESS_ON_DEMAND     = (1<<6),
-	IB_ACCESS_HUGETLB	= (1<<7),
+	IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
+	IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
+	IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
+	IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
+	IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
+	IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
+	IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
+	IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
+
+	IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
 };
 
 /*
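Because the kernel access flags are now aliases of the uapi values, IB_ACCESS_SUPPORTED gives a single mask of every bit the core understands. A hedged sketch of how a caller might validate user-supplied flags (the helper name is hypothetical):

static int example_check_access_flags(int access_flags)
{
	/* Reject any bit the core does not define. */
	if (access_flags & ~IB_ACCESS_SUPPORTED)
		return -EINVAL;

	return 0;
}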
@@ -1473,14 +1458,17 @@ struct ib_fmr_attr {
 struct ib_umem;
 
 enum rdma_remove_reason {
-	/* Userspace requested uobject deletion. Call could fail */
+	/*
+	 * Userspace requested uobject deletion or an initial attempt to
+	 * remove the uobject during cleanup. The call could fail.
+	 */
 	RDMA_REMOVE_DESTROY,
 	/* Context deletion. This call should delete the actual object itself */
 	RDMA_REMOVE_CLOSE,
 	/* Driver is being hot-unplugged. This call should delete the actual object itself */
 	RDMA_REMOVE_DRIVER_REMOVE,
-	/* Context is being cleaned-up, but commit was just completed */
-	RDMA_REMOVE_DURING_CLEANUP,
+	/* uobj is being cleaned-up before being committed */
+	RDMA_REMOVE_ABORT,
 };
 
 struct ib_rdmacg_object {
@@ -1492,14 +1480,14 @@ struct ib_rdmacg_object {
 struct ib_ucontext {
 	struct ib_device       *device;
 	struct ib_uverbs_file  *ufile;
+	/*
+	 * 'closing' can be read by the driver only during a destroy callback;
+	 * it is set when we are closing the file descriptor and indicates
+	 * that mm_sem may be locked.
+	 */
 	int			closing;
 
-	/* locking the uobjects_list */
-	struct mutex		uobjects_lock;
-	struct list_head	uobjects;
-	/* protects cleanup process from other actions */
-	struct rw_semaphore	cleanup_rwsem;
-	enum rdma_remove_reason cleanup_reason;
+	bool cleanup_retryable;
 
 	struct pid             *tgid;
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -1524,6 +1512,9 @@ struct ib_ucontext {
 
 struct ib_uobject {
 	u64			user_handle;	/* handle given to us by userspace */
+	/* ufile & ucontext owning this object */
+	struct ib_uverbs_file  *ufile;
+	/* FIXME, save memory: ufile->context == context */
 	struct ib_ucontext     *context;	/* associated user context */
 	void		       *object;		/* containing object */
 	struct list_head	list;		/* link to context's list */
@@ -1533,13 +1524,7 @@ struct ib_uobject {
 	atomic_t		usecnt;		/* protects exclusive access */
 	struct rcu_head		rcu;		/* kfree_rcu() overhead */
 
-	const struct uverbs_obj_type *type;
-};
-
-struct ib_uobject_file {
-	struct ib_uobject	uobj;
-	/* ufile contains the lock between context release and file close */
-	struct ib_uverbs_file	*ufile;
+	const struct uverbs_api_object *uapi_object;
 };
 
 struct ib_udata {
@@ -1578,6 +1563,7 @@ struct ib_ah {
 	struct ib_device	*device;
 	struct ib_pd		*pd;
 	struct ib_uobject	*uobject;
+	const struct ib_gid_attr *sgid_attr;
 	enum rdma_ah_attr_type	type;
 };
 
@@ -1776,6 +1762,9 @@ struct ib_qp {
 	struct ib_uobject      *uobject;
 	void                  (*event_handler)(struct ib_event *, void *);
 	void		       *qp_context;
+	/* sgid_attrs associated with the AVs */
+	const struct ib_gid_attr *av_sgid_attr;
+	const struct ib_gid_attr *alt_path_sgid_attr;
 	u32			qp_num;
 	u32			max_write_sge;
 	u32			max_read_sge;
@@ -2098,6 +2087,7 @@ struct ib_flow_attr {
 
 struct ib_flow {
 	struct ib_qp		*qp;
+	struct ib_device	*device;
 	struct ib_uobject	*uobject;
 };
 
@@ -2213,7 +2203,11 @@ struct rdma_netdev {
 	struct ib_device  *hca;
 	u8                 port_num;
 
-	/* cleanup function must be specified */
+	/*
+	 * cleanup function must be specified.
+	 * FIXME: This is only used for OPA_VNIC and that usage should be
+	 * removed too.
+	 */
 	void (*free_rdma_netdev)(struct net_device *netdev);
 
 	/* control functions */
@@ -2242,11 +2236,6 @@ struct ib_counters {
 	atomic_t	usecnt;
 };
 
-enum ib_read_counters_flags {
-	/* prefer read values from driver cache */
-	IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0,
-};
-
 struct ib_counters_read_attr {
 	u64	*counters_buff;
 	u32	ncounters;
@@ -2341,8 +2330,7 @@ struct ib_device {
 	 * concurrently for different ports. This function is only called when
 	 * roce_gid_table is used.
 	 */
-	int		           (*add_gid)(const union ib_gid *gid,
-					      const struct ib_gid_attr *attr,
+	int		           (*add_gid)(const struct ib_gid_attr *attr,
 					      void **context);
 	/* When calling del_gid, the HW vendor's driver should delete the
 	 * gid of device @device at gid index gid_index of port port_num
@@ -2389,8 +2377,8 @@ struct ib_device {
 						struct ib_srq_attr *srq_attr);
 	int                        (*destroy_srq)(struct ib_srq *srq);
 	int                        (*post_srq_recv)(struct ib_srq *srq,
-						    struct ib_recv_wr *recv_wr,
-						    struct ib_recv_wr **bad_recv_wr);
+						    const struct ib_recv_wr *recv_wr,
+						    const struct ib_recv_wr **bad_recv_wr);
 	struct ib_qp *             (*create_qp)(struct ib_pd *pd,
 						struct ib_qp_init_attr *qp_init_attr,
 						struct ib_udata *udata);
@@ -2404,11 +2392,11 @@ struct ib_device {
 					       struct ib_qp_init_attr *qp_init_attr);
 	int                        (*destroy_qp)(struct ib_qp *qp);
 	int                        (*post_send)(struct ib_qp *qp,
-						struct ib_send_wr *send_wr,
-						struct ib_send_wr **bad_send_wr);
+						const struct ib_send_wr *send_wr,
+						const struct ib_send_wr **bad_send_wr);
 	int                        (*post_recv)(struct ib_qp *qp,
-						struct ib_recv_wr *recv_wr,
-						struct ib_recv_wr **bad_recv_wr);
+						const struct ib_recv_wr *recv_wr,
+						const struct ib_recv_wr **bad_recv_wr);
 	struct ib_cq *             (*create_cq)(struct ib_device *device,
 						const struct ib_cq_init_attr *attr,
 						struct ib_ucontext *context,
@@ -2592,7 +2580,7 @@ struct ib_device {
 	const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
 						     int comp_vector);
 
-	struct uverbs_root_spec		*specs_root;
+	const struct uverbs_object_tree_def *const *driver_specs;
 	enum rdma_driver_id		driver_id;
 };
 
@@ -2679,6 +2667,46 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
 }
 
 /**
+ * ib_is_destroy_retryable - Check whether the uobject destruction
+ * is retryable.
+ * @ret: The initial destruction return code
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * This helper lets the IB layer and low-level drivers decide whether the
+ * destruction of the given uobject is retryable.
+ * It checks the original return code; if it was not success, the destruction
+ * is retryable depending on the ucontext state (i.e. cleanup_retryable) and
+ * the remove reason (i.e. why).
+ * Must be called with the object locked for destroy.
+ */
+static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
+					   struct ib_uobject *uobj)
+{
+	return ret && (why == RDMA_REMOVE_DESTROY ||
+		       uobj->context->cleanup_retryable);
+}
+
+/**
+ * ib_destroy_usecnt - Called during destruction to check the usecnt
+ * @usecnt: The usecnt atomic
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * Non-zero usecnts will block destruction unless destruction was triggered by
+ * a ucontext cleanup.
+ */
+static inline int ib_destroy_usecnt(atomic_t *usecnt,
+				    enum rdma_remove_reason why,
+				    struct ib_uobject *uobj)
+{
+	if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
+		return -EBUSY;
+	return 0;
+}
+
+/**
  * ib_modify_qp_is_ok - Check that the supplied attribute mask
  * contains all required attributes and no attributes not allowed for
  * the given QP state transition.
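A sketch of how the two helpers above are meant to be combined in a uobject cleanup callback; the function below is hypothetical and only illustrates the intended pattern:

static int example_free_pd(struct ib_uobject *uobject,
			   enum rdma_remove_reason why)
{
	struct ib_pd *pd = uobject->object;
	int ret;

	/* A non-zero usecnt blocks destruction unless a retryable
	 * ucontext cleanup is in progress.
	 */
	ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
	if (ret)
		return ret;

	ib_dealloc_pd(pd);
	return 0;
}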
@@ -2755,6 +2783,13 @@ static inline int rdma_is_port_valid(const struct ib_device *device,
 		port <= rdma_end_port(device));
 }
 
+static inline bool rdma_is_grh_required(const struct ib_device *device,
+					u8 port_num)
+{
+	return device->port_immutable[port_num].core_cap_flags &
+		RDMA_CORE_PORT_IB_GRH_REQUIRED;
+}
+
 static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
 {
 	return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
@@ -3046,10 +3081,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
 	return rdma_protocol_iwarp(dev, port_num);
 }
 
-int ib_query_gid(struct ib_device *device,
-		 u8 port_num, int index, union ib_gid *gid,
-		 struct ib_gid_attr *attr);
-
 int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
 			 int state);
 int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
@@ -3148,6 +3179,13 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
  *   ignored unless the work completion indicates that the GRH is valid.
  * @ah_attr: Returned attributes that can be used when creating an address
  *   handle for replying to the message.
+ * When ib_init_ah_attr_from_wc() returns success,
+ * (a) for the IB link layer it optionally contains a reference to the SGID
+ * attribute when a GRH is present,
+ * (b) for the RoCE link layer it contains a reference to the SGID attribute.
+ * The user must invoke rdma_cleanup_ah_attr_gid_attr() to release the SGID
+ * attribute reference taken by ib_init_ah_attr_from_wc().
+ *
  */
 int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
 			    const struct ib_wc *wc, const struct ib_grh *grh,
@@ -3247,10 +3285,12 @@ int ib_destroy_srq(struct ib_srq *srq);
  *   the work request that failed to be posted on the QP.
  */
 static inline int ib_post_srq_recv(struct ib_srq *srq,
-				   struct ib_recv_wr *recv_wr,
-				   struct ib_recv_wr **bad_recv_wr)
+				   const struct ib_recv_wr *recv_wr,
+				   const struct ib_recv_wr **bad_recv_wr)
 {
-	return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr);
+	const struct ib_recv_wr *dummy;
+
+	return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr ? : &dummy);
 }
 
 /**
@@ -3348,10 +3388,12 @@ int ib_close_qp(struct ib_qp *qp);
  * earlier work requests in the list.
  */
 static inline int ib_post_send(struct ib_qp *qp,
-			       struct ib_send_wr *send_wr,
-			       struct ib_send_wr **bad_send_wr)
+			       const struct ib_send_wr *send_wr,
+			       const struct ib_send_wr **bad_send_wr)
 {
-	return qp->device->post_send(qp, send_wr, bad_send_wr);
+	const struct ib_send_wr *dummy;
+
+	return qp->device->post_send(qp, send_wr, bad_send_wr ? : &dummy);
 }
 
 /**
@@ -3363,10 +3405,12 @@ static inline int ib_post_send(struct ib_qp *qp,
  *   the work request that failed to be posted on the QP.
  */
 static inline int ib_post_recv(struct ib_qp *qp,
-			       struct ib_recv_wr *recv_wr,
-			       struct ib_recv_wr **bad_recv_wr)
+			       const struct ib_recv_wr *recv_wr,
+			       const struct ib_recv_wr **bad_recv_wr)
 {
-	return qp->device->post_recv(qp, recv_wr, bad_recv_wr);
+	const struct ib_recv_wr *dummy;
+
+	return qp->device->post_recv(qp, recv_wr, bad_recv_wr ? : &dummy);
 }
 
 struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
@@ -3801,10 +3845,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
  */
 int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
 
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
-			       struct ib_flow_attr *flow_attr, int domain);
-int ib_destroy_flow(struct ib_flow *flow_id);
-
 static inline int ib_check_mr_access(int flags)
 {
 	/*
@@ -4033,8 +4073,19 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
 	grh->sgid_index = sgid_index;
 	grh->hop_limit = hop_limit;
 	grh->traffic_class = traffic_class;
+	grh->sgid_attr = NULL;
 }
 
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+			     u32 flow_label, u8 hop_limit, u8 traffic_class,
+			     const struct ib_gid_attr *sgid_attr);
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+		       const struct rdma_ah_attr *src);
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+			  const struct rdma_ah_attr *new);
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
+
 /**
  * rdma_ah_find_type - Return address handle type.
  *
@@ -4102,6 +4153,20 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
 
 }
 
+static inline void ib_set_flow(struct ib_uobject *uobj, struct ib_flow *ibflow,
+			       struct ib_qp *qp, struct ib_device *device)
+{
+	uobj->object = ibflow;
+	ibflow->uobject = uobj;
+
+	if (qp) {
+		atomic_inc(&qp->usecnt);
+		ibflow->qp = qp;
+	}
+
+	ibflow->device = device;
+}
+
 /**
  * rdma_roce_rescan_device - Rescan all of the network devices in the system
  * and add their gids, as needed, to the relevant RoCE devices.
@@ -4110,4 +4175,8 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
  */
 void rdma_roce_rescan_device(struct ib_device *ibdev);
 
+struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+
+int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
+			       struct uverbs_attr_bundle *attrs);
 #endif /* IB_VERBS_H */
diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h
index 2bbb7a6..66d4393 100644
--- a/include/rdma/opa_addr.h
+++ b/include/rdma/opa_addr.h
@@ -120,7 +120,7 @@ static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr)
 	if (attr->type == RDMA_AH_ATTR_TYPE_IB) {
 		if (!rdma_ah_get_dlid(attr) ||
 		    rdma_ah_get_dlid(attr) >=
-		    be32_to_cpu(IB_MULTICAST_LID_BASE))
+		    be16_to_cpu(IB_MULTICAST_LID_BASE))
 			return false;
 	} else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) {
 		if (!rdma_ah_get_dlid(attr) ||
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c5c1435..5d71a7f 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -192,7 +192,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
  * @timeout_ms: Time to wait for resolution to complete.
  */
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
-		      struct sockaddr *dst_addr, int timeout_ms);
+		      const struct sockaddr *dst_addr, int timeout_ms);
 
 /**
  * rdma_resolve_route - Resolve the RDMA address bound to the RDMA identifier
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 1145a4c..927f6d5 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -2,7 +2,7 @@
 #define DEF_RDMAVT_INCQP_H
 
 /*
- * Copyright(c) 2016, 2017 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
@@ -91,6 +91,7 @@
  * RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests
  * RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK
  * RVT_S_ECN - a BECN was queued to the send engine
+ * RVT_S_MAX_BIT_MASK - The max bit that can be used by rdmavt
  */
 #define RVT_S_SIGNAL_REQ_WR	0x0001
 #define RVT_S_BUSY		0x0002
@@ -103,23 +104,26 @@
 #define RVT_S_WAIT_SSN_CREDIT	0x0100
 #define RVT_S_WAIT_DMA		0x0200
 #define RVT_S_WAIT_PIO		0x0400
-#define RVT_S_WAIT_PIO_DRAIN    0x0800
-#define RVT_S_WAIT_TX		0x1000
-#define RVT_S_WAIT_DMA_DESC	0x2000
-#define RVT_S_WAIT_KMEM		0x4000
-#define RVT_S_WAIT_PSN		0x8000
-#define RVT_S_WAIT_ACK		0x10000
-#define RVT_S_SEND_ONE		0x20000
-#define RVT_S_UNLIMITED_CREDIT	0x40000
-#define RVT_S_AHG_VALID		0x80000
-#define RVT_S_AHG_CLEAR		0x100000
-#define RVT_S_ECN		0x200000
+#define RVT_S_WAIT_TX		0x0800
+#define RVT_S_WAIT_DMA_DESC	0x1000
+#define RVT_S_WAIT_KMEM		0x2000
+#define RVT_S_WAIT_PSN		0x4000
+#define RVT_S_WAIT_ACK		0x8000
+#define RVT_S_SEND_ONE		0x10000
+#define RVT_S_UNLIMITED_CREDIT	0x20000
+#define RVT_S_ECN		0x40000
+#define RVT_S_MAX_BIT_MASK	0x800000
+
+/*
+ * Drivers should use s_flags starting with bit 31 down to the bit next to
+ * RVT_S_MAX_BIT_MASK
+ */
 
 /*
  * Wait flags that would prevent any packet type from being sent.
  */
 #define RVT_S_ANY_WAIT_IO \
-	(RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \
+	(RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
 	 RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
 
 /*
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index bd6bba3..9e997c3 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -61,103 +61,195 @@ enum uverbs_obj_access {
 	UVERBS_ACCESS_DESTROY
 };
 
-enum {
-	UVERBS_ATTR_SPEC_F_MANDATORY	= 1U << 0,
-	/* Support extending attributes by length, validate all unknown size == zero  */
-	UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1,
-};
-
 /* Specification of a single attribute inside the ioctl message */
+/* good size 16 */
 struct uverbs_attr_spec {
+	u8 type;
+
+	/*
+	 * Support extending attributes by length. Allow the user to provide
+	 * more bytes than ptr.len, but check that everything after is zero'd
+	 * by the user.
+	 */
+	u8 zero_trailing:1;
+	/*
+	 * Valid only for PTR_IN. Allocate and copy the data inside
+	 * the parser
+	 */
+	u8 alloc_and_copy:1;
+	u8 mandatory:1;
+
 	union {
-		/* Header shared by all following union members - to reduce space. */
 		struct {
-			enum uverbs_attr_type		type;
-			/* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
-			u8				flags;
-		};
-		struct {
-			enum uverbs_attr_type		type;
-			/* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
-			u8				flags;
 			/* Current known size to kernel */
-			u16				len;
+			u16 len;
 			/* User isn't allowed to provide something < min_len */
-			u16				min_len;
+			u16 min_len;
 		} ptr;
+
 		struct {
-			enum uverbs_attr_type		type;
-			/* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
-			u8				flags;
 			/*
 			 * higher bits mean the namespace and lower bits mean
 			 * the type id within the namespace.
 			 */
-			u16			obj_type;
-			u8			access;
+			u16 obj_type;
+			u8 access;
 		} obj;
+
 		struct {
-			enum uverbs_attr_type		type;
-			/* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
-			u8				flags;
-			u8				num_elems;
+			u8 num_elems;
+		} enum_def;
+	} u;
+
+	/* This weird split of the enum lets us remove some padding */
+	union {
+		struct {
 			/*
 			 * The enum attribute can select one of the attributes
 			 * contained in the ids array. Currently only PTR_IN
 			 * attributes are supported in the ids array.
 			 */
-			const struct uverbs_attr_spec	*ids;
+			const struct uverbs_attr_spec *ids;
 		} enum_def;
-	};
+	} u2;
 };
 
-struct uverbs_attr_spec_hash {
-	size_t				num_attrs;
-	unsigned long			*mandatory_attrs_bitmask;
-	struct uverbs_attr_spec		attrs[0];
+/*
+ * Information about the API is loaded into a radix tree. For IOCTL we start
+ * with a tuple of:
+ *  object_id, attr_id, method_id
+ *
+ * Which is a 48 bit value, with most of the bits guaranteed to be zero. Based
+ * on the current kernel support this is compressed into 16 bit key for the
+ * radix tree. Since this compression is entirely internal to the kernel the
+ * below limits can be revised if the kernel gains additional data.
+ *
+ * With 64 leafs per node this is a 3 level radix tree.
+ *
+ * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns
+ * the object slot, and OBJ_ID,METH_ID,0 and returns the method slot.
+ */
+enum uapi_radix_data {
+	UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT,
+
+	UVERBS_API_ATTR_KEY_BITS = 6,
+	UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0),
+	UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1,
+
+	UVERBS_API_METHOD_KEY_BITS = 5,
+	UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS,
+	UVERBS_API_METHOD_KEY_NUM_CORE = 24,
+	UVERBS_API_METHOD_KEY_NUM_DRIVER = (1 << UVERBS_API_METHOD_KEY_BITS) -
+					   UVERBS_API_METHOD_KEY_NUM_CORE,
+	UVERBS_API_METHOD_KEY_MASK = GENMASK(
+		UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1,
+		UVERBS_API_METHOD_KEY_SHIFT),
+
+	UVERBS_API_OBJ_KEY_BITS = 5,
+	UVERBS_API_OBJ_KEY_SHIFT =
+		UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT,
+	UVERBS_API_OBJ_KEY_NUM_CORE = 24,
+	UVERBS_API_OBJ_KEY_NUM_DRIVER =
+		(1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE,
+	UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT),
+
+	/* This id is guaranteed to not exist in the radix tree */
+	UVERBS_API_KEY_ERR = 0xFFFFFFFF,
 };
 
-struct uverbs_attr_bundle;
-struct ib_uverbs_file;
+static inline __attribute_const__ u32 uapi_key_obj(u32 id)
+{
+	if (id & UVERBS_API_NS_FLAG) {
+		id &= ~UVERBS_API_NS_FLAG;
+		if (id >= UVERBS_API_OBJ_KEY_NUM_DRIVER)
+			return UVERBS_API_KEY_ERR;
+		id = id + UVERBS_API_OBJ_KEY_NUM_CORE;
+	} else {
+		if (id >= UVERBS_API_OBJ_KEY_NUM_CORE)
+			return UVERBS_API_KEY_ERR;
+	}
 
-enum {
+	return id << UVERBS_API_OBJ_KEY_SHIFT;
+}
+
+static inline __attribute_const__ bool uapi_key_is_object(u32 key)
+{
+	return (key & ~UVERBS_API_OBJ_KEY_MASK) == 0;
+}
+
+static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id)
+{
+	if (id & UVERBS_API_NS_FLAG) {
+		id &= ~UVERBS_API_NS_FLAG;
+		if (id >= UVERBS_API_METHOD_KEY_NUM_DRIVER)
+			return UVERBS_API_KEY_ERR;
+		id = id + UVERBS_API_METHOD_KEY_NUM_CORE;
+	} else {
+		id++;
+		if (id >= UVERBS_API_METHOD_KEY_NUM_CORE)
+			return UVERBS_API_KEY_ERR;
+	}
+
+	return id << UVERBS_API_METHOD_KEY_SHIFT;
+}
+
+static inline __attribute_const__ u32 uapi_key_attr_to_method(u32 attr_key)
+{
+	return attr_key &
+	       (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK);
+}
+
+static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key)
+{
+	return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+	       (key & UVERBS_API_ATTR_KEY_MASK) == 0;
+}
+
+static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key)
+{
+	/* 0 is the method slot itself */
+	return ioctl_method_key + 1;
+}
+
+static inline __attribute_const__ u32 uapi_key_attr(u32 id)
+{
 	/*
-	 * Action marked with this flag creates a context (or root for all
-	 * objects).
+	 * The attr is designed to fit in the typical single radix tree node
+	 * of 64 entries. Since almost all methods have driver attributes we
+	 * organize things so that the driver and core attributes interleave to
+	 * reduce the length of the attributes array in typical cases.
 	 */
-	UVERBS_ACTION_FLAG_CREATE_ROOT = 1U << 0,
-};
+	if (id & UVERBS_API_NS_FLAG) {
+		id &= ~UVERBS_API_NS_FLAG;
+		id++;
+		if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
+			return UVERBS_API_KEY_ERR;
+		id = (id << 1) | 0;
+	} else {
+		if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1))
+			return UVERBS_API_KEY_ERR;
+		id = (id << 1) | 1;
+	}
 
-struct uverbs_method_spec {
-	/* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */
-	u32						flags;
-	size_t						num_buckets;
-	size_t						num_child_attrs;
-	int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile,
-		       struct uverbs_attr_bundle *ctx);
-	struct uverbs_attr_spec_hash		*attr_buckets[0];
-};
+	return id;
+}
 
-struct uverbs_method_spec_hash {
-	size_t					num_methods;
-	struct uverbs_method_spec		*methods[0];
-};
+static inline __attribute_const__ bool uapi_key_is_attr(u32 key)
+{
+	return (key & UVERBS_API_METHOD_KEY_MASK) != 0 &&
+	       (key & UVERBS_API_ATTR_KEY_MASK) != 0;
+}
 
-struct uverbs_object_spec {
-	const struct uverbs_obj_type		*type_attrs;
-	size_t					num_buckets;
-	struct uverbs_method_spec_hash		*method_buckets[0];
-};
-
-struct uverbs_object_spec_hash {
-	size_t					num_objects;
-	struct uverbs_object_spec		*objects[0];
-};
-
-struct uverbs_root_spec {
-	size_t					num_buckets;
-	struct uverbs_object_spec_hash		*object_buckets[0];
-};
+/*
+ * This returns a value in the range [0, UVERBS_API_ATTR_BKEY_LEN);
+ * basically it undoes the reservation of 0 in the ID numbering. attr_key
+ * must already be masked with UVERBS_API_ATTR_KEY_MASK, or be the output of
+ * uapi_key_attr().
+ */
+static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key)
+{
+	return attr_key - 1;
+}
 
 /*
  * =======================================
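To make the key layout above concrete, here is an illustrative sketch (not part of the patch) that folds an (object, method, attribute) tuple from the ioctl ABI into one radix tree key using the helpers defined in this hunk:

static u32 example_attr_key(u32 object_id, u32 method_id, u32 attr_id)
{
	u32 obj = uapi_key_obj(object_id);
	u32 method = uapi_key_ioctl_method(method_id);
	u32 attr = uapi_key_attr(attr_id);

	if (obj == UVERBS_API_KEY_ERR || method == UVERBS_API_KEY_ERR ||
	    attr == UVERBS_API_KEY_ERR)
		return UVERBS_API_KEY_ERR;

	/* obj|0|0 addresses the object slot, obj|method|0 the method slot,
	 * and obj|method|attr the attribute slot.
	 */
	return obj | method | attr;
}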
@@ -176,7 +268,7 @@ struct uverbs_method_def {
 	u32				     flags;
 	size_t				     num_attrs;
 	const struct uverbs_attr_def * const (*attrs)[];
-	int (*handler)(struct ib_device *ib_dev, struct ib_uverbs_file *ufile,
+	int (*handler)(struct ib_uverbs_file *ufile,
 		       struct uverbs_attr_bundle *ctx);
 };
 
@@ -192,196 +284,171 @@ struct uverbs_object_tree_def {
 	const struct uverbs_object_def * const (*objects)[];
 };
 
-#define UA_FLAGS(_flags)  .flags = _flags
-#define __UVERBS_ATTR0(_id, _type, _fld, _attr, ...)              \
-	((const struct uverbs_attr_def)				  \
-	 {.id = _id, .attr = {{._fld = {.type = _type, _attr, .flags = 0, } }, } })
-#define __UVERBS_ATTR1(_id, _type, _fld, _attr, _extra1, ...)      \
-	((const struct uverbs_attr_def)				  \
-	 {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1 } },} })
-#define __UVERBS_ATTR2(_id, _type, _fld, _attr, _extra1, _extra2)    \
-	((const struct uverbs_attr_def)				  \
-	 {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1, _extra2 } },} })
-#define __UVERBS_ATTR(_id, _type, _fld, _attr, _extra1, _extra2, _n, ...)	\
-	__UVERBS_ATTR##_n(_id, _type, _fld, _attr, _extra1, _extra2)
+/*
+ * =======================================
+ *	Attribute Specifications
+ * =======================================
+ */
 
-#define UVERBS_ATTR_TYPE(_type)					\
-	.min_len = sizeof(_type), .len = sizeof(_type)
-#define UVERBS_ATTR_STRUCT(_type, _last)			\
-	.min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type)
 #define UVERBS_ATTR_SIZE(_min_len, _len)			\
-	.min_len = _min_len, .len = _len
+	.u.ptr.min_len = _min_len, .u.ptr.len = _len
+
+#define UVERBS_ATTR_NO_DATA() UVERBS_ATTR_SIZE(0, 0)
 
 /*
- * In new compiler, UVERBS_ATTR could be simplified by declaring it as
- * [_id] = {.type = _type, .len = _len, ##__VA_ARGS__}
- * But since we support older compilers too, we need the more complex code.
+ * Specifies a uapi structure that cannot be extended. The user must always
+ * supply the whole structure and nothing more. The structure must be declared
+ * in a header under include/uapi/rdma.
  */
-#define UVERBS_ATTR(_id, _type, _fld, _attr, ...)			\
-	__UVERBS_ATTR(_id, _type, _fld, _attr, ##__VA_ARGS__, 2, 1, 0)
-#define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...)				\
-	UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_IN, ptr, _len, ##__VA_ARGS__)
-/* If sizeof(_type) <= sizeof(u64), this will be inlined rather than a pointer */
-#define UVERBS_ATTR_PTR_IN(_id, _type, ...)				\
-	UVERBS_ATTR_PTR_IN_SZ(_id, _type, ##__VA_ARGS__)
-#define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...)				\
-	UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_OUT, ptr, _len, ##__VA_ARGS__)
-#define UVERBS_ATTR_PTR_OUT(_id, _type, ...)				\
-	UVERBS_ATTR_PTR_OUT_SZ(_id, _type, ##__VA_ARGS__)
-#define UVERBS_ATTR_ENUM_IN(_id, _enum_arr, ...)			\
-	UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_ENUM_IN, enum_def,		\
-		    .ids = (_enum_arr),					\
-		    .num_elems = ARRAY_SIZE(_enum_arr), ##__VA_ARGS__)
+#define UVERBS_ATTR_TYPE(_type)					\
+	.u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type)
+/*
+ * Specifies a uapi structure where the user must provide at least up to
+ * member 'last'.  Anything after last and up until the end of the structure
+ * can be non-zero, anything longer than the end of the structure must be
+ * zero. The structure must be declared in a header under include/uapi/rdma.
+ */
+#define UVERBS_ATTR_STRUCT(_type, _last)                                       \
+	.zero_trailing = 1,                                                    \
+	UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)),              \
+			 sizeof(_type))
+/*
+ * Specifies at least min_len bytes must be passed in, but the amount can be
+ * larger, up to the protocol maximum size. No check for zeroing is done.
+ */
+#define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
+
+/* Must be used in the '...' of any UVERBS_ATTR */
+#define UA_ALLOC_AND_COPY .alloc_and_copy = 1
+#define UA_MANDATORY .mandatory = 1
+#define UA_OPTIONAL .mandatory = 0
+
+#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...)                     \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_IDR,                        \
+			  .u.obj.obj_type = _idr_type,                         \
+			  .u.obj.access = _access,                             \
+			  __VA_ARGS__ } })
+
+#define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...)                       \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = (_attr_id) +                                             \
+		      BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW &&      \
+					(_access) != UVERBS_ACCESS_READ),      \
+		.attr = { .type = UVERBS_ATTR_TYPE_FD,                         \
+			  .u.obj.obj_type = _fd_type,                          \
+			  .u.obj.access = _access,                             \
+			  __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...)                               \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_PTR_IN,                     \
+			  _type,                                               \
+			  __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...)                              \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT,                    \
+			  _type,                                               \
+			  __VA_ARGS__ } })
+
+/* _enum_arr should be a 'static const struct uverbs_attr_spec[]' */
+#define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...)                          \
+	(&(const struct uverbs_attr_def){                                      \
+		.id = _attr_id,                                                \
+		.attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN,                    \
+			  .u2.enum_def.ids = _enum_arr,                        \
+			  .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr),       \
+			  __VA_ARGS__ },                                       \
+	})
 
 /*
- * In new compiler, UVERBS_ATTR_IDR (and FD) could be simplified by declaring
- * it as
- * {.id = _id,								\
- *  .attr {.type = __obj_class,						\
- *         .obj = {.obj_type = _idr_type,				\
- *                       .access = _access                              \
- *                }, ##__VA_ARGS__ } }
- * But since we support older compilers too, we need the more complex code.
+ * An input value that is a bitwise combination of values of _enum_type.
+ * This permits the flag value to be passed as either a u32 or u64, it must
+ * be retrieved via uverbs_get_flag().
  */
-#define ___UVERBS_ATTR_OBJ0(_id, _obj_class, _obj_type, _access, ...)\
-	((const struct uverbs_attr_def)					\
-	{.id = _id,							\
-	 .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type,	\
-			    .access = _access, .flags = 0 } }, } })
-#define ___UVERBS_ATTR_OBJ1(_id, _obj_class, _obj_type, _access, _flags)\
-	((const struct uverbs_attr_def)					\
-	{.id = _id,							\
-	.attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type,	\
-			   .access = _access, _flags} }, } })
-#define ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, _flags, \
-			   _n, ...)					\
-	___UVERBS_ATTR_OBJ##_n(_id, _obj_class, _obj_type, _access, _flags)
-#define __UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, ...)	\
-	___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access,		\
-			   ##__VA_ARGS__, 1, 0)
-#define UVERBS_ATTR_IDR(_id, _idr_type, _access, ...)			 \
-	__UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_IDR, _idr_type, _access,\
-			  ##__VA_ARGS__)
-#define UVERBS_ATTR_FD(_id, _fd_type, _access, ...)			\
-	__UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_FD, _fd_type,		\
-			  (_access) + BUILD_BUG_ON_ZERO(		\
-				(_access) != UVERBS_ACCESS_NEW &&	\
-				(_access) != UVERBS_ACCESS_READ),	\
-			  ##__VA_ARGS__)
-#define DECLARE_UVERBS_ATTR_SPEC(_name, ...)				\
-	const struct uverbs_attr_def _name = __VA_ARGS__
+#define UVERBS_ATTR_FLAGS_IN(_attr_id, _enum_type, ...)                        \
+	UVERBS_ATTR_PTR_IN(                                                    \
+		_attr_id,                                                      \
+		UVERBS_ATTR_SIZE(sizeof(u32) + BUILD_BUG_ON_ZERO(              \
+						       !sizeof(_enum_type *)), \
+				 sizeof(u64)),                                 \
+		__VA_ARGS__)
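
For illustration, a minimal sketch of how a flags attribute declared with
UVERBS_ATTR_FLAGS_IN() is consumed. The attribute ID, enum and function below
are hypothetical; uverbs_get_flags32()/uverbs_get_flags64() are the accessors
declared later in this header.

/* Hypothetical driver flags and attribute ID, for illustration only. */
enum my_create_flags {
	MY_CREATE_FLAG_A = 1 << 0,
	MY_CREATE_FLAG_B = 1 << 1,
};

enum { MY_ATTR_CREATE_FLAGS = (1U << UVERBS_ID_NS_SHIFT) };

/*
 * In the method definition:
 *   UVERBS_ATTR_FLAGS_IN(MY_ATTR_CREATE_FLAGS, enum my_create_flags,
 *                        UA_OPTIONAL)
 */
static int my_get_create_flags(struct uverbs_attr_bundle *attrs, u32 *flags)
{
	/* Rejects any bit outside the allowed mask. */
	return uverbs_get_flags32(flags, attrs, MY_ATTR_CREATE_FLAGS,
				  MY_CREATE_FLAG_A | MY_CREATE_FLAG_B);
}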
 
-#define DECLARE_UVERBS_ENUM(_name, ...)					\
-	const struct uverbs_enum_spec _name = {				\
-		.len = ARRAY_SIZE(((struct uverbs_attr_spec[]){__VA_ARGS__})),\
-		.ids = {__VA_ARGS__},					\
+/*
+ * This spec is used to pass driver-specific information to the hardware
+ * driver in the legacy way. Every verb that can carry driver-specific data
+ * should include this spec.
+ */
+#define UVERBS_ATTR_UHW()                                                      \
+	UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN,                                 \
+			   UVERBS_ATTR_MIN_SIZE(0),			       \
+			   UA_OPTIONAL),				       \
+	UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT,                               \
+			    UVERBS_ATTR_MIN_SIZE(0),			       \
+			    UA_OPTIONAL)
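
For illustration (not part of the patch), a method that still accepts opaque
driver data simply appends UVERBS_ATTR_UHW() to its attribute list. The method,
object and attribute IDs below are hypothetical; DECLARE_UVERBS_NAMED_METHOD()
comes from uverbs_named_ioctl.h further down in this diff.

/* Hypothetical IDs, for illustration only. */
DECLARE_UVERBS_NAMED_METHOD(
	MY_METHOD_THING_CREATE,
	UVERBS_ATTR_IDR(MY_ATTR_CREATE_THING_HANDLE,
			MY_OBJECT_THING,
			UVERBS_ACCESS_NEW,
			UA_MANDATORY),
	UVERBS_ATTR_UHW());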
+
+/*
+ * =======================================
+ *	Declaration helpers
+ * =======================================
+ */
+
+#define DECLARE_UVERBS_OBJECT_TREE(_name, ...)                                 \
+	static const struct uverbs_object_def *const _name##_ptr[] = {         \
+		__VA_ARGS__,                                                   \
+	};                                                                     \
+	static const struct uverbs_object_tree_def _name = {                   \
+		.num_objects = ARRAY_SIZE(_name##_ptr),                        \
+		.objects = &_name##_ptr,                                       \
 	}
-#define _UVERBS_METHOD_ATTRS_SZ(...)					\
-	(sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\
-	 sizeof(const struct uverbs_attr_def *))
-#define _UVERBS_METHOD(_id, _handler, _flags, ...)			\
-	((const struct uverbs_method_def) {				\
-	 .id = _id,							\
-	 .flags = _flags,						\
-	 .handler = _handler,						\
-	 .num_attrs = _UVERBS_METHOD_ATTRS_SZ(__VA_ARGS__),		\
-	 .attrs = &(const struct uverbs_attr_def * const []){__VA_ARGS__} })
-#define DECLARE_UVERBS_METHOD(_name, _id, _handler, ...)		\
-	const struct uverbs_method_def _name =				\
-		_UVERBS_METHOD(_id, _handler, 0, ##__VA_ARGS__)
-#define DECLARE_UVERBS_CTX_METHOD(_name, _id, _handler, _flags, ...)	\
-	const struct uverbs_method_def _name =				\
-		_UVERBS_METHOD(_id, _handler,				\
-			       UVERBS_ACTION_FLAG_CREATE_ROOT,		\
-			       ##__VA_ARGS__)
-#define _UVERBS_OBJECT_METHODS_SZ(...)					\
-	(sizeof((const struct uverbs_method_def * const []){__VA_ARGS__}) / \
-	 sizeof(const struct uverbs_method_def *))
-#define _UVERBS_OBJECT(_id, _type_attrs, ...)				\
-	((const struct uverbs_object_def) {				\
-	 .id = _id,							\
-	 .type_attrs = _type_attrs,					\
-	 .num_methods = _UVERBS_OBJECT_METHODS_SZ(__VA_ARGS__),		\
-	 .methods = &(const struct uverbs_method_def * const []){__VA_ARGS__} })
-#define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...)		\
-	const struct uverbs_object_def _name =				\
-		_UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__)
-#define _UVERBS_TREE_OBJECTS_SZ(...)					\
-	(sizeof((const struct uverbs_object_def * const []){__VA_ARGS__}) / \
-	 sizeof(const struct uverbs_object_def *))
-#define _UVERBS_OBJECT_TREE(...)					\
-	((const struct uverbs_object_tree_def) {			\
-	 .num_objects = _UVERBS_TREE_OBJECTS_SZ(__VA_ARGS__),		\
-	 .objects = &(const struct uverbs_object_def * const []){__VA_ARGS__} })
-#define DECLARE_UVERBS_OBJECT_TREE(_name, ...)				\
-	const struct uverbs_object_tree_def _name =			\
-		_UVERBS_OBJECT_TREE(__VA_ARGS__)
 
 /* =================================================
  *              Parsing infrastructure
  * =================================================
  */
 
+
 struct uverbs_ptr_attr {
-	u64		data;
+	/*
+	 * If the spec has UA_ALLOC_AND_COPY set then the 'ptr' member is
+	 * used.
+	 */
+	union {
+		void *ptr;
+		u64 data;
+	};
 	u16		len;
-	/* Combination of bits from enum UVERBS_ATTR_F_XXXX */
-	u16		flags;
+	u16		uattr_idx;
 	u8		enum_id;
 };
 
 struct uverbs_obj_attr {
-	/* pointer to the kernel descriptor -> type, access, etc */
-	const struct uverbs_obj_type	*type;
 	struct ib_uobject		*uobject;
-	/* fd or id in idr of this object */
-	int				id;
+	const struct uverbs_api_attr	*attr_elm;
 };
 
 struct uverbs_attr {
-	/*
-	 * pointer to the user-space given attribute, in order to write the
-	 * new uobject's id or update flags.
-	 */
-	struct ib_uverbs_attr __user	*uattr;
 	union {
 		struct uverbs_ptr_attr	ptr_attr;
 		struct uverbs_obj_attr	obj_attr;
 	};
 };
 
-struct uverbs_attr_bundle_hash {
-	/* if bit i is set, it means attrs[i] contains valid information */
-	unsigned long *valid_bitmap;
-	size_t num_attrs;
-	/*
-	 * arrays of attributes, each element corresponds to the specification
-	 * of the attribute in the same index.
-	 */
-	struct uverbs_attr *attrs;
-};
-
 struct uverbs_attr_bundle {
-	size_t				num_buckets;
-	struct uverbs_attr_bundle_hash  hash[];
+	struct ib_uverbs_file *ufile;
+	DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN);
+	struct uverbs_attr attrs[];
 };
 
-static inline bool uverbs_attr_is_valid_in_hash(const struct uverbs_attr_bundle_hash *attrs_hash,
-						unsigned int idx)
-{
-	return test_bit(idx, attrs_hash->valid_bitmap);
-}
-
 static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle,
 					unsigned int idx)
 {
-	u16 idx_bucket = idx >>	UVERBS_ID_NS_SHIFT;
-
-	if (attrs_bundle->num_buckets <= idx_bucket)
-		return false;
-
-	return uverbs_attr_is_valid_in_hash(&attrs_bundle->hash[idx_bucket],
-					    idx & ~UVERBS_ID_NS_MASK);
+	return test_bit(uapi_bkey_attr(uapi_key_attr(idx)),
+			attrs_bundle->attr_present);
 }
 
 #define IS_UVERBS_COPY_ERR(_ret)		((_ret) && (_ret) != -ENOENT)
@@ -389,12 +456,10 @@ static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_b
 static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle,
 							u16 idx)
 {
-	u16 idx_bucket = idx >>	UVERBS_ID_NS_SHIFT;
-
 	if (!uverbs_attr_is_valid(attrs_bundle, idx))
 		return ERR_PTR(-ENOENT);
 
-	return &attrs_bundle->hash[idx_bucket].attrs[idx & ~UVERBS_ID_NS_MASK];
+	return &attrs_bundle->attrs[uapi_bkey_attr(uapi_key_attr(idx))];
 }
 
 static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle,
@@ -431,25 +496,15 @@ static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_att
 	return attr->obj_attr.uobject;
 }
 
-static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
-				 size_t idx, const void *from, size_t size)
+static inline int
+uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
 {
 	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
-	u16 flags;
-	size_t min_size;
 
 	if (IS_ERR(attr))
 		return PTR_ERR(attr);
 
-	min_size = min_t(size_t, attr->ptr_attr.len, size);
-	if (copy_to_user(u64_to_user_ptr(attr->ptr_attr.data), from, min_size))
-		return -EFAULT;
-
-	flags = attr->ptr_attr.flags | UVERBS_ATTR_F_VALID_OUTPUT;
-	if (put_user(flags, &attr->uattr->flags))
-		return -EFAULT;
-
-	return 0;
+	return attr->ptr_attr.len;
 }
 
 static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
@@ -457,6 +512,18 @@ static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
 	return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
 }
 
+static inline void *uverbs_attr_get_alloced_ptr(
+	const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
+{
+	const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+	if (IS_ERR(attr))
+		return (void *)attr;
+
+	return uverbs_attr_ptr_is_inline(attr) ? (void *)&attr->ptr_attr.data :
+						 attr->ptr_attr.ptr;
+}
+
 static inline int _uverbs_copy_from(void *to,
 				    const struct uverbs_attr_bundle *attrs_bundle,
 				    size_t idx,
@@ -515,54 +582,54 @@ static inline int _uverbs_copy_from_or_zero(void *to,
 #define uverbs_copy_from_or_zero(to, attrs_bundle, idx)			      \
 	_uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to))
 
-/* =================================================
- *	 Definitions -> Specs infrastructure
- * =================================================
- */
-
-/*
- * uverbs_alloc_spec_tree - Merges different common and driver specific feature
- *	into one parsing tree that every uverbs command will be parsed upon.
- *
- * @num_trees: Number of trees in the array @trees.
- * @trees: Array of pointers to tree root definitions to merge. Each such tree
- *	   possibly contains objects, methods and attributes definitions.
- *
- * Returns:
- *	uverbs_root_spec *: The root of the merged parsing tree.
- *	On error, we return an error code. Error is checked via IS_ERR.
- *
- * The following merges could take place:
- * a. Two trees representing the same method with different handler
- *	-> We take the handler of the tree that its handler != NULL
- *	   and its index in the trees array is greater. The incentive for that
- *	   is that developers are expected to first merge common trees and then
- *	   merge trees that gives specialized the behaviour.
- * b. Two trees representing the same object with different
- *    type_attrs (struct uverbs_obj_type):
- *	-> We take the type_attrs of the tree that its type_attr != NULL
- *	   and its index in the trees array is greater. This could be used
- *	   in order to override the free function, allocation size, etc.
- * c. Two trees representing the same method attribute (same id but possibly
- *    different attributes):
- *	-> ERROR (-ENOENT), we believe that's not the programmer's intent.
- *
- * An object without any methods is considered invalid and will abort the
- * function with -ENOENT error.
- */
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
-						const struct uverbs_object_tree_def **trees);
-void uverbs_free_spec_tree(struct uverbs_root_spec *root);
-#else
-static inline struct uverbs_root_spec *uverbs_alloc_spec_tree(unsigned int num_trees,
-							      const struct uverbs_object_tree_def **trees)
+int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits);
+int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		       size_t idx, u64 allowed_bits);
+int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx,
+		   const void *from, size_t size);
+__malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size,
+			     gfp_t flags);
+
+static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
+					  size_t size)
 {
-	return NULL;
+	return _uverbs_alloc(bundle, size, GFP_KERNEL);
 }
 
-static inline void uverbs_free_spec_tree(struct uverbs_root_spec *root)
+static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
+					   size_t size)
 {
+	return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO);
+}
+#else
+static inline int
+uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		   size_t idx, u64 allowed_bits)
+{
+	return -EINVAL;
+}
+static inline int
+uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
+		   size_t idx, u64 allowed_bits)
+{
+	return -EINVAL;
+}
+static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
+				 size_t idx, const void *from, size_t size)
+{
+	return -EINVAL;
+}
+static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle,
+					  size_t size)
+{
+	return ERR_PTR(-EINVAL);
+}
+static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle,
+					   size_t size)
+{
+	return ERR_PTR(-EINVAL);
 }
 #endif
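
To show how the accessors above fit together, a hedged handler fragment: the
attribute IDs and function name are made up, and it assumes MY_ATTR_CMD_IN was
declared with UA_ALLOC_AND_COPY so that uverbs_attr_get_alloced_ptr() is valid.

/* Hypothetical attribute IDs, for illustration only. */
enum { MY_ATTR_CMD_IN = (1U << UVERBS_ID_NS_SHIFT), MY_ATTR_CMD_OUT };

static int my_cmd_fragment(struct uverbs_attr_bundle *attrs)
{
	int in_len = uverbs_attr_get_len(attrs, MY_ATTR_CMD_IN);
	void *cmd_in, *scratch;
	u64 result = 0;

	if (in_len < 0)
		return in_len;

	/* Small payloads stay inline; larger ones were copied by the core. */
	cmd_in = uverbs_attr_get_alloced_ptr(attrs, MY_ATTR_CMD_IN);
	if (IS_ERR(cmd_in))
		return PTR_ERR(cmd_in);

	/* Scratch memory tied to the bundle, released when the method completes. */
	scratch = uverbs_zalloc(attrs, in_len);
	if (IS_ERR(scratch))
		return PTR_ERR(scratch);

	/* ... execute the command using cmd_in and scratch, fill 'result' ... */

	return uverbs_copy_to(attrs, MY_ATTR_CMD_OUT, &result, sizeof(result));
}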
 
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index c5bb4eb..b3b2173 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -43,48 +43,89 @@
 #define _UVERBS_NAME(x, y)	_UVERBS_PASTE(x, y)
 #define UVERBS_METHOD(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
 #define UVERBS_HANDLER(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
+#define UVERBS_OBJECT(id)	_UVERBS_NAME(UVERBS_MODULE_NAME, _object_##id)
 
-#define DECLARE_UVERBS_NAMED_METHOD(id, ...)	\
-	DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__)
+/* These are static so they do not need to be qualified */
+#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
+#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id
 
-#define DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(id, handler, ...)	\
-	DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, handler, ##__VA_ARGS__)
+#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...)                           \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.handler = UVERBS_HANDLER(_method_id),                         \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	}
 
-#define DECLARE_UVERBS_NAMED_METHOD_NO_OVERRIDE(id, handler, ...)	\
-	DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, NULL, ##__VA_ARGS__)
-
-#define DECLARE_UVERBS_NAMED_OBJECT(id, ...)	\
-	DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__)
-
-#define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z)
-
-#define UVERBS_NO_OVERRIDE	NULL
-
-/* This declares a parsing tree with one object and one method. This is usually
- * used for merging driver attributes to the common attributes. The driver has
- * a chance to override the handler and type attrs of the original object.
- * The __VA_ARGS__ just contains a list of attributes.
+/* Create a standard destroy method using the default handler. The handle_attr
+ * argument must be the attribute specifying the handle to destroy; the
+ * default handler does not support any other attributes.
  */
-#define ADD_UVERBS_ATTRIBUTES(_name, _object, _method, _type_attrs, _handler, ...) \
-static DECLARE_UVERBS_METHOD(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME,	     \
-					       _method_, _name),	     \
-			     _method, _handler, ##__VA_ARGS__);		     \
-									     \
-static DECLARE_UVERBS_OBJECT(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME,	     \
-					       _object_, _name),	     \
-			     _object, _type_attrs,			     \
-			     &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME,	     \
-					       _method_, _name));	     \
-									     \
-static DECLARE_UVERBS_OBJECT_TREE(_name,				     \
-				  &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME,     \
-						     _object_, _name))
+#define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr)          \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { _handle_attr };                              \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.handler = uverbs_destroy_def_handler,                         \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	}
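
A typical pairing with uverbs_destroy_def_handler(), as a sketch: the method,
object and attribute IDs are hypothetical, while UVERBS_ACCESS_DESTROY and the
macro itself are the real interfaces.

/* Hypothetical IDs, for illustration only. */
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
	MY_METHOD_THING_DESTROY,
	UVERBS_ATTR_IDR(MY_ATTR_DESTROY_THING_HANDLE,
			MY_OBJECT_THING,
			UVERBS_ACCESS_DESTROY,
			UA_MANDATORY));

The default handler looks the handle up, destroys the underlying object and
returns, so no method-specific handler code is needed.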
 
-/* A very common use case is that the driver doesn't override the handler and
- * type_attrs. Therefore, we provide a simplified macro for this common case.
+#define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...)              \
+	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
+		_object_id)[] = { __VA_ARGS__ };                               \
+	const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {           \
+		.id = _object_id,                                              \
+		.type_attrs = &_type_attrs,                                    \
+		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
+		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
+	}
+
+/*
+ * Declare global methods. These still have a unique object_id because we
+ * identify all uapi methods with an (object, method) tuple. However, they
+ * have no type pointer.
  */
-#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object, _method, ...)	     \
-	ADD_UVERBS_ATTRIBUTES(_name, _object, _method, UVERBS_NO_OVERRIDE,   \
-			      UVERBS_NO_OVERRIDE, ##__VA_ARGS__)
+#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...)	\
+	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
+		_object_id)[] = { __VA_ARGS__ };                               \
+	const struct uverbs_object_def UVERBS_OBJECT(_object_id) = {           \
+		.id = _object_id,                                              \
+		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
+		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
+	}
+
+/* Used by drivers to declare a complete parsing tree for new methods
+ */
+#define ADD_UVERBS_METHODS(_name, _object_id, ...)                             \
+	static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS(    \
+		_object_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_object_def _name##_struct = {               \
+		.id = _object_id,                                              \
+		.num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)),  \
+		.methods = &UVERBS_OBJECT_METHODS(_object_id)                  \
+	};                                                                     \
+	static const struct uverbs_object_def *const _name##_ptrs[] = {        \
+		&_name##_struct,                                               \
+	};                                                                     \
+	static const struct uverbs_object_tree_def _name = {                   \
+		.num_objects = 1,                                              \
+		.objects = &_name##_ptrs,                                      \
+	}
+
+/* Used by drivers to declare a complete parsing tree for a single method that
+ * differs only in having additional driver-specific attributes.
+ */
+#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...)       \
+	static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS(        \
+		_method_id)[] = { __VA_ARGS__ };                               \
+	static const struct uverbs_method_def UVERBS_METHOD(_method_id) = {    \
+		.id = _method_id,                                              \
+		.num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)),      \
+		.attrs = &UVERBS_METHOD_ATTRS(_method_id),                     \
+	};                                                                     \
+	ADD_UVERBS_METHODS(_name, _object_id, &UVERBS_METHOD(_method_id))
 
 #endif
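
As an illustration of the driver-facing helpers above (all names are
hypothetical; the pattern mirrors how a driver extends a common method with
vendor attributes):

/* Hypothetical driver-specific attribute added to an existing common method. */
ADD_UVERBS_ATTRIBUTES_SIMPLE(
	my_drv_create_thing,
	MY_OBJECT_THING,
	MY_METHOD_THING_CREATE,
	UVERBS_ATTR_PTR_IN(MY_ATTR_CREATE_THING_DRV_DATA,
			   UVERBS_ATTR_MIN_SIZE(0),
			   UA_OPTIONAL));

The resulting 'my_drv_create_thing' is a struct uverbs_object_tree_def that the
driver hands to the core together with its other parsing trees.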
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 9d56cdb..3b00231 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -37,8 +37,6 @@
 #include <rdma/uverbs_ioctl.h>
 #include <rdma/ib_user_ioctl_verbs.h>
 
-#define UVERBS_OBJECT(id)	uverbs_object_##id
-
 #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
 const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
 #else
@@ -48,34 +46,61 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
 }
 #endif
 
-static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type,
-					    bool write,
-					    struct ib_ucontext *ucontext,
-					    int id)
+/* Returns _id, or causes a compile error if _id is not a u32.
+ *
+ * The uobj APIs should only be used with the write-based uAPI to access
+ * object IDs. The write API must use a u32 for the object handle, which is
+ * checked by this macro.
+ */
+#define _uobj_check_id(_id) ((_id) * typecheck(u32, _id))
+
+#define uobj_get_type(_ufile, _object)                                         \
+	uapi_get_object((_ufile)->device->uapi, _object)
+
+#define uobj_get_read(_type, _id, _ufile)                                      \
+	rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
+				_uobj_check_id(_id), UVERBS_LOOKUP_READ)
+
+#define ufd_get_read(_type, _fdnum, _ufile)                                    \
+	rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
+				(_fdnum)*typecheck(s32, _fdnum),               \
+				UVERBS_LOOKUP_READ)
+
+static inline void *_uobj_get_obj_read(struct ib_uobject *uobj)
 {
-	return rdma_lookup_get_uobject(type, ucontext, id, write);
+	if (IS_ERR(uobj))
+		return NULL;
+	return uobj->object;
 }
+#define uobj_get_obj_read(_object, _type, _id, _ufile)                         \
+	((struct ib_##_object *)_uobj_get_obj_read(                            \
+		uobj_get_read(_type, _id, _ufile)))
 
-#define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs
+#define uobj_get_write(_type, _id, _ufile)                                     \
+	rdma_lookup_get_uobject(uobj_get_type(_ufile, _type), _ufile,          \
+				_uobj_check_id(_id), UVERBS_LOOKUP_WRITE)
 
-#define uobj_get_read(_type, _id, _ucontext)				\
-	 __uobj_get(uobj_get_type(_type), false, _ucontext, _id)
+int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
+			   struct ib_uverbs_file *ufile, int success_res);
+#define uobj_perform_destroy(_type, _id, _ufile, _success_res)                 \
+	__uobj_perform_destroy(uobj_get_type(_ufile, _type),                   \
+			       _uobj_check_id(_id), _ufile, _success_res)
 
-#define uobj_get_obj_read(_object, _type, _id, _ucontext)		\
-({									\
-	struct ib_uobject *__uobj =					\
-		__uobj_get(uobj_get_type(_type),			\
-			   false, _ucontext, _id);			\
-									\
-	(struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\
-})
+struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
+				      u32 id, struct ib_uverbs_file *ufile);
 
-#define uobj_get_write(_type, _id, _ucontext)				\
-	 __uobj_get(uobj_get_type(_type), true, _ucontext, _id)
+#define uobj_get_destroy(_type, _id, _ufile)                                   \
+	__uobj_get_destroy(uobj_get_type(_ufile, _type), _uobj_check_id(_id),  \
+			   _ufile)
+
+static inline void uobj_put_destroy(struct ib_uobject *uobj)
+{
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
+}
 
 static inline void uobj_put_read(struct ib_uobject *uobj)
 {
-	rdma_lookup_put_uobject(uobj, false);
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
 }
 
 #define uobj_put_obj_read(_obj)					\
@@ -83,17 +108,17 @@ static inline void uobj_put_read(struct ib_uobject *uobj)
 
 static inline void uobj_put_write(struct ib_uobject *uobj)
 {
-	rdma_lookup_put_uobject(uobj, true);
+	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
 }
 
-static inline int __must_check uobj_remove_commit(struct ib_uobject *uobj)
+static inline int __must_check uobj_alloc_commit(struct ib_uobject *uobj,
+						 int success_res)
 {
-	return rdma_remove_commit_uobject(uobj);
-}
+	int ret = rdma_alloc_commit_uobject(uobj);
 
-static inline void uobj_alloc_commit(struct ib_uobject *uobj)
-{
-	rdma_alloc_commit_uobject(uobj);
+	if (ret)
+		return ret;
+	return success_res;
 }
 
 static inline void uobj_alloc_abort(struct ib_uobject *uobj)
@@ -101,14 +126,19 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
 	rdma_alloc_abort_uobject(uobj);
 }
 
-static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type,
-					      struct ib_ucontext *ucontext)
+static inline struct ib_uobject *
+__uobj_alloc(const struct uverbs_api_object *obj, struct ib_uverbs_file *ufile,
+	     struct ib_device **ib_dev)
 {
-	return rdma_alloc_begin_uobject(type, ucontext);
+	struct ib_uobject *uobj = rdma_alloc_begin_uobject(obj, ufile);
+
+	if (!IS_ERR(uobj))
+		*ib_dev = uobj->context->device;
+	return uobj;
 }
 
-#define uobj_alloc(_type, ucontext)	\
-	__uobj_alloc(uobj_get_type(_type), ucontext)
+#define uobj_alloc(_type, _ufile, _ib_dev)                                     \
+	__uobj_alloc(uobj_get_type(_ufile, _type), _ufile, _ib_dev)
 
 #endif
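
A condensed sketch of the reworked write-path flow built from these macros.
MY_OBJECT_THING, the function and its error handling are placeholders
(UVERBS_OBJECT_PD is real), and it assumes the handler has the ib_uverbs_file
available, as 4.19-era write handlers do.

static int my_create_thing(struct ib_uverbs_file *file, u32 pd_handle)
{
	struct ib_device *ib_dev;
	struct ib_uobject *uobj;
	struct ib_pd *pd;
	int ret;

	uobj = uobj_alloc(MY_OBJECT_THING, file, &ib_dev);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, pd_handle, file);
	if (!pd) {
		ret = -EINVAL;
		goto err_abort;
	}

	/* ... create the HW object and store it in uobj->object ... */

	uobj_put_obj_read(pd);
	return uobj_alloc_commit(uobj, 0);

err_abort:
	uobj_alloc_abort(uobj);
	return ret;
}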
 
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index cc04ec6..acb1bfa 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -37,63 +37,72 @@
 #include <rdma/ib_verbs.h>
 
 struct uverbs_obj_type;
+struct uverbs_api_object;
 
-struct uverbs_obj_type_class {
+enum rdma_lookup_mode {
+	UVERBS_LOOKUP_READ,
+	UVERBS_LOOKUP_WRITE,
 	/*
-	 * Get an ib_uobject that corresponds to the given id from ucontext,
-	 * These functions could create or destroy objects if required.
-	 * The action will be finalized only when commit, abort or put fops are
-	 * called.
-	 * The flow of the different actions is:
-	 * [alloc]:	 Starts with alloc_begin. The handlers logic is than
-	 *		 executed. If the handler is successful, alloc_commit
-	 *		 is called and the object is inserted to the repository.
-	 *		 Once alloc_commit completes the object is visible to
-	 *		 other threads and userspace.
-	 e		 Otherwise, alloc_abort is called and the object is
-	 *		 destroyed.
-	 * [lookup]:	 Starts with lookup_get which fetches and locks the
-	 *		 object. After the handler finished using the object, it
-	 *		 needs to call lookup_put to unlock it. The exclusive
-	 *		 flag indicates if the object is locked for exclusive
-	 *		 access.
-	 * [remove]:	 Starts with lookup_get with exclusive flag set. This
-	 *		 locks the object for exclusive access. If the handler
-	 *		 code completed successfully, remove_commit is called
-	 *		 and the ib_uobject is removed from the context's
-	 *		 uobjects repository and put. The object itself is
-	 *		 destroyed as well. Once remove succeeds new krefs to
-	 *		 the object cannot be acquired by other threads or
-	 *		 userspace and the hardware driver is removed from the
-	 *		 object. Other krefs on the object may still exist.
-	 *		 If the handler code failed, lookup_put should be
-	 *		 called. This callback is used when the context
-	 *		 is destroyed as well (process termination,
-	 *		 reset flow).
+	 * Destroy is like LOOKUP_WRITE, except that the uobject is not
+	 * locked.  uobj_destroy is used to convert a LOOKUP_DESTROY lock into
+	 * a LOOKUP_WRITE lock.
 	 */
-	struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type,
-					  struct ib_ucontext *ucontext);
-	void (*alloc_commit)(struct ib_uobject *uobj);
+	UVERBS_LOOKUP_DESTROY,
+};
+
+/*
+ * The following sequences are valid:
+ * Success flow:
+ *   alloc_begin
+ *   alloc_commit
+ *    [..]
+ * Access flow:
+ *   lookup_get(exclusive=false) & uverbs_try_lock_object
+ *   lookup_put(exclusive=false) via rdma_lookup_put_uobject
+ * Destruction flow:
+ *   lookup_get(exclusive=true) & uverbs_try_lock_object
+ *   remove_commit
+ *   remove_handle (optional)
+ *   lookup_put(exclusive=true) via rdma_lookup_put_uobject
+ *
+ * Allocate Error flow #1
+ *   alloc_begin
+ *   alloc_abort
+ * Allocate Error flow #2
+ *   alloc_begin
+ *   remove_commit
+ *   alloc_abort
+ * Allocate Error flow #3
+ *   alloc_begin
+ *   alloc_commit (fails)
+ *   remove_commit
+ *   alloc_abort
+ *
+ * In all cases the caller must hold the ufile kref until alloc_commit or
+ * alloc_abort returns.
+ */
+struct uverbs_obj_type_class {
+	struct ib_uobject *(*alloc_begin)(const struct uverbs_api_object *obj,
+					  struct ib_uverbs_file *ufile);
+	/* This consumes the kref on uobj */
+	int (*alloc_commit)(struct ib_uobject *uobj);
+	/* This does not consume the kref on uobj */
 	void (*alloc_abort)(struct ib_uobject *uobj);
 
-	struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
-					 struct ib_ucontext *ucontext, int id,
-					 bool exclusive);
-	void (*lookup_put)(struct ib_uobject *uobj, bool exclusive);
-	/*
-	 * Must be called with the exclusive lock held. If successful uobj is
-	 * invalid on return. On failure uobject is left completely
-	 * unchanged
-	 */
-	int __must_check (*remove_commit)(struct ib_uobject *uobj,
-					  enum rdma_remove_reason why);
+	struct ib_uobject *(*lookup_get)(const struct uverbs_api_object *obj,
+					 struct ib_uverbs_file *ufile, s64 id,
+					 enum rdma_lookup_mode mode);
+	void (*lookup_put)(struct ib_uobject *uobj, enum rdma_lookup_mode mode);
+	/* This does not consume the kref on uobj */
+	int __must_check (*destroy_hw)(struct ib_uobject *uobj,
+				       enum rdma_remove_reason why);
+	void (*remove_handle)(struct ib_uobject *uobj);
 	u8    needs_kfree_rcu;
 };
 
 struct uverbs_obj_type {
 	const struct uverbs_obj_type_class * const type_class;
 	size_t	     obj_size;
-	unsigned int destroy_order;
 };
 
 /*
@@ -120,16 +129,15 @@ struct uverbs_obj_idr_type {
 					   enum rdma_remove_reason why);
 };
 
-struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
-					   struct ib_ucontext *ucontext,
-					   int id, bool exclusive);
-void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive);
-struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
-					    struct ib_ucontext *ucontext);
+struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
+					   struct ib_uverbs_file *ufile, s64 id,
+					   enum rdma_lookup_mode mode);
+void rdma_lookup_put_uobject(struct ib_uobject *uobj,
+			     enum rdma_lookup_mode mode);
+struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
+					    struct ib_uverbs_file *ufile);
 void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
-int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj);
-int rdma_alloc_commit_uobject(struct ib_uobject *uobj);
-int rdma_explicit_destroy(struct ib_uobject *uobject);
+int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj);
 
 struct uverbs_obj_fd_type {
 	/*
@@ -140,7 +148,7 @@ struct uverbs_obj_fd_type {
 	 * the driver is removed or the process terminated.
 	 */
 	struct uverbs_obj_type  type;
-	int (*context_closed)(struct ib_uobject_file *uobj_file,
+	int (*context_closed)(struct ib_uobject *uobj,
 			      enum rdma_remove_reason why);
 	const struct file_operations	*fops;
 	const char			*name;
@@ -152,30 +160,29 @@ extern const struct uverbs_obj_type_class uverbs_fd_class;
 
 #define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) -	\
 				   sizeof(char))
-#define UVERBS_TYPE_ALLOC_FD(_order, _obj_size, _context_closed, _fops, _name, _flags)\
+#define UVERBS_TYPE_ALLOC_FD(_obj_size, _context_closed, _fops, _name, _flags)\
 	((&((const struct uverbs_obj_fd_type)				\
 	 {.type = {							\
-		.destroy_order = _order,				\
 		.type_class = &uverbs_fd_class,				\
 		.obj_size = (_obj_size) +				\
-			UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \
+			UVERBS_BUILD_BUG_ON((_obj_size) <               \
+					    sizeof(struct ib_uobject)), \
 	 },								\
 	 .context_closed = _context_closed,				\
 	 .fops = _fops,							\
 	 .name = _name,							\
 	 .flags = _flags}))->type)
-#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order, _destroy_object)	\
+#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object)	\
 	((&((const struct uverbs_obj_idr_type)				\
 	 {.type = {							\
-		.destroy_order = _order,				\
 		.type_class = &uverbs_idr_class,			\
 		.obj_size = (_size) +					\
 			UVERBS_BUILD_BUG_ON((_size) <			\
 					    sizeof(struct ib_uobject))	\
 	 },								\
 	 .destroy_object = _destroy_object,}))->type)
-#define UVERBS_TYPE_ALLOC_IDR(_order, _destroy_object)			\
-	 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order,	\
+#define UVERBS_TYPE_ALLOC_IDR(_destroy_object)			\
+	 UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject),	\
 				  _destroy_object)
 
 #endif
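
The lookup modes replace the old 'exclusive' bool; a minimal sketch of the
access flow documented in the uverbs_obj_type_class comment above, using the
functions declared in this header (the function name is a placeholder):

/* Hypothetical: inspect an object under a shared (read) lookup. */
static int my_inspect_obj(const struct uverbs_api_object *obj,
			  struct ib_uverbs_file *ufile, u32 handle)
{
	struct ib_uobject *uobj;

	uobj = rdma_lookup_get_uobject(obj, ufile, handle, UVERBS_LOOKUP_READ);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	/* ... read uobj->object; concurrent readers are allowed ... */

	rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_READ);
	return 0;
}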
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index a159ba8..f85ec1a3 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -44,6 +44,16 @@
  * In particular do not use pointer types -- pass pointers in __aligned_u64
  * instead.
  */
+
+enum {
+	C4IW_64B_CQE = (1 << 0)
+};
+
+struct c4iw_create_cq {
+	__u32 flags;
+	__u32 reserved;
+};
+
 struct c4iw_create_cq_resp {
 	__aligned_u64 key;
 	__aligned_u64 gts_key;
@@ -51,11 +61,12 @@ struct c4iw_create_cq_resp {
 	__u32 cqid;
 	__u32 size;
 	__u32 qid_mask;
-	__u32 reserved; /* explicit padding (optional for i386) */
+	__u32 flags;
 };
 
 enum {
-	C4IW_QPF_ONCHIP = (1 << 0)
+	C4IW_QPF_ONCHIP	= (1 << 0),
+	C4IW_QPF_WRITE_W_IMM = (1 << 1)
 };
 
 struct c4iw_create_qp_resp {
@@ -74,6 +85,23 @@ struct c4iw_create_qp_resp {
 	__u32 flags;
 };
 
+struct c4iw_create_srq_resp {
+	__aligned_u64 srq_key;
+	__aligned_u64 srq_db_gts_key;
+	__aligned_u64 srq_memsize;
+	__u32 srqid;
+	__u32 srq_size;
+	__u32 rqt_abs_idx;
+	__u32 qid_mask;
+	__u32 flags;
+	__u32 reserved; /* explicit padding */
+};
+
+enum {
+	/* HW supports SRQ_LIMIT_REACHED event */
+	T4_SRQ_LIMIT_SUPPORT = 1 << 0,
+};
+
 struct c4iw_alloc_ucontext_resp {
 	__aligned_u64 status_page_key;
 	__u32 status_page_size;
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 78613b6..c1f8773 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -53,6 +53,7 @@ struct hns_roce_ib_create_qp {
 	__u8    log_sq_stride;
 	__u8    sq_no_prefetch;
 	__u8    reserved[5];
+	__aligned_u64 sdb_addr;
 };
 
 struct hns_roce_ib_create_qp_resp {
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 888ac59..2c881aa 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -79,7 +79,7 @@ enum uverbs_attrs_destroy_cq_cmd_attr_ids {
 };
 
 enum uverbs_attrs_create_flow_action_esp {
-	UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE,
+	UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
 	UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
 	UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
 	UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
@@ -87,6 +87,11 @@ enum uverbs_attrs_create_flow_action_esp {
 	UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
 };
 
+enum uverbs_attrs_modify_flow_action_esp {
+	UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE =
+		UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+};
+
 enum uverbs_attrs_destroy_flow_action_esp {
 	UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
 };
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 625545d..6cdf192 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -40,6 +40,59 @@
 #define RDMA_UAPI_PTR(_type, _name)	__aligned_u64 _name
 #endif
 
+enum ib_uverbs_access_flags {
+	IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0,
+	IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1,
+	IB_UVERBS_ACCESS_REMOTE_READ = 1 << 2,
+	IB_UVERBS_ACCESS_REMOTE_ATOMIC = 1 << 3,
+	IB_UVERBS_ACCESS_MW_BIND = 1 << 4,
+	IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5,
+	IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6,
+	IB_UVERBS_ACCESS_HUGETLB = 1 << 7,
+};
+
+enum ib_uverbs_query_port_cap_flags {
+	IB_UVERBS_PCF_SM = 1 << 1,
+	IB_UVERBS_PCF_NOTICE_SUP = 1 << 2,
+	IB_UVERBS_PCF_TRAP_SUP = 1 << 3,
+	IB_UVERBS_PCF_OPT_IPD_SUP = 1 << 4,
+	IB_UVERBS_PCF_AUTO_MIGR_SUP = 1 << 5,
+	IB_UVERBS_PCF_SL_MAP_SUP = 1 << 6,
+	IB_UVERBS_PCF_MKEY_NVRAM = 1 << 7,
+	IB_UVERBS_PCF_PKEY_NVRAM = 1 << 8,
+	IB_UVERBS_PCF_LED_INFO_SUP = 1 << 9,
+	IB_UVERBS_PCF_SM_DISABLED = 1 << 10,
+	IB_UVERBS_PCF_SYS_IMAGE_GUID_SUP = 1 << 11,
+	IB_UVERBS_PCF_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+	IB_UVERBS_PCF_EXTENDED_SPEEDS_SUP = 1 << 14,
+	IB_UVERBS_PCF_CM_SUP = 1 << 16,
+	IB_UVERBS_PCF_SNMP_TUNNEL_SUP = 1 << 17,
+	IB_UVERBS_PCF_REINIT_SUP = 1 << 18,
+	IB_UVERBS_PCF_DEVICE_MGMT_SUP = 1 << 19,
+	IB_UVERBS_PCF_VENDOR_CLASS_SUP = 1 << 20,
+	IB_UVERBS_PCF_DR_NOTICE_SUP = 1 << 21,
+	IB_UVERBS_PCF_CAP_MASK_NOTICE_SUP = 1 << 22,
+	IB_UVERBS_PCF_BOOT_MGMT_SUP = 1 << 23,
+	IB_UVERBS_PCF_LINK_LATENCY_SUP = 1 << 24,
+	IB_UVERBS_PCF_CLIENT_REG_SUP = 1 << 25,
+	/*
+	 * IsOtherLocalChangesNoticeSupported is aliased by IP_BASED_GIDS and
+	 * is inaccessible
+	 */
+	IB_UVERBS_PCF_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
+	IB_UVERBS_PCF_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
+	IB_UVERBS_PCF_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
+	IB_UVERBS_PCF_MCAST_FDB_TOP_SUP = 1 << 30,
+	IB_UVERBS_PCF_HIERARCHY_INFO_SUP = 1ULL << 31,
+
+	/* NOTE this is an internal flag, not an IBA flag */
+	IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26,
+};
+
+enum ib_uverbs_query_port_flags {
+	IB_UVERBS_QPF_GRH_REQUIRED = 1 << 0,
+};
+
 enum ib_uverbs_flow_action_esp_keymat {
 	IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM,
 };
@@ -99,4 +152,9 @@ struct ib_uverbs_flow_action_esp {
 	__aligned_u64	hard_limit_pkts;
 };
 
+enum ib_uverbs_read_counters_flags {
+	/* prefer read values from driver cache */
+	IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0,
+};
+
 #endif
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 4f9991d..25a1676 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -279,7 +279,7 @@ struct ib_uverbs_query_port {
 };
 
 struct ib_uverbs_query_port_resp {
-	__u32 port_cap_flags;
+	__u32 port_cap_flags;		/* see ib_uverbs_query_port_cap_flags */
 	__u32 max_msg_sz;
 	__u32 bad_pkey_cntr;
 	__u32 qkey_viol_cntr;
@@ -299,7 +299,8 @@ struct ib_uverbs_query_port_resp {
 	__u8  active_speed;
 	__u8  phys_state;
 	__u8  link_layer;
-	__u8  reserved[2];
+	__u8  flags;			/* see ib_uverbs_query_port_flags */
+	__u8  reserved;
 };
 
 struct ib_uverbs_alloc_pd {
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 8daec1f..addbb9c 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -76,6 +76,9 @@ enum mlx5_lib_caps {
 	MLX5_LIB_CAP_4K_UAR	= (__u64)1 << 0,
 };
 
+enum mlx5_ib_alloc_uctx_v2_flags {
+	MLX5_IB_ALLOC_UCTX_DEVX	= 1 << 0,
+};
 struct mlx5_ib_alloc_ucontext_req_v2 {
 	__u32	total_num_bfregs;
 	__u32	num_low_latency_bfregs;
@@ -90,6 +93,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
 
 enum mlx5_ib_alloc_ucontext_resp_mask {
 	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
+	MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY    = 1UL << 1,
 };
 
 enum mlx5_user_cmds_supp_uhw {
@@ -138,7 +142,7 @@ struct mlx5_ib_alloc_ucontext_resp {
 	__u32	log_uar_size;
 	__u32	num_uars_per_page;
 	__u32	num_dyn_bfregs;
-	__u32	reserved3;
+	__u32	dump_fill_mkey;
 };
 
 struct mlx5_ib_alloc_pd_resp {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index f7d685e..9c51801 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -33,6 +33,7 @@
 #ifndef MLX5_USER_IOCTL_CMDS_H
 #define MLX5_USER_IOCTL_CMDS_H
 
+#include <linux/types.h>
 #include <rdma/ib_user_ioctl_cmds.h>
 
 enum mlx5_ib_create_flow_action_attrs {
@@ -45,4 +46,124 @@ enum mlx5_ib_alloc_dm_attrs {
 	MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
 };
 
+enum mlx5_ib_devx_methods {
+	MLX5_IB_METHOD_DEVX_OTHER  = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_QUERY_UAR,
+	MLX5_IB_METHOD_DEVX_QUERY_EQN,
+};
+
+enum  mlx5_ib_devx_other_attrs {
+	MLX5_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_create_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+};
+
+enum  mlx5_ib_devx_query_uar_attrs {
+	MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+};
+
+enum mlx5_ib_devx_obj_destroy_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_obj_modify_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_query_attrs {
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+	MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+};
+
+enum  mlx5_ib_devx_query_eqn_attrs {
+	MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+};
+
+enum mlx5_ib_devx_obj_methods {
+	MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+	MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+	MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+};
+
+enum mlx5_ib_devx_umem_reg_attrs {
+	MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+};
+
+enum mlx5_ib_devx_umem_dereg_attrs {
+	MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_umem_methods {
+	MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+};
+
+enum mlx5_ib_objects {
+	MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_OBJECT_DEVX_OBJ,
+	MLX5_IB_OBJECT_DEVX_UMEM,
+	MLX5_IB_OBJECT_FLOW_MATCHER,
+};
+
+enum mlx5_ib_flow_matcher_create_attrs {
+	MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_MASK,
+	MLX5_IB_ATTR_FLOW_MATCHER_FLOW_TYPE,
+	MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
+};
+
+enum mlx5_ib_flow_matcher_destroy_attrs {
+	MLX5_IB_ATTR_FLOW_MATCHER_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_flow_matcher_methods {
+	MLX5_IB_METHOD_FLOW_MATCHER_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
+};
+
+#define MLX5_IB_DW_MATCH_PARAM 0x80
+
+struct mlx5_ib_match_params {
+	__u32	match_params[MLX5_IB_DW_MATCH_PARAM];
+};
+
+enum mlx5_ib_flow_type {
+	MLX5_IB_FLOW_TYPE_NORMAL,
+	MLX5_IB_FLOW_TYPE_SNIFFER,
+	MLX5_IB_FLOW_TYPE_ALL_DEFAULT,
+	MLX5_IB_FLOW_TYPE_MC_DEFAULT,
+};
+
+enum mlx5_ib_create_flow_attrs {
+	MLX5_IB_ATTR_CREATE_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE,
+	MLX5_IB_ATTR_CREATE_FLOW_DEST_QP,
+	MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX,
+	MLX5_IB_ATTR_CREATE_FLOW_MATCHER,
+};
+
+enum mlx5_ib_destroy_flow_attrs {
+	MLX5_IB_ATTR_DESTROY_FLOW_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_flow_methods {
+	MLX5_IB_METHOD_CREATE_FLOW = (1U << UVERBS_ID_NS_SHIFT),
+	MLX5_IB_METHOD_DESTROY_FLOW,
+};
+
 #endif
diff --git a/include/uapi/rdma/qedr-abi.h b/include/uapi/rdma/qedr-abi.h
index 24c658b..7a10b3a 100644
--- a/include/uapi/rdma/qedr-abi.h
+++ b/include/uapi/rdma/qedr-abi.h
@@ -111,4 +111,21 @@ struct qedr_create_qp_uresp {
 	__u32 reserved;
 };
 
+struct qedr_create_srq_ureq {
+	/* user space virtual address of producer pair */
+	__aligned_u64 prod_pair_addr;
+
+	/* user space virtual address of SRQ buffer */
+	__aligned_u64 srq_addr;
+
+	/* length of SRQ buffer */
+	__aligned_u64 srq_len;
+};
+
+struct qedr_create_srq_uresp {
+	__u16 srq_id;
+	__u16 reserved0;
+	__u32 reserved1;
+};
+
 #endif /* __QEDR_USER_H__ */
diff --git a/include/uapi/rdma/rdma_user_ioctl_cmds.h b/include/uapi/rdma/rdma_user_ioctl_cmds.h
index 1da5a1e..24800c6 100644
--- a/include/uapi/rdma/rdma_user_ioctl_cmds.h
+++ b/include/uapi/rdma/rdma_user_ioctl_cmds.h
@@ -62,7 +62,12 @@ struct ib_uverbs_attr {
 		} enum_data;
 		__u16 reserved;
 	} attr_data;
-	__aligned_u64 data;	/* ptr to command, inline data or idr/fd */
+	union {
+		/* Used by PTR_IN/OUT, ENUM_IN and IDR */
+		__aligned_u64 data;
+		/* Used by FD_IN and FD_OUT */
+		__s64 data_s64;
+	};
 };
 
 struct ib_uverbs_ioctl_hdr {
diff --git a/lib/test_overflow.c b/lib/test_overflow.c
index 2278fe0..fc68056 100644
--- a/lib/test_overflow.c
+++ b/lib/test_overflow.c
@@ -252,7 +252,8 @@ static int __init test_ ## t ## _overflow(void) {			\
 	int err = 0;							\
 	unsigned i;							\
 									\
-	pr_info("%-3s: %zu tests\n", #t, ARRAY_SIZE(t ## _tests));	\
+	pr_info("%-3s: %zu arithmetic tests\n", #t,			\
+		ARRAY_SIZE(t ## _tests));				\
 	for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i)			\
 		err |= do_test_ ## t(&t ## _tests[i]);			\
 	return err;							\
@@ -287,6 +288,200 @@ static int __init test_overflow_calculation(void)
 	return err;
 }
 
+static int __init test_overflow_shift(void)
+{
+	int err = 0;
+
+/* Args are: value, shift, type, expected result, overflow expected */
+#define TEST_ONE_SHIFT(a, s, t, expect, of) ({				\
+	int __failed = 0;						\
+	typeof(a) __a = (a);						\
+	typeof(s) __s = (s);						\
+	t __e = (expect);						\
+	t __d;								\
+	bool __of = check_shl_overflow(__a, __s, &__d);			\
+	if (__of != of) {						\
+		pr_warn("expected (%s)(%s << %s) to%s overflow\n",	\
+			#t, #a, #s, of ? "" : " not");			\
+		__failed = 1;						\
+	} else if (!__of && __d != __e) {				\
+		pr_warn("expected (%s)(%s << %s) == %s\n",		\
+			#t, #a, #s, #expect);				\
+		if ((t)-1 < 0)						\
+			pr_warn("got %lld\n", (s64)__d);		\
+		else							\
+			pr_warn("got %llu\n", (u64)__d);		\
+		__failed = 1;						\
+	}								\
+	if (!__failed)							\
+		pr_info("ok: (%s)(%s << %s) == %s\n", #t, #a, #s,	\
+			of ? "overflow" : #expect);			\
+	__failed;							\
+})
+
+	/* Sane shifts. */
+	err |= TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false);
+	err |= TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false);
+	err |= TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false);
+	err |= TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false);
+	err |= TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false);
+	err |= TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false);
+	err |= TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false);
+	err |= TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false);
+	err |= TEST_ONE_SHIFT(1, 0, int, 1 << 0, false);
+	err |= TEST_ONE_SHIFT(1, 16, int, 1 << 16, false);
+	err |= TEST_ONE_SHIFT(1, 30, int, 1 << 30, false);
+	err |= TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false);
+	err |= TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false);
+	err |= TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false);
+	err |= TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false);
+	err |= TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false);
+	err |= TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false);
+	err |= TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false);
+	err |= TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false);
+	err |= TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false);
+	err |= TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false);
+	err |= TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false);
+	err |= TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false);
+	err |= TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false);
+	err |= TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false);
+	err |= TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64,
+			      0xFFFFFFFFULL << 32, false);
+
+	/* Sane shift: start and end with 0, without a too-wide shift. */
+	err |= TEST_ONE_SHIFT(0, 7, u8, 0, false);
+	err |= TEST_ONE_SHIFT(0, 15, u16, 0, false);
+	err |= TEST_ONE_SHIFT(0, 31, unsigned int, 0, false);
+	err |= TEST_ONE_SHIFT(0, 31, u32, 0, false);
+	err |= TEST_ONE_SHIFT(0, 63, u64, 0, false);
+
+	/* Sane shift: start and end with 0, without reaching signed bit. */
+	err |= TEST_ONE_SHIFT(0, 6, s8, 0, false);
+	err |= TEST_ONE_SHIFT(0, 14, s16, 0, false);
+	err |= TEST_ONE_SHIFT(0, 30, int, 0, false);
+	err |= TEST_ONE_SHIFT(0, 30, s32, 0, false);
+	err |= TEST_ONE_SHIFT(0, 62, s64, 0, false);
+
+	/* Overflow: shifted the bit off the end. */
+	err |= TEST_ONE_SHIFT(1, 8, u8, 0, true);
+	err |= TEST_ONE_SHIFT(1, 16, u16, 0, true);
+	err |= TEST_ONE_SHIFT(1, 32, unsigned int, 0, true);
+	err |= TEST_ONE_SHIFT(1, 32, u32, 0, true);
+	err |= TEST_ONE_SHIFT(1, 64, u64, 0, true);
+
+	/* Overflow: shifted into the signed bit. */
+	err |= TEST_ONE_SHIFT(1, 7, s8, 0, true);
+	err |= TEST_ONE_SHIFT(1, 15, s16, 0, true);
+	err |= TEST_ONE_SHIFT(1, 31, int, 0, true);
+	err |= TEST_ONE_SHIFT(1, 31, s32, 0, true);
+	err |= TEST_ONE_SHIFT(1, 63, s64, 0, true);
+
+	/* Overflow: high bit falls off unsigned types. */
+	/* 10010110 */
+	err |= TEST_ONE_SHIFT(150, 1, u8, 0, true);
+	/* 1000100010010110 */
+	err |= TEST_ONE_SHIFT(34966, 1, u16, 0, true);
+	/* 10000100000010001000100010010110 */
+	err |= TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true);
+	err |= TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true);
+	/* 1000001000010000010000000100000010000100000010001000100010010110 */
+	err |= TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true);
+
+	/* Overflow: bit shifted into signed bit on signed types. */
+	/* 01001011 */
+	err |= TEST_ONE_SHIFT(75, 1, s8, 0, true);
+	/* 0100010001001011 */
+	err |= TEST_ONE_SHIFT(17483, 1, s16, 0, true);
+	/* 01000010000001000100010001001011 */
+	err |= TEST_ONE_SHIFT(1107575883, 1, s32, 0, true);
+	err |= TEST_ONE_SHIFT(1107575883, 1, int, 0, true);
+	/* 0100000100001000001000000010000001000010000001000100010001001011 */
+	err |= TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true);
+
+	/* Overflow: bit shifted past signed bit on signed types. */
+	/* 01001011 */
+	err |= TEST_ONE_SHIFT(75, 2, s8, 0, true);
+	/* 0100010001001011 */
+	err |= TEST_ONE_SHIFT(17483, 2, s16, 0, true);
+	/* 01000010000001000100010001001011 */
+	err |= TEST_ONE_SHIFT(1107575883, 2, s32, 0, true);
+	err |= TEST_ONE_SHIFT(1107575883, 2, int, 0, true);
+	/* 0100000100001000001000000010000001000010000001000100010001001011 */
+	err |= TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true);
+
+	/* Overflow: values larger than destination type. */
+	err |= TEST_ONE_SHIFT(0x100, 0, u8, 0, true);
+	err |= TEST_ONE_SHIFT(0xFF, 0, s8, 0, true);
+	err |= TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true);
+	err |= TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true);
+	err |= TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true);
+	err |= TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true);
+	err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true);
+	err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true);
+	err |= TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true);
+
+	/* Nonsense: negative initial value. */
+	err |= TEST_ONE_SHIFT(-1, 0, s8, 0, true);
+	err |= TEST_ONE_SHIFT(-1, 0, u8, 0, true);
+	err |= TEST_ONE_SHIFT(-5, 0, s16, 0, true);
+	err |= TEST_ONE_SHIFT(-5, 0, u16, 0, true);
+	err |= TEST_ONE_SHIFT(-10, 0, int, 0, true);
+	err |= TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true);
+	err |= TEST_ONE_SHIFT(-100, 0, s32, 0, true);
+	err |= TEST_ONE_SHIFT(-100, 0, u32, 0, true);
+	err |= TEST_ONE_SHIFT(-10000, 0, s64, 0, true);
+	err |= TEST_ONE_SHIFT(-10000, 0, u64, 0, true);
+
+	/* Nonsense: negative shift values. */
+	err |= TEST_ONE_SHIFT(0, -5, s8, 0, true);
+	err |= TEST_ONE_SHIFT(0, -5, u8, 0, true);
+	err |= TEST_ONE_SHIFT(0, -10, s16, 0, true);
+	err |= TEST_ONE_SHIFT(0, -10, u16, 0, true);
+	err |= TEST_ONE_SHIFT(0, -15, int, 0, true);
+	err |= TEST_ONE_SHIFT(0, -15, unsigned int, 0, true);
+	err |= TEST_ONE_SHIFT(0, -20, s32, 0, true);
+	err |= TEST_ONE_SHIFT(0, -20, u32, 0, true);
+	err |= TEST_ONE_SHIFT(0, -30, s64, 0, true);
+	err |= TEST_ONE_SHIFT(0, -30, u64, 0, true);
+
+	/* Overflow: shifted at or beyond entire type's bit width. */
+	err |= TEST_ONE_SHIFT(0, 8, u8, 0, true);
+	err |= TEST_ONE_SHIFT(0, 9, u8, 0, true);
+	err |= TEST_ONE_SHIFT(0, 8, s8, 0, true);
+	err |= TEST_ONE_SHIFT(0, 9, s8, 0, true);
+	err |= TEST_ONE_SHIFT(0, 16, u16, 0, true);
+	err |= TEST_ONE_SHIFT(0, 17, u16, 0, true);
+	err |= TEST_ONE_SHIFT(0, 16, s16, 0, true);
+	err |= TEST_ONE_SHIFT(0, 17, s16, 0, true);
+	err |= TEST_ONE_SHIFT(0, 32, u32, 0, true);
+	err |= TEST_ONE_SHIFT(0, 33, u32, 0, true);
+	err |= TEST_ONE_SHIFT(0, 32, int, 0, true);
+	err |= TEST_ONE_SHIFT(0, 33, int, 0, true);
+	err |= TEST_ONE_SHIFT(0, 32, s32, 0, true);
+	err |= TEST_ONE_SHIFT(0, 33, s32, 0, true);
+	err |= TEST_ONE_SHIFT(0, 64, u64, 0, true);
+	err |= TEST_ONE_SHIFT(0, 65, u64, 0, true);
+	err |= TEST_ONE_SHIFT(0, 64, s64, 0, true);
+	err |= TEST_ONE_SHIFT(0, 65, s64, 0, true);
+
+	/*
+	 * Corner case: for unsigned types, we fail when we've shifted
+	 * through the entire width of bits. For signed types, we might
+	 * want to match this behavior, but that would mean noticing if
+	 * we shift through all but the signed bit, and this is not
+	 * currently detected (but we'll notice an overflow into the
+	 * signed bit). So, for now, we will test this condition but
+	 * mark it as not expected to overflow.
+	 */
+	err |= TEST_ONE_SHIFT(0, 7, s8, 0, false);
+	err |= TEST_ONE_SHIFT(0, 15, s16, 0, false);
+	err |= TEST_ONE_SHIFT(0, 31, int, 0, false);
+	err |= TEST_ONE_SHIFT(0, 31, s32, 0, false);
+	err |= TEST_ONE_SHIFT(0, 63, s64, 0, false);
+
+	return err;
+}
+
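
For reference, the helper exercised by TEST_ONE_SHIFT() is used outside the
test like this (a hedged sketch; check_shl_overflow() comes from
include/linux/overflow.h, everything else is made up):

#include <linux/errno.h>
#include <linux/overflow.h>

/* Compute *size = nelems << shift, refusing results that do not fit in size_t. */
static int my_shifted_size(size_t nelems, unsigned int shift, size_t *size)
{
	if (check_shl_overflow(nelems, shift, size))
		return -EOVERFLOW;
	return 0;
}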
 /*
  * Deal with the various forms of allocator arguments. See comments above
  * the DEFINE_TEST_ALLOC() instances for mapping of the "bits".
@@ -397,6 +592,7 @@ static int __init test_module_init(void)
 	int err = 0;
 
 	err |= test_overflow_calculation();
+	err |= test_overflow_shift();
 	err |= test_overflow_allocation();
 
 	if (err) {
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 3d414ac..b06286f2 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -396,7 +396,7 @@ static int
 post_recv(struct p9_client *client, struct p9_rdma_context *c)
 {
 	struct p9_trans_rdma *rdma = client->trans;
-	struct ib_recv_wr wr, *bad_wr;
+	struct ib_recv_wr wr;
 	struct ib_sge sge;
 
 	c->busa = ib_dma_map_single(rdma->cm_id->device,
@@ -415,7 +415,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
 	wr.wr_cqe = &c->cqe;
 	wr.sg_list = &sge;
 	wr.num_sge = 1;
-	return ib_post_recv(rdma->qp, &wr, &bad_wr);
+	return ib_post_recv(rdma->qp, &wr, NULL);
 
  error:
 	p9_debug(P9_DEBUG_ERROR, "EIO\n");
@@ -425,7 +425,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
 static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 {
 	struct p9_trans_rdma *rdma = client->trans;
-	struct ib_send_wr wr, *bad_wr;
+	struct ib_send_wr wr;
 	struct ib_sge sge;
 	int err = 0;
 	unsigned long flags;
@@ -520,7 +520,7 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
 	 * status in case of a very fast reply.
 	 */
 	req->status = REQ_STATUS_SENT;
-	err = ib_post_send(rdma->qp, &wr, &bad_wr);
+	err = ib_post_send(rdma->qp, &wr, NULL);
 	if (err)
 		goto send_error;
 
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 7232274..af6ad46 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -140,6 +140,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
 			    &net_secret);
 	return seq_scale(hash);
 }
+EXPORT_SYMBOL_GPL(secure_tcp_seq);
 
 u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
 {
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 89c6333..c1d9764 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -144,7 +144,7 @@ static void rds_ib_add_one(struct ib_device *device)
 	INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
 
 	rds_ibdev->max_wrs = device->attrs.max_qp_wr;
-	rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
+	rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
 
 	has_fr = (device->attrs.device_cap_flags &
 		  IB_DEVICE_MEM_MGT_EXTENSIONS);
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 8596eed..6431a02 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -103,7 +103,6 @@ static void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
 static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
 {
 	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
-	struct ib_send_wr *failed_wr;
 	struct ib_reg_wr reg_wr;
 	int ret, off = 0;
 
@@ -136,9 +135,7 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
 			IB_ACCESS_REMOTE_WRITE;
 	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
 
-	failed_wr = &reg_wr.wr;
-	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, &failed_wr);
-	WARN_ON(failed_wr != &reg_wr.wr);
+	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
 	if (unlikely(ret)) {
 		/* Failure here can be because of -ENOMEM as well */
 		frmr->fr_state = FRMR_IS_STALE;
@@ -231,7 +228,7 @@ static int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
 
 static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
 {
-	struct ib_send_wr *s_wr, *failed_wr;
+	struct ib_send_wr *s_wr;
 	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
 	struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
 	int ret = -EINVAL;
@@ -256,9 +253,7 @@ static int rds_ib_post_inv(struct rds_ib_mr *ibmr)
 	s_wr->ex.invalidate_rkey = frmr->mr->rkey;
 	s_wr->send_flags = IB_SEND_SIGNALED;
 
-	failed_wr = s_wr;
-	ret = ib_post_send(i_cm_id->qp, s_wr, &failed_wr);
-	WARN_ON(failed_wr != s_wr);
+	ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
 	if (unlikely(ret)) {
 		frmr->fr_state = FRMR_IS_STALE;
 		frmr->fr_inv = false;
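
The RDS FRMR hunks above drop both the failed_wr bounce pointer and the
WARN_ON() that checked it: with the reworked verbs API in this merge, the
third argument may be NULL when the caller does not care which work request
failed, and it is const-qualified when it is used. A minimal sketch of the
two calling conventions, assuming the new <rdma/ib_verbs.h> prototypes
(both helpers are illustrative, not part of the patch):

	#include <rdma/ib_verbs.h>
	#include <linux/printk.h>

	static int post_send_simple(struct ib_qp *qp, struct ib_send_wr *wr)
	{
		/* Caller does not need the failing WR: pass NULL. */
		return ib_post_send(qp, wr, NULL);
	}

	static int post_send_chain(struct ib_qp *qp, struct ib_send_wr *first)
	{
		const struct ib_send_wr *bad_wr;
		int rc;

		/* Caller wants to know where a chained post stopped. */
		rc = ib_post_send(qp, first, &bad_wr);
		if (rc)
			pr_debug("send chain stopped at WR %p (rc %d)\n",
				 bad_wr, rc);
		return rc;
	}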
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index d300186..2f16146 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -381,7 +381,6 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 {
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	struct rds_ib_recv_work *recv;
-	struct ib_recv_wr *failed_wr;
 	unsigned int posted = 0;
 	int ret = 0;
 	bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM);
@@ -415,7 +414,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp)
 				&recv->r_frag->f_sg));
 
 		/* XXX when can this fail? */
-		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
+		ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL);
 		if (ret) {
 			rds_ib_conn_error(conn, "recv post on "
 			       "%pI6c returned %d, disconnecting and "
@@ -648,7 +647,6 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
 static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
 {
 	struct rds_header *hdr = ic->i_ack;
-	struct ib_send_wr *failed_wr;
 	u64 seq;
 	int ret;
 
@@ -661,7 +659,7 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
 	rds_message_make_checksum(hdr);
 	ic->i_ack_queued = jiffies;
 
-	ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
+	ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL);
 	if (unlikely(ret)) {
 		/* Failed to send. Release the WR, and
 		 * force another ACK.
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index c8dd312..2dcb555 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -492,7 +492,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 	struct rds_ib_send_work *send = NULL;
 	struct rds_ib_send_work *first;
 	struct rds_ib_send_work *prev;
-	struct ib_send_wr *failed_wr;
+	const struct ib_send_wr *failed_wr;
 	struct scatterlist *scat;
 	u32 pos;
 	u32 i;
@@ -758,7 +758,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
 {
 	struct rds_ib_connection *ic = conn->c_transport_data;
 	struct rds_ib_send_work *send = NULL;
-	struct ib_send_wr *failed_wr;
+	const struct ib_send_wr *failed_wr;
 	u32 pos;
 	u32 work_alloc;
 	int ret;
@@ -846,7 +846,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
 	struct rds_ib_send_work *send = NULL;
 	struct rds_ib_send_work *first;
 	struct rds_ib_send_work *prev;
-	struct ib_send_wr *failed_wr;
+	const struct ib_send_wr *failed_wr;
 	struct scatterlist *scat;
 	unsigned long len;
 	u64 remote_addr = op->op_remote_addr;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index a46418f..e871368 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -16,6 +16,7 @@
 #include <net/tcp.h>
 #include <net/sock.h>
 #include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
 
 #include "smc.h"
 #include "smc_clc.h"
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 2cc64bc..9bb5274 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -16,6 +16,7 @@
 #include <linux/workqueue.h>
 #include <linux/scatterlist.h>
 #include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
 
 #include "smc_pnet.h"
 #include "smc_ib.h"
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 2f5e324..d8366ed5 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -270,7 +270,6 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
 			     int num_sges, struct ib_sge sges[])
 {
 	struct smc_link_group *lgr = conn->lgr;
-	struct ib_send_wr *failed_wr = NULL;
 	struct ib_rdma_wr rdma_wr;
 	struct smc_link *link;
 	int rc;
@@ -288,7 +287,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
 		/* offset within RMBE */
 		peer_rmbe_offset;
 	rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
-	rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
+	rc = ib_post_send(link->roce_qp, &rdma_wr.wr, NULL);
 	if (rc) {
 		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
 		smc_lgr_terminate(lgr);
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index f856b840..3c458d2 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -232,15 +232,13 @@ int smc_wr_tx_put_slot(struct smc_link *link,
  */
 int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 {
-	struct ib_send_wr *failed_wr = NULL;
 	struct smc_wr_tx_pend *pend;
 	int rc;
 
 	ib_req_notify_cq(link->smcibdev->roce_cq_send,
 			 IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
 	pend = container_of(priv, struct smc_wr_tx_pend, priv);
-	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
-			  &failed_wr);
+	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
 	if (rc) {
 		smc_wr_tx_put_slot(link, priv);
 		smc_lgr_terminate(smc_get_lgr(link));
@@ -251,7 +249,6 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 /* Register a memory region and wait for result. */
 int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
 {
-	struct ib_send_wr *failed_wr = NULL;
 	int rc;
 
 	ib_req_notify_cq(link->smcibdev->roce_cq_send,
@@ -260,9 +257,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
 	link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr;
 	link->wr_reg.mr = mr;
 	link->wr_reg.key = mr->rkey;
-	failed_wr = &link->wr_reg.wr;
-	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr);
-	WARN_ON(failed_wr != &link->wr_reg.wr);
+	rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, NULL);
 	if (rc)
 		return rc;
 
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 210bec3..1d85bb1 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -63,7 +63,6 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val)
 /* post a new receive work request to fill a completed old work request entry */
 static inline int smc_wr_rx_post(struct smc_link *link)
 {
-	struct ib_recv_wr *bad_recv_wr = NULL;
 	int rc;
 	u64 wr_id, temp_wr_id;
 	u32 index;
@@ -72,7 +71,7 @@ static inline int smc_wr_rx_post(struct smc_link *link)
 	temp_wr_id = wr_id;
 	index = do_div(temp_wr_id, link->wr_rx_cnt);
 	link->wr_rx_ibs[index].wr_id = wr_id;
-	rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], &bad_recv_wr);
+	rc = ib_post_recv(link->roce_qp, &link->wr_rx_ibs[index], NULL);
 	return rc;
 }
 
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 17fb1e0..0f7c465 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -279,9 +279,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 static int
 fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
 {
-	struct ib_send_wr *bad_wr;
-
-	return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, &bad_wr);
+	return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL);
 }
 
 /* Invalidate all memory regions that were registered for "req".
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index c040de1..1bb00dd 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -464,7 +464,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
 static int
 frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
 {
-	struct ib_send_wr *post_wr, *bad_wr;
+	struct ib_send_wr *post_wr;
 	struct rpcrdma_mr *mr;
 
 	post_wr = &req->rl_sendctx->sc_wr;
@@ -486,7 +486,7 @@ frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
 	/* If ib_post_send fails, the next ->send_request for
 	 * @req will queue these MWs for recovery.
 	 */
-	return ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
+	return ib_post_send(ia->ri_id->qp, post_wr, NULL);
 }
 
 /* Handle a remotely invalidated mr on the @mrs list
@@ -517,7 +517,8 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
 static void
 frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
 {
-	struct ib_send_wr *first, **prev, *last, *bad_wr;
+	struct ib_send_wr *first, **prev, *last;
+	const struct ib_send_wr *bad_wr;
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
 	struct rpcrdma_frwr *frwr;
 	struct rpcrdma_mr *mr;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 841fca1..2ef75e8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -229,11 +229,10 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
 static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
 				struct svc_rdma_recv_ctxt *ctxt)
 {
-	struct ib_recv_wr *bad_recv_wr;
 	int ret;
 
 	svc_xprt_get(&rdma->sc_xprt);
-	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, &bad_recv_wr);
+	ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
 	trace_svcrdma_post_recv(&ctxt->rc_recv_wr, ret);
 	if (ret)
 		goto err_post;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index ce3ea84..04cb336 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -307,7 +307,8 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
 {
 	struct svcxprt_rdma *rdma = cc->cc_rdma;
 	struct svc_xprt *xprt = &rdma->sc_xprt;
-	struct ib_send_wr *first_wr, *bad_wr;
+	struct ib_send_wr *first_wr;
+	const struct ib_send_wr *bad_wr;
 	struct list_head *tmp;
 	struct ib_cqe *cqe;
 	int ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 4a3efae..ffef0c5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -291,7 +291,6 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
  */
 int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
 {
-	struct ib_send_wr *bad_wr;
 	int ret;
 
 	might_sleep();
@@ -311,7 +310,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct ib_send_wr *wr)
 		}
 
 		svc_xprt_get(&rdma->sc_xprt);
-		ret = ib_post_send(rdma->sc_qp, wr, &bad_wr);
+		ret = ib_post_send(rdma->sc_qp, wr, NULL);
 		trace_svcrdma_post_send(wr, ret);
 		if (ret) {
 			set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e9535a6..547b2cd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -476,7 +476,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 
 	/* Qualify the transport resource defaults with the
 	 * capabilities of this particular device */
-	newxprt->sc_max_send_sges = dev->attrs.max_sge;
+	newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
 	/* transport hdr, head iovec, one page list entry, tail iovec */
 	if (newxprt->sc_max_send_sges < 4) {
 		pr_err("svcrdma: too few Send SGEs available (%d)\n",
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 16161a3..5efeba0 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -508,7 +508,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	unsigned int max_sge;
 	int rc;
 
-	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
+	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
 			RPCRDMA_MAX_SEND_SGES);
 	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
 		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
@@ -1559,7 +1559,8 @@ rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
 	if (!count)
 		return;
 
-	rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, &bad_wr);
+	rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
+			  (const struct ib_recv_wr **)&bad_wr);
 	if (rc) {
 		for (wr = bad_wr; wr; wr = wr->next) {
 			struct rpcrdma_rep *rep;
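
xprtrdma keeps a non-const bad_wr here and casts at the call site; when the
failed chain is only walked, declaring the pointer const matches the new
ib_post_recv() prototype and avoids the cast. A minimal sketch, not from the
patch:

	#include <rdma/ib_verbs.h>
	#include <linux/printk.h>

	static int example_post_recv_list(struct ib_qp *qp,
					  struct ib_recv_wr *first)
	{
		const struct ib_recv_wr *bad_wr, *wr;
		int rc;

		rc = ib_post_recv(qp, first, &bad_wr);
		if (rc)
			for (wr = bad_wr; wr; wr = wr->next)
				pr_debug("recv WR %p was not posted\n", wr);
		return rc;
	}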