Merge branches 'amso1100', 'cma', 'cxgb3', 'ehca', 'ipath', 'ipoib', 'iser', 'misc', 'mlx4' and 'nes' into for-next
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 6d40f00..64eeb55 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -36,11 +36,11 @@
   fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
 
   In connected mode, the IB RC (Reliable Connected) transport is used.
-  Connected mode is to takes advantage of the connected nature of the
-  IB transport and allows an MTU up to the maximal IP packet size of
-  64K, which reduces the number of IP packets needed for handling
-  large UDP datagrams, TCP segments, etc and increases the performance
-  for large messages.
+  Connected mode takes advantage of the connected nature of the IB
+  transport and allows an MTU up to the maximal IP packet size of 64K,
+  which reduces the number of IP packets needed for handling large UDP
+  datagrams, TCP segments, etc and increases the performance for large
+  messages.
 
   In connected mode, the interface's UD QP is still used for multicast
   and communication with peers that don't support connected mode. In
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index bd07803..abbb069 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -36,7 +36,6 @@
 #include <linux/mutex.h>
 #include <linux/inetdevice.h>
 #include <linux/workqueue.h>
-#include <linux/if_arp.h>
 #include <net/arp.h>
 #include <net/neighbour.h>
 #include <net/route.h>
@@ -92,22 +91,12 @@
 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 		     const unsigned char *dst_dev_addr)
 {
-	switch (dev->type) {
-	case ARPHRD_INFINIBAND:
-		dev_addr->dev_type = RDMA_NODE_IB_CA;
-		break;
-	case ARPHRD_ETHER:
-		dev_addr->dev_type = RDMA_NODE_RNIC;
-		break;
-	default:
-		return -EADDRNOTAVAIL;
-	}
-
+	dev_addr->dev_type = dev->type;
 	memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
 	memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
 	if (dst_dev_addr)
 		memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
-	dev_addr->src_dev = dev;
+	dev_addr->bound_dev_if = dev->ifindex;
 	return 0;
 }
 EXPORT_SYMBOL(rdma_copy_addr);
@@ -117,6 +106,15 @@
 	struct net_device *dev;
 	int ret = -EADDRNOTAVAIL;
 
+	if (dev_addr->bound_dev_if) {
+		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+		if (!dev)
+			return -ENODEV;
+		ret = rdma_copy_addr(dev_addr, dev, NULL);
+		dev_put(dev);
+		return ret;
+	}
+
 	switch (addr->sa_family) {
 	case AF_INET:
 		dev = ip_dev_find(&init_net,
@@ -131,6 +129,7 @@
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case AF_INET6:
+		read_lock(&dev_base_lock);
 		for_each_netdev(&init_net, dev) {
 			if (ipv6_chk_addr(&init_net,
 					  &((struct sockaddr_in6 *) addr)->sin6_addr,
@@ -139,6 +138,7 @@
 				break;
 			}
 		}
+		read_unlock(&dev_base_lock);
 		break;
 #endif
 	}
@@ -176,48 +176,9 @@
 	mutex_unlock(&lock);
 }
 
-static void addr_send_arp(struct sockaddr *dst_in)
-{
-	struct rtable *rt;
-	struct flowi fl;
-
-	memset(&fl, 0, sizeof fl);
-
-	switch (dst_in->sa_family) {
-	case AF_INET:
-		fl.nl_u.ip4_u.daddr =
-			((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
-		if (ip_route_output_key(&init_net, &rt, &fl))
-			return;
-
-		neigh_event_send(rt->u.dst.neighbour, NULL);
-		ip_rt_put(rt);
-		break;
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case AF_INET6:
-	{
-		struct dst_entry *dst;
-
-		fl.nl_u.ip6_u.daddr =
-			((struct sockaddr_in6 *) dst_in)->sin6_addr;
-
-		dst = ip6_route_output(&init_net, NULL, &fl);
-		if (!dst)
-			return;
-
-		neigh_event_send(dst->neighbour, NULL);
-		dst_release(dst);
-		break;
-	}
-#endif
-	}
-}
-
-static int addr4_resolve_remote(struct sockaddr_in *src_in,
-			       struct sockaddr_in *dst_in,
-			       struct rdma_dev_addr *addr)
+static int addr4_resolve(struct sockaddr_in *src_in,
+			 struct sockaddr_in *dst_in,
+			 struct rdma_dev_addr *addr)
 {
 	__be32 src_ip = src_in->sin_addr.s_addr;
 	__be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -229,10 +190,22 @@
 	memset(&fl, 0, sizeof fl);
 	fl.nl_u.ip4_u.daddr = dst_ip;
 	fl.nl_u.ip4_u.saddr = src_ip;
+	fl.oif = addr->bound_dev_if;
+
 	ret = ip_route_output_key(&init_net, &rt, &fl);
 	if (ret)
 		goto out;
 
+	src_in->sin_family = AF_INET;
+	src_in->sin_addr.s_addr = rt->rt_src;
+
+	if (rt->idev->dev->flags & IFF_LOOPBACK) {
+		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+		if (!ret)
+			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+		goto put;
+	}
+
 	/* If the device does ARP internally, return 'done' */
 	if (rt->idev->dev->flags & IFF_NOARP) {
 		rdma_copy_addr(addr, rt->idev->dev, NULL);
@@ -240,21 +213,14 @@
 	}
 
 	neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
-	if (!neigh) {
+	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+		neigh_event_send(rt->u.dst.neighbour, NULL);
 		ret = -ENODATA;
+		if (neigh)
+			goto release;
 		goto put;
 	}
 
-	if (!(neigh->nud_state & NUD_VALID)) {
-		ret = -ENODATA;
-		goto release;
-	}
-
-	if (!src_ip) {
-		src_in->sin_family = dst_in->sin_family;
-		src_in->sin_addr.s_addr = rt->rt_src;
-	}
-
 	ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
 release:
 	neigh_release(neigh);
@@ -265,52 +231,77 @@
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
-			       struct sockaddr_in6 *dst_in,
-			       struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+			 struct sockaddr_in6 *dst_in,
+			 struct rdma_dev_addr *addr)
 {
 	struct flowi fl;
 	struct neighbour *neigh;
 	struct dst_entry *dst;
-	int ret = -ENODATA;
+	int ret;
 
 	memset(&fl, 0, sizeof fl);
-	fl.nl_u.ip6_u.daddr = dst_in->sin6_addr;
-	fl.nl_u.ip6_u.saddr = src_in->sin6_addr;
+	ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
+	ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
+	fl.oif = addr->bound_dev_if;
 
 	dst = ip6_route_output(&init_net, NULL, &fl);
-	if (!dst)
-		return ret;
+	if ((ret = dst->error))
+		goto put;
 
-	if (dst->dev->flags & IFF_NOARP) {
-		ret = rdma_copy_addr(addr, dst->dev, NULL);
-	} else {
-		neigh = dst->neighbour;
-		if (neigh && (neigh->nud_state & NUD_VALID))
-			ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
+	if (ipv6_addr_any(&fl.fl6_src)) {
+		ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+					 &fl.fl6_dst, 0, &fl.fl6_src);
+		if (ret)
+			goto put;
+
+		src_in->sin6_family = AF_INET6;
+		ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
 	}
 
+	if (dst->dev->flags & IFF_LOOPBACK) {
+		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+		if (!ret)
+			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
+		goto put;
+	}
+
+	/* If the device does ARP internally, return 'done' */
+	if (dst->dev->flags & IFF_NOARP) {
+		ret = rdma_copy_addr(addr, dst->dev, NULL);
+		goto put;
+	}
+
+	neigh = dst->neighbour;
+	if (!neigh || !(neigh->nud_state & NUD_VALID)) {
+		neigh_event_send(dst->neighbour, NULL);
+		ret = -ENODATA;
+		goto put;
+	}
+
+	ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
+put:
 	dst_release(dst);
 	return ret;
 }
 #else
-static int addr6_resolve_remote(struct sockaddr_in6 *src_in,
-			       struct sockaddr_in6 *dst_in,
-			       struct rdma_dev_addr *addr)
+static int addr6_resolve(struct sockaddr_in6 *src_in,
+			 struct sockaddr_in6 *dst_in,
+			 struct rdma_dev_addr *addr)
 {
 	return -EADDRNOTAVAIL;
 }
 #endif
 
-static int addr_resolve_remote(struct sockaddr *src_in,
-				struct sockaddr *dst_in,
-				struct rdma_dev_addr *addr)
+static int addr_resolve(struct sockaddr *src_in,
+			struct sockaddr *dst_in,
+			struct rdma_dev_addr *addr)
 {
 	if (src_in->sa_family == AF_INET) {
-		return addr4_resolve_remote((struct sockaddr_in *) src_in,
+		return addr4_resolve((struct sockaddr_in *) src_in,
 			(struct sockaddr_in *) dst_in, addr);
 	} else
-		return addr6_resolve_remote((struct sockaddr_in6 *) src_in,
+		return addr6_resolve((struct sockaddr_in6 *) src_in,
 			(struct sockaddr_in6 *) dst_in, addr);
 }
 
@@ -327,8 +318,7 @@
 		if (req->status == -ENODATA) {
 			src_in = (struct sockaddr *) &req->src_addr;
 			dst_in = (struct sockaddr *) &req->dst_addr;
-			req->status = addr_resolve_remote(src_in, dst_in,
-							  req->addr);
+			req->status = addr_resolve(src_in, dst_in, req->addr);
 			if (req->status && time_after_eq(jiffies, req->timeout))
 				req->status = -ETIMEDOUT;
 			else if (req->status == -ENODATA)
@@ -352,82 +342,6 @@
 	}
 }
 
-static int addr_resolve_local(struct sockaddr *src_in,
-			      struct sockaddr *dst_in,
-			      struct rdma_dev_addr *addr)
-{
-	struct net_device *dev;
-	int ret;
-
-	switch (dst_in->sa_family) {
-	case AF_INET:
-	{
-		__be32 src_ip = ((struct sockaddr_in *) src_in)->sin_addr.s_addr;
-		__be32 dst_ip = ((struct sockaddr_in *) dst_in)->sin_addr.s_addr;
-
-		dev = ip_dev_find(&init_net, dst_ip);
-		if (!dev)
-			return -EADDRNOTAVAIL;
-
-		if (ipv4_is_zeronet(src_ip)) {
-			src_in->sa_family = dst_in->sa_family;
-			((struct sockaddr_in *) src_in)->sin_addr.s_addr = dst_ip;
-			ret = rdma_copy_addr(addr, dev, dev->dev_addr);
-		} else if (ipv4_is_loopback(src_ip)) {
-			ret = rdma_translate_ip(dst_in, addr);
-			if (!ret)
-				memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
-		} else {
-			ret = rdma_translate_ip(src_in, addr);
-			if (!ret)
-				memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
-		}
-		dev_put(dev);
-		break;
-	}
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	case AF_INET6:
-	{
-		struct in6_addr *a;
-
-		for_each_netdev(&init_net, dev)
-			if (ipv6_chk_addr(&init_net,
-					  &((struct sockaddr_in6 *) dst_in)->sin6_addr,
-					  dev, 1))
-				break;
-
-		if (!dev)
-			return -EADDRNOTAVAIL;
-
-		a = &((struct sockaddr_in6 *) src_in)->sin6_addr;
-
-		if (ipv6_addr_any(a)) {
-			src_in->sa_family = dst_in->sa_family;
-			((struct sockaddr_in6 *) src_in)->sin6_addr =
-				((struct sockaddr_in6 *) dst_in)->sin6_addr;
-			ret = rdma_copy_addr(addr, dev, dev->dev_addr);
-		} else if (ipv6_addr_loopback(a)) {
-			ret = rdma_translate_ip(dst_in, addr);
-			if (!ret)
-				memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
-		} else  {
-			ret = rdma_translate_ip(src_in, addr);
-			if (!ret)
-				memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
-		}
-		break;
-	}
-#endif
-
-	default:
-		ret = -EADDRNOTAVAIL;
-		break;
-	}
-
-	return ret;
-}
-
 int rdma_resolve_ip(struct rdma_addr_client *client,
 		    struct sockaddr *src_addr, struct sockaddr *dst_addr,
 		    struct rdma_dev_addr *addr, int timeout_ms,
@@ -443,22 +357,28 @@
 	if (!req)
 		return -ENOMEM;
 
-	if (src_addr)
-		memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
-	memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
+	src_in = (struct sockaddr *) &req->src_addr;
+	dst_in = (struct sockaddr *) &req->dst_addr;
+
+	if (src_addr) {
+		if (src_addr->sa_family != dst_addr->sa_family) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		memcpy(src_in, src_addr, ip_addr_size(src_addr));
+	} else {
+		src_in->sa_family = dst_addr->sa_family;
+	}
+
+	memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
 	req->addr = addr;
 	req->callback = callback;
 	req->context = context;
 	req->client = client;
 	atomic_inc(&client->refcount);
 
-	src_in = (struct sockaddr *) &req->src_addr;
-	dst_in = (struct sockaddr *) &req->dst_addr;
-
-	req->status = addr_resolve_local(src_in, dst_in, addr);
-	if (req->status == -EADDRNOTAVAIL)
-		req->status = addr_resolve_remote(src_in, dst_in, addr);
-
+	req->status = addr_resolve(src_in, dst_in, addr);
 	switch (req->status) {
 	case 0:
 		req->timeout = jiffies;
@@ -467,15 +387,16 @@
 	case -ENODATA:
 		req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
 		queue_req(req);
-		addr_send_arp(dst_in);
 		break;
 	default:
 		ret = req->status;
 		atomic_dec(&client->refcount);
-		kfree(req);
-		break;
+		goto err;
 	}
 	return ret;
+err:
+	kfree(req);
+	return ret;
 }
 EXPORT_SYMBOL(rdma_resolve_ip);
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 0753178..fbdd731 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -330,17 +330,7 @@
 	union ib_gid gid;
 	int ret = -ENODEV;
 
-	switch (rdma_node_get_transport(dev_addr->dev_type)) {
-	case RDMA_TRANSPORT_IB:
-		ib_addr_get_sgid(dev_addr, &gid);
-		break;
-	case RDMA_TRANSPORT_IWARP:
-		iw_addr_get_sgid(dev_addr, &gid);
-		break;
-	default:
-		return -ENODEV;
-	}
-
+	rdma_addr_get_sgid(dev_addr, &gid);
 	list_for_each_entry(cma_dev, &dev_list, list) {
 		ret = ib_find_cached_gid(cma_dev->device, &gid,
 					 &id_priv->id.port_num, NULL);
@@ -1032,11 +1022,17 @@
 	if (rt->num_paths == 2)
 		rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
 
-	ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
-	ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
-				&id->route.addr.dev_addr);
-	if (ret)
-		goto destroy_id;
+	if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
+		rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
+		rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
+		ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
+	} else {
+		ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
+					&rt->addr.dev_addr);
+		if (ret)
+			goto destroy_id;
+	}
+	rdma_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = CMA_CONNECT;
@@ -1071,10 +1067,12 @@
 	cma_save_net_info(&id->route.addr, &listen_id->route.addr,
 			  ip_ver, port, src, dst);
 
-	ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
-				&id->route.addr.dev_addr);
-	if (ret)
-		goto err;
+	if (!cma_any_addr((struct sockaddr *) &id->route.addr.src_addr)) {
+		ret = rdma_translate_ip((struct sockaddr *) &id->route.addr.src_addr,
+					&id->route.addr.dev_addr);
+		if (ret)
+			goto err;
+	}
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	id_priv->state = CMA_CONNECT;
@@ -1474,15 +1472,6 @@
 	mutex_unlock(&lock);
 }
 
-static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
-{
-	struct sockaddr_storage addr_in;
-
-	memset(&addr_in, 0, sizeof addr_in);
-	addr_in.ss_family = af;
-	return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
-}
-
 int rdma_listen(struct rdma_cm_id *id, int backlog)
 {
 	struct rdma_id_private *id_priv;
@@ -1490,7 +1479,8 @@
 
 	id_priv = container_of(id, struct rdma_id_private, id);
 	if (id_priv->state == CMA_IDLE) {
-		ret = cma_bind_any(id, AF_INET);
+		((struct sockaddr *) &id->route.addr.src_addr)->sa_family = AF_INET;
+		ret = rdma_bind_addr(id, (struct sockaddr *) &id->route.addr.src_addr);
 		if (ret)
 			return ret;
 	}
@@ -1565,8 +1555,8 @@
 	struct sockaddr_in6 *sin6;
 
 	memset(&path_rec, 0, sizeof path_rec);
-	ib_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
-	ib_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
+	rdma_addr_get_sgid(&addr->dev_addr, &path_rec.sgid);
+	rdma_addr_get_dgid(&addr->dev_addr, &path_rec.dgid);
 	path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(&addr->dev_addr));
 	path_rec.numb_path = 1;
 	path_rec.reversible = 1;
@@ -1781,7 +1771,11 @@
 	if (ret)
 		goto out;
 
-	ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+	id_priv->id.route.addr.dev_addr.dev_type =
+		(rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
+		ARPHRD_INFINIBAND : ARPHRD_ETHER;
+
+	rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
 	ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
 	id_priv->id.port_num = p;
 	cma_attach_to_dev(id_priv, cma_dev);
@@ -1839,7 +1833,7 @@
 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 {
 	struct cma_work *work;
-	struct sockaddr_in *src_in, *dst_in;
+	struct sockaddr *src, *dst;
 	union ib_gid gid;
 	int ret;
 
@@ -1853,14 +1847,19 @@
 			goto err;
 	}
 
-	ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
-	ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
+	rdma_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+	rdma_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
 
-	if (cma_zero_addr((struct sockaddr *) &id_priv->id.route.addr.src_addr)) {
-		src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
-		dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
-		src_in->sin_family = dst_in->sin_family;
-		src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
+	src = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
+	if (cma_zero_addr(src)) {
+		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
+		if ((src->sa_family = dst->sa_family) == AF_INET) {
+			((struct sockaddr_in *) src)->sin_addr.s_addr =
+				((struct sockaddr_in *) dst)->sin_addr.s_addr;
+		} else {
+			ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
+				       &((struct sockaddr_in6 *) dst)->sin6_addr);
+		}
 	}
 
 	work->id = id_priv;
@@ -1878,10 +1877,14 @@
 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
 			 struct sockaddr *dst_addr)
 {
-	if (src_addr && src_addr->sa_family)
-		return rdma_bind_addr(id, src_addr);
-	else
-		return cma_bind_any(id, dst_addr->sa_family);
+	if (!src_addr || !src_addr->sa_family) {
+		src_addr = (struct sockaddr *) &id->route.addr.src_addr;
+		if ((src_addr->sa_family = dst_addr->sa_family) == AF_INET6) {
+			((struct sockaddr_in6 *) src_addr)->sin6_scope_id =
+				((struct sockaddr_in6 *) dst_addr)->sin6_scope_id;
+		}
+	}
+	return rdma_bind_addr(id, src_addr);
 }
 
 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
@@ -2077,6 +2080,25 @@
 	return ret;
 }
 
+static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
+			       struct sockaddr *addr)
+{
+#if defined(CONFIG_IPv6) || defined(CONFIG_IPV6_MODULE)
+	struct sockaddr_in6 *sin6;
+
+	if (addr->sa_family != AF_INET6)
+		return 0;
+
+	sin6 = (struct sockaddr_in6 *) addr;
+	if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
+	    !sin6->sin6_scope_id)
+			return -EINVAL;
+
+	dev_addr->bound_dev_if = sin6->sin6_scope_id;
+#endif
+	return 0;
+}
+
 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 {
 	struct rdma_id_private *id_priv;
@@ -2089,7 +2111,13 @@
 	if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
 		return -EINVAL;
 
-	if (!cma_any_addr(addr)) {
+	ret = cma_check_linklocal(&id->route.addr.dev_addr, addr);
+	if (ret)
+		goto err1;
+
+	if (cma_loopback_addr(addr)) {
+		ret = cma_bind_loopback(id_priv);
+	} else if (!cma_zero_addr(addr)) {
 		ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
 		if (ret)
 			goto err1;
@@ -2108,7 +2136,7 @@
 
 	return 0;
 err2:
-	if (!cma_any_addr(addr)) {
+	if (id_priv->cma_dev) {
 		mutex_lock(&lock);
 		cma_detach_from_dev(id_priv);
 		mutex_unlock(&lock);
@@ -2687,10 +2715,15 @@
 	if (cma_any_addr(addr)) {
 		memset(mgid, 0, sizeof *mgid);
 	} else if ((addr->sa_family == AF_INET6) &&
-		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
+		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFFF0FFFF) ==
 								 0xFF10A01B)) {
 		/* IPv6 address is an SA assigned MGID. */
 		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+	} else if ((addr->sa_family == AF_INET6)) {
+		ipv6_ib_mc_map(&sin6->sin6_addr, dev_addr->broadcast, mc_map);
+		if (id_priv->id.ps == RDMA_PS_UDP)
+			mc_map[7] = 0x01;	/* Use RDMA CM signature */
+		*mgid = *(union ib_gid *) (mc_map + 4);
 	} else {
 		ip_ib_mc_map(sin->sin_addr.s_addr, dev_addr->broadcast, mc_map);
 		if (id_priv->id.ps == RDMA_PS_UDP)
@@ -2716,7 +2749,7 @@
 	cma_set_mgid(id_priv, (struct sockaddr *) &mc->addr, &rec.mgid);
 	if (id_priv->id.ps == RDMA_PS_UDP)
 		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
-	ib_addr_get_sgid(dev_addr, &rec.port_gid);
+	rdma_addr_get_sgid(dev_addr, &rec.port_gid);
 	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
 	rec.join_state = 1;
 
@@ -2815,7 +2848,7 @@
 
 	dev_addr = &id_priv->id.route.addr.dev_addr;
 
-	if ((dev_addr->src_dev == ndev) &&
+	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
 	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
 		printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
 		       ndev->name, &id_priv->id);
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8254371..7e1ffd8c 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -604,6 +604,12 @@
 	return ret ? ret : id;
 }
 
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec)
+{
+	ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec);
+}
+EXPORT_SYMBOL(ib_sa_unpack_path);
+
 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
 				    int status,
 				    struct ib_sa_mad *mad)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index bb96d3c..b2e16c3 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -43,6 +43,7 @@
 #include <rdma/rdma_user_cm.h>
 #include <rdma/ib_marshall.h>
 #include <rdma/rdma_cm.h>
+#include <rdma/rdma_cm_ib.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access");
@@ -562,10 +563,10 @@
 	switch (route->num_paths) {
 	case 0:
 		dev_addr = &route->addr.dev_addr;
-		ib_addr_get_dgid(dev_addr,
-				 (union ib_gid *) &resp->ib_route[0].dgid);
-		ib_addr_get_sgid(dev_addr,
-				 (union ib_gid *) &resp->ib_route[0].sgid);
+		rdma_addr_get_dgid(dev_addr,
+				   (union ib_gid *) &resp->ib_route[0].dgid);
+		rdma_addr_get_sgid(dev_addr,
+				   (union ib_gid *) &resp->ib_route[0].sgid);
 		resp->ib_route[0].pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
 		break;
 	case 2:
@@ -812,6 +813,51 @@
 	return ret;
 }
 
+static int ucma_set_ib_path(struct ucma_context *ctx,
+			    struct ib_path_rec_data *path_data, size_t optlen)
+{
+	struct ib_sa_path_rec sa_path;
+	struct rdma_cm_event event;
+	int ret;
+
+	if (optlen % sizeof(*path_data))
+		return -EINVAL;
+
+	for (; optlen; optlen -= sizeof(*path_data), path_data++) {
+		if (path_data->flags == (IB_PATH_GMP | IB_PATH_PRIMARY |
+					 IB_PATH_BIDIRECTIONAL))
+			break;
+	}
+
+	if (!optlen)
+		return -EINVAL;
+
+	ib_sa_unpack_path(path_data->path_rec, &sa_path);
+	ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
+	if (ret)
+		return ret;
+
+	memset(&event, 0, sizeof event);
+	event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+	return ucma_event_handler(ctx->cm_id, &event);
+}
+
+static int ucma_set_option_ib(struct ucma_context *ctx, int optname,
+			      void *optval, size_t optlen)
+{
+	int ret;
+
+	switch (optname) {
+	case RDMA_OPTION_IB_PATH:
+		ret = ucma_set_ib_path(ctx, optval, optlen);
+		break;
+	default:
+		ret = -ENOSYS;
+	}
+
+	return ret;
+}
+
 static int ucma_set_option_level(struct ucma_context *ctx, int level,
 				 int optname, void *optval, size_t optlen)
 {
@@ -821,6 +867,9 @@
 	case RDMA_OPTION_ID:
 		ret = ucma_set_option_id(ctx, optname, optval, optlen);
 		break;
+	case RDMA_OPTION_IB:
+		ret = ucma_set_option_ib(ctx, optname, optval, optlen);
+		break;
 	default:
 		ret = -ENOSYS;
 	}
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 56feab6..112d397 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -285,7 +285,7 @@
 
 	ucontext = ibdev->alloc_ucontext(ibdev, &udata);
 	if (IS_ERR(ucontext)) {
-		ret = PTR_ERR(file->ucontext);
+		ret = PTR_ERR(ucontext);
 		goto err;
 	}
 
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index a6d8944..ad51886 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -798,8 +798,10 @@
 	u8 actual_sge_count;
 	u32 msg_size;
 
-	if (qp->state > IB_QPS_RTS)
-		return -EINVAL;
+	if (qp->state > IB_QPS_RTS) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	while (ib_wr) {
 
@@ -930,6 +932,7 @@
 		ib_wr = ib_wr->next;
 	}
 
+out:
 	if (err)
 		*bad_wr = ib_wr;
 	return err;
@@ -944,8 +947,10 @@
 	unsigned long lock_flags;
 	int err = 0;
 
-	if (qp->state > IB_QPS_RTS)
-		return -EINVAL;
+	if (qp->state > IB_QPS_RTS) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	/*
 	 * Try and post each work request
@@ -998,6 +1003,7 @@
 		ib_wr = ib_wr->next;
 	}
 
+out:
 	if (err)
 		*bad_wr = ib_wr;
 	return err;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 1cecf98..3eb8cec 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -365,18 +365,19 @@
 	spin_lock_irqsave(&qhp->lock, flag);
 	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 	num_wrs = Q_FREECNT(qhp->wq.sq_rptr, qhp->wq.sq_wptr,
 		  qhp->wq.sq_size_log2);
 	if (num_wrs <= 0) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto out;
 	}
 	while (wr) {
 		if (num_wrs == 0) {
 			err = -ENOMEM;
-			*bad_wr = wr;
 			break;
 		}
 		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -428,10 +429,8 @@
 			     wr->opcode);
 			err = -EINVAL;
 		}
-		if (err) {
-			*bad_wr = wr;
+		if (err)
 			break;
-		}
 		wqe->send.wrid.id0.hi = qhp->wq.sq_wptr;
 		sqp->wr_id = wr->wr_id;
 		sqp->opcode = wr2opcode(t3_wr_opcode);
@@ -454,6 +453,10 @@
 	}
 	spin_unlock_irqrestore(&qhp->lock, flag);
 	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+	if (err)
+		*bad_wr = wr;
 	return err;
 }
 
@@ -471,18 +474,19 @@
 	spin_lock_irqsave(&qhp->lock, flag);
 	if (qhp->attr.state > IWCH_QP_STATE_RTS) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
-		return -EINVAL;
+		err = -EINVAL;
+		goto out;
 	}
 	num_wrs = Q_FREECNT(qhp->wq.rq_rptr, qhp->wq.rq_wptr,
 			    qhp->wq.rq_size_log2) - 1;
 	if (!wr) {
 		spin_unlock_irqrestore(&qhp->lock, flag);
-		return -EINVAL;
+		err = -ENOMEM;
+		goto out;
 	}
 	while (wr) {
 		if (wr->num_sge > T3_MAX_SGE) {
 			err = -EINVAL;
-			*bad_wr = wr;
 			break;
 		}
 		idx = Q_PTR2IDX(qhp->wq.wptr, qhp->wq.size_log2);
@@ -494,10 +498,10 @@
 				err = build_zero_stag_recv(qhp, wqe, wr);
 		else
 			err = -ENOMEM;
-		if (err) {
-			*bad_wr = wr;
+
+		if (err)
 			break;
-		}
+
 		build_fw_riwrh((void *) wqe, T3_WR_RCV, T3_COMPLETION_FLAG,
 			       Q_GENBIT(qhp->wq.wptr, qhp->wq.size_log2),
 			       0, sizeof(struct t3_receive_wr) >> 3, T3_SOPEOP);
@@ -511,6 +515,10 @@
 	}
 	spin_unlock_irqrestore(&qhp->lock, flag);
 	ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+
+out:
+	if (err)
+		*bad_wr = wr;
 	return err;
 }
 
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index c825142..0136abd 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -375,6 +375,7 @@
 extern rwlock_t ehca_cq_idr_lock;
 extern struct idr ehca_qp_idr;
 extern struct idr ehca_cq_idr;
+extern spinlock_t shca_list_lock;
 
 extern int ehca_static_rate;
 extern int ehca_port_act_time;
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 523e733c..3b87589 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -169,12 +169,15 @@
 	unsigned long flags;
 	u64 h_ret;
 
-	spin_lock_irqsave(&eq->spinlock, flags);
 	ibmebus_free_irq(eq->ist, (void *)shca);
 
-	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+	spin_lock_irqsave(&shca_list_lock, flags);
+	eq->is_initialized = 0;
+	spin_unlock_irqrestore(&shca_list_lock, flags);
 
-	spin_unlock_irqrestore(&eq->spinlock, flags);
+	tasklet_kill(&eq->interrupt_task);
+
+	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
 
 	if (h_ret != H_SUCCESS) {
 		ehca_err(&shca->ib_device, "Can't free EQ resources.");
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index fb2d83c..129a6be 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -123,7 +123,7 @@
 DEFINE_IDR(ehca_cq_idr);
 
 static LIST_HEAD(shca_list); /* list of all registered ehcas */
-static DEFINE_SPINLOCK(shca_list_lock);
+DEFINE_SPINLOCK(shca_list_lock);
 
 static struct timer_list poll_eqs_timer;
 
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 8fd88cd..e3ec7fd 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -400,7 +400,6 @@
 
 static inline int post_one_send(struct ehca_qp *my_qp,
 			 struct ib_send_wr *cur_send_wr,
-			 struct ib_send_wr **bad_send_wr,
 			 int hidden)
 {
 	struct ehca_wqe *wqe_p;
@@ -412,8 +411,6 @@
 	wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
 	if (unlikely(!wqe_p)) {
 		/* too many posted work requests: queue overflow */
-		if (bad_send_wr)
-			*bad_send_wr = cur_send_wr;
 		ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
 			 "qp_num=%x", my_qp->ib_qp.qp_num);
 		return -ENOMEM;
@@ -433,8 +430,6 @@
 	 */
 	if (unlikely(ret)) {
 		my_qp->ipz_squeue.current_q_offset = start_offset;
-		if (bad_send_wr)
-			*bad_send_wr = cur_send_wr;
 		ehca_err(my_qp->ib_qp.device, "Could not write WQE "
 			 "qp_num=%x", my_qp->ib_qp.qp_num);
 		return -EINVAL;
@@ -448,7 +443,6 @@
 		   struct ib_send_wr **bad_send_wr)
 {
 	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
-	struct ib_send_wr *cur_send_wr;
 	int wqe_cnt = 0;
 	int ret = 0;
 	unsigned long flags;
@@ -457,7 +451,8 @@
 	if (unlikely(my_qp->state < IB_QPS_RTS)) {
 		ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
 			 my_qp->state, qp->qp_num);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out;
 	}
 
 	/* LOCK the QUEUE */
@@ -476,24 +471,21 @@
 		struct ib_send_wr circ_wr;
 		memset(&circ_wr, 0, sizeof(circ_wr));
 		circ_wr.opcode = IB_WR_RDMA_READ;
-		post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
+		post_one_send(my_qp, &circ_wr, 1); /* ignore retcode */
 		wqe_cnt++;
 		ehca_dbg(qp->device, "posted circ wr  qp_num=%x", qp->qp_num);
 		my_qp->message_count = my_qp->packet_count = 0;
 	}
 
 	/* loop processes list of send reqs */
-	for (cur_send_wr = send_wr; cur_send_wr != NULL;
-	     cur_send_wr = cur_send_wr->next) {
-		ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
+	while (send_wr) {
+		ret = post_one_send(my_qp, send_wr, 0);
 		if (unlikely(ret)) {
-			/* if one or more WQEs were successful, don't fail */
-			if (wqe_cnt)
-				ret = 0;
 			goto post_send_exit0;
 		}
 		wqe_cnt++;
-	} /* eof for cur_send_wr */
+		send_wr = send_wr->next;
+	}
 
 post_send_exit0:
 	iosync(); /* serialize GAL register access */
@@ -503,6 +495,10 @@
 			 my_qp, qp->qp_num, wqe_cnt, ret);
 	my_qp->message_count += wqe_cnt;
 	spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+
+out:
+	if (ret)
+		*bad_send_wr = send_wr;
 	return ret;
 }
 
@@ -511,7 +507,6 @@
 			      struct ib_recv_wr *recv_wr,
 			      struct ib_recv_wr **bad_recv_wr)
 {
-	struct ib_recv_wr *cur_recv_wr;
 	struct ehca_wqe *wqe_p;
 	int wqe_cnt = 0;
 	int ret = 0;
@@ -522,27 +517,23 @@
 	if (unlikely(!HAS_RQ(my_qp))) {
 		ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
 			 my_qp, my_qp->real_qp_num, my_qp->ext_type);
-		return -ENODEV;
+		ret = -ENODEV;
+		goto out;
 	}
 
 	/* LOCK the QUEUE */
 	spin_lock_irqsave(&my_qp->spinlock_r, flags);
 
-	/* loop processes list of send reqs */
-	for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
-	     cur_recv_wr = cur_recv_wr->next) {
+	/* loop processes list of recv reqs */
+	while (recv_wr) {
 		u64 start_offset = my_qp->ipz_rqueue.current_q_offset;
 		/* get pointer next to free WQE */
 		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_rqueue);
 		if (unlikely(!wqe_p)) {
 			/* too many posted work requests: queue overflow */
-			if (bad_recv_wr)
-				*bad_recv_wr = cur_recv_wr;
-			if (wqe_cnt == 0) {
-				ret = -ENOMEM;
-				ehca_err(dev, "Too many posted WQEs "
-					 "qp_num=%x", my_qp->real_qp_num);
-			}
+			ret = -ENOMEM;
+			ehca_err(dev, "Too many posted WQEs "
+				"qp_num=%x", my_qp->real_qp_num);
 			goto post_recv_exit0;
 		}
 		/*
@@ -552,7 +543,7 @@
 		rq_map_idx = start_offset / my_qp->ipz_rqueue.qe_size;
 
 		/* write a RECV WQE into the QUEUE */
-		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, cur_recv_wr,
+		ret = ehca_write_rwqe(&my_qp->ipz_rqueue, wqe_p, recv_wr,
 				rq_map_idx);
 		/*
 		 * if something failed,
@@ -560,22 +551,20 @@
 		 */
 		if (unlikely(ret)) {
 			my_qp->ipz_rqueue.current_q_offset = start_offset;
-			*bad_recv_wr = cur_recv_wr;
-			if (wqe_cnt == 0) {
-				ret = -EINVAL;
-				ehca_err(dev, "Could not write WQE "
-					 "qp_num=%x", my_qp->real_qp_num);
-			}
+			ret = -EINVAL;
+			ehca_err(dev, "Could not write WQE "
+				"qp_num=%x", my_qp->real_qp_num);
 			goto post_recv_exit0;
 		}
 
 		qmap_entry = &my_qp->rq_map.map[rq_map_idx];
-		qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
+		qmap_entry->app_wr_id = get_app_wr_id(recv_wr->wr_id);
 		qmap_entry->reported = 0;
 		qmap_entry->cqe_req = 1;
 
 		wqe_cnt++;
-	} /* eof for cur_recv_wr */
+		recv_wr = recv_wr->next;
+	} /* eof for recv_wr */
 
 post_recv_exit0:
 	iosync(); /* serialize GAL register access */
@@ -584,6 +573,11 @@
 	    ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d ret=%i",
 		     my_qp, my_qp->real_qp_num, wqe_cnt, ret);
 	spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
+
+out:
+	if (ret)
+		*bad_recv_wr = recv_wr;
+
 	return ret;
 }
 
@@ -597,6 +591,7 @@
 	if (unlikely(my_qp->state == IB_QPS_RESET)) {
 		ehca_err(qp->device, "Invalid QP state  qp_state=%d qpn=%x",
 			 my_qp->state, qp->qp_num);
+		*bad_recv_wr = recv_wr;
 		return -EINVAL;
 	}
 
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 013d1380..d2787fe 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -39,6 +39,7 @@
 #include <linux/delay.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
+#include <linux/bitmap.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
@@ -1697,7 +1698,7 @@
 			      unsigned len, int avail)
 {
 	unsigned long flags;
-	unsigned end, cnt = 0, next;
+	unsigned end, cnt = 0;
 
 	/* There are two bits per send buffer (busy and generation) */
 	start *= 2;
@@ -1748,12 +1749,7 @@
 
 	if (dd->ipath_pioupd_thresh) {
 		end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
-		next = find_first_bit(dd->ipath_pioavailkernel, end);
-		while (next < end) {
-			cnt++;
-			next = find_next_bit(dd->ipath_pioavailkernel, end,
-					next + 1);
-		}
+		cnt = bitmap_weight(dd->ipath_pioavailkernel, end);
 	}
 	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3cb3f47..e596537 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -103,7 +103,7 @@
 		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
 		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
-	if (dev->dev->caps.max_gso_sz)
+	if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
 		props->device_cap_flags |= IB_DEVICE_UD_TSO;
 	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
 		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 219b103..847030c 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -54,7 +54,8 @@
 	/*
 	 * Largest possible UD header: send with GRH and immediate data.
 	 */
-	MLX4_IB_UD_HEADER_SIZE		= 72
+	MLX4_IB_UD_HEADER_SIZE		= 72,
+	MLX4_IB_LSO_HEADER_SPARE	= 128,
 };
 
 struct mlx4_ib_sqp {
@@ -67,7 +68,8 @@
 };
 
 enum {
-	MLX4_IB_MIN_SQ_STRIDE = 6
+	MLX4_IB_MIN_SQ_STRIDE	= 6,
+	MLX4_IB_CACHE_LINE_SIZE	= 64,
 };
 
 static const __be32 mlx4_ib_opcode[] = {
@@ -261,7 +263,7 @@
 	case IB_QPT_UD:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
 			sizeof (struct mlx4_wqe_datagram_seg) +
-			((flags & MLX4_IB_QP_LSO) ? 64 : 0);
+			((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
 	case IB_QPT_UC:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
 			sizeof (struct mlx4_wqe_raddr_seg);
@@ -897,7 +899,6 @@
 
 	context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
 				     (to_mlx4_st(ibqp->qp_type) << 16));
-	context->flags     |= cpu_to_be32(1 << 8); /* DE? */
 
 	if (!(attr_mask & IB_QP_PATH_MIG_STATE))
 		context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1467,16 +1468,12 @@
 
 static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 			 struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
-			 __be32 *lso_hdr_sz)
+			 __be32 *lso_hdr_sz, __be32 *blh)
 {
 	unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
 
-	/*
-	 * This is a temporary limitation and will be removed in
-	 * a forthcoming FW release:
-	 */
-	if (unlikely(halign > 64))
-		return -EINVAL;
+	if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
+		*blh = cpu_to_be32(1 << 6);
 
 	if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
 		     wr->num_sge > qp->sq.max_gs - (halign >> 4)))
@@ -1522,6 +1519,7 @@
 	__be32 dummy;
 	__be32 *lso_wqe;
 	__be32 uninitialized_var(lso_hdr_sz);
+	__be32 blh;
 	int i;
 
 	spin_lock_irqsave(&qp->sq.lock, flags);
@@ -1530,6 +1528,7 @@
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		lso_wqe = &dummy;
+		blh = 0;
 
 		if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
 			err = -ENOMEM;
@@ -1616,7 +1615,7 @@
 			size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 
 			if (wr->opcode == IB_WR_LSO) {
-				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
+				err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
 				if (unlikely(err)) {
 					*bad_wr = wr;
 					goto out;
@@ -1687,7 +1686,7 @@
 		}
 
 		ctrl->owner_opcode = mlx4_ib_opcode[wr->opcode] |
-			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0);
+			(ind & qp->sq.wqe_cnt ? cpu_to_be32(1 << 31) : 0) | blh;
 
 		stamp = ind + qp->sq_spare_wqes;
 		ind += DIV_ROUND_UP(size * 16, 1U << qp->sq.wqe_shift);
diff --git a/drivers/infiniband/hw/nes/Kconfig b/drivers/infiniband/hw/nes/Kconfig
index d449eb6..846dc97 100644
--- a/drivers/infiniband/hw/nes/Kconfig
+++ b/drivers/infiniband/hw/nes/Kconfig
@@ -4,14 +4,13 @@
 	select LIBCRC32C
 	select INET_LRO
 	---help---
-	  This is a low-level driver for NetEffect RDMA enabled
-	  Network Interface Cards (RNIC).
+	  This is the RDMA Network Interface Card (RNIC) driver for
+	  NetEffect Ethernet Cluster Server Adapters.
 
 config INFINIBAND_NES_DEBUG
 	bool "Verbose debugging output"
 	depends on INFINIBAND_NES
 	default n
 	---help---
-	  This option causes the NetEffect RNIC driver to produce debug
-	  messages.  Select this if you are developing the driver
-	  or trying to diagnose a problem.
+	  This option enables debug messages from the NetEffect RNIC
+	  driver.  Select this if you are diagnosing a problem.
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index cbde0cf..b9d09ba 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -521,7 +521,8 @@
 	spin_lock_init(&nesdev->indexed_regs_lock);
 
 	/* Remap the PCI registers in adapter BAR0 to kernel VA space */
-	mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0), sizeof(mmio_regs));
+	mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
+				    pci_resource_len(pcidev, BAR_0));
 	if (mmio_regs == NULL) {
 		printk(KERN_ERR PFX "Unable to remap BAR0\n");
 		ret = -EIO;
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index bcc6abc..9884056 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 73473db..39468c27 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -52,6 +52,7 @@
 #include <linux/random.h>
 #include <linux/list.h>
 #include <linux/threads.h>
+#include <linux/highmem.h>
 #include <net/arp.h>
 #include <net/neighbour.h>
 #include <net/route.h>
@@ -251,6 +252,33 @@
 
 	mpa_frame = (struct ietf_mpa_frame *)buffer;
 	cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
+	/* make sure mpa private data len is less than 512 bytes */
+	if (cm_node->mpa_frame_size > IETF_MAX_PRIV_DATA_LEN) {
+		nes_debug(NES_DBG_CM, "The received Length of Private"
+			" Data field exceeds 512 octets\n");
+		return -EINVAL;
+	}
+	/*
+	 * make sure MPA receiver interoperate with the
+	 * received MPA version and MPA key information
+	 *
+	 */
+	if (mpa_frame->rev != mpa_version) {
+		nes_debug(NES_DBG_CM, "The received mpa version"
+				" can not be interoperated\n");
+		return -EINVAL;
+	}
+	if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
+		if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
+			nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
+			return -EINVAL;
+		}
+	} else {
+		if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE)) {
+			nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
+			return -EINVAL;
+		}
+	}
 
 	if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
 		nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
@@ -486,6 +514,8 @@
 		send_reset(cm_node, NULL);
 		break;
 	default:
+		add_ref_cm_node(cm_node);
+		send_reset(cm_node, NULL);
 		create_event(cm_node, NES_CM_EVENT_ABORTED);
 	}
 }
@@ -949,6 +979,7 @@
 				reset_entry);
 		{
 			struct nes_cm_node *loopback = cm_node->loopbackpartner;
+			enum nes_cm_node_state old_state;
 			if (NES_CM_STATE_FIN_WAIT1 <= cm_node->state) {
 				rem_ref_cm_node(cm_node->cm_core, cm_node);
 			} else {
@@ -960,11 +991,12 @@
 							 NES_CM_STATE_CLOSED;
 						WARN_ON(1);
 					} else {
-						cm_node->state =
-							NES_CM_STATE_CLOSED;
-						rem_ref_cm_node(
-							cm_node->cm_core,
-							cm_node);
+						old_state = cm_node->state;
+						cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
+						if (old_state != NES_CM_STATE_MPAREQ_RCVD)
+							rem_ref_cm_node(
+								cm_node->cm_core,
+								cm_node);
 					}
 				} else {
 					struct nes_cm_event event;
@@ -980,20 +1012,9 @@
 							 loopback->loc_port;
 					event.cm_info.cm_id = loopback->cm_id;
 					cm_event_connect_error(&event);
+					cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
 					loopback->state = NES_CM_STATE_CLOSED;
 
-					event.cm_node = cm_node;
-					event.cm_info.rem_addr =
-							 cm_node->rem_addr;
-					event.cm_info.loc_addr =
-							 cm_node->loc_addr;
-					event.cm_info.rem_port =
-							 cm_node->rem_port;
-					event.cm_info.loc_port =
-							 cm_node->loc_port;
-					event.cm_info.cm_id = cm_node->cm_id;
-					cm_event_reset(&event);
-
 					rem_ref_cm_node(cm_node->cm_core,
 							 cm_node);
 
@@ -1077,12 +1098,13 @@
 /**
  * nes_addr_resolve_neigh
  */
-static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip)
+static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpindex)
 {
 	struct rtable *rt;
 	struct flowi fl;
 	struct neighbour *neigh;
-	int rc = -1;
+	int rc = arpindex;
+	struct nes_adapter *nesadapter = nesvnic->nesdev->nesadapter;
 
 	memset(&fl, 0, sizeof fl);
 	fl.nl_u.ip4_u.daddr = htonl(dst_ip);
@@ -1098,6 +1120,21 @@
 			nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
 				  " is %pM, Gateway is 0x%08X \n", dst_ip,
 				  neigh->ha, ntohl(rt->rt_gateway));
+
+			if (arpindex >= 0) {
+				if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
+							neigh->ha, ETH_ALEN)){
+					/* Mac address same as in nes_arp_table */
+					neigh_release(neigh);
+					ip_rt_put(rt);
+					return rc;
+				}
+
+				nes_manage_arp_cache(nesvnic->netdev,
+						nesadapter->arp_table[arpindex].mac_addr,
+						dst_ip, NES_ARP_DELETE);
+			}
+
 			nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
 					     dst_ip, NES_ARP_ADD);
 			rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
@@ -1113,7 +1150,6 @@
 	return rc;
 }
 
-
 /**
  * make_cm_node - create a new instance of a cm node
  */
@@ -1123,6 +1159,7 @@
 {
 	struct nes_cm_node *cm_node;
 	struct timespec ts;
+	int oldarpindex = 0;
 	int arpindex = 0;
 	struct nes_device *nesdev;
 	struct nes_adapter *nesadapter;
@@ -1176,17 +1213,18 @@
 	nesadapter = nesdev->nesadapter;
 
 	cm_node->loopbackpartner = NULL;
+
 	/* get the mac addr for the remote node */
 	if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
 		arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
-	else
-		arpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+	else {
+		oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
+		arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
+
+	}
 	if (arpindex < 0) {
-		arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr);
-		if (arpindex < 0) {
-			kfree(cm_node);
-			return NULL;
-		}
+		kfree(cm_node);
+		return NULL;
 	}
 
 	/* copy the mac addr to node context */
@@ -1333,13 +1371,20 @@
 	case NES_CM_STATE_SYN_RCVD:
 	case NES_CM_STATE_SYN_SENT:
 	case NES_CM_STATE_ESTABLISHED:
-	case NES_CM_STATE_MPAREQ_SENT:
 	case NES_CM_STATE_MPAREJ_RCVD:
 		cm_node->tcp_cntxt.rcv_nxt++;
 		cleanup_retrans_entry(cm_node);
 		cm_node->state = NES_CM_STATE_LAST_ACK;
 		send_fin(cm_node, NULL);
 		break;
+	case NES_CM_STATE_MPAREQ_SENT:
+		create_event(cm_node, NES_CM_EVENT_ABORTED);
+		cm_node->tcp_cntxt.rcv_nxt++;
+		cleanup_retrans_entry(cm_node);
+		cm_node->state = NES_CM_STATE_CLOSED;
+		add_ref_cm_node(cm_node);
+		send_reset(cm_node, NULL);
+		break;
 	case NES_CM_STATE_FIN_WAIT1:
 		cm_node->tcp_cntxt.rcv_nxt++;
 		cleanup_retrans_entry(cm_node);
@@ -1590,6 +1635,7 @@
 		break;
 	case NES_CM_STATE_CLOSED:
 		cleanup_retrans_entry(cm_node);
+		add_ref_cm_node(cm_node);
 		send_reset(cm_node, skb);
 		break;
 	case NES_CM_STATE_TSA:
@@ -1641,9 +1687,15 @@
 		passive_open_err(cm_node, skb, 1);
 		break;
 	case NES_CM_STATE_LISTENING:
+		cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
+		cleanup_retrans_entry(cm_node);
+		cm_node->state = NES_CM_STATE_CLOSED;
+		send_reset(cm_node, skb);
+		break;
 	case NES_CM_STATE_CLOSED:
 		cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->ack_seq);
 		cleanup_retrans_entry(cm_node);
+		add_ref_cm_node(cm_node);
 		send_reset(cm_node, skb);
 		break;
 	case NES_CM_STATE_ESTABLISHED:
@@ -1712,8 +1764,13 @@
 			dev_kfree_skb_any(skb);
 		break;
 	case NES_CM_STATE_LISTENING:
+		cleanup_retrans_entry(cm_node);
+		cm_node->state = NES_CM_STATE_CLOSED;
+		send_reset(cm_node, skb);
+		break;
 	case NES_CM_STATE_CLOSED:
 		cleanup_retrans_entry(cm_node);
+		add_ref_cm_node(cm_node);
 		send_reset(cm_node, skb);
 		break;
 	case NES_CM_STATE_LAST_ACK:
@@ -1974,7 +2031,7 @@
 	if (!cm_node)
 		return NULL;
 	mpa_frame = &cm_node->mpa_frame;
-	strcpy(mpa_frame->key, IEFT_MPA_KEY_REQ);
+	memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
 	mpa_frame->flags = IETF_MPA_FLAGS_CRC;
 	mpa_frame->rev =  IETF_MPA_VERSION;
 	mpa_frame->priv_data_len = htons(private_data_len);
@@ -2102,30 +2159,39 @@
 			cm_node->state = NES_CM_STATE_CLOSED;
 			rem_ref_cm_node(cm_core, cm_node);
 		} else {
-			ret = send_mpa_reject(cm_node);
-			if (ret) {
-				cm_node->state = NES_CM_STATE_CLOSED;
-				err = send_reset(cm_node, NULL);
-				if (err)
-					WARN_ON(1);
-			} else
-				cm_id->add_ref(cm_id);
+			if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+				rem_ref_cm_node(cm_core, cm_node);
+			} else {
+				ret = send_mpa_reject(cm_node);
+				if (ret) {
+					cm_node->state = NES_CM_STATE_CLOSED;
+					err = send_reset(cm_node, NULL);
+					if (err)
+						WARN_ON(1);
+				} else
+					cm_id->add_ref(cm_id);
+			}
 		}
 	} else {
 		cm_node->cm_id = NULL;
-		event.cm_node = loopback;
-		event.cm_info.rem_addr = loopback->rem_addr;
-		event.cm_info.loc_addr = loopback->loc_addr;
-		event.cm_info.rem_port = loopback->rem_port;
-		event.cm_info.loc_port = loopback->loc_port;
-		event.cm_info.cm_id = loopback->cm_id;
-		cm_event_mpa_reject(&event);
-		rem_ref_cm_node(cm_core, cm_node);
-		loopback->state = NES_CM_STATE_CLOSING;
+		if (cm_node->state == NES_CM_STATE_LISTENER_DESTROYED) {
+			rem_ref_cm_node(cm_core, cm_node);
+			rem_ref_cm_node(cm_core, loopback);
+		} else {
+			event.cm_node = loopback;
+			event.cm_info.rem_addr = loopback->rem_addr;
+			event.cm_info.loc_addr = loopback->loc_addr;
+			event.cm_info.rem_port = loopback->rem_port;
+			event.cm_info.loc_port = loopback->loc_port;
+			event.cm_info.cm_id = loopback->cm_id;
+			cm_event_mpa_reject(&event);
+			rem_ref_cm_node(cm_core, cm_node);
+			loopback->state = NES_CM_STATE_CLOSING;
 
-		cm_id = loopback->cm_id;
-		rem_ref_cm_node(cm_core, loopback);
-		cm_id->rem_ref(cm_id);
+			cm_id = loopback->cm_id;
+			rem_ref_cm_node(cm_core, loopback);
+			cm_id->rem_ref(cm_id);
+		}
 	}
 
 	return ret;
@@ -2164,11 +2230,15 @@
 	case NES_CM_STATE_CLOSING:
 		ret = -1;
 		break;
-	case NES_CM_STATE_MPAREJ_RCVD:
 	case NES_CM_STATE_LISTENING:
+		cleanup_retrans_entry(cm_node);
+		send_reset(cm_node, NULL);
+		break;
+	case NES_CM_STATE_MPAREJ_RCVD:
 	case NES_CM_STATE_UNKNOWN:
 	case NES_CM_STATE_INITED:
 	case NES_CM_STATE_CLOSED:
+	case NES_CM_STATE_LISTENER_DESTROYED:
 		ret = rem_ref_cm_node(cm_core, cm_node);
 		break;
 	case NES_CM_STATE_TSA:
@@ -2687,8 +2757,6 @@
 	struct nes_pd *nespd;
 	u64 tagged_offset;
 
-
-
 	ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
 	if (!ibqp)
 		return -EINVAL;
@@ -2704,6 +2772,13 @@
 		"%s\n", cm_node, nesvnic, nesvnic->netdev,
 		nesvnic->netdev->name);
 
+	if (NES_CM_STATE_LISTENER_DESTROYED == cm_node->state) {
+		if (cm_node->loopbackpartner)
+			rem_ref_cm_node(cm_node->cm_core, cm_node->loopbackpartner);
+		rem_ref_cm_node(cm_node->cm_core, cm_node);
+		return -EINVAL;
+	}
+
 	/* associate the node with the QP */
 	nesqp->cm_node = (void *)cm_node;
 	cm_node->nesqp = nesqp;
@@ -2786,6 +2861,10 @@
 			cpu_to_le32(conn_param->private_data_len +
 			sizeof(struct ietf_mpa_frame));
 		wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
+		if (nesqp->sq_kmapped) {
+			nesqp->sq_kmapped = 0;
+			kunmap(nesqp->page);
+		}
 
 		nesqp->nesqp_context->ird_ord_sizes |=
 			cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@@ -2929,7 +3008,7 @@
 	if (cm_node->mpa_frame_size > MAX_CM_BUFFER)
 		return -EINVAL;
 
-	strcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP);
+	memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
 	if (loopback) {
 		memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
 		loopback->mpa_frame.priv_data_len = pdata_len;
@@ -2974,6 +3053,9 @@
 	if (!nesdev)
 		return -EINVAL;
 
+	if (!(cm_id->local_addr.sin_port) || !(cm_id->remote_addr.sin_port))
+		return -EINVAL;
+
 	nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
 		"0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
 		ntohl(nesvnic->local_ipaddr),
@@ -3251,6 +3333,11 @@
 		wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
 		wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
 
+		if (nesqp->sq_kmapped) {
+			nesqp->sq_kmapped = 0;
+			kunmap(nesqp->page);
+		}
+
 		/* use the reserved spot on the WQ for the extra first WQE */
 		nesqp->nesqp_context->ird_ord_sizes &=
 			cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
@@ -3346,7 +3433,7 @@
 	nesqp->cm_id = NULL;
 	cm_id->provider_data = NULL;
 	cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
-	cm_event.status = IW_CM_EVENT_STATUS_REJECTED;
+	cm_event.status = -ECONNRESET;
 	cm_event.provider_data = cm_id->provider_data;
 	cm_event.local_addr = cm_id->local_addr;
 	cm_event.remote_addr = cm_id->remote_addr;
@@ -3390,6 +3477,8 @@
 
 	nes_debug(NES_DBG_CM, "%p - cm_id = %p\n", event->cm_node, cm_id);
 	nesqp = cm_id->provider_data;
+	if (!nesqp)
+		return;
 
 	nesqp->cm_id = NULL;
 	/* cm_id->provider_data = NULL; */
@@ -3401,8 +3490,8 @@
 	cm_event.private_data = NULL;
 	cm_event.private_data_len = 0;
 
-	ret = cm_id->event_handler(cm_id, &cm_event);
 	cm_id->add_ref(cm_id);
+	ret = cm_id->event_handler(cm_id, &cm_event);
 	atomic_inc(&cm_closes);
 	cm_event.event = IW_CM_EVENT_CLOSE;
 	cm_event.status = IW_CM_EVENT_STATUS_OK;
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 90e8e4d..d9825fd 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -47,6 +47,8 @@
 #define IEFT_MPA_KEY_REP  "MPA ID Rep Frame"
 #define IETF_MPA_KEY_SIZE 16
 #define IETF_MPA_VERSION  1
+#define IETF_MAX_PRIV_DATA_LEN 512
+#define IETF_MPA_FRAME_SIZE     20
 
 enum ietf_mpa_flags {
 	IETF_MPA_FLAGS_MARKERS = 0x80,	/* receive Markers */
@@ -169,7 +171,7 @@
 
 #define NES_CM_DEF_SEQ2      0x18ed5740
 #define NES_CM_DEF_LOCAL_ID2 0xb807
-#define	MAX_CM_BUFFER	512
+#define	MAX_CM_BUFFER	(IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN)
 
 
 typedef u32 nes_addr_t;
@@ -198,6 +200,7 @@
 	NES_CM_STATE_TIME_WAIT,
 	NES_CM_STATE_LAST_ACK,
 	NES_CM_STATE_CLOSING,
+	NES_CM_STATE_LISTENER_DESTROYED,
 	NES_CM_STATE_CLOSED
 };
 
diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h
index 0fb8d81..b4393a1 100644
--- a/drivers/infiniband/hw/nes/nes_context.h
+++ b/drivers/infiniband/hw/nes/nes_context.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 3512d6d..b1c2cbb 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -424,8 +424,9 @@
 
 	nesadapter->base_pd = 1;
 
-	nesadapter->device_cap_flags =
-		IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
+	nesadapter->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
+				       IB_DEVICE_MEM_WINDOW |
+				       IB_DEVICE_MEM_MGT_EXTENSIONS;
 
 	nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
 			[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
@@ -436,11 +437,12 @@
 	nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
 
 
-	/* mark the usual suspect QPs and CQs as in use */
+	/* mark the usual suspect QPs, MR and CQs as in use */
 	for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
 		set_bit(u32temp, nesadapter->allocated_qps);
 		set_bit(u32temp, nesadapter->allocated_cqs);
 	}
+	set_bit(0, nesadapter->allocated_mrs);
 
 	for (u32temp = 0; u32temp < 20; u32temp++)
 		set_bit(u32temp, nesadapter->allocated_pds);
@@ -481,7 +483,7 @@
 	nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
 
 	nesadapter->max_sge = 4;
-	nesadapter->max_cqe = 32767;
+	nesadapter->max_cqe = 32766;
 
 	if (nes_read_eeprom_values(nesdev, nesadapter)) {
 		printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
@@ -1355,6 +1357,8 @@
 	}
 	if ((phy_type == NES_PHY_TYPE_ARGUS) ||
 	    (phy_type == NES_PHY_TYPE_SFP_D)) {
+		u32 first_time = 1;
+
 		/* Check firmware heartbeat */
 		nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
 		temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
@@ -1362,8 +1366,13 @@
 		nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
 		temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
 
-		if (temp_phy_data != temp_phy_data2)
-			return 0;
+		if (temp_phy_data != temp_phy_data2) {
+			nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
+			temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+			if ((temp_phy_data & 0xff) > 0x20)
+				return 0;
+			printk(PFX "Reinitializing PHY\n");
+		}
 
 		/* no heartbeat, configure the PHY */
 		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
@@ -1399,7 +1408,7 @@
 		temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
 		do {
 			if (counter++ > 150) {
-				nes_debug(NES_DBG_PHY, "No PHY heartbeat\n");
+				printk(PFX "No PHY heartbeat\n");
 				break;
 			}
 			mdelay(1);
@@ -1413,11 +1422,20 @@
 			nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
 			temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
 			if (counter++ > 300) {
-				nes_debug(NES_DBG_PHY, "PHY did not track\n");
-				break;
+				if (((temp_phy_data & 0xff) == 0x0) && first_time) {
+					first_time = 0;
+					counter = 0;
+					/* reset AMCC PHY and try again */
+					nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
+					nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
+					continue;
+				} else {
+					printk(PFX "PHY did not track\n");
+					break;
+				}
 			}
 			mdelay(10);
-		} while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
+		} while ((temp_phy_data & 0xff) < 0x30);
 
 		/* setup signal integrity */
 		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index f28a41b..084be0e 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1,5 +1,5 @@
 /*
-* Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+* Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
@@ -546,11 +546,23 @@
 	NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX = 14,
 };
 
+enum nes_iwarp_sq_fmr_opcodes {
+	NES_IWARP_SQ_FMR_WQE_ZERO_BASED			= (1<<6),
+	NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K		= (0<<7),
+	NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M		= (1<<7),
+	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ	= (1<<16),
+	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE 	= (1<<17),
+	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ 	= (1<<18),
+	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE = (1<<19),
+	NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND 	= (1<<20),
+};
+
+#define NES_IWARP_SQ_FMR_WQE_MR_LENGTH_HIGH_MASK	0xFF;
+
 enum nes_iwarp_sq_locinv_wqe_word_idx {
 	NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX = 6,
 };
 
-
 enum nes_iwarp_rq_wqe_word_idx {
 	NES_IWARP_RQ_WQE_TOTAL_PAYLOAD_IDX = 1,
 	NES_IWARP_RQ_WQE_COMP_CTX_LOW_IDX = 2,
@@ -1153,6 +1165,19 @@
 	/* TODO: need to add list for two level tables */
 };
 
+#define NES_4K_PBL_CHUNK_SIZE	4096
+
+struct nes_fast_mr_wqe_pbl {
+	u64		*kva;
+	dma_addr_t	paddr;
+};
+
+struct nes_ib_fast_reg_page_list {
+	struct ib_fast_reg_page_list	ibfrpl;
+	struct nes_fast_mr_wqe_pbl 	nes_wqe_pbl;
+	u64 				pbl;
+};
+
 struct nes_listener {
 	struct work_struct      work;
 	struct workqueue_struct *wq;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index e593af3..5a7b554 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
index cc90c14..71e133a 100644
--- a/drivers/infiniband/hw/nes/nes_user.h
+++ b/drivers/infiniband/hw/nes/nes_user.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Cisco Systems.  All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
@@ -86,6 +86,7 @@
 	IWNES_MEMREG_TYPE_CQ = 0x0002,
 	IWNES_MEMREG_TYPE_MW = 0x0003,
 	IWNES_MEMREG_TYPE_FMR = 0x0004,
+	IWNES_MEMREG_TYPE_FMEM = 0x0005,
 };
 
 struct nes_mem_reg_req {
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 9687c39..729d525 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index a680c42..64d3136 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -275,342 +275,236 @@
 }
 
 
-/**
- * nes_alloc_fmr
+/*
+ * nes_alloc_fast_mr
  */
-static struct ib_fmr *nes_alloc_fmr(struct ib_pd *ibpd,
-		int ibmr_access_flags,
-		struct ib_fmr_attr *ibfmr_attr)
+static int alloc_fast_reg_mr(struct nes_device *nesdev, struct nes_pd *nespd,
+			     u32 stag, u32 page_count)
 {
-	unsigned long flags;
-	struct nes_pd *nespd = to_nespd(ibpd);
-	struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
-	struct nes_device *nesdev = nesvnic->nesdev;
-	struct nes_adapter *nesadapter = nesdev->nesadapter;
-	struct nes_fmr *nesfmr;
-	struct nes_cqp_request *cqp_request;
 	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_cqp_request *cqp_request;
+	unsigned long flags;
 	int ret;
-	u32 stag;
-	u32 stag_index = 0;
-	u32 next_stag_index = 0;
-	u32 driver_key = 0;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	u32 opcode = 0;
-	u8 stag_key = 0;
-	int i=0;
-	struct nes_vpbl vpbl;
+	u16 major_code;
+	u64 region_length = page_count * PAGE_SIZE;
 
-	get_random_bytes(&next_stag_index, sizeof(next_stag_index));
-	stag_key = (u8)next_stag_index;
 
-	driver_key = 0;
-
-	next_stag_index >>= 8;
-	next_stag_index %= nesadapter->max_mr;
-
-	ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
-			nesadapter->max_mr, &stag_index, &next_stag_index);
-	if (ret) {
-		goto failed_resource_alloc;
-	}
-
-	nesfmr = kzalloc(sizeof(*nesfmr), GFP_KERNEL);
-	if (!nesfmr) {
-		ret = -ENOMEM;
-		goto failed_fmr_alloc;
-	}
-
-	nesfmr->nesmr.mode = IWNES_MEMREG_TYPE_FMR;
-	if (ibfmr_attr->max_pages == 1) {
-		/* use zero length PBL */
-		nesfmr->nesmr.pbl_4k = 0;
-		nesfmr->nesmr.pbls_used = 0;
-	} else if (ibfmr_attr->max_pages <= 32) {
-		/* use PBL 256 */
-		nesfmr->nesmr.pbl_4k = 0;
-		nesfmr->nesmr.pbls_used = 1;
-	} else if (ibfmr_attr->max_pages <= 512) {
-		/* use 4K PBLs */
-		nesfmr->nesmr.pbl_4k = 1;
-		nesfmr->nesmr.pbls_used = 1;
-	} else {
-		/* use two level 4K PBLs */
-		/* add support for two level 256B PBLs */
-		nesfmr->nesmr.pbl_4k = 1;
-		nesfmr->nesmr.pbls_used = 1 + (ibfmr_attr->max_pages >> 9) +
-				((ibfmr_attr->max_pages & 511) ? 1 : 0);
-	}
-	/* Register the region with the adapter */
-	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-
-	/* track PBL resources */
-	if (nesfmr->nesmr.pbls_used != 0) {
-		if (nesfmr->nesmr.pbl_4k) {
-			if (nesfmr->nesmr.pbls_used > nesadapter->free_4kpbl) {
-				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-				ret = -ENOMEM;
-				goto failed_vpbl_avail;
-			} else {
-				nesadapter->free_4kpbl -= nesfmr->nesmr.pbls_used;
-			}
-		} else {
-			if (nesfmr->nesmr.pbls_used > nesadapter->free_256pbl) {
-				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-				ret = -ENOMEM;
-				goto failed_vpbl_avail;
-			} else {
-				nesadapter->free_256pbl -= nesfmr->nesmr.pbls_used;
-			}
-		}
-	}
-
-	/* one level pbl */
-	if (nesfmr->nesmr.pbls_used == 0) {
-		nesfmr->root_vpbl.pbl_vbase = NULL;
-		nes_debug(NES_DBG_MR,  "zero level pbl \n");
-	} else if (nesfmr->nesmr.pbls_used == 1) {
-		/* can change it to kmalloc & dma_map_single */
-		nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
-				&nesfmr->root_vpbl.pbl_pbase);
-		if (!nesfmr->root_vpbl.pbl_vbase) {
-			spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-			ret = -ENOMEM;
-			goto failed_vpbl_alloc;
-		}
-		nesfmr->leaf_pbl_cnt = 0;
-		nes_debug(NES_DBG_MR, "one level pbl, root_vpbl.pbl_vbase=%p \n",
-				nesfmr->root_vpbl.pbl_vbase);
-	}
-	/* two level pbl */
-	else {
-		nesfmr->root_vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 8192,
-				&nesfmr->root_vpbl.pbl_pbase);
-		if (!nesfmr->root_vpbl.pbl_vbase) {
-			spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-			ret = -ENOMEM;
-			goto failed_vpbl_alloc;
-		}
-
-		nesfmr->leaf_pbl_cnt = nesfmr->nesmr.pbls_used-1;
-		nesfmr->root_vpbl.leaf_vpbl = kzalloc(sizeof(*nesfmr->root_vpbl.leaf_vpbl)*1024, GFP_ATOMIC);
-		if (!nesfmr->root_vpbl.leaf_vpbl) {
-			spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-			ret = -ENOMEM;
-			goto failed_leaf_vpbl_alloc;
-		}
-
-		nes_debug(NES_DBG_MR, "two level pbl, root_vpbl.pbl_vbase=%p"
-				" leaf_pbl_cnt=%d root_vpbl.leaf_vpbl=%p\n",
-				nesfmr->root_vpbl.pbl_vbase, nesfmr->leaf_pbl_cnt, nesfmr->root_vpbl.leaf_vpbl);
-
-		for (i=0; i<nesfmr->leaf_pbl_cnt; i++)
-			nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase = NULL;
-
-		for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
-			vpbl.pbl_vbase = pci_alloc_consistent(nesdev->pcidev, 4096,
-					&vpbl.pbl_pbase);
-
-			if (!vpbl.pbl_vbase) {
-				ret = -ENOMEM;
-				spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
-				goto failed_leaf_vpbl_pages_alloc;
-			}
-
-			nesfmr->root_vpbl.pbl_vbase[i].pa_low = cpu_to_le32((u32)vpbl.pbl_pbase);
-			nesfmr->root_vpbl.pbl_vbase[i].pa_high = cpu_to_le32((u32)((((u64)vpbl.pbl_pbase)>>32)));
-			nesfmr->root_vpbl.leaf_vpbl[i] = vpbl;
-
-			nes_debug(NES_DBG_MR, "pbase_low=0x%x, pbase_high=0x%x, vpbl=%p\n",
-					nesfmr->root_vpbl.pbl_vbase[i].pa_low,
-					nesfmr->root_vpbl.pbl_vbase[i].pa_high,
-					&nesfmr->root_vpbl.leaf_vpbl[i]);
-		}
-	}
-	nesfmr->ib_qp = NULL;
-	nesfmr->access_rights =0;
-
-	stag = stag_index << 8;
-	stag |= driver_key;
-	stag += (u32)stag_key;
-
-	spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
 	cqp_request = nes_get_cqp_request(nesdev);
 	if (cqp_request == NULL) {
 		nes_debug(NES_DBG_MR, "Failed to get a cqp_request.\n");
-		ret = -ENOMEM;
-		goto failed_leaf_vpbl_pages_alloc;
+		return -ENOMEM;
 	}
+	nes_debug(NES_DBG_MR, "alloc_fast_reg_mr: page_count = %d, "
+			      "region_length = %llu\n",
+			      page_count, region_length);
 	cqp_request->waiting = 1;
 	cqp_wqe = &cqp_request->cqp_wqe;
 
-	nes_debug(NES_DBG_MR, "Registering STag 0x%08X, index = 0x%08X\n",
-			stag, stag_index);
-
-	opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_VA_TO | NES_CQP_STAG_MR;
-
-	if (nesfmr->nesmr.pbl_4k == 1)
-		opcode |= NES_CQP_STAG_PBL_BLK_SIZE;
-
-	if (ibmr_access_flags & IB_ACCESS_REMOTE_WRITE) {
-		opcode |= NES_CQP_STAG_RIGHTS_REMOTE_WRITE |
-				NES_CQP_STAG_RIGHTS_LOCAL_WRITE | NES_CQP_STAG_REM_ACC_EN;
-		nesfmr->access_rights |=
-				NES_CQP_STAG_RIGHTS_REMOTE_WRITE | NES_CQP_STAG_RIGHTS_LOCAL_WRITE |
-				NES_CQP_STAG_REM_ACC_EN;
+	spin_lock_irqsave(&nesadapter->pbl_lock, flags);
+	if (nesadapter->free_4kpbl > 0) {
+		nesadapter->free_4kpbl--;
+		spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+	} else {
+		/* No 4kpbl's available: */
+		spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+		nes_debug(NES_DBG_MR, "Out of Pbls\n");
+		nes_free_cqp_request(nesdev, cqp_request);
+		return -ENOMEM;
 	}
 
-	if (ibmr_access_flags & IB_ACCESS_REMOTE_READ) {
-		opcode |= NES_CQP_STAG_RIGHTS_REMOTE_READ |
-				NES_CQP_STAG_RIGHTS_LOCAL_READ | NES_CQP_STAG_REM_ACC_EN;
-		nesfmr->access_rights |=
-				NES_CQP_STAG_RIGHTS_REMOTE_READ | NES_CQP_STAG_RIGHTS_LOCAL_READ |
-				NES_CQP_STAG_REM_ACC_EN;
-	}
+	opcode = NES_CQP_ALLOCATE_STAG | NES_CQP_STAG_MR |
+		 NES_CQP_STAG_PBL_BLK_SIZE | NES_CQP_STAG_VA_TO |
+		 NES_CQP_STAG_REM_ACC_EN;
+	/*
+	 * The current OFED API does not support the zero based TO option.
+	 * If added then need to changed the NES_CQP_STAG_VA* option.  Also,
+	 * the API does not support that ability to have the MR set for local
+	 * access only when created and not allow the SQ op to override. Given
+	 * this the remote enable must be set here.
+	 */
 
 	nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
 	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX, opcode);
-	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX, (nespd->pd_id & 0x00007fff));
-	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX, 1);
 
-	cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] =
-			cpu_to_le32((nesfmr->nesmr.pbls_used>1) ?
-			(nesfmr->nesmr.pbls_used-1) : nesfmr->nesmr.pbls_used);
+	cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] =
+			cpu_to_le32((u32)(region_length >> 8) & 0xff000000);
+	cqp_wqe->wqe_words[NES_CQP_STAG_WQE_LEN_HIGH_PD_IDX] |=
+			cpu_to_le32(nespd->pd_id & 0x00007fff);
+
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_STAG_IDX, stag);
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_VA_LOW_IDX, 0);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_LEN_LOW_IDX, 0);
+	set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PA_LOW_IDX, 0);
+	set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_STAG_WQE_PBL_LEN_IDX, (page_count * 8));
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |= cpu_to_le32(NES_CQP_STAG_PBL_BLK_SIZE);
+	barrier();
 
 	atomic_set(&cqp_request->refcount, 2);
 	nes_post_cqp_request(nesdev, cqp_request);
 
 	/* Wait for CQP */
-	ret = wait_event_timeout(cqp_request->waitq, (cqp_request->request_done != 0),
-			NES_EVENT_TIMEOUT);
-	nes_debug(NES_DBG_MR, "Register STag 0x%08X completed, wait_event_timeout ret = %u,"
-			" CQP Major:Minor codes = 0x%04X:0x%04X.\n",
-			stag, ret, cqp_request->major_code, cqp_request->minor_code);
+	ret = wait_event_timeout(cqp_request->waitq,
+				 (0 != cqp_request->request_done),
+				 NES_EVENT_TIMEOUT);
 
-	if ((!ret) || (cqp_request->major_code)) {
-		nes_put_cqp_request(nesdev, cqp_request);
-		ret = (!ret) ? -ETIME : -EIO;
-		goto failed_leaf_vpbl_pages_alloc;
-	}
+	nes_debug(NES_DBG_MR, "Allocate STag 0x%08X completed, "
+		  "wait_event_timeout ret = %u, CQP Major:Minor codes = "
+		  "0x%04X:0x%04X.\n", stag, ret, cqp_request->major_code,
+		  cqp_request->minor_code);
+	major_code = cqp_request->major_code;
 	nes_put_cqp_request(nesdev, cqp_request);
-	nesfmr->nesmr.ibfmr.lkey = stag;
-	nesfmr->nesmr.ibfmr.rkey = stag;
-	nesfmr->attr = *ibfmr_attr;
 
-	return &nesfmr->nesmr.ibfmr;
-
-	failed_leaf_vpbl_pages_alloc:
-	/* unroll all allocated pages */
-	for (i=0; i<nesfmr->leaf_pbl_cnt; i++) {
-		if (nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase) {
-			pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
-					nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
-		}
-	}
-	if (nesfmr->root_vpbl.leaf_vpbl)
-		kfree(nesfmr->root_vpbl.leaf_vpbl);
-
-	failed_leaf_vpbl_alloc:
-	if (nesfmr->leaf_pbl_cnt == 0) {
-		if (nesfmr->root_vpbl.pbl_vbase)
-			pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
-					nesfmr->root_vpbl.pbl_pbase);
-	} else
-		pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
-				nesfmr->root_vpbl.pbl_pbase);
-
-	failed_vpbl_alloc:
-	if (nesfmr->nesmr.pbls_used != 0) {
+	if (!ret || major_code) {
 		spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-		if (nesfmr->nesmr.pbl_4k)
-			nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
-		else
-			nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
+		nesadapter->free_4kpbl++;
 		spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
 	}
 
-failed_vpbl_avail:
-	kfree(nesfmr);
-
-	failed_fmr_alloc:
-	nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-
-	failed_resource_alloc:
-	return ERR_PTR(ret);
+	if (!ret)
+		return -ETIME;
+	else if (major_code)
+		return -EIO;
+	return 0;
 }
 
-
-/**
- * nes_dealloc_fmr
+/*
+ * nes_alloc_fast_reg_mr
  */
-static int nes_dealloc_fmr(struct ib_fmr *ibfmr)
+struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list_len)
 {
-	unsigned long flags;
-	struct nes_mr *nesmr = to_nesmr_from_ibfmr(ibfmr);
-	struct nes_fmr *nesfmr = to_nesfmr(nesmr);
-	struct nes_vnic *nesvnic = to_nesvnic(ibfmr->device);
+	struct nes_pd *nespd = to_nespd(ibpd);
+	struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
 	struct nes_device *nesdev = nesvnic->nesdev;
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
-	int i = 0;
-	int rc;
 
-	/* free the resources */
-	if (nesfmr->leaf_pbl_cnt == 0) {
-		/* single PBL case */
-		if (nesfmr->root_vpbl.pbl_vbase)
-			pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.pbl_vbase,
-					nesfmr->root_vpbl.pbl_pbase);
+	u32 next_stag_index;
+	u8 stag_key = 0;
+	u32 driver_key = 0;
+	int err = 0;
+	u32 stag_index = 0;
+	struct nes_mr *nesmr;
+	u32 stag;
+	int ret;
+	struct ib_mr *ibmr;
+/*
+ * Note:  Set to always use a fixed length single page entry PBL.  This is to allow
+ *	 for the fast_reg_mr operation to always know the size of the PBL.
+ */
+	if (max_page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+		return ERR_PTR(-E2BIG);
+
+	get_random_bytes(&next_stag_index, sizeof(next_stag_index));
+	stag_key = (u8)next_stag_index;
+	next_stag_index >>= 8;
+	next_stag_index %= nesadapter->max_mr;
+
+	err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
+				 nesadapter->max_mr, &stag_index,
+				 &next_stag_index);
+	if (err)
+		return ERR_PTR(err);
+
+	nesmr = kzalloc(sizeof(*nesmr), GFP_KERNEL);
+	if (!nesmr) {
+		nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	stag = stag_index << 8;
+	stag |= driver_key;
+	stag += (u32)stag_key;
+
+	nes_debug(NES_DBG_MR, "Allocating STag 0x%08X index = 0x%08X\n",
+		  stag, stag_index);
+
+	ret = alloc_fast_reg_mr(nesdev, nespd, stag, max_page_list_len);
+
+	if (ret == 0) {
+		nesmr->ibmr.rkey = stag;
+		nesmr->ibmr.lkey = stag;
+		nesmr->mode = IWNES_MEMREG_TYPE_FMEM;
+		ibmr = &nesmr->ibmr;
 	} else {
-		for (i = 0; i < nesfmr->leaf_pbl_cnt; i++) {
-			pci_free_consistent(nesdev->pcidev, 4096, nesfmr->root_vpbl.leaf_vpbl[i].pbl_vbase,
-					nesfmr->root_vpbl.leaf_vpbl[i].pbl_pbase);
-		}
-		kfree(nesfmr->root_vpbl.leaf_vpbl);
-		pci_free_consistent(nesdev->pcidev, 8192, nesfmr->root_vpbl.pbl_vbase,
-				nesfmr->root_vpbl.pbl_pbase);
+		kfree(nesmr);
+		nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
+		ibmr = ERR_PTR(-ENOMEM);
 	}
-	nesmr->ibmw.device = ibfmr->device;
-	nesmr->ibmw.pd = ibfmr->pd;
-	nesmr->ibmw.rkey = ibfmr->rkey;
-	nesmr->ibmw.uobject = NULL;
+	return ibmr;
+}
 
-	rc = nes_dealloc_mw(&nesmr->ibmw);
+/*
+ * nes_alloc_fast_reg_page_list
+ */
+static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
+							struct ib_device *ibdev,
+							int page_list_len)
+{
+	struct nes_vnic *nesvnic = to_nesvnic(ibdev);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct ib_fast_reg_page_list *pifrpl;
+	struct nes_ib_fast_reg_page_list *pnesfrpl;
 
-	if ((rc == 0) && (nesfmr->nesmr.pbls_used != 0)) {
-		spin_lock_irqsave(&nesadapter->pbl_lock, flags);
-		if (nesfmr->nesmr.pbl_4k) {
-			nesadapter->free_4kpbl += nesfmr->nesmr.pbls_used;
-			WARN_ON(nesadapter->free_4kpbl > nesadapter->max_4kpbl);
-		} else {
-			nesadapter->free_256pbl += nesfmr->nesmr.pbls_used;
-			WARN_ON(nesadapter->free_256pbl > nesadapter->max_256pbl);
-		}
-		spin_unlock_irqrestore(&nesadapter->pbl_lock, flags);
+	if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
+		return ERR_PTR(-E2BIG);
+	/*
+	 * Allocate the ib_fast_reg_page_list structure, the
+	 * nes_fast_bpl structure, and the PLB table.
+	 */
+	pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) +
+			   page_list_len * sizeof(u64), GFP_KERNEL);
+
+	if (!pnesfrpl)
+		return ERR_PTR(-ENOMEM);
+
+	pifrpl = &pnesfrpl->ibfrpl;
+	pifrpl->page_list = &pnesfrpl->pbl;
+	pifrpl->max_page_list_len = page_list_len;
+	/*
+	 * Allocate the WQE PBL
+	 */
+	pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev,
+							 page_list_len * sizeof(u64),
+							 &pnesfrpl->nes_wqe_pbl.paddr);
+
+	if (!pnesfrpl->nes_wqe_pbl.kva) {
+		kfree(pnesfrpl);
+		return ERR_PTR(-ENOMEM);
 	}
+	nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, "
+		  "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, "
+		  "pbl.paddr= %p\n", pnesfrpl, &pnesfrpl->ibfrpl,
+		  pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva,
+		  (void *)pnesfrpl->nes_wqe_pbl.paddr);
 
-	return rc;
+	return pifrpl;
 }
 
-
-/**
- * nes_map_phys_fmr
+/*
+ * nes_free_fast_reg_page_list
  */
-static int nes_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		int list_len, u64 iova)
+static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl)
 {
-	return 0;
+	struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_ib_fast_reg_page_list *pnesfrpl;
+
+	pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl);
+	/*
+	 * Free the WQE PBL.
+	 */
+	pci_free_consistent(nesdev->pcidev,
+			    pifrpl->max_page_list_len * sizeof(u64),
+			    pnesfrpl->nes_wqe_pbl.kva,
+			    pnesfrpl->nes_wqe_pbl.paddr);
+	/*
+	 * Free the PBL structure
+	 */
+	kfree(pnesfrpl);
 }
 
-
-/**
- * nes_unmap_frm
- */
-static int nes_unmap_fmr(struct list_head *ibfmr_list)
-{
-	return 0;
-}
-
-
-
 /**
  * nes_query_device
  */
@@ -633,23 +527,23 @@
 	props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
 	props->max_sge = nesdev->nesadapter->max_sge;
 	props->max_cq = nesibdev->max_cq;
-	props->max_cqe = nesdev->nesadapter->max_cqe - 1;
+	props->max_cqe = nesdev->nesadapter->max_cqe;
 	props->max_mr = nesibdev->max_mr;
 	props->max_mw = nesibdev->max_mr;
 	props->max_pd = nesibdev->max_pd;
 	props->max_sge_rd = 1;
 	switch (nesdev->nesadapter->max_irrq_wr) {
 		case 0:
-			props->max_qp_rd_atom = 1;
+			props->max_qp_rd_atom = 2;
 			break;
 		case 1:
-			props->max_qp_rd_atom = 4;
+			props->max_qp_rd_atom = 8;
 			break;
 		case 2:
-			props->max_qp_rd_atom = 16;
+			props->max_qp_rd_atom = 32;
 			break;
 		case 3:
-			props->max_qp_rd_atom = 32;
+			props->max_qp_rd_atom = 64;
 			break;
 		default:
 			props->max_qp_rd_atom = 0;
@@ -1121,6 +1015,7 @@
 		kunmap(nesqp->page);
 		return -ENOMEM;
 	}
+	nesqp->sq_kmapped = 1;
 	nesqp->hwqp.q2_vbase = mem;
 	mem += 256;
 	memset(nesqp->hwqp.q2_vbase, 0, 256);
@@ -1198,7 +1093,10 @@
 		pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.q2_vbase, nesqp->hwqp.q2_pbase);
 		pci_free_consistent(nesdev->pcidev, 256, nesqp->pbl_vbase, nesqp->pbl_pbase );
 		nesqp->pbl_vbase = NULL;
-		kunmap(nesqp->page);
+		if (nesqp->sq_kmapped) {
+			nesqp->sq_kmapped = 0;
+			kunmap(nesqp->page);
+		}
 	}
 }
 
@@ -1504,8 +1402,6 @@
 			nes_debug(NES_DBG_QP, "QP%u structure located @%p.Size = %u.\n",
 					nesqp->hwqp.qp_id, nesqp, (u32)sizeof(*nesqp));
 			spin_lock_init(&nesqp->lock);
-			init_waitqueue_head(&nesqp->state_waitq);
-			init_waitqueue_head(&nesqp->kick_waitq);
 			nes_add_ref(&nesqp->ibqp);
 			break;
 		default:
@@ -1513,6 +1409,8 @@
 			return ERR_PTR(-EINVAL);
 	}
 
+	nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
+
 	/* update the QP table */
 	nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
 	nes_debug(NES_DBG_QP, "netdev refcnt=%u\n",
@@ -1607,8 +1505,10 @@
 				nes_ucontext->first_free_wq = nesqp->mmap_sq_db_index;
 			}
 		}
-		if (nesqp->pbl_pbase)
+		if (nesqp->pbl_pbase && nesqp->sq_kmapped) {
+			nesqp->sq_kmapped = 0;
 			kunmap(nesqp->page);
+		}
 	} else {
 		/* Clean any pending completions from the cq(s) */
 		if (nesqp->nesscq)
@@ -1649,6 +1549,9 @@
 	unsigned long flags;
 	int ret;
 
+	if (entries > nesadapter->max_cqe)
+		return ERR_PTR(-EINVAL);
+
 	err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
 			nesadapter->max_cq, &cq_num, &nesadapter->next_cq);
 	if (err) {
@@ -2606,9 +2509,6 @@
 			stag = stag_index << 8;
 			stag |= driver_key;
 			stag += (u32)stag_key;
-			if (stag == 0) {
-				stag = 1;
-			}
 
 			iova_start = virt;
 			/* Make the leaf PBL the root if only one PBL */
@@ -3109,7 +3009,6 @@
 								" already done based on hw state.\n",
 								nesqp->hwqp.qp_id);
 						issue_modify_qp = 0;
-						nesqp->in_disconnect = 0;
 					}
 					switch (nesqp->hw_iwarp_state) {
 						case NES_AEQE_IWARP_STATE_CLOSING:
@@ -3122,7 +3021,6 @@
 							break;
 						default:
 							next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
-							nesqp->in_disconnect = 1;
 							nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
 							break;
 					}
@@ -3139,7 +3037,6 @@
 				next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
 				nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_TERMINATE;
 				issue_modify_qp = 1;
-				nesqp->in_disconnect = 1;
 				break;
 			case IB_QPS_ERR:
 			case IB_QPS_RESET:
@@ -3162,7 +3059,6 @@
 				if ((nesqp->hw_tcp_state > NES_AEQE_TCP_STATE_CLOSED) &&
 						(nesqp->hw_tcp_state != NES_AEQE_TCP_STATE_TIME_WAIT)) {
 					next_iwarp_state |= NES_CQP_QP_RESET;
-					nesqp->in_disconnect = 1;
 				} else {
 					nes_debug(NES_DBG_MOD_QP, "QP%u NOT setting NES_CQP_QP_RESET since TCP state = %u\n",
 							nesqp->hwqp.qp_id, nesqp->hw_tcp_state);
@@ -3373,21 +3269,17 @@
 	struct nes_device *nesdev = nesvnic->nesdev;
 	struct nes_qp *nesqp = to_nesqp(ibqp);
 	struct nes_hw_qp_wqe *wqe;
-	int err;
+	int err = 0;
 	u32 qsize = nesqp->hwqp.sq_size;
 	u32 head;
-	u32 wqe_misc;
-	u32 wqe_count;
+	u32 wqe_misc = 0;
+	u32 wqe_count = 0;
 	u32 counter;
-	u32 total_payload_length;
 
-	err = 0;
-	wqe_misc = 0;
-	wqe_count = 0;
-	total_payload_length = 0;
-
-	if (nesqp->ibqp_state > IB_QPS_RTS)
-		return -EINVAL;
+	if (nesqp->ibqp_state > IB_QPS_RTS) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	spin_lock_irqsave(&nesqp->lock, flags);
 
@@ -3413,94 +3305,208 @@
 		u64temp = (u64)(ib_wr->wr_id);
 		set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_SCRATCH_LOW_IDX,
 					u64temp);
-			switch (ib_wr->opcode) {
-				case IB_WR_SEND:
-					if (ib_wr->send_flags & IB_SEND_SOLICITED) {
-						wqe_misc = NES_IWARP_SQ_OP_SENDSE;
-					} else {
-						wqe_misc = NES_IWARP_SQ_OP_SEND;
-					}
-					if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
-						err = -EINVAL;
-						break;
-					}
-					if (ib_wr->send_flags & IB_SEND_FENCE) {
-						wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-					}
-					if ((ib_wr->send_flags & IB_SEND_INLINE) &&
-							((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
-							(ib_wr->sg_list[0].length <= 64)) {
-						memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
-							       (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
-						set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
-								ib_wr->sg_list[0].length);
-						wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
-					} else {
-						fill_wqe_sg_send(wqe, ib_wr, 1);
-					}
+		switch (ib_wr->opcode) {
+		case IB_WR_SEND:
+		case IB_WR_SEND_WITH_INV:
+			if (IB_WR_SEND == ib_wr->opcode) {
+				if (ib_wr->send_flags & IB_SEND_SOLICITED)
+					wqe_misc = NES_IWARP_SQ_OP_SENDSE;
+				else
+					wqe_misc = NES_IWARP_SQ_OP_SEND;
+			} else {
+				if (ib_wr->send_flags & IB_SEND_SOLICITED)
+					wqe_misc = NES_IWARP_SQ_OP_SENDSEINV;
+				else
+					wqe_misc = NES_IWARP_SQ_OP_SENDINV;
 
-					break;
-				case IB_WR_RDMA_WRITE:
-					wqe_misc = NES_IWARP_SQ_OP_RDMAW;
-					if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
-						nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
-								ib_wr->num_sge,
-								nesdev->nesadapter->max_sge);
-						err = -EINVAL;
-						break;
-					}
-					if (ib_wr->send_flags & IB_SEND_FENCE) {
-						wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
-					}
-
-					set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
-							ib_wr->wr.rdma.rkey);
-					set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
-							ib_wr->wr.rdma.remote_addr);
-
-					if ((ib_wr->send_flags & IB_SEND_INLINE) &&
-							((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
-							(ib_wr->sg_list[0].length <= 64)) {
-						memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
-							       (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
-						set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
-								ib_wr->sg_list[0].length);
-						wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
-					} else {
-						fill_wqe_sg_send(wqe, ib_wr, 1);
-					}
-					wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
-							wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
-					break;
-				case IB_WR_RDMA_READ:
-					/* iWARP only supports 1 sge for RDMA reads */
-					if (ib_wr->num_sge > 1) {
-						nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
-								ib_wr->num_sge);
-						err = -EINVAL;
-						break;
-					}
-					wqe_misc = NES_IWARP_SQ_OP_RDMAR;
-					set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
-							ib_wr->wr.rdma.remote_addr);
-					set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
-							ib_wr->wr.rdma.rkey);
-					set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
-							ib_wr->sg_list->length);
-					set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
-							ib_wr->sg_list->addr);
-					set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
-							ib_wr->sg_list->lkey);
-					break;
-				default:
-					/* error */
-					err = -EINVAL;
-					break;
+				set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
+						    ib_wr->ex.invalidate_rkey);
 			}
 
-		if (ib_wr->send_flags & IB_SEND_SIGNALED) {
-			wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+			if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+				err = -EINVAL;
+				break;
+			}
+
+			if (ib_wr->send_flags & IB_SEND_FENCE)
+				wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+			if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+			    ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+			     (ib_wr->sg_list[0].length <= 64)) {
+				memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+				       (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+				set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+						    ib_wr->sg_list[0].length);
+				wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+			} else {
+				fill_wqe_sg_send(wqe, ib_wr, 1);
+			}
+
+			break;
+		case IB_WR_RDMA_WRITE:
+			wqe_misc = NES_IWARP_SQ_OP_RDMAW;
+			if (ib_wr->num_sge > nesdev->nesadapter->max_sge) {
+				nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=%u\n",
+					  ib_wr->num_sge, nesdev->nesadapter->max_sge);
+				err = -EINVAL;
+				break;
+			}
+
+			if (ib_wr->send_flags & IB_SEND_FENCE)
+				wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
+
+			set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+					    ib_wr->wr.rdma.rkey);
+			set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+					    ib_wr->wr.rdma.remote_addr);
+
+			if ((ib_wr->send_flags & IB_SEND_INLINE) &&
+			    ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
+			     (ib_wr->sg_list[0].length <= 64)) {
+				memcpy(&wqe->wqe_words[NES_IWARP_SQ_WQE_IMM_DATA_START_IDX],
+				       (void *)(unsigned long)ib_wr->sg_list[0].addr, ib_wr->sg_list[0].length);
+				set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX,
+						    ib_wr->sg_list[0].length);
+				wqe_misc |= NES_IWARP_SQ_WQE_IMM_DATA;
+			} else {
+				fill_wqe_sg_send(wqe, ib_wr, 1);
+			}
+
+			wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] =
+				wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX];
+			break;
+		case IB_WR_RDMA_READ:
+		case IB_WR_RDMA_READ_WITH_INV:
+			/* iWARP only supports 1 sge for RDMA reads */
+			if (ib_wr->num_sge > 1) {
+				nes_debug(NES_DBG_IW_TX, "Exceeded max sge, ib_wr=%u, max=1\n",
+					  ib_wr->num_sge);
+				err = -EINVAL;
+				break;
+			}
+			if (ib_wr->opcode == IB_WR_RDMA_READ) {
+				wqe_misc = NES_IWARP_SQ_OP_RDMAR;
+			} else {
+				wqe_misc = NES_IWARP_SQ_OP_RDMAR_LOCINV;
+				set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_INV_STAG_LOW_IDX,
+						    ib_wr->ex.invalidate_rkey);
+			}
+
+			set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
+					    ib_wr->wr.rdma.remote_addr);
+			set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
+					    ib_wr->wr.rdma.rkey);
+			set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
+					    ib_wr->sg_list->length);
+			set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
+					    ib_wr->sg_list->addr);
+			set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_STAG0_IDX,
+					    ib_wr->sg_list->lkey);
+			break;
+		case IB_WR_LOCAL_INV:
+			wqe_misc = NES_IWARP_SQ_OP_LOCINV;
+			set_wqe_32bit_value(wqe->wqe_words,
+					    NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
+					    ib_wr->ex.invalidate_rkey);
+			break;
+		case IB_WR_FAST_REG_MR:
+		{
+			int i;
+			int flags = ib_wr->wr.fast_reg.access_flags;
+			struct nes_ib_fast_reg_page_list *pnesfrpl =
+				container_of(ib_wr->wr.fast_reg.page_list,
+					     struct nes_ib_fast_reg_page_list,
+					     ibfrpl);
+			u64 *src_page_list = pnesfrpl->ibfrpl.page_list;
+			u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva;
+
+			if (ib_wr->wr.fast_reg.page_list_len >
+			    (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
+				nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
+				err = -EINVAL;
+				break;
+			}
+			wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
+			set_wqe_64bit_value(wqe->wqe_words,
+					    NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
+					    ib_wr->wr.fast_reg.iova_start);
+			set_wqe_32bit_value(wqe->wqe_words,
+					    NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
+					    ib_wr->wr.fast_reg.length);
+			set_wqe_32bit_value(wqe->wqe_words,
+					    NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
+					    ib_wr->wr.fast_reg.rkey);
+			/* Set page size: */
+			if (ib_wr->wr.fast_reg.page_shift == 12) {
+				wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
+			} else if (ib_wr->wr.fast_reg.page_shift == 21) {
+				wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
+			} else {
+				nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
+					  " ib_wr=%u, max=1\n", ib_wr->num_sge);
+				err = -EINVAL;
+				break;
+			}
+			/* Set access_flags */
+			wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
+			if (flags & IB_ACCESS_LOCAL_WRITE)
+				wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_WRITE;
+
+			if (flags & IB_ACCESS_REMOTE_WRITE)
+				wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_WRITE;
+
+			if (flags & IB_ACCESS_REMOTE_READ)
+				wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_REMOTE_READ;
+
+			if (flags & IB_ACCESS_MW_BIND)
+				wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
+
+			/* Fill in PBL info: */
+			if (ib_wr->wr.fast_reg.page_list_len >
+			    pnesfrpl->ibfrpl.max_page_list_len) {
+				nes_debug(NES_DBG_IW_TX, "Invalid page list length,"
+					  " ib_wr=%p, value=%u, max=%u\n",
+					  ib_wr, ib_wr->wr.fast_reg.page_list_len,
+					  pnesfrpl->ibfrpl.max_page_list_len);
+				err = -EINVAL;
+				break;
+			}
+
+			set_wqe_64bit_value(wqe->wqe_words,
+					    NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
+					    pnesfrpl->nes_wqe_pbl.paddr);
+
+			set_wqe_32bit_value(wqe->wqe_words,
+					    NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
+					    ib_wr->wr.fast_reg.page_list_len * 8);
+
+			for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++)
+				dst_page_list[i] = cpu_to_le64(src_page_list[i]);
+
+			nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %p, "
+				  "length: %d, rkey: %0x, pgl_paddr: %p, "
+				  "page_list_len: %u, wqe_misc: %x\n",
+				  (void *)ib_wr->wr.fast_reg.iova_start,
+				  ib_wr->wr.fast_reg.length,
+				  ib_wr->wr.fast_reg.rkey,
+				  (void *)pnesfrpl->nes_wqe_pbl.paddr,
+				  ib_wr->wr.fast_reg.page_list_len,
+				  wqe_misc);
+			break;
 		}
+		default:
+			/* error */
+			err = -EINVAL;
+			break;
+		}
+
+		if (err)
+			break;
+
+		if ((ib_wr->send_flags & IB_SEND_SIGNALED) || nesqp->sig_all)
+			wqe_misc |= NES_IWARP_SQ_WQE_SIGNALED_COMPL;
+
 		wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = cpu_to_le32(wqe_misc);
 
 		ib_wr = ib_wr->next;
@@ -3522,6 +3528,7 @@
 
 	spin_unlock_irqrestore(&nesqp->lock, flags);
 
+out:
 	if (err)
 		*bad_wr = ib_wr;
 	return err;
@@ -3548,8 +3555,10 @@
 	u32 counter;
 	u32 total_payload_length;
 
-	if (nesqp->ibqp_state > IB_QPS_RTS)
-		return -EINVAL;
+	if (nesqp->ibqp_state > IB_QPS_RTS) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	spin_lock_irqsave(&nesqp->lock, flags);
 
@@ -3612,6 +3621,7 @@
 
 	spin_unlock_irqrestore(&nesqp->lock, flags);
 
+out:
 	if (err)
 		*bad_wr = ib_wr;
 	return err;
@@ -3720,6 +3730,12 @@
 						nes_debug(NES_DBG_CQ, "Operation = Send.\n");
 						entry->opcode = IB_WC_SEND;
 						break;
+					case NES_IWARP_SQ_OP_LOCINV:
+						entry->opcode = IB_WR_LOCAL_INV;
+						break;
+					case NES_IWARP_SQ_OP_FAST_REG:
+						entry->opcode = IB_WC_FAST_REG_MR;
+						break;
 				}
 
 				nesqp->hwqp.sq_tail = (wqe_index+1)&(nesqp->hwqp.sq_size - 1);
@@ -3890,10 +3906,9 @@
 	nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
 	nesibdev->ibdev.bind_mw = nes_bind_mw;
 
-	nesibdev->ibdev.alloc_fmr = nes_alloc_fmr;
-	nesibdev->ibdev.unmap_fmr = nes_unmap_fmr;
-	nesibdev->ibdev.dealloc_fmr = nes_dealloc_fmr;
-	nesibdev->ibdev.map_phys_fmr = nes_map_phys_fmr;
+	nesibdev->ibdev.alloc_fast_reg_mr = nes_alloc_fast_reg_mr;
+	nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
+	nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
 
 	nesibdev->ibdev.attach_mcast = nes_multicast_attach;
 	nesibdev->ibdev.detach_mcast = nes_multicast_detach;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 89822d7..2df9993e 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 - 2009 Intel-NE, Inc.  All rights reserved.
+ * Copyright (c) 2006 - 2009 Intel Corporation.  All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -135,19 +135,15 @@
 	struct ib_qp          ibqp;
 	void                  *allocated_buffer;
 	struct iw_cm_id       *cm_id;
-	struct workqueue_struct *wq;
 	struct nes_cq         *nesscq;
 	struct nes_cq         *nesrcq;
 	struct nes_pd         *nespd;
 	void *cm_node; /* handle of the node this QP is associated with */
 	struct ietf_mpa_frame *ietf_frame;
 	dma_addr_t            ietf_frame_pbase;
-	wait_queue_head_t     state_waitq;
 	struct ib_mr          *lsmm_mr;
-	unsigned long         socket;
 	struct nes_hw_qp      hwqp;
 	struct work_struct    work;
-	struct work_struct    ae_work;
 	enum ib_qp_state      ibqp_state;
 	u32                   iwarp_state;
 	u32                   hte_index;
@@ -165,19 +161,20 @@
 	struct page           *page;
 	struct timer_list     terminate_timer;
 	enum ib_event_type    terminate_eventtype;
-	wait_queue_head_t     kick_waitq;
-	u16                   in_disconnect;
+	u16                   active_conn:1;
+	u16                   skip_lsmm:1;
+	u16                   user_mode:1;
+	u16                   hte_added:1;
+	u16                   flush_issued:1;
+	u16                   destroyed:1;
+	u16                   sig_all:1;
+	u16                   rsvd:9;
 	u16                   private_data_len;
 	u16                   term_sq_flush_code;
 	u16                   term_rq_flush_code;
-	u8                    active_conn;
-	u8                    skip_lsmm;
-	u8                    user_mode;
-	u8                    hte_added;
 	u8                    hw_iwarp_state;
-	u8                    flush_issued;
 	u8                    hw_tcp_state;
 	u8                    term_flags;
-	u8                    destroyed;
+	u8                    sq_kmapped;
 };
 #endif			/* NES_VERBS_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2bf5116..df3eb8c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -884,6 +884,7 @@
 
 	neigh->neighbour = neighbour;
 	neigh->dev = dev;
+	memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
 	*to_ipoib_neigh(neighbour) = neigh;
 	skb_queue_head_init(&neigh->queue);
 	ipoib_cm_set(neigh, NULL);
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 3c16602..04f42ae 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -90,6 +90,7 @@
 		[ 9] = "Q_Key violation counter",
 		[10] = "VMM",
 		[12] = "DPDP",
+		[15] = "Big LSO headers",
 		[16] = "MW support",
 		[17] = "APM support",
 		[18] = "Atomic ops support",
@@ -235,7 +236,7 @@
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MPT_OFFSET);
 	dev_cap->max_mpts = 1 << (field & 0x3f);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_EQ_OFFSET);
-	dev_cap->reserved_eqs = 1 << (field & 0xf);
+	dev_cap->reserved_eqs = field & 0xf;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_EQ_OFFSET);
 	dev_cap->max_eqs = 1 << (field & 0xf);
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ce7cc6c..e92d1bf 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -61,6 +61,7 @@
 	MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR	= 1 <<  8,
 	MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR	= 1 <<  9,
 	MLX4_DEV_CAP_FLAG_DPDP		= 1 << 12,
+	MLX4_DEV_CAP_FLAG_BLH		= 1 << 15,
 	MLX4_DEV_CAP_FLAG_MEM_WINDOW	= 1 << 16,
 	MLX4_DEV_CAP_FLAG_APM		= 1 << 17,
 	MLX4_DEV_CAP_FLAG_ATOMIC	= 1 << 18,
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index 483057b..fa0d52b 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -36,6 +36,7 @@
 
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/if_arp.h>
 #include <linux/netdevice.h>
 #include <linux/socket.h>
 #include <rdma/ib_verbs.h>
@@ -60,8 +61,8 @@
 	unsigned char src_dev_addr[MAX_ADDR_LEN];
 	unsigned char dst_dev_addr[MAX_ADDR_LEN];
 	unsigned char broadcast[MAX_ADDR_LEN];
-	enum rdma_node_type dev_type;
-	struct net_device *src_dev;
+	unsigned short dev_type;
+	int bound_dev_if;
 };
 
 /**
@@ -121,40 +122,29 @@
 	memcpy(gid, dev_addr->broadcast + 4, sizeof *gid);
 }
 
-static inline void ib_addr_get_sgid(struct rdma_dev_addr *dev_addr,
-				    union ib_gid *gid)
+static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
 {
-	memcpy(gid, dev_addr->src_dev_addr + 4, sizeof *gid);
+	return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
 }
 
-static inline void ib_addr_set_sgid(struct rdma_dev_addr *dev_addr,
-				    union ib_gid *gid)
+static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
 {
-	memcpy(dev_addr->src_dev_addr + 4, gid, sizeof *gid);
+	memcpy(gid, dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
 }
 
-static inline void ib_addr_get_dgid(struct rdma_dev_addr *dev_addr,
-				    union ib_gid *gid)
+static inline void rdma_addr_set_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
 {
-	memcpy(gid, dev_addr->dst_dev_addr + 4, sizeof *gid);
+	memcpy(dev_addr->src_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
 }
 
-static inline void ib_addr_set_dgid(struct rdma_dev_addr *dev_addr,
-				    union ib_gid *gid)
+static inline void rdma_addr_get_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
 {
-	memcpy(dev_addr->dst_dev_addr + 4, gid, sizeof *gid);
+	memcpy(gid, dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), sizeof *gid);
 }
 
-static inline void iw_addr_get_sgid(struct rdma_dev_addr *dev_addr,
-				    union ib_gid *gid)
+static inline void rdma_addr_set_dgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)
 {
-	memcpy(gid, dev_addr->src_dev_addr, sizeof *gid);
-}
-
-static inline void iw_addr_get_dgid(struct rdma_dev_addr *dev_addr,
-				    union ib_gid *gid)
-{
-	memcpy(gid, dev_addr->dst_dev_addr, sizeof *gid);
+	memcpy(dev_addr->dst_dev_addr + rdma_addr_gid_offset(dev_addr), gid, sizeof *gid);
 }
 
 #endif /* IB_ADDR_H */
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index 3841c1a..1082afa 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -379,4 +379,10 @@
 			 struct ib_sa_path_rec *rec,
 			 struct ib_ah_attr *ah_attr);
 
+/**
+ * ib_sa_unpack_path - Convert a path record from MAD format to struct
+ * ib_sa_path_rec.
+ */
+void ib_sa_unpack_path(void *attribute, struct ib_sa_path_rec *rec);
+
 #endif /* IB_SA_H */
diff --git a/include/rdma/ib_user_sa.h b/include/rdma/ib_user_sa.h
index 6591201..cfc7c9b 100644
--- a/include/rdma/ib_user_sa.h
+++ b/include/rdma/ib_user_sa.h
@@ -35,6 +35,22 @@
 
 #include <linux/types.h>
 
+enum {
+	IB_PATH_GMP		= 1,
+	IB_PATH_PRIMARY		= (1<<1),
+	IB_PATH_ALTERNATE	= (1<<2),
+	IB_PATH_OUTBOUND	= (1<<3),
+	IB_PATH_INBOUND		= (1<<4),
+	IB_PATH_INBOUND_REVERSE = (1<<5),
+	IB_PATH_BIDIRECTIONAL	= IB_PATH_OUTBOUND | IB_PATH_INBOUND_REVERSE
+};
+
+struct ib_path_rec_data {
+	__u32	flags;
+	__u32	reserved;
+	__u32	path_rec[16];
+};
+
 struct ib_user_path_rec {
 	__u8	dgid[16];
 	__u8	sgid[16];
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c179318..09509ed 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1425,6 +1425,11 @@
  * @send_wr: A list of work requests to post on the send queue.
  * @bad_send_wr: On an immediate failure, this parameter will reference
  *   the work request that failed to be posted on the QP.
+ *
+ * While IBA Vol. 1 section 11.4.1.1 specifies that if an immediate
+ * error is returned, the QP state shall not be affected,
+ * ib_post_send() will return an immediate error after queueing any
+ * earlier work requests in the list.
  */
 static inline int ib_post_send(struct ib_qp *qp,
 			       struct ib_send_wr *send_wr,
diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h
index c557054..1d16502 100644
--- a/include/rdma/rdma_user_cm.h
+++ b/include/rdma/rdma_user_cm.h
@@ -215,12 +215,14 @@
 
 /* Option levels */
 enum {
-	RDMA_OPTION_ID		= 0
+	RDMA_OPTION_ID		= 0,
+	RDMA_OPTION_IB		= 1
 };
 
 /* Option details */
 enum {
-	RDMA_OPTION_ID_TOS	= 0
+	RDMA_OPTION_ID_TOS	= 0,
+	RDMA_OPTION_IB_PATH	= 1
 };
 
 struct rdma_ucm_set_option {
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 536ebe5..3b89923 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -182,8 +182,8 @@
 		ic = conn->c_transport_data;
 		dev_addr = &ic->i_cm_id->route.addr.dev_addr;
 
-		ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
-		ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+		rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
+		rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
 
 		rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
 		iinfo->max_send_wr = ic->i_send_ring.w_nr;
diff --git a/net/rds/iw.c b/net/rds/iw.c
index db224f7..b28fa85 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -184,8 +184,8 @@
 		ic = conn->c_transport_data;
 		dev_addr = &ic->i_cm_id->route.addr.dev_addr;
 
-		ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
-		ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+		rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
+		rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
 
 		rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
 		iinfo->max_send_wr = ic->i_send_ring.w_nr;