Merge branch 'srp' into for-next
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index f504c9b..1b09b73 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1215,15 +1215,18 @@
ucm_dev = container_of(dev, struct ib_ucm_device, dev);
cdev_del(&ucm_dev->cdev);
- clear_bit(ucm_dev->devnum, dev_map);
+ if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
+ clear_bit(ucm_dev->devnum, dev_map);
+ else
+ clear_bit(ucm_dev->devnum - IB_UCM_MAX_DEVICES, dev_map);
kfree(ucm_dev);
}
static const struct file_operations ucm_fops = {
- .owner = THIS_MODULE,
- .open = ib_ucm_open,
+ .owner = THIS_MODULE,
+ .open = ib_ucm_open,
.release = ib_ucm_close,
- .write = ib_ucm_write,
+ .write = ib_ucm_write,
.poll = ib_ucm_poll,
};
@@ -1237,8 +1240,32 @@
}
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+/*
+ * Device numbers beyond the static IB_UCM_MAX_DEVICES range are taken
+ * from a second, dynamically allocated chrdev region.  overflow_maj is
+ * zero until that region has been allocated; overflow_map records which
+ * of its slots are in use.
+ */
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UCM_MAX_DEVICES);
+
+/*
+ * Return the index of a free slot in overflow_map, allocating the
+ * overflow chrdev region on first use.  The slot is not marked busy
+ * here; that is left to the caller.  On failure a negative value is
+ * returned: the alloc_chrdev_region() error, or -1 when every slot is
+ * already taken.
+ */
+static int find_overflow_devnum(void)
+{
+	int slot;
+
+	if (!overflow_maj) {
+		int err = alloc_chrdev_region(&overflow_maj, 0,
+					      IB_UCM_MAX_DEVICES,
+					      "infiniband_cm");
+		if (err) {
+			printk(KERN_ERR "ucm: couldn't register dynamic device number\n");
+			return err;
+		}
+	}
+
+	slot = find_first_zero_bit(overflow_map, IB_UCM_MAX_DEVICES);
+	return slot < IB_UCM_MAX_DEVICES ? slot : -1;
+}
+
static void ib_ucm_add_one(struct ib_device *device)
{
+ int devnum;
+ dev_t base;
struct ib_ucm_device *ucm_dev;
if (!device->alloc_ucontext ||
@@ -1251,16 +1278,25 @@
ucm_dev->ib_dev = device;
- ucm_dev->devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
- if (ucm_dev->devnum >= IB_UCM_MAX_DEVICES)
- goto err;
+ devnum = find_first_zero_bit(dev_map, IB_UCM_MAX_DEVICES);
+ if (devnum >= IB_UCM_MAX_DEVICES) {
+ devnum = find_overflow_devnum();
+ if (devnum < 0)
+ goto err;
- set_bit(ucm_dev->devnum, dev_map);
+ ucm_dev->devnum = devnum + IB_UCM_MAX_DEVICES;
+ base = devnum + overflow_maj;
+ set_bit(devnum, overflow_map);
+ } else {
+ ucm_dev->devnum = devnum;
+ base = devnum + IB_UCM_BASE_DEV;
+ set_bit(devnum, dev_map);
+ }
cdev_init(&ucm_dev->cdev, &ucm_fops);
ucm_dev->cdev.owner = THIS_MODULE;
kobject_set_name(&ucm_dev->cdev.kobj, "ucm%d", ucm_dev->devnum);
- if (cdev_add(&ucm_dev->cdev, IB_UCM_BASE_DEV + ucm_dev->devnum, 1))
+ if (cdev_add(&ucm_dev->cdev, base, 1))
goto err;
ucm_dev->dev.class = &cm_class;
@@ -1281,7 +1317,10 @@
device_unregister(&ucm_dev->dev);
err_cdev:
cdev_del(&ucm_dev->cdev);
- clear_bit(ucm_dev->devnum, dev_map);
+ if (ucm_dev->devnum < IB_UCM_MAX_DEVICES)
+ clear_bit(devnum, dev_map);
+ else
+ clear_bit(devnum, overflow_map);
err:
kfree(ucm_dev);
return;
@@ -1340,6 +1379,8 @@
ib_unregister_client(&ucm_client);
class_remove_file(&cm_class, &class_attr_abi_version);
unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES);
+ if (overflow_maj)
+ unregister_chrdev_region(overflow_maj, IB_UCM_MAX_DEVICES);
idr_destroy(&ctx_id_table);
}
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 7de0296..02d360c 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -65,12 +65,9 @@
};
/*
- * Our lifetime rules for these structs are the following: each time a
- * device special file is opened, we look up the corresponding struct
- * ib_umad_port by minor in the umad_port[] table while holding the
- * port_lock. If this lookup succeeds, we take a reference on the
- * ib_umad_port's struct ib_umad_device while still holding the
- * port_lock; if the lookup fails, we fail the open(). We drop these
+ * Our lifetime rules for these structs are the following: each time a
+ * device special file is opened, we take a reference on the
+ * ib_umad_port's struct ib_umad_device. We drop these
* references in the corresponding close().
*
* In addition to references coming from open character devices, there
@@ -78,19 +75,14 @@
* module's reference taken when allocating the ib_umad_device in
* ib_umad_add_one().
*
- * When destroying an ib_umad_device, we clear all of its
- * ib_umad_ports from umad_port[] while holding port_lock before
- * dropping the module's reference to the ib_umad_device. This is
- * always safe because any open() calls will either succeed and obtain
- * a reference before we clear the umad_port[] entries, or fail after
- * we clear the umad_port[] entries.
+ * When destroying an ib_umad_device, we drop the module's reference.
*/
struct ib_umad_port {
- struct cdev *cdev;
+ struct cdev cdev;
struct device *dev;
- struct cdev *sm_cdev;
+ struct cdev sm_cdev;
struct device *sm_dev;
struct semaphore sm_sem;
@@ -136,7 +128,6 @@
static const dev_t base_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
static DEFINE_SPINLOCK(port_lock);
-static struct ib_umad_port *umad_port[IB_UMAD_MAX_PORTS];
static DECLARE_BITMAP(dev_map, IB_UMAD_MAX_PORTS);
static void ib_umad_add_one(struct ib_device *device);
@@ -496,8 +487,8 @@
ah_attr.ah_flags = IB_AH_GRH;
memcpy(ah_attr.grh.dgid.raw, packet->mad.hdr.gid, 16);
ah_attr.grh.sgid_index = packet->mad.hdr.gid_index;
- ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
- ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
+ ah_attr.grh.flow_label = be32_to_cpu(packet->mad.hdr.flow_label);
+ ah_attr.grh.hop_limit = packet->mad.hdr.hop_limit;
ah_attr.grh.traffic_class = packet->mad.hdr.traffic_class;
}
@@ -528,9 +519,9 @@
goto err_ah;
}
- packet->msg->ah = ah;
+ packet->msg->ah = ah;
packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
- packet->msg->retries = packet->mad.hdr.retries;
+ packet->msg->retries = packet->mad.hdr.retries;
packet->msg->context[0] = packet;
/* Copy MAD header. Any RMPP header is already in place. */
@@ -779,15 +770,11 @@
/*
* ib_umad_open() does not need the BKL:
*
- * - umad_port[] accesses are protected by port_lock, the
- * ib_umad_port structures are properly reference counted, and
+ * - the ib_umad_port structures are properly reference counted, and
* everything else is purely local to the file being created, so
* races against other open calls are not a problem;
* - the ioctl method does not affect any global state outside of the
* file structure being operated on;
- * - the port is added to umad_port[] as the last part of module
- * initialization so the open method will either immediately run
- * -ENXIO, or all required initialization will be done.
*/
static int ib_umad_open(struct inode *inode, struct file *filp)
{
@@ -795,13 +782,10 @@
struct ib_umad_file *file;
int ret = 0;
- spin_lock(&port_lock);
- port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE];
+ port = container_of(inode->i_cdev, struct ib_umad_port, cdev);
if (port)
kref_get(&port->umad_dev->ref);
- spin_unlock(&port_lock);
-
- if (!port)
+ else
return -ENXIO;
mutex_lock(&port->file_mutex);
@@ -872,16 +856,16 @@
}
static const struct file_operations umad_fops = {
- .owner = THIS_MODULE,
- .read = ib_umad_read,
- .write = ib_umad_write,
- .poll = ib_umad_poll,
+ .owner = THIS_MODULE,
+ .read = ib_umad_read,
+ .write = ib_umad_write,
+ .poll = ib_umad_poll,
.unlocked_ioctl = ib_umad_ioctl,
#ifdef CONFIG_COMPAT
- .compat_ioctl = ib_umad_compat_ioctl,
+ .compat_ioctl = ib_umad_compat_ioctl,
#endif
- .open = ib_umad_open,
- .release = ib_umad_close
+ .open = ib_umad_open,
+ .release = ib_umad_close
};
static int ib_umad_sm_open(struct inode *inode, struct file *filp)
@@ -892,13 +876,10 @@
};
int ret;
- spin_lock(&port_lock);
- port = umad_port[iminor(inode) - IB_UMAD_MINOR_BASE - IB_UMAD_MAX_PORTS];
+ port = container_of(inode->i_cdev, struct ib_umad_port, sm_cdev);
if (port)
kref_get(&port->umad_dev->ref);
- spin_unlock(&port_lock);
-
- if (!port)
+ else
return -ENXIO;
if (filp->f_flags & O_NONBLOCK) {
@@ -949,8 +930,8 @@
}
static const struct file_operations umad_sm_fops = {
- .owner = THIS_MODULE,
- .open = ib_umad_sm_open,
+ .owner = THIS_MODULE,
+ .open = ib_umad_sm_open,
.release = ib_umad_sm_close
};
@@ -990,16 +971,51 @@
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+/*
+ * Ports beyond the static IB_UMAD_MAX_PORTS range get their device
+ * numbers from a second, dynamically allocated chrdev region.
+ * overflow_maj is zero until that region has been allocated.  The region
+ * covers IB_UMAD_MAX_PORTS * 2 minors while overflow_map has one slot
+ * per port.
+ */
+static dev_t overflow_maj;
+static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
+
+/*
+ * Return the index of a free slot in overflow_map, allocating the
+ * overflow chrdev region on first use.  The slot is not marked busy
+ * here; that is left to the caller.  On failure a negative value is
+ * returned: the alloc_chrdev_region() error, or -1 when every slot is
+ * already taken.
+ */
+static int find_overflow_devnum(void)
+{
+	int slot;
+
+	if (!overflow_maj) {
+		int err = alloc_chrdev_region(&overflow_maj, 0,
+					      IB_UMAD_MAX_PORTS * 2,
+					      "infiniband_mad");
+		if (err) {
+			printk(KERN_ERR "user_mad: couldn't register dynamic device number\n");
+			return err;
+		}
+	}
+
+	slot = find_first_zero_bit(overflow_map, IB_UMAD_MAX_PORTS);
+	return slot < IB_UMAD_MAX_PORTS ? slot : -1;
+}
+
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_port *port)
{
+ int devnum;
+ dev_t base;
+
spin_lock(&port_lock);
- port->dev_num = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
- if (port->dev_num >= IB_UMAD_MAX_PORTS) {
+ devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
+ if (devnum >= IB_UMAD_MAX_PORTS) {
spin_unlock(&port_lock);
- return -1;
+ devnum = find_overflow_devnum();
+ if (devnum < 0)
+ return -1;
+
+ spin_lock(&port_lock);
+ port->dev_num = devnum + IB_UMAD_MAX_PORTS;
+ base = devnum + overflow_maj;
+ set_bit(devnum, overflow_map);
+ } else {
+ port->dev_num = devnum;
+ base = devnum + base_dev;
+ set_bit(devnum, dev_map);
}
- set_bit(port->dev_num, dev_map);
spin_unlock(&port_lock);
port->ib_dev = device;
@@ -1008,17 +1024,14 @@
mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
- port->cdev = cdev_alloc();
- if (!port->cdev)
- return -1;
- port->cdev->owner = THIS_MODULE;
- port->cdev->ops = &umad_fops;
- kobject_set_name(&port->cdev->kobj, "umad%d", port->dev_num);
- if (cdev_add(port->cdev, base_dev + port->dev_num, 1))
+ cdev_init(&port->cdev, &umad_fops);
+ port->cdev.owner = THIS_MODULE;
+ kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
+ if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
port->dev = device_create(umad_class, device->dma_device,
- port->cdev->dev, port,
+ port->cdev.dev, port,
"umad%d", port->dev_num);
if (IS_ERR(port->dev))
goto err_cdev;
@@ -1028,17 +1041,15 @@
if (device_create_file(port->dev, &dev_attr_port))
goto err_dev;
- port->sm_cdev = cdev_alloc();
- if (!port->sm_cdev)
- goto err_dev;
- port->sm_cdev->owner = THIS_MODULE;
- port->sm_cdev->ops = &umad_sm_fops;
- kobject_set_name(&port->sm_cdev->kobj, "issm%d", port->dev_num);
- if (cdev_add(port->sm_cdev, base_dev + port->dev_num + IB_UMAD_MAX_PORTS, 1))
+ base += IB_UMAD_MAX_PORTS;
+ cdev_init(&port->sm_cdev, &umad_sm_fops);
+ port->sm_cdev.owner = THIS_MODULE;
+ kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
+ if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
port->sm_dev = device_create(umad_class, device->dma_device,
- port->sm_cdev->dev, port,
+ port->sm_cdev.dev, port,
"issm%d", port->dev_num);
if (IS_ERR(port->sm_dev))
goto err_sm_cdev;
@@ -1048,24 +1059,23 @@
if (device_create_file(port->sm_dev, &dev_attr_port))
goto err_sm_dev;
- spin_lock(&port_lock);
- umad_port[port->dev_num] = port;
- spin_unlock(&port_lock);
-
return 0;
err_sm_dev:
- device_destroy(umad_class, port->sm_cdev->dev);
+ device_destroy(umad_class, port->sm_cdev.dev);
err_sm_cdev:
- cdev_del(port->sm_cdev);
+ cdev_del(&port->sm_cdev);
err_dev:
- device_destroy(umad_class, port->cdev->dev);
+ device_destroy(umad_class, port->cdev.dev);
err_cdev:
- cdev_del(port->cdev);
- clear_bit(port->dev_num, dev_map);
+ cdev_del(&port->cdev);
+ if (port->dev_num < IB_UMAD_MAX_PORTS)
+ clear_bit(devnum, dev_map);
+ else
+ clear_bit(devnum, overflow_map);
return -1;
}
@@ -1079,15 +1089,11 @@
dev_set_drvdata(port->dev, NULL);
dev_set_drvdata(port->sm_dev, NULL);
- device_destroy(umad_class, port->cdev->dev);
- device_destroy(umad_class, port->sm_cdev->dev);
+ device_destroy(umad_class, port->cdev.dev);
+ device_destroy(umad_class, port->sm_cdev.dev);
- cdev_del(port->cdev);
- cdev_del(port->sm_cdev);
-
- spin_lock(&port_lock);
- umad_port[port->dev_num] = NULL;
- spin_unlock(&port_lock);
+ cdev_del(&port->cdev);
+ cdev_del(&port->sm_cdev);
mutex_lock(&port->file_mutex);
@@ -1106,7 +1112,10 @@
mutex_unlock(&port->file_mutex);
- clear_bit(port->dev_num, dev_map);
+ if (port->dev_num < IB_UMAD_MAX_PORTS)
+ clear_bit(port->dev_num, dev_map);
+ else
+ clear_bit(port->dev_num - IB_UMAD_MAX_PORTS, overflow_map);
}
static void ib_umad_add_one(struct ib_device *device)
@@ -1214,6 +1223,8 @@
ib_unregister_client(&umad_client);
class_destroy(umad_class);
unregister_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2);
+ if (overflow_maj)
+ unregister_chrdev_region(overflow_maj, IB_UMAD_MAX_PORTS * 2);
}
module_init(ib_umad_init);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index b3ea958..e54d9ac 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -41,6 +41,7 @@
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/completion.h>
+#include <linux/cdev.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
@@ -69,23 +70,23 @@
struct ib_uverbs_device {
struct kref ref;
+ int num_comp_vectors;
struct completion comp;
- int devnum;
- struct cdev *cdev;
struct device *dev;
struct ib_device *ib_dev;
- int num_comp_vectors;
+ int devnum;
+ struct cdev cdev;
};
struct ib_uverbs_event_file {
struct kref ref;
+ int is_async;
struct ib_uverbs_file *uverbs_file;
spinlock_t lock;
+ int is_closed;
wait_queue_head_t poll_wait;
struct fasync_struct *async_queue;
struct list_head event_list;
- int is_async;
- int is_closed;
};
struct ib_uverbs_file {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5f284ff..dbf0451 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -43,7 +43,6 @@
#include <linux/sched.h>
#include <linux/file.h>
#include <linux/mount.h>
-#include <linux/cdev.h>
#include <asm/uaccess.h>
@@ -75,40 +74,39 @@
DEFINE_IDR(ib_uverbs_srq_idr);
static DEFINE_SPINLOCK(map_lock);
-static struct ib_uverbs_device *dev_table[IB_UVERBS_MAX_DEVICES];
static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
const char __user *buf, int in_len,
int out_len) = {
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
+ [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
+ [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
+ [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
+ [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
+ [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
+ [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
+ [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
- [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
- [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
- [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
- [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
- [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
- [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
+ [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
+ [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
+ [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
+ [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
+ [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
+ [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
+ [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
+ [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
+ [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
+ [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
+ [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
+ [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
+ [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
+ [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
+ [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
+ [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
+ [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
+ [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
+ [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
+ [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
};
static struct vfsmount *uverbs_event_mnt;
@@ -370,7 +368,7 @@
static const struct file_operations uverbs_event_fops = {
.owner = THIS_MODULE,
- .read = ib_uverbs_event_read,
+ .read = ib_uverbs_event_read,
.poll = ib_uverbs_event_poll,
.release = ib_uverbs_event_close,
.fasync = ib_uverbs_event_fasync
@@ -617,14 +615,12 @@
/*
* ib_uverbs_open() does not need the BKL:
*
- * - dev_table[] accesses are protected by map_lock, the
- * ib_uverbs_device structures are properly reference counted, and
+ * - the ib_uverbs_device structures are properly reference counted and
* everything else is purely local to the file being created, so
* races against other open calls are not a problem;
* - there is no ioctl method to race against;
- * - the device is added to dev_table[] as the last part of module
- * initialization, the open method will either immediately run
- * -ENXIO, or all required initialization will be done.
+ * - the open method will either immediately return -ENXIO, or all
+ * required initialization will be done.
*/
static int ib_uverbs_open(struct inode *inode, struct file *filp)
{
@@ -632,13 +628,10 @@
struct ib_uverbs_file *file;
int ret;
- spin_lock(&map_lock);
- dev = dev_table[iminor(inode) - IB_UVERBS_BASE_MINOR];
+ dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
if (dev)
kref_get(&dev->ref);
- spin_unlock(&map_lock);
-
- if (!dev)
+ else
return -ENXIO;
if (!try_module_get(dev->ib_dev->owner)) {
@@ -685,17 +678,17 @@
}
static const struct file_operations uverbs_fops = {
- .owner = THIS_MODULE,
- .write = ib_uverbs_write,
- .open = ib_uverbs_open,
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
+ .open = ib_uverbs_open,
.release = ib_uverbs_close
};
static const struct file_operations uverbs_mmap_fops = {
- .owner = THIS_MODULE,
- .write = ib_uverbs_write,
+ .owner = THIS_MODULE,
+ .write = ib_uverbs_write,
.mmap = ib_uverbs_mmap,
- .open = ib_uverbs_open,
+ .open = ib_uverbs_open,
.release = ib_uverbs_close
};
@@ -735,8 +728,38 @@
}
static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL);
+/* Zero until the overflow chrdev region has been allocated. */
+static dev_t overflow_maj;
+/* One bit per in-use slot of the overflow region. */
+static DECLARE_BITMAP(overflow_map, IB_UVERBS_MAX_DEVICES);
+
+/*
+ * Once the static range of IB_UVERBS_MAX_DEVICES is exhausted, a second
+ * chrdev region with a dynamically assigned major is requested, which
+ * doubles the number of devices that can be supported.  Crude, but simple.
+ *
+ * Returns the index of a free slot in overflow_map without marking it
+ * busy (the caller does that).  On failure a negative value is returned:
+ * the alloc_chrdev_region() error, or -1 when every slot is taken.
+ */
+static int find_overflow_devnum(void)
+{
+	int slot;
+
+	if (!overflow_maj) {
+		int err = alloc_chrdev_region(&overflow_maj, 0,
+					      IB_UVERBS_MAX_DEVICES,
+					      "infiniband_verbs");
+		if (err) {
+			printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
+			return err;
+		}
+	}
+
+	slot = find_first_zero_bit(overflow_map, IB_UVERBS_MAX_DEVICES);
+	return slot < IB_UVERBS_MAX_DEVICES ? slot : -1;
+}
+
static void ib_uverbs_add_one(struct ib_device *device)
{
+ int devnum;
+ dev_t base;
struct ib_uverbs_device *uverbs_dev;
if (!device->alloc_ucontext)
@@ -750,28 +773,36 @@
init_completion(&uverbs_dev->comp);
spin_lock(&map_lock);
- uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
- if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) {
+ devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
+ if (devnum >= IB_UVERBS_MAX_DEVICES) {
spin_unlock(&map_lock);
- goto err;
+ devnum = find_overflow_devnum();
+ if (devnum < 0)
+ goto err;
+
+ spin_lock(&map_lock);
+ uverbs_dev->devnum = devnum + IB_UVERBS_MAX_DEVICES;
+ base = devnum + overflow_maj;
+ set_bit(devnum, overflow_map);
+ } else {
+ uverbs_dev->devnum = devnum;
+ base = devnum + IB_UVERBS_BASE_DEV;
+ set_bit(devnum, dev_map);
}
- set_bit(uverbs_dev->devnum, dev_map);
spin_unlock(&map_lock);
uverbs_dev->ib_dev = device;
uverbs_dev->num_comp_vectors = device->num_comp_vectors;
- uverbs_dev->cdev = cdev_alloc();
- if (!uverbs_dev->cdev)
- goto err;
- uverbs_dev->cdev->owner = THIS_MODULE;
- uverbs_dev->cdev->ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
- kobject_set_name(&uverbs_dev->cdev->kobj, "uverbs%d", uverbs_dev->devnum);
- if (cdev_add(uverbs_dev->cdev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1))
+ cdev_init(&uverbs_dev->cdev, NULL);
+ uverbs_dev->cdev.owner = THIS_MODULE;
+ uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+ kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
+ if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
- uverbs_dev->cdev->dev, uverbs_dev,
+ uverbs_dev->cdev.dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
goto err_cdev;
@@ -781,20 +812,19 @@
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
- spin_lock(&map_lock);
- dev_table[uverbs_dev->devnum] = uverbs_dev;
- spin_unlock(&map_lock);
-
ib_set_client_data(device, &uverbs_client, uverbs_dev);
return;
err_class:
- device_destroy(uverbs_class, uverbs_dev->cdev->dev);
+ device_destroy(uverbs_class, uverbs_dev->cdev.dev);
err_cdev:
- cdev_del(uverbs_dev->cdev);
- clear_bit(uverbs_dev->devnum, dev_map);
+ cdev_del(&uverbs_dev->cdev);
+ if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
+ clear_bit(devnum, dev_map);
+ else
+ clear_bit(devnum, overflow_map);
err:
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
@@ -811,14 +841,13 @@
return;
dev_set_drvdata(uverbs_dev->dev, NULL);
- device_destroy(uverbs_class, uverbs_dev->cdev->dev);
- cdev_del(uverbs_dev->cdev);
+ device_destroy(uverbs_class, uverbs_dev->cdev.dev);
+ cdev_del(&uverbs_dev->cdev);
- spin_lock(&map_lock);
- dev_table[uverbs_dev->devnum] = NULL;
- spin_unlock(&map_lock);
-
- clear_bit(uverbs_dev->devnum, dev_map);
+ if (uverbs_dev->devnum < IB_UVERBS_MAX_DEVICES)
+ clear_bit(uverbs_dev->devnum, dev_map);
+ else
+ clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
wait_for_completion(&uverbs_dev->comp);
@@ -908,6 +937,8 @@
unregister_filesystem(&uverbs_event_fs);
class_destroy(uverbs_class);
unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES);
+ if (overflow_maj)
+ unregister_chrdev_region(overflow_maj, IB_UVERBS_MAX_DEVICES);
idr_destroy(&ib_uverbs_pd_idr);
idr_destroy(&ib_uverbs_mr_idr);
idr_destroy(&ib_uverbs_mw_idr);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index 0677fc7..a28e862 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -109,7 +109,6 @@
while (!CQ_VLD_ENTRY(rptr, cq->size_log2, cqe)) {
udelay(1);
if (i++ > 1000000) {
- BUG_ON(1);
printk(KERN_ERR "%s: stalled rnic\n",
rdev_p->dev_name);
return -EIO;
@@ -155,7 +154,7 @@
return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
}
-int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
+int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq, int kernel)
{
struct rdma_cq_setup setup;
int size = (1UL << (cq->size_log2)) * sizeof(struct t3_cqe);
@@ -163,12 +162,12 @@
cq->cqid = cxio_hal_get_cqid(rdev_p->rscp);
if (!cq->cqid)
return -ENOMEM;
- cq->sw_queue = kzalloc(size, GFP_KERNEL);
- if (!cq->sw_queue)
- return -ENOMEM;
- cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
- (1UL << (cq->size_log2)) *
- sizeof(struct t3_cqe),
+ if (kernel) {
+ cq->sw_queue = kzalloc(size, GFP_KERNEL);
+ if (!cq->sw_queue)
+ return -ENOMEM;
+ }
+ cq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), size,
&(cq->dma_addr), GFP_KERNEL);
if (!cq->queue) {
kfree(cq->sw_queue);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index f3d440c..073373c 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -53,7 +53,7 @@
#define T3_MAX_PBL_SIZE 256
#define T3_MAX_RQ_SIZE 1024
#define T3_MAX_QP_DEPTH (T3_MAX_RQ_SIZE-1)
-#define T3_MAX_CQ_DEPTH 8192
+#define T3_MAX_CQ_DEPTH 262144
#define T3_MAX_NUM_STAG (1<<15)
#define T3_MAX_MR_SIZE 0x100000000ULL
#define T3_PAGESIZE_MASK 0xffff000 /* 4KB-128MB */
@@ -157,7 +157,7 @@
void cxio_rdev_close(struct cxio_rdev *rdev);
int cxio_hal_cq_op(struct cxio_rdev *rdev, struct t3_cq *cq,
enum t3_cq_opcode op, u32 credit);
-int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
+int cxio_create_cq(struct cxio_rdev *rdev, struct t3_cq *cq, int kernel);
int cxio_destroy_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
int cxio_resize_cq(struct cxio_rdev *rdev, struct t3_cq *cq);
void cxio_release_ucontext(struct cxio_rdev *rdev, struct cxio_ucontext *uctx);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_wr.h b/drivers/infiniband/hw/cxgb3/cxio_wr.h
index a197a5b..15073b2 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_wr.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_wr.h
@@ -730,7 +730,22 @@
static inline void cxio_set_wq_in_error(struct t3_wq *wq)
{
- wq->queue->wq_in_err.err = 1;
+ wq->queue->wq_in_err.err |= 1;
+}
+
+/* Set the doorbell-disabled flag (value 2) in the queue's wq_in_err.err. */
+static inline void cxio_disable_wq_db(struct t3_wq *wq)
+{
+	wq->queue->wq_in_err.err = wq->queue->wq_in_err.err | 2;
+}
+
+/* Clear the doorbell-disabled flag (value 2) in the queue's wq_in_err.err. */
+static inline void cxio_enable_wq_db(struct t3_wq *wq)
+{
+	wq->queue->wq_in_err.err = wq->queue->wq_in_err.err & ~2;
+}
+
+/* Return 1 if the doorbell-disabled flag (value 2) is clear, 0 if it is set. */
+static inline int cxio_wq_db_enabled(struct t3_wq *wq)
+{
+	return (wq->queue->wq_in_err.err & 2) == 0;
}
static inline struct t3_cqe *cxio_next_hw_cqe(struct t3_cq *cq)
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index b0ea010..ee1d8b4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -65,6 +65,46 @@
static LIST_HEAD(dev_list);
static DEFINE_MUTEX(dev_mutex);
+/*
+ * idr_for_each() callback: @p is the iwch_qp stored in the idr.  Marks
+ * the QP's work queue as doorbell-disabled.  @id and @data are unused.
+ * Always returns 0.
+ */
+static int disable_qp_db(int id, void *p, void *data)
+{
+	struct iwch_qp *qp = p;
+	struct t3_wq *wq = &qp->wq;
+
+	cxio_disable_wq_db(wq);
+	return 0;
+}
+
+/*
+ * idr_for_each() callback: @p is the iwch_qp stored in the idr.  When
+ * @data is non-NULL the doorbell is rung for this QP's id through the
+ * control QP's doorbell before the doorbell-disabled flag is cleared.
+ * @id is unused.  Always returns 0.
+ */
+static int enable_qp_db(int id, void *p, void *data)
+{
+	struct iwch_qp *qp = p;
+	struct t3_wq *wq = &qp->wq;
+
+	if (data != NULL)
+		ring_doorbell(qp->rhp->rdev.ctrl_qp.doorbell, wq->qpid);
+	cxio_enable_wq_db(wq);
+	return 0;
+}
+
+/*
+ * Mark every QP in the device's qpidr as doorbell-disabled.  The walk
+ * runs under rnicp->lock taken with spin_lock_irq().
+ */
+static void disable_dbs(struct iwch_dev *rnicp)
+{
+	struct idr *qps = &rnicp->qpidr;
+
+	spin_lock_irq(&rnicp->lock);
+	idr_for_each(qps, disable_qp_db, NULL);
+	spin_unlock_irq(&rnicp->lock);
+}
+
+/*
+ * Clear the doorbell-disabled flag on every QP in the device's qpidr.
+ * @ring_db is handed to the per-QP callback through its data pointer;
+ * a nonzero value makes the callback ring each QP's doorbell first.
+ * The walk runs under rnicp->lock taken with spin_lock_irq().
+ */
+static void enable_dbs(struct iwch_dev *rnicp, int ring_db)
+{
+	void *ring = (void *)(unsigned long)ring_db;
+	struct idr *qps = &rnicp->qpidr;
+
+	spin_lock_irq(&rnicp->lock);
+	idr_for_each(qps, enable_qp_db, ring);
+	spin_unlock_irq(&rnicp->lock);
+}
+
+/*
+ * Work handler for the device's db_drop_task delayed work: recovers the
+ * owning iwch_dev from @work, then re-enables doorbells on all of its
+ * QPs, ringing each one.
+ */
+static void iwch_db_drop_task(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct iwch_dev *rnicp;
+
+	dwork = container_of(work, struct delayed_work, work);
+	rnicp = container_of(dwork, struct iwch_dev, db_drop_task);
+	enable_dbs(rnicp, 1);
+}
+
static void rnic_init(struct iwch_dev *rnicp)
{
PDBG("%s iwch_dev %p\n", __func__, rnicp);
@@ -72,6 +112,7 @@
idr_init(&rnicp->qpidr);
idr_init(&rnicp->mmidr);
spin_lock_init(&rnicp->lock);
+ INIT_DELAYED_WORK(&rnicp->db_drop_task, iwch_db_drop_task);
rnicp->attr.max_qps = T3_MAX_NUM_QP - 32;
rnicp->attr.max_wrs = T3_MAX_QP_DEPTH;
@@ -147,6 +188,8 @@
mutex_lock(&dev_mutex);
list_for_each_entry_safe(dev, tmp, &dev_list, entry) {
if (dev->rdev.t3cdev_p == tdev) {
+ dev->rdev.flags = CXIO_ERROR_FATAL;
+ cancel_delayed_work_sync(&dev->db_drop_task);
list_del(&dev->entry);
iwch_unregister_device(dev);
cxio_rdev_close(&dev->rdev);
@@ -165,7 +208,8 @@
struct cxio_rdev *rdev = tdev->ulp;
struct iwch_dev *rnicp;
struct ib_event event;
- u32 portnum = port_id + 1;
+ u32 portnum = port_id + 1;
+ int dispatch = 0;
if (!rdev)
return;
@@ -174,21 +218,49 @@
case OFFLOAD_STATUS_DOWN: {
rdev->flags = CXIO_ERROR_FATAL;
event.event = IB_EVENT_DEVICE_FATAL;
+ dispatch = 1;
break;
}
case OFFLOAD_PORT_DOWN: {
event.event = IB_EVENT_PORT_ERR;
+ dispatch = 1;
break;
}
case OFFLOAD_PORT_UP: {
event.event = IB_EVENT_PORT_ACTIVE;
+ dispatch = 1;
+ break;
+ }
+ case OFFLOAD_DB_FULL: {
+ disable_dbs(rnicp);
+ break;
+ }
+ case OFFLOAD_DB_EMPTY: {
+ enable_dbs(rnicp, 1);
+ break;
+ }
+ case OFFLOAD_DB_DROP: {
+ unsigned long delay = 1000;
+ unsigned short r;
+
+ disable_dbs(rnicp);
+ get_random_bytes(&r, 2);
+ delay += r & 1023;
+
+ /*
+ * delay is between 1000-2023 usecs.
+ */
+ schedule_delayed_work(&rnicp->db_drop_task,
+ usecs_to_jiffies(delay));
break;
}
}
- event.device = &rnicp->ibdev;
- event.element.port_num = portnum;
- ib_dispatch_event(&event);
+ if (dispatch) {
+ event.device = &rnicp->ibdev;
+ event.element.port_num = portnum;
+ ib_dispatch_event(&event);
+ }
return;
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 8473550..a1c4457 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -36,6 +36,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
+#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>
@@ -110,6 +111,7 @@
struct idr mmidr;
spinlock_t lock;
struct list_head entry;
+ struct delayed_work db_drop_task;
};
static inline struct iwch_dev *to_iwch_dev(struct ib_device *ibdev)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index ed71755..47b35c6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -187,7 +187,7 @@
entries = roundup_pow_of_two(entries);
chp->cq.size_log2 = ilog2(entries);
- if (cxio_create_cq(&rhp->rdev, &chp->cq)) {
+ if (cxio_create_cq(&rhp->rdev, &chp->cq, !ucontext)) {
kfree(chp);
return ERR_PTR(-ENOMEM);
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 3eb8cec..b4d893d 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -452,7 +452,8 @@
++(qhp->wq.sq_wptr);
}
spin_unlock_irqrestore(&qhp->lock, flag);
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ if (cxio_wq_db_enabled(&qhp->wq))
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
@@ -514,7 +515,8 @@
num_wrs--;
}
spin_unlock_irqrestore(&qhp->lock, flag);
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ if (cxio_wq_db_enabled(&qhp->wq))
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
out:
if (err)
@@ -597,7 +599,8 @@
++(qhp->wq.sq_wptr);
spin_unlock_irqrestore(&qhp->lock, flag);
- ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
+ if (cxio_wq_db_enabled(&qhp->wq))
+ ring_doorbell(qhp->wq.doorbell, qhp->wq.qpid);
return err;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 42be0b1..b2b6fea 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -548,11 +548,10 @@
struct ehca_eq *eq = &shca->eq;
struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
u64 eqe_value, ret;
- unsigned long flags;
int eqe_cnt, i;
int eq_empty = 0;
- spin_lock_irqsave(&eq->irq_spinlock, flags);
+ spin_lock(&eq->irq_spinlock);
if (is_irq) {
const int max_query_cnt = 100;
int query_cnt = 0;
@@ -643,7 +642,7 @@
} while (1);
unlock_irq_spinlock:
- spin_unlock_irqrestore(&eq->irq_spinlock, flags);
+ spin_unlock(&eq->irq_spinlock);
}
void ehca_tasklet_eq(unsigned long data)
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 0338f1f..b105f66 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -55,9 +55,7 @@
/*
* attributes not supported by query qp
*/
-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
- IB_QP_MAX_QP_RD_ATOMIC | \
- IB_QP_ACCESS_FLAGS | \
+#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS | \
IB_QP_EN_SQD_ASYNC_NOTIFY)
/*
diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
index 8c1213f..dba8f9f 100644
--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
+++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
@@ -222,7 +222,7 @@
{
int ret;
- if (!port_num || port_num > ibdev->phys_port_cnt)
+ if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
return IB_MAD_RESULT_FAILURE;
/* accept only pma request */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 2a97c96..b377671 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1214,7 +1214,7 @@
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
- struct ib_device *ib_dev = &to_mdev(sqp->qp.ibqp.device)->ib_dev;
+ struct ib_device *ib_dev = sqp->qp.ibqp.device;
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index b9d09ba..4272c52 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -110,6 +110,7 @@
static struct pci_device_id nes_pci_table[] = {
{PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID},
+ {PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020_KR, PCI_ANY_ID, PCI_ANY_ID},
{0}
};
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 9884056..cc78fee 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -64,8 +64,9 @@
* NetEffect PCI vendor id and NE010 PCI device id.
*/
#ifndef PCI_VENDOR_ID_NETEFFECT /* not in pci.ids yet */
-#define PCI_VENDOR_ID_NETEFFECT 0x1678
-#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
+#define PCI_VENDOR_ID_NETEFFECT 0x1678
+#define PCI_DEVICE_ID_NETEFFECT_NE020 0x0100
+#define PCI_DEVICE_ID_NETEFFECT_NE020_KR 0x0110
#endif
#define NE020_REV 4
@@ -193,8 +194,8 @@
extern u32 cm_packets_received;
extern u32 cm_packets_dropped;
extern u32 cm_packets_retrans;
-extern u32 cm_listens_created;
-extern u32 cm_listens_destroyed;
+extern atomic_t cm_listens_created;
+extern atomic_t cm_listens_destroyed;
extern u32 cm_backlog_drops;
extern atomic_t cm_loopbacks;
extern atomic_t cm_nodes_created;
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 39468c27..2a49ee4 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -67,8 +67,8 @@
u32 cm_packets_retrans;
u32 cm_packets_created;
u32 cm_packets_received;
-u32 cm_listens_created;
-u32 cm_listens_destroyed;
+atomic_t cm_listens_created;
+atomic_t cm_listens_destroyed;
u32 cm_backlog_drops;
atomic_t cm_loopbacks;
atomic_t cm_nodes_created;
@@ -1011,9 +1011,10 @@
event.cm_info.loc_port =
loopback->loc_port;
event.cm_info.cm_id = loopback->cm_id;
+ add_ref_cm_node(loopback);
+ loopback->state = NES_CM_STATE_CLOSED;
cm_event_connect_error(&event);
cm_node->state = NES_CM_STATE_LISTENER_DESTROYED;
- loopback->state = NES_CM_STATE_CLOSED;
rem_ref_cm_node(cm_node->cm_core,
cm_node);
@@ -1042,7 +1043,7 @@
kfree(listener);
listener = NULL;
ret = 0;
- cm_listens_destroyed++;
+ atomic_inc(&cm_listens_destroyed);
} else {
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
}
@@ -3172,7 +3173,7 @@
g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
return err;
}
- cm_listens_created++;
+ atomic_inc(&cm_listens_created);
}
cm_id->add_ref(cm_id);
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index b1c2cbb..ce7f538 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -748,16 +748,28 @@
if (hw_rev != NE020_REV) {
/* init serdes 0 */
- if (wide_ppm_offset && (nesadapter->phy_type[0] == NES_PHY_TYPE_CX4))
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA);
- else
+ switch (nesadapter->phy_type[0]) {
+ case NES_PHY_TYPE_CX4:
+ if (wide_ppm_offset)
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA);
+ else
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
+ break;
+ case NES_PHY_TYPE_KR:
nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
-
- if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
+ break;
+ case NES_PHY_TYPE_PUMA_1G:
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
sds |= 0x00000100;
nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds);
+ break;
+ default:
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
+ break;
}
+
if (!OneG_Mode)
nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
@@ -778,6 +790,9 @@
if (wide_ppm_offset)
nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA);
break;
+ case NES_PHY_TYPE_KR:
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
+ break;
case NES_PHY_TYPE_PUMA_1G:
sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
sds |= 0x000000100;
@@ -1279,115 +1294,100 @@
/**
- * nes_init_phy
+ * nes_init_1g_phy - reset a 1G PHY and program its control registers; returns 0, or -1 if the reset bit never cleared
*/
-int nes_init_phy(struct nes_device *nesdev)
+int nes_init_1g_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index) /* phy_type is not referenced in this body */
{
- struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 counter = 0;
+ u16 phy_data;
+ int ret = 0;
+
+ nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000);
+
+ /* Reset the PHY: write 0x8000 to register 0, then poll until that bit reads back clear */
+ nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000);
+ udelay(100);
+ counter = 0;
+ do {
+ nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+ if (counter++ > 100) { /* poll budget exhausted while bit 0x8000 was still set */
+ ret = -1; /* reported by the final return; the setup below still runs */
+ break;
+ }
+ } while (phy_data & 0x8000);
+
+ /* Setting no phy loopback */
+ phy_data &= 0xbfff;
+ phy_data |= 0x1140;
+ nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
+ nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+ nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data);
+ nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data);
+
+ /* Setting the interrupt mask */
+ nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee);
+ nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
+
+ /* turning on flow control */
+ nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
+ nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
+
+ /* Clear Half duplex */
+ nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100));
+ nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
+
+ nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+ nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300);
+
+ return ret; /* 0, or -1 when the reset poll above timed out */
+}
+
+
+/**
+ * nes_init_2025_phy - configure an ARGUS, SFP_D or KR PHY over 10G MDIO; returns 0, or 1 if the tracking wait ran out of retries
+ */
+int nes_init_2025_phy(struct nes_device *nesdev, u8 phy_type, u8 phy_index)
+{
+ u32 temp_phy_data = 0;
+ u32 temp_phy_data2 = 0;
u32 counter = 0;
u32 sds;
u32 mac_index = nesdev->mac_index;
- u32 tx_config = 0;
- u16 phy_data;
- u32 temp_phy_data = 0;
- u32 temp_phy_data2 = 0;
- u8 phy_type = nesadapter->phy_type[mac_index];
- u8 phy_index = nesadapter->phy_index[mac_index];
+ int ret = 0;
+ unsigned int first_attempt = 1; /* permits one AMCC PHY reset-and-retry in the tracking loop */
- if ((nesadapter->OneG_Mode) &&
- (phy_type != NES_PHY_TYPE_PUMA_1G)) {
- nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
- if (phy_type == NES_PHY_TYPE_1G) {
- tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
- tx_config &= 0xFFFFFFE3;
- tx_config |= 0x04;
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
- }
+ /* Check firmware heartbeat: sample register 0xd7ee twice, 1500us apart, and compare */
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ udelay(1500);
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000);
-
- /* Reset the PHY */
- nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000);
- udelay(100);
- counter = 0;
- do {
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- if (counter++ > 100)
- break;
- } while (phy_data & 0x8000);
-
- /* Setting no phy loopback */
- phy_data &= 0xbfff;
- phy_data |= 0x1140;
- nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data);
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data);
- nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data);
-
- /* Setting the interrupt mask */
- nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee);
- nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
-
- /* turning on flow control */
- nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
- nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
-
- /* Clear Half duplex */
- nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100));
- nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
-
- nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300);
-
- return 0;
- }
-
- if ((phy_type == NES_PHY_TYPE_IRIS) ||
- (phy_type == NES_PHY_TYPE_ARGUS) ||
- (phy_type == NES_PHY_TYPE_SFP_D)) {
- /* setup 10G MDIO operation */
- tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
- tx_config &= 0xFFFFFFE3;
- tx_config |= 0x15;
- nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
- }
- if ((phy_type == NES_PHY_TYPE_ARGUS) ||
- (phy_type == NES_PHY_TYPE_SFP_D)) {
- u32 first_time = 1;
-
- /* Check firmware heartbeat */
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+ if (temp_phy_data != temp_phy_data2) { /* the two samples differ: heartbeat counter is moving */
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- udelay(1500);
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ if ((temp_phy_data & 0xff) > 0x20)
+ return 0; /* early exit: none of the configuration below is written */
+ printk(PFX "Reinitialize external PHY\n");
+ }
- if (temp_phy_data != temp_phy_data2) {
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if ((temp_phy_data & 0xff) > 0x20)
- return 0;
- printk(PFX "Reinitializing PHY\n");
- }
+ /* no heartbeat, configure the PHY */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
- /* no heartbeat, configure the PHY */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
+ switch (phy_type) { /* per-type register values; a type with no case gets only the common writes */
+ case NES_PHY_TYPE_ARGUS:
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
- if (phy_type == NES_PHY_TYPE_ARGUS) {
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
- } else {
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
- }
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
@@ -1395,71 +1395,151 @@
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
+ break;
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528);
+ case NES_PHY_TYPE_SFP_D:
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
- /* Bring PHY out of reset */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002);
+ /* setup LEDs */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
+ break;
- /* Check for heartbeat */
- counter = 0;
- mdelay(690);
+ case NES_PHY_TYPE_KR:
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0010);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0013);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0080);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
+
+ /* setup LEDs */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x000B);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x0003);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0004);
+
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0022, 0x406D);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0023, 0x0020);
+ break;
+ }
+
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528);
+
+ /* Bring PHY out of reset */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002);
+
+ /* Check for heartbeat: after a 690ms delay, poll 0xd7ee until its value changes */
+ counter = 0;
+ mdelay(690);
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+ temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ do {
+ if (counter++ > 150) {
+ printk(PFX "No PHY heartbeat\n"); /* logged only; ret is not changed here */
+ break;
+ }
+ mdelay(1);
nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+ temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+ } while ((temp_phy_data2 == temp_phy_data));
+
+ /* wait for tracking: poll 0xd7fd every 10ms until its low byte is at least 0x30 */
+ counter = 0;
+ do {
+ nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- do {
- if (counter++ > 150) {
- printk(PFX "No PHY heartbeat\n");
+ if (counter++ > 300) {
+ if (((temp_phy_data & 0xff) == 0x0) && first_attempt) {
+ first_attempt = 0;
+ counter = 0;
+ /* reset AMCC PHY and try again */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
+ continue;
+ } else {
+ ret = 1; /* retry budget exhausted; the remaining setup below still runs */
break;
}
- mdelay(1);
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
- temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- } while ((temp_phy_data2 == temp_phy_data));
+ }
+ mdelay(10);
+ } while ((temp_phy_data & 0xff) < 0x30);
- /* wait for tracking */
- counter = 0;
- do {
- nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if (counter++ > 300) {
- if (((temp_phy_data & 0xff) == 0x0) && first_time) {
- first_time = 0;
- counter = 0;
- /* reset AMCC PHY and try again */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x00c0);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0xe854, 0x0040);
- continue;
- } else {
- printk(PFX "PHY did not track\n");
- break;
- }
- }
- mdelay(10);
- } while ((temp_phy_data & 0xff) < 0x30);
-
- /* setup signal integrity */
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
- nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
+ /* setup signal integrity */
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
+ if (phy_type == NES_PHY_TYPE_KR) {
+ nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x000C);
+ } else {
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002);
nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063);
-
- /* reset serdes */
- sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 +
- mac_index * 0x200);
- sds |= 0x1;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 +
- mac_index * 0x200, sds);
- sds &= 0xfffffffe;
- nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 +
- mac_index * 0x200, sds);
-
- counter = 0;
- while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
- && (counter++ < 5000))
- ;
}
- return 0;
+
+ /* reset serdes: set, then clear, bit 0 of this MAC's SERDES common control register */
+ sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200);
+ sds |= 0x1;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
+ sds &= 0xfffffffe;
+ nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 + mac_index * 0x200, sds);
+
+ counter = 0;
+ while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
+ && (counter++ < 5000))
+ ;
+
+ return ret; /* 0, or 1 when the tracking wait above gave up */
+}
+
+
+/**
+ * nes_init_phy - select 1G or 10G MDIO operation in the MAC TX config, then run the PHY-type specific init under phy_lock
+ */
+int nes_init_phy(struct nes_device *nesdev)
+{
+ struct nes_adapter *nesadapter = nesdev->nesadapter;
+ u32 mac_index = nesdev->mac_index;
+ u32 tx_config = 0;
+ unsigned long flags;
+ u8 phy_type = nesadapter->phy_type[mac_index];
+ u8 phy_index = nesadapter->phy_index[mac_index];
+ int ret = 0;
+
+ tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+ if (phy_type == NES_PHY_TYPE_1G) {
+ /* setup 1G MDIO operation */
+ tx_config &= 0xFFFFFFE3;
+ tx_config |= 0x04;
+ } else {
+ /* setup 10G MDIO operation (applied to every PHY type other than NES_PHY_TYPE_1G) */
+ tx_config &= 0xFFFFFFE3;
+ tx_config |= 0x15;
+ }
+ nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+
+ spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); /* NOTE(review): held with IRQs off across nes_init_2025_phy(), which calls mdelay() - confirm this is acceptable */
+
+ switch (phy_type) {
+ case NES_PHY_TYPE_1G:
+ ret = nes_init_1g_phy(nesdev, phy_type, phy_index);
+ break;
+ case NES_PHY_TYPE_ARGUS:
+ case NES_PHY_TYPE_SFP_D:
+ case NES_PHY_TYPE_KR:
+ ret = nes_init_2025_phy(nesdev, phy_type, phy_index);
+ break;
+ } /* any other PHY type: no PHY-specific init is run and ret stays 0 */
+
+ spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
+
+ return ret; /* value returned by the per-type init helper, or 0 */
}
@@ -2460,23 +2540,9 @@
}
} else {
switch (nesadapter->phy_type[mac_index]) {
- case NES_PHY_TYPE_IRIS:
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
- temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- u32temp = 20;
- do {
- nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
- phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
- if ((phy_data == temp_phy_data) || (!(--u32temp)))
- break;
- temp_phy_data = phy_data;
- } while (1);
- nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
- __func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
- break;
-
case NES_PHY_TYPE_ARGUS:
case NES_PHY_TYPE_SFP_D:
+ case NES_PHY_TYPE_KR:
/* clear the alarms */
nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
@@ -3352,8 +3418,6 @@
u16 async_event_id;
u8 tcp_state;
u8 iwarp_state;
- int must_disconn = 1;
- int must_terminate = 0;
struct ib_event ibevent;
nes_debug(NES_DBG_AEQ, "\n");
@@ -3367,6 +3431,8 @@
BUG_ON(!context);
}
+ /* context is nesqp unless async_event_id == CQ ERROR */
+ nesqp = (struct nes_qp *)(unsigned long)context;
async_event_id = (u16)aeq_info;
tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
@@ -3378,8 +3444,6 @@
switch (async_event_id) {
case NES_AEQE_AEID_LLP_FIN_RECEIVED:
- nesqp = (struct nes_qp *)(unsigned long)context;
-
if (nesqp->term_flags)
return; /* Ignore it, wait for close complete */
@@ -3394,79 +3458,48 @@
async_event_id, nesqp->last_aeq, tcp_state);
}
- if ((tcp_state != NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- (nesqp->ibqp_state != IB_QPS_RTS)) {
- /* FIN Received but tcp state or IB state moved on,
- should expect a close complete */
- return;
- }
-
+ break;
case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
- nesqp = (struct nes_qp *)(unsigned long)context;
if (nesqp->term_flags) {
nes_terminate_done(nesqp, 0);
return;
}
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_CLOSING, 0, 0);
+ nes_cm_disconn(nesqp);
+ break;
- case NES_AEQE_AEID_LLP_CONNECTION_RESET:
case NES_AEQE_AEID_RESET_SENT:
- nesqp = (struct nes_qp *)(unsigned long)context;
- if (async_event_id == NES_AEQE_AEID_RESET_SENT) {
- tcp_state = NES_AEQE_TCP_STATE_CLOSED;
- }
+ tcp_state = NES_AEQE_TCP_STATE_CLOSED;
spin_lock_irqsave(&nesqp->lock, flags);
nesqp->hw_iwarp_state = iwarp_state;
nesqp->hw_tcp_state = tcp_state;
nesqp->last_aeq = async_event_id;
-
- if ((tcp_state == NES_AEQE_TCP_STATE_CLOSED) ||
- (tcp_state == NES_AEQE_TCP_STATE_TIME_WAIT)) {
- nesqp->hte_added = 0;
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
- }
-
- if ((nesqp->ibqp_state == IB_QPS_RTS) &&
- ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) ||
- (async_event_id == NES_AEQE_AEID_LLP_CONNECTION_RESET))) {
- switch (nesqp->hw_iwarp_state) {
- case NES_AEQE_IWARP_STATE_RTS:
- next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
- nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
- break;
- case NES_AEQE_IWARP_STATE_TERMINATE:
- must_disconn = 0; /* terminate path takes care of disconn */
- if (nesqp->term_flags == 0)
- must_terminate = 1;
- break;
- }
- } else {
- if (async_event_id == NES_AEQE_AEID_LLP_FIN_RECEIVED) {
- /* FIN Received but ib state not RTS,
- close complete will be on its way */
- must_disconn = 0;
- }
- }
+ nesqp->hte_added = 0;
spin_unlock_irqrestore(&nesqp->lock, flags);
+ next_iwarp_state = NES_CQP_QP_IWARP_STATE_ERROR | NES_CQP_QP_DEL_HTE;
+ nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
+ nes_cm_disconn(nesqp);
+ break;
- if (must_terminate)
- nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
- else if (must_disconn) {
- if (next_iwarp_state) {
- nes_debug(NES_DBG_AEQ, "issuing hw modifyqp for QP%u. next state = 0x%08X\n",
- nesqp->hwqp.qp_id, next_iwarp_state);
- nes_hw_modify_qp(nesdev, nesqp, next_iwarp_state, 0, 0);
- }
- nes_cm_disconn(nesqp);
- }
+ case NES_AEQE_AEID_LLP_CONNECTION_RESET:
+ if (atomic_read(&nesqp->close_timer_started))
+ return;
+ spin_lock_irqsave(&nesqp->lock, flags);
+ nesqp->hw_iwarp_state = iwarp_state;
+ nesqp->hw_tcp_state = tcp_state;
+ nesqp->last_aeq = async_event_id;
+ spin_unlock_irqrestore(&nesqp->lock, flags);
+ nes_cm_disconn(nesqp);
break;
case NES_AEQE_AEID_TERMINATE_SENT:
- nesqp = (struct nes_qp *)(unsigned long)context;
nes_terminate_send_fin(nesdev, nesqp, aeqe);
break;
case NES_AEQE_AEID_LLP_TERMINATE_RECEIVED:
- nesqp = (struct nes_qp *)(unsigned long)context;
nes_terminate_received(nesdev, nesqp, aeqe);
break;
@@ -3480,7 +3513,8 @@
case NES_AEQE_AEID_DDP_UBE_DDP_MESSAGE_TOO_LONG_FOR_AVAILABLE_BUFFER:
case NES_AEQE_AEID_AMP_BOUNDS_VIOLATION:
case NES_AEQE_AEID_AMP_TO_WRAP:
- nesqp = (struct nes_qp *)(unsigned long)context;
+ printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_ACCESS_ERR\n",
+ nesqp->hwqp.qp_id, async_event_id);
nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_ACCESS_ERR);
break;
@@ -3488,7 +3522,6 @@
case NES_AEQE_AEID_LLP_SEGMENT_TOO_SMALL:
case NES_AEQE_AEID_DDP_UBE_INVALID_MO:
case NES_AEQE_AEID_DDP_UBE_INVALID_QN:
- nesqp = (struct nes_qp *)(unsigned long)context;
if (iwarp_opcode(nesqp, aeq_info) > IWARP_OPCODE_TERM) {
aeq_info &= 0xffff0000;
aeq_info |= NES_AEQE_AEID_RDMAP_ROE_UNEXPECTED_OPCODE;
@@ -3530,7 +3563,8 @@
case NES_AEQE_AEID_STAG_ZERO_INVALID:
case NES_AEQE_AEID_ROE_INVALID_RDMA_READ_REQUEST:
case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
- nesqp = (struct nes_qp *)(unsigned long)context;
+ printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n",
+ nesqp->hwqp.qp_id, async_event_id);
nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
break;
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index 084be0e..9b1e7f8 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -37,12 +37,12 @@
#define NES_PHY_TYPE_CX4 1
#define NES_PHY_TYPE_1G 2
-#define NES_PHY_TYPE_IRIS 3
#define NES_PHY_TYPE_ARGUS 4
#define NES_PHY_TYPE_PUMA_1G 5
#define NES_PHY_TYPE_PUMA_10G 6
#define NES_PHY_TYPE_GLADIUS 7
#define NES_PHY_TYPE_SFP_D 8
+#define NES_PHY_TYPE_KR 9
#define NES_MULTICAST_PF_MAX 8
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index ab11027..7dd6ce6 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1230,8 +1230,8 @@
target_stat_values[++index] = cm_packets_received;
target_stat_values[++index] = cm_packets_dropped;
target_stat_values[++index] = cm_packets_retrans;
- target_stat_values[++index] = cm_listens_created;
- target_stat_values[++index] = cm_listens_destroyed;
+ target_stat_values[++index] = atomic_read(&cm_listens_created);
+ target_stat_values[++index] = atomic_read(&cm_listens_destroyed);
target_stat_values[++index] = cm_backlog_drops;
target_stat_values[++index] = atomic_read(&cm_loopbacks);
target_stat_values[++index] = atomic_read(&cm_nodes_created);
@@ -1461,9 +1461,9 @@
}
return 0;
}
- if ((phy_type == NES_PHY_TYPE_IRIS) ||
- (phy_type == NES_PHY_TYPE_ARGUS) ||
- (phy_type == NES_PHY_TYPE_SFP_D)) {
+ if ((phy_type == NES_PHY_TYPE_ARGUS) ||
+ (phy_type == NES_PHY_TYPE_SFP_D) ||
+ (phy_type == NES_PHY_TYPE_KR)) {
et_cmd->transceiver = XCVR_EXTERNAL;
et_cmd->port = PORT_FIBRE;
et_cmd->supported = SUPPORTED_FIBRE;
@@ -1583,8 +1583,7 @@
struct net_device *netdev;
struct nic_qp_map *curr_qp_map;
u32 u32temp;
- u16 phy_data;
- u16 temp_phy_data;
+ u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index];
netdev = alloc_etherdev(sizeof(struct nes_vnic));
if (!netdev) {
@@ -1692,65 +1691,23 @@
if ((nesdev->netdev_count == 0) &&
((PCI_FUNC(nesdev->pcidev->devfn) == nesdev->mac_index) ||
- ((nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) &&
+ ((phy_type == NES_PHY_TYPE_PUMA_1G) &&
(((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) ||
((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) {
- /*
- * nes_debug(NES_DBG_INIT, "Setting up PHY interrupt mask. Using register index 0x%04X\n",
- * NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesvnic->logical_port & 1)));
- */
u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
(0x200 * (nesdev->mac_index & 1)));
- if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_PUMA_1G) {
+ if (phy_type != NES_PHY_TYPE_PUMA_1G) {
u32temp |= 0x00200000;
nes_write_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
(0x200 * (nesdev->mac_index & 1)), u32temp);
}
- u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
- (0x200 * (nesdev->mac_index & 1)));
-
- if ((u32temp&0x0f1f0000) == 0x0f0f0000) {
- if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) {
- nes_init_phy(nesdev);
- nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
- temp_phy_data = (u16)nes_read_indexed(nesdev,
- NES_IDX_MAC_MDIO_CONTROL);
- u32temp = 20;
- do {
- nes_read_10G_phy_reg(nesdev, nesdev->nesadapter->phy_index[nesdev->mac_index], 1, 1);
- phy_data = (u16)nes_read_indexed(nesdev,
- NES_IDX_MAC_MDIO_CONTROL);
- if ((phy_data == temp_phy_data) || (!(--u32temp)))
- break;
- temp_phy_data = phy_data;
- } while (1);
- if (phy_data & 4) {
- nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
- nesvnic->linkup = 1;
- } else {
- nes_debug(NES_DBG_INIT, "The Link is DOWN!!.\n");
- }
- } else {
- nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
- nesvnic->linkup = 1;
- }
- } else if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
- nes_debug(NES_DBG_INIT, "mac_index=%d, logical_port=%d, u32temp=0x%04X, PCI_FUNC=%d\n",
- nesdev->mac_index, nesvnic->logical_port, u32temp, PCI_FUNC(nesdev->pcidev->devfn));
- if (((nesdev->mac_index < 2) && ((u32temp&0x01010000) == 0x01010000)) ||
- ((nesdev->mac_index > 1) && ((u32temp&0x02020000) == 0x02020000))) {
- nes_debug(NES_DBG_INIT, "The Link is UP!!.\n");
- nesvnic->linkup = 1;
- }
- }
/* clear the MAC interrupt status, assumes direct logical to physical mapping */
u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index));
nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp);
nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp);
- if (nesdev->nesadapter->phy_type[nesdev->mac_index] != NES_PHY_TYPE_IRIS)
- nes_init_phy(nesdev);
+ nes_init_phy(nesdev);
}
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 64d3136..815725f 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -228,7 +228,7 @@
/* Check for SQ overflow */
if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
spin_unlock_irqrestore(&nesqp->lock, flags);
- return -EINVAL;
+ return -ENOMEM;
}
wqe = &nesqp->hwqp.sq_vbase[head];
@@ -3294,7 +3294,7 @@
/* Check for SQ overflow */
if (((head + (2 * qsize) - nesqp->hwqp.sq_tail) % qsize) == (qsize - 1)) {
- err = -EINVAL;
+ err = -ENOMEM;
break;
}
@@ -3577,7 +3577,7 @@
}
/* Check for RQ overflow */
if (((head + (2 * qsize) - nesqp->hwqp.rq_tail) % qsize) == (qsize - 1)) {
- err = -EINVAL;
+ err = -ENOMEM;
break;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index e9795f6..d10b4ec 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -55,9 +55,7 @@
struct ipoib_dev_priv *priv = netdev_priv(dev);
coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
- coal->tx_coalesce_usecs = priv->ethtool.coalesce_usecs;
coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
- coal->tx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
return 0;
}
@@ -69,10 +67,8 @@
int ret;
/*
- * Since IPoIB uses a single CQ for both rx and tx, we assume
- * that rx params dictate the configuration. These values are
- * saved in the private data and returned when ipoib_get_coalesce()
- * is called.
+ * These values are saved in the private data and returned
+ * when ipoib_get_coalesce() is called
*/
if (coal->rx_coalesce_usecs > 0xffff ||
coal->rx_max_coalesced_frames > 0xffff)
@@ -85,8 +81,6 @@
return ret;
}
- coal->tx_coalesce_usecs = coal->rx_coalesce_usecs;
- coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames;
priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs;
priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 5f7a6fc..71237f8f 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -128,6 +128,28 @@
return 0;
}
+int iser_initialize_task_headers(struct iscsi_task *task,
+ struct iser_tx_desc *tx_desc)
+{
+ struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
+ struct iser_device *device = iser_conn->ib_conn->device;
+ struct iscsi_iser_task *iser_task = task->dd_data;
+ u64 dma_addr;
+
+ dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
+ ISER_HEADERS_LEN, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device, dma_addr))
+ return -ENOMEM;
+
+ tx_desc->dma_addr = dma_addr;
+ tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
+ tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
+ tx_desc->tx_sg[0].lkey = device->mr->lkey;
+
+ iser_task->headers_initialized = 1;
+ iser_task->iser_conn = iser_conn;
+ return 0;
+}
/**
* iscsi_iser_task_init - Initialize task
* @task: iscsi task
@@ -137,17 +159,17 @@
static int
iscsi_iser_task_init(struct iscsi_task *task)
{
- struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
+ if (!iser_task->headers_initialized)
+ if (iser_initialize_task_headers(task, &iser_task->desc))
+ return -ENOMEM;
+
/* mgmt task */
- if (!task->sc) {
- iser_task->desc.data = task->data;
+ if (!task->sc)
return 0;
- }
iser_task->command_sent = 0;
- iser_task->iser_conn = iser_conn;
iser_task_rdma_init(iser_task);
return 0;
}
@@ -168,7 +190,7 @@
{
int error = 0;
- iser_dbg("task deq [cid %d itt 0x%x]\n", conn->id, task->itt);
+ iser_dbg("mtask xmit [cid %d itt 0x%x]\n", conn->id, task->itt);
error = iser_send_control(conn, task);
@@ -178,9 +200,6 @@
* - if yes, the task is recycled at iscsi_complete_pdu
* - if no, the task is recycled at iser_snd_completion
*/
- if (error && error != -ENOBUFS)
- iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-
return error;
}
@@ -232,7 +251,7 @@
task->imm_count, task->unsol_r2t.data_length);
}
- iser_dbg("task deq [cid %d itt 0x%x]\n",
+ iser_dbg("ctask xmit [cid %d itt 0x%x]\n",
conn->id, task->itt);
/* Send the cmd PDU */
@@ -248,8 +267,6 @@
error = iscsi_iser_task_xmit_unsol_data(conn, task);
iscsi_iser_task_xmit_exit:
- if (error && error != -ENOBUFS)
- iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
return error;
}
@@ -283,7 +300,7 @@
* due to issues with the login code re iser sematics
* this not set in iscsi_conn_setup - FIXME
*/
- conn->max_recv_dlength = 128;
+ conn->max_recv_dlength = ISER_RECV_DATA_SEG_LEN;
iser_conn = conn->dd_data;
conn->dd_data = iser_conn;
@@ -401,7 +418,7 @@
struct Scsi_Host *shost;
struct iser_conn *ib_conn;
- shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 1);
+ shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
return NULL;
shost->transportt = iscsi_iser_scsi_transport;
@@ -675,7 +692,7 @@
memset(&ig, 0, sizeof(struct iser_global));
ig.desc_cache = kmem_cache_create("iser_descriptors",
- sizeof (struct iser_desc),
+ sizeof(struct iser_tx_desc),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (ig.desc_cache == NULL)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 9d529ca..036934c 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -102,9 +102,9 @@
#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
* SCSI_TMFUNC(2), LOGOUT(1) */
-#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX + \
- ISER_MAX_RX_MISC_PDUS + \
- ISER_MAX_TX_MISC_PDUS)
+#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+
+#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
/* the max TX (send) WR supported by the iSER QP is defined by *
* max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
@@ -132,6 +132,12 @@
__be64 read_va;
} __attribute__((packed));
+/* Constant PDU lengths calculations */
+#define ISER_HEADERS_LEN (sizeof(struct iser_hdr) + sizeof(struct iscsi_hdr))
+
+#define ISER_RECV_DATA_SEG_LEN 128
+#define ISER_RX_PAYLOAD_SIZE (ISER_HEADERS_LEN + ISER_RECV_DATA_SEG_LEN)
+#define ISER_RX_LOGIN_SIZE (ISER_HEADERS_LEN + ISCSI_DEF_MAX_RECV_SEG_LEN)
/* Length of an object name string */
#define ISER_OBJECT_NAME_SIZE 64
@@ -187,51 +193,43 @@
struct iser_mem_reg reg; /* memory registration info */
void *virt_addr;
struct iser_device *device; /* device->device for dma_unmap */
- u64 dma_addr; /* if non zero, addr for dma_unmap */
enum dma_data_direction direction; /* direction for dma_unmap */
unsigned int data_size;
- atomic_t ref_count; /* refcount, freed when dec to 0 */
-};
-
-#define MAX_REGD_BUF_VECTOR_LEN 2
-
-struct iser_dto {
- struct iscsi_iser_task *task;
- struct iser_conn *ib_conn;
- int notify_enable;
-
- /* vector of registered buffers */
- unsigned int regd_vector_len;
- struct iser_regd_buf *regd[MAX_REGD_BUF_VECTOR_LEN];
-
- /* offset into the registered buffer may be specified */
- unsigned int offset[MAX_REGD_BUF_VECTOR_LEN];
-
- /* a smaller size may be specified, if 0, then full size is used */
- unsigned int used_sz[MAX_REGD_BUF_VECTOR_LEN];
};
enum iser_desc_type {
- ISCSI_RX,
ISCSI_TX_CONTROL ,
ISCSI_TX_SCSI_COMMAND,
ISCSI_TX_DATAOUT
};
-struct iser_desc {
+struct iser_tx_desc {
struct iser_hdr iser_header;
struct iscsi_hdr iscsi_header;
- struct iser_regd_buf hdr_regd_buf;
- void *data; /* used by RX & TX_CONTROL */
- struct iser_regd_buf data_regd_buf; /* used by RX & TX_CONTROL */
enum iser_desc_type type;
- struct iser_dto dto;
+ u64 dma_addr;
+ /* sg[0] points to iser/iscsi headers, sg[1] optionally points to either
+ of immediate data, unsolicited data-out or control (login,text) */
+ struct ib_sge tx_sg[2];
+ int num_sge;
};
+#define ISER_RX_PAD_SIZE (256 - (ISER_RX_PAYLOAD_SIZE + \
+ sizeof(u64) + sizeof(struct ib_sge)))
+struct iser_rx_desc {
+ struct iser_hdr iser_header;
+ struct iscsi_hdr iscsi_header;
+ char data[ISER_RECV_DATA_SEG_LEN];
+ u64 dma_addr;
+ struct ib_sge rx_sg;
+ char pad[ISER_RX_PAD_SIZE];
+} __attribute__((packed));
+
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
- struct ib_cq *cq;
+ struct ib_cq *rx_cq;
+ struct ib_cq *tx_cq;
struct ib_mr *mr;
struct tasklet_struct cq_tasklet;
struct list_head ig_list; /* entry in ig devices list */
@@ -250,15 +248,18 @@
struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */
int disc_evt_flag; /* disconn event delivered */
wait_queue_head_t wait; /* waitq for conn/disconn */
- atomic_t post_recv_buf_count; /* posted rx count */
+ int post_recv_buf_count; /* posted rx count */
atomic_t post_send_buf_count; /* posted tx count */
- atomic_t unexpected_pdu_count;/* count of received *
- * unexpected pdus *
- * not yet retired */
char name[ISER_OBJECT_NAME_SIZE];
struct iser_page_vec *page_vec; /* represents SG to fmr maps*
* maps serialized as tx is*/
struct list_head conn_list; /* entry in ig conn list */
+
+ char *login_buf;
+ u64 login_dma;
+ unsigned int rx_desc_head;
+ struct iser_rx_desc *rx_descs;
+ struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
};
struct iscsi_iser_conn {
@@ -267,7 +268,7 @@
};
struct iscsi_iser_task {
- struct iser_desc desc;
+ struct iser_tx_desc desc;
struct iscsi_iser_conn *iser_conn;
enum iser_task_status status;
int command_sent; /* set if command sent */
@@ -275,6 +276,7 @@
struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
+ int headers_initialized;
};
struct iser_page_vec {
@@ -322,22 +324,17 @@
void iser_conn_terminate(struct iser_conn *ib_conn);
-void iser_rcv_completion(struct iser_desc *desc,
- unsigned long dto_xfer_len);
+void iser_rcv_completion(struct iser_rx_desc *desc,
+ unsigned long dto_xfer_len,
+ struct iser_conn *ib_conn);
-void iser_snd_completion(struct iser_desc *desc);
+void iser_snd_completion(struct iser_tx_desc *desc, struct iser_conn *ib_conn);
void iser_task_rdma_init(struct iscsi_iser_task *task);
void iser_task_rdma_finalize(struct iscsi_iser_task *task);
-void iser_dto_buffs_release(struct iser_dto *dto);
-
-int iser_regd_buff_release(struct iser_regd_buf *regd_buf);
-
-void iser_reg_single(struct iser_device *device,
- struct iser_regd_buf *regd_buf,
- enum dma_data_direction direction);
+void iser_free_rx_descriptors(struct iser_conn *ib_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
@@ -356,11 +353,9 @@
void iser_unreg_mem(struct iser_mem_reg *mem_reg);
-int iser_post_recv(struct iser_desc *rx_desc);
-int iser_post_send(struct iser_desc *tx_desc);
-
-int iser_conn_state_comp(struct iser_conn *ib_conn,
- enum iser_ib_conn_state comp);
+int iser_post_recvl(struct iser_conn *ib_conn);
+int iser_post_recvm(struct iser_conn *ib_conn, int count);
+int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc);
int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
struct iser_data_buf *data,
@@ -368,4 +363,6 @@
enum dma_data_direction dma_dir);
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
+int iser_initialize_task_headers(struct iscsi_task *task,
+ struct iser_tx_desc *tx_desc);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 9de6402..0b9ef07 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -39,29 +39,6 @@
#include "iscsi_iser.h"
-/* Constant PDU lengths calculations */
-#define ISER_TOTAL_HEADERS_LEN (sizeof (struct iser_hdr) + \
- sizeof (struct iscsi_hdr))
-
-/* iser_dto_add_regd_buff - increments the reference count for *
- * the registered buffer & adds it to the DTO object */
-static void iser_dto_add_regd_buff(struct iser_dto *dto,
- struct iser_regd_buf *regd_buf,
- unsigned long use_offset,
- unsigned long use_size)
-{
- int add_idx;
-
- atomic_inc(&regd_buf->ref_count);
-
- add_idx = dto->regd_vector_len;
- dto->regd[add_idx] = regd_buf;
- dto->used_sz[add_idx] = use_size;
- dto->offset[add_idx] = use_offset;
-
- dto->regd_vector_len++;
-}
-
/* Register user buffer memory and initialize passive rdma
* dto descriptor. Total data size is stored in
* iser_task->data[ISER_DIR_IN].data_len
@@ -122,9 +99,9 @@
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_regd_buf *regd_buf;
int err;
- struct iser_dto *send_dto = &iser_task->desc.dto;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
+ struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1];
err = iser_dma_map_task_data(iser_task,
buf_out,
@@ -163,135 +140,100 @@
if (imm_sz > 0) {
iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
task->itt, imm_sz);
- iser_dto_add_regd_buff(send_dto,
- regd_buf,
- 0,
- imm_sz);
+ tx_dsg->addr = regd_buf->reg.va;
+ tx_dsg->length = imm_sz;
+ tx_dsg->lkey = regd_buf->reg.lkey;
+ iser_task->desc.num_sge = 2;
}
return 0;
}
-/**
- * iser_post_receive_control - allocates, initializes and posts receive DTO.
- */
-static int iser_post_receive_control(struct iscsi_conn *conn)
-{
- struct iscsi_iser_conn *iser_conn = conn->dd_data;
- struct iser_desc *rx_desc;
- struct iser_regd_buf *regd_hdr;
- struct iser_regd_buf *regd_data;
- struct iser_dto *recv_dto = NULL;
- struct iser_device *device = iser_conn->ib_conn->device;
- int rx_data_size, err;
- int posts, outstanding_unexp_pdus;
-
- /* for the login sequence we must support rx of upto 8K; login is done
- * after conn create/bind (connect) and conn stop/bind (reconnect),
- * what's common for both schemes is that the connection is not started
- */
- if (conn->c_stage != ISCSI_CONN_STARTED)
- rx_data_size = ISCSI_DEF_MAX_RECV_SEG_LEN;
- else /* FIXME till user space sets conn->max_recv_dlength correctly */
- rx_data_size = 128;
-
- outstanding_unexp_pdus =
- atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0);
-
- /*
- * in addition to the response buffer, replace those consumed by
- * unexpected pdus.
- */
- for (posts = 0; posts < 1 + outstanding_unexp_pdus; posts++) {
- rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
- if (rx_desc == NULL) {
- iser_err("Failed to alloc desc for post recv %d\n",
- posts);
- err = -ENOMEM;
- goto post_rx_cache_alloc_failure;
- }
- rx_desc->type = ISCSI_RX;
- rx_desc->data = kmalloc(rx_data_size, GFP_NOIO);
- if (rx_desc->data == NULL) {
- iser_err("Failed to alloc data buf for post recv %d\n",
- posts);
- err = -ENOMEM;
- goto post_rx_kmalloc_failure;
- }
-
- recv_dto = &rx_desc->dto;
- recv_dto->ib_conn = iser_conn->ib_conn;
- recv_dto->regd_vector_len = 0;
-
- regd_hdr = &rx_desc->hdr_regd_buf;
- memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
- regd_hdr->device = device;
- regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */
- regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
-
- iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE);
-
- iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0);
-
- regd_data = &rx_desc->data_regd_buf;
- memset(regd_data, 0, sizeof(struct iser_regd_buf));
- regd_data->device = device;
- regd_data->virt_addr = rx_desc->data;
- regd_data->data_size = rx_data_size;
-
- iser_reg_single(device, regd_data, DMA_FROM_DEVICE);
-
- iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0);
-
- err = iser_post_recv(rx_desc);
- if (err) {
- iser_err("Failed iser_post_recv for post %d\n", posts);
- goto post_rx_post_recv_failure;
- }
- }
- /* all posts successful */
- return 0;
-
-post_rx_post_recv_failure:
- iser_dto_buffs_release(recv_dto);
- kfree(rx_desc->data);
-post_rx_kmalloc_failure:
- kmem_cache_free(ig.desc_cache, rx_desc);
-post_rx_cache_alloc_failure:
- if (posts > 0) {
- /*
- * response buffer posted, but did not replace all unexpected
- * pdu recv bufs. Ignore error, retry occurs next send
- */
- outstanding_unexp_pdus -= (posts - 1);
- err = 0;
- }
- atomic_add(outstanding_unexp_pdus,
- &iser_conn->ib_conn->unexpected_pdu_count);
-
- return err;
-}
-
/* creates a new tx descriptor and adds header regd buffer */
-static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn,
- struct iser_desc *tx_desc)
+static void iser_create_send_desc(struct iser_conn *ib_conn,
+ struct iser_tx_desc *tx_desc)
{
- struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf;
- struct iser_dto *send_dto = &tx_desc->dto;
+ struct iser_device *device = ib_conn->device;
- memset(regd_hdr, 0, sizeof(struct iser_regd_buf));
- regd_hdr->device = iser_conn->ib_conn->device;
- regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */
- regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN;
-
- send_dto->ib_conn = iser_conn->ib_conn;
- send_dto->notify_enable = 1;
- send_dto->regd_vector_len = 0;
+ ib_dma_sync_single_for_cpu(device->ib_device,
+ tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
tx_desc->iser_header.flags = ISER_VER;
- iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0);
+ tx_desc->num_sge = 1;
+
+ if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
+ tx_desc->tx_sg[0].lkey = device->mr->lkey;
+ iser_dbg("sdesc %p lkey mismatch, fixing\n", tx_desc);
+ }
+}
+
+
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
+{
+ int i, j;
+ u64 dma_addr;
+ struct iser_rx_desc *rx_desc;
+ struct ib_sge *rx_sg;
+ struct iser_device *device = ib_conn->device;
+
+ ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
+ sizeof(struct iser_rx_desc), GFP_KERNEL);
+ if (!ib_conn->rx_descs)
+ goto rx_desc_alloc_fail;
+
+ rx_desc = ib_conn->rx_descs;
+
+ for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
+ dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(device->ib_device, dma_addr))
+ goto rx_desc_dma_map_failed;
+
+ rx_desc->dma_addr = dma_addr;
+
+ rx_sg = &rx_desc->rx_sg;
+ rx_sg->addr = rx_desc->dma_addr;
+ rx_sg->length = ISER_RX_PAYLOAD_SIZE;
+ rx_sg->lkey = device->mr->lkey;
+ }
+
+ ib_conn->rx_desc_head = 0;
+ return 0;
+
+rx_desc_dma_map_failed:
+ rx_desc = ib_conn->rx_descs;
+ for (j = 0; j < i; j++, rx_desc++)
+ ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ kfree(ib_conn->rx_descs);
+ ib_conn->rx_descs = NULL;
+rx_desc_alloc_fail:
+ iser_err("failed allocating rx descriptors / data buffers\n");
+ return -ENOMEM;
+}
+
+void iser_free_rx_descriptors(struct iser_conn *ib_conn)
+{
+ int i;
+ struct iser_rx_desc *rx_desc;
+ struct iser_device *device = ib_conn->device;
+
+ if (ib_conn->login_buf) {
+ ib_dma_unmap_single(device->ib_device, ib_conn->login_dma,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+ kfree(ib_conn->login_buf);
+ }
+
+ if (!ib_conn->rx_descs)
+ return;
+
+ rx_desc = ib_conn->rx_descs;
+ for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
+ ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ kfree(ib_conn->rx_descs);
}
/**
@@ -301,46 +243,23 @@
{
struct iscsi_iser_conn *iser_conn = conn->dd_data;
- int i;
- /*
- * FIXME this value should be declared to the target during login with
- * the MaxOutstandingUnexpectedPDUs key when supported
- */
- int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
-
- iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
+ iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
/* Check that there is no posted recv or send buffers left - */
/* they must be consumed during the login phase */
- BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0);
+ BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0);
BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
+ if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
+ return -ENOMEM;
+
/* Initial post receive buffers */
- for (i = 0; i < initial_post_recv_bufs_num; i++) {
- if (iser_post_receive_control(conn) != 0) {
- iser_err("Failed to post recv bufs at:%d conn:0x%p\n",
- i, conn);
- return -ENOMEM;
- }
- }
- iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn);
+ if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
+ return -ENOMEM;
+
return 0;
}
-static int
-iser_check_xmit(struct iscsi_conn *conn, void *task)
-{
- struct iscsi_iser_conn *iser_conn = conn->dd_data;
-
- if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
- ISER_QP_MAX_REQ_DTOS) {
- iser_dbg("%ld can't xmit task %p\n",jiffies,task);
- return -ENOBUFS;
- }
- return 0;
-}
-
-
/**
* iser_send_command - send command PDU
*/
@@ -349,27 +268,18 @@
{
struct iscsi_iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_dto *send_dto = NULL;
unsigned long edtl;
- int err = 0;
+ int err;
struct iser_data_buf *data_buf;
struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr;
struct scsi_cmnd *sc = task->sc;
-
- if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
- iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
- return -EPERM;
- }
- if (iser_check_xmit(conn, task))
- return -ENOBUFS;
+ struct iser_tx_desc *tx_desc = &iser_task->desc;
edtl = ntohl(hdr->data_length);
/* build the tx desc regd header and add it to the tx desc dto */
- iser_task->desc.type = ISCSI_TX_SCSI_COMMAND;
- send_dto = &iser_task->desc.dto;
- send_dto->task = iser_task;
- iser_create_send_desc(iser_conn, &iser_task->desc);
+ tx_desc->type = ISCSI_TX_SCSI_COMMAND;
+ iser_create_send_desc(iser_conn->ib_conn, tx_desc);
if (hdr->flags & ISCSI_FLAG_CMD_READ)
data_buf = &iser_task->data[ISER_DIR_IN];
@@ -398,23 +308,13 @@
goto send_command_error;
}
- iser_reg_single(iser_conn->ib_conn->device,
- send_dto->regd[0], DMA_TO_DEVICE);
-
- if (iser_post_receive_control(conn) != 0) {
- iser_err("post_recv failed!\n");
- err = -ENOMEM;
- goto send_command_error;
- }
-
iser_task->status = ISER_TASK_STATUS_STARTED;
- err = iser_post_send(&iser_task->desc);
+ err = iser_post_send(iser_conn->ib_conn, tx_desc);
if (!err)
return 0;
send_command_error:
- iser_dto_buffs_release(send_dto);
iser_err("conn %p failed task->itt %d err %d\n",conn, task->itt, err);
return err;
}
@@ -428,20 +328,13 @@
{
struct iscsi_iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_desc *tx_desc = NULL;
- struct iser_dto *send_dto = NULL;
+ struct iser_tx_desc *tx_desc = NULL;
+ struct iser_regd_buf *regd_buf;
unsigned long buf_offset;
unsigned long data_seg_len;
uint32_t itt;
int err = 0;
-
- if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
- iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
- return -EPERM;
- }
-
- if (iser_check_xmit(conn, task))
- return -ENOBUFS;
+ struct ib_sge *tx_dsg;
itt = (__force uint32_t)hdr->itt;
data_seg_len = ntoh24(hdr->dlength);
@@ -450,28 +343,25 @@
iser_dbg("%s itt %d dseg_len %d offset %d\n",
__func__,(int)itt,(int)data_seg_len,(int)buf_offset);
- tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO);
+ tx_desc = kmem_cache_zalloc(ig.desc_cache, GFP_ATOMIC);
if (tx_desc == NULL) {
iser_err("Failed to alloc desc for post dataout\n");
return -ENOMEM;
}
tx_desc->type = ISCSI_TX_DATAOUT;
+ tx_desc->iser_header.flags = ISER_VER;
memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr));
- /* build the tx desc regd header and add it to the tx desc dto */
- send_dto = &tx_desc->dto;
- send_dto->task = iser_task;
- iser_create_send_desc(iser_conn, tx_desc);
+ /* build the tx desc */
+ iser_initialize_task_headers(task, tx_desc);
- iser_reg_single(iser_conn->ib_conn->device,
- send_dto->regd[0], DMA_TO_DEVICE);
-
- /* all data was registered for RDMA, we can use the lkey */
- iser_dto_add_regd_buff(send_dto,
- &iser_task->rdma_regd[ISER_DIR_OUT],
- buf_offset,
- data_seg_len);
+ regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
+ tx_dsg = &tx_desc->tx_sg[1];
+ tx_dsg->addr = regd_buf->reg.va + buf_offset;
+ tx_dsg->length = data_seg_len;
+ tx_dsg->lkey = regd_buf->reg.lkey;
+ tx_desc->num_sge = 2;
if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
iser_err("Offset:%ld & DSL:%ld in Data-Out "
@@ -485,12 +375,11 @@
itt, buf_offset, data_seg_len);
- err = iser_post_send(tx_desc);
+ err = iser_post_send(iser_conn->ib_conn, tx_desc);
if (!err)
return 0;
send_data_out_error:
- iser_dto_buffs_release(send_dto);
kmem_cache_free(ig.desc_cache, tx_desc);
iser_err("conn %p failed err %d\n",conn, err);
return err;
@@ -501,64 +390,44 @@
{
struct iscsi_iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
- struct iser_desc *mdesc = &iser_task->desc;
- struct iser_dto *send_dto = NULL;
+ struct iser_tx_desc *mdesc = &iser_task->desc;
unsigned long data_seg_len;
int err = 0;
- struct iser_regd_buf *regd_buf;
struct iser_device *device;
- unsigned char opcode;
-
- if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
- iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
- return -EPERM;
- }
-
- if (iser_check_xmit(conn, task))
- return -ENOBUFS;
/* build the tx desc regd header and add it to the tx desc dto */
mdesc->type = ISCSI_TX_CONTROL;
- send_dto = &mdesc->dto;
- send_dto->task = NULL;
- iser_create_send_desc(iser_conn, mdesc);
+ iser_create_send_desc(iser_conn->ib_conn, mdesc);
device = iser_conn->ib_conn->device;
- iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE);
-
data_seg_len = ntoh24(task->hdr->dlength);
if (data_seg_len > 0) {
- regd_buf = &mdesc->data_regd_buf;
- memset(regd_buf, 0, sizeof(struct iser_regd_buf));
- regd_buf->device = device;
- regd_buf->virt_addr = task->data;
- regd_buf->data_size = task->data_count;
- iser_reg_single(device, regd_buf,
- DMA_TO_DEVICE);
- iser_dto_add_regd_buff(send_dto, regd_buf,
- 0,
- data_seg_len);
- }
-
- opcode = task->hdr->opcode & ISCSI_OPCODE_MASK;
-
- /* post recv buffer for response if one is expected */
- if (!(opcode == ISCSI_OP_NOOP_OUT && task->hdr->itt == RESERVED_ITT)) {
- if (iser_post_receive_control(conn) != 0) {
- iser_err("post_rcv_buff failed!\n");
- err = -ENOMEM;
+ struct ib_sge *tx_dsg = &mdesc->tx_sg[1];
+ if (task != conn->login_task) {
+ iser_err("data present on non login task!!!\n");
goto send_control_error;
}
+ memcpy(iser_conn->ib_conn->login_buf, task->data,
+ task->data_count);
+ tx_dsg->addr = iser_conn->ib_conn->login_dma;
+ tx_dsg->length = data_seg_len;
+ tx_dsg->lkey = device->mr->lkey;
+ mdesc->num_sge = 2;
}
- err = iser_post_send(mdesc);
+ if (task == conn->login_task) {
+ err = iser_post_recvl(iser_conn->ib_conn);
+ if (err)
+ goto send_control_error;
+ }
+
+ err = iser_post_send(iser_conn->ib_conn, mdesc);
if (!err)
return 0;
send_control_error:
- iser_dto_buffs_release(send_dto);
iser_err("conn %p failed err %d\n",conn, err);
return err;
}
@@ -566,104 +435,71 @@
/**
* iser_rcv_dto_completion - recv DTO completion
*/
-void iser_rcv_completion(struct iser_desc *rx_desc,
- unsigned long dto_xfer_len)
+void iser_rcv_completion(struct iser_rx_desc *rx_desc,
+ unsigned long rx_xfer_len,
+ struct iser_conn *ib_conn)
{
- struct iser_dto *dto = &rx_desc->dto;
- struct iscsi_iser_conn *conn = dto->ib_conn->iser_conn;
- struct iscsi_task *task;
- struct iscsi_iser_task *iser_task;
+ struct iscsi_iser_conn *conn = ib_conn->iser_conn;
struct iscsi_hdr *hdr;
- char *rx_data = NULL;
- int rx_data_len = 0;
- unsigned char opcode;
+ u64 rx_dma;
+ int rx_buflen, outstanding, count, err;
+
+ /* differentiate between login to all other PDUs */
+ if ((char *)rx_desc == ib_conn->login_buf) {
+ rx_dma = ib_conn->login_dma;
+ rx_buflen = ISER_RX_LOGIN_SIZE;
+ } else {
+ rx_dma = rx_desc->dma_addr;
+ rx_buflen = ISER_RX_PAYLOAD_SIZE;
+ }
+
+ ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
+ rx_buflen, DMA_FROM_DEVICE);
hdr = &rx_desc->iscsi_header;
- iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt);
+ iser_dbg("op 0x%x itt 0x%x dlen %d\n", hdr->opcode,
+ hdr->itt, (int)(rx_xfer_len - ISER_HEADERS_LEN));
- if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */
- rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN;
- rx_data = dto->regd[1]->virt_addr;
- rx_data += dto->offset[1];
- }
+ iscsi_iser_recv(conn->iscsi_conn, hdr,
+ rx_desc->data, rx_xfer_len - ISER_HEADERS_LEN);
- opcode = hdr->opcode & ISCSI_OPCODE_MASK;
-
- if (opcode == ISCSI_OP_SCSI_CMD_RSP) {
- spin_lock(&conn->iscsi_conn->session->lock);
- task = iscsi_itt_to_ctask(conn->iscsi_conn, hdr->itt);
- if (task)
- __iscsi_get_task(task);
- spin_unlock(&conn->iscsi_conn->session->lock);
-
- if (!task)
- iser_err("itt can't be matched to task!!! "
- "conn %p opcode %d itt %d\n",
- conn->iscsi_conn, opcode, hdr->itt);
- else {
- iser_task = task->dd_data;
- iser_dbg("itt %d task %p\n",hdr->itt, task);
- iser_task->status = ISER_TASK_STATUS_COMPLETED;
- iser_task_rdma_finalize(iser_task);
- iscsi_put_task(task);
- }
- }
- iser_dto_buffs_release(dto);
-
- iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len);
-
- kfree(rx_desc->data);
- kmem_cache_free(ig.desc_cache, rx_desc);
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
+ rx_buflen, DMA_FROM_DEVICE);
/* decrementing conn->post_recv_buf_count only --after-- freeing the *
* task eliminates the need to worry on tasks which are completed in *
* parallel to the execution of iser_conn_term. So the code that waits *
* for the posted rx bufs refcount to become zero handles everything */
- atomic_dec(&conn->ib_conn->post_recv_buf_count);
+ conn->ib_conn->post_recv_buf_count--;
- /*
- * if an unexpected PDU was received then the recv wr consumed must
- * be replaced, this is done in the next send of a control-type PDU
- */
- if (opcode == ISCSI_OP_NOOP_IN && hdr->itt == RESERVED_ITT) {
- /* nop-in with itt = 0xffffffff */
- atomic_inc(&conn->ib_conn->unexpected_pdu_count);
+ if (rx_dma == ib_conn->login_dma)
+ return;
+
+ outstanding = ib_conn->post_recv_buf_count;
+ if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
+ count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
+ ISER_MIN_POSTED_RX);
+ err = iser_post_recvm(ib_conn, count);
+ if (err)
+ iser_err("posting %d rx bufs err %d\n", count, err);
}
- else if (opcode == ISCSI_OP_ASYNC_EVENT) {
- /* asyncronous message */
- atomic_inc(&conn->ib_conn->unexpected_pdu_count);
- }
- /* a reject PDU consumes the recv buf posted for the response */
}
-void iser_snd_completion(struct iser_desc *tx_desc)
+void iser_snd_completion(struct iser_tx_desc *tx_desc,
+ struct iser_conn *ib_conn)
{
- struct iser_dto *dto = &tx_desc->dto;
- struct iser_conn *ib_conn = dto->ib_conn;
- struct iscsi_iser_conn *iser_conn = ib_conn->iser_conn;
- struct iscsi_conn *conn = iser_conn->iscsi_conn;
struct iscsi_task *task;
- int resume_tx = 0;
+ struct iser_device *device = ib_conn->device;
- iser_dbg("Initiator, Data sent dto=0x%p\n", dto);
-
- iser_dto_buffs_release(dto);
-
- if (tx_desc->type == ISCSI_TX_DATAOUT)
+ if (tx_desc->type == ISCSI_TX_DATAOUT) {
+ ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
+ ISER_HEADERS_LEN, DMA_TO_DEVICE);
kmem_cache_free(ig.desc_cache, tx_desc);
-
- if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) ==
- ISER_QP_MAX_REQ_DTOS)
- resume_tx = 1;
+ }
atomic_dec(&ib_conn->post_send_buf_count);
- if (resume_tx) {
- iser_dbg("%ld resuming tx\n",jiffies);
- iscsi_conn_queue_work(conn);
- }
-
if (tx_desc->type == ISCSI_TX_CONTROL) {
/* this arithmetic is legal by libiscsi dd_data allocation */
task = (void *) ((long)(void *)tx_desc -
@@ -692,7 +528,6 @@
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
- int deferred;
int is_rdma_aligned = 1;
struct iser_regd_buf *regd;
@@ -710,32 +545,17 @@
if (iser_task->dir[ISER_DIR_IN]) {
regd = &iser_task->rdma_regd[ISER_DIR_IN];
- deferred = iser_regd_buff_release(regd);
- if (deferred) {
- iser_err("%d references remain for BUF-IN rdma reg\n",
- atomic_read(&regd->ref_count));
- }
+ if (regd->reg.is_fmr)
+ iser_unreg_mem(&regd->reg);
}
if (iser_task->dir[ISER_DIR_OUT]) {
regd = &iser_task->rdma_regd[ISER_DIR_OUT];
- deferred = iser_regd_buff_release(regd);
- if (deferred) {
- iser_err("%d references remain for BUF-OUT rdma reg\n",
- atomic_read(&regd->ref_count));
- }
+ if (regd->reg.is_fmr)
+ iser_unreg_mem(&regd->reg);
}
/* if the data was unaligned, it was already unmapped and then copied */
if (is_rdma_aligned)
iser_dma_unmap_task_data(iser_task);
}
-
-void iser_dto_buffs_release(struct iser_dto *dto)
-{
- int i;
-
- for (i = 0; i < dto->regd_vector_len; i++)
- iser_regd_buff_release(dto->regd[i]);
-}
-
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 274c883..fb88d68 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -41,62 +41,6 @@
#define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */
/**
- * Decrements the reference count for the
- * registered buffer & releases it
- *
- * returns 0 if released, 1 if deferred
- */
-int iser_regd_buff_release(struct iser_regd_buf *regd_buf)
-{
- struct ib_device *dev;
-
- if ((atomic_read(&regd_buf->ref_count) == 0) ||
- atomic_dec_and_test(&regd_buf->ref_count)) {
- /* if we used the dma mr, unreg is just NOP */
- if (regd_buf->reg.is_fmr)
- iser_unreg_mem(&regd_buf->reg);
-
- if (regd_buf->dma_addr) {
- dev = regd_buf->device->ib_device;
- ib_dma_unmap_single(dev,
- regd_buf->dma_addr,
- regd_buf->data_size,
- regd_buf->direction);
- }
- /* else this regd buf is associated with task which we */
- /* dma_unmap_single/sg later */
- return 0;
- } else {
- iser_dbg("Release deferred, regd.buff: 0x%p\n", regd_buf);
- return 1;
- }
-}
-
-/**
- * iser_reg_single - fills registered buffer descriptor with
- * registration information
- */
-void iser_reg_single(struct iser_device *device,
- struct iser_regd_buf *regd_buf,
- enum dma_data_direction direction)
-{
- u64 dma_addr;
-
- dma_addr = ib_dma_map_single(device->ib_device,
- regd_buf->virt_addr,
- regd_buf->data_size, direction);
- BUG_ON(ib_dma_mapping_error(device->ib_device, dma_addr));
-
- regd_buf->reg.lkey = device->mr->lkey;
- regd_buf->reg.len = regd_buf->data_size;
- regd_buf->reg.va = dma_addr;
- regd_buf->reg.is_fmr = 0;
-
- regd_buf->dma_addr = dma_addr;
- regd_buf->direction = direction;
-}
-
-/**
* iser_start_rdma_unaligned_sg
*/
static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
@@ -109,10 +53,10 @@
unsigned long cmd_data_len = data->data_len;
if (cmd_data_len > ISER_KMALLOC_THRESHOLD)
- mem = (void *)__get_free_pages(GFP_NOIO,
+ mem = (void *)__get_free_pages(GFP_ATOMIC,
ilog2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT);
else
- mem = kmalloc(cmd_data_len, GFP_NOIO);
+ mem = kmalloc(cmd_data_len, GFP_ATOMIC);
if (mem == NULL) {
iser_err("Failed to allocate mem size %d %d for copying sglist\n",
@@ -474,9 +418,5 @@
return err;
}
}
-
- /* take a reference on this regd buf such that it will not be released *
- * (eg in send dto completion) before we get the scsi response */
- atomic_inc(&regd_buf->ref_count);
return 0;
}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 8579f32..308d17b 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -37,9 +37,8 @@
#include "iscsi_iser.h"
#define ISCSI_ISER_MAX_CONN 8
-#define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \
- ISER_QP_MAX_REQ_DTOS) * \
- ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_RX_CQ_LEN (ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
+#define ISER_MAX_TX_CQ_LEN (ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);
@@ -67,15 +66,23 @@
if (IS_ERR(device->pd))
goto pd_err;
- device->cq = ib_create_cq(device->ib_device,
+ device->rx_cq = ib_create_cq(device->ib_device,
iser_cq_callback,
iser_cq_event_callback,
(void *)device,
- ISER_MAX_CQ_LEN, 0);
- if (IS_ERR(device->cq))
- goto cq_err;
+ ISER_MAX_RX_CQ_LEN, 0);
+ if (IS_ERR(device->rx_cq))
+ goto rx_cq_err;
- if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP))
+ device->tx_cq = ib_create_cq(device->ib_device,
+ NULL, iser_cq_event_callback,
+ (void *)device,
+ ISER_MAX_TX_CQ_LEN, 0);
+
+ if (IS_ERR(device->tx_cq))
+ goto tx_cq_err;
+
+ if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
goto cq_arm_err;
tasklet_init(&device->cq_tasklet,
@@ -93,8 +100,10 @@
dma_mr_err:
tasklet_kill(&device->cq_tasklet);
cq_arm_err:
- ib_destroy_cq(device->cq);
-cq_err:
+ ib_destroy_cq(device->tx_cq);
+tx_cq_err:
+ ib_destroy_cq(device->rx_cq);
+rx_cq_err:
ib_dealloc_pd(device->pd);
pd_err:
iser_err("failed to allocate an IB resource\n");
@@ -112,11 +121,13 @@
tasklet_kill(&device->cq_tasklet);
(void)ib_dereg_mr(device->mr);
- (void)ib_destroy_cq(device->cq);
+ (void)ib_destroy_cq(device->tx_cq);
+ (void)ib_destroy_cq(device->rx_cq);
(void)ib_dealloc_pd(device->pd);
device->mr = NULL;
- device->cq = NULL;
+ device->tx_cq = NULL;
+ device->rx_cq = NULL;
device->pd = NULL;
}
@@ -129,13 +140,23 @@
{
struct iser_device *device;
struct ib_qp_init_attr init_attr;
- int ret;
+ int ret = -ENOMEM;
struct ib_fmr_pool_param params;
BUG_ON(ib_conn->device == NULL);
device = ib_conn->device;
+ ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+ if (!ib_conn->login_buf) {
+ goto alloc_err;
+ ret = -ENOMEM;
+ }
+
+ ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
+ (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE,
+ DMA_FROM_DEVICE);
+
ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
(sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
GFP_KERNEL);
@@ -169,12 +190,12 @@
init_attr.event_handler = iser_qp_event_callback;
init_attr.qp_context = (void *)ib_conn;
- init_attr.send_cq = device->cq;
- init_attr.recv_cq = device->cq;
+ init_attr.send_cq = device->tx_cq;
+ init_attr.recv_cq = device->rx_cq;
init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
- init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN;
- init_attr.cap.max_recv_sge = 2;
+ init_attr.cap.max_send_sge = 2;
+ init_attr.cap.max_recv_sge = 1;
init_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr.qp_type = IB_QPT_RC;
@@ -192,6 +213,7 @@
(void)ib_destroy_fmr_pool(ib_conn->fmr_pool);
fmr_pool_err:
kfree(ib_conn->page_vec);
+ kfree(ib_conn->login_buf);
alloc_err:
iser_err("unable to alloc mem or create resource, err %d\n", ret);
return ret;
@@ -278,17 +300,6 @@
mutex_unlock(&ig.device_list_mutex);
}
-int iser_conn_state_comp(struct iser_conn *ib_conn,
- enum iser_ib_conn_state comp)
-{
- int ret;
-
- spin_lock_bh(&ib_conn->lock);
- ret = (ib_conn->state == comp);
- spin_unlock_bh(&ib_conn->lock);
- return ret;
-}
-
static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
enum iser_ib_conn_state comp,
enum iser_ib_conn_state exch)
@@ -314,7 +325,7 @@
mutex_lock(&ig.connlist_mutex);
list_del(&ib_conn->conn_list);
mutex_unlock(&ig.connlist_mutex);
-
+ iser_free_rx_descriptors(ib_conn);
iser_free_ib_conn_res(ib_conn);
ib_conn->device = NULL;
/* on EVENT_ADDR_ERROR there's no device yet for this conn */
@@ -442,7 +453,7 @@
ISCSI_ERR_CONN_FAILED);
/* Complete the termination process if no posts are pending */
- if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) &&
+ if (ib_conn->post_recv_buf_count == 0 &&
(atomic_read(&ib_conn->post_send_buf_count) == 0)) {
ib_conn->state = ISER_CONN_DOWN;
wake_up_interruptible(&ib_conn->wait);
@@ -489,9 +500,8 @@
{
ib_conn->state = ISER_CONN_INIT;
init_waitqueue_head(&ib_conn->wait);
- atomic_set(&ib_conn->post_recv_buf_count, 0);
+ ib_conn->post_recv_buf_count = 0;
atomic_set(&ib_conn->post_send_buf_count, 0);
- atomic_set(&ib_conn->unexpected_pdu_count, 0);
atomic_set(&ib_conn->refcount, 1);
INIT_LIST_HEAD(&ib_conn->conn_list);
spin_lock_init(&ib_conn->lock);
@@ -626,136 +636,97 @@
reg->mem_h = NULL;
}
-/**
- * iser_dto_to_iov - builds IOV from a dto descriptor
- */
-static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len)
+int iser_post_recvl(struct iser_conn *ib_conn)
{
- int i;
- struct ib_sge *sge;
- struct iser_regd_buf *regd_buf;
+ struct ib_recv_wr rx_wr, *rx_wr_failed;
+ struct ib_sge sge;
+ int ib_ret;
- if (dto->regd_vector_len > iov_len) {
- iser_err("iov size %d too small for posting dto of len %d\n",
- iov_len, dto->regd_vector_len);
- BUG();
- }
+ sge.addr = ib_conn->login_dma;
+ sge.length = ISER_RX_LOGIN_SIZE;
+ sge.lkey = ib_conn->device->mr->lkey;
- for (i = 0; i < dto->regd_vector_len; i++) {
- sge = &iov[i];
- regd_buf = dto->regd[i];
+ rx_wr.wr_id = (unsigned long)ib_conn->login_buf;
+ rx_wr.sg_list = &sge;
+ rx_wr.num_sge = 1;
+ rx_wr.next = NULL;
- sge->addr = regd_buf->reg.va;
- sge->length = regd_buf->reg.len;
- sge->lkey = regd_buf->reg.lkey;
-
- if (dto->used_sz[i] > 0) /* Adjust size */
- sge->length = dto->used_sz[i];
-
- /* offset and length should not exceed the regd buf length */
- if (sge->length + dto->offset[i] > regd_buf->reg.len) {
- iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:"
- "%ld in dto:0x%p [%d], va:0x%08lX\n",
- (unsigned long)sge->length, dto->offset[i],
- (unsigned long)regd_buf->reg.len, dto, i,
- (unsigned long)sge->addr);
- BUG();
- }
-
- sge->addr += dto->offset[i]; /* Adjust offset */
- }
-}
-
-/**
- * iser_post_recv - Posts a receive buffer.
- *
- * returns 0 on success, -1 on failure
- */
-int iser_post_recv(struct iser_desc *rx_desc)
-{
- int ib_ret, ret_val = 0;
- struct ib_recv_wr recv_wr, *recv_wr_failed;
- struct ib_sge iov[2];
- struct iser_conn *ib_conn;
- struct iser_dto *recv_dto = &rx_desc->dto;
-
- /* Retrieve conn */
- ib_conn = recv_dto->ib_conn;
-
- iser_dto_to_iov(recv_dto, iov, 2);
-
- recv_wr.next = NULL;
- recv_wr.sg_list = iov;
- recv_wr.num_sge = recv_dto->regd_vector_len;
- recv_wr.wr_id = (unsigned long)rx_desc;
-
- atomic_inc(&ib_conn->post_recv_buf_count);
- ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed);
+ ib_conn->post_recv_buf_count++;
+ ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
if (ib_ret) {
iser_err("ib_post_recv failed ret=%d\n", ib_ret);
- atomic_dec(&ib_conn->post_recv_buf_count);
- ret_val = -1;
+ ib_conn->post_recv_buf_count--;
+ }
+ return ib_ret;
+}
+
+int iser_post_recvm(struct iser_conn *ib_conn, int count) /* posts 'count' chained receive WRs built from rx_descs[], starting at rx_desc_head; returns ib_post_recv()'s result */
+{
+ struct ib_recv_wr *rx_wr, *rx_wr_failed;
+ int i, ib_ret;
+ unsigned int my_rx_head = ib_conn->rx_desc_head; /* local copy; written back only on success */
+ struct iser_rx_desc *rx_desc;
+
+ for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
+ rx_desc = &ib_conn->rx_descs[my_rx_head];
+ rx_wr->wr_id = (unsigned long)rx_desc;
+ rx_wr->sg_list = &rx_desc->rx_sg;
+ rx_wr->num_sge = 1;
+ rx_wr->next = rx_wr + 1; /* chain to the following entry of ib_conn->rx_wr */
+ my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1); /* wrap by masking; presumably ISER_QP_MAX_RECV_DTOS is a power of two - TODO confirm */
}
- return ret_val;
+ rx_wr--; /* step back to the last WR filled in; NOTE(review): assumes count >= 1 */
+ rx_wr->next = NULL; /* mark end of work requests list */
+
+ ib_conn->post_recv_buf_count += count; /* counted before posting, undone below if the post fails */
+ ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
+ if (ib_ret) {
+ iser_err("ib_post_recv failed ret=%d\n", ib_ret);
+ ib_conn->post_recv_buf_count -= count;
+ } else
+ ib_conn->rx_desc_head = my_rx_head; /* advance the head only when the post succeeded */
+ return ib_ret;
}
+
/**
* iser_start_send - Initiate a Send DTO operation
*
* returns 0 on success, -1 on failure
*/
-int iser_post_send(struct iser_desc *tx_desc)
+int iser_post_send(struct iser_conn *ib_conn, struct iser_tx_desc *tx_desc)
{
- int ib_ret, ret_val = 0;
+ int ib_ret;
struct ib_send_wr send_wr, *send_wr_failed;
- struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN];
- struct iser_conn *ib_conn;
- struct iser_dto *dto = &tx_desc->dto;
- ib_conn = dto->ib_conn;
-
- iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN);
+ ib_dma_sync_single_for_device(ib_conn->device->ib_device,
+ tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
send_wr.next = NULL;
send_wr.wr_id = (unsigned long)tx_desc;
- send_wr.sg_list = iov;
- send_wr.num_sge = dto->regd_vector_len;
+ send_wr.sg_list = tx_desc->tx_sg;
+ send_wr.num_sge = tx_desc->num_sge;
send_wr.opcode = IB_WR_SEND;
- send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0;
+ send_wr.send_flags = IB_SEND_SIGNALED;
atomic_inc(&ib_conn->post_send_buf_count);
ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
if (ib_ret) {
- iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n",
- dto, dto->regd_vector_len);
iser_err("ib_post_send failed, ret:%d\n", ib_ret);
atomic_dec(&ib_conn->post_send_buf_count);
- ret_val = -1;
}
-
- return ret_val;
+ return ib_ret;
}
-static void iser_handle_comp_error(struct iser_desc *desc)
+static void iser_handle_comp_error(struct iser_tx_desc *desc,
+ struct iser_conn *ib_conn)
{
- struct iser_dto *dto = &desc->dto;
- struct iser_conn *ib_conn = dto->ib_conn;
-
- iser_dto_buffs_release(dto);
-
- if (desc->type == ISCSI_RX) {
- kfree(desc->data);
+ if (desc && desc->type == ISCSI_TX_DATAOUT)
kmem_cache_free(ig.desc_cache, desc);
- atomic_dec(&ib_conn->post_recv_buf_count);
- } else { /* type is TX control/command/dataout */
- if (desc->type == ISCSI_TX_DATAOUT)
- kmem_cache_free(ig.desc_cache, desc);
- atomic_dec(&ib_conn->post_send_buf_count);
- }
- if (atomic_read(&ib_conn->post_recv_buf_count) == 0 &&
+ if (ib_conn->post_recv_buf_count == 0 &&
atomic_read(&ib_conn->post_send_buf_count) == 0) {
/* getting here when the state is UP means that the conn is *
* being terminated asynchronously from the iSCSI layer's *
@@ -774,32 +745,74 @@
}
}
+/* iser_drain_tx_cq - poll the device TX CQ until it is empty, handling each
+ * completion as it is polled; returns the number of completions polled. */
+static int iser_drain_tx_cq(struct iser_device *device)
+{
+	struct ib_cq *tx_cq = device->tx_cq;
+	struct ib_wc wc;
+	int nr_polled;
+
+	for (nr_polled = 0; ib_poll_cq(tx_cq, 1, &wc) == 1; nr_polled++) {
+		struct iser_conn *ib_conn = wc.qp->qp_context;
+		struct iser_tx_desc *tx_desc =
+			(struct iser_tx_desc *) (unsigned long) wc.wr_id;
+
+		if (wc.status != IB_WC_SUCCESS) {
+			/* failed WR: drop the pending-send count, then clean up */
+			iser_err("tx id %llx status %d vend_err %x\n",
+				 wc.wr_id, wc.status, wc.vendor_err);
+			atomic_dec(&ib_conn->post_send_buf_count);
+			iser_handle_comp_error(tx_desc, ib_conn);
+		} else if (wc.opcode != IB_WC_SEND) {
+			iser_err("expected opcode %d got %d\n",
+				 IB_WC_SEND, wc.opcode);
+		} else {
+			iser_snd_completion(tx_desc, ib_conn);
+		}
+	}
+	return nr_polled;
+}
+
+
static void iser_cq_tasklet_fn(unsigned long data)
{
struct iser_device *device = (struct iser_device *)data;
- struct ib_cq *cq = device->cq;
+ struct ib_cq *cq = device->rx_cq; /* this loop polls the RX CQ; the TX CQ is drained via iser_drain_tx_cq() */
struct ib_wc wc;
- struct iser_desc *desc;
+ struct iser_rx_desc *desc;
unsigned long xfer_len;
+ struct iser_conn *ib_conn;
+ int completed_tx, completed_rx;
+ completed_tx = completed_rx = 0;
while (ib_poll_cq(cq, 1, &wc) == 1) {
- desc = (struct iser_desc *) (unsigned long) wc.wr_id;
+ desc = (struct iser_rx_desc *) (unsigned long) wc.wr_id;
BUG_ON(desc == NULL);
-
+ ib_conn = wc.qp->qp_context; /* the connection is taken from the QP context, not from the descriptor */
if (wc.status == IB_WC_SUCCESS) {
- if (desc->type == ISCSI_RX) {
+ if (wc.opcode == IB_WC_RECV) {
xfer_len = (unsigned long)wc.byte_len;
- iser_rcv_completion(desc, xfer_len);
- } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */
- iser_snd_completion(desc);
+ iser_rcv_completion(desc, xfer_len, ib_conn);
+ } else
+ iser_err("expected opcode %d got %d\n",
+ IB_WC_RECV, wc.opcode);
} else {
- iser_err("comp w. error op %d status %d\n",desc->type,wc.status);
- iser_handle_comp_error(desc);
+ if (wc.status != IB_WC_WR_FLUSH_ERR) /* flush errors are not logged */
+ iser_err("rx id %llx status %d vend_err %x\n",
+ wc.wr_id, wc.status, wc.vendor_err);
+ ib_conn->post_recv_buf_count--;
+ iser_handle_comp_error(NULL, ib_conn); /* NULL: no tx descriptor to free on the rx error path */
}
+ completed_rx++;
+ if (!(completed_rx & 63)) /* after every 64 rx completions, also drain the TX CQ */
+ completed_tx += iser_drain_tx_cq(device);
}
/* #warning "it is assumed here that arming CQ only once its empty" *
* " would not cause interrupts to be missed" */
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+
+ completed_tx += iser_drain_tx_cq(device); /* final TX drain, done after the RX CQ has been re-armed */
+ iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
}
static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index 3e8618b..4cd7f420 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -264,6 +264,10 @@
struct work_struct fatal_error_handler_task;
struct work_struct link_fault_handler_task;
+ struct work_struct db_full_task;
+ struct work_struct db_empty_task;
+ struct work_struct db_drop_task;
+
struct dentry *debugfs_root;
struct mutex mdio_lock;
@@ -335,6 +339,7 @@
int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
unsigned char *data);
irqreturn_t t3_sge_intr_msix(int irq, void *cookie);
+extern struct workqueue_struct *cxgb3_wq;
int t3_get_edc_fw(struct cphy *phy, int edc_idx, int size);
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index 89bec9c..37945fc 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -45,6 +45,7 @@
#include <linux/firmware.h>
#include <linux/log2.h>
#include <linux/stringify.h>
+#include <linux/sched.h>
#include <asm/uaccess.h>
#include "common.h"
@@ -140,7 +141,7 @@
* will block keventd as it needs the rtnl lock, and we'll deadlock waiting
* for our work to complete. Get our own work queue to solve this.
*/
-static struct workqueue_struct *cxgb3_wq;
+struct workqueue_struct *cxgb3_wq;
/**
* link_report - show link status and link speed/duplex
@@ -590,6 +591,19 @@
V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ, cpus, rspq_map);
}
+/* For every qset whose ->adap is set, write A_SG_KDOORBELL once per TX queue. */
+static void ring_dbs(struct adapter *adap)
+{
+	int q, t;
+
+	for (q = 0; q < SGE_QSETS; q++) {
+		struct sge_qset *qset = &adap->sge.qs[q];
+
+		if (!qset->adap)
+			continue;
+
+		for (t = 0; t < SGE_TXQ_PER_SET; t++) {
+			u32 db = F_SELEGRCNTX | V_EGRCNTX(qset->txq[t].cntxt_id);
+
+			t3_write_reg(adap, A_SG_KDOORBELL, db);
+		}
+	}
+}
+
static void init_napi(struct adapter *adap)
{
int i;
@@ -2754,6 +2768,42 @@
spin_unlock_irq(&adapter->work_lock);
}
+/* Work handler: hands OFFLOAD_DB_FULL to cxgb3_event_notify() for this adapter's tdev. */
+static void db_full_task(struct work_struct *work)
+{
+	struct adapter *adap;
+
+	adap = container_of(work, struct adapter, db_full_task);
+	cxgb3_event_notify(&adap->tdev, OFFLOAD_DB_FULL, 0);
+}
+
+/* Work handler: hands OFFLOAD_DB_EMPTY to cxgb3_event_notify() for this adapter's tdev. */
+static void db_empty_task(struct work_struct *work)
+{
+	struct adapter *adap;
+
+	adap = container_of(work, struct adapter, db_empty_task);
+	cxgb3_event_notify(&adap->tdev, OFFLOAD_DB_EMPTY, 0);
+}
+
+/*
+ * Work handler for the db_drop_task work item: hands OFFLOAD_DB_DROP to
+ * cxgb3_event_notify() for this adapter's tdev, sleeps for a randomized
+ * interval, then rings the driver's own qset doorbells via ring_dbs().
+ */
+static void db_drop_task(struct work_struct *work)
+{
+	struct adapter *adapter = container_of(work, struct adapter,
+					       db_drop_task);
+	unsigned long delay = 1000;
+	unsigned short r;
+
+	cxgb3_event_notify(&adapter->tdev, OFFLOAD_DB_DROP, 0);
+
+	/*
+	 * Sleep a while before ringing the driver qset dbs.
+	 * The delay is between 1000-2023 usecs (1000 plus the low ten
+	 * random bits); it is converted with usecs_to_jiffies() before
+	 * being passed to the scheduler.
+	 */
+	get_random_bytes(&r, sizeof(r));
+	delay += r & 1023;
+	schedule_timeout_uninterruptible(usecs_to_jiffies(delay));
+	ring_dbs(adapter);
+}
+
/*
* Processes external (PHY) interrupts in process context.
*/
@@ -3222,6 +3272,11 @@
INIT_LIST_HEAD(&adapter->adapter_list);
INIT_WORK(&adapter->ext_intr_handler_task, ext_intr_task);
INIT_WORK(&adapter->fatal_error_handler_task, fatal_error_task);
+
+ INIT_WORK(&adapter->db_full_task, db_full_task);
+ INIT_WORK(&adapter->db_empty_task, db_empty_task);
+ INIT_WORK(&adapter->db_drop_task, db_drop_task);
+
INIT_DELAYED_WORK(&adapter->adap_check_task, t3_adap_check_task);
for (i = 0; i < ai->nports0 + ai->nports1; ++i) {
diff --git a/drivers/net/cxgb3/cxgb3_offload.h b/drivers/net/cxgb3/cxgb3_offload.h
index 670aa62..929c298 100644
--- a/drivers/net/cxgb3/cxgb3_offload.h
+++ b/drivers/net/cxgb3/cxgb3_offload.h
@@ -73,7 +73,10 @@
OFFLOAD_STATUS_UP,
OFFLOAD_STATUS_DOWN,
OFFLOAD_PORT_DOWN,
- OFFLOAD_PORT_UP
+ OFFLOAD_PORT_UP,
+ OFFLOAD_DB_FULL,
+ OFFLOAD_DB_EMPTY,
+ OFFLOAD_DB_DROP
};
struct cxgb3_client {
diff --git a/drivers/net/cxgb3/regs.h b/drivers/net/cxgb3/regs.h
index 1b5327b..cb42353 100644
--- a/drivers/net/cxgb3/regs.h
+++ b/drivers/net/cxgb3/regs.h
@@ -254,6 +254,22 @@
#define V_LOPIODRBDROPERR(x) ((x) << S_LOPIODRBDROPERR)
#define F_LOPIODRBDROPERR V_LOPIODRBDROPERR(1U)
+#define S_HIPRIORITYDBFULL 7
+#define V_HIPRIORITYDBFULL(x) ((x) << S_HIPRIORITYDBFULL)
+#define F_HIPRIORITYDBFULL V_HIPRIORITYDBFULL(1U)
+
+#define S_HIPRIORITYDBEMPTY 6
+#define V_HIPRIORITYDBEMPTY(x) ((x) << S_HIPRIORITYDBEMPTY)
+#define F_HIPRIORITYDBEMPTY V_HIPRIORITYDBEMPTY(1U)
+
+#define S_LOPRIORITYDBFULL 5
+#define V_LOPRIORITYDBFULL(x) ((x) << S_LOPRIORITYDBFULL)
+#define F_LOPRIORITYDBFULL V_LOPRIORITYDBFULL(1U)
+
+#define S_LOPRIORITYDBEMPTY 4
+#define V_LOPRIORITYDBEMPTY(x) ((x) << S_LOPRIORITYDBEMPTY)
+#define F_LOPRIORITYDBEMPTY V_LOPRIORITYDBEMPTY(1U)
+
#define S_RSPQDISABLED 3
#define V_RSPQDISABLED(x) ((x) << S_RSPQDISABLED)
#define F_RSPQDISABLED V_RSPQDISABLED(1U)
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 318a018..9b43446 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -42,6 +42,7 @@
#include "sge_defs.h"
#include "t3_cpl.h"
#include "firmware_exports.h"
+#include "cxgb3_offload.h"
#define USE_GTS 0
@@ -2833,8 +2834,13 @@
}
if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
- CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
- status & F_HIPIODRBDROPERR ? "high" : "lo");
+ queue_work(cxgb3_wq, &adapter->db_drop_task);
+
+ if (status & (F_HIPRIORITYDBFULL | F_LOPRIORITYDBFULL))
+ queue_work(cxgb3_wq, &adapter->db_full_task);
+
+ if (status & (F_HIPRIORITYDBEMPTY | F_LOPRIORITYDBEMPTY))
+ queue_work(cxgb3_wq, &adapter->db_empty_task);
t3_write_reg(adapter, A_SG_INT_CAUSE, status);
if (status & SGE_FATALERR)
diff --git a/drivers/net/cxgb3/t3_hw.c b/drivers/net/cxgb3/t3_hw.c
index 032cfe0..c38fc71 100644
--- a/drivers/net/cxgb3/t3_hw.c
+++ b/drivers/net/cxgb3/t3_hw.c
@@ -1432,7 +1432,10 @@
F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
- F_HIRCQPARITYERROR)
+ F_HIRCQPARITYERROR | F_LOPRIORITYDBFULL | \
+ F_HIPRIORITYDBFULL | F_LOPRIORITYDBEMPTY | \
+ F_HIPRIORITYDBEMPTY | F_HIPIODRBDROPERR | \
+ F_LOPIODRBDROPERR)
#define MC5_INTR_MASK (F_PARITYERR | F_ACTRGNFULL | F_UNKNOWNCMD | \
F_REQQPARERR | F_DISPQPARERR | F_DELACTEMPTY | \
F_NFASRCHFAIL)
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 09509ed..a585e0f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -984,9 +984,9 @@
struct list_head event_handler_list;
spinlock_t event_handler_lock;
+ spinlock_t client_data_lock;
struct list_head core_list;
struct list_head client_data_list;
- spinlock_t client_data_lock;
struct ib_cache cache;
int *pkey_tbl_len;
@@ -1144,8 +1144,8 @@
IB_DEV_UNREGISTERED
} reg_state;
- u64 uverbs_cmd_mask;
int uverbs_abi_ver;
+ u64 uverbs_cmd_mask;
char node_desc[64];
__be64 node_guid;
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index c6b2962..4fae903 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -67,7 +67,6 @@
RDMA_PS_IPOIB = 0x0002,
RDMA_PS_TCP = 0x0106,
RDMA_PS_UDP = 0x0111,
- RDMA_PS_SCTP = 0x0183
};
struct rdma_addr {