Merge branch 'misc' into k.o/for-next
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 476de9a..2d1fb82 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -3999,7 +3999,8 @@ static void iboe_mcast_work_handler(struct work_struct *work)
kfree(mw);
}
-static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
+ enum ib_gid_type gid_type)
{
struct sockaddr_in *sin = (struct sockaddr_in *)addr;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
@@ -4009,8 +4010,8 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
} else if (addr->sa_family == AF_INET6) {
memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
} else {
- mgid->raw[0] = 0xff;
- mgid->raw[1] = 0x0e;
+ mgid->raw[0] = (gid_type == IB_GID_TYPE_IB) ? 0xff : 0;
+ mgid->raw[1] = (gid_type == IB_GID_TYPE_IB) ? 0x0e : 0;
mgid->raw[2] = 0;
mgid->raw[3] = 0;
mgid->raw[4] = 0;
@@ -4051,7 +4052,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
goto out1;
}
- cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
+ gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type);
mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
if (id_priv->id.ps == RDMA_PS_UDP)
@@ -4067,8 +4070,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
- gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
- rdma_start_port(id_priv->cma_dev->device)];
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2c98533..60535c7 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1383,8 +1383,9 @@ static int create_qp(struct ib_uverbs_file *file,
attr.rwq_ind_tbl = ind_tbl;
}
- if ((cmd_sz >= offsetof(typeof(*cmd), reserved1) +
- sizeof(cmd->reserved1)) && cmd->reserved1) {
+ if (cmd_sz > sizeof(*cmd) &&
+ !ib_is_udata_cleared(ucore, sizeof(*cmd),
+ cmd_sz - sizeof(*cmd))) {
ret = -EOPNOTSUPP;
goto err_put;
}
@@ -1482,11 +1483,21 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
IB_QP_CREATE_SCATTER_FCS |
- IB_QP_CREATE_CVLAN_STRIPPING)) {
+ IB_QP_CREATE_CVLAN_STRIPPING |
+ IB_QP_CREATE_SOURCE_QPN)) {
ret = -EINVAL;
goto err_put;
}
+ if (attr.create_flags & IB_QP_CREATE_SOURCE_QPN) {
+ if (!capable(CAP_NET_RAW)) {
+ ret = -EPERM;
+ goto err_put;
+ }
+
+ attr.source_qpn = cmd->source_qpn;
+ }
+
buf = (void *)cmd + sizeof(*cmd);
if (cmd_sz > sizeof(*cmd))
if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 40de69b..3d1de62 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1244,6 +1244,18 @@ int ib_resolve_eth_dmac(struct ib_device *device,
if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw)) {
rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
ah_attr->roce.dmac);
+ return 0;
+ }
+ if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ __be32 addr = 0;
+
+ memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
+ ip_eth_mc_map(addr, (char *)ah_attr->roce.dmac);
+ } else {
+ ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
+ (char *)ah_attr->roce.dmac);
+ }
} else {
union ib_gid sgid;
struct ib_gid_attr sgid_attr;
@@ -1624,15 +1636,53 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
/* Multicast groups */
+static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
+{
+ struct ib_qp_init_attr init_attr = {};
+ struct ib_qp_attr attr = {};
+ int num_eth_ports = 0;
+ int port;
+
+ /* If QP state >= init, it is assigned to a port and we can check this
+ * port only.
+ */
+ if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
+ if (attr.qp_state >= IB_QPS_INIT) {
+ if (qp->device->get_link_layer(qp->device, attr.port_num) !=
+ IB_LINK_LAYER_INFINIBAND)
+ return true;
+ goto lid_check;
+ }
+ }
+
+ /* Can't get a quick answer, iterate over all ports */
+ for (port = 0; port < qp->device->phys_port_cnt; port++)
+ if (qp->device->get_link_layer(qp->device, port) !=
+ IB_LINK_LAYER_INFINIBAND)
+ num_eth_ports++;
+
+ /* If we have at lease one Ethernet port, RoCE annex declares that
+ * multicast LID should be ignored. We can't tell at this step if the
+ * QP belongs to an IB or Ethernet port.
+ */
+ if (num_eth_ports)
+ return true;
+
+ /* If all the ports are IB, we can check according to IB spec. */
+lid_check:
+ return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
+ lid == be16_to_cpu(IB_LID_PERMISSIVE));
+}
+
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
if (!qp->device->attach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
- lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- lid == be16_to_cpu(IB_LID_PERMISSIVE))
+
+ if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
+ qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
ret = qp->device->attach_mcast(qp, gid, lid);
@@ -1648,9 +1698,9 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
if (!qp->device->detach_mcast)
return -ENOSYS;
- if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD ||
- lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
- lid == be16_to_cpu(IB_LID_PERMISSIVE))
+
+ if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
+ qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
ret = qp->device->detach_mcast(qp, gid, lid);
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index 88085f6..66d538c 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -8,7 +8,7 @@
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
- eprom.o file_ops.o firmware.o \
+ eprom.o exp_rcv.o file_ops.o firmware.o \
init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index e2cd2cd..a97055d 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -335,10 +335,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
sde->cpu = cpu;
cpumask_clear(&msix->mask);
cpumask_set_cpu(cpu, &msix->mask);
- dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
- msix->msix.vector, irq_type_names[msix->type],
+ dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
+ msix->irq, irq_type_names[msix->type],
sde->this_idx, cpu);
- irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ irq_set_affinity_hint(msix->irq, &msix->mask);
/*
* Set the new cpu in the hfi1_affinity_node and clean
@@ -387,7 +387,7 @@ static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
struct irq_affinity_notify *notify = &msix->notify;
- notify->irq = msix->msix.vector;
+ notify->irq = msix->irq;
notify->notify = hfi1_irq_notifier_notify;
notify->release = hfi1_irq_notifier_release;
@@ -472,10 +472,10 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
}
cpumask_set_cpu(cpu, &msix->mask);
- dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
- msix->msix.vector, irq_type_names[msix->type],
+ dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
+ msix->irq, irq_type_names[msix->type],
extra, cpu);
- irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ irq_set_affinity_hint(msix->irq, &msix->mask);
if (msix->type == IRQ_SDMA) {
sde->cpu = cpu;
@@ -533,7 +533,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
}
}
- irq_set_affinity_hint(msix->msix.vector, NULL);
+ irq_set_affinity_hint(msix->irq, NULL);
cpumask_clear(&msix->mask);
mutex_unlock(&node_affinity.lock);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index e78c7aa..2a1e374 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -75,24 +75,26 @@ struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void);
/* Initialize driver affinity data */
-int hfi1_dev_affinity_init(struct hfi1_devdata *);
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd);
/*
* Set IRQ affinity to a CPU. The function will determine the
* CPU and set the affinity to it.
*/
-int hfi1_get_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix);
/*
* Remove the IRQ's CPU affinity. This function also updates
* any internal CPU tracking data
*/
-void hfi1_put_irq_affinity(struct hfi1_devdata *, struct hfi1_msix_entry *);
+void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix);
/*
* Determine a CPU affinity for a user process, if the process does not
* have an affinity set yet.
*/
-int hfi1_get_proc_affinity(int);
+int hfi1_get_proc_affinity(int node);
/* Release a CPU used by a user process. */
-void hfi1_put_proc_affinity(int);
+void hfi1_put_proc_affinity(int cpu);
struct hfi1_affinity_node {
int node;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 94b54850..937350d 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1012,14 +1012,15 @@ static struct flag_table dc8051_info_err_flags[] = {
*/
static struct flag_table dc8051_info_host_msg_flags[] = {
FLAG_ENTRY0("Host request done", 0x0001),
- FLAG_ENTRY0("BC SMA message", 0x0002),
- FLAG_ENTRY0("BC PWR_MGM message", 0x0004),
+ FLAG_ENTRY0("BC PWR_MGM message", 0x0002),
+ FLAG_ENTRY0("BC SMA message", 0x0004),
FLAG_ENTRY0("BC Unknown message (BCC)", 0x0008),
FLAG_ENTRY0("BC Unknown message (LCB)", 0x0010),
FLAG_ENTRY0("External device config request", 0x0020),
FLAG_ENTRY0("VerifyCap all frames received", 0x0040),
FLAG_ENTRY0("LinkUp achieved", 0x0080),
FLAG_ENTRY0("Link going down", 0x0100),
+ FLAG_ENTRY0("Link width downgraded", 0x0200),
};
static u32 encoded_size(u32 size);
@@ -1066,6 +1067,8 @@ static int thermal_init(struct hfi1_devdata *dd);
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+ int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
@@ -6906,7 +6909,7 @@ static void reset_neighbor_info(struct hfi1_pportdata *ppd)
static const char * const link_down_reason_strs[] = {
[OPA_LINKDOWN_REASON_NONE] = "None",
- [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Recive error 0",
+ [OPA_LINKDOWN_REASON_RCV_ERROR_0] = "Receive error 0",
[OPA_LINKDOWN_REASON_BAD_PKT_LEN] = "Bad packet length",
[OPA_LINKDOWN_REASON_PKT_TOO_LONG] = "Packet too long",
[OPA_LINKDOWN_REASON_PKT_TOO_SHORT] = "Packet too short",
@@ -9373,13 +9376,13 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_HIGH_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable temperature too high\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP cable temperature too high\n",
+ __func__);
if ((qsfp_interrupt_status[0] & QSFP_LOW_TEMP_ALARM) ||
(qsfp_interrupt_status[0] & QSFP_LOW_TEMP_WARNING))
- dd_dev_info(dd, "%s: QSFP cable temperature too low\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP cable temperature too low\n",
+ __func__);
/*
* The remaining alarms/warnings don't matter if the link is down.
@@ -9389,75 +9392,75 @@ static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
if ((qsfp_interrupt_status[1] & QSFP_HIGH_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_HIGH_VCC_WARNING))
- dd_dev_info(dd, "%s: QSFP supply voltage too high\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP supply voltage too high\n",
+ __func__);
if ((qsfp_interrupt_status[1] & QSFP_LOW_VCC_ALARM) ||
(qsfp_interrupt_status[1] & QSFP_LOW_VCC_WARNING))
- dd_dev_info(dd, "%s: QSFP supply voltage too low\n",
- __func__);
+ dd_dev_err(dd, "%s: QSFP supply voltage too low\n",
+ __func__);
/* Byte 2 is vendor specific */
if ((qsfp_interrupt_status[3] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[3] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 1/2 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[3] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[3] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 1/2 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[4] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 3/4 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[4] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[4] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable RX channel 3/4 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable RX channel 3/4 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_ALARM) ||
(qsfp_interrupt_status[5] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[5] & QSFP_LOW_BIAS_ALARM) ||
(qsfp_interrupt_status[5] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 bias too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_ALARM) ||
(qsfp_interrupt_status[6] & QSFP_HIGH_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too high\n",
+ __func__);
if ((qsfp_interrupt_status[6] & QSFP_LOW_BIAS_ALARM) ||
(qsfp_interrupt_status[6] & QSFP_LOW_BIAS_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 bias too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 bias too low\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[7] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[7] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[7] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 1/2 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 1/2 power too low\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_HIGH_POWER_ALARM) ||
(qsfp_interrupt_status[8] & QSFP_HIGH_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 power too high\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 power too high\n",
+ __func__);
if ((qsfp_interrupt_status[8] & QSFP_LOW_POWER_ALARM) ||
(qsfp_interrupt_status[8] & QSFP_LOW_POWER_WARNING))
- dd_dev_info(dd, "%s: Cable TX channel 3/4 power too low\n",
- __func__);
+ dd_dev_err(dd, "%s: Cable TX channel 3/4 power too low\n",
+ __func__);
/* Bytes 9-10 and 11-12 are reserved */
/* Bytes 13-15 are vendor specific */
@@ -9742,17 +9745,6 @@ static inline int init_cpu_counters(struct hfi1_devdata *dd)
return 0;
}
-static const char * const pt_names[] = {
- "expected",
- "eager",
- "invalid"
-};
-
-static const char *pt_name(u32 type)
-{
- return type >= ARRAY_SIZE(pt_names) ? "unknown" : pt_names[type];
-}
-
/*
* index is the index into the receive array
*/
@@ -9774,15 +9766,14 @@ void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
type, index);
goto done;
}
-
- hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
- pt_name(type), index, pa, (unsigned long)order);
+ trace_hfi1_put_tid(dd, index, type, pa, order);
#define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */
reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
<< RCV_ARRAY_RT_ADDR_SHIFT;
+ trace_hfi1_write_rcvarray(base + (index * 8), reg);
writeq(reg, base + (index * 8));
if (type == PT_EAGER)
@@ -9810,15 +9801,6 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-struct ib_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr)
-{
- u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
-
- return (struct ib_header *)
- (rhf_addr - dd->rhf_offset + offset);
-}
-
static const char * const ib_cfg_name_strings[] = {
"HFI1_IB_CFG_LIDLMC",
"HFI1_IB_CFG_LWID_DG_ENB",
@@ -10037,28 +10019,6 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
sdma_update_lmc(dd, mask, ppd->lid);
}
-static int wait_phy_linkstate(struct hfi1_devdata *dd, u32 state, u32 msecs)
-{
- unsigned long timeout;
- u32 curr_state;
-
- timeout = jiffies + msecs_to_jiffies(msecs);
- while (1) {
- curr_state = read_physical_state(dd);
- if (curr_state == state)
- break;
- if (time_after(jiffies, timeout)) {
- dd_dev_err(dd,
- "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
- state, curr_state);
- return -ETIMEDOUT;
- }
- usleep_range(1950, 2050); /* sleep 2ms-ish */
- }
-
- return 0;
-}
-
static const char *state_completed_string(u32 completed)
{
static const char * const state_completed[] = {
@@ -10292,7 +10252,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
if (do_wait) {
/* it can take a while for the link to go down */
- ret = wait_phy_linkstate(dd, PLS_OFFLINE, 10000);
+ ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
if (ret < 0)
return ret;
}
@@ -10545,6 +10505,19 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
goto unexpected;
}
+ /*
+ * Wait for Link_Up physical state.
+ * Physical and Logical states should already be
+ * be transitioned to LinkUp and LinkInit respectively.
+ */
+ ret = wait_physical_linkstate(ppd, PLS_LINKUP, 1000);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: physical state did not change to LINK-UP\n",
+ __func__);
+ break;
+ }
+
ret = wait_logical_linkstate(ppd, IB_PORT_INIT, 1000);
if (ret) {
dd_dev_err(dd,
@@ -10658,6 +10631,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
*/
if (ret)
goto_offline(ppd, 0);
+ else
+ cache_physical_state(ppd);
break;
case HLS_DN_DISABLE:
/* link is disabled */
@@ -10682,6 +10657,13 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret = -EINVAL;
break;
}
+ ret = wait_physical_linkstate(ppd, PLS_DISABLED, 10000);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: physical state did not change to DISABLED\n",
+ __func__);
+ break;
+ }
dc_shutdown(dd);
}
ppd->host_link_state = HLS_DN_DISABLE;
@@ -10699,6 +10681,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (ppd->host_link_state != HLS_DN_POLL)
goto unexpected;
ppd->host_link_state = HLS_VERIFY_CAP;
+ cache_physical_state(ppd);
break;
case HLS_GOING_UP:
if (ppd->host_link_state != HLS_VERIFY_CAP)
@@ -12672,21 +12655,56 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
return -ETIMEDOUT;
}
-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd)
+/*
+ * Read the physical hardware link state and set the driver's cached value
+ * of it.
+ */
+void cache_physical_state(struct hfi1_pportdata *ppd)
{
- u32 pstate;
+ u32 read_pstate;
u32 ib_pstate;
- pstate = read_physical_state(ppd->dd);
- ib_pstate = chip_to_opa_pstate(ppd->dd, pstate);
- if (ppd->last_pstate != ib_pstate) {
+ read_pstate = read_physical_state(ppd->dd);
+ ib_pstate = chip_to_opa_pstate(ppd->dd, read_pstate);
+ /* check if OPA pstate changed */
+ if (chip_to_opa_pstate(ppd->dd, ppd->pstate) != ib_pstate) {
dd_dev_info(ppd->dd,
"%s: physical state changed to %s (0x%x), phy 0x%x\n",
__func__, opa_pstate_name(ib_pstate), ib_pstate,
- pstate);
- ppd->last_pstate = ib_pstate;
+ read_pstate);
}
- return ib_pstate;
+ ppd->pstate = read_pstate;
+}
+
+/*
+ * wait_physical_linkstate - wait for an physical link state change to occur
+ * @ppd: port device
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for physical link state change to occur.
+ * Returns 0 if state reached, otherwise -ETIMEDOUT.
+ */
+static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
+ int msecs)
+{
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ cache_physical_state(ppd);
+ if (ppd->pstate == state)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link state 0x%x, current state is 0x%x\n",
+ state, ppd->pstate);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ return 0;
}
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
@@ -12809,30 +12827,24 @@ static void clean_up_interrupts(struct hfi1_devdata *dd)
for (i = 0; i < dd->num_msix_entries; i++, me++) {
if (!me->arg) /* => no irq, no affinity */
continue;
- hfi1_put_irq_affinity(dd, &dd->msix_entries[i]);
- free_irq(me->msix.vector, me->arg);
+ hfi1_put_irq_affinity(dd, me);
+ free_irq(me->irq, me->arg);
}
+
+ /* clean structures */
+ kfree(dd->msix_entries);
+ dd->msix_entries = NULL;
+ dd->num_msix_entries = 0;
} else {
/* INTx */
if (dd->requested_intx_irq) {
free_irq(dd->pcidev->irq, dd);
dd->requested_intx_irq = 0;
}
- }
-
- /* turn off interrupts */
- if (dd->num_msix_entries) {
- /* MSI-X */
- pci_disable_msix(dd->pcidev);
- } else {
- /* INTx */
disable_intx(dd->pcidev);
}
- /* clean structures */
- kfree(dd->msix_entries);
- dd->msix_entries = NULL;
- dd->num_msix_entries = 0;
+ pci_free_irq_vectors(dd->pcidev);
}
/*
@@ -12986,13 +12998,21 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
continue;
/* make sure the name is terminated */
me->name[sizeof(me->name) - 1] = 0;
+ me->irq = pci_irq_vector(dd->pcidev, i);
+ /*
+ * On err return me->irq. Don't need to clear this
+ * because 'arg' has not been set, and cleanup will
+ * do the right thing.
+ */
+ if (me->irq < 0)
+ return me->irq;
- ret = request_threaded_irq(me->msix.vector, handler, thread, 0,
+ ret = request_threaded_irq(me->irq, handler, thread, 0,
me->name, arg);
if (ret) {
dd_dev_err(dd,
- "unable to allocate %s interrupt, vector %d, index %d, err %d\n",
- err_info, me->msix.vector, idx, ret);
+ "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
+ err_info, me->irq, idx, ret);
return ret;
}
/*
@@ -13003,8 +13023,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd)
ret = hfi1_get_irq_affinity(dd, me);
if (ret)
- dd_dev_err(dd,
- "unable to pin IRQ %d\n", ret);
+ dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
}
return ret;
@@ -13023,7 +13042,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
- synchronize_irq(me->msix.vector);
+ synchronize_irq(me->irq);
}
}
@@ -13036,7 +13055,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
return;
hfi1_put_irq_affinity(dd, me);
- free_irq(me->msix.vector, me->arg);
+ free_irq(me->irq, me->arg);
me->arg = NULL;
}
@@ -13064,14 +13083,19 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
me->name[sizeof(me->name) - 1] = 0;
me->type = IRQ_RCVCTXT;
-
+ me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
+ if (me->irq < 0) {
+ dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n",
+ idx, me->irq);
+ return;
+ }
remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
- ret = request_threaded_irq(me->msix.vector, receive_context_interrupt,
+ ret = request_threaded_irq(me->irq, receive_context_interrupt,
receive_context_thread, 0, me->name, arg);
if (ret) {
- dd_dev_err(dd, "vnic irq request (vector %d, idx %d) fail %d\n",
- me->msix.vector, idx, ret);
+ dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
+ me->irq, idx, ret);
return;
}
/*
@@ -13084,7 +13108,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
if (ret) {
dd_dev_err(dd,
"unable to pin IRQ %d\n", ret);
- free_irq(me->msix.vector, me->arg);
+ free_irq(me->irq, me->arg);
}
}
@@ -13107,9 +13131,8 @@ static void reset_interrupts(struct hfi1_devdata *dd)
static int set_up_interrupts(struct hfi1_devdata *dd)
{
- struct hfi1_msix_entry *entries;
- u32 total, request;
- int i, ret;
+ u32 total;
+ int ret, request;
int single_interrupt = 0; /* we expect to have all the interrupts */
/*
@@ -13121,39 +13144,31 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
*/
total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT;
- entries = kcalloc(total, sizeof(*entries), GFP_KERNEL);
- if (!entries) {
- ret = -ENOMEM;
- goto fail;
- }
- /* 1-1 MSI-X entry assignment */
- for (i = 0; i < total; i++)
- entries[i].msix.entry = i;
-
/* ask for MSI-X interrupts */
- request = total;
- request_msix(dd, &request, entries);
-
- if (request == 0) {
+ request = request_msix(dd, total);
+ if (request < 0) {
+ ret = request;
+ goto fail;
+ } else if (request == 0) {
/* using INTx */
/* dd->num_msix_entries already zero */
- kfree(entries);
single_interrupt = 1;
dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
+ } else if (request < total) {
+ /* using MSI-X, with reduced interrupts */
+ dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n",
+ total, request);
+ ret = -EINVAL;
+ goto fail;
} else {
- /* using MSI-X */
- dd->num_msix_entries = request;
- dd->msix_entries = entries;
-
- if (request != total) {
- /* using MSI-X, with reduced interrupts */
- dd_dev_err(
- dd,
- "cannot handle reduced interrupt case, want %u, got %u\n",
- total, request);
- ret = -EINVAL;
+ dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
+ GFP_KERNEL);
+ if (!dd->msix_entries) {
+ ret = -ENOMEM;
goto fail;
}
+ /* using MSI-X */
+ dd->num_msix_entries = total;
dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
}
@@ -14793,7 +14808,7 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* start in offline */
ppd->host_link_state = HLS_DN_OFFLINE;
init_vl_arb_caches(ppd);
- ppd->last_pstate = 0xff; /* invalid value */
+ ppd->pstate = PLS_OFFLINE;
}
dd->link_default = HLS_DN_POLL;
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index cbe455d..3dab315 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -744,6 +744,7 @@ int is_bx(struct hfi1_devdata *dd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
u32 get_logical_state(struct hfi1_pportdata *ppd);
+void cache_physical_state(struct hfi1_pportdata *ppd);
const char *opa_lstate_name(u32 lstate);
const char *opa_pstate_name(u32 pstate);
u32 driver_physical_state(struct hfi1_pportdata *ppd);
@@ -1347,8 +1348,6 @@ enum {
u64 get_all_cpu_total(u64 __percpu *cntr);
void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
-struct ib_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr);
void hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
u32 type, unsigned long pa, u16 order);
@@ -1356,7 +1355,6 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd);
void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt);
u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp);
u32 hfi1_read_portcntrs(struct hfi1_pportdata *ppd, char **namep, u64 **cntrp);
-u8 hfi1_ibphys_portstate(struct hfi1_pportdata *ppd);
int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which);
int hfi1_set_ib_cfg(struct hfi1_pportdata *ppd, int which, u32 val);
int hfi1_set_ctxt_jkey(struct hfi1_devdata *dd, unsigned ctxt, u16 jkey);
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 995d62c..ba9ab97 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -325,6 +325,7 @@ struct diag_pkt {
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
/* misc. */
+#define SC15_PACKET 0xF
#define SIZE_OF_CRC 1
#define LIM_MGMT_P_KEY 0x7FFF
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 79a8b05..26628cb 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -223,6 +223,20 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
(offset * RCV_BUF_BLOCK_SIZE));
}
+static inline void *hfi1_get_header(struct hfi1_devdata *dd,
+ __le32 *rhf_addr)
+{
+ u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
+
+ return (void *)(rhf_addr - dd->rhf_offset + offset);
+}
+
+static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd,
+ __le32 *rhf_addr)
+{
+ return (struct ib_header *)hfi1_get_header(dd, rhf_addr);
+}
+
/*
* Validate and encode the a given RcvArray Buffer size.
* The function will check whether the given size falls within
@@ -248,7 +262,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
{
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
- int lnh = ib_get_lnh(rhdr);
+ u8 lnh = ib_get_lnh(rhdr);
+ bool has_grh = false;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -256,37 +271,42 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR))
return;
+ if (lnh == HFI1_LRH_BTH) {
+ packet->ohdr = &rhdr->u.oth;
+ } else if (lnh == HFI1_LRH_GRH) {
+ has_grh = true;
+ packet->ohdr = &rhdr->u.l.oth;
+ packet->grh = &rhdr->u.l.grh;
+ } else {
+ goto drop;
+ }
+
if (packet->rhf & RHF_TID_ERR) {
/* For TIDERR and RC QPs preemptively schedule a NAK */
- struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = ib_get_dlid(rhdr);
+ u32 dlid = ib_get_dlid(rhdr);
u32 qp_num;
- u32 rcv_flags = 0;
+ u32 mlid_base = be16_to_cpu(IB_MULTICAST_LID_BASE);
/* Sanity check packet */
if (tlen < 24)
goto drop;
/* Check for GRH */
- if (lnh == HFI1_LRH_BTH) {
- ohdr = &rhdr->u.oth;
- } else if (lnh == HFI1_LRH_GRH) {
+ if (has_grh) {
u32 vtf;
+ struct ib_grh *grh = packet->grh;
- ohdr = &rhdr->u.l.oth;
- if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ if (grh->next_hdr != IB_GRH_NEXT_HDR)
goto drop;
- vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
+ vtf = be32_to_cpu(grh->version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
- rcv_flags |= HFI1_HAS_GRH;
- } else {
- goto drop;
}
+
/* Get the destination QP number. */
- qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+ qp_num = ib_bth_get_qpn(packet->ohdr);
+ if (dlid < mlid_base) {
struct rvt_qp *qp;
unsigned long flags;
@@ -311,11 +331,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- hfi1_rc_hdrerr(
- rcd,
- rhdr,
- rcv_flags,
- qp);
+ hfi1_rc_hdrerr(rcd, packet, qp);
break;
default:
/* For now don't handle any other QP types */
@@ -331,9 +347,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
switch (rte) {
case RHF_RTE_ERROR_OP_CODE_ERR:
{
- u32 opcode;
void *ebuf = NULL;
- __be32 *bth = NULL;
+ u8 opcode;
if (rhf_use_egr_bfr(packet->rhf))
ebuf = packet->ebuf;
@@ -341,16 +356,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (!ebuf)
goto drop; /* this should never happen */
- if (lnh == HFI1_LRH_BTH)
- bth = (__be32 *)ebuf;
- else if (lnh == HFI1_LRH_GRH)
- bth = (__be32 *)((char *)ebuf + sizeof(struct ib_grh));
- else
- goto drop;
-
- opcode = be32_to_cpu(bth[0]) >> 24;
- opcode &= 0xff;
-
+ opcode = ib_bth_get_opcode(packet->ohdr);
if (opcode == IB_OPCODE_CNP) {
/*
* Only in pre-B0 h/w is the CNP_OPCODE handled
@@ -364,7 +370,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
sc5 = hfi1_9B_get_sc5(rhdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
- lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK;
+ lqpn = ib_bth_get_qpn(packet->ohdr);
rcu_read_lock();
qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn);
if (!qp) {
@@ -414,7 +420,6 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd,
packet->rhf = rhf_to_cpu(packet->rhf_addr);
packet->rhqoff = rcd->head;
packet->numpkt = 0;
- packet->rcv_flags = 0;
}
void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
@@ -423,21 +428,18 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct ib_header *hdr = pkt->hdr;
struct ib_other_headers *ohdr = pkt->ohdr;
- struct ib_grh *grh = NULL;
+ struct ib_grh *grh = pkt->grh;
u32 rqpn = 0, bth1;
u16 rlid, dlid = ib_get_dlid(hdr);
u8 sc, svc_type;
bool is_mcast = false;
- if (pkt->rcv_flags & HFI1_HAS_GRH)
- grh = &hdr->u.l.grh;
-
switch (qp->ibqp.qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_UD:
rlid = ib_get_slid(hdr);
- rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
+ rqpn = ib_get_sqpn(ohdr);
svc_type = IB_CC_SVCTYPE_UD;
is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
(dlid != be16_to_cpu(IB_LID_PERMISSIVE));
@@ -460,7 +462,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bth1 = be32_to_cpu(ohdr->bth[1]);
if (do_cnp && (bth1 & IB_FECN_SMASK)) {
- u16 pkey = (u16)be32_to_cpu(ohdr->bth[0]);
+ u16 pkey = ib_bth_get_pkey(ohdr);
return_cnp(ibp, qp, rqpn, pkey, dlid, rlid, sc, grh);
}
@@ -590,9 +592,10 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (lnh == HFI1_LRH_BTH) {
packet->ohdr = &hdr->u.oth;
+ packet->grh = NULL;
} else if (lnh == HFI1_LRH_GRH) {
packet->ohdr = &hdr->u.l.oth;
- packet->rcv_flags |= HFI1_HAS_GRH;
+ packet->grh = &hdr->u.l.grh;
} else {
goto next; /* just in case */
}
@@ -697,10 +700,8 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
int ret;
- packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
- packet->rhf_addr);
- packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
packet->etype = rhf_rcv_type(packet->rhf);
+
/* total length */
packet->tlen = rhf_pkt_len(packet->rhf); /* in bytes */
/* retrieve eager buffer details */
@@ -758,7 +759,7 @@ static inline void process_rcv_update(int last, struct hfi1_packet *packet)
packet->etail, 0, 0);
packet->updegr = 0;
}
- packet->rcv_flags = 0;
+ packet->grh = NULL;
}
static inline void finish_packet(struct hfi1_packet *packet)
@@ -895,16 +896,21 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
struct hfi1_devdata *dd)
{
struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
- struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
- packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
+ u8 sc = SC15_PACKET;
- if (etype == RHF_RCV_TYPE_IB &&
- hfi1_9B_get_sc5(hdr, packet->rhf) != 0xf) {
- int hwstate = read_logical_state(dd);
+ if (etype == RHF_RCV_TYPE_IB) {
+ struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+ packet->rhf_addr);
+ sc = hfi1_9B_get_sc5(hdr, packet->rhf);
+ }
+ if (sc != SC15_PACKET) {
+ int hwstate = driver_lstate(rcd->ppd);
- if (hwstate != LSTATE_ACTIVE) {
- dd_dev_info(dd, "Unexpected link state %d\n", hwstate);
+ if (hwstate != IB_PORT_ACTIVE) {
+ dd_dev_info(dd,
+ "Unexpected link state %s\n",
+ opa_lstate_name(hwstate));
return 0;
}
@@ -1320,6 +1326,58 @@ int hfi1_reset_device(int unit)
return ret;
}
+static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
+{
+ packet->hdr = (struct hfi1_ib_message_header *)
+ hfi1_get_msgheader(packet->rcd->dd,
+ packet->rhf_addr);
+ packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
+}
+
+static int hfi1_setup_9B_packet(struct hfi1_packet *packet)
+{
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
+ struct ib_header *hdr;
+ u8 lnh;
+
+ hfi1_setup_ib_header(packet);
+ hdr = packet->hdr;
+
+ lnh = ib_get_lnh(hdr);
+ if (lnh == HFI1_LRH_BTH) {
+ packet->ohdr = &hdr->u.oth;
+ packet->grh = NULL;
+ } else if (lnh == HFI1_LRH_GRH) {
+ u32 vtf;
+
+ packet->ohdr = &hdr->u.l.oth;
+ packet->grh = &hdr->u.l.grh;
+ if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
+ goto drop;
+ vtf = be32_to_cpu(packet->grh->version_tclass_flow);
+ if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+ goto drop;
+ } else {
+ goto drop;
+ }
+
+ /* Query commonly used fields from packet header */
+ packet->opcode = ib_bth_get_opcode(packet->ohdr);
+ packet->slid = ib_get_slid(hdr);
+ packet->dlid = ib_get_dlid(hdr);
+ packet->sl = ib_get_sl(hdr);
+ packet->sc = hfi1_9B_get_sc5(hdr, packet->rhf);
+ packet->pad = ib_bth_get_pad(packet->ohdr);
+ packet->extra_byte = 0;
+ packet->fecn = ib_bth_get_fecn(packet->ohdr);
+ packet->becn = ib_bth_get_becn(packet->ohdr);
+
+ return 0;
+drop:
+ ibp->rvp.n_pkt_drops++;
+ return -EINVAL;
+}
+
void handle_eflags(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
@@ -1350,6 +1408,9 @@ int process_receive_ib(struct hfi1_packet *packet)
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
+ if (hfi1_setup_9B_packet(packet))
+ return RHF_RCV_CONTINUE;
+
trace_hfi1_rcvhdr(packet->rcd->ppd->dd,
packet->rcd->ctxt,
rhf_err_flags(packet->rhf),
@@ -1421,6 +1482,7 @@ int process_receive_error(struct hfi1_packet *packet)
rhf_rcv_type_err(packet->rhf) == 3))
return RHF_RCV_CONTINUE;
+ hfi1_setup_ib_header(packet);
handle_eflags(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
@@ -1434,6 +1496,8 @@ int kdeth_process_expected(struct hfi1_packet *packet)
{
if (unlikely(hfi1_dbg_fault_packet(packet)))
return RHF_RCV_CONTINUE;
+
+ hfi1_setup_ib_header(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@@ -1444,6 +1508,7 @@ int kdeth_process_expected(struct hfi1_packet *packet)
int kdeth_process_eager(struct hfi1_packet *packet)
{
+ hfi1_setup_ib_header(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
if (unlikely(hfi1_dbg_fault_packet(packet)))
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 26da124..d46b171 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -250,7 +250,6 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
{
void *buffer;
void *p;
- u32 length;
int ret;
buffer = kmalloc(P1_SIZE, GFP_KERNEL);
@@ -265,13 +264,13 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
/* scan for image magic that may trail the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
- if (p)
- length = p - buffer;
- else
- length = P1_SIZE;
+ if (!p) {
+ kfree(buffer);
+ return -ENOENT;
+ }
*data = buffer;
- *size = length;
+ *size = p - buffer;
return 0;
}
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
new file mode 100644
index 0000000..0af9167
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "exp_rcv.h"
+#include "trace.h"
+
+/**
+ * exp_tid_group_init - initialize exp_tid_set
+ * @set - the set
+ */
+void hfi1_exp_tid_group_init(struct exp_tid_set *set)
+{
+ INIT_LIST_HEAD(&set->list);
+ set->count = 0;
+}
+
+/**
+ * alloc_ctxt_rcv_groups - initialize expected receive groups
+ * @rcd - the context to add the groupings to
+ */
+int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
+{
+ struct hfi1_devdata *dd = rcd->dd;
+ u32 tidbase;
+ struct tid_group *grp;
+ int i;
+
+ tidbase = rcd->expected_base;
+ for (i = 0; i < rcd->expected_count /
+ dd->rcv_entries.group_size; i++) {
+ grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+ if (!grp)
+ goto bail;
+ grp->size = dd->rcv_entries.group_size;
+ grp->base = tidbase;
+ tid_group_add_tail(grp, &rcd->tid_group_list);
+ tidbase += dd->rcv_entries.group_size;
+ }
+
+ return 0;
+bail:
+ hfi1_free_ctxt_rcv_groups(rcd);
+ return -ENOMEM;
+}
+
+/**
+ * free_ctxt_rcv_groups - free expected receive groups
+ * @rcd - the context to free
+ *
+ * The routine dismantles the expect receive linked
+ * list and clears any tids associated with the receive
+ * context.
+ *
+ * This should only be called for kernel contexts and the
+ * a base user context.
+ */
+void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
+{
+ struct tid_group *grp, *gptr;
+
+ WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list));
+ WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list));
+
+ list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) {
+ tid_group_remove(grp, &rcd->tid_group_list);
+ kfree(grp);
+ }
+
+ hfi1_clear_tids(rcd);
+}
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
new file mode 100644
index 0000000..c7d02bcd
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -0,0 +1,187 @@
+#ifndef _HFI1_EXP_RCV_H
+#define _HFI1_EXP_RCV_H
+/*
+ * Copyright(c) 2017 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+
+#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
+
+#define EXP_TID_TIDLEN_MASK 0x7FFULL
+#define EXP_TID_TIDLEN_SHIFT 0
+#define EXP_TID_TIDCTRL_MASK 0x3ULL
+#define EXP_TID_TIDCTRL_SHIFT 20
+#define EXP_TID_TIDIDX_MASK 0x3FFULL
+#define EXP_TID_TIDIDX_SHIFT 22
+#define EXP_TID_GET(tid, field) \
+ (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
+
+#define EXP_TID_SET(field, value) \
+ (((value) & EXP_TID_TID##field##_MASK) << \
+ EXP_TID_TID##field##_SHIFT)
+#define EXP_TID_CLEAR(tid, field) ({ \
+ (tid) &= ~(EXP_TID_TID##field##_MASK << \
+ EXP_TID_TID##field##_SHIFT); \
+ })
+#define EXP_TID_RESET(tid, field, value) do { \
+ EXP_TID_CLEAR(tid, field); \
+ (tid) |= EXP_TID_SET(field, (value)); \
+ } while (0)
+
+/*
+ * Define fields in the KDETH header so we can update the header
+ * template.
+ */
+#define KDETH_OFFSET_SHIFT 0
+#define KDETH_OFFSET_MASK 0x7fff
+#define KDETH_OM_SHIFT 15
+#define KDETH_OM_MASK 0x1
+#define KDETH_TID_SHIFT 16
+#define KDETH_TID_MASK 0x3ff
+#define KDETH_TIDCTRL_SHIFT 26
+#define KDETH_TIDCTRL_MASK 0x3
+#define KDETH_INTR_SHIFT 28
+#define KDETH_INTR_MASK 0x1
+#define KDETH_SH_SHIFT 29
+#define KDETH_SH_MASK 0x1
+#define KDETH_KVER_SHIFT 30
+#define KDETH_KVER_MASK 0x3
+#define KDETH_JKEY_SHIFT 0x0
+#define KDETH_JKEY_MASK 0xff
+#define KDETH_HCRC_UPPER_SHIFT 16
+#define KDETH_HCRC_UPPER_MASK 0xff
+#define KDETH_HCRC_LOWER_SHIFT 24
+#define KDETH_HCRC_LOWER_MASK 0xff
+
+#define KDETH_GET(val, field) \
+ (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
+#define KDETH_SET(dw, field, val) do { \
+ u32 dwval = le32_to_cpu(dw); \
+ dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
+ dwval |= (((val) & KDETH_##field##_MASK) << \
+ KDETH_##field##_SHIFT); \
+ dw = cpu_to_le32(dwval); \
+ } while (0)
+
+#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); })
+
+/* KDETH OM multipliers and switch over point */
+#define KDETH_OM_SMALL 4
+#define KDETH_OM_SMALL_SHIFT 2
+#define KDETH_OM_LARGE 64
+#define KDETH_OM_LARGE_SHIFT 6
+#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
+
+struct tid_group {
+ struct list_head list;
+ u32 base;
+ u8 size;
+ u8 used;
+ u8 map;
+};
+
+/*
+ * Write an "empty" RcvArray entry.
+ * This function exists so the TID registaration code can use it
+ * to write to unused/unneeded entries and still take advantage
+ * of the WC performance improvements. The HFI will ignore this
+ * write to the RcvArray entry.
+ */
+static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
+{
+ /*
+ * Doing the WC fill writes only makes sense if the device is
+ * present and the RcvArray has been mapped as WC memory.
+ */
+ if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
+ writeq(0, dd->rcvarray_wc + (index * 8));
+}
+
+static inline void tid_group_add_tail(struct tid_group *grp,
+ struct exp_tid_set *set)
+{
+ list_add_tail(&grp->list, &set->list);
+ set->count++;
+}
+
+static inline void tid_group_remove(struct tid_group *grp,
+ struct exp_tid_set *set)
+{
+ list_del_init(&grp->list);
+ set->count--;
+}
+
+static inline void tid_group_move(struct tid_group *group,
+ struct exp_tid_set *s1,
+ struct exp_tid_set *s2)
+{
+ tid_group_remove(group, s1);
+ tid_group_add_tail(group, s2);
+}
+
+static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
+{
+ struct tid_group *grp =
+ list_first_entry(&set->list, struct tid_group, list);
+ list_del_init(&grp->list);
+ set->count--;
+ return grp;
+}
+
+static inline u32 rcventry2tidinfo(u32 rcventry)
+{
+ u32 pair = rcventry & ~0x1;
+
+ return EXP_TID_SET(IDX, pair >> 1) |
+ EXP_TID_SET(CTRL, 1 << (rcventry - pair));
+}
+
+int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
+void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
+void hfi1_exp_tid_group_init(struct exp_tid_set *set);
+
+#endif /* _HFI1_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 3158128..bbf80b1 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -774,6 +774,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
*ev = 0;
__clear_bit(fdata->subctxt, uctxt->in_use_ctxts);
+ fdata->uctxt = NULL;
+ hfi1_rcd_put(uctxt); /* fdata reference */
if (!bitmap_empty(uctxt->in_use_ctxts, HFI1_MAX_SHARED_CTXTS)) {
mutex_unlock(&hfi1_mutex);
goto done;
@@ -794,17 +796,16 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
/* Clear the context's J_KEY */
hfi1_clear_ctxt_jkey(dd, uctxt->ctxt);
/*
- * Reset context integrity checks to default.
- * (writes to CSRs probably belong in chip.c)
+ * If a send context is allocated, reset context integrity
+ * checks to default and disable the send context.
*/
- write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE,
- hfi1_pkt_default_send_ctxt_mask(dd, uctxt->sc->type));
- sc_disable(uctxt->sc);
+ if (uctxt->sc) {
+ set_pio_integrity(uctxt->sc);
+ sc_disable(uctxt->sc);
+ }
spin_unlock_irqrestore(&dd->uctxt_lock, flags);
- dd->rcd[uctxt->ctxt] = NULL;
-
- hfi1_user_exp_rcv_grp_free(uctxt);
+ hfi1_free_ctxt_rcv_groups(uctxt);
hfi1_clear_ctxt_pkey(dd, uctxt);
uctxt->rcvwait_to = 0;
@@ -816,8 +817,11 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
hfi1_stats.sps_ctxts--;
if (++dd->freectxts == dd->num_user_contexts)
aspm_enable_all(dd);
+
+ /* _rcd_put() should be done after releasing mutex */
+ dd->rcd[uctxt->ctxt] = NULL;
mutex_unlock(&hfi1_mutex);
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_rcd_put(uctxt); /* dd reference */
done:
mmdrop(fdata->mm);
kobject_put(&dd->kobj);
@@ -887,16 +891,17 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
ret = wait_event_interruptible(fd->uctxt->wait, !test_bit(
HFI1_CTXT_BASE_UNINIT,
&fd->uctxt->event_flags));
- if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags)) {
- clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
- return -ENOMEM;
- }
+ if (test_bit(HFI1_CTXT_BASE_FAILED, &fd->uctxt->event_flags))
+ ret = -ENOMEM;
+
/* The only thing a sub context needs is the user_xxx stuff */
if (!ret)
ret = init_user_ctxt(fd);
- if (ret)
+ if (ret) {
clear_bit(fd->subctxt, fd->uctxt->in_use_ctxts);
+ hfi1_rcd_put(fd->uctxt);
+ }
} else if (!ret) {
ret = setup_base_ctxt(fd);
if (fd->uctxt->subctxt_cnt) {
@@ -961,6 +966,8 @@ static int find_sub_ctxt(struct hfi1_filedata *fd,
fd->uctxt = uctxt;
fd->subctxt = subctxt;
+
+ hfi1_rcd_get(uctxt);
__set_bit(fd->subctxt, uctxt->in_use_ctxts);
return 1;
@@ -1069,11 +1076,14 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
aspm_disable_all(dd);
fd->uctxt = uctxt;
+ /* Count the reference for the fd */
+ hfi1_rcd_get(uctxt);
+
return 0;
ctxdata_free:
dd->rcd[ctxt] = NULL;
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_rcd_put(uctxt);
return ret;
}
@@ -1260,7 +1270,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd)
if (ret)
goto setup_failed;
- ret = hfi1_user_exp_rcv_grp_init(fd);
+ ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
if (ret)
goto setup_failed;
@@ -1273,6 +1283,7 @@ static int setup_base_ctxt(struct hfi1_filedata *fd)
return 0;
setup_failed:
+ /* Call _free_ctxtdata, not _rcd_put(). We still need the context. */
hfi1_free_ctxtdata(dd, uctxt);
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 414a04a..1a33a50 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -213,11 +213,9 @@ struct hfi1_ctxtdata {
/* dynamic receive available interrupt timeout */
u32 rcvavail_timeout;
- /*
- * number of opens (including slave sub-contexts) on this instance
- * (ignoring forks, dup, etc. for now)
- */
- int cnt;
+ /* Reference count the base context usage */
+ struct kref kref;
+
/* Device context index */
unsigned ctxt;
/*
@@ -356,17 +354,26 @@ struct hfi1_packet {
__le32 *rhf_addr;
struct rvt_qp *qp;
struct ib_other_headers *ohdr;
+ struct ib_grh *grh;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
+ u32 dlid;
+ u32 slid;
u16 tlen;
s16 etail;
u8 hlen;
u8 numpkt;
u8 rsize;
u8 updegr;
- u8 rcv_flags;
u8 etype;
+ u8 extra_byte;
+ u8 pad;
+ u8 sc;
+ u8 sl;
+ u8 opcode;
+ bool becn;
+ bool fecn;
};
struct rvt_sge_state;
@@ -512,7 +519,7 @@ static inline void incr_cntr32(u32 *cntr)
#define MAX_NAME_SIZE 64
struct hfi1_msix_entry {
enum irq_type type;
- struct msix_entry msix;
+ int irq;
void *arg;
char name[MAX_NAME_SIZE];
cpumask_t mask;
@@ -654,7 +661,7 @@ struct hfi1_pportdata {
u8 link_enabled; /* link enabled? */
u8 linkinit_reason;
u8 local_tx_rate; /* rate given to 8051 firmware */
- u8 last_pstate; /* info only */
+ u8 pstate; /* info only */
u8 qsfp_retry_count;
/* placeholders for IB MAD packet settings */
@@ -1282,7 +1289,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
struct hfi1_devdata *dd, u8 hw_pidx, u8 port);
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd);
-
+int hfi1_rcd_put(struct hfi1_ctxtdata *rcd);
+void hfi1_rcd_get(struct hfi1_ctxtdata *rcd);
int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
@@ -1321,6 +1329,22 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
return ppd->lstate;
}
+/* return the driver's idea of the physical OPA port state */
+static inline u32 driver_pstate(struct hfi1_pportdata *ppd)
+{
+ /*
+ * The driver does some processing from the time the physical
+ * link state is at LINKUP to the time the SM can be notified
+ * as such. Return IB_PORTPHYSSTATE_TRAINING until the software
+ * state is ready.
+ */
+ if (ppd->pstate == PLS_LINKUP &&
+ !(ppd->host_link_state & HLS_UP))
+ return IB_PORTPHYSSTATE_TRAINING;
+ else
+ return chip_to_opa_pstate(ppd->dd, ppd->pstate);
+}
+
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
@@ -1829,9 +1853,7 @@ void hfi1_pcie_cleanup(struct pci_dev *pdev);
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
int pcie_speeds(struct hfi1_devdata *dd);
-void request_msix(struct hfi1_devdata *dd, u32 *nent,
- struct hfi1_msix_entry *entry);
-void hfi1_enable_intx(struct pci_dev *pdev);
+int request_msix(struct hfi1_devdata *dd, u32 msireq);
void restore_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
@@ -2087,52 +2109,13 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
#define DD_DEV_ASSIGN(dd) __assign_str(dev, dev_name(&(dd)->pcidev->dev))
-#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
-#define show_packettype(etype) \
-__print_symbolic(etype, \
- packettype_name(EXPECTED), \
- packettype_name(EAGER), \
- packettype_name(IB), \
- packettype_name(ERROR), \
- packettype_name(BYPASS))
-
-#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
-#define show_ib_opcode(opcode) \
-__print_symbolic(opcode, \
- ib_opcode_name(RC_SEND_FIRST), \
- ib_opcode_name(RC_SEND_MIDDLE), \
- ib_opcode_name(RC_SEND_LAST), \
- ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_SEND_ONLY), \
- ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_FIRST), \
- ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(RC_RDMA_WRITE_LAST), \
- ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY), \
- ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(RC_RDMA_READ_REQUEST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
- ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
- ib_opcode_name(RC_ACKNOWLEDGE), \
- ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
- ib_opcode_name(RC_COMPARE_SWAP), \
- ib_opcode_name(RC_FETCH_ADD), \
- ib_opcode_name(UC_SEND_FIRST), \
- ib_opcode_name(UC_SEND_MIDDLE), \
- ib_opcode_name(UC_SEND_LAST), \
- ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_SEND_ONLY), \
- ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_FIRST), \
- ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
- ib_opcode_name(UC_RDMA_WRITE_LAST), \
- ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY), \
- ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(UD_SEND_ONLY), \
- ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
- ib_opcode_name(CNP))
+/*
+ * hfi1_check_mcast- Check if the given lid is
+ * in the IB multicast range.
+ */
+static inline bool hfi1_check_mcast(u16 lid)
+{
+ return ((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (lid != be16_to_cpu(IB_LID_PERMISSIVE)));
+}
#endif /* _HFI1_KERNEL_H */
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 4a11d4d..dfdb412 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -67,6 +67,7 @@
#include "aspm.h"
#include "affinity.h"
#include "vnic.h"
+#include "exp_rcv.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -190,16 +191,46 @@ int hfi1_create_ctxts(struct hfi1_devdata *dd)
nomem:
ret = -ENOMEM;
- if (dd->rcd) {
- for (i = 0; i < dd->num_rcv_contexts; ++i)
- hfi1_free_ctxtdata(dd, dd->rcd[i]);
- }
+ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i)
+ hfi1_rcd_put(dd->rcd[i]);
+
+ /* All the contexts should be freed, free the array */
kfree(dd->rcd);
dd->rcd = NULL;
return ret;
}
/*
+ * Helper routines for the receive context reference count (rcd and uctxt)
+ */
+static void hfi1_rcd_init(struct hfi1_ctxtdata *rcd)
+{
+ kref_init(&rcd->kref);
+}
+
+static void hfi1_rcd_free(struct kref *kref)
+{
+ struct hfi1_ctxtdata *rcd =
+ container_of(kref, struct hfi1_ctxtdata, kref);
+
+ hfi1_free_ctxtdata(rcd->dd, rcd);
+ kfree(rcd);
+}
+
+int hfi1_rcd_put(struct hfi1_ctxtdata *rcd)
+{
+ if (rcd)
+ return kref_put(&rcd->kref, hfi1_rcd_free);
+
+ return 0;
+}
+
+void hfi1_rcd_get(struct hfi1_ctxtdata *rcd)
+{
+ kref_get(&rcd->kref);
+}
+
+/*
* Common code for user and kernel context setup.
*/
struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
@@ -221,6 +252,9 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
hfi1_cdbg(PROC, "setting up context %u\n", ctxt);
INIT_LIST_HEAD(&rcd->qp_wait_list);
+ hfi1_exp_tid_group_init(&rcd->tid_group_list);
+ hfi1_exp_tid_group_init(&rcd->tid_used_list);
+ hfi1_exp_tid_group_init(&rcd->tid_full_list);
rcd->ppd = ppd;
rcd->dd = dd;
__set_bit(0, rcd->in_use_ctxts);
@@ -328,6 +362,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
if (!rcd->opstats)
goto bail;
}
+
+ hfi1_rcd_init(rcd);
}
return rcd;
bail:
@@ -927,14 +963,11 @@ static void shutdown_device(struct hfi1_devdata *dd)
* @rcd: the ctxtdata structure
*
* free up any allocated data for a context
- * This should not touch anything that would affect a simultaneous
- * re-allocation of context data, because it is called after hfi1_mutex
- * is released (and can be called from reinit as well).
* It should never change any chip state, or global driver state.
*/
void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
{
- unsigned e;
+ u32 e;
if (!rcd)
return;
@@ -953,6 +986,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
/* all the RcvArray entries should have been cleared by now */
kfree(rcd->egrbufs.rcvtids);
+ rcd->egrbufs.rcvtids = NULL;
for (e = 0; e < rcd->egrbufs.alloced; e++) {
if (rcd->egrbufs.buffers[e].dma)
@@ -962,13 +996,21 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->egrbufs.buffers[e].dma);
}
kfree(rcd->egrbufs.buffers);
+ rcd->egrbufs.alloced = 0;
+ rcd->egrbufs.buffers = NULL;
sc_free(rcd->sc);
+ rcd->sc = NULL;
+
vfree(rcd->subctxt_uregbase);
vfree(rcd->subctxt_rcvegrbuf);
vfree(rcd->subctxt_rcvhdr_base);
kfree(rcd->opstats);
- kfree(rcd);
+
+ rcd->subctxt_uregbase = NULL;
+ rcd->subctxt_rcvegrbuf = NULL;
+ rcd->subctxt_rcvhdr_base = NULL;
+ rcd->opstats = NULL;
}
/*
@@ -1362,7 +1404,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
tmp[ctxt] = NULL; /* debugging paranoia */
if (rcd) {
hfi1_clear_tids(rcd);
- hfi1_free_ctxtdata(dd, rcd);
+ hfi1_rcd_put(rcd);
}
}
kfree(tmp);
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 5977673..a081a98 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -59,6 +59,14 @@
#define OPA_LINK_WIDTH_RESET_OLD 0x0fff
#define OPA_LINK_WIDTH_RESET 0xffff
+static int smp_length_check(u32 data_size, u32 request_len)
+{
+ if (unlikely(request_len < data_size))
+ return -EINVAL;
+
+ return 0;
+}
+
static int reply(struct ib_mad_hdr *smp)
{
/*
@@ -105,7 +113,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
return;
/* o14-3.2.1 */
- if (ppd_from_ibp(ibp)->lstate != IB_PORT_ACTIVE)
+ if (driver_lstate(ppd_from_ibp(ibp)) != IB_PORT_ACTIVE)
return;
/* o14-2 */
@@ -172,10 +180,10 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
}
/*
- * Send a bad [PQ]_Key trap (ch. 14.3.8).
+ * Send a bad P_Key trap (ch. 14.3.8).
*/
-void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
- u32 qp1, u32 qp2, u16 lid1, u16 lid2)
+void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
+ u32 qp1, u32 qp2, u16 lid1, u16 lid2)
{
struct opa_mad_notice_attr data;
u32 lid = ppd_from_ibp(ibp)->lid;
@@ -183,17 +191,13 @@ void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
u32 _lid2 = lid2;
memset(&data, 0, sizeof(data));
-
- if (trap_num == OPA_TRAP_BAD_P_KEY)
- ibp->rvp.pkey_violations++;
- else
- ibp->rvp.qkey_violations++;
ibp->rvp.n_pkt_drops++;
+ ibp->rvp.pkey_violations++;
/* Send violation trap */
data.generic_type = IB_NOTICE_TYPE_SECURITY;
data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = trap_num;
+ data.trap_num = OPA_TRAP_BAD_P_KEY;
data.issuer_lid = cpu_to_be32(lid);
data.ntc_257_258.lid1 = cpu_to_be32(_lid1);
data.ntc_257_258.lid2 = cpu_to_be32(_lid2);
@@ -260,6 +264,7 @@ void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
data.issuer_lid = cpu_to_be32(lid);
data.ntc_144.lid = data.issuer_lid;
data.ntc_144.new_cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
+ data.ntc_144.cap_mask3 = cpu_to_be16(ibp->rvp.port_cap3_flags);
send_trap(ibp, &data, sizeof(data));
}
@@ -307,11 +312,11 @@ void hfi1_node_desc_chg(struct hfi1_ibport *ibp)
static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u8 port, u32 *resp_len, u32 max_len)
{
struct opa_node_description *nd;
- if (am) {
+ if (am || smp_length_check(sizeof(*nd), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -328,7 +333,7 @@ static int __subn_get_opa_nodedesc(struct opa_smp *smp, u32 am,
static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct opa_node_info *ni;
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
@@ -338,6 +343,7 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
/* GUID 0 is illegal */
if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
+ smp_length_check(sizeof(*ni), max_len) ||
get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -519,7 +525,7 @@ void read_ltp_rtt(struct hfi1_devdata *dd)
static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
int i;
struct hfi1_devdata *dd;
@@ -535,7 +541,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
u32 buffer_units;
u64 tmp = 0;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(sizeof(*pi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -605,7 +611,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
ppd->offline_disabled_reason;
pi->port_states.portphysstate_portstate =
- (hfi1_ibphys_portstate(ppd) << 4) | state;
+ (driver_pstate(ppd) << 4) | state;
pi->mkeyprotect_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
@@ -704,11 +710,7 @@ static int __subn_get_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
buffer_units |= (dd->vl15_init << 11) & OPA_PI_MASK_BUF_UNIT_VL15_INIT;
pi->buffer_units = cpu_to_be32(buffer_units);
- pi->opa_cap_mask = cpu_to_be16(OPA_CAP_MASK3_IsSharedSpaceSupported |
- OPA_CAP_MASK3_IsEthOnFabricSupported);
- /* Driver does not support mcast/collective configuration */
- pi->opa_cap_mask &=
- cpu_to_be16(~OPA_CAP_MASK3_IsAddrRangeConfigSupported);
+ pi->opa_cap_mask = cpu_to_be16(ibp->rvp.port_cap3_flags);
pi->collectivemask_multicastmask = ((HFI1_COLLECTIVE_NR & 0x7)
<< 3 | (HFI1_MCAST_NR & 0x7));
@@ -748,7 +750,7 @@ static int get_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 n_blocks_req = OPA_AM_NBLK(am);
@@ -771,6 +773,11 @@ static int __subn_get_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
size = (n_blocks_req * OPA_PARTITION_TABLE_BLK_SIZE) * sizeof(u16);
+ if (smp_length_check(size, max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
if (start_block + n_blocks_req > n_blocks_avail ||
n_blocks_req > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
pr_warn("OPA Get PKey AM Invalid : s 0x%x; req 0x%x; "
@@ -1074,7 +1081,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
*/
static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct opa_port_info *pi = (struct opa_port_info *)data;
struct ib_event event;
@@ -1095,7 +1102,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
int ret, i, invalid = 0, call_set_mtu = 0;
int call_link_downgrade_policy = 0;
- if (num_ports != 1) {
+ if (num_ports != 1 ||
+ smp_length_check(sizeof(*pi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1346,7 +1354,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
if (ret)
return ret;
- ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
+ ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
+ max_len);
/* restore re-reg bit per o14-12.2.1 */
pi->clientrereg_subnettimeout |= clientrereg;
@@ -1363,7 +1372,8 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
return ret;
get_only:
- return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
/**
@@ -1424,7 +1434,7 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 n_blocks_sent = OPA_AM_NBLK(am);
@@ -1434,6 +1444,7 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
int i;
u16 n_blocks_avail;
unsigned npkeys = hfi1_get_npkeys(dd);
+ u32 size = 0;
if (n_blocks_sent == 0) {
pr_warn("OPA Get PKey AM Invalid : P = %d; B = 0x%x; N = 0x%x\n",
@@ -1444,6 +1455,13 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
n_blocks_avail = (u16)(npkeys / OPA_PARTITION_TABLE_BLK_SIZE) + 1;
+ size = sizeof(u16) * (n_blocks_sent * OPA_PARTITION_TABLE_BLK_SIZE);
+
+ if (smp_length_check(size, max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
if (start_block + n_blocks_sent > n_blocks_avail ||
n_blocks_sent > OPA_NUM_PKEY_BLOCKS_PER_SMP) {
pr_warn("OPA Set PKey AM Invalid : s 0x%x; req 0x%x; avail 0x%x; blk/smp 0x%lx\n",
@@ -1461,7 +1479,8 @@ static int __subn_set_opa_pkeytable(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_pkeytable(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
#define ILLEGAL_VL 12
@@ -1522,14 +1541,14 @@ static int get_sc2vlt_tables(struct hfi1_devdata *dd, void *data)
static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
size_t size = ARRAY_SIZE(ibp->sl_to_sc); /* == 32 */
unsigned i;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1545,14 +1564,15 @@ static int __subn_get_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
+ size_t size = ARRAY_SIZE(ibp->sl_to_sc);
int i;
u8 sc;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1567,19 +1587,20 @@ static int __subn_set_opa_sl_to_sc(struct opa_smp *smp, u32 am, u8 *data,
}
}
- return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *p = data;
size_t size = ARRAY_SIZE(ibp->sc_to_sl); /* == 32 */
unsigned i;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1595,13 +1616,14 @@ static int __subn_get_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
+ size_t size = ARRAY_SIZE(ibp->sc_to_sl);
u8 *p = data;
int i;
- if (am) {
+ if (am || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1609,19 +1631,20 @@ static int __subn_set_opa_sc_to_sl(struct opa_smp *smp, u32 am, u8 *data,
for (i = 0; i < ARRAY_SIZE(ibp->sc_to_sl); i++)
ibp->sc_to_sl[i] = *p++;
- return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NBLK(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
void *vp = (void *)data;
size_t size = 4 * sizeof(u64);
- if (n_blocks != 1) {
+ if (n_blocks != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1636,7 +1659,7 @@ static int __subn_get_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NBLK(am);
int async_update = OPA_AM_ASYNC(am);
@@ -1644,8 +1667,15 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
void *vp = (void *)data;
struct hfi1_pportdata *ppd;
int lstate;
+ /*
+ * set_sc2vlt_tables writes the information contained in *data
+ * to four 64-bit registers SendSC2VLt[0-3]. We need to make
+ * sure *max_len is not greater than the total size of the four
+ * SendSC2VLt[0-3] registers.
+ */
+ size_t size = 4 * sizeof(u64);
- if (n_blocks != 1 || async_update) {
+ if (n_blocks != 1 || async_update || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1665,27 +1695,28 @@ static int __subn_set_opa_sc_to_vlt(struct opa_smp *smp, u32 am, u8 *data,
set_sc2vlt_tables(dd, vp);
- return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
void *vp = (void *)data;
- int size;
+ int size = sizeof(struct sc2vlnt);
- if (n_blocks != 1) {
+ if (n_blocks != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
ppd = dd->pport + (port - 1);
- size = fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
+ fm_get_table(ppd, FM_TBL_SC2VLNT, vp);
if (resp_len)
*resp_len += size;
@@ -1695,15 +1726,16 @@ static int __subn_get_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 n_blocks = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
void *vp = (void *)data;
int lstate;
+ int size = sizeof(struct sc2vlnt);
- if (n_blocks != 1) {
+ if (n_blocks != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1721,12 +1753,12 @@ static int __subn_set_opa_sc_to_vlnt(struct opa_smp *smp, u32 am, u8 *data,
fm_set_table(ppd, FM_TBL_SC2VLNT, vp);
return __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
}
static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 nports = OPA_AM_NPORT(am);
u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -1735,7 +1767,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct hfi1_pportdata *ppd;
struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
- if (nports != 1) {
+ if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1755,7 +1787,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
ppd->offline_disabled_reason;
psi->port_states.portphysstate_portstate =
- (hfi1_ibphys_portstate(ppd) << 4) | (lstate & 0xf);
+ (driver_pstate(ppd) << 4) | (lstate & 0xf);
psi->link_width_downgrade_tx_active =
cpu_to_be16(ppd->link_width_downgrade_tx_active);
psi->link_width_downgrade_rx_active =
@@ -1768,7 +1800,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
u32 nports = OPA_AM_NPORT(am);
u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -1779,7 +1811,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct opa_port_state_info *psi = (struct opa_port_state_info *)data;
int ret, invalid = 0;
- if (nports != 1) {
+ if (nports != 1 || smp_length_check(sizeof(*psi), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1809,19 +1841,21 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
if (invalid)
smp->status |= IB_SMP_INVALID_FIELD;
- return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
u32 addr = OPA_AM_CI_ADDR(am);
u32 len = OPA_AM_CI_LEN(am) + 1;
int ret;
- if (dd->pport->port_type != PORT_TYPE_QSFP) {
+ if (dd->pport->port_type != PORT_TYPE_QSFP ||
+ smp_length_check(len, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1864,21 +1898,22 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev, u8 port, u32 *resp_len,
+ u32 max_len)
{
u32 num_ports = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
struct buffer_control *p = (struct buffer_control *)data;
- int size;
+ int size = sizeof(struct buffer_control);
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
ppd = dd->pport + (port - 1);
- size = fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
+ fm_get_table(ppd, FM_TBL_BUFFER_CONTROL, p);
trace_bct_get(dd, p);
if (resp_len)
*resp_len += size;
@@ -1887,14 +1922,15 @@ static int __subn_get_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
}
static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
- struct ib_device *ibdev, u8 port, u32 *resp_len)
+ struct ib_device *ibdev, u8 port, u32 *resp_len,
+ u32 max_len)
{
u32 num_ports = OPA_AM_NPORT(am);
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd;
struct buffer_control *p = (struct buffer_control *)data;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(sizeof(*p), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1905,41 +1941,43 @@ static int __subn_set_opa_bct(struct opa_smp *smp, u32 am, u8 *data,
return reply((struct ib_mad_hdr *)smp);
}
- return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_bct(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
u32 num_ports = OPA_AM_NPORT(am);
u8 section = (am & 0x00ff0000) >> 16;
u8 *p = data;
- int size = 0;
+ int size = 256;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
switch (section) {
case OPA_VLARB_LOW_ELEMENTS:
- size = fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
+ fm_get_table(ppd, FM_TBL_VL_LOW_ARB, p);
break;
case OPA_VLARB_HIGH_ELEMENTS:
- size = fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
+ fm_get_table(ppd, FM_TBL_VL_HIGH_ARB, p);
break;
case OPA_VLARB_PREEMPT_ELEMENTS:
- size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
+ fm_get_table(ppd, FM_TBL_VL_PREEMPT_ELEMS, p);
break;
case OPA_VLARB_PREEMPT_MATRIX:
- size = fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
+ fm_get_table(ppd, FM_TBL_VL_PREEMPT_MATRIX, p);
break;
default:
pr_warn("OPA SubnGet(VL Arb) AM Invalid : 0x%x\n",
be32_to_cpu(smp->attr_mod));
smp->status |= IB_SMP_INVALID_FIELD;
+ size = 0;
break;
}
@@ -1951,14 +1989,15 @@ static int __subn_get_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_pportdata *ppd = ppd_from_ibp(to_iport(ibdev, port));
u32 num_ports = OPA_AM_NPORT(am);
u8 section = (am & 0x00ff0000) >> 16;
u8 *p = data;
+ int size = 256;
- if (num_ports != 1) {
+ if (num_ports != 1 || smp_length_check(size, max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -1986,7 +2025,8 @@ static int __subn_set_opa_vl_arb(struct opa_smp *smp, u32 am, u8 *data,
break;
}
- return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_vl_arb(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
struct opa_pma_mad {
@@ -3282,13 +3322,18 @@ struct opa_congestion_info_attr {
static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct opa_congestion_info_attr *p =
(struct opa_congestion_info_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ if (smp_length_check(sizeof(*p), max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
p->congestion_info = 0;
p->control_table_cap = ppd->cc_max_table_entries;
p->congestion_log_length = OPA_CONG_LOG_ELEMS;
@@ -3301,7 +3346,7 @@ static int __subn_get_opa_cong_info(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u8 port, u32 *resp_len, u32 max_len)
{
int i;
struct opa_congestion_setting_attr *p =
@@ -3311,6 +3356,11 @@ static int __subn_get_opa_cong_setting(struct opa_smp *smp, u32 am,
struct opa_congestion_setting_entry_shadow *entries;
struct cc_state *cc_state;
+ if (smp_length_check(sizeof(*p), max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
rcu_read_lock();
cc_state = get_cc_state(ppd);
@@ -3385,7 +3435,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd)
static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct opa_congestion_setting_attr *p =
(struct opa_congestion_setting_attr *)data;
@@ -3394,6 +3444,11 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
struct opa_congestion_setting_entry_shadow *entries;
int i;
+ if (smp_length_check(sizeof(*p), max_len)) {
+ smp->status |= IB_SMP_INVALID_FIELD;
+ return reply((struct ib_mad_hdr *)smp);
+ }
+
/*
* Save details from packet into the ppd. Hold the cc_state_lock so
* our information is consistent with anyone trying to apply the state.
@@ -3415,12 +3470,12 @@ static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data,
apply_cc_state(ppd);
return __subn_get_opa_cong_setting(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
}
static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev,
- u8 port, u32 *resp_len)
+ u8 port, u32 *resp_len, u32 max_len)
{
struct hfi1_ibport *ibp = to_iport(ibdev, port);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -3428,7 +3483,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
s64 ts;
int i;
- if (am != 0) {
+ if (am || smp_length_check(sizeof(*cong_log), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -3486,7 +3541,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am,
static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct ib_cc_table_attr *cc_table_attr =
(struct ib_cc_table_attr *)data;
@@ -3498,9 +3553,10 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
int i, j;
u32 sentry, eentry;
struct cc_state *cc_state;
+ u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
/* sanity check n_blocks, start_block */
- if (n_blocks == 0 ||
+ if (n_blocks == 0 || smp_length_check(size, max_len) ||
start_block + n_blocks > ppd->cc_max_table_entries) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -3530,14 +3586,14 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
rcu_read_unlock();
if (resp_len)
- *resp_len += sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
+ *resp_len += size;
return reply((struct ib_mad_hdr *)smp);
}
static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct ib_cc_table_attr *p = (struct ib_cc_table_attr *)data;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3548,9 +3604,10 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
int i, j;
u32 sentry, eentry;
u16 ccti_limit;
+ u32 size = sizeof(u16) * (IB_CCT_ENTRIES * n_blocks + 1);
/* sanity check n_blocks, start_block */
- if (n_blocks == 0 ||
+ if (n_blocks == 0 || smp_length_check(size, max_len) ||
start_block + n_blocks > ppd->cc_max_table_entries) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
@@ -3581,7 +3638,8 @@ static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data,
/* now apply the information */
apply_cc_state(ppd);
- return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_cc_table(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
struct opa_led_info {
@@ -3594,7 +3652,7 @@ struct opa_led_info {
static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct hfi1_pportdata *ppd = dd->pport;
@@ -3602,7 +3660,7 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
u32 nport = OPA_AM_NPORT(am);
u32 is_beaconing_active;
- if (nport != 1) {
+ if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -3624,14 +3682,14 @@ static int __subn_get_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
struct opa_led_info *p = (struct opa_led_info *)data;
u32 nport = OPA_AM_NPORT(am);
int on = !!(be32_to_cpu(p->rsvd_led_mask) & OPA_LED_MASK);
- if (nport != 1) {
+ if (nport != 1 || smp_length_check(sizeof(*p), max_len)) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
@@ -3641,12 +3699,13 @@ static int __subn_set_opa_led_info(struct opa_smp *smp, u32 am, u8 *data,
else
shutdown_led_override(dd->pport);
- return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len);
+ return __subn_get_opa_led_info(smp, am, data, ibdev, port, resp_len,
+ max_len);
}
static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
int ret;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3654,71 +3713,71 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
switch (attr_id) {
case IB_SMP_ATTR_NODE_DESC:
ret = __subn_get_opa_nodedesc(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_NODE_INFO:
ret = __subn_get_opa_nodeinfo(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_PORT_INFO:
ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_PKEY_TABLE:
ret = __subn_get_opa_pkeytable(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SL_TO_SC_MAP:
ret = __subn_get_opa_sl_to_sc(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_SL_MAP:
ret = __subn_get_opa_sc_to_sl(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
ret = __subn_get_opa_sc_to_vlt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
ret = __subn_get_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_get_opa_psi(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
ret = __subn_get_opa_bct(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_CABLE_INFO:
ret = __subn_get_opa_cable_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_VL_ARB_TABLE:
ret = __subn_get_opa_vl_arb(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_CONGESTION_INFO:
ret = __subn_get_opa_cong_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
ret = __subn_get_opa_cong_setting(smp, am, data, ibdev,
- port, resp_len);
+ port, resp_len, max_len);
break;
case OPA_ATTRIB_ID_HFI_CONGESTION_LOG:
ret = __subn_get_opa_hfi1_cong_log(smp, am, data, ibdev,
- port, resp_len);
+ port, resp_len, max_len);
break;
case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
ret = __subn_get_opa_cc_table(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_LED_INFO:
ret = __subn_get_opa_led_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_SM_INFO:
if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
@@ -3736,7 +3795,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, u32 max_len)
{
int ret;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -3744,51 +3803,51 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
switch (attr_id) {
case IB_SMP_ATTR_PORT_INFO:
ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_PKEY_TABLE:
ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SL_TO_SC_MAP:
ret = __subn_set_opa_sl_to_sc(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_SL_MAP:
ret = __subn_set_opa_sc_to_sl(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLT_MAP:
ret = __subn_set_opa_sc_to_vlt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_SC_TO_VLNT_MAP:
ret = __subn_set_opa_sc_to_vlnt(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_VL_ARB_TABLE:
ret = __subn_set_opa_vl_arb(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case OPA_ATTRIB_ID_HFI_CONGESTION_SETTING:
ret = __subn_set_opa_cong_setting(smp, am, data, ibdev,
- port, resp_len);
+ port, resp_len, max_len);
break;
case OPA_ATTRIB_ID_CONGESTION_CONTROL_TABLE:
ret = __subn_set_opa_cc_table(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_LED_INFO:
ret = __subn_set_opa_led_info(smp, am, data, ibdev, port,
- resp_len);
+ resp_len, max_len);
break;
case IB_SMP_ATTR_SM_INFO:
if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED)
@@ -3844,7 +3903,10 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
memset(next_smp + sizeof(*agg), 0, agg_data_len);
(void)subn_get_opa_sma(agg->attr_id, smp, am, agg->data,
- ibdev, port, NULL);
+ ibdev, port, NULL, (u32)agg_data_len);
+
+ if (smp->status & IB_SMP_INVALID_FIELD)
+ break;
if (smp->status & ~IB_SMP_DIRECTION) {
set_aggr_error(agg);
return reply((struct ib_mad_hdr *)smp);
@@ -3887,7 +3949,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp,
}
(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
- ibdev, port, NULL);
+ ibdev, port, NULL, (u32)agg_data_len);
+ if (smp->status & IB_SMP_INVALID_FIELD)
+ break;
if (smp->status & ~IB_SMP_DIRECTION) {
set_aggr_error(agg);
return reply((struct ib_mad_hdr *)smp);
@@ -3997,12 +4061,13 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
struct opa_smp *smp = (struct opa_smp *)out_mad;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
u8 *data;
- u32 am;
+ u32 am, data_size;
__be16 attr_id;
int ret;
*out_mad = *in_mad;
data = opa_get_smp_data(smp);
+ data_size = (u32)opa_get_smp_data_size(smp);
am = be32_to_cpu(smp->attr_mod);
attr_id = smp->attr_id;
@@ -4046,7 +4111,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
default:
clear_opa_smp_data(smp);
ret = subn_get_opa_sma(attr_id, smp, am, data,
- ibdev, port, resp_len);
+ ibdev, port, resp_len,
+ data_size);
break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_get_opa_aggregate(smp, ibdev, port,
@@ -4058,7 +4124,8 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
switch (attr_id) {
default:
ret = subn_set_opa_sma(attr_id, smp, am, data,
- ibdev, port, resp_len);
+ ibdev, port, resp_len,
+ data_size);
break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_set_opa_aggregate(smp, ibdev, port,
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 5aa3fd1..a4e2506 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -115,7 +115,7 @@ struct opa_mad_notice_attr {
__be32 lid; /* LID where change occurred */
__be32 new_cap_mask; /* new capability mask */
__be16 reserved2;
- __be16 cap_mask;
+ __be16 cap_mask3;
__be16 change_flags; /* low 4 bits only */
} __packed ntc_144;
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index ccbf52c..d41fd87 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -217,21 +217,27 @@ static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
return node;
}
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
- unsigned long addr, unsigned long len)
+bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len,
+ struct mmu_rb_node **rb_node)
{
struct mmu_rb_node *node;
unsigned long flags;
+ bool ret = false;
spin_lock_irqsave(&handler->lock, flags);
node = __mmu_rb_search(handler, addr, len);
if (node) {
+ if (node->addr == addr && node->len == len)
+ goto unlock;
__mmu_int_rb_remove(node, &handler->root);
list_del(&node->list); /* remove from LRU list */
+ ret = true;
}
+unlock:
spin_unlock_irqrestore(&handler->lock, flags);
-
- return node;
+ *rb_node = node;
+ return ret;
}
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.h b/drivers/infiniband/hw/hfi1/mmu_rb.h
index 754f6eb..f04cec1 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.h
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.h
@@ -81,7 +81,8 @@ int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg);
void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
struct mmu_rb_node *mnode);
-struct mmu_rb_node *hfi1_mmu_rb_extract(struct mmu_rb_handler *handler,
- unsigned long addr, unsigned long len);
+bool hfi1_mmu_rb_remove_unless_exact(struct mmu_rb_handler *handler,
+ unsigned long addr, unsigned long len,
+ struct mmu_rb_node **rb_node);
#endif /* _HFI1_MMU_RB_H */
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 6a9f6f9..f01841b 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2017 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -240,50 +240,6 @@ void hfi1_pcie_ddcleanup(struct hfi1_devdata *dd)
iounmap(dd->piobase);
}
-static void msix_setup(struct hfi1_devdata *dd, int pos, u32 *msixcnt,
- struct hfi1_msix_entry *hfi1_msix_entry)
-{
- int ret;
- int nvec = *msixcnt;
- struct msix_entry *msix_entry;
- int i;
-
- /*
- * We can't pass hfi1_msix_entry array to msix_setup
- * so use a dummy msix_entry array and copy the allocated
- * irq back to the hfi1_msix_entry array.
- */
- msix_entry = kmalloc_array(nvec, sizeof(*msix_entry), GFP_KERNEL);
- if (!msix_entry) {
- ret = -ENOMEM;
- goto do_intx;
- }
-
- for (i = 0; i < nvec; i++)
- msix_entry[i] = hfi1_msix_entry[i].msix;
-
- ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
- if (ret < 0)
- goto free_msix_entry;
- nvec = ret;
-
- for (i = 0; i < nvec; i++)
- hfi1_msix_entry[i].msix = msix_entry[i];
-
- kfree(msix_entry);
- *msixcnt = nvec;
- return;
-
-free_msix_entry:
- kfree(msix_entry);
-
-do_intx:
- dd_dev_err(dd, "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n",
- nvec, ret);
- *msixcnt = 0;
- hfi1_enable_intx(dd->pcidev);
-}
-
/* return the PCIe link speed from the given link status */
static u32 extract_speed(u16 linkstat)
{
@@ -364,33 +320,29 @@ int pcie_speeds(struct hfi1_devdata *dd)
}
/*
- * Returns in *nent:
- * - actual number of interrupts allocated
+ * Returns:
+ * - actual number of interrupts allocated or
* - 0 if fell back to INTx.
+ * - error
*/
-void request_msix(struct hfi1_devdata *dd, u32 *nent,
- struct hfi1_msix_entry *entry)
+int request_msix(struct hfi1_devdata *dd, u32 msireq)
{
- int pos;
+ int nvec;
- pos = dd->pcidev->msix_cap;
- if (*nent && pos) {
- msix_setup(dd, pos, nent, entry);
- /* did it, either MSI-X or INTx */
- } else {
- *nent = 0;
- hfi1_enable_intx(dd->pcidev);
+ nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
+ PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
+ if (nvec < 0) {
+ dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
+ return nvec;
}
tune_pcie_caps(dd);
-}
-void hfi1_enable_intx(struct pci_dev *pdev)
-{
- /* first, turn on INTx */
- pci_intx(pdev, 1);
- /* then turn off MSI-X */
- pci_disable_msix(pdev);
+ /* check for legacy IRQ */
+ if (nvec == 1 && !dd->pcidev->msix_enabled)
+ return 0;
+
+ return nvec;
}
/* restore command and BARs after a reset has wiped them out */
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 838fe84..41307e4 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -136,7 +136,6 @@ static void save_platform_config_fields(struct hfi1_devdata *dd)
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
- unsigned long size = 0;
u8 *temp_platform_config = NULL;
u32 esize;
@@ -160,15 +159,6 @@ void get_platform_config(struct hfi1_devdata *dd)
dd->platform_config.size = esize;
return;
}
- /* fail, try EFI variable */
-
- ret = read_hfi1_efi_var(dd, "configuration", &size,
- (void **)&temp_platform_config);
- if (!ret) {
- dd->platform_config.data = temp_platform_config;
- dd->platform_config.size = size;
- return;
- }
}
dd_dev_err(dd,
"%s: Failed to get platform config, falling back to sub-optimal default file\n",
@@ -242,7 +232,7 @@ static int qual_power(struct hfi1_pportdata *ppd)
if (ppd->offline_disabled_reason ==
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_POWER_POLICY)) {
- dd_dev_info(
+ dd_dev_err(
ppd->dd,
"%s: Port disabled due to system power restrictions\n",
__func__);
@@ -268,7 +258,7 @@ static int qual_bitrate(struct hfi1_pportdata *ppd)
if (ppd->offline_disabled_reason ==
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_LINKSPEED_POLICY)) {
- dd_dev_info(
+ dd_dev_err(
ppd->dd,
"%s: Cable failed bitrate check, disabling port\n",
__func__);
@@ -709,15 +699,15 @@ static void apply_tunings(
ret = load_8051_config(ppd->dd, DC_HOST_COMM_SETTINGS,
GENERAL_CONFIG, config_data);
if (ret != HCMD_SUCCESS)
- dd_dev_info(ppd->dd,
- "%s: Failed set ext device config params\n",
- __func__);
+ dd_dev_err(ppd->dd,
+ "%s: Failed set ext device config params\n",
+ __func__);
}
if (tx_preset_index == OPA_INVALID_INDEX) {
if (ppd->port_type == PORT_TYPE_QSFP && limiting_active)
- dd_dev_info(ppd->dd, "%s: Invalid Tx preset index\n",
- __func__);
+ dd_dev_err(ppd->dd, "%s: Invalid Tx preset index\n",
+ __func__);
return;
}
@@ -900,7 +890,7 @@ static int tune_qsfp(struct hfi1_pportdata *ppd,
case 0xD: /* fallthrough */
case 0xF:
default:
- dd_dev_info(ppd->dd, "%s: Unknown/unsupported cable\n",
+ dd_dev_warn(ppd->dd, "%s: Unknown/unsupported cable\n",
__func__);
break;
}
@@ -942,7 +932,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
case PORT_TYPE_DISCONNECTED:
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_DISCONNECTED);
- dd_dev_info(dd, "%s: Port disconnected, disabling port\n",
+ dd_dev_warn(dd, "%s: Port disconnected, disabling port\n",
__func__);
goto bail;
case PORT_TYPE_FIXED:
@@ -1027,7 +1017,7 @@ void tune_serdes(struct hfi1_pportdata *ppd)
}
break;
default:
- dd_dev_info(ppd->dd, "%s: Unknown port type\n", __func__);
+ dd_dev_warn(ppd->dd, "%s: Unknown port type\n", __func__);
ppd->port_type = PORT_TYPE_UNKNOWN;
tuning_method = OPA_UNKNOWN_TUNING;
total_atten = 0;
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index ee2c74d..806d166 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -73,12 +73,6 @@ static void iowait_wakeup(struct iowait *wait, int reason);
static void iowait_sdma_drained(struct iowait *wait);
static void qp_pio_drain(struct rvt_qp *qp);
-static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
- struct rvt_qpn_map *map, unsigned off)
-{
- return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
-}
-
const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 1080778..baa67bf 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -765,7 +765,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
ohdr->u.aeth = rvt_compute_aeth(qp);
sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
+ pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
lrh0 |= (sc5 & 0xf) << 12 | (rdma_ah_get_sl(&qp->remote_ah_attr)
& 0xf) << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
@@ -798,7 +798,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
goto queue_ack;
}
- trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &hdr);
+ trace_ack_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
+ &hdr, ib_is_sc5(sc5));
/* write the pbc and data */
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords);
@@ -1009,7 +1010,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
return;
}
- psn = be32_to_cpu(ohdr->bth[2]);
+ psn = ib_bth_get_psn(ohdr);
reset_sending_psn(qp, psn);
/*
@@ -1915,17 +1916,16 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
void hfi1_rc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct ib_header *hdr = packet->hdr;
- u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct ib_other_headers *ohdr = packet->ohdr;
- u32 bth0, opcode;
+ u32 bth0;
+ u32 opcode = packet->opcode;
u32 hdrsize = packet->hlen;
u32 psn;
- u32 pad;
+ u32 pad = packet->pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
int diff;
@@ -1937,14 +1937,13 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
u32 rkey;
lockdep_assert_held(&qp->r_lock);
+
bth0 = be32_to_cpu(ohdr->bth[0]);
- if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
+ if (hfi1_ruc_check_hdr(ibp, packet))
return;
is_fecn = process_ecn(qp, packet, false);
-
- psn = be32_to_cpu(ohdr->bth[2]);
- opcode = ib_bth_get_opcode(ohdr);
+ psn = ib_bth_get_psn(ohdr);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2074,8 +2073,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
wc.wc_flags = 0;
wc.ex.imm_data = 0;
send_last:
- /* Get the number of bytes the message was padded by. */
- pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -2368,28 +2365,19 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct ib_header *hdr,
- u32 rcv_flags,
+ struct hfi1_packet *packet,
struct rvt_qp *qp)
{
- int has_grh = rcv_flags & HFI1_HAS_GRH;
- struct ib_other_headers *ohdr;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
int diff;
u32 opcode;
- u32 psn, bth0;
+ u32 psn;
- /* Check for GRH */
- ohdr = &hdr->u.oth;
- if (has_grh)
- ohdr = &hdr->u.l.oth;
-
- bth0 = be32_to_cpu(ohdr->bth[0]);
- if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
+ if (hfi1_ruc_check_hdr(ibp, packet))
return;
- psn = be32_to_cpu(ohdr->bth[2]);
- opcode = ib_bth_get_opcode(ohdr);
+ psn = ib_bth_get_psn(packet->ohdr);
+ opcode = ib_bth_get_opcode(packet->ohdr);
/* Only deal with RDMA Writes for now */
if (opcode < IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 3a17dab..9cf506a 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -75,7 +75,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
continue;
/* Check LKEY */
if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+ NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
j++;
@@ -214,100 +214,95 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
- int has_grh, struct rvt_qp *qp, u32 bth0)
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
{
__be64 guid;
unsigned long flags;
+ struct rvt_qp *qp = packet->qp;
u8 sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&qp->remote_ah_attr)];
+ u32 dlid = packet->dlid;
+ u32 slid = packet->slid;
+ u32 sl = packet->sl;
+ int migrated;
+ u32 bth0, bth1;
- if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
- if (!has_grh) {
+ bth0 = be32_to_cpu(packet->ohdr->bth[0]);
+ bth1 = be32_to_cpu(packet->ohdr->bth[1]);
+ migrated = bth0 & IB_BTH_MIG_REQ;
+
+ if (qp->s_mig_state == IB_MIG_ARMED && migrated) {
+ if (!packet->grh) {
if (rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
IB_AH_GRH)
- goto err;
+ return 1;
} else {
const struct ib_global_route *grh;
if (!(rdma_ah_get_ah_flags(&qp->alt_ah_attr) &
IB_AH_GRH))
- goto err;
+ return 1;
grh = rdma_ah_read_grh(&qp->alt_ah_attr);
guid = get_sguid(ibp, grh->sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+ if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
guid))
- goto err;
+ return 1;
if (!gid_ok(
- &hdr->u.l.grh.sgid,
+ &packet->grh->sgid,
grh->dgid.global.subnet_prefix,
grh->dgid.global.interface_id))
- goto err;
+ return 1;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
- ib_get_slid(hdr)))) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- (u16)bth0,
- ib_get_sl(hdr),
- 0, qp->ibqp.qp_num,
- ib_get_slid(hdr),
- ib_get_dlid(hdr));
- goto err;
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
+ sc5, slid))) {
+ hfi1_bad_pkey(ibp, (u16)bth0, sl,
+ 0, qp->ibqp.qp_num, slid, dlid);
+ return 1;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
- if (ib_get_slid(hdr) !=
- rdma_ah_get_dlid(&qp->alt_ah_attr) ||
+ if (slid != rdma_ah_get_dlid(&qp->alt_ah_attr) ||
ppd_from_ibp(ibp)->port !=
rdma_ah_get_port_num(&qp->alt_ah_attr))
- goto err;
+ return 1;
spin_lock_irqsave(&qp->s_lock, flags);
hfi1_migrate_qp(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else {
- if (!has_grh) {
+ if (!packet->grh) {
if (rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
IB_AH_GRH)
- goto err;
+ return 1;
} else {
const struct ib_global_route *grh;
if (!(rdma_ah_get_ah_flags(&qp->remote_ah_attr) &
IB_AH_GRH))
- goto err;
+ return 1;
grh = rdma_ah_read_grh(&qp->remote_ah_attr);
guid = get_sguid(ibp, grh->sgid_index);
- if (!gid_ok(&hdr->u.l.grh.dgid, ibp->rvp.gid_prefix,
+ if (!gid_ok(&packet->grh->dgid, ibp->rvp.gid_prefix,
guid))
- goto err;
+ return 1;
if (!gid_ok(
- &hdr->u.l.grh.sgid,
+ &packet->grh->sgid,
grh->dgid.global.subnet_prefix,
grh->dgid.global.interface_id))
- goto err;
+ return 1;
}
- if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0, sc5,
- ib_get_slid(hdr)))) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- (u16)bth0,
- ib_get_sl(hdr),
- 0, qp->ibqp.qp_num,
- ib_get_slid(hdr),
- ib_get_dlid(hdr));
- goto err;
+ if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp), (u16)bth0,
+ sc5, slid))) {
+ hfi1_bad_pkey(ibp, (u16)bth0, sl,
+ 0, qp->ibqp.qp_num, slid, dlid);
+ return 1;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
- if (ib_get_slid(hdr) !=
- rdma_ah_get_dlid(&qp->remote_ah_attr) ||
+ if ((slid != rdma_ah_get_dlid(&qp->remote_ah_attr)) ||
ppd_from_ibp(ibp)->port != qp->port_num)
- goto err;
- if (qp->s_mig_state == IB_MIG_REARM &&
- !(bth0 & IB_BTH_MIG_REQ))
+ return 1;
+ if (qp->s_mig_state == IB_MIG_REARM && !migrated)
qp->s_mig_state = IB_MIG_ARMED;
}
return 0;
-
-err:
- return 1;
}
/**
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index bfd0d51..d82ff57 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1340,10 +1340,8 @@ static void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
* @dd: hfi1_devdata
* @port: port number (currently only zero)
*
- * sdma_init initializes the specified number of engines.
- *
- * The code initializes each sde, its csrs. Interrupts
- * are not required to be enabled.
+ * Initializes each sde and its csrs.
+ * Interrupts are not required to be enabled.
*
* Returns:
* 0 - success, -errno on failure
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index eafae48..b80b74d 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -47,7 +47,7 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
-u8 ibhdr_exhdr_len(struct ib_header *hdr)
+u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
u8 opcode;
@@ -61,13 +61,18 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr)
0 : hdr_len_by_opcode[opcode] - (12 + 8);
}
-#define IMM_PRN "imm %d"
-#define RETH_PRN "reth vaddr 0x%.16llx rkey 0x%.8x dlen 0x%.8x"
-#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
-#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
-#define IETH_PRN "ieth rkey 0x%.8x"
-#define ATOMICACKETH_PRN "origdata %llx"
-#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
+const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet)
+{
+ return "IB";
+}
+
+#define IMM_PRN "imm:%d"
+#define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x"
+#define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x"
+#define DETH_PRN "deth qkey:0x%.8x sqpn:0x%.6x"
+#define IETH_PRN "ieth rkey:0x%.8x"
+#define ATOMICACKETH_PRN "origdata:%llx"
+#define ATOMICETH_PRN "vaddr:0x%llx rkey:0x%.8x sdata:%llx cdata:%llx"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
@@ -84,6 +89,43 @@ static const char *parse_syndrome(u8 syndrome)
return "";
}
+void hfi1_trace_parse_bth(struct ib_other_headers *ohdr,
+ u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
+ u8 *se, u8 *pad, u8 *opcode, u8 *tver,
+ u16 *pkey, u32 *psn, u32 *qpn)
+{
+ *ack = ib_bth_get_ackreq(ohdr);
+ *becn = ib_bth_get_becn(ohdr);
+ *fecn = ib_bth_get_fecn(ohdr);
+ *mig = ib_bth_get_migreq(ohdr);
+ *se = ib_bth_get_se(ohdr);
+ *pad = ib_bth_get_pad(ohdr);
+ *opcode = ib_bth_get_opcode(ohdr);
+ *tver = ib_bth_get_tver(ohdr);
+ *pkey = ib_bth_get_pkey(ohdr);
+ *psn = ib_bth_get_psn(ohdr);
+ *qpn = ib_bth_get_qpn(ohdr);
+}
+
+void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
+ struct ib_other_headers **ohdr,
+ u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
+ u16 *len, u32 *dlid, u32 *slid)
+{
+ *lnh = ib_get_lnh(hdr);
+ *lver = ib_get_lver(hdr);
+ *sl = ib_get_sl(hdr);
+ *sc = ib_get_sc(hdr) | (sc5 << 4);
+ *len = ib_get_len(hdr);
+ *dlid = ib_get_dlid(hdr);
+ *slid = ib_get_slid(hdr);
+
+ if (*lnh == HFI1_LRH_BTH)
+ *ohdr = &hdr->u.oth;
+ else
+ *ohdr = &hdr->u.l.oth;
+}
+
const char *parse_everbs_hdrs(
struct trace_seq *p,
u8 opcode,
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 090f6b5..0f2d2da 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -55,8 +55,57 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ibhdrs
-u8 ibhdr_exhdr_len(struct ib_header *hdr);
+#define ib_opcode_name(opcode) { IB_OPCODE_##opcode, #opcode }
+#define show_ib_opcode(opcode) \
+__print_symbolic(opcode, \
+ ib_opcode_name(RC_SEND_FIRST), \
+ ib_opcode_name(RC_SEND_MIDDLE), \
+ ib_opcode_name(RC_SEND_LAST), \
+ ib_opcode_name(RC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_SEND_ONLY), \
+ ib_opcode_name(RC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(RC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST), \
+ ib_opcode_name(RC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(RC_RDMA_READ_REQUEST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_FIRST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_MIDDLE), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_LAST), \
+ ib_opcode_name(RC_RDMA_READ_RESPONSE_ONLY), \
+ ib_opcode_name(RC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_ATOMIC_ACKNOWLEDGE), \
+ ib_opcode_name(RC_COMPARE_SWAP), \
+ ib_opcode_name(RC_FETCH_ADD), \
+ ib_opcode_name(UC_SEND_FIRST), \
+ ib_opcode_name(UC_SEND_MIDDLE), \
+ ib_opcode_name(UC_SEND_LAST), \
+ ib_opcode_name(UC_SEND_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_SEND_ONLY), \
+ ib_opcode_name(UC_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_FIRST), \
+ ib_opcode_name(UC_RDMA_WRITE_MIDDLE), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST), \
+ ib_opcode_name(UC_RDMA_WRITE_LAST_WITH_IMMEDIATE), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY), \
+ ib_opcode_name(UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(UD_SEND_ONLY), \
+ ib_opcode_name(UD_SEND_ONLY_WITH_IMMEDIATE), \
+ ib_opcode_name(CNP))
+
const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
+u8 hfi1_trace_ib_hdr_len(struct ib_header *hdr);
+const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet);
+void hfi1_trace_parse_bth(struct ib_other_headers *ohdr,
+ u8 *ack, u8 *becn, u8 *fecn, u8 *mig,
+ u8 *se, u8 *pad, u8 *opcode, u8 *tver,
+ u16 *pkey, u32 *psn, u32 *qpn);
+void hfi1_trace_parse_9b_hdr(struct ib_header *hdr, bool sc5,
+ struct ib_other_headers **ohdr,
+ u8 *lnh, u8 *lver, u8 *sl, u8 *sc,
+ u16 *len, u32 *dlid, u32 *slid);
#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
@@ -66,139 +115,198 @@ __print_symbolic(lrh, \
lrh_name(LRH_BTH), \
lrh_name(LRH_GRH))
-#define LRH_PRN "vl %d lver %d sl %d lnh %d,%s dlid %.4x len %d slid %.4x"
+#define LRH_PRN "len:%d sc:%d dlid:0x%.4x slid:0x%.4x"
+#define LRH_9B_PRN "lnh:%d,%s lver:%d sl:%d "
#define BTH_PRN \
- "op 0x%.2x,%s se %d m %d pad %d tver %d pkey 0x%.4x " \
- "f %d b %d qpn 0x%.6x a %d psn 0x%.8x"
-#define EHDR_PRN "%s"
+ "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d pkey:0x%.4x " \
+ "f:%d b:%d qpn:0x%.6x a:%d psn:0x%.8x"
+#define EHDR_PRN "hlen:%d %s"
-DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
+DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
TP_PROTO(struct hfi1_devdata *dd,
- struct ib_header *hdr),
- TP_ARGS(dd, hdr),
+ struct hfi1_packet *packet,
+ bool sc5),
+ TP_ARGS(dd, packet, sc5),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
- /* LRH */
- __field(u8, vl)
+ __field(u8, lnh)
__field(u8, lver)
__field(u8, sl)
- __field(u8, lnh)
- __field(u16, dlid)
__field(u16, len)
- __field(u16, slid)
- /* BTH */
+ __field(u32, dlid)
+ __field(u8, sc)
+ __field(u32, slid)
__field(u8, opcode)
__field(u8, se)
- __field(u8, m)
+ __field(u8, mig)
__field(u8, pad)
__field(u8, tver)
__field(u16, pkey)
- __field(u8, f)
- __field(u8, b)
+ __field(u8, fecn)
+ __field(u8, becn)
__field(u32, qpn)
- __field(u8, a)
+ __field(u8, ack)
__field(u32, psn)
/* extended headers */
- __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
+ __dynamic_array(u8, ehdrs,
+ hfi1_trace_ib_hdr_len(packet->hdr))
),
- TP_fast_assign(
+ TP_fast_assign(
+ struct ib_other_headers *ohdr;
+
+ DD_DEV_ASSIGN(dd);
+
+ hfi1_trace_parse_9b_hdr(packet->hdr, sc5,
+ &ohdr,
+ &__entry->lnh,
+ &__entry->lver,
+ &__entry->sl,
+ &__entry->sc,
+ &__entry->len,
+ &__entry->dlid,
+ &__entry->slid);
+
+ hfi1_trace_parse_bth(ohdr, &__entry->ack,
+ &__entry->becn, &__entry->fecn,
+ &__entry->mig, &__entry->se,
+ &__entry->pad, &__entry->opcode,
+ &__entry->tver, &__entry->pkey,
+ &__entry->psn, &__entry->qpn);
+ /* extended headers */
+ memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
+ __get_dynamic_array_len(ehdrs));
+ ),
+ TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " "
+ BTH_PRN " " EHDR_PRN,
+ __get_str(dev),
+ __entry->len,
+ __entry->sc,
+ __entry->dlid,
+ __entry->slid,
+ __entry->lnh, show_lnh(__entry->lnh),
+ __entry->lver,
+ __entry->sl,
+ /* BTH */
+ __entry->opcode, show_ib_opcode(__entry->opcode),
+ __entry->se,
+ __entry->mig,
+ __entry->pad,
+ __entry->tver,
+ __entry->pkey,
+ __entry->fecn,
+ __entry->becn,
+ __entry->qpn,
+ __entry->ack,
+ __entry->psn,
+ /* extended headers */
+ __get_dynamic_array_len(ehdrs),
+ __parse_ib_ehdrs(
+ __entry->opcode,
+ (void *)__get_dynamic_array(ehdrs))
+ )
+);
+
+DEFINE_EVENT(hfi1_input_ibhdr_template, input_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct hfi1_packet *packet, bool sc5),
+ TP_ARGS(dd, packet, sc5));
+
+DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct ib_header *hdr,
+ bool sc5),
+ TP_ARGS(dd, hdr, sc5),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(u8, lnh)
+ __field(u8, lver)
+ __field(u8, sl)
+ __field(u16, len)
+ __field(u32, dlid)
+ __field(u8, sc)
+ __field(u32, slid)
+ __field(u8, opcode)
+ __field(u8, se)
+ __field(u8, mig)
+ __field(u8, pad)
+ __field(u8, tver)
+ __field(u16, pkey)
+ __field(u8, fecn)
+ __field(u8, becn)
+ __field(u32, qpn)
+ __field(u8, ack)
+ __field(u32, psn)
+ /* extended headers */
+ __dynamic_array(u8, ehdrs,
+ hfi1_trace_ib_hdr_len(hdr))
+ ),
+ TP_fast_assign(
struct ib_other_headers *ohdr;
DD_DEV_ASSIGN(dd);
- /* LRH */
- __entry->vl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 12);
- __entry->lver =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 8) & 0xf;
- __entry->sl =
- (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf;
- __entry->lnh =
- (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
- __entry->dlid =
- be16_to_cpu(hdr->lrh[1]);
- /* allow for larger len */
- __entry->len =
- be16_to_cpu(hdr->lrh[2]);
- __entry->slid =
- be16_to_cpu(hdr->lrh[3]);
- /* BTH */
- if (__entry->lnh == HFI1_LRH_BTH)
- ohdr = &hdr->u.oth;
- else
- ohdr = &hdr->u.l.oth;
- __entry->opcode =
- (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff;
- __entry->se =
- (be32_to_cpu(ohdr->bth[0]) >> 23) & 1;
- __entry->m =
- (be32_to_cpu(ohdr->bth[0]) >> 22) & 1;
- __entry->pad =
- (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
- __entry->tver =
- (be32_to_cpu(ohdr->bth[0]) >> 16) & 0xf;
- __entry->pkey =
- be32_to_cpu(ohdr->bth[0]) & 0xffff;
- __entry->f =
- (be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
- IB_FECN_MASK;
- __entry->b =
- (be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
- IB_BECN_MASK;
- __entry->qpn =
- be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
- __entry->a =
- (be32_to_cpu(ohdr->bth[2]) >> 31) & 1;
- /* allow for larger PSN */
- __entry->psn =
- be32_to_cpu(ohdr->bth[2]) & 0x7fffffff;
+
+ hfi1_trace_parse_9b_hdr(hdr, sc5,
+ &ohdr, &__entry->lnh,
+ &__entry->lver, &__entry->sl,
+ &__entry->sc, &__entry->len,
+ &__entry->dlid, &__entry->slid);
+
+ hfi1_trace_parse_bth(ohdr, &__entry->ack,
+ &__entry->becn, &__entry->fecn,
+ &__entry->mig, &__entry->se,
+ &__entry->pad, &__entry->opcode,
+ &__entry->tver, &__entry->pkey,
+ &__entry->psn, &__entry->qpn);
+
/* extended headers */
- memcpy(__get_dynamic_array(ehdrs), &ohdr->u,
- ibhdr_exhdr_len(hdr));
- ),
- TP_printk("[%s] " LRH_PRN " " BTH_PRN " " EHDR_PRN,
- __get_str(dev),
- /* LRH */
- __entry->vl,
- __entry->lver,
- __entry->sl,
- __entry->lnh, show_lnh(__entry->lnh),
- __entry->dlid,
- __entry->len,
- __entry->slid,
- /* BTH */
- __entry->opcode, show_ib_opcode(__entry->opcode),
- __entry->se,
- __entry->m,
- __entry->pad,
- __entry->tver,
- __entry->pkey,
- __entry->f,
- __entry->b,
- __entry->qpn,
- __entry->a,
- __entry->psn,
- /* extended headers */
- __parse_ib_ehdrs(
- __entry->opcode,
- (void *)__get_dynamic_array(ehdrs))
- )
+ memcpy(__get_dynamic_array(ehdrs),
+ &ohdr->u, __get_dynamic_array_len(ehdrs));
+ ),
+ TP_printk("[%s] (IB) " LRH_PRN " " LRH_9B_PRN " "
+ BTH_PRN " " EHDR_PRN,
+ __get_str(dev),
+ __entry->len,
+ __entry->sc,
+ __entry->dlid,
+ __entry->slid,
+ __entry->lnh, show_lnh(__entry->lnh),
+ __entry->lver,
+ __entry->sl,
+ /* BTH */
+ __entry->opcode, show_ib_opcode(__entry->opcode),
+ __entry->se,
+ __entry->mig,
+ __entry->pad,
+ __entry->tver,
+ __entry->pkey,
+ __entry->fecn,
+ __entry->becn,
+ __entry->qpn,
+ __entry->ack,
+ __entry->psn,
+ /* extended headers */
+ __get_dynamic_array_len(ehdrs),
+ __parse_ib_ehdrs(
+ __entry->opcode,
+ (void *)__get_dynamic_array(ehdrs))
+ )
);
-DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, pio_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct ib_header *hdr, bool sc5),
+ TP_ARGS(dd, hdr, sc5));
-DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, ack_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct ib_header *hdr, bool sc5),
+ TP_ARGS(dd, hdr, sc5));
-DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
+DEFINE_EVENT(hfi1_output_ibhdr_template, sdma_output_ibhdr,
+ TP_PROTO(struct hfi1_devdata *dd,
+ struct ib_header *hdr, bool sc5),
+ TP_ARGS(dd, hdr, sc5));
-DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
- TP_ARGS(dd, hdr));
#endif /* __HFI1_TRACE_IBHDRS_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_misc.h b/drivers/infiniband/hw/hfi1/trace_misc.h
index deac77d..8db2253 100644
--- a/drivers/infiniband/hw/hfi1/trace_misc.h
+++ b/drivers/infiniband/hw/hfi1/trace_misc.h
@@ -72,6 +72,26 @@ TRACE_EVENT(hfi1_interrupt,
__entry->src)
);
+DECLARE_EVENT_CLASS(
+ hfi1_csr_template,
+ TP_PROTO(void __iomem *addr, u64 value),
+ TP_ARGS(addr, value),
+ TP_STRUCT__entry(
+ __field(void __iomem *, addr)
+ __field(u64, value)
+ ),
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->value = value;
+ ),
+ TP_printk("addr %p value %llx", __entry->addr, __entry->value)
+);
+
+DEFINE_EVENT(
+ hfi1_csr_template, hfi1_write_rcvarray,
+ TP_PROTO(void __iomem *addr, u64 value),
+ TP_ARGS(addr, value));
+
#ifdef CONFIG_FAULT_INJECTION
TRACE_EVENT(hfi1_fault_opcode,
TP_PROTO(struct rvt_qp *qp, u8 opcode),
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index f77e59f..8492957 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -52,9 +52,25 @@
#include "hfi.h"
+#define tidtype_name(type) { PT_##type, #type }
+#define show_tidtype(type) \
+__print_symbolic(type, \
+ tidtype_name(EXPECTED), \
+ tidtype_name(EAGER), \
+ tidtype_name(INVALID)) \
+
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_rx
+#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype }
+#define show_packettype(etype) \
+__print_symbolic(etype, \
+ packettype_name(EXPECTED), \
+ packettype_name(EAGER), \
+ packettype_name(IB), \
+ packettype_name(ERROR), \
+ packettype_name(BYPASS))
+
TRACE_EVENT(hfi1_rcvhdr,
TP_PROTO(struct hfi1_devdata *dd,
u32 ctxt,
@@ -129,7 +145,8 @@ TRACE_EVENT(hfi1_receive_interrupt,
)
);
-TRACE_EVENT(hfi1_exp_tid_reg,
+DECLARE_EVENT_CLASS(
+ hfi1_exp_tid_reg_unreg,
TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr,
u32 npages, unsigned long va, unsigned long pa,
dma_addr_t dma),
@@ -163,38 +180,45 @@ TRACE_EVENT(hfi1_exp_tid_reg,
)
);
-TRACE_EVENT(hfi1_exp_tid_unreg,
- TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
- unsigned long va, unsigned long pa, dma_addr_t dma),
- TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma),
- TP_STRUCT__entry(
- __field(unsigned int, ctxt)
- __field(u16, subctxt)
- __field(u32, rarr)
- __field(u32, npages)
- __field(unsigned long, va)
- __field(unsigned long, pa)
- __field(dma_addr_t, dma)
- ),
- TP_fast_assign(
- __entry->ctxt = ctxt;
- __entry->subctxt = subctxt;
- __entry->rarr = rarr;
- __entry->npages = npages;
- __entry->va = va;
- __entry->pa = pa;
- __entry->dma = dma;
- ),
- TP_printk("[%u:%u] entry:%u, %u pages @ 0x%lx, va:0x%lx dma:0x%llx",
- __entry->ctxt,
- __entry->subctxt,
- __entry->rarr,
- __entry->npages,
- __entry->pa,
- __entry->va,
- __entry->dma
- )
- );
+DEFINE_EVENT(
+ hfi1_exp_tid_reg_unreg, hfi1_exp_tid_unreg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
+
+DEFINE_EVENT(
+ hfi1_exp_tid_reg_unreg, hfi1_exp_tid_reg,
+ TP_PROTO(unsigned int ctxt, u16 subctxt, u32 rarr, u32 npages,
+ unsigned long va, unsigned long pa, dma_addr_t dma),
+ TP_ARGS(ctxt, subctxt, rarr, npages, va, pa, dma));
+
+TRACE_EVENT(
+ hfi1_put_tid,
+ TP_PROTO(struct hfi1_devdata *dd,
+ u32 index, u32 type, unsigned long pa, u16 order),
+ TP_ARGS(dd, index, type, pa, order),
+ TP_STRUCT__entry(
+ DD_DEV_ENTRY(dd)
+ __field(unsigned long, pa);
+ __field(u32, index);
+ __field(u32, type);
+ __field(u16, order);
+ ),
+ TP_fast_assign(
+ DD_DEV_ASSIGN(dd);
+ __entry->pa = pa;
+ __entry->index = index;
+ __entry->type = type;
+ __entry->order = order;
+ ),
+ TP_printk("[%s] type %s pa %lx index %u order %u",
+ __get_str(dev),
+ show_tidtype(__entry->type),
+ __entry->pa,
+ __entry->index,
+ __entry->order
+ )
+);
TRACE_EVENT(hfi1_exp_tid_inval,
TP_PROTO(unsigned int ctxt, u16 subctxt, unsigned long va, u32 rarr,
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 5da1e45..76c2451 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -297,31 +297,25 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
- struct ib_header *hdr = packet->hdr;
- u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct ib_other_headers *ohdr = packet->ohdr;
- u32 bth0, opcode;
+ u32 opcode = packet->opcode;
u32 hdrsize = packet->hlen;
u32 psn;
- u32 pad;
+ u32 pad = packet->pad;
struct ib_wc wc;
u32 pmtu = qp->pmtu;
struct ib_reth *reth;
- int has_grh = rcv_flags & HFI1_HAS_GRH;
int ret;
- bth0 = be32_to_cpu(ohdr->bth[0]);
- if (hfi1_ruc_check_hdr(ibp, hdr, has_grh, qp, bth0))
+ if (hfi1_ruc_check_hdr(ibp, packet))
return;
process_ecn(qp, packet, true);
- psn = be32_to_cpu(ohdr->bth[2]);
- opcode = ib_bth_get_opcode(ohdr);
-
+ psn = ib_bth_get_psn(ohdr);
/* Compare the PSN verses the expected PSN. */
if (unlikely(cmp_psn(psn, qp->r_psn) != 0)) {
/*
@@ -432,8 +426,6 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
wc.ex.imm_data = 0;
wc.wc_flags = 0;
send_last:
- /* Get the number of bytes the message was padded by. */
- pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
@@ -527,8 +519,6 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
rdma_last_imm:
wc.wc_flags = IB_WC_WITH_IMM;
- /* Get the number of bytes the message was padded by. */
- pad = ib_bth_get_pad(ohdr);
/* Check for invalid length. */
/* LAST len should be >= 1 */
if (unlikely(tlen < (hdrsize + pad + 4)))
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 6a4e95c..6bf7a1b0 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -110,10 +110,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
((1 << ppd->lmc) - 1));
if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
qp->s_pkey_index, slid))) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
- rdma_ah_get_sl(ah_attr),
- sqp->ibqp.qp_num, qp->ibqp.qp_num,
- slid, rdma_ah_get_dlid(ah_attr));
+ hfi1_bad_pkey(ibp, pkey,
+ rdma_ah_get_sl(ah_attr),
+ sqp->ibqp.qp_num, qp->ibqp.qp_num,
+ slid, rdma_ah_get_dlid(ah_attr));
goto drop;
}
}
@@ -128,18 +128,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
sqp->qkey : swqe->ud_wr.remote_qkey;
- if (unlikely(qkey != qp->qkey)) {
- u16 lid;
-
- lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
- ((1 << ppd->lmc) - 1));
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
- rdma_ah_get_sl(ah_attr),
- sqp->ibqp.qp_num, qp->ibqp.qp_num,
- lid,
- rdma_ah_get_dlid(ah_attr));
- goto drop;
- }
+ if (unlikely(qkey != qp->qkey))
+ goto drop; /* silently drop per IBTA spec */
}
/*
@@ -549,7 +539,7 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
hdr.lrh[3] = cpu_to_be16(slid);
plen = 2 /* PBC */ + hwords;
- pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
vl = sc_to_vlt(ppd->dd, sc5);
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
@@ -668,36 +658,31 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
struct ib_other_headers *ohdr = packet->ohdr;
- int opcode;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
u32 qkey;
u32 src_qp;
- u16 dlid, pkey;
+ u16 pkey;
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_header *hdr = packet->hdr;
- u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- bool has_grh = rcv_flags & HFI1_HAS_GRH;
u8 sc5 = hfi1_9B_get_sc5(hdr, packet->rhf);
u32 bth1;
- u8 sl_from_sc, sl;
- u16 slid;
- u8 extra_bytes;
+ u8 sl_from_sc;
+ u8 extra_bytes = packet->pad;
+ u8 opcode = packet->opcode;
+ u8 sl = packet->sl;
+ u32 dlid = packet->dlid;
+ u32 slid = packet->slid;
- qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
- src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
- dlid = ib_get_dlid(hdr);
bth1 = be32_to_cpu(ohdr->bth[1]);
- slid = ib_get_slid(hdr);
+ qkey = ib_get_qkey(ohdr);
+ src_qp = ib_get_sqpn(ohdr);
pkey = ib_bth_get_pkey(ohdr);
- opcode = ib_bth_get_opcode(ohdr);
- sl = ib_get_sl(hdr);
- extra_bytes = ib_bth_get_pad(ohdr);
extra_bytes += (SIZE_OF_CRC << 2);
sl_from_sc = ibp->sc_to_sl[sc5];
@@ -727,10 +712,10 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* for invalid pkeys is optional according to
* IB spec (release 1.3, section 10.9.4)
*/
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
- pkey, sl,
- src_qp, qp->ibqp.qp_num,
- slid, dlid);
+ hfi1_bad_pkey(ibp,
+ pkey, sl,
+ src_qp, qp->ibqp.qp_num,
+ slid, dlid);
return;
}
} else {
@@ -739,12 +724,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (mgmt_pkey_idx < 0)
goto drop;
}
- if (unlikely(qkey != qp->qkey)) {
- hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey, sl,
- src_qp, qp->ibqp.qp_num,
- slid, dlid);
+ if (unlikely(qkey != qp->qkey)) /* Silent drop */
return;
- }
+
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
(tlen > 2048 || (sc5 == 0xF))))
@@ -811,7 +793,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
qp->r_flags |= RVT_R_REUSE_SGE;
goto drop;
}
- if (has_grh) {
+ if (packet->grh) {
hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index a8f0aa4..6318e6c 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -51,14 +51,6 @@
#include "trace.h"
#include "mmu_rb.h"
-struct tid_group {
- struct list_head list;
- u32 base;
- u8 size;
- u8 used;
- u8 map;
-};
-
struct tid_rb_node {
struct mmu_rb_node mmu;
unsigned long phys;
@@ -75,8 +67,6 @@ struct tid_pageset {
u16 count;
};
-#define EXP_TID_SET_EMPTY(set) (set.count == 0 && list_empty(&set.list))
-
#define num_user_pages(vaddr, len) \
(1 + (((((unsigned long)(vaddr) + \
(unsigned long)(len) - 1) & PAGE_MASK) - \
@@ -109,88 +99,6 @@ static struct mmu_rb_ops tid_rb_ops = {
.invalidate = tid_rb_invalidate
};
-static inline u32 rcventry2tidinfo(u32 rcventry)
-{
- u32 pair = rcventry & ~0x1;
-
- return EXP_TID_SET(IDX, pair >> 1) |
- EXP_TID_SET(CTRL, 1 << (rcventry - pair));
-}
-
-static inline void exp_tid_group_init(struct exp_tid_set *set)
-{
- INIT_LIST_HEAD(&set->list);
- set->count = 0;
-}
-
-static inline void tid_group_remove(struct tid_group *grp,
- struct exp_tid_set *set)
-{
- list_del_init(&grp->list);
- set->count--;
-}
-
-static inline void tid_group_add_tail(struct tid_group *grp,
- struct exp_tid_set *set)
-{
- list_add_tail(&grp->list, &set->list);
- set->count++;
-}
-
-static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
-{
- struct tid_group *grp =
- list_first_entry(&set->list, struct tid_group, list);
- list_del_init(&grp->list);
- set->count--;
- return grp;
-}
-
-static inline void tid_group_move(struct tid_group *group,
- struct exp_tid_set *s1,
- struct exp_tid_set *s2)
-{
- tid_group_remove(group, s1);
- tid_group_add_tail(group, s2);
-}
-
-int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd)
-{
- struct hfi1_ctxtdata *uctxt = fd->uctxt;
- struct hfi1_devdata *dd = fd->dd;
- u32 tidbase;
- u32 i;
- struct tid_group *grp, *gptr;
-
- exp_tid_group_init(&uctxt->tid_group_list);
- exp_tid_group_init(&uctxt->tid_used_list);
- exp_tid_group_init(&uctxt->tid_full_list);
-
- tidbase = uctxt->expected_base;
- for (i = 0; i < uctxt->expected_count /
- dd->rcv_entries.group_size; i++) {
- grp = kzalloc(sizeof(*grp), GFP_KERNEL);
- if (!grp)
- goto grp_failed;
-
- grp->size = dd->rcv_entries.group_size;
- grp->base = tidbase;
- tid_group_add_tail(grp, &uctxt->tid_group_list);
- tidbase += dd->rcv_entries.group_size;
- }
-
- return 0;
-
-grp_failed:
- list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
- list) {
- list_del_init(&grp->list);
- kfree(grp);
- }
-
- return -ENOMEM;
-}
-
/*
* Initialize context and file private data needed for Expected
* receive caching. This needs to be done after the context has
@@ -266,18 +174,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd)
return ret;
}
-void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt)
-{
- struct tid_group *grp, *gptr;
-
- list_for_each_entry_safe(grp, gptr, &uctxt->tid_group_list.list,
- list) {
- list_del_init(&grp->list);
- kfree(grp);
- }
- hfi1_clear_tids(uctxt);
-}
-
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
struct hfi1_ctxtdata *uctxt = fd->uctxt;
@@ -303,23 +199,6 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
}
/*
- * Write an "empty" RcvArray entry.
- * This function exists so the TID registaration code can use it
- * to write to unused/unneeded entries and still take advantage
- * of the WC performance improvements. The HFI will ignore this
- * write to the RcvArray entry.
- */
-static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
-{
- /*
- * Doing the WC fill writes only makes sense if the device is
- * present and the RcvArray has been mapped as WC memory.
- */
- if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
- writeq(0, dd->rcvarray_wc + (index * 8));
-}
-
-/*
* RcvArray entry allocation for Expected Receives is done by the
* following algorithm:
*
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.h b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
index 5250c89..1bdc61b 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.h
@@ -49,29 +49,8 @@
#include "hfi.h"
-#define EXP_TID_TIDLEN_MASK 0x7FFULL
-#define EXP_TID_TIDLEN_SHIFT 0
-#define EXP_TID_TIDCTRL_MASK 0x3ULL
-#define EXP_TID_TIDCTRL_SHIFT 20
-#define EXP_TID_TIDIDX_MASK 0x3FFULL
-#define EXP_TID_TIDIDX_SHIFT 22
-#define EXP_TID_GET(tid, field) \
- (((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
+#include "exp_rcv.h"
-#define EXP_TID_SET(field, value) \
- (((value) & EXP_TID_TID##field##_MASK) << \
- EXP_TID_TID##field##_SHIFT)
-#define EXP_TID_CLEAR(tid, field) ({ \
- (tid) &= ~(EXP_TID_TID##field##_MASK << \
- EXP_TID_TID##field##_SHIFT); \
- })
-#define EXP_TID_RESET(tid, field, value) do { \
- EXP_TID_CLEAR(tid, field); \
- (tid) |= EXP_TID_SET(field, (value)); \
- } while (0)
-
-void hfi1_user_exp_rcv_grp_free(struct hfi1_ctxtdata *uctxt);
-int hfi1_user_exp_rcv_grp_init(struct hfi1_filedata *fd);
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd);
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd);
int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index d55339f..8f7cfdd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -94,43 +94,13 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
/* Number of BTH.PSN bits used for sequence number in expected rcvs */
#define BTH_SEQ_MASK 0x7ffull
-/*
- * Define fields in the KDETH header so we can update the header
- * template.
- */
-#define KDETH_OFFSET_SHIFT 0
-#define KDETH_OFFSET_MASK 0x7fff
-#define KDETH_OM_SHIFT 15
-#define KDETH_OM_MASK 0x1
-#define KDETH_TID_SHIFT 16
-#define KDETH_TID_MASK 0x3ff
-#define KDETH_TIDCTRL_SHIFT 26
-#define KDETH_TIDCTRL_MASK 0x3
-#define KDETH_INTR_SHIFT 28
-#define KDETH_INTR_MASK 0x1
-#define KDETH_SH_SHIFT 29
-#define KDETH_SH_MASK 0x1
-#define KDETH_HCRC_UPPER_SHIFT 16
-#define KDETH_HCRC_UPPER_MASK 0xff
-#define KDETH_HCRC_LOWER_SHIFT 24
-#define KDETH_HCRC_LOWER_MASK 0xff
-
#define AHG_KDETH_INTR_SHIFT 12
#define AHG_KDETH_SH_SHIFT 13
+#define AHG_KDETH_ARRAY_SIZE 9
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
-#define KDETH_GET(val, field) \
- (((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
-#define KDETH_SET(dw, field, val) do { \
- u32 dwval = le32_to_cpu(dw); \
- dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
- dwval |= (((val) & KDETH_##field##_MASK) << \
- KDETH_##field##_SHIFT); \
- dw = cpu_to_le32(dwval); \
- } while (0)
-
#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \
do { \
if ((idx) < ARRAY_SIZE((arr))) \
@@ -141,23 +111,10 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
return -ERANGE; \
} while (0)
-/* KDETH OM multipliers and switch over point */
-#define KDETH_OM_SMALL 4
-#define KDETH_OM_SMALL_SHIFT 2
-#define KDETH_OM_LARGE 64
-#define KDETH_OM_LARGE_SHIFT 6
-#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
-
/* Tx request flag bits */
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
-/* SDMA request flag bits */
-#define SDMA_REQ_FOR_THREAD 1
-#define SDMA_REQ_SEND_DONE 2
-#define SDMA_REQ_HAS_ERROR 3
-#define SDMA_REQ_DONE_ERROR 4
-
#define SDMA_PKT_Q_INACTIVE BIT(0)
#define SDMA_PKT_Q_ACTIVE BIT(1)
#define SDMA_PKT_Q_DEFERRED BIT(2)
@@ -204,25 +161,41 @@ struct evict_data {
};
struct user_sdma_request {
- struct sdma_req_info info;
- struct hfi1_user_sdma_pkt_q *pq;
- struct hfi1_user_sdma_comp_q *cq;
/* This is the original header from user space */
struct hfi1_pkt_header hdr;
+
+ /* Read mostly fields */
+ struct hfi1_user_sdma_pkt_q *pq ____cacheline_aligned_in_smp;
+ struct hfi1_user_sdma_comp_q *cq;
/*
* Pointer to the SDMA engine for this request.
* Since different request could be on different VLs,
* each request will need it's own engine pointer.
*/
struct sdma_engine *sde;
- s8 ahg_idx;
- u32 ahg[9];
+ struct sdma_req_info info;
+ /* TID array values copied from the tid_iov vector */
+ u32 *tids;
+ /* total length of the data in the request */
+ u32 data_len;
+ /* number of elements copied to the tids array */
+ u16 n_tids;
/*
- * KDETH.Offset (Eager) field
- * We need to remember the initial value so the headers
- * can be updated properly.
+ * We copy the iovs for this request (based on
+ * info.iovcnt). These are only the data vectors
*/
- u32 koffset;
+ u8 data_iovs;
+ s8 ahg_idx;
+
+ /* Writeable fields shared with interrupt */
+ u64 seqcomp ____cacheline_aligned_in_smp;
+ u64 seqsubmitted;
+ /* status of the last txreq completed */
+ int status;
+
+ /* Send side fields */
+ struct list_head txps ____cacheline_aligned_in_smp;
+ u64 seqnum;
/*
* KDETH.OFFSET (TID) field
* The offset can cover multiple packets, depending on the
@@ -230,29 +203,21 @@ struct user_sdma_request {
*/
u32 tidoffset;
/*
- * We copy the iovs for this request (based on
- * info.iovcnt). These are only the data vectors
+ * KDETH.Offset (Eager) field
+ * We need to remember the initial value so the headers
+ * can be updated properly.
*/
- unsigned data_iovs;
- /* total length of the data in the request */
- u32 data_len;
- /* progress index moving along the iovs array */
- unsigned iov_idx;
- struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
- /* number of elements copied to the tids array */
- u16 n_tids;
- /* TID array values copied from the tid_iov vector */
- u32 *tids;
- u16 tididx;
+ u32 koffset;
u32 sent;
- u64 seqnum;
- u64 seqcomp;
- u64 seqsubmitted;
- struct list_head txps;
- unsigned long flags;
- /* status of the last txreq completed */
- int status;
-};
+ /* TID index copied from the tid_iov vector */
+ u16 tididx;
+ /* progress index moving along the iovs array */
+ u8 iov_idx;
+ u8 done;
+ u8 has_error;
+
+ struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
+} ____cacheline_aligned_in_smp;
/*
* A single txreq could span up to 3 physical pages when the MTU
@@ -607,12 +572,20 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
uctxt->ctxt, fd->subctxt, info.comp_idx);
req = pq->reqs + info.comp_idx;
- memset(req, 0, sizeof(*req));
req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
+ req->data_len = 0;
req->pq = pq;
req->cq = cq;
req->status = -1;
req->ahg_idx = -1;
+ req->iov_idx = 0;
+ req->sent = 0;
+ req->seqnum = 0;
+ req->seqcomp = 0;
+ req->seqsubmitted = 0;
+ req->tids = NULL;
+ req->done = 0;
+ req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
@@ -701,12 +674,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
/* Save all the IO vector structures */
for (i = 0; i < req->data_iovs; i++) {
+ req->iovs[i].offset = 0;
INIT_LIST_HEAD(&req->iovs[i].list);
memcpy(&req->iovs[i].iov,
iovec + idx++,
sizeof(req->iovs[i].iov));
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
+ req->data_iovs = i;
req->status = ret;
goto free_req;
}
@@ -749,6 +724,7 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
}
req->tids = tmp;
req->n_tids = ntids;
+ req->tididx = 0;
idx++;
}
@@ -791,12 +767,12 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
* request have been submitted to the SDMA engine. However, it
* will not wait for send completions.
*/
- while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
+ while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
if (ret != -EBUSY) {
req->status = ret;
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ WRITE_ONCE(req->has_error, 1);
if (ACCESS_ONCE(req->seqcomp) ==
req->seqsubmitted - 1)
goto free_req;
@@ -898,10 +874,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
pq = req->pq;
/* If tx completion has reported an error, we are done. */
- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ if (READ_ONCE(req->has_error))
return -EFAULT;
- }
/*
* Check if we might have sent the entire request already
@@ -924,10 +898,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
* with errors. If so, we are not going to process any
* more packets from this request.
*/
- if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
- set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
+ if (READ_ONCE(req->has_error))
return -EFAULT;
- }
tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
if (!tx)
@@ -1024,11 +996,6 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
datalen);
if (changes < 0)
goto free_tx;
- sdma_txinit_ahg(&tx->txreq,
- SDMA_TXREQ_F_USE_AHG,
- datalen, req->ahg_idx, changes,
- req->ahg, sizeof(req->hdr),
- user_sdma_txreq_cb);
}
} else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -1105,7 +1072,7 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
- set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+ WRITE_ONCE(req->done, 1);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@@ -1155,14 +1122,23 @@ static int pin_vector_pages(struct user_sdma_request *req,
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct sdma_mmu_node *node = NULL;
struct mmu_rb_node *rb_node;
+ bool extracted;
- rb_node = hfi1_mmu_rb_extract(pq->handler,
- (unsigned long)iovec->iov.iov_base,
- iovec->iov.iov_len);
- if (rb_node)
+ extracted =
+ hfi1_mmu_rb_remove_unless_exact(pq->handler,
+ (unsigned long)
+ iovec->iov.iov_base,
+ iovec->iov.iov_len, &rb_node);
+ if (rb_node) {
node = container_of(rb_node, struct sdma_mmu_node, rb);
- else
- rb_node = NULL;
+ if (!extracted) {
+ atomic_inc(&node->refcount);
+ iovec->pages = node->pages;
+ iovec->npages = node->npages;
+ iovec->node = node;
+ return 0;
+ }
+ }
if (!node) {
node = kzalloc(sizeof(*node), GFP_KERNEL);
@@ -1423,21 +1399,22 @@ static int set_txreq_header(struct user_sdma_request *req,
}
static int set_txreq_header_ahg(struct user_sdma_request *req,
- struct user_sdma_txreq *tx, u32 len)
+ struct user_sdma_txreq *tx, u32 datalen)
{
+ u32 ahg[AHG_KDETH_ARRAY_SIZE];
int diff = 0;
u8 omfactor; /* KDETH.OM */
struct hfi1_user_sdma_pkt_q *pq = req->pq;
struct hfi1_pkt_header *hdr = &req->hdr;
u16 pbclen = le16_to_cpu(hdr->pbc[0]);
- u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));
+ u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));
if (PBC2LRH(pbclen) != lrhlen) {
/* PBC.PbcLengthDWs */
- AHG_HEADER_SET(req->ahg, diff, 0, 0, 12,
+ AHG_HEADER_SET(ahg, diff, 0, 0, 12,
cpu_to_le16(LRH2PBC(lrhlen)));
/* LRH.PktLen (we need the full 16 bits due to byte swap) */
- AHG_HEADER_SET(req->ahg, diff, 3, 0, 16,
+ AHG_HEADER_SET(ahg, diff, 3, 0, 16,
cpu_to_be16(lrhlen >> 2));
}
@@ -1449,13 +1426,12 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
val32 |= 1UL << 31;
- AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
- AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
+ AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
+ AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
/* KDETH.Offset */
- AHG_HEADER_SET(req->ahg, diff, 15, 0, 16,
+ AHG_HEADER_SET(ahg, diff, 15, 0, 16,
cpu_to_le16(req->koffset & 0xffff));
- AHG_HEADER_SET(req->ahg, diff, 15, 16, 16,
- cpu_to_le16(req->koffset >> 16));
+ AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16));
if (req_opcode(req->info.ctrl) == EXPECTED) {
__le16 val;
@@ -1473,9 +1449,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
* we have to check again.
*/
if (++req->tididx > req->n_tids - 1 ||
- !req->tids[req->tididx]) {
+ !req->tids[req->tididx])
return -EINVAL;
- }
tidval = req->tids[req->tididx];
}
omfactor = ((EXP_TID_GET(tidval, LEN) *
@@ -1483,7 +1458,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
KDETH_OM_SMALL_SHIFT;
/* KDETH.OM and KDETH.OFFSET (TID) */
- AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
+ AHG_HEADER_SET(ahg, diff, 7, 0, 16,
((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
((req->tidoffset >> omfactor)
& 0x7fff)));
@@ -1503,12 +1478,20 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
AHG_KDETH_INTR_SHIFT));
}
- AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+ AHG_HEADER_SET(ahg, diff, 7, 16, 14, val);
}
+ if (diff < 0)
+ return diff;
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
req->info.comp_idx, req->sde->this_idx,
- req->ahg_idx, req->ahg, diff, tidval);
+ req->ahg_idx, ahg, diff, tidval);
+ sdma_txinit_ahg(&tx->txreq,
+ SDMA_TXREQ_F_USE_AHG,
+ datalen, req->ahg_idx, diff,
+ ahg, sizeof(req->hdr),
+ user_sdma_txreq_cb);
+
return diff;
}
@@ -1537,7 +1520,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
if (status != SDMA_TXREQ_S_OK) {
SDMA_DBG(req, "SDMA completion with error %d",
status);
- set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
+ WRITE_ONCE(req->has_error, 1);
}
req->seqcomp = tx->seqnum;
@@ -1556,8 +1539,8 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
if (status != SDMA_TXREQ_S_OK)
req->status = status;
if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
- (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
- test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
+ (READ_ONCE(req->done) ||
+ READ_ONCE(req->has_error))) {
user_sdma_free_request(req, false);
pq_update(pq);
set_comp_state(pq, cq, idx, ERROR, req->status);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2d19f9b..9c9ded6 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -508,13 +508,14 @@ void hfi1_copy_sge(
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
-static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
+static inline opcode_handler qp_ok(struct hfi1_packet *packet)
{
if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK))
return NULL;
- if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) ||
- (opcode == IB_OPCODE_CNP))
- return opcode_handler_tbl[opcode];
+ if (((packet->opcode & RVT_OPCODE_QP_MASK) ==
+ packet->qp->allowed_ops) ||
+ (packet->opcode == IB_OPCODE_CNP))
+ return opcode_handler_tbl[packet->opcode];
return NULL;
}
@@ -548,69 +549,34 @@ static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc)
return pbc;
}
-/**
- * hfi1_ib_rcv - process an incoming packet
- * @packet: data packet information
- *
- * This is called to process an incoming packet at interrupt level.
- *
- * Tlen is the length of the header + data + CRC in bytes.
- */
-void hfi1_ib_rcv(struct hfi1_packet *packet)
+static inline void hfi1_handle_packet(struct hfi1_packet *packet,
+ bool is_mcast)
{
+ u32 qp_num;
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct ib_header *hdr = packet->hdr;
- u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler packet_handler;
unsigned long flags;
- u32 qp_num;
- int lnh;
- u8 opcode;
- u16 lid;
- /* Check for GRH */
- lnh = ib_get_lnh(hdr);
- if (lnh == HFI1_LRH_BTH) {
- packet->ohdr = &hdr->u.oth;
- } else if (lnh == HFI1_LRH_GRH) {
- u32 vtf;
+ inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]);
- packet->ohdr = &hdr->u.l.oth;
- if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
- goto drop;
- vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
- if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
- goto drop;
- packet->rcv_flags |= HFI1_HAS_GRH;
- } else {
- goto drop;
- }
-
- trace_input_ibhdr(rcd->dd, hdr);
-
- opcode = ib_bth_get_opcode(packet->ohdr);
- inc_opstats(tlen, &rcd->opstats->stats[opcode]);
-
- /* Get the destination QP number. */
- qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK;
- lid = ib_get_dlid(hdr);
- if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
- (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) {
+ if (unlikely(is_mcast)) {
struct rvt_mcast *mcast;
struct rvt_mcast_qp *p;
- if (lnh != HFI1_LRH_GRH)
+ if (!packet->grh)
goto drop;
- mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid, lid);
+ mcast = rvt_mcast_find(&ibp->rvp,
+ &packet->grh->dgid,
+ packet->dlid);
if (!mcast)
goto drop;
list_for_each_entry_rcu(p, &mcast->qp_list, list) {
packet->qp = p->qp;
spin_lock_irqsave(&packet->qp->r_lock, flags);
- packet_handler = qp_ok(opcode, packet);
+ packet_handler = qp_ok(packet);
if (likely(packet_handler))
packet_handler(packet);
else
@@ -624,19 +590,21 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
if (atomic_dec_return(&mcast->refcount) <= 1)
wake_up(&mcast->wait);
} else {
+ /* Get the destination QP number. */
+ qp_num = ib_bth_get_qpn(packet->ohdr);
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp) {
rcu_read_unlock();
goto drop;
}
- if (unlikely(hfi1_dbg_fault_opcode(packet->qp, opcode,
+ if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode,
true))) {
rcu_read_unlock();
goto drop;
}
spin_lock_irqsave(&packet->qp->r_lock, flags);
- packet_handler = qp_ok(opcode, packet);
+ packet_handler = qp_ok(packet);
if (likely(packet_handler))
packet_handler(packet);
else
@@ -645,11 +613,29 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
rcu_read_unlock();
}
return;
-
drop:
ibp->rvp.n_pkt_drops++;
}
+/**
+ * hfi1_ib_rcv - process an incoming packet
+ * @packet: data packet information
+ *
+ * This is called to process an incoming packet at interrupt level.
+ */
+void hfi1_ib_rcv(struct hfi1_packet *packet)
+{
+ struct hfi1_ctxtdata *rcd = packet->rcd;
+ bool is_mcast = false;
+
+ if (unlikely(hfi1_check_mcast(packet->dlid)))
+ is_mcast = true;
+
+ trace_input_ibhdr(rcd->dd, packet,
+ !!(packet->rhf & RHF_DC_INFO_SMASK));
+ hfi1_handle_packet(packet, is_mcast);
+}
+
/*
* This is called from a timer to check for QPs
* which need kernel memory in order to send a packet.
@@ -863,7 +849,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
/* No vl15 here */
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
@@ -885,7 +871,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
return ret;
}
trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
- &ps->s_txreq->phdr.hdr);
+ &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
return ret;
bail_ecomm:
@@ -999,7 +985,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u8 opcode = get_opcode(&tx->phdr.hdr);
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
- pbc |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
+ pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
if (unlikely(hfi1_dbg_fault_opcode(qp, opcode, false)))
pbc = hfi1_fault_tx(qp, opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
@@ -1058,7 +1044,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
}
trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
- &ps->s_txreq->phdr.hdr);
+ &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5));
pio_bail:
if (qp->s_wqe) {
@@ -1368,7 +1354,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
props->lmc = ppd->lmc;
/* OPA logical states match IB logical states */
props->state = driver_lstate(ppd);
- props->phys_state = hfi1_ibphys_portstate(ppd);
+ props->phys_state = driver_pstate(ppd);
props->gid_tbl_len = HFI1_GUIDS_PER_PORT;
props->active_width = (u8)opa_width_to_ib(ppd->link_width_active);
/* see rate_show() in ib core/sysfs.c */
@@ -1551,9 +1537,13 @@ static void init_ibport(struct hfi1_pportdata *ppd)
/* Set the prefix to the default value (see ch. 4.1.1) */
ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
ibp->rvp.sm_lid = 0;
- /* Below should only set bits defined in OPA PortInfo.CapabilityMask */
+ /*
+ * Below should only set bits defined in OPA PortInfo.CapabilityMask
+ * and PortInfo.CapabilityMask3
+ */
ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP |
IB_PORT_CAP_MASK_NOTICE_SUP;
+ ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported;
ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index cd635d0..fdf1e1f 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -236,8 +236,8 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
/*
* This must be called with s_lock held.
*/
-void hfi1_bad_pqkey(struct hfi1_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
- u32 qp1, u32 qp2, u16 lid1, u16 lid2);
+void hfi1_bad_pkey(struct hfi1_ibport *ibp, u32 key, u32 sl,
+ u32 qp1, u32 qp2, u16 lid1, u16 lid2);
void hfi1_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
void hfi1_sys_guid_chg(struct hfi1_ibport *ibp);
void hfi1_node_desc_chg(struct hfi1_ibport *ibp);
@@ -307,8 +307,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet);
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct ib_header *hdr,
- u32 rcv_flags,
+ struct hfi1_packet *packet,
struct rvt_qp *qp);
u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr);
@@ -346,8 +345,7 @@ static inline u8 get_opcode(struct ib_header *h)
return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
- int has_grh, struct rvt_qp *qp, u32 bth0);
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
const struct ib_global_route *grh, u32 hwords, u32 nwords);
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 339f0cd..5a3f80b 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -156,11 +156,11 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
return ret;
bail:
/*
- * hfi1_free_ctxtdata() also releases send_context
- * structure if uctxt->sc is not null
+ * hfi1_rcd_put() will call hfi1_free_ctxtdata(), which will
+ * release send_context structure if uctxt->sc is not null
*/
dd->rcd[uctxt->ctxt] = NULL;
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_rcd_put(uctxt);
dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret);
return ret;
}
@@ -208,7 +208,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
hfi1_clear_ctxt_pkey(dd, uctxt);
hfi1_stats.sps_ctxts--;
- hfi1_free_ctxtdata(dd, uctxt);
+ hfi1_rcd_put(uctxt);
}
void hfi1_vnic_setup(struct hfi1_devdata *dd)
@@ -751,6 +751,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
if (rc)
break;
+ hfi1_rcd_get(dd->vnic.ctxt[i]);
dd->vnic.ctxt[i]->vnic_q_idx = i;
}
@@ -762,6 +763,7 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
*/
while (i-- > dd->vnic.num_ctxt) {
deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ hfi1_rcd_put(dd->vnic.ctxt[i]);
dd->vnic.ctxt[i] = NULL;
}
goto alloc_fail;
@@ -791,6 +793,7 @@ static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
if (--dd->vnic.num_vports == 0) {
for (i = 0; i < dd->vnic.num_ctxt; i++) {
deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
+ hfi1_rcd_put(dd->vnic.ctxt[i]);
dd->vnic.ctxt[i] = NULL;
}
hfi1_deinit_vnic_rsm(dd);
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index ff931c5..95382fa 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -218,6 +218,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_mtt;
uar = &to_mucontext(context)->uar;
+ cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
err = mlx4_db_alloc(dev->dev, &cq->db, 1);
if (err)
@@ -233,6 +234,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_db;
uar = &dev->priv_uar;
+ cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
}
if (dev->eq_table)
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index a422df7..1f25a37 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -80,6 +80,8 @@ static const char mlx4_ib_version[] =
DRV_VERSION "\n";
static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
+static enum rdma_link_layer mlx4_ib_port_link_layer(struct ib_device *device,
+ u8 port_num);
static struct workqueue_struct *wq;
@@ -551,6 +553,16 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
props->max_ah = INT_MAX;
+ if ((dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
+ (mlx4_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET ||
+ mlx4_ib_port_link_layer(ibdev, 2) == IB_LINK_LAYER_ETHERNET)) {
+ props->rss_caps.max_rwq_indirection_tables = props->max_qp;
+ props->rss_caps.max_rwq_indirection_table_size =
+ dev->dev->caps.max_rss_tbl_sz;
+ props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
+ props->max_wq_type_rq = props->max_qp;
+ }
+
if (!mlx4_is_slave(dev->dev))
err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
@@ -562,6 +574,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
}
}
+ if (uhw->outlen >= resp.response_length +
+ sizeof(resp.max_inl_recv_sz)) {
+ resp.response_length += sizeof(resp.max_inl_recv_sz);
+ resp.max_inl_recv_sz = dev->dev->caps.max_rq_sg *
+ sizeof(struct mlx4_wqe_data_seg);
+ }
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
if (err)
@@ -1068,6 +1087,9 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
+ INIT_LIST_HEAD(&context->wqn_ranges_list);
+ mutex_init(&context->wqn_ranges_mutex);
+
if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
else
@@ -2712,6 +2734,26 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
+ if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
+ ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
+ IB_LINK_LAYER_ETHERNET) ||
+ (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
+ IB_LINK_LAYER_ETHERNET))) {
+ ibdev->ib_dev.create_wq = mlx4_ib_create_wq;
+ ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq;
+ ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq;
+ ibdev->ib_dev.create_rwq_ind_table =
+ mlx4_ib_create_rwq_ind_table;
+ ibdev->ib_dev.destroy_rwq_ind_table =
+ mlx4_ib_destroy_rwq_ind_table;
+ ibdev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ }
+
if (!mlx4_is_slave(ibdev->dev)) {
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
@@ -2771,7 +2813,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
allocated = 0;
if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
IB_LINK_LAYER_ETHERNET) {
- err = mlx4_counter_alloc(ibdev->dev, &counter_index);
+ err = mlx4_counter_alloc(ibdev->dev, &counter_index,
+ MLX4_RES_USAGE_DRIVER);
/* if failed to allocate a new counter, use default */
if (err)
counter_index =
@@ -2826,7 +2869,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
MLX4_IB_UC_STEER_QPN_ALIGN,
- &ibdev->steer_qpn_base, 0);
+ &ibdev->steer_qpn_base, 0,
+ MLX4_RES_USAGE_DRIVER);
if (err)
goto err_counter;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 9db82e6..1fa1982 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -46,6 +46,7 @@
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/qp.h>
#define MLX4_IB_DRV_NAME "mlx4_ib"
@@ -88,6 +89,8 @@ struct mlx4_ib_ucontext {
struct list_head db_page_list;
struct mutex db_page_mutex;
struct mlx4_ib_vma_private_data hw_bar_info[HW_BAR_COUNT];
+ struct list_head wqn_ranges_list;
+ struct mutex wqn_ranges_mutex; /* protect wqn_ranges_list */
};
struct mlx4_ib_pd {
@@ -289,8 +292,25 @@ struct mlx4_roce_smac_vlan_info {
int update_vid;
};
+struct mlx4_wqn_range {
+ int base_wqn;
+ int size;
+ int refcount;
+ bool dirty;
+ struct list_head list;
+};
+
+struct mlx4_ib_rss {
+ unsigned int base_qpn_tbl_sz;
+ u8 flags;
+ u8 rss_key[MLX4_EN_RSS_KEY_SIZE];
+};
+
struct mlx4_ib_qp {
- struct ib_qp ibqp;
+ union {
+ struct ib_qp ibqp;
+ struct ib_wq ibwq;
+ };
struct mlx4_qp mqp;
struct mlx4_buf buf;
@@ -318,6 +338,7 @@ struct mlx4_ib_qp {
u8 sq_no_prefetch;
u8 state;
int mlx_type;
+ u32 inl_recv_sz;
struct list_head gid_list;
struct list_head steering_rules;
struct mlx4_ib_buf *sqp_proxy_rcv;
@@ -328,6 +349,10 @@ struct mlx4_ib_qp {
struct list_head cq_recv_list;
struct list_head cq_send_list;
struct counter_index *counter_index;
+ struct mlx4_wqn_range *wqn_range;
+ /* Number of RSS QP parents that uses this WQ */
+ u32 rss_usecnt;
+ struct mlx4_ib_rss *rss_ctx;
};
struct mlx4_ib_srq {
@@ -623,6 +648,8 @@ struct mlx4_uverbs_ex_query_device_resp {
__u32 comp_mask;
__u32 response_length;
__u64 hca_core_clock_offset;
+ __u32 max_inl_recv_sz;
+ __u32 reserved;
};
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@@ -890,4 +917,17 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port);
+struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_wq(struct ib_wq *wq);
+int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata);
+
+struct ib_rwq_ind_table
+*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata);
+int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
+
#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 75c0e6c..e42acfb 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -53,6 +53,7 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq,
struct mlx4_ib_cq *recv_cq);
static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq,
struct mlx4_ib_cq *recv_cq);
+static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state);
enum {
MLX4_IB_ACK_REQ_FREQ = 8,
@@ -116,6 +117,11 @@ static const __be32 mlx4_ib_opcode[] = {
[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD] = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
};
+enum mlx4_ib_source_type {
+ MLX4_IB_QP_SRC = 0,
+ MLX4_IB_RWQ_SRC = 1,
+};
+
static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
{
return container_of(mqp, struct mlx4_ib_sqp, qp);
@@ -330,6 +336,12 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
}
}
+static void mlx4_ib_wq_event(struct mlx4_qp *qp, enum mlx4_event type)
+{
+ pr_warn_ratelimited("Unexpected event type %d on WQ 0x%06x. Events are not supported for WQs\n",
+ type, qp->qpn);
+}
+
static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
{
/*
@@ -377,7 +389,8 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- int is_user, int has_rq, struct mlx4_ib_qp *qp)
+ int is_user, int has_rq, struct mlx4_ib_qp *qp,
+ u32 inl_recv_sz)
{
/* Sanity check RQ size before proceeding */
if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE ||
@@ -385,18 +398,24 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
return -EINVAL;
if (!has_rq) {
- if (cap->max_recv_wr)
+ if (cap->max_recv_wr || inl_recv_sz)
return -EINVAL;
qp->rq.wqe_cnt = qp->rq.max_gs = 0;
} else {
+ u32 max_inl_recv_sz = dev->dev->caps.max_rq_sg *
+ sizeof(struct mlx4_wqe_data_seg);
+ u32 wqe_size;
+
/* HW requires >= 1 RQ entry with >= 1 gather entry */
- if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge))
+ if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge ||
+ inl_recv_sz > max_inl_recv_sz))
return -EINVAL;
qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, cap->max_recv_wr));
qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
- qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
+ wqe_size = qp->rq.max_gs * sizeof(struct mlx4_wqe_data_seg);
+ qp->rq.wqe_shift = ilog2(max_t(u32, wqe_size, inl_recv_sz));
}
/* leave userspace return values as they were, so as not to break ABI */
@@ -632,7 +651,297 @@ static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
qp->counter_index = NULL;
}
+static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx4_ib_create_qp_rss *ucmd)
+{
+ rss_ctx->base_qpn_tbl_sz = init_attr->rwq_ind_tbl->ind_tbl[0]->wq_num |
+ (init_attr->rwq_ind_tbl->log_ind_tbl_size << 24);
+
+ if ((ucmd->rx_hash_function == MLX4_IB_RX_HASH_FUNC_TOEPLITZ) &&
+ (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS_TOP)) {
+ memcpy(rss_ctx->rss_key, ucmd->rx_hash_key,
+ MLX4_EN_RSS_KEY_SIZE);
+ } else {
+ pr_debug("RX Hash function is not supported\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) {
+ rss_ctx->flags = MLX4_RSS_IPV4;
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV4) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV4)) {
+ pr_debug("RX Hash fields_mask is not supported - both IPv4 SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) {
+ rss_ctx->flags |= MLX4_RSS_IPV6;
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_IPV6) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_IPV6)) {
+ pr_debug("RX Hash fields_mask is not supported - both IPv6 SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UDP_RSS)) {
+ pr_debug("RX Hash fields_mask for UDP is not supported\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if (rss_ctx->flags & MLX4_RSS_IPV4) {
+ rss_ctx->flags |= MLX4_RSS_UDP_IPV4;
+ } else if (rss_ctx->flags & MLX4_RSS_IPV6) {
+ rss_ctx->flags |= MLX4_RSS_UDP_IPV6;
+ } else {
+ pr_debug("RX Hash fields_mask is not supported - UDP must be set with IPv4 or IPv6\n");
+ return (-EOPNOTSUPP);
+ }
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_UDP) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_UDP)) {
+ pr_debug("RX Hash fields_mask is not supported - both UDP SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) &&
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
+ if (rss_ctx->flags & MLX4_RSS_IPV4) {
+ rss_ctx->flags |= MLX4_RSS_TCP_IPV4;
+ } else if (rss_ctx->flags & MLX4_RSS_IPV6) {
+ rss_ctx->flags |= MLX4_RSS_TCP_IPV6;
+ } else {
+ pr_debug("RX Hash fields_mask is not supported - TCP must be set with IPv4 or IPv6\n");
+ return (-EOPNOTSUPP);
+ }
+
+ } else if ((ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_SRC_PORT_TCP) ||
+ (ucmd->rx_hash_fields_mask & MLX4_IB_RX_HASH_DST_PORT_TCP)) {
+ pr_debug("RX Hash fields_mask is not supported - both TCP SRC and DST must be set\n");
+ return (-EOPNOTSUPP);
+ }
+
+ return 0;
+}
+
+static int create_qp_rss(struct mlx4_ib_dev *dev, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct mlx4_ib_create_qp_rss *ucmd,
+ struct mlx4_ib_qp *qp)
+{
+ int qpn;
+ int err;
+
+ qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
+
+ err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn, 0, qp->mqp.usage);
+ if (err)
+ return err;
+
+ err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
+ if (err)
+ goto err_qpn;
+
+ mutex_init(&qp->mutex);
+
+ INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
+
+ qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_ETHERTYPE;
+ qp->state = IB_QPS_RESET;
+
+ /* Set dummy send resources to be compatible with HV and PRM */
+ qp->sq_no_prefetch = 1;
+ qp->sq.wqe_cnt = 1;
+ qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
+ qp->buf_size = qp->sq.wqe_cnt << MLX4_IB_MIN_SQ_STRIDE;
+ qp->mtt = (to_mqp(
+ (struct ib_qp *)init_attr->rwq_ind_tbl->ind_tbl[0]))->mtt;
+
+ qp->rss_ctx = kzalloc(sizeof(*qp->rss_ctx), GFP_KERNEL);
+ if (!qp->rss_ctx) {
+ err = -ENOMEM;
+ goto err_qp_alloc;
+ }
+
+ err = set_qp_rss(dev, qp->rss_ctx, init_attr, ucmd);
+ if (err)
+ goto err;
+
+ return 0;
+
+err:
+ kfree(qp->rss_ctx);
+
+err_qp_alloc:
+ mlx4_qp_remove(dev->dev, &qp->mqp);
+ mlx4_qp_free(dev->dev, &qp->mqp);
+
+err_qpn:
+ mlx4_qp_release_range(dev->dev, qpn, 1);
+ return err;
+}
+
+static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *qp;
+ struct mlx4_ib_create_qp_rss ucmd = {};
+ size_t required_cmd_sz;
+ int err;
+
+ if (!udata) {
+ pr_debug("RSS QP with NULL udata\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (udata->outlen)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved1) +
+ sizeof(ucmd.reserved1);
+ if (udata->inlen < required_cmd_sz) {
+ pr_debug("invalid inlen\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) {
+ pr_debug("copy failed\n");
+ return ERR_PTR(-EFAULT);
+ }
+
+ if (ucmd.comp_mask || ucmd.reserved1)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ pr_debug("inlen is not supported\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (init_attr->qp_type != IB_QPT_RAW_PACKET) {
+ pr_debug("RSS QP with unsupported QP type %d\n",
+ init_attr->qp_type);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (init_attr->create_flags) {
+ pr_debug("RSS QP doesn't support create flags\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (init_attr->send_cq || init_attr->cap.max_send_wr) {
+ pr_debug("RSS QP with unsupported send attributes\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+
+ err = create_qp_rss(to_mdev(pd->device), pd, init_attr, &ucmd, qp);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->ibqp.qp_num = qp->mqp.qpn;
+
+ return &qp->ibqp;
+}
+
+/*
+ * This function allocates a WQN from a range which is consecutive and aligned
+ * to its size. In case the range is full, then it creates a new range and
+ * allocates WQN from it. The new range will be used for following allocations.
+ */
+static int mlx4_ib_alloc_wqn(struct mlx4_ib_ucontext *context,
+ struct mlx4_ib_qp *qp, int range_size, int *wqn)
+{
+ struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device);
+ struct mlx4_wqn_range *range;
+ int err = 0;
+
+ mutex_lock(&context->wqn_ranges_mutex);
+
+ range = list_first_entry_or_null(&context->wqn_ranges_list,
+ struct mlx4_wqn_range, list);
+
+ if (!range || (range->refcount == range->size) || range->dirty) {
+ range = kzalloc(sizeof(*range), GFP_KERNEL);
+ if (!range) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = mlx4_qp_reserve_range(dev->dev, range_size,
+ range_size, &range->base_wqn, 0,
+ qp->mqp.usage);
+ if (err) {
+ kfree(range);
+ goto out;
+ }
+
+ range->size = range_size;
+ list_add(&range->list, &context->wqn_ranges_list);
+ } else if (range_size != 1) {
+ /*
+ * Requesting a new range (>1) when last range is still open, is
+ * not valid.
+ */
+ err = -EINVAL;
+ goto out;
+ }
+
+ qp->wqn_range = range;
+
+ *wqn = range->base_wqn + range->refcount;
+
+ range->refcount++;
+
+out:
+ mutex_unlock(&context->wqn_ranges_mutex);
+
+ return err;
+}
+
+static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context,
+ struct mlx4_ib_qp *qp, bool dirty_release)
+{
+ struct mlx4_ib_dev *dev = to_mdev(context->ibucontext.device);
+ struct mlx4_wqn_range *range;
+
+ mutex_lock(&context->wqn_ranges_mutex);
+
+ range = qp->wqn_range;
+
+ range->refcount--;
+ if (!range->refcount) {
+ mlx4_qp_release_range(dev->dev, range->base_wqn,
+ range->size);
+ list_del(&range->list);
+ kfree(range);
+ } else if (dirty_release) {
+ /*
+ * A range which one of its WQNs is destroyed, won't be able to be
+ * reused for further WQN allocations.
+ * The next created WQ will allocate a new range.
+ */
+ range->dirty = 1;
+ }
+
+ mutex_unlock(&context->wqn_ranges_mutex);
+}
+
static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
+ enum mlx4_ib_source_type src,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata, int sqpn,
struct mlx4_ib_qp **caller_qp)
@@ -645,6 +954,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
struct mlx4_ib_cq *mcq;
unsigned long flags;
+ int range_size = 0;
/* When tunneling special qps, we use a plain UD qp */
if (sqpn) {
@@ -719,26 +1029,71 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp);
- if (err)
- goto err;
if (pd->uobject) {
- struct mlx4_ib_create_qp ucmd;
+ union {
+ struct mlx4_ib_create_qp qp;
+ struct mlx4_ib_create_wq wq;
+ } ucmd;
+ size_t copy_len;
- if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
+ copy_len = (src == MLX4_IB_QP_SRC) ?
+ sizeof(struct mlx4_ib_create_qp) :
+ min(sizeof(struct mlx4_ib_create_wq), udata->inlen);
+
+ if (ib_copy_from_udata(&ucmd, udata, copy_len)) {
err = -EFAULT;
goto err;
}
- qp->sq_no_prefetch = ucmd.sq_no_prefetch;
+ if (src == MLX4_IB_RWQ_SRC) {
+ if (ucmd.wq.comp_mask || ucmd.wq.reserved1 ||
+ ucmd.wq.reserved[0] || ucmd.wq.reserved[1] ||
+ ucmd.wq.reserved[2]) {
+ pr_debug("user command isn't supported\n");
+ err = -EOPNOTSUPP;
+ goto err;
+ }
- err = set_user_sq_size(dev, qp, &ucmd);
+ if (ucmd.wq.log_range_size >
+ ilog2(dev->dev->caps.max_rss_tbl_sz)) {
+ pr_debug("WQN range size must be equal or smaller than %d\n",
+ dev->dev->caps.max_rss_tbl_sz);
+ err = -EOPNOTSUPP;
+ goto err;
+ }
+ range_size = 1 << ucmd.wq.log_range_size;
+ } else {
+ qp->inl_recv_sz = ucmd.qp.inl_recv_sz;
+ }
+
+ err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
+ qp_has_rq(init_attr), qp, qp->inl_recv_sz);
if (err)
goto err;
- qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
- qp->buf_size, 0, 0);
+ if (src == MLX4_IB_QP_SRC) {
+ qp->sq_no_prefetch = ucmd.qp.sq_no_prefetch;
+
+ err = set_user_sq_size(dev, qp,
+ (struct mlx4_ib_create_qp *)
+ &ucmd);
+ if (err)
+ goto err;
+ } else {
+ qp->sq_no_prefetch = 1;
+ qp->sq.wqe_cnt = 1;
+ qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE;
+ /* Allocated buffer expects to have at least that SQ
+ * size.
+ */
+ qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) +
+ (qp->sq.wqe_cnt << qp->sq.wqe_shift);
+ }
+
+ qp->umem = ib_umem_get(pd->uobject->context,
+ (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr :
+ ucmd.wq.buf_addr, qp->buf_size, 0, 0);
if (IS_ERR(qp->umem)) {
err = PTR_ERR(qp->umem);
goto err;
@@ -755,11 +1110,18 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (qp_has_rq(init_attr)) {
err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
- ucmd.db_addr, &qp->db);
+ (src == MLX4_IB_QP_SRC) ? ucmd.qp.db_addr :
+ ucmd.wq.db_addr, &qp->db);
if (err)
goto err_mtt;
}
+ qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
+ err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
+ qp_has_rq(init_attr), qp, 0);
+ if (err)
+ goto err;
+
qp->sq_no_prefetch = 0;
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
@@ -826,6 +1188,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
err = -ENOMEM;
goto err_wrid;
}
+ qp->mqp.usage = MLX4_RES_USAGE_DRIVER;
}
if (sqpn) {
@@ -836,6 +1199,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
goto err_wrid;
}
}
+ } else if (src == MLX4_IB_RWQ_SRC) {
+ err = mlx4_ib_alloc_wqn(to_mucontext(pd->uobject->context), qp,
+ range_size, &qpn);
+ if (err)
+ goto err_wrid;
} else {
/* Raw packet QPNs may not have bits 6,7 set in their qp_num;
* otherwise, the WQE BlueFlame setup flow wrongly causes
@@ -845,13 +1213,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
(init_attr->cap.max_send_wr ?
MLX4_RESERVE_ETH_BF_QP : 0) |
(init_attr->cap.max_recv_wr ?
- MLX4_RESERVE_A0_QP : 0));
+ MLX4_RESERVE_A0_QP : 0),
+ qp->mqp.usage);
else
if (qp->flags & MLX4_IB_QP_NETIF)
err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
else
err = mlx4_qp_reserve_range(dev->dev, 1, 1,
- &qpn, 0);
+ &qpn, 0, qp->mqp.usage);
if (err)
goto err_proxy;
}
@@ -873,7 +1242,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
*/
qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
- qp->mqp.event = mlx4_ib_qp_event;
+ qp->mqp.event = (src == MLX4_IB_QP_SRC) ? mlx4_ib_qp_event :
+ mlx4_ib_wq_event;
+
if (!*caller_qp)
*caller_qp = qp;
@@ -900,6 +1271,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
if (!sqpn) {
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qpn, 1);
+ else if (src == MLX4_IB_RWQ_SRC)
+ mlx4_ib_release_wqn(to_mucontext(pd->uobject->context),
+ qp, 0);
else
mlx4_qp_release_range(dev->dev, qpn, 1);
}
@@ -998,7 +1372,7 @@ static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
return to_mpd(qp->ibqp.pd);
}
-static void get_cqs(struct mlx4_ib_qp *qp,
+static void get_cqs(struct mlx4_ib_qp *qp, enum mlx4_ib_source_type src,
struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
{
switch (qp->ibqp.qp_type) {
@@ -1011,14 +1385,46 @@ static void get_cqs(struct mlx4_ib_qp *qp,
*recv_cq = *send_cq;
break;
default:
- *send_cq = to_mcq(qp->ibqp.send_cq);
- *recv_cq = to_mcq(qp->ibqp.recv_cq);
+ *recv_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.recv_cq) :
+ to_mcq(qp->ibwq.cq);
+ *send_cq = (src == MLX4_IB_QP_SRC) ? to_mcq(qp->ibqp.send_cq) :
+ *recv_cq;
break;
}
}
+static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+ if (qp->state != IB_QPS_RESET) {
+ int i;
+
+ for (i = 0; i < (1 << qp->ibqp.rwq_ind_tbl->log_ind_tbl_size);
+ i++) {
+ struct ib_wq *ibwq = qp->ibqp.rwq_ind_tbl->ind_tbl[i];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ wq->rss_usecnt--;
+
+ mutex_unlock(&wq->mutex);
+ }
+
+ if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
+ MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
+ pr_warn("modify QP %06x to RESET failed.\n",
+ qp->mqp.qpn);
+ }
+
+ mlx4_qp_remove(dev->dev, &qp->mqp);
+ mlx4_qp_free(dev->dev, &qp->mqp);
+ mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+ del_gid_entries(qp);
+ kfree(qp->rss_ctx);
+}
+
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
- int is_user)
+ enum mlx4_ib_source_type src, int is_user)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
unsigned long flags;
@@ -1051,7 +1457,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
}
}
- get_cqs(qp, &send_cq, &recv_cq);
+ get_cqs(qp, src, &send_cq, &recv_cq);
spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
mlx4_ib_lock_cqs(send_cq, recv_cq);
@@ -1077,6 +1483,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
+ else if (src == MLX4_IB_RWQ_SRC)
+ mlx4_ib_release_wqn(to_mucontext(
+ qp->ibwq.uobject->context), qp, 1);
else
mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
}
@@ -1084,9 +1493,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
if (is_user) {
- if (qp->rq.wqe_cnt)
- mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
- &qp->db);
+ if (qp->rq.wqe_cnt) {
+ struct mlx4_ib_ucontext *mcontext = !src ?
+ to_mucontext(qp->ibqp.uobject->context) :
+ to_mucontext(qp->ibwq.uobject->context);
+ mlx4_ib_db_unmap_user(mcontext, &qp->db);
+ }
ib_umem_release(qp->umem);
} else {
kvfree(qp->sq.wrid);
@@ -1128,6 +1540,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
u16 xrcdn = 0;
+ if (init_attr->rwq_ind_tbl)
+ return _mlx4_ib_create_qp_rss(pd, init_attr, udata);
+
/*
* We only support LSO, vendor flag1, and multicast loopback blocking,
* and only for kernel UD QPs.
@@ -1182,8 +1597,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
/* fall through */
case IB_QPT_UD:
{
- err = create_qp_common(to_mdev(pd->device), pd, init_attr,
- udata, 0, &qp);
+ err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
+ init_attr, udata, 0, &qp);
if (err) {
kfree(qp);
return ERR_PTR(err);
@@ -1203,7 +1618,9 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
if (udata)
return ERR_PTR(-EINVAL);
if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) {
- int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0);
+ int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev,
+ 1, 1, &sqpn, 0,
+ MLX4_RES_USAGE_DRIVER);
if (res)
return ERR_PTR(res);
@@ -1211,8 +1628,8 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
sqpn = get_sqp_num(to_mdev(pd->device), init_attr);
}
- err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata,
- sqpn, &qp);
+ err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC,
+ init_attr, udata, sqpn, &qp);
if (err)
return ERR_PTR(err);
@@ -1267,7 +1684,6 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
{
struct mlx4_ib_dev *dev = to_mdev(qp->device);
struct mlx4_ib_qp *mqp = to_mqp(qp);
- struct mlx4_ib_pd *pd;
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
@@ -1282,8 +1698,14 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
if (mqp->counter_index)
mlx4_ib_free_qp_counter(dev, mqp);
- pd = get_pd(mqp);
- destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
+ if (qp->rwq_ind_tbl) {
+ destroy_qp_rss(dev, mqp);
+ } else {
+ struct mlx4_ib_pd *pd;
+
+ pd = get_pd(mqp);
+ destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject);
+ }
if (is_sqp(dev, mqp))
kfree(to_msqp(mqp));
@@ -1566,7 +1988,7 @@ static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
!(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
return 0;
- err = mlx4_counter_alloc(dev->dev, &tmp_idx);
+ err = mlx4_counter_alloc(dev->dev, &tmp_idx, MLX4_RES_USAGE_DRIVER);
if (err)
return err;
@@ -1606,12 +2028,119 @@ static u8 gid_type_to_qpc(enum ib_gid_type gid_type)
}
}
-static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
+/*
+ * Go over all RSS QP's childes (WQs) and apply their HW state according to
+ * their logic state if the RSS QP is the first RSS QP associated for the WQ.
+ */
+static int bringup_rss_rwqs(struct ib_rwq_ind_table *ind_tbl, u8 port_num)
+{
+ int i;
+ int err;
+
+ for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
+ struct ib_wq *ibwq = ind_tbl->ind_tbl[i];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ /* Mlx4_ib restrictions:
+ * WQ's is associated to a port according to the RSS QP it is
+ * associates to.
+ * In case the WQ is associated to a different port by another
+ * RSS QP, return a failure.
+ */
+ if ((wq->rss_usecnt > 0) && (wq->port != port_num)) {
+ err = -EINVAL;
+ mutex_unlock(&wq->mutex);
+ break;
+ }
+ wq->port = port_num;
+ if ((wq->rss_usecnt == 0) && (ibwq->state == IB_WQS_RDY)) {
+ err = _mlx4_ib_modify_wq(ibwq, IB_WQS_RDY);
+ if (err) {
+ mutex_unlock(&wq->mutex);
+ break;
+ }
+ }
+ wq->rss_usecnt++;
+
+ mutex_unlock(&wq->mutex);
+ }
+
+ if (i && err) {
+ int j;
+
+ for (j = (i - 1); j >= 0; j--) {
+ struct ib_wq *ibwq = ind_tbl->ind_tbl[j];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ if ((wq->rss_usecnt == 1) &&
+ (ibwq->state == IB_WQS_RDY))
+ if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET))
+ pr_warn("failed to reverse WQN=0x%06x\n",
+ ibwq->wq_num);
+ wq->rss_usecnt--;
+
+ mutex_unlock(&wq->mutex);
+ }
+ }
+
+ return err;
+}
+
+static void bring_down_rss_rwqs(struct ib_rwq_ind_table *ind_tbl)
+{
+ int i;
+
+ for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
+ struct ib_wq *ibwq = ind_tbl->ind_tbl[i];
+ struct mlx4_ib_qp *wq = to_mqp((struct ib_qp *)ibwq);
+
+ mutex_lock(&wq->mutex);
+
+ if ((wq->rss_usecnt == 1) && (ibwq->state == IB_WQS_RDY))
+ if (_mlx4_ib_modify_wq(ibwq, IB_WQS_RESET))
+ pr_warn("failed to reverse WQN=%x\n",
+ ibwq->wq_num);
+ wq->rss_usecnt--;
+
+ mutex_unlock(&wq->mutex);
+ }
+}
+
+static void fill_qp_rss_context(struct mlx4_qp_context *context,
+ struct mlx4_ib_qp *qp)
+{
+ struct mlx4_rss_context *rss_context;
+
+ rss_context = (void *)context + offsetof(struct mlx4_qp_context,
+ pri_path) + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
+
+ rss_context->base_qpn = cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz);
+ rss_context->default_qpn =
+ cpu_to_be32(qp->rss_ctx->base_qpn_tbl_sz & 0xffffff);
+ if (qp->rss_ctx->flags & (MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6))
+ rss_context->base_qpn_udp = rss_context->default_qpn;
+ rss_context->flags = qp->rss_ctx->flags;
+ /* Currently support just toeplitz */
+ rss_context->hash_fn = MLX4_RSS_HASH_TOP;
+
+ memcpy(rss_context->rss_key, qp->rss_ctx->rss_key,
+ MLX4_EN_RSS_KEY_SIZE);
+}
+
+static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
const struct ib_qp_attr *attr, int attr_mask,
enum ib_qp_state cur_state, enum ib_qp_state new_state)
{
- struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ struct ib_uobject *ibuobject;
+ struct ib_srq *ibsrq;
+ struct ib_rwq_ind_table *rwq_ind_tbl;
+ enum ib_qp_type qp_type;
+ struct mlx4_ib_dev *dev;
+ struct mlx4_ib_qp *qp;
struct mlx4_ib_pd *pd;
struct mlx4_ib_cq *send_cq, *recv_cq;
struct mlx4_qp_context *context;
@@ -1621,6 +2150,30 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
int err = -EINVAL;
int counter_index;
+ if (src_type == MLX4_IB_RWQ_SRC) {
+ struct ib_wq *ibwq;
+
+ ibwq = (struct ib_wq *)src;
+ ibuobject = ibwq->uobject;
+ ibsrq = NULL;
+ rwq_ind_tbl = NULL;
+ qp_type = IB_QPT_RAW_PACKET;
+ qp = to_mqp((struct ib_qp *)ibwq);
+ dev = to_mdev(ibwq->device);
+ pd = to_mpd(ibwq->pd);
+ } else {
+ struct ib_qp *ibqp;
+
+ ibqp = (struct ib_qp *)src;
+ ibuobject = ibqp->uobject;
+ ibsrq = ibqp->srq;
+ rwq_ind_tbl = ibqp->rwq_ind_tbl;
+ qp_type = ibqp->qp_type;
+ qp = to_mqp(ibqp);
+ dev = to_mdev(ibqp->device);
+ pd = get_pd(qp);
+ }
+
/* APM is not supported under RoCE */
if (attr_mask & IB_QP_ALT_PATH &&
rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
@@ -1634,6 +2187,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
(to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16));
+ if (rwq_ind_tbl) {
+ fill_qp_rss_context(context, qp);
+ context->flags |= cpu_to_be32(1 << MLX4_RSS_QPC_FLAG_OFFSET);
+ }
+
if (!(attr_mask & IB_QP_PATH_MIG_STATE))
context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
else {
@@ -1651,11 +2209,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
+ if (qp->inl_recv_sz)
+ context->param3 |= cpu_to_be32(1 << 25);
+
+ if (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI)
context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
- else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ else if (qp_type == IB_QPT_RAW_PACKET)
context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
- else if (ibqp->qp_type == IB_QPT_UD) {
+ else if (qp_type == IB_QPT_UD) {
if (qp->flags & MLX4_IB_QP_LSO)
context->mtu_msgmax = (IB_MTU_4096 << 5) |
ilog2(dev->dev->caps.max_gso_sz);
@@ -1671,9 +2232,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
ilog2(dev->dev->caps.max_msg_sz);
}
- if (qp->rq.wqe_cnt)
- context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
- context->rq_size_stride |= qp->rq.wqe_shift - 4;
+ if (!rwq_ind_tbl) { /* PRM RSS receive side should be left zeros */
+ if (qp->rq.wqe_cnt)
+ context->rq_size_stride = ilog2(qp->rq.wqe_cnt) << 3;
+ context->rq_size_stride |= qp->rq.wqe_shift - 4;
+ }
if (qp->sq.wqe_cnt)
context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
@@ -1685,14 +2248,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
context->xrcd = cpu_to_be32((u32) qp->xrcdn);
- if (ibqp->qp_type == IB_QPT_RAW_PACKET)
+ if (qp_type == IB_QPT_RAW_PACKET)
context->param3 |= cpu_to_be32(1 << 30);
}
- if (qp->ibqp.uobject)
+ if (ibuobject)
context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev,
- to_mucontext(ibqp->uobject->context)->uar.index));
+ to_mucontext(ibuobject->context)
+ ->uar.index));
else
context->usr_page = cpu_to_be32(
mlx4_to_hw_uar_index(dev->dev, dev->priv_uar.index));
@@ -1736,7 +2300,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
steer_qp = 1;
}
- if (ibqp->qp_type == IB_QPT_GSI) {
+ if (qp_type == IB_QPT_GSI) {
enum ib_gid_type gid_type = qp->flags & MLX4_IB_ROCE_V2_GSI_QP ?
IB_GID_TYPE_ROCE_UDP_ENCAP : IB_GID_TYPE_ROCE;
u8 qpc_roce_mode = gid_type_to_qpc(gid_type);
@@ -1753,7 +2317,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
if (attr_mask & IB_QP_AV) {
- u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
+ u8 port_num = mlx4_is_bonded(dev->dev) ? 1 :
attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
union ib_gid gid;
struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
@@ -1768,7 +2332,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
int index =
rdma_ah_read_grh(&attr->ah_attr)->sgid_index;
- status = ib_get_cached_gid(ibqp->device, port_num,
+ status = ib_get_cached_gid(&dev->ib_dev, port_num,
index, &gid, &gid_attr);
if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
status = -ENOENT;
@@ -1825,15 +2389,20 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
}
- pd = get_pd(qp);
- get_cqs(qp, &send_cq, &recv_cq);
- context->pd = cpu_to_be32(pd->pdn);
+ context->pd = cpu_to_be32(pd->pdn);
+
+ if (!rwq_ind_tbl) {
+ get_cqs(qp, src_type, &send_cq, &recv_cq);
+ } else { /* Set dummy CQs to be compatible with HV and PRM */
+ send_cq = to_mcq(rwq_ind_tbl->ind_tbl[0]->cq);
+ recv_cq = send_cq;
+ }
context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
/* Set "fast registration enabled" for all kernel QPs */
- if (!qp->ibqp.uobject)
+ if (!ibuobject)
context->params1 |= cpu_to_be32(1 << 11);
if (attr_mask & IB_QP_RNR_RETRY) {
@@ -1868,7 +2437,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_RWE | MLX4_QP_OPTPAR_RRE | MLX4_QP_OPTPAR_RAE;
}
- if (ibqp->srq)
+ if (ibsrq)
context->params2 |= cpu_to_be32(MLX4_QP_BIT_RIC);
if (attr_mask & IB_QP_MIN_RNR_TIMER) {
@@ -1899,17 +2468,19 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
optpar |= MLX4_QP_OPTPAR_Q_KEY;
}
- if (ibqp->srq)
- context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
+ if (ibsrq)
+ context->srqn = cpu_to_be32(1 << 24 |
+ to_msrq(ibsrq)->msrq.srqn);
- if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (qp->rq.wqe_cnt &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
if (cur_state == IB_QPS_INIT &&
new_state == IB_QPS_RTR &&
- (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
- ibqp->qp_type == IB_QPT_UD ||
- ibqp->qp_type == IB_QPT_RAW_PACKET)) {
+ (qp_type == IB_QPT_GSI || qp_type == IB_QPT_SMI ||
+ qp_type == IB_QPT_UD || qp_type == IB_QPT_RAW_PACKET)) {
context->pri_path.sched_queue = (qp->port - 1) << 6;
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI ||
qp->mlx4_ib_qp_type &
@@ -1942,7 +2513,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp_type == IB_QPT_RAW_PACKET) {
context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
MLX4_IB_LINK_TYPE_ETH;
if (dev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
@@ -1952,7 +2523,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
- if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
+ if (qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
int is_eth = rdma_port_get_link_layer(
&dev->ib_dev, qp->port) ==
IB_LINK_LAYER_ETHERNET;
@@ -1962,14 +2533,15 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
}
}
-
if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
sqd_event = 1;
else
sqd_event = 0;
- if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+ if (!ibuobject &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT)
context->rlkey_roce_mode |= (1 << 4);
/*
@@ -1978,7 +2550,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* headroom is stamped so that the hardware doesn't start
* processing stale work requests.
*/
- if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ if (!ibuobject &&
+ cur_state == IB_QPS_RESET &&
+ new_state == IB_QPS_INIT) {
struct mlx4_wqe_ctrl_seg *ctrl;
int i;
@@ -2035,9 +2609,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
* entries and reinitialize the QP.
*/
if (new_state == IB_QPS_RESET) {
- if (!ibqp->uobject) {
+ if (!ibuobject) {
mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn,
- ibqp->srq ? to_msrq(ibqp->srq) : NULL);
+ ibsrq ? to_msrq(ibsrq) : NULL);
if (send_cq != recv_cq)
mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL);
@@ -2148,6 +2722,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
return err;
}
+enum {
+ MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK = (IB_QP_STATE |
+ IB_QP_PORT),
+};
+
static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
@@ -2178,6 +2757,27 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
+ if (ibqp->rwq_ind_tbl) {
+ if (!(((cur_state == IB_QPS_RESET) &&
+ (new_state == IB_QPS_INIT)) ||
+ ((cur_state == IB_QPS_INIT) &&
+ (new_state == IB_QPS_RTR)))) {
+ pr_debug("qpn 0x%x: RSS QP unsupported transition %d to %d\n",
+ ibqp->qp_num, cur_state, new_state);
+
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (attr_mask & ~MLX4_IB_MODIFY_QP_RSS_SUP_ATTR_MSK) {
+ pr_debug("qpn 0x%x: RSS QP unsupported attribute mask 0x%x for transition %d to %d\n",
+ ibqp->qp_num, attr_mask, cur_state, new_state);
+
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ }
+
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT)) {
if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) {
if ((ibqp->qp_type == IB_QPT_RC) ||
@@ -2242,7 +2842,17 @@ static int _mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto out;
}
- err = __mlx4_ib_modify_qp(ibqp, attr, attr_mask, cur_state, new_state);
+ if (ibqp->rwq_ind_tbl && (new_state == IB_QPS_INIT)) {
+ err = bringup_rss_rwqs(ibqp->rwq_ind_tbl, attr->port_num);
+ if (err)
+ goto out;
+ }
+
+ err = __mlx4_ib_modify_qp(ibqp, MLX4_IB_QP_SRC, attr, attr_mask,
+ cur_state, new_state);
+
+ if (ibqp->rwq_ind_tbl && err)
+ bring_down_rss_rwqs(ibqp->rwq_ind_tbl);
if (mlx4_is_bonded(dev->dev) && (attr_mask & IB_QP_PORT))
attr->port_num = 1;
@@ -3432,6 +4042,9 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
int mlx4_state;
int err = 0;
+ if (ibqp->rwq_ind_tbl)
+ return -EOPNOTSUPP;
+
mutex_lock(&qp->mutex);
if (qp->state == IB_QPS_RESET) {
@@ -3527,3 +4140,285 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr
return err;
}
+struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
+ struct ib_wq_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct mlx4_ib_dev *dev;
+ struct ib_qp_init_attr ib_qp_init_attr;
+ struct mlx4_ib_qp *qp;
+ struct mlx4_ib_create_wq ucmd;
+ int err, required_cmd_sz;
+
+ if (!(udata && pd->uobject))
+ return ERR_PTR(-EINVAL);
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) +
+ sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz) {
+ pr_debug("invalid inlen\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd))) {
+ pr_debug("inlen is not supported\n");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (udata->outlen)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ dev = to_mdev(pd->device);
+
+ if (init_attr->wq_type != IB_WQT_RQ) {
+ pr_debug("unsupported wq type %d\n", init_attr->wq_type);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (init_attr->create_flags) {
+ pr_debug("unsupported create_flags %u\n",
+ init_attr->create_flags);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->pri.vid = 0xFFFF;
+ qp->alt.vid = 0xFFFF;
+
+ memset(&ib_qp_init_attr, 0, sizeof(ib_qp_init_attr));
+ ib_qp_init_attr.qp_context = init_attr->wq_context;
+ ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET;
+ ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr;
+ ib_qp_init_attr.cap.max_recv_sge = init_attr->max_sge;
+ ib_qp_init_attr.recv_cq = init_attr->cq;
+ ib_qp_init_attr.send_cq = ib_qp_init_attr.recv_cq; /* Dummy CQ */
+
+ err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr,
+ udata, 0, &qp);
+ if (err) {
+ kfree(qp);
+ return ERR_PTR(err);
+ }
+
+ qp->ibwq.event_handler = init_attr->event_handler;
+ qp->ibwq.wq_num = qp->mqp.qpn;
+ qp->ibwq.state = IB_WQS_RESET;
+
+ return &qp->ibwq;
+}
+
+static int ib_wq2qp_state(enum ib_wq_state state)
+{
+ switch (state) {
+ case IB_WQS_RESET:
+ return IB_QPS_RESET;
+ case IB_WQS_RDY:
+ return IB_QPS_RTR;
+ default:
+ return IB_QPS_ERR;
+ }
+}
+
+static int _mlx4_ib_modify_wq(struct ib_wq *ibwq, enum ib_wq_state new_state)
+{
+ struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
+ enum ib_qp_state qp_cur_state;
+ enum ib_qp_state qp_new_state;
+ int attr_mask;
+ int err;
+
+ /* ib_qp.state represents the WQ HW state while ib_wq.state represents
+ * the WQ logic state.
+ */
+ qp_cur_state = qp->state;
+ qp_new_state = ib_wq2qp_state(new_state);
+
+ if (ib_wq2qp_state(new_state) == qp_cur_state)
+ return 0;
+
+ if (new_state == IB_WQS_RDY) {
+ struct ib_qp_attr attr = {};
+
+ attr.port_num = qp->port;
+ attr_mask = IB_QP_PORT;
+
+ err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, &attr,
+ attr_mask, IB_QPS_RESET, IB_QPS_INIT);
+ if (err) {
+ pr_debug("WQN=0x%06x failed to apply RST->INIT on the HW QP\n",
+ ibwq->wq_num);
+ return err;
+ }
+
+ qp_cur_state = IB_QPS_INIT;
+ }
+
+ attr_mask = 0;
+ err = __mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL, attr_mask,
+ qp_cur_state, qp_new_state);
+
+ if (err && (qp_cur_state == IB_QPS_INIT)) {
+ qp_new_state = IB_QPS_RESET;
+ if (__mlx4_ib_modify_qp(ibwq, MLX4_IB_RWQ_SRC, NULL,
+ attr_mask, IB_QPS_INIT, IB_QPS_RESET)) {
+ pr_warn("WQN=0x%06x failed with reverting HW's resources failure\n",
+ ibwq->wq_num);
+ qp_new_state = IB_QPS_INIT;
+ }
+ }
+
+ qp->state = qp_new_state;
+
+ return err;
+}
+
+int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr,
+ u32 wq_attr_mask, struct ib_udata *udata)
+{
+ struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
+ struct mlx4_ib_modify_wq ucmd = {};
+ size_t required_cmd_sz;
+ enum ib_wq_state cur_state, new_state;
+ int err = 0;
+
+ required_cmd_sz = offsetof(typeof(ucmd), reserved) +
+ sizeof(ucmd.reserved);
+ if (udata->inlen < required_cmd_sz)
+ return -EINVAL;
+
+ if (udata->inlen > sizeof(ucmd) &&
+ !ib_is_udata_cleared(udata, sizeof(ucmd),
+ udata->inlen - sizeof(ucmd)))
+ return -EOPNOTSUPP;
+
+ if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)))
+ return -EFAULT;
+
+ if (ucmd.comp_mask || ucmd.reserved)
+ return -EOPNOTSUPP;
+
+ if (wq_attr_mask & IB_WQ_FLAGS)
+ return -EOPNOTSUPP;
+
+ cur_state = wq_attr_mask & IB_WQ_CUR_STATE ? wq_attr->curr_wq_state :
+ ibwq->state;
+ new_state = wq_attr_mask & IB_WQ_STATE ? wq_attr->wq_state : cur_state;
+
+ if (cur_state < IB_WQS_RESET || cur_state > IB_WQS_ERR ||
+ new_state < IB_WQS_RESET || new_state > IB_WQS_ERR)
+ return -EINVAL;
+
+ if ((new_state == IB_WQS_RDY) && (cur_state == IB_WQS_ERR))
+ return -EINVAL;
+
+ if ((new_state == IB_WQS_ERR) && (cur_state == IB_WQS_RESET))
+ return -EINVAL;
+
+ /* Need to protect against the parent RSS which also may modify WQ
+ * state.
+ */
+ mutex_lock(&qp->mutex);
+
+ /* Can update HW state only if a RSS QP has already associated to this
+ * WQ, so we can apply its port on the WQ.
+ */
+ if (qp->rss_usecnt)
+ err = _mlx4_ib_modify_wq(ibwq, new_state);
+
+ if (!err)
+ ibwq->state = new_state;
+
+ mutex_unlock(&qp->mutex);
+
+ return err;
+}
+
+int mlx4_ib_destroy_wq(struct ib_wq *ibwq)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibwq->device);
+ struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq);
+
+ if (qp->counter_index)
+ mlx4_ib_free_qp_counter(dev, qp);
+
+ destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, 1);
+
+ kfree(qp);
+
+ return 0;
+}
+
+struct ib_rwq_ind_table
+*mlx4_ib_create_rwq_ind_table(struct ib_device *device,
+ struct ib_rwq_ind_table_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct ib_rwq_ind_table *rwq_ind_table;
+ struct mlx4_ib_create_rwq_ind_tbl_resp resp = {};
+ unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size;
+ unsigned int base_wqn;
+ size_t min_resp_len;
+ int i;
+ int err;
+
+ if (udata->inlen > 0 &&
+ !ib_is_udata_cleared(udata, 0,
+ udata->inlen))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
+ if (udata->outlen && udata->outlen < min_resp_len)
+ return ERR_PTR(-EINVAL);
+
+ if (ind_tbl_size >
+ device->attrs.rss_caps.max_rwq_indirection_table_size) {
+ pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n",
+ ind_tbl_size,
+ device->attrs.rss_caps.max_rwq_indirection_table_size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ base_wqn = init_attr->ind_tbl[0]->wq_num;
+
+ if (base_wqn % ind_tbl_size) {
+ pr_debug("WQN=0x%x isn't aligned with indirection table size\n",
+ base_wqn);
+ return ERR_PTR(-EINVAL);
+ }
+
+ for (i = 1; i < ind_tbl_size; i++) {
+ if (++base_wqn != init_attr->ind_tbl[i]->wq_num) {
+ pr_debug("indirection table's WQNs aren't consecutive\n");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL);
+ if (!rwq_ind_table)
+ return ERR_PTR(-ENOMEM);
+
+ if (udata->outlen) {
+ resp.response_length = offsetof(typeof(resp), response_length) +
+ sizeof(resp.response_length);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err)
+ goto err;
+ }
+
+ return rwq_ind_table;
+
+err:
+ kfree(rwq_ind_table);
+ return ERR_PTR(err);
+}
+
+int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+ kfree(ib_rwq_ind_tbl);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 90ad2ad..bc62996 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 18d5e1d..470995f 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -57,3 +57,23 @@ int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
MLX5_SET(query_cong_statistics_in, in, clear, reset);
return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
}
+
+int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
+ void *out, int out_size)
+{
+ u32 in[MLX5_ST_SZ_DW(query_cong_params_in)] = { };
+
+ MLX5_SET(query_cong_params_in, in, opcode,
+ MLX5_CMD_OP_QUERY_CONG_PARAMS);
+ MLX5_SET(query_cong_params_in, in, cong_protocol, cong_point);
+
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
+}
+
+int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *dev,
+ void *in, int in_size)
+{
+ u32 out[MLX5_ST_SZ_DW(modify_cong_params_out)] = { };
+
+ return mlx5_cmd_exec(dev, in, in_size, out, sizeof(out));
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index fa09228..af4c245 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -39,4 +39,8 @@
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
bool reset, void *out, int out_size);
+int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
+ void *out, int out_size);
+int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
+ void *in, int in_size);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
new file mode 100644
index 0000000..2d32b51
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2013-2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+
+#include "mlx5_ib.h"
+#include "cmd.h"
+
+enum mlx5_ib_cong_node_type {
+ MLX5_IB_RROCE_ECN_RP = 1,
+ MLX5_IB_RROCE_ECN_NP = 2,
+};
+
+static const char * const mlx5_ib_dbg_cc_name[] = {
+ "rp_clamp_tgt_rate",
+ "rp_clamp_tgt_rate_ati",
+ "rp_time_reset",
+ "rp_byte_reset",
+ "rp_threshold",
+ "rp_ai_rate",
+ "rp_hai_rate",
+ "rp_min_dec_fac",
+ "rp_min_rate",
+ "rp_rate_to_set_on_first_cnp",
+ "rp_dce_tcp_g",
+ "rp_dce_tcp_rtt",
+ "rp_rate_reduce_monitor_period",
+ "rp_initial_alpha_value",
+ "rp_gd",
+ "np_cnp_dscp",
+ "np_cnp_prio_mode",
+ "np_cnp_prio",
+};
+
+#define MLX5_IB_RP_CLAMP_TGT_RATE_ATTR BIT(1)
+#define MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR BIT(2)
+#define MLX5_IB_RP_TIME_RESET_ATTR BIT(3)
+#define MLX5_IB_RP_BYTE_RESET_ATTR BIT(4)
+#define MLX5_IB_RP_THRESHOLD_ATTR BIT(5)
+#define MLX5_IB_RP_AI_RATE_ATTR BIT(7)
+#define MLX5_IB_RP_HAI_RATE_ATTR BIT(8)
+#define MLX5_IB_RP_MIN_DEC_FAC_ATTR BIT(9)
+#define MLX5_IB_RP_MIN_RATE_ATTR BIT(10)
+#define MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR BIT(11)
+#define MLX5_IB_RP_DCE_TCP_G_ATTR BIT(12)
+#define MLX5_IB_RP_DCE_TCP_RTT_ATTR BIT(13)
+#define MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR BIT(14)
+#define MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR BIT(15)
+#define MLX5_IB_RP_GD_ATTR BIT(16)
+
+#define MLX5_IB_NP_CNP_DSCP_ATTR BIT(3)
+#define MLX5_IB_NP_CNP_PRIO_MODE_ATTR BIT(4)
+
+static enum mlx5_ib_cong_node_type
+mlx5_ib_param_to_node(enum mlx5_ib_dbg_cc_types param_offset)
+{
+ if (param_offset >= MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE &&
+ param_offset <= MLX5_IB_DBG_CC_RP_GD)
+ return MLX5_IB_RROCE_ECN_RP;
+ else
+ return MLX5_IB_RROCE_ECN_NP;
+}
+
+static u32 mlx5_get_cc_param_val(void *field, int offset)
+{
+ switch (offset) {
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate);
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate_after_time_inc);
+ case MLX5_IB_DBG_CC_RP_TIME_RESET:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_time_reset);
+ case MLX5_IB_DBG_CC_RP_BYTE_RESET:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_byte_reset);
+ case MLX5_IB_DBG_CC_RP_THRESHOLD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_threshold);
+ case MLX5_IB_DBG_CC_RP_AI_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_ai_rate);
+ case MLX5_IB_DBG_CC_RP_HAI_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_hai_rate);
+ case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_dec_fac);
+ case MLX5_IB_DBG_CC_RP_MIN_RATE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_rate);
+ case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rate_to_set_on_first_cnp);
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_g);
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_rtt);
+ case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rate_reduce_monitor_period);
+ case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ initial_alpha_value);
+ case MLX5_IB_DBG_CC_RP_GD:
+ return MLX5_GET(cong_control_r_roce_ecn_rp, field,
+ rpg_gd);
+ case MLX5_IB_DBG_CC_NP_CNP_DSCP:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_dscp);
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_prio_mode);
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO:
+ return MLX5_GET(cong_control_r_roce_ecn_np, field,
+ cnp_802p_prio);
+ default:
+ return 0;
+ }
+}
+
+static void mlx5_ib_set_cc_param_mask_val(void *field, int offset,
+ u32 var, u32 *attr_mask)
+{
+ switch (offset) {
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE:
+ *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI:
+ *attr_mask |= MLX5_IB_RP_CLAMP_TGT_RATE_ATI_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ clamp_tgt_rate_after_time_inc, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_TIME_RESET:
+ *attr_mask |= MLX5_IB_RP_TIME_RESET_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_time_reset, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_BYTE_RESET:
+ *attr_mask |= MLX5_IB_RP_BYTE_RESET_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_byte_reset, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_THRESHOLD:
+ *attr_mask |= MLX5_IB_RP_THRESHOLD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_threshold, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_AI_RATE:
+ *attr_mask |= MLX5_IB_RP_AI_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_ai_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_HAI_RATE:
+ *attr_mask |= MLX5_IB_RP_HAI_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_hai_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_MIN_DEC_FAC:
+ *attr_mask |= MLX5_IB_RP_MIN_DEC_FAC_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_dec_fac, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_MIN_RATE:
+ *attr_mask |= MLX5_IB_RP_MIN_RATE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_min_rate, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP:
+ *attr_mask |= MLX5_IB_RP_RATE_TO_SET_ON_FIRST_CNP_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rate_to_set_on_first_cnp, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_G:
+ *attr_mask |= MLX5_IB_RP_DCE_TCP_G_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_g, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_DCE_TCP_RTT:
+ *attr_mask |= MLX5_IB_RP_DCE_TCP_RTT_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ dce_tcp_rtt, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD:
+ *attr_mask |= MLX5_IB_RP_RATE_REDUCE_MONITOR_PERIOD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rate_reduce_monitor_period, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE:
+ *attr_mask |= MLX5_IB_RP_INITIAL_ALPHA_VALUE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ initial_alpha_value, var);
+ break;
+ case MLX5_IB_DBG_CC_RP_GD:
+ *attr_mask |= MLX5_IB_RP_GD_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_rp, field,
+ rpg_gd, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_DSCP:
+ *attr_mask |= MLX5_IB_NP_CNP_DSCP_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_dscp, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE:
+ *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, var);
+ break;
+ case MLX5_IB_DBG_CC_NP_CNP_PRIO:
+ *attr_mask |= MLX5_IB_NP_CNP_PRIO_MODE_ATTR;
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_prio_mode, 0);
+ MLX5_SET(cong_control_r_roce_ecn_np, field, cnp_802p_prio, var);
+ break;
+ }
+}
+
+static int mlx5_ib_get_cc_params(struct mlx5_ib_dev *dev, int offset, u32 *var)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_cong_params_out);
+ void *out;
+ void *field;
+ int err;
+ enum mlx5_ib_cong_node_type node;
+
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ node = mlx5_ib_param_to_node(offset);
+
+ err = mlx5_cmd_query_cong_params(dev->mdev, node, out, outlen);
+ if (err)
+ goto free;
+
+ field = MLX5_ADDR_OF(query_cong_params_out, out, congestion_parameters);
+ *var = mlx5_get_cc_param_val(field, offset);
+
+free:
+ kvfree(out);
+ return err;
+}
+
+static int mlx5_ib_set_cc_params(struct mlx5_ib_dev *dev, int offset, u32 var)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_cong_params_in);
+ void *in;
+ void *field;
+ enum mlx5_ib_cong_node_type node;
+ u32 attr_mask = 0;
+ int err;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_cong_params_in, in, opcode,
+ MLX5_CMD_OP_MODIFY_CONG_PARAMS);
+
+ node = mlx5_ib_param_to_node(offset);
+ MLX5_SET(modify_cong_params_in, in, cong_protocol, node);
+
+ field = MLX5_ADDR_OF(modify_cong_params_in, in, congestion_parameters);
+ mlx5_ib_set_cc_param_mask_val(field, offset, var, &attr_mask);
+
+ field = MLX5_ADDR_OF(modify_cong_params_in, in, field_select);
+ MLX5_SET(field_select_r_roce_rp, field, field_select_r_roce_rp,
+ attr_mask);
+
+ err = mlx5_cmd_modify_cong_params(dev->mdev, in, inlen);
+ kvfree(in);
+ return err;
+}
+
+static ssize_t set_param(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_dbg_param *param = filp->private_data;
+ int offset = param->offset;
+ char lbuf[11] = { };
+ u32 var;
+ int ret;
+
+ if (count > sizeof(lbuf))
+ return -EINVAL;
+
+ if (copy_from_user(lbuf, buf, count))
+ return -EFAULT;
+
+ lbuf[sizeof(lbuf) - 1] = '\0';
+
+ if (kstrtou32(lbuf, 0, &var))
+ return -EINVAL;
+
+ ret = mlx5_ib_set_cc_params(param->dev, offset, var);
+ return ret ? ret : count;
+}
+
+static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
+ loff_t *pos)
+{
+ struct mlx5_ib_dbg_param *param = filp->private_data;
+ int offset = param->offset;
+ u32 var = 0;
+ int ret;
+ char lbuf[11];
+
+ if (*pos)
+ return 0;
+
+ ret = mlx5_ib_get_cc_params(param->dev, offset, &var);
+ if (ret)
+ return ret;
+
+ ret = snprintf(lbuf, sizeof(lbuf), "%d\n", var);
+ if (ret < 0)
+ return ret;
+
+ if (copy_to_user(buf, lbuf, ret))
+ return -EFAULT;
+
+ *pos += ret;
+ return ret;
+}
+
+static const struct file_operations dbg_cc_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = set_param,
+ .read = get_param,
+};
+
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev)
+{
+ if (!mlx5_debugfs_root ||
+ !dev->dbg_cc_params ||
+ !dev->dbg_cc_params->root)
+ return;
+
+ debugfs_remove_recursive(dev->dbg_cc_params->root);
+ kfree(dev->dbg_cc_params);
+ dev->dbg_cc_params = NULL;
+}
+
+int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+ int i;
+
+ if (!mlx5_debugfs_root)
+ goto out;
+
+ if (!MLX5_CAP_GEN(dev->mdev, cc_query_allowed) ||
+ !MLX5_CAP_GEN(dev->mdev, cc_modify_allowed))
+ goto out;
+
+ dbg_cc_params = kzalloc(sizeof(*dbg_cc_params), GFP_KERNEL);
+ if (!dbg_cc_params)
+ goto out;
+
+ dev->dbg_cc_params = dbg_cc_params;
+
+ dbg_cc_params->root = debugfs_create_dir("cc_params",
+ dev->mdev->priv.dbg_root);
+ if (!dbg_cc_params->root)
+ goto err;
+
+ for (i = 0; i < MLX5_IB_DBG_CC_MAX; i++) {
+ dbg_cc_params->params[i].offset = i;
+ dbg_cc_params->params[i].dev = dev;
+ dbg_cc_params->params[i].dentry =
+ debugfs_create_file(mlx5_ib_dbg_cc_name[i],
+ 0600, dbg_cc_params->root,
+ &dbg_cc_params->params[i],
+ &dbg_cc_fops);
+ if (!dbg_cc_params->params[i].dentry)
+ goto err;
+ }
+out: return 0;
+
+err:
+ mlx5_ib_warn(dev, "cong debugfs failure\n");
+ mlx5_ib_cleanup_cong_debugfs(dev);
+ /*
+ * We don't want to fail driver if debugfs failed to initialize,
+ * so we are not forwarding error to the user.
+ */
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index c1b9de8..649a336 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -96,6 +96,7 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -109,6 +110,8 @@ int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
}
in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].policy = in->policy;
out:
kfree(in);
@@ -151,6 +154,7 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -160,6 +164,8 @@ static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
in->node_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].node_guid = guid;
kfree(in);
return err;
}
@@ -169,6 +175,7 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_hca_vport_context *in;
+ struct mlx5_vf_context *vfs_ctx = mdev->priv.sriov.vfs_ctx;
int err;
in = kzalloc(sizeof(*in), GFP_KERNEL);
@@ -178,6 +185,8 @@ static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
in->port_guid = guid;
err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+ if (!err)
+ vfs_ctx[vf].port_guid = guid;
kfree(in);
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index fe60c36..9279631 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
+#include <linux/debugfs.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
@@ -58,6 +59,7 @@
#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#include "cmd.h"
+#include <linux/mlx5/vport.h>
#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "5.0-0"
@@ -96,6 +98,20 @@ mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}
+static int get_port_state(struct ib_device *ibdev,
+ u8 port_num,
+ enum ib_port_state *state)
+{
+ struct ib_port_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ ret = mlx5_ib_query_port(ibdev, port_num, &attr);
+ if (!ret)
+ *state = attr.state;
+ return ret;
+}
+
static int mlx5_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -113,6 +129,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
write_unlock(&ibdev->roce.netdev_lock);
break;
+ case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
@@ -126,10 +143,23 @@ static int mlx5_netdev_event(struct notifier_block *this,
if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
&& ibdev->ib_active) {
struct ib_event ibev = { };
+ enum ib_port_state port_state;
+ if (get_port_state(&ibdev->ib_dev, 1, &port_state))
+ return NOTIFY_DONE;
+
+ if (ibdev->roce.last_port_state == port_state)
+ return NOTIFY_DONE;
+
+ ibdev->roce.last_port_state = port_state;
ibev.device = &ibdev->ib_dev;
- ibev.event = (event == NETDEV_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ if (port_state == IB_PORT_DOWN)
+ ibev.event = IB_EVENT_PORT_ERR;
+ else if (port_state == IB_PORT_ACTIVE)
+ ibev.event = IB_EVENT_PORT_ACTIVE;
+ else
+ return NOTIFY_DONE;
+
ibev.element.port_num = 1;
ib_dispatch_event(&ibev);
}
@@ -667,6 +697,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_UD_TSO;
}
+ if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
+ MLX5_CAP_GEN(dev->mdev, general_notification_event))
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
+
+ if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
+ MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
+ props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
/* Legacy bit to support old userspace libraries */
@@ -1186,6 +1224,45 @@ static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *con
return 0;
}
+static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
+{
+ int err;
+
+ err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
+ if (err)
+ return err;
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return err;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td++;
+
+ if (dev->user_td == 2)
+ err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
+
+ mutex_unlock(&dev->lb_mutex);
+ return err;
+}
+
+static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
+{
+ mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
+
+ if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
+ !MLX5_CAP_GEN(dev->mdev, disable_local_lb))
+ return;
+
+ mutex_lock(&dev->lb_mutex);
+ dev->user_td--;
+
+ if (dev->user_td < 2)
+ mlx5_nic_vport_update_local_lb(dev->mdev, false);
+
+ mutex_unlock(&dev->lb_mutex);
+}
+
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
@@ -1294,8 +1371,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
mutex_init(&context->upd_xlt_page_mutex);
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
- err = mlx5_core_alloc_transport_domain(dev->mdev,
- &context->tdn);
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
if (err)
goto out_page;
}
@@ -1361,7 +1437,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
out_page:
free_page(context->upd_xlt_page);
@@ -1389,7 +1465,7 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
bfregi = &context->bfregi;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
free_page(context->upd_xlt_page);
deallocate_uars(dev, context);
@@ -2029,21 +2105,32 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
*/
static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr)
{
- struct ib_flow_spec_eth *eth_spec;
+ union ib_flow_spec *flow_spec;
if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
- ib_attr->size < sizeof(struct ib_flow_attr) +
- sizeof(struct ib_flow_spec_eth) ||
ib_attr->num_of_specs < 1)
return false;
- eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1);
- if (eth_spec->type != IB_FLOW_SPEC_ETH ||
- eth_spec->size != sizeof(*eth_spec))
- return false;
+ flow_spec = (union ib_flow_spec *)(ib_attr + 1);
+ if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
+ struct ib_flow_spec_ipv4 *ipv4_spec;
- return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
- is_multicast_ether_addr(eth_spec->val.dst_mac);
+ ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
+ if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
+ return true;
+
+ return false;
+ }
+
+ if (flow_spec->type == IB_FLOW_SPEC_ETH) {
+ struct ib_flow_spec_eth *eth_spec;
+
+ eth_spec = (struct ib_flow_spec_eth *)flow_spec;
+ return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
+ is_multicast_ether_addr(eth_spec->val.dst_mac);
+ }
+
+ return false;
}
static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
@@ -2521,8 +2608,14 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx5_ib_qp *mqp = to_mqp(ibqp);
int err;
+ if (mqp->flags & MLX5_IB_QP_UNDERLAY) {
+ mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n");
+ return -EOPNOTSUPP;
+ }
+
err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
if (err)
mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
@@ -2684,6 +2777,26 @@ static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}
+static void delay_drop_handler(struct work_struct *work)
+{
+ int err;
+ struct mlx5_ib_delay_drop *delay_drop =
+ container_of(work, struct mlx5_ib_delay_drop,
+ delay_drop_work);
+
+ atomic_inc(&delay_drop->events_cnt);
+
+ mutex_lock(&delay_drop->lock);
+ err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
+ delay_drop->timeout);
+ if (err) {
+ mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
+ delay_drop->timeout);
+ delay_drop->activate = false;
+ }
+ mutex_unlock(&delay_drop->lock);
+}
+
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
enum mlx5_dev_event event, unsigned long param)
{
@@ -2736,8 +2849,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
ibev.event = IB_EVENT_CLIENT_REREGISTER;
port = (u8)param;
break;
+ case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
+ goto out;
default:
- return;
+ goto out;
}
ibev.device = &ibdev->ib_dev;
@@ -2745,7 +2861,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (port < 1 || port > ibdev->num_ports) {
mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
- return;
+ goto out;
}
if (ibdev->ib_active)
@@ -2753,6 +2869,9 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
if (fatal)
ibdev->ib_active = false;
+
+out:
+ return;
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -3312,6 +3431,17 @@ static const struct mlx5_ib_counter cong_cnts[] = {
INIT_CONG_COUNTER(np_cnp_sent),
};
+static const struct mlx5_ib_counter extended_err_cnts[] = {
+ INIT_Q_COUNTER(resp_local_length_error),
+ INIT_Q_COUNTER(resp_cqe_error),
+ INIT_Q_COUNTER(req_cqe_error),
+ INIT_Q_COUNTER(req_remote_invalid_request),
+ INIT_Q_COUNTER(req_remote_access_errors),
+ INIT_Q_COUNTER(resp_remote_access_errors),
+ INIT_Q_COUNTER(resp_cqe_flush_error),
+ INIT_Q_COUNTER(req_cqe_flush_error),
+};
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
@@ -3336,6 +3466,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
+ num_counters += ARRAY_SIZE(extended_err_cnts);
+
cnts->num_q_counters = num_counters;
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
@@ -3385,6 +3519,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
}
}
+ if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
+ names[j] = extended_err_cnts[i].name;
+ offsets[j] = extended_err_cnts[i].offset;
+ }
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
names[j] = cong_cnts[i].name;
@@ -3555,6 +3696,126 @@ mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
return netdev;
}
+static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
+{
+ if (!dev->delay_drop.dbg)
+ return;
+ debugfs_remove_recursive(dev->delay_drop.dbg->dir_debugfs);
+ kfree(dev->delay_drop.dbg);
+ dev->delay_drop.dbg = NULL;
+}
+
+static void cancel_delay_drop(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ cancel_work_sync(&dev->delay_drop.delay_drop_work);
+ delay_drop_debugfs_cleanup(dev);
+}
+
+static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ char lbuf[20];
+ int len;
+
+ len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout);
+ return simple_read_from_buffer(buf, count, pos, lbuf, len);
+}
+
+static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct mlx5_ib_delay_drop *delay_drop = filp->private_data;
+ u32 timeout;
+ u32 var;
+
+ if (kstrtouint_from_user(buf, count, 0, &var))
+ return -EFAULT;
+
+ timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS *
+ 1000);
+ if (timeout != var)
+ mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n",
+ timeout);
+
+ delay_drop->timeout = timeout;
+
+ return count;
+}
+
+static const struct file_operations fops_delay_drop_timeout = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = delay_drop_timeout_write,
+ .read = delay_drop_timeout_read,
+};
+
+static int delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_dbg_delay_drop *dbg;
+
+ if (!mlx5_debugfs_root)
+ return 0;
+
+ dbg = kzalloc(sizeof(*dbg), GFP_KERNEL);
+ if (!dbg)
+ return -ENOMEM;
+
+ dbg->dir_debugfs =
+ debugfs_create_dir("delay_drop",
+ dev->mdev->priv.dbg_root);
+ if (!dbg->dir_debugfs)
+ return -ENOMEM;
+
+ dbg->events_cnt_debugfs =
+ debugfs_create_atomic_t("num_timeout_events", 0400,
+ dbg->dir_debugfs,
+ &dev->delay_drop.events_cnt);
+ if (!dbg->events_cnt_debugfs)
+ goto out_debugfs;
+
+ dbg->rqs_cnt_debugfs =
+ debugfs_create_atomic_t("num_rqs", 0400,
+ dbg->dir_debugfs,
+ &dev->delay_drop.rqs_cnt);
+ if (!dbg->rqs_cnt_debugfs)
+ goto out_debugfs;
+
+ dbg->timeout_debugfs =
+ debugfs_create_file("timeout", 0600,
+ dbg->dir_debugfs,
+ &dev->delay_drop,
+ &fops_delay_drop_timeout);
+ if (!dbg->timeout_debugfs)
+ goto out_debugfs;
+
+ return 0;
+
+out_debugfs:
+ delay_drop_debugfs_cleanup(dev);
+ return -ENOMEM;
+}
+
+static void init_delay_drop(struct mlx5_ib_dev *dev)
+{
+ if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
+ return;
+
+ mutex_init(&dev->delay_drop.lock);
+ dev->delay_drop.dev = dev;
+ dev->delay_drop.activate = false;
+ dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
+ INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
+ atomic_set(&dev->delay_drop.rqs_cnt, 0);
+ atomic_set(&dev->delay_drop.events_cnt, 0);
+
+ if (delay_drop_debugfs_init(dev))
+ mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
+}
+
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_dev *dev;
@@ -3722,18 +3983,20 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
}
+ dev->ib_dev.create_flow = mlx5_ib_create_flow;
+ dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
+ dev->ib_dev.uverbs_ex_cmd_mask |=
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+
if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) {
- dev->ib_dev.create_flow = mlx5_ib_create_flow;
- dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
dev->ib_dev.create_wq = mlx5_ib_create_wq;
dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
@@ -3753,6 +4016,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
err = mlx5_enable_eth(dev);
if (err)
goto err_free_port;
+ dev->roce.last_port_state = IB_PORT_DOWN;
}
err = create_dev_resources(&dev->devr);
@@ -3769,9 +4033,13 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
goto err_odp;
}
+ err = mlx5_ib_init_cong_debugfs(dev);
+ if (err)
+ goto err_cnt;
+
dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
if (!dev->mdev->priv.uar)
- goto err_cnt;
+ goto err_cong;
err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
if (err)
@@ -3789,18 +4057,25 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_dev;
+ init_delay_drop(dev);
+
for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
err = device_create_file(&dev->ib_dev.dev,
mlx5_class_attributes[i]);
if (err)
- goto err_umrc;
+ goto err_delay_drop;
}
+ if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ MLX5_CAP_GEN(mdev, disable_local_lb))
+ mutex_init(&dev->lb_mutex);
+
dev->ib_active = true;
return dev;
-err_umrc:
+err_delay_drop:
+ cancel_delay_drop(dev);
destroy_umrc_res(dev);
err_dev:
@@ -3816,6 +4091,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
err_cnt:
+ mlx5_ib_cleanup_cong_debugfs(dev);
+err_cong:
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
@@ -3845,11 +4122,13 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
struct mlx5_ib_dev *dev = context;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
+ cancel_delay_drop(dev);
mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
+ mlx5_ib_cleanup_cong_debugfs(dev);
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
mlx5_ib_dealloc_counters(dev);
destroy_umrc_res(dev);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index bdcf254..7ac9910 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -247,6 +247,10 @@ struct mlx5_ib_wq {
void *qend;
};
+enum mlx5_ib_wq_flags {
+ MLX5_IB_WQ_FLAGS_DELAY_DROP = 0x1,
+};
+
struct mlx5_ib_rwq {
struct ib_wq ibwq;
struct mlx5_core_qp core_qp;
@@ -264,6 +268,7 @@ struct mlx5_ib_rwq {
u32 wqe_count;
u32 wqe_shift;
int wq_sig;
+ u32 create_flags; /* Use enum mlx5_ib_wq_flags */
};
enum {
@@ -378,6 +383,7 @@ struct mlx5_ib_qp {
struct list_head cq_recv_list;
struct list_head cq_send_list;
u32 rate_limit;
+ u32 underlay_qpn;
};
struct mlx5_ib_cq_buf {
@@ -399,6 +405,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
MLX5_IB_QP_RSS = 1 << 8,
MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
+ MLX5_IB_QP_UNDERLAY = 1 << 10,
};
struct mlx5_umr_wr {
@@ -616,6 +623,63 @@ struct mlx5_roce {
struct net_device *netdev;
struct notifier_block nb;
atomic_t next_port;
+ enum ib_port_state last_port_state;
+};
+
+struct mlx5_ib_dbg_param {
+ int offset;
+ struct mlx5_ib_dev *dev;
+ struct dentry *dentry;
+};
+
+enum mlx5_ib_dbg_cc_types {
+ MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE,
+ MLX5_IB_DBG_CC_RP_CLAMP_TGT_RATE_ATI,
+ MLX5_IB_DBG_CC_RP_TIME_RESET,
+ MLX5_IB_DBG_CC_RP_BYTE_RESET,
+ MLX5_IB_DBG_CC_RP_THRESHOLD,
+ MLX5_IB_DBG_CC_RP_AI_RATE,
+ MLX5_IB_DBG_CC_RP_HAI_RATE,
+ MLX5_IB_DBG_CC_RP_MIN_DEC_FAC,
+ MLX5_IB_DBG_CC_RP_MIN_RATE,
+ MLX5_IB_DBG_CC_RP_RATE_TO_SET_ON_FIRST_CNP,
+ MLX5_IB_DBG_CC_RP_DCE_TCP_G,
+ MLX5_IB_DBG_CC_RP_DCE_TCP_RTT,
+ MLX5_IB_DBG_CC_RP_RATE_REDUCE_MONITOR_PERIOD,
+ MLX5_IB_DBG_CC_RP_INITIAL_ALPHA_VALUE,
+ MLX5_IB_DBG_CC_RP_GD,
+ MLX5_IB_DBG_CC_NP_CNP_DSCP,
+ MLX5_IB_DBG_CC_NP_CNP_PRIO_MODE,
+ MLX5_IB_DBG_CC_NP_CNP_PRIO,
+ MLX5_IB_DBG_CC_MAX,
+};
+
+struct mlx5_ib_dbg_cc_params {
+ struct dentry *root;
+ struct mlx5_ib_dbg_param params[MLX5_IB_DBG_CC_MAX];
+};
+
+enum {
+ MLX5_MAX_DELAY_DROP_TIMEOUT_MS = 100,
+};
+
+struct mlx5_ib_dbg_delay_drop {
+ struct dentry *dir_debugfs;
+ struct dentry *rqs_cnt_debugfs;
+ struct dentry *events_cnt_debugfs;
+ struct dentry *timeout_debugfs;
+};
+
+struct mlx5_ib_delay_drop {
+ struct mlx5_ib_dev *dev;
+ struct work_struct delay_drop_work;
+ /* serialize setting of delay drop */
+ struct mutex lock;
+ u32 timeout;
+ bool activate;
+ atomic_t events_cnt;
+ atomic_t rqs_cnt;
+ struct mlx5_ib_dbg_delay_drop *dbg;
};
struct mlx5_ib_dev {
@@ -652,9 +716,15 @@ struct mlx5_ib_dev {
struct list_head qp_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
- struct mlx5_sq_bfreg bfreg;
- struct mlx5_sq_bfreg fp_bfreg;
- u8 umr_fence;
+ struct mlx5_sq_bfreg bfreg;
+ struct mlx5_sq_bfreg fp_bfreg;
+ struct mlx5_ib_delay_drop delay_drop;
+ struct mlx5_ib_dbg_cc_params *dbg_cc_params;
+
+ /* protect the user_td */
+ struct mutex lb_mutex;
+ u32 user_td;
+ u8 umr_fence;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -904,6 +974,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
int index, enum ib_gid_type *gid_type);
+void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev);
+int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev);
+
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 2c40a2e..a0eb2f9 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -48,6 +48,7 @@ enum {
#define MLX5_UMR_ALIGN 2048
static int clean_mr(struct mlx5_ib_mr *mr);
+static int max_umr_order(struct mlx5_ib_dev *dev);
static int use_umr(struct mlx5_ib_dev *dev, int order);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
@@ -491,16 +492,18 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_ib_mr *mr = NULL;
struct mlx5_cache_ent *ent;
+ int last_umr_cache_entry;
int c;
int i;
c = order2idx(dev, order);
- if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
+ last_umr_cache_entry = order2idx(dev, max_umr_order(dev));
+ if (c < 0 || c > last_umr_cache_entry) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
- for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
+ for (i = c; i <= last_umr_cache_entry; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@@ -816,11 +819,16 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
return (npages + 1) / 2;
}
-static int use_umr(struct mlx5_ib_dev *dev, int order)
+static int max_umr_order(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
- return order <= MAX_UMR_CACHE_ENTRY + 2;
- return order <= MLX5_MAX_UMR_SHIFT;
+ return MAX_UMR_CACHE_ENTRY + 2;
+ return MLX5_MAX_UMR_SHIFT;
+}
+
+static int use_umr(struct mlx5_ib_dev *dev, int order)
+{
+ return order <= max_umr_order(dev);
}
static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index ae07467..3d701c7 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
if (qp->ibqp.qp_type != IB_QPT_RC) {
av = *wqe;
- if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT))
+ if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
*wqe += sizeof(struct mlx5_av);
else
*wqe += sizeof(struct mlx5_base_av);
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 0889ff3..5c7ce9b 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -34,6 +34,7 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_user_verbs.h>
+#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
/* not supported currently */
@@ -453,7 +454,8 @@ static int set_user_buf_size(struct mlx5_ib_dev *dev,
return -EINVAL;
}
- if (attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
base->ubuffer.buf_size = qp->rq.wqe_cnt << qp->rq.wqe_shift;
qp->raw_packet_qp.sq.ubuffer.buf_size = qp->sq.wqe_cnt << 6;
} else {
@@ -675,10 +677,14 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
return err;
}
-static void destroy_user_rq(struct ib_pd *pd, struct mlx5_ib_rwq *rwq)
+static void destroy_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_rwq *rwq)
{
struct mlx5_ib_ucontext *context;
+ if (rwq->create_flags & MLX5_IB_WQ_FLAGS_DELAY_DROP)
+ atomic_dec(&dev->delay_drop.rqs_cnt);
+
context = to_mucontext(pd->uobject->context);
mlx5_ib_db_unmap_user(context, &rwq->db);
if (rwq->umem)
@@ -1021,12 +1027,16 @@ static int is_connected(enum ib_qp_type qp_type)
}
static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_qp *qp,
struct mlx5_ib_sq *sq, u32 tdn)
{
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
MLX5_SET(tisc, tisc, transport_domain, tdn);
+ if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
+
return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
}
@@ -1229,7 +1239,7 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
u32 tdn = mucontext->tdn;
if (qp->sq.wqe_cnt) {
- err = create_raw_packet_qp_tis(dev, sq, tdn);
+ err = create_raw_packet_qp_tis(dev, qp, sq, tdn);
if (err)
return err;
@@ -1502,10 +1512,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
u32 *in;
int err;
- base = init_attr->qp_type == IB_QPT_RAW_PACKET ?
- &qp->raw_packet_qp.rq.base :
- &qp->trans_qp.base;
-
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@@ -1587,10 +1593,28 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->wq_sig = !!(ucmd.flags & MLX5_QP_FLAG_SIGNATURE);
qp->scat_cqe = !!(ucmd.flags & MLX5_QP_FLAG_SCATTER_CQE);
+
+ if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
+ if (init_attr->qp_type != IB_QPT_UD ||
+ (MLX5_CAP_GEN(dev->mdev, port_type) !=
+ MLX5_CAP_PORT_TYPE_IB) ||
+ !mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) {
+ mlx5_ib_dbg(dev, "Source QP option isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ qp->flags |= MLX5_IB_QP_UNDERLAY;
+ qp->underlay_qpn = init_attr->source_qpn;
+ }
} else {
qp->wq_sig = !!wq_signature;
}
+ base = (init_attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) ?
+ &qp->raw_packet_qp.rq.base :
+ &qp->trans_qp.base;
+
qp->has_rq = qp_has_rq(init_attr);
err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
qp, (pd && pd->uobject) ? &ucmd : NULL);
@@ -1741,7 +1765,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags |= MLX5_IB_QP_LSO;
}
- if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
+ if (init_attr->qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
raw_packet_qp_copy_info(qp, &qp->raw_packet_qp);
err = create_raw_packet_qp(dev, qp, in, pd);
@@ -1893,7 +1918,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
struct mlx5_ib_cq *send_cq, *recv_cq;
- struct mlx5_ib_qp_base *base = &qp->trans_qp.base;
+ struct mlx5_ib_qp_base *base;
unsigned long flags;
int err;
@@ -1902,12 +1927,14 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
return;
}
- base = qp->ibqp.qp_type == IB_QPT_RAW_PACKET ?
+ base = (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) ?
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
if (qp->state != IB_QPS_RESET) {
- if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET &&
+ !(qp->flags & MLX5_IB_QP_UNDERLAY)) {
err = mlx5_core_qp_modify(dev->mdev,
MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp);
@@ -1946,7 +1973,8 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
mlx5_ib_unlock_cqs(send_cq, recv_cq);
spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
destroy_raw_packet_qp(dev, qp);
} else {
err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
@@ -2702,7 +2730,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (is_sqp(ibqp->qp_type)) {
context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
- } else if (ibqp->qp_type == IB_QPT_UD ||
+ } else if ((ibqp->qp_type == IB_QPT_UD &&
+ !(qp->flags & MLX5_IB_QP_UNDERLAY)) ||
ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
} else if (attr_mask & IB_QP_PATH_MTU) {
@@ -2799,6 +2828,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
+
+ /* Underlay port should be used - index 0 function per port */
+ if (qp->flags & MLX5_IB_QP_UNDERLAY)
+ port_num = 0;
+
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
cpu_to_be32((u32)(mibport->cnts.set_id) << 24);
@@ -2824,7 +2858,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
struct mlx5_modify_raw_qp_param raw_qp_param = {};
raw_qp_param.operation = op;
@@ -2913,7 +2948,13 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
}
- if (qp_type != MLX5_IB_QPT_REG_UMR &&
+ if (qp->flags & MLX5_IB_QP_UNDERLAY) {
+ if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) {
+ mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n",
+ attr_mask);
+ goto out;
+ }
+ } else if (qp_type != MLX5_IB_QPT_REG_UMR &&
!ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
cur_state, new_state, ibqp->qp_type, attr_mask);
@@ -4477,9 +4518,14 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
+ /* Not all of output fields are applicable, make sure to zero them */
+ memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+ memset(qp_attr, 0, sizeof(*qp_attr));
+
mutex_lock(&qp->mutex);
- if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
+ if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
+ qp->flags & MLX5_IB_QP_UNDERLAY) {
err = query_raw_packet_qp_state(dev, qp, &raw_packet_qp_state);
if (err)
goto out;
@@ -4597,6 +4643,27 @@ static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type)
}
}
+static int set_delay_drop(struct mlx5_ib_dev *dev)
+{
+ int err = 0;
+
+ mutex_lock(&dev->delay_drop.lock);
+ if (dev->delay_drop.activate)
+ goto out;
+
+ err = mlx5_core_set_delay_drop(dev->mdev, dev->delay_drop.timeout);
+ if (err)
+ goto out;
+
+ dev->delay_drop.activate = true;
+out:
+ mutex_unlock(&dev->delay_drop.lock);
+
+ if (!err)
+ atomic_inc(&dev->delay_drop.rqs_cnt);
+ return err;
+}
+
static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
@@ -4651,9 +4718,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
}
MLX5_SET(rqc, rqc, scatter_fcs, 1);
}
+ if (init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
+ if (!(dev->ib_dev.attrs.raw_packet_caps &
+ IB_RAW_PACKET_CAP_DELAY_DROP)) {
+ mlx5_ib_dbg(dev, "Delay drop is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET(rqc, rqc, delay_drop_en, 1);
+ }
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+ if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
+ err = set_delay_drop(dev);
+ if (err) {
+ mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
+ err);
+ mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+ } else {
+ rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
+ }
+ }
out:
kvfree(in);
return err;
@@ -4787,7 +4873,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
err_copy:
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
err_user_rq:
- destroy_user_rq(pd, rwq);
+ destroy_user_rq(dev, pd, rwq);
err:
kfree(rwq);
return ERR_PTR(err);
@@ -4799,7 +4885,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq)
struct mlx5_ib_rwq *rwq = to_mrwq(wq);
mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
- destroy_user_rq(wq->pd, rwq);
+ destroy_user_rq(dev, wq->pd, rwq);
kfree(rwq);
return 0;
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index a3e21a2..f9e1c69 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1,7 +1,7 @@
#ifndef _QIB_KERNEL_H
#define _QIB_KERNEL_H
/*
- * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -443,7 +443,7 @@ struct qib_irq_notify;
#endif
struct qib_msix_entry {
- struct msix_entry msix;
+ int irq;
void *arg;
#ifdef CONFIG_INFINIBAND_QIB_DCA
int dca;
@@ -1433,9 +1433,9 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *);
int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *,
const struct pci_device_id *);
void qib_pcie_ddcleanup(struct qib_devdata *);
-int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *);
+int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent);
int qib_reinit_intr(struct qib_devdata *);
-void qib_enable_intx(struct pci_dev *);
+void qib_enable_intx(struct qib_devdata *dd);
void qib_nomsi(struct qib_devdata *);
void qib_nomsix(struct qib_devdata *);
void qib_pcie_getcmd(struct qib_devdata *, u16 *, u8 *, u8 *);
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index e423b71..46045fc 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2013 - 2017 Intel Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
* All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
@@ -1838,7 +1838,7 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
bail:
if (ret) {
- if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
+ if (qib_pcie_params(dd, dd->lbus_width, NULL))
qib_dev_err(dd,
"Reset failed to setup PCIe or interrupts; continuing anyway\n");
/* clear the reset error, init error/hwerror mask */
@@ -3562,7 +3562,7 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
if (qib_mini_init)
goto bail;
- if (qib_pcie_params(dd, 8, NULL, NULL))
+ if (qib_pcie_params(dd, 8, NULL))
qib_dev_err(dd,
"Failed to setup PCIe or interrupts; continuing anyway\n");
dd->cspec->irq = pdev->irq; /* save IRQ */
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index c3679c4..49cd6e3 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2011 - 2017 Intel Corporation. All rights reserved.
* Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
* All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
@@ -2148,7 +2149,7 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
bail:
if (ret) {
- if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
+ if (qib_pcie_params(dd, dd->lbus_width, NULL))
qib_dev_err(dd,
"Reset failed to setup PCIe or interrupts; continuing anyway\n");
@@ -3309,7 +3310,7 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd)
qib_devinfo(dd->pcidev,
"MSI interrupt not detected, trying INTx interrupts\n");
qib_7220_free_irq(dd);
- qib_enable_intx(dd->pcidev);
+ qib_enable_intx(dd);
/*
* Some newer kernels require free_irq before disable_msi,
* and irq can be changed during disable and INTx enable
@@ -4619,7 +4620,7 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
minwidth = 8; /* x8 capable boards */
break;
}
- if (qib_pcie_params(dd, minwidth, NULL, NULL))
+ if (qib_pcie_params(dd, minwidth, NULL))
qib_dev_err(dd,
"Failed to setup PCIe or interrupts; continuing anyway\n");
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index bb2439f..2653064 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 - 2017 Intel Corporation. All rights reserved.
* Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -2841,10 +2841,10 @@ static void qib_7322_nomsix(struct qib_devdata *dd)
reset_dca_notifier(dd, &dd->cspec->msix_entries[i]);
#endif
irq_set_affinity_hint(
- dd->cspec->msix_entries[i].msix.vector, NULL);
+ dd->cspec->msix_entries[i].irq, NULL);
free_cpumask_var(dd->cspec->msix_entries[i].mask);
- free_irq(dd->cspec->msix_entries[i].msix.vector,
- dd->cspec->msix_entries[i].arg);
+ free_irq(dd->cspec->msix_entries[i].irq,
+ dd->cspec->msix_entries[i].arg);
}
qib_nomsix(dd);
}
@@ -3336,9 +3336,9 @@ static void reset_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
qib_devinfo(dd->pcidev,
"Disabling notifier on HCA %d irq %d\n",
dd->unit,
- m->msix.vector);
+ m->irq);
irq_set_affinity_notifier(
- m->msix.vector,
+ m->irq,
NULL);
m->notifier = NULL;
}
@@ -3354,7 +3354,7 @@ static void setup_dca_notifier(struct qib_devdata *dd, struct qib_msix_entry *m)
int ret;
m->notifier = n;
- n->notify.irq = m->msix.vector;
+ n->notify.irq = m->irq;
n->notify.notify = qib_irq_notifier_notify;
n->notify.release = qib_irq_notifier_release;
n->arg = m->arg;
@@ -3500,10 +3500,21 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
- 1,
QIB_DRV_NAME "%d (kctx)", dd->unit);
}
- ret = request_irq(
- dd->cspec->msix_entries[msixnum].msix.vector,
- handler, 0, dd->cspec->msix_entries[msixnum].name,
- arg);
+
+ dd->cspec->msix_entries[msixnum].irq = pci_irq_vector(
+ dd->pcidev, msixnum);
+ if (dd->cspec->msix_entries[msixnum].irq < 0) {
+ qib_dev_err(dd,
+ "Couldn't get MSIx irq (vec=%d): %d\n",
+ msixnum,
+ dd->cspec->msix_entries[msixnum].irq);
+ qib_7322_nomsix(dd);
+ goto try_intx;
+ }
+ ret = request_irq(dd->cspec->msix_entries[msixnum].irq,
+ handler, 0,
+ dd->cspec->msix_entries[msixnum].name,
+ arg);
if (ret) {
/*
* Shouldn't happen since the enable said we could
@@ -3512,7 +3523,7 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
qib_dev_err(dd,
"Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n",
msixnum,
- dd->cspec->msix_entries[msixnum].msix.vector,
+ dd->cspec->msix_entries[msixnum].irq,
ret);
qib_7322_nomsix(dd);
goto try_intx;
@@ -3548,7 +3559,7 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
dd->cspec->msix_entries[msixnum].mask);
}
irq_set_affinity_hint(
- dd->cspec->msix_entries[msixnum].msix.vector,
+ dd->cspec->msix_entries[msixnum].irq,
dd->cspec->msix_entries[msixnum].mask);
}
msixnum++;
@@ -3744,7 +3755,6 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
if (msix_entries) {
/* restore the MSIx vector address and data if saved above */
for (i = 0; i < msix_entries; i++) {
- dd->cspec->msix_entries[i].msix.entry = i;
if (!msix_vecsave || !msix_vecsave[2 * i])
continue;
qib_write_kreg(dd, 2 * i +
@@ -3762,8 +3772,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
write_7322_initregs(dd);
if (qib_pcie_params(dd, dd->lbus_width,
- &dd->cspec->num_msix_entries,
- dd->cspec->msix_entries))
+ &dd->cspec->num_msix_entries))
qib_dev_err(dd,
"Reset failed to setup PCIe or interrupts; continuing anyway\n");
@@ -5195,7 +5204,7 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd)
qib_devinfo(dd->pcidev,
"MSIx interrupt not detected, trying INTx interrupts\n");
qib_7322_nomsix(dd);
- qib_enable_intx(dd->pcidev);
+ qib_enable_intx(dd);
qib_setup_7322_interrupt(dd, 0);
return 1;
}
@@ -7327,10 +7336,7 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
if (!dd->cspec->msix_entries)
tabsize = 0;
- for (i = 0; i < tabsize; i++)
- dd->cspec->msix_entries[i].msix.entry = i;
-
- if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries))
+ if (qib_pcie_params(dd, 8, &tabsize))
qib_dev_err(dd,
"Failed to setup PCIe or interrupts; continuing anyway\n");
/* may be less than we wanted, if not enough available */
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index c5eaa3c..6b9b43b 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -134,24 +134,21 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
}
/*
- * Send a bad [PQ]_Key trap (ch. 14.3.8).
+ * Send a bad P_Key trap (ch. 14.3.8).
*/
-void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
- u32 qp1, u32 qp2, __be16 lid1, __be16 lid2)
+void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl,
+ u32 qp1, u32 qp2, __be16 lid1, __be16 lid2)
{
struct ib_mad_notice_attr data;
- if (trap_num == IB_NOTICE_TRAP_BAD_PKEY)
- ibp->rvp.pkey_violations++;
- else
- ibp->rvp.qkey_violations++;
ibp->rvp.n_pkt_drops++;
+ ibp->rvp.pkey_violations++;
/* Send violation trap */
data.generic_type = IB_NOTICE_TYPE_SECURITY;
data.prod_type_msb = 0;
data.prod_type_lsb = IB_NOTICE_PROD_CA;
- data.trap_num = trap_num;
+ data.trap_num = IB_NOTICE_TRAP_BAD_PKEY;
data.issuer_lid = cpu_to_be16(ppd_from_ibp(ibp)->lid);
data.toggle_count = 0;
memset(&data.details, 0, sizeof(data.details));
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index c379b83..d90403e 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2010 - 2017 Intel Corporation. All rights reserved.
* Copyright (c) 2008, 2009 QLogic Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -187,112 +188,84 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd)
pci_set_drvdata(dd->pcidev, NULL);
}
-static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
- struct qib_msix_entry *qib_msix_entry)
-{
- int ret;
- int nvec = *msixcnt;
- struct msix_entry *msix_entry;
- int i;
-
- ret = pci_msix_vec_count(dd->pcidev);
- if (ret < 0)
- goto do_intx;
-
- nvec = min(nvec, ret);
-
- /* We can't pass qib_msix_entry array to qib_msix_setup
- * so use a dummy msix_entry array and copy the allocated
- * irq back to the qib_msix_entry array. */
- msix_entry = kcalloc(nvec, sizeof(*msix_entry), GFP_KERNEL);
- if (!msix_entry)
- goto do_intx;
-
- for (i = 0; i < nvec; i++)
- msix_entry[i] = qib_msix_entry[i].msix;
-
- ret = pci_enable_msix_range(dd->pcidev, msix_entry, 1, nvec);
- if (ret < 0)
- goto free_msix_entry;
- else
- nvec = ret;
-
- for (i = 0; i < nvec; i++)
- qib_msix_entry[i].msix = msix_entry[i];
-
- kfree(msix_entry);
- *msixcnt = nvec;
- return;
-
-free_msix_entry:
- kfree(msix_entry);
-
-do_intx:
- qib_dev_err(
- dd,
- "pci_enable_msix_range %d vectors failed: %d, falling back to INTx\n",
- nvec, ret);
- *msixcnt = 0;
- qib_enable_intx(dd->pcidev);
-}
-
/**
* We save the msi lo and hi values, so we can restore them after
* chip reset (the kernel PCI infrastructure doesn't yet handle that
* correctly.
*/
-static int qib_msi_setup(struct qib_devdata *dd, int pos)
+static void qib_msi_setup(struct qib_devdata *dd, int pos)
{
struct pci_dev *pdev = dd->pcidev;
u16 control;
- int ret;
- ret = pci_enable_msi(pdev);
- if (ret)
- qib_dev_err(dd,
- "pci_enable_msi failed: %d, interrupts may not work\n",
- ret);
- /* continue even if it fails, we may still be OK... */
-
- pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
- &dd->msi_lo);
- pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI,
- &dd->msi_hi);
+ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, &dd->msi_lo);
+ pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_HI, &dd->msi_hi);
pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &control);
+
/* now save the data (vector) info */
- pci_read_config_word(pdev, pos + ((control & PCI_MSI_FLAGS_64BIT)
- ? 12 : 8),
+ pci_read_config_word(pdev,
+ pos + ((control & PCI_MSI_FLAGS_64BIT) ? 12 : 8),
&dd->msi_data);
- return ret;
}
-int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
- struct qib_msix_entry *entry)
+static int qib_allocate_irqs(struct qib_devdata *dd, u32 maxvec)
+{
+ unsigned int flags = PCI_IRQ_LEGACY;
+
+ /* Check our capabilities */
+ if (dd->pcidev->msix_cap) {
+ flags |= PCI_IRQ_MSIX;
+ } else {
+ if (dd->pcidev->msi_cap) {
+ flags |= PCI_IRQ_MSI;
+ /* Get msi_lo and msi_hi */
+ qib_msi_setup(dd, dd->pcidev->msi_cap);
+ }
+ }
+
+ if (!(flags & (PCI_IRQ_MSIX | PCI_IRQ_MSI)))
+ qib_dev_err(dd, "No PCI MSI or MSIx capability!\n");
+
+ return pci_alloc_irq_vectors(dd->pcidev, 1, maxvec, flags);
+}
+
+int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent)
{
u16 linkstat, speed;
- int pos = 0, ret = 1;
+ int nvec;
+ int maxvec;
+ int ret = 0;
if (!pci_is_pcie(dd->pcidev)) {
qib_dev_err(dd, "Can't find PCI Express capability!\n");
/* set up something... */
dd->lbus_width = 1;
dd->lbus_speed = 2500; /* Gen1, 2.5GHz */
+ ret = -1;
goto bail;
}
- pos = dd->pcidev->msix_cap;
- if (nent && *nent && pos) {
- qib_msix_setup(dd, pos, nent, entry);
- ret = 0; /* did it, either MSIx or INTx */
- } else {
- pos = dd->pcidev->msi_cap;
- if (pos)
- ret = qib_msi_setup(dd, pos);
- else
- qib_dev_err(dd, "No PCI MSI or MSIx capability!\n");
+ maxvec = (nent && *nent) ? *nent : 1;
+ nvec = qib_allocate_irqs(dd, maxvec);
+ if (nvec < 0) {
+ ret = nvec;
+ goto bail;
}
- if (!pos)
- qib_enable_intx(dd->pcidev);
+
+ /*
+ * If nent exists, make sure to record how many vectors were allocated
+ */
+ if (nent) {
+ *nent = nvec;
+
+ /*
+ * If we requested (nent) MSIX, but msix_enabled is not set,
+ * pci_alloc_irq_vectors() enabled INTx.
+ */
+ if (!dd->pcidev->msix_enabled)
+ qib_dev_err(dd,
+ "no msix vectors allocated, using INTx\n");
+ }
pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat);
/*
@@ -379,7 +352,7 @@ int qib_reinit_intr(struct qib_devdata *dd)
ret = 1;
bail:
if (!ret && (dd->flags & QIB_HAS_INTX)) {
- qib_enable_intx(dd->pcidev);
+ qib_enable_intx(dd);
ret = 1;
}
@@ -397,7 +370,7 @@ int qib_reinit_intr(struct qib_devdata *dd)
void qib_nomsi(struct qib_devdata *dd)
{
dd->msi_lo = 0;
- pci_disable_msi(dd->pcidev);
+ pci_free_irq_vectors(dd->pcidev);
}
/*
@@ -405,23 +378,21 @@ void qib_nomsi(struct qib_devdata *dd)
*/
void qib_nomsix(struct qib_devdata *dd)
{
- pci_disable_msix(dd->pcidev);
+ pci_free_irq_vectors(dd->pcidev);
}
/*
* Similar to pci_intx(pdev, 1), except that we make sure
* msi(x) is off.
*/
-void qib_enable_intx(struct pci_dev *pdev)
+void qib_enable_intx(struct qib_devdata *dd)
{
u16 cw, new;
int pos;
+ struct pci_dev *pdev = dd->pcidev;
- /* first, turn on INTx */
- pci_read_config_word(pdev, PCI_COMMAND, &cw);
- new = cw & ~PCI_COMMAND_INTX_DISABLE;
- if (new != cw)
- pci_write_config_word(pdev, PCI_COMMAND, new);
+ if (pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_LEGACY) < 0)
+ qib_dev_err(dd, "Failed to enable INTx\n");
pos = pdev->msi_cap;
if (pos) {
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index bd09de7..2852845 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -59,7 +59,7 @@ static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
continue;
/* Check LKEY */
if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+ NULL, &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
goto bad_lkey;
qp->r_len += wqe->sg_list[i].length;
j++;
@@ -256,11 +256,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
}
if (!qib_pkey_ok((u16)bth0,
qib_get_pkey(ibp, qp->s_alt_pkey_index))) {
- qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
- (u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
- 0, qp->ibqp.qp_num,
- hdr->lrh[3], hdr->lrh[1]);
+ qib_bad_pkey(ibp,
+ (u16)bth0,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ 0, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
@@ -295,11 +295,11 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
}
if (!qib_pkey_ok((u16)bth0,
qib_get_pkey(ibp, qp->s_pkey_index))) {
- qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
- (u16)bth0,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
- 0, qp->ibqp.qp_num,
- hdr->lrh[3], hdr->lrh[1]);
+ qib_bad_pkey(ibp,
+ (u16)bth0,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ 0, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
goto err;
}
/* Validate the SLID. See Ch. 9.6.1.5 */
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 341a123..be49074 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -66,8 +66,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn);
if (!qp) {
ibp->rvp.n_pkt_drops++;
- rcu_read_unlock();
- return;
+ goto drop;
}
sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
@@ -94,11 +93,11 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
- qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1,
- rdma_ah_get_sl(ah_attr),
- sqp->ibqp.qp_num, qp->ibqp.qp_num,
- cpu_to_be16(lid),
- cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
+ qib_bad_pkey(ibp, pkey1,
+ rdma_ah_get_sl(ah_attr),
+ sqp->ibqp.qp_num, qp->ibqp.qp_num,
+ cpu_to_be16(lid),
+ cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
goto drop;
}
}
@@ -113,18 +112,8 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
sqp->qkey : swqe->ud_wr.remote_qkey;
- if (unlikely(qkey != qp->qkey)) {
- u16 lid;
-
- lid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
- ((1 << ppd->lmc) - 1));
- qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
- rdma_ah_get_sl(ah_attr),
- sqp->ibqp.qp_num, qp->ibqp.qp_num,
- cpu_to_be16(lid),
- cpu_to_be16(rdma_ah_get_dlid(ah_attr)));
+ if (unlikely(qkey != qp->qkey))
goto drop;
- }
}
/*
@@ -487,22 +476,18 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
pkey1 = be32_to_cpu(ohdr->bth[0]);
pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
- qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
- pkey1,
- (be16_to_cpu(hdr->lrh[0]) >> 4) &
+ qib_bad_pkey(ibp,
+ pkey1,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) &
0xF,
- src_qp, qp->ibqp.qp_num,
- hdr->lrh[3], hdr->lrh[1]);
+ src_qp, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
return;
}
}
- if (unlikely(qkey != qp->qkey)) {
- qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
- (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
- src_qp, qp->ibqp.qp_num,
- hdr->lrh[3], hdr->lrh[1]);
+ if (unlikely(qkey != qp->qkey))
return;
- }
+
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
(tlen != 256 ||
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index a52fc67..95e3701 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -241,8 +241,8 @@ static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0);
}
-void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
- u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
+void qib_bad_pkey(struct qib_ibport *ibp, u32 key, u32 sl,
+ u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
void qib_sys_guid_chg(struct qib_ibport *ibp);
void qib_node_desc_chg(struct qib_ibport *ibp);
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index aa5f9ea3..ea95672 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -778,23 +778,54 @@ int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
}
/**
+ * rvt_sge_adjacent - is isge compressible
+ * @isge: outgoing internal SGE
+ * @last_sge: last outgoing SGE written
+ * @sge: SGE to check
+ *
+ * If adjacent will update last_sge to add length.
+ *
+ * Return: true if isge is adjacent to last sge
+ */
+static inline bool rvt_sge_adjacent(struct rvt_sge *isge,
+ struct rvt_sge *last_sge,
+ struct ib_sge *sge)
+{
+ if (last_sge && sge->lkey == last_sge->mr->lkey &&
+ ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) {
+ if (sge->lkey) {
+ if (unlikely((sge->addr - last_sge->mr->user_base +
+ sge->length > last_sge->mr->length)))
+ return false; /* overrun, caller will catch */
+ } else {
+ last_sge->length += sge->length;
+ }
+ last_sge->sge_length += sge->length;
+ trace_rvt_sge_adjacent(last_sge, sge);
+ return true;
+ }
+ return false;
+}
+
+/**
* rvt_lkey_ok - check IB SGE for validity and initialize
* @rkt: table containing lkey to check SGE against
* @pd: protection domain
* @isge: outgoing internal SGE
+ * @last_sge: last outgoing SGE written
* @sge: SGE to check
* @acc: access flags
*
* Check the IB SGE for validity and initialize our internal version
* of it.
*
- * Return: 1 if valid and successful, otherwise returns 0.
+ * Increments the reference count when a new sge is stored.
*
- * increments the reference count upon success
- *
+ * Return: 0 if compressed, 1 if added , otherwise returns -errno.
*/
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
- struct rvt_sge *isge, struct ib_sge *sge, int acc)
+ struct rvt_sge *isge, struct rvt_sge *last_sge,
+ struct ib_sge *sge, int acc)
{
struct rvt_mregion *mr;
unsigned n, m;
@@ -804,12 +835,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
* We use LKEY == zero for kernel virtual addresses
* (see rvt_get_dma_mr() and dma_virt_ops).
*/
- rcu_read_lock();
if (sge->lkey == 0) {
struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
if (pd->user)
- goto bail;
+ return -EINVAL;
+ if (rvt_sge_adjacent(isge, last_sge, sge))
+ return 0;
+ rcu_read_lock();
mr = rcu_dereference(dev->dma_mr);
if (!mr)
goto bail;
@@ -824,6 +857,9 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = 0;
goto ok;
}
+ if (rvt_sge_adjacent(isge, last_sge, sge))
+ return 0;
+ rcu_read_lock();
mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
if (!mr)
goto bail;
@@ -874,12 +910,13 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->m = m;
isge->n = n;
ok:
+ trace_rvt_sge_new(isge, sge);
return 1;
bail_unref:
rvt_put_mr(mr);
bail:
rcu_read_unlock();
- return 0;
+ return -EINVAL;
}
EXPORT_SYMBOL(rvt_lkey_ok);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 8876ee7..740611e 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -421,15 +421,6 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
return ret;
}
-static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
-{
- struct rvt_qpn_map *map;
-
- map = qpt->map + qpn / RVT_BITS_PER_PAGE;
- if (map->page)
- clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
-}
-
/**
* rvt_clear_mr_refs - Drop help mr refs
* @qp: rvt qp data structure
@@ -645,6 +636,19 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
lockdep_assert_held(&qp->s_lock);
}
+/** rvt_free_qpn - Free a qpn from the bit map
+ * @qpt: QP table
+ * @qpn: queue pair number to free
+ */
+static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
+{
+ struct rvt_qpn_map *map;
+
+ map = qpt->map + (qpn & RVT_QPN_MASK) / RVT_BITS_PER_PAGE;
+ if (map->page)
+ clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
+}
+
/**
* rvt_create_qp - create a queue pair for a device
* @ibpd: the protection domain who's device we create the queue pair for
@@ -914,7 +918,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
kref_put(&qp->ip->ref, rvt_release_mmap_info);
bail_qpn:
- free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
+ rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
bail_rq_wq:
if (!qp->ip)
@@ -1301,19 +1305,6 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
return -EINVAL;
}
-/** rvt_free_qpn - Free a qpn from the bit map
- * @qpt: QP table
- * @qpn: queue pair number to free
- */
-static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
-{
- struct rvt_qpn_map *map;
-
- map = qpt->map + qpn / RVT_BITS_PER_PAGE;
- if (map->page)
- clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
-}
-
/**
* rvt_destroy_qp - destroy a queue pair
* @ibqp: the queue pair to destroy
@@ -1622,7 +1613,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
struct rvt_pd *pd;
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
u8 log_pmtu;
- int ret;
+ int ret, incr;
size_t cplen;
bool reserved_op;
int local_ops_delayed = 0;
@@ -1695,22 +1686,23 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
wqe->length = 0;
j = 0;
if (wr->num_sge) {
+ struct rvt_sge *last_sge = NULL;
+
acc = wr->opcode >= IB_WR_RDMA_READ ?
IB_ACCESS_LOCAL_WRITE : 0;
for (i = 0; i < wr->num_sge; i++) {
u32 length = wr->sg_list[i].length;
- int ok;
if (length == 0)
continue;
- ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
- &wr->sg_list[i], acc);
- if (!ok) {
- ret = -EINVAL;
- goto bail_inval_free;
- }
+ incr = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], last_sge,
+ &wr->sg_list[i], acc);
+ if (unlikely(incr < 0))
+ goto bail_lkey_error;
wqe->length += length;
- j++;
+ if (incr)
+ last_sge = &wqe->sg_list[j];
+ j += incr;
}
wqe->wr.num_sge = j;
}
@@ -1757,12 +1749,14 @@ static int rvt_post_one_wr(struct rvt_qp *qp,
wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
qp->s_avail--;
}
- trace_rvt_post_one_wr(qp, wqe);
+ trace_rvt_post_one_wr(qp, wqe, wr->num_sge);
smp_wmb(); /* see request builders */
qp->s_head = next;
return 0;
+bail_lkey_error:
+ ret = incr;
bail_inval_free:
/* release mr holds */
while (j) {
diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h
index 3318a6c..976e482 100644
--- a/drivers/infiniband/sw/rdmavt/trace_mr.h
+++ b/drivers/infiniband/sw/rdmavt/trace_mr.h
@@ -103,6 +103,68 @@ DEFINE_EVENT(
TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
TP_ARGS(mr, m, n, v, len));
+DECLARE_EVENT_CLASS(
+ rvt_sge_template,
+ TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+ TP_ARGS(sge, isge),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(sge->mr->pd->device))
+ __field(struct rvt_mregion *, mr)
+ __field(struct rvt_sge *, sge)
+ __field(struct ib_sge *, isge)
+ __field(void *, vaddr)
+ __field(u64, ivaddr)
+ __field(u32, lkey)
+ __field(u32, sge_length)
+ __field(u32, length)
+ __field(u32, ilength)
+ __field(int, user)
+ __field(u16, m)
+ __field(u16, n)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(sge->mr->pd->device));
+ __entry->mr = sge->mr;
+ __entry->sge = sge;
+ __entry->isge = isge;
+ __entry->vaddr = sge->vaddr;
+ __entry->ivaddr = isge->addr;
+ __entry->lkey = sge->mr->lkey;
+ __entry->sge_length = sge->sge_length;
+ __entry->length = sge->length;
+ __entry->ilength = isge->length;
+ __entry->m = sge->m;
+ __entry->n = sge->m;
+ __entry->user = ibpd_to_rvtpd(sge->mr->pd)->user;
+ ),
+ TP_printk(
+ "[%s] mr %p sge %p isge %p vaddr %p ivaddr %llx lkey %x sge_length %u length %u ilength %u m %u n %u user %u",
+ __get_str(dev),
+ __entry->mr,
+ __entry->sge,
+ __entry->isge,
+ __entry->vaddr,
+ __entry->ivaddr,
+ __entry->lkey,
+ __entry->sge_length,
+ __entry->length,
+ __entry->ilength,
+ __entry->m,
+ __entry->n,
+ __entry->user
+ )
+);
+
+DEFINE_EVENT(
+ rvt_sge_template, rvt_sge_adjacent,
+ TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+ TP_ARGS(sge, isge));
+
+DEFINE_EVENT(
+ rvt_sge_template, rvt_sge_new,
+ TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge),
+ TP_ARGS(sge, isge));
+
#endif /* __RVT_TRACE_MR_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
index a613a22..0ef25fc 100644
--- a/drivers/infiniband/sw/rdmavt/trace_tx.h
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -84,12 +84,12 @@ __print_symbolic(opcode, \
wr_opcode_name(RESERVED10))
#define POS_PRN \
-"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u"
+"[%s] wqe %p wr_id %llx send_flags %x qpn %x qpt %u psn %x lpsn %x ssn %x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u pid %u num_sge %u wr_num_sge %u"
TRACE_EVENT(
rvt_post_one_wr,
- TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
- TP_ARGS(qp, wqe),
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe, int wr_num_sge),
+ TP_ARGS(qp, wqe, wr_num_sge),
TP_STRUCT__entry(
RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
__field(u64, wr_id)
@@ -108,6 +108,7 @@ TRACE_EVENT(
__field(int, send_flags)
__field(pid_t, pid)
__field(int, num_sge)
+ __field(int, wr_num_sge)
),
TP_fast_assign(
RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
@@ -127,6 +128,7 @@ TRACE_EVENT(
__entry->ssn = wqe->ssn;
__entry->send_flags = wqe->wr.send_flags;
__entry->num_sge = wqe->wr.num_sge;
+ __entry->wr_num_sge = wr_num_sge;
),
TP_printk(
POS_PRN,
@@ -146,7 +148,8 @@ TRACE_EVENT(
__entry->head,
__entry->last,
__entry->pid,
- __entry->num_sge
+ __entry->num_sge,
+ __entry->wr_num_sge
)
);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 0d7c6bb..64bdd44 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -202,8 +202,13 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num,
return -EINVAL;
rvp = rdi->ports[port_index];
- rvp->port_cap_flags |= props->set_port_cap_mask;
- rvp->port_cap_flags &= ~props->clr_port_cap_mask;
+ if (port_modify_mask & IB_PORT_OPA_MASK_CHG) {
+ rvp->port_cap3_flags |= props->set_port_cap_mask;
+ rvp->port_cap3_flags &= ~props->clr_port_cap_mask;
+ } else {
+ rvp->port_cap_flags |= props->set_port_cap_mask;
+ rvp->port_cap_flags &= ~props->clr_port_cap_mask;
+ }
if (props->set_port_cap_mask || props->clr_port_cap_mask)
rdi->driver_f.cap_mask_chg(rdi, port_num);
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 1ac5b85..6447d73 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -97,7 +97,7 @@ int rxe_rcv(struct sk_buff *skb);
void rxe_dev_put(struct rxe_dev *rxe);
struct rxe_dev *net_to_rxe(struct net_device *ndev);
-struct rxe_dev *get_rxe_by_name(const char* name);
+struct rxe_dev *get_rxe_by_name(const char *name);
void rxe_port_up(struct rxe_dev *rxe);
void rxe_port_down(struct rxe_dev *rxe);
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index d6299ed..6b65c01 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -250,7 +250,7 @@ void rxe_resp_queue_pkt(struct rxe_dev *rxe,
void rxe_comp_queue_pkt(struct rxe_dev *rxe,
struct rxe_qp *qp, struct sk_buff *skb);
-static inline unsigned wr_opcode_mask(int opcode, struct rxe_qp *qp)
+static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
{
return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index bd812e0..d22431e 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -76,7 +76,7 @@ static void rxe_vma_close(struct vm_area_struct *vma)
kref_put(&ip->ref, rxe_mmap_release);
}
-static struct vm_operations_struct rxe_vm_ops = {
+static const struct vm_operations_struct rxe_vm_ops = {
.open = rxe_vma_open,
.close = rxe_vma_close,
};
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 75d11ee..c1b5f38 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -188,7 +188,7 @@ int rxe_pool_init(
struct rxe_dev *rxe,
struct rxe_pool *pool,
enum rxe_elem_type type,
- unsigned max_elem)
+ unsigned int max_elem)
{
int err = 0;
size_t size = rxe_type_info[type].size;
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 7ee465d..db71614 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -43,7 +43,7 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
static inline void retry_first_write_send(struct rxe_qp *qp,
struct rxe_send_wqe *wqe,
- unsigned mask, int npsn)
+ unsigned int mask, int npsn)
{
int i;
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index d2a14a1..ea3810b 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -78,7 +78,7 @@ void rxe_do_task(unsigned long data)
default:
spin_unlock_irqrestore(&task->state_lock, flags);
- pr_warn("bad state = %d in rxe_do_task\n", task->state);
+ pr_warn("%s failed with bad state %d\n", __func__, task->state);
return;
}
@@ -105,7 +105,7 @@ void rxe_do_task(unsigned long data)
break;
default:
- pr_warn("bad state = %d in rxe_do_task\n",
+ pr_warn("%s failed with bad state %d\n", __func__,
task->state);
}
spin_unlock_irqrestore(&task->state_lock, flags);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index e6c10e4..e611d0c 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1169,8 +1169,8 @@ static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}
-static ssize_t rxe_show_parent(struct device *device,
- struct device_attribute *attr, char *buf)
+static ssize_t parent_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct rxe_dev *rxe = container_of(device, struct rxe_dev,
ib_dev.dev);
@@ -1178,7 +1178,7 @@ static ssize_t rxe_show_parent(struct device *device,
return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}
-static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);
+static DEVICE_ATTR_RO(parent);
static struct device_attribute *rxe_dev_attributes[] = {
&dev_attr_parent,
@@ -1295,15 +1295,15 @@ int rxe_register_device(struct rxe_dev *rxe)
err = ib_register_device(dev, NULL);
if (err) {
- pr_warn("rxe_register_device failed, err = %d\n", err);
+ pr_warn("%s failed with error %d\n", __func__, err);
goto err1;
}
for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
if (err) {
- pr_warn("device_create_file failed, i = %d, err = %d\n",
- i, err);
+ pr_warn("%s failed with error %d for attr number %d\n",
+ __func__, err, i);
goto err2;
}
}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index 6beddef..57b862b 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -52,7 +52,9 @@
#include <linux/module.h>
#include <rdma/ib_addr.h>
-#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/opa_smi.h>
+#include <rdma/opa_port_info.h>
#include "opa_vnic_internal.h"
@@ -980,6 +982,27 @@ static int vema_register(struct opa_vnic_ctrl_port *cport)
}
/**
+ * opa_vnic_ctrl_config_dev -- This function sends a trap to the EM
+ * by way of ib_modify_port to indicate support for ethernet on the
+ * fabric.
+ * @cport: pointer to control port
+ * @en: enable or disable ethernet on fabric support
+ */
+static void opa_vnic_ctrl_config_dev(struct opa_vnic_ctrl_port *cport, bool en)
+{
+ struct ib_port_modify pm = { 0 };
+ int i;
+
+ if (en)
+ pm.set_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported;
+ else
+ pm.clr_port_cap_mask = OPA_CAP_MASK3_IsEthOnFabricSupported;
+
+ for (i = 1; i <= cport->num_ports; i++)
+ ib_modify_port(cport->ibdev, i, IB_PORT_OPA_MASK_CHG, &pm);
+}
+
+/**
* opa_vnic_vema_add_one -- Handle new ib device
* @device: ib device pointer
*
@@ -1007,6 +1030,7 @@ static void opa_vnic_vema_add_one(struct ib_device *device)
c_info("VNIC client initialized\n");
ib_set_client_data(device, &opa_vnic_client, cport);
+ opa_vnic_ctrl_config_dev(cport, true);
}
/**
@@ -1025,6 +1049,7 @@ static void opa_vnic_vema_rem_one(struct ib_device *device,
return;
c_info("removing VNIC client\n");
+ opa_vnic_ctrl_config_dev(cport, false);
vema_unregister(cport);
kfree(cport);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index c56a511..72eb50c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -241,13 +241,14 @@ int __mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
return err;
}
-static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn)
+static int mlx4_cq_alloc_icm(struct mlx4_dev *dev, int *cqn, u8 usage)
{
+ u32 in_modifier = RES_CQ | (((u32)usage & 3) << 30);
u64 out_param;
int err;
if (mlx4_is_mfunc(dev)) {
- err = mlx4_cmd_imm(dev, 0, &out_param, RES_CQ,
+ err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier,
RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
@@ -303,7 +304,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
cq->vector = vector;
- err = mlx4_cq_alloc_icm(dev, &cq->cqn);
+ err = mlx4_cq_alloc_icm(dev, &cq->cqn, cq->usage);
if (err)
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index 85fe17e..f849eec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -140,6 +140,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
(cq->type == RX && priv->hwtstamp_config.rx_filter))
timestamp_en = 1;
+ cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
&mdev->priv_uar, cq->wqres.db.dma, &cq->mcq,
cq->vector, 0, timestamp_en);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 3a291fc..e3e6d9f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -651,7 +651,8 @@ static int mlx4_en_get_qp(struct mlx4_en_priv *priv)
return 0;
}
- err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP);
+ err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP,
+ MLX4_RES_USAGE_DRIVER);
en_dbg(DRV, priv, "Reserved qp %d\n", *qpn);
if (err) {
en_err(priv, "Failed to reserve qp for mac registration\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 436f768..ad1ffd5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -1081,7 +1081,8 @@ int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
u32 qpn;
err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn,
- MLX4_RESERVE_A0_QP);
+ MLX4_RESERVE_A0_QP,
+ MLX4_RES_USAGE_DRIVER);
if (err) {
en_err(priv, "Failed reserving drop qpn\n");
return err;
@@ -1127,7 +1128,8 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
flags = priv->rx_ring_num == 1 ? MLX4_RESERVE_A0_QP : 0;
err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
priv->rx_ring_num,
- &rss_map->base_qpn, flags);
+ &rss_map->base_qpn, flags,
+ MLX4_RES_USAGE_DRIVER);
if (err) {
en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 73faa3d..a81db25 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -105,7 +105,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
(unsigned long long) ring->sp_wqres.buf.direct.map);
err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
- MLX4_RESERVE_ETH_BF_QP);
+ MLX4_RESERVE_ETH_BF_QP,
+ MLX4_RES_USAGE_DRIVER);
if (err) {
en_err(priv, "failed reserving qp for TX ring\n");
goto err_hwq_res;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index a27c9c1..fb2591d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -2475,7 +2475,7 @@ static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
priv->def_counter[port] = -1;
for (port = 0; port < dev->caps.num_ports; port++) {
- err = mlx4_counter_alloc(dev, &idx);
+ err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER);
if (!err || err == -ENOSPC) {
priv->def_counter[port] = idx;
@@ -2517,13 +2517,14 @@ int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
return 0;
}
-int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
+int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage)
{
+ u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30);
u64 out_param;
int err;
if (mlx4_is_mfunc(dev)) {
- err = mlx4_cmd_imm(dev, 0, &out_param, RES_COUNTER,
+ err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier,
RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (!err)
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 26747212..5e5b447 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -245,8 +245,9 @@ int __mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
}
int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
- int *base, u8 flags)
+ int *base, u8 flags, u8 usage)
{
+ u32 in_modifier = RES_QP | (((u32)usage & 3) << 30);
u64 in_param = 0;
u64 out_param;
int err;
@@ -258,7 +259,7 @@ int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
set_param_l(&in_param, (((u32)flags) << 24) | (u32)cnt);
set_param_h(&in_param, align);
err = mlx4_cmd_imm(dev, in_param, &out_param,
- RES_QP, RES_OP_RESERVE,
+ in_modifier, RES_OP_RESERVE,
MLX4_CMD_ALLOC_RES,
MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index 898759f..1f1f8af8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -189,6 +189,7 @@ struct mlx5e_lbt_priv {
struct packet_type pt;
struct completion comp;
bool loopback_ok;
+ bool local_lb;
};
static int
@@ -236,6 +237,13 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
{
int err = 0;
+ /* Temporarily enable local_lb */
+ if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) {
+ mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb);
+ if (!lbtp->local_lb)
+ mlx5_nic_vport_update_local_lb(priv->mdev, true);
+ }
+
err = mlx5e_refresh_tirs(priv, true);
if (err)
return err;
@@ -254,6 +262,11 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
struct mlx5e_lbt_priv *lbtp)
{
+ if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) {
+ if (!lbtp->local_lb)
+ mlx5_nic_vport_update_local_lb(priv->mdev, false);
+ }
+
dev_remove_pack(&lbtp->pt);
mlx5e_refresh_tirs(priv, false);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index af51a5d..8494174 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -161,6 +161,8 @@ static const char *eqe_type_str(u8 type)
return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
case MLX5_EVENT_TYPE_FPGA_ERROR:
return "MLX5_EVENT_TYPE_FPGA_ERROR";
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ return "MLX5_EVENT_TYPE_GENERAL_EVENT";
default:
return "Unrecognized event";
}
@@ -378,6 +380,20 @@ int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif
+static void general_event_handler(struct mlx5_core_dev *dev,
+ struct mlx5_eqe *eqe)
+{
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
+ if (dev->event)
+ dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
+ break;
+ default:
+ mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
+ eqe->sub_type);
+ }
+}
+
static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
{
struct mlx5_eq *eq = eq_ptr;
@@ -486,6 +502,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
break;
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ general_event_handler(dev, eqe);
+ break;
default:
mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
eqe->type, eq->eqn);
@@ -693,6 +712,10 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
mlx5_core_is_pf(dev))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
+ MLX5_CAP_GEN(dev, general_notification_event))
+ async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);
+
if (MLX5_CAP_GEN(dev, port_module_event))
async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index fa33d59..2c71557 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -120,6 +120,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_IPOIB_ENHANCED_OFFLOADS);
+ if (err)
+ return err;
+ }
+
if (MLX5_CAP_GEN(dev, pg)) {
err = mlx5_core_get_caps(dev, MLX5_CAP_ODP);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index c065132..3cec683 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -47,6 +47,7 @@
#include <linux/debugfs.h>
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
+#include <linux/mlx5/vport.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
@@ -579,6 +580,18 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev)
return err;
}
+static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
+{
+ int ret = 0;
+
+ /* Disable local_lb by default */
+ if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
+ MLX5_CAP_GEN(dev, disable_local_lb))
+ ret = mlx5_nic_vport_update_local_lb(dev, false);
+
+ return ret;
+}
+
int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
@@ -1155,6 +1168,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_fs;
}
+ err = mlx5_core_set_hca_defaults(dev);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to set hca defaults\n");
+ goto err_fs;
+ }
+
#ifdef CONFIG_MLX5_CORE_EN
mlx5_eswitch_attach(dev->priv.eswitch);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 340f281..db9e665 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -242,6 +242,20 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
+int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev,
+ u32 timeout_usec)
+{
+ u32 out[MLX5_ST_SZ_DW(set_delay_drop_params_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {0};
+
+ MLX5_SET(set_delay_drop_params_in, in, opcode,
+ MLX5_CMD_OP_SET_DELAY_DROP_PARAMS);
+ MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout,
+ timeout_usec / 100);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+EXPORT_SYMBOL_GPL(mlx5_core_set_delay_drop);
+
struct mbox_info {
u32 *in;
u32 *out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index bcdf777..090b29e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -32,6 +32,7 @@
#include <linux/pci.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#ifdef CONFIG_MLX5_CORE_EN
#include "eswitch.h"
@@ -44,6 +45,38 @@ bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev)
return !!sriov->num_vfs;
}
+static int sriov_restore_guids(struct mlx5_core_dev *dev, int vf)
+{
+ struct mlx5_core_sriov *sriov = &dev->priv.sriov;
+ struct mlx5_hca_vport_context *in;
+ int err = 0;
+
+ /* Restore sriov guid and policy settings */
+ if (sriov->vfs_ctx[vf].node_guid ||
+ sriov->vfs_ctx[vf].port_guid ||
+ sriov->vfs_ctx[vf].policy != MLX5_POLICY_INVALID) {
+ in = kzalloc(sizeof(*in), GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ in->node_guid = sriov->vfs_ctx[vf].node_guid;
+ in->port_guid = sriov->vfs_ctx[vf].port_guid;
+ in->policy = sriov->vfs_ctx[vf].policy;
+ in->field_select =
+ !!(in->port_guid) * MLX5_HCA_VPORT_SEL_PORT_GUID |
+ !!(in->node_guid) * MLX5_HCA_VPORT_SEL_NODE_GUID |
+ !!(in->policy) * MLX5_HCA_VPORT_SEL_STATE_POLICY;
+
+ err = mlx5_core_modify_hca_vport_context(dev, 1, 1, vf + 1, in);
+ if (err)
+ mlx5_core_warn(dev, "modify vport context failed, unable to restore VF %d settings\n", vf);
+
+ kfree(in);
+ }
+
+ return err;
+}
+
static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
{
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
@@ -74,6 +107,15 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
}
sriov->vfs_ctx[vf].enabled = 1;
sriov->enabled_vfs++;
+ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) {
+ err = sriov_restore_guids(dev, vf);
+ if (err) {
+ mlx5_core_warn(dev,
+ "failed to restore VF %d settings, err %d\n",
+ vf, err);
+ continue;
+ }
+ }
mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 5abfec1..d653b00 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -897,6 +897,68 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
}
EXPORT_SYMBOL_GPL(mlx5_modify_nic_vport_promisc);
+enum {
+ UC_LOCAL_LB,
+ MC_LOCAL_LB
+};
+
+int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable)
+{
+ int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+ void *in;
+ int err;
+
+ mlx5_core_dbg(mdev, "%s local_lb\n", enable ? "enable" : "disable");
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.disable_mc_local_lb, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.disable_mc_local_lb, !enable);
+
+ MLX5_SET(modify_nic_vport_context_in, in,
+ field_select.disable_uc_local_lb, 1);
+ MLX5_SET(modify_nic_vport_context_in, in,
+ nic_vport_context.disable_uc_local_lb, !enable);
+
+ err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+ kvfree(in);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_update_local_lb);
+
+int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
+{
+ int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+ u32 *out;
+ int value;
+ int err;
+
+ out = kzalloc(outlen, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+ if (err)
+ goto out;
+
+ value = MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.disable_mc_local_lb) << MC_LOCAL_LB;
+
+ value |= MLX5_GET(query_nic_vport_context_out, out,
+ nic_vport_context.disable_uc_local_lb) << UC_LOCAL_LB;
+
+ *status = !value;
+
+out:
+ kfree(out);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_nic_vport_query_local_lb);
+
enum mlx5_vport_roce_state {
MLX5_VPORT_ROCE_DISABLED = 0,
MLX5_VPORT_ROCE_ENABLED = 1,
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index aad5d81..3607da0 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -428,6 +428,12 @@ enum mlx4_steer_type {
MLX4_NUM_STEERS
};
+enum mlx4_resource_usage {
+ MLX4_RES_USAGE_NONE,
+ MLX4_RES_USAGE_DRIVER,
+ MLX4_RES_USAGE_USER_VERBS,
+};
+
enum {
MLX4_NUM_FEXCH = 64 * 1024,
};
@@ -748,6 +754,7 @@ struct mlx4_cq {
} tasklet_ctx;
int reset_notify_added;
struct list_head reset_notify;
+ u8 usage;
};
struct mlx4_qp {
@@ -757,6 +764,7 @@ struct mlx4_qp {
atomic_t refcount;
struct completion free;
+ u8 usage;
};
struct mlx4_srq {
@@ -1120,7 +1128,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent, struct mlx4_mtt *mtt,
unsigned vector, int collapsed, int timestamp_en);
void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq);
int mlx4_qp_reserve_range(struct mlx4_dev *dev, int cnt, int align,
- int *base, u8 flags);
+ int *base, u8 flags, u8 usage);
void mlx4_qp_release_range(struct mlx4_dev *dev, int base_qpn, int cnt);
int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp);
@@ -1417,7 +1425,7 @@ int mlx4_get_phys_port_id(struct mlx4_dev *dev);
int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);
int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
-int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
+int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage);
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port);
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index f31a0b5..c13d71d 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -290,6 +290,7 @@ enum mlx5_event {
MLX5_EVENT_TYPE_GPIO_EVENT = 0x15,
MLX5_EVENT_TYPE_PORT_MODULE_EVENT = 0x16,
MLX5_EVENT_TYPE_REMOTE_CONFIG = 0x19,
+ MLX5_EVENT_TYPE_GENERAL_EVENT = 0x22,
MLX5_EVENT_TYPE_PPS_EVENT = 0x25,
MLX5_EVENT_TYPE_DB_BF_CONGESTION = 0x1a,
@@ -305,6 +306,10 @@ enum mlx5_event {
};
enum {
+ MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1,
+};
+
+enum {
MLX5_PORT_CHANGE_SUBTYPE_DOWN = 1,
MLX5_PORT_CHANGE_SUBTYPE_ACTIVE = 4,
MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED = 5,
@@ -968,7 +973,7 @@ enum mlx5_cap_type {
MLX5_CAP_ATOMIC,
MLX5_CAP_ROCE,
MLX5_CAP_IPOIB_OFFLOADS,
- MLX5_CAP_EOIB_OFFLOADS,
+ MLX5_CAP_IPOIB_ENHANCED_OFFLOADS,
MLX5_CAP_FLOW_TABLE,
MLX5_CAP_ESWITCH_FLOW_TABLE,
MLX5_CAP_ESWITCH,
@@ -1011,6 +1016,10 @@ enum mlx5_mcam_feature_groups {
MLX5_GET(per_protocol_networking_offload_caps,\
mdev->caps.hca_max[MLX5_CAP_ETHERNET_OFFLOADS], cap)
+#define MLX5_CAP_IPOIB_ENHANCED(mdev, cap) \
+ MLX5_GET(per_protocol_networking_offload_caps,\
+ mdev->caps.hca_cur[MLX5_CAP_IPOIB_ENHANCED_OFFLOADS], cap)
+
#define MLX5_CAP_ROCE(mdev, cap) \
MLX5_GET(roce_cap, mdev->caps.hca_cur[MLX5_CAP_ROCE], cap)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index df6ce59..758ef40 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -162,6 +162,13 @@ enum dbg_rsc_type {
MLX5_DBG_RSC_CQ,
};
+enum port_state_policy {
+ MLX5_POLICY_DOWN = 0,
+ MLX5_POLICY_UP = 1,
+ MLX5_POLICY_FOLLOW = 2,
+ MLX5_POLICY_INVALID = 0xffffffff
+};
+
struct mlx5_field_desc {
struct dentry *dent;
int i;
@@ -185,6 +192,7 @@ enum mlx5_dev_event {
MLX5_DEV_EVENT_GUID_CHANGE,
MLX5_DEV_EVENT_CLIENT_REREG,
MLX5_DEV_EVENT_PPS,
+ MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT,
};
enum mlx5_port_status {
@@ -525,6 +533,9 @@ struct mlx5_mkey_table {
struct mlx5_vf_context {
int enabled;
+ u64 port_guid;
+ u64 node_guid;
+ enum port_state_policy policy;
};
struct mlx5_core_sriov {
@@ -842,13 +853,6 @@ struct mlx5_pas {
u8 log_sz;
};
-enum port_state_policy {
- MLX5_POLICY_DOWN = 0,
- MLX5_POLICY_UP = 1,
- MLX5_POLICY_FOLLOW = 2,
- MLX5_POLICY_INVALID = 0xffffffff
-};
-
enum phy_port_state {
MLX5_AAA_111
};
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 87869c0..5bae70e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -200,6 +200,7 @@ enum {
MLX5_CMD_OP_QUERY_SQ = 0x907,
MLX5_CMD_OP_CREATE_RQ = 0x908,
MLX5_CMD_OP_MODIFY_RQ = 0x909,
+ MLX5_CMD_OP_SET_DELAY_DROP_PARAMS = 0x910,
MLX5_CMD_OP_DESTROY_RQ = 0x90a,
MLX5_CMD_OP_QUERY_RQ = 0x90b,
MLX5_CMD_OP_CREATE_RMP = 0x90c,
@@ -840,7 +841,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 retransmission_q_counters[0x1];
u8 reserved_at_183[0x1];
u8 modify_rq_counter_set_id[0x1];
- u8 reserved_at_185[0x1];
+ u8 rq_delay_drop[0x1];
u8 max_qp_cnt[0xa];
u8 pkey_table_size[0x10];
@@ -857,7 +858,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 pcam_reg[0x1];
u8 local_ca_ack_delay[0x5];
u8 port_module_event[0x1];
- u8 reserved_at_1b1[0x1];
+ u8 enhanced_error_q_counters[0x1];
u8 ports_check[0x1];
u8 reserved_at_1b3[0x1];
u8 disable_link_up[0x1];
@@ -873,7 +874,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 max_tc[0x4];
u8 reserved_at_1d0[0x1];
u8 dcbx[0x1];
- u8 reserved_at_1d2[0x3];
+ u8 general_notification_event[0x1];
+ u8 reserved_at_1d3[0x2];
u8 fpga[0x1];
u8 rol_s[0x1];
u8 rol_g[0x1];
@@ -1016,7 +1018,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 log_max_wq_sz[0x5];
u8 nic_vport_change_event[0x1];
- u8 reserved_at_3e1[0xa];
+ u8 disable_local_lb[0x1];
+ u8 reserved_at_3e2[0x9];
u8 log_max_vlan_list[0x5];
u8 reserved_at_3f0[0x3];
u8 log_max_current_mc_list[0x5];
@@ -1187,7 +1190,8 @@ struct mlx5_ifc_cong_control_r_roce_ecn_np_bits {
u8 reserved_at_c0[0x12];
u8 cnp_dscp[0x6];
- u8 reserved_at_d8[0x5];
+ u8 reserved_at_d8[0x4];
+ u8 cnp_prio_mode[0x1];
u8 cnp_802p_prio[0x3];
u8 reserved_at_e0[0x720];
@@ -2515,7 +2519,7 @@ enum {
struct mlx5_ifc_rqc_bits {
u8 rlky[0x1];
- u8 reserved_at_1[0x1];
+ u8 delay_drop_en[0x1];
u8 scatter_fcs[0x1];
u8 vsd[0x1];
u8 mem_rq_type[0x4];
@@ -2562,7 +2566,9 @@ struct mlx5_ifc_rmpc_bits {
struct mlx5_ifc_nic_vport_context_bits {
u8 reserved_at_0[0x5];
u8 min_wqe_inline_mode[0x3];
- u8 reserved_at_8[0x17];
+ u8 reserved_at_8[0x15];
+ u8 disable_mc_local_lb[0x1];
+ u8 disable_uc_local_lb[0x1];
u8 roce_en[0x1];
u8 arm_change_event[0x1];
@@ -3947,7 +3953,47 @@ struct mlx5_ifc_query_q_counter_out_bits {
u8 local_ack_timeout_err[0x20];
- u8 reserved_at_320[0x4e0];
+ u8 reserved_at_320[0xa0];
+
+ u8 resp_local_length_error[0x20];
+
+ u8 req_local_length_error[0x20];
+
+ u8 resp_local_qp_error[0x20];
+
+ u8 local_operation_error[0x20];
+
+ u8 resp_local_protection[0x20];
+
+ u8 req_local_protection[0x20];
+
+ u8 resp_cqe_error[0x20];
+
+ u8 req_cqe_error[0x20];
+
+ u8 req_mw_binding[0x20];
+
+ u8 req_bad_response[0x20];
+
+ u8 req_remote_invalid_request[0x20];
+
+ u8 resp_remote_invalid_request[0x20];
+
+ u8 req_remote_access_errors[0x20];
+
+ u8 resp_remote_access_errors[0x20];
+
+ u8 req_remote_operation_errors[0x20];
+
+ u8 req_transport_retries_exceeded[0x20];
+
+ u8 cq_overflow[0x20];
+
+ u8 resp_cqe_flush_error[0x20];
+
+ u8 req_cqe_flush_error[0x20];
+
+ u8 reserved_at_620[0x1e0];
};
struct mlx5_ifc_query_q_counter_in_bits {
@@ -5229,7 +5275,9 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits {
};
struct mlx5_ifc_modify_nic_vport_field_select_bits {
- u8 reserved_at_0[0x16];
+ u8 reserved_at_0[0x14];
+ u8 disable_uc_local_lb[0x1];
+ u8 disable_mc_local_lb[0x1];
u8 node_guid[0x1];
u8 port_guid[0x1];
u8 min_inline[0x1];
@@ -5847,6 +5895,28 @@ struct mlx5_ifc_destroy_rq_in_bits {
u8 reserved_at_60[0x20];
};
+struct mlx5_ifc_set_delay_drop_params_in_bits {
+ u8 opcode[0x10];
+ u8 reserved_at_10[0x10];
+
+ u8 reserved_at_20[0x10];
+ u8 op_mod[0x10];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x10];
+ u8 delay_drop_timeout[0x10];
+};
+
+struct mlx5_ifc_set_delay_drop_params_out_bits {
+ u8 status[0x8];
+ u8 reserved_at_8[0x18];
+
+ u8 syndrome[0x20];
+
+ u8 reserved_at_40[0x40];
+};
+
struct mlx5_ifc_destroy_rmp_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 6f41270..66d19b6 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg {
#define MLX5_WQE_CTRL_OPCODE_MASK 0xff
#define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
#define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
-#define MLX5_WQE_AV_EXT 0x80000000
enum {
MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4,
@@ -561,6 +560,9 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
u32 *out, int outlen);
+int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev,
+ u32 timeout_usec);
+
int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn);
int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn);
void mlx5_init_qp_table(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 656c70b..aaa0bb9 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -114,5 +114,6 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
u8 other_vport, u8 port_num,
int vf,
struct mlx5_hca_vport_context *req);
-
+int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable);
+int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status);
#endif /* __MLX5_VPORT_H__ */
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index b73a14e..7aca121 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -304,7 +304,13 @@ static inline void rdma_get_ll_mac(struct in6_addr *addr, u8 *mac)
static inline int rdma_is_multicast_addr(struct in6_addr *addr)
{
- return addr->s6_addr[0] == 0xff;
+ u32 ipv4_addr;
+
+ if (addr->s6_addr[0] == 0xff)
+ return 1;
+
+ memcpy(&ipv4_addr, addr->s6_addr + 12, 4);
+ return (ipv6_addr_v4mapped(addr) && ipv4_is_multicast(ipv4_addr));
}
static inline void rdma_get_mcast_mac(struct in6_addr *addr, u8 *mac)
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 5519f31..c124d51 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -193,8 +193,12 @@ static inline void put_ib_ateth_compare(u64 val, struct ib_atomic_eth *ateth)
#define IB_LNH_MASK 3
#define IB_SC_MASK 0xf
#define IB_SC_SHIFT 12
+#define IB_SC5_MASK 0x10
#define IB_SL_MASK 0xf
#define IB_SL_SHIFT 4
+#define IB_SL_SHIFT 4
+#define IB_LVER_MASK 0xf
+#define IB_LVER_SHIFT 8
static inline u8 ib_get_lnh(struct ib_header *hdr)
{
@@ -206,6 +210,11 @@ static inline u8 ib_get_sc(struct ib_header *hdr)
return ((be16_to_cpu(hdr->lrh[0]) >> IB_SC_SHIFT) & IB_SC_MASK);
}
+static inline bool ib_is_sc5(u16 sc5)
+{
+ return !!(sc5 & IB_SC5_MASK);
+}
+
static inline u8 ib_get_sl(struct ib_header *hdr)
{
return ((be16_to_cpu(hdr->lrh[0]) >> IB_SL_SHIFT) & IB_SL_MASK);
@@ -221,6 +230,27 @@ static inline u16 ib_get_slid(struct ib_header *hdr)
return (be16_to_cpu(hdr->lrh[3]));
}
+static inline u8 ib_get_lver(struct ib_header *hdr)
+{
+ return (u8)((be16_to_cpu(hdr->lrh[0]) >> IB_LVER_SHIFT) &
+ IB_LVER_MASK);
+}
+
+static inline u16 ib_get_len(struct ib_header *hdr)
+{
+ return (u16)(be16_to_cpu(hdr->lrh[2]));
+}
+
+static inline u32 ib_get_qkey(struct ib_other_headers *ohdr)
+{
+ return be32_to_cpu(ohdr->u.ud.deth[0]);
+}
+
+static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
+{
+ return ((be32_to_cpu(ohdr->u.ud.deth[1])) & IB_QPN_MASK);
+}
+
/*
* BTH
*/
@@ -229,6 +259,14 @@ static inline u16 ib_get_slid(struct ib_header *hdr)
#define IB_BTH_PAD_MASK 3
#define IB_BTH_PKEY_MASK 0xffff
#define IB_BTH_PAD_SHIFT 20
+#define IB_BTH_A_MASK 1
+#define IB_BTH_A_SHIFT 31
+#define IB_BTH_M_MASK 1
+#define IB_BTH_M_SHIFT 22
+#define IB_BTH_SE_MASK 1
+#define IB_BTH_SE_SHIFT 23
+#define IB_BTH_TVER_MASK 0xf
+#define IB_BTH_TVER_SHIFT 16
static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
{
@@ -247,4 +285,50 @@ static inline u8 ib_bth_get_opcode(struct ib_other_headers *ohdr)
IB_BTH_OPCODE_MASK);
}
+static inline u8 ib_bth_get_ackreq(struct ib_other_headers *ohdr)
+{
+ return (u8)((be32_to_cpu(ohdr->bth[2]) >> IB_BTH_A_SHIFT) &
+ IB_BTH_A_MASK);
+}
+
+static inline u8 ib_bth_get_migreq(struct ib_other_headers *ohdr)
+{
+ return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_M_SHIFT) &
+ IB_BTH_M_MASK);
+}
+
+static inline u8 ib_bth_get_se(struct ib_other_headers *ohdr)
+{
+ return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_SE_SHIFT) &
+ IB_BTH_SE_MASK);
+}
+
+static inline u32 ib_bth_get_psn(struct ib_other_headers *ohdr)
+{
+ return (u32)(be32_to_cpu(ohdr->bth[2]));
+}
+
+static inline u32 ib_bth_get_qpn(struct ib_other_headers *ohdr)
+{
+ return (u32)((be32_to_cpu(ohdr->bth[1])) & IB_QPN_MASK);
+}
+
+static inline u8 ib_bth_get_becn(struct ib_other_headers *ohdr)
+{
+ return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_BECN_SHIFT) &
+ IB_BECN_MASK);
+}
+
+static inline u8 ib_bth_get_fecn(struct ib_other_headers *ohdr)
+{
+ return (u8)((be32_to_cpu(ohdr->bth[1]) >> IB_FECN_SHIFT) &
+ IB_FECN_MASK);
+}
+
+static inline u8 ib_bth_get_tver(struct ib_other_headers *ohdr)
+{
+ return (u8)((be32_to_cpu(ohdr->bth[0]) >> IB_BTH_TVER_SHIFT) &
+ IB_BTH_TVER_MASK);
+}
+
#endif /* IB_HDRS_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 68d947d..1082b4c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -577,7 +577,8 @@ struct ib_device_modify {
enum ib_port_modify_flags {
IB_PORT_SHUTDOWN = 1,
IB_PORT_INIT_TYPE = (1<<2),
- IB_PORT_RESET_QKEY_CNTR = (1<<3)
+ IB_PORT_RESET_QKEY_CNTR = (1<<3),
+ IB_PORT_OPA_MASK_CHG = (1<<4)
};
struct ib_port_modify {
@@ -664,6 +665,8 @@ union rdma_network_hdr {
};
};
+#define IB_QPN_MASK 0xFFFFFF
+
enum {
IB_MULTICAST_QPN = 0xffffff
};
@@ -1059,6 +1062,7 @@ enum ib_qp_create_flags {
/* FREE = 1 << 7, */
IB_QP_CREATE_SCATTER_FCS = 1 << 8,
IB_QP_CREATE_CVLAN_STRIPPING = 1 << 9,
+ IB_QP_CREATE_SOURCE_QPN = 1 << 10,
/* reserve bits 26-31 for low level drivers' internal use */
IB_QP_CREATE_RESERVED_START = 1 << 26,
IB_QP_CREATE_RESERVED_END = 1 << 31,
@@ -1086,6 +1090,7 @@ struct ib_qp_init_attr {
*/
u8 port_num;
struct ib_rwq_ind_table *rwq_ind_tbl;
+ u32 source_qpn;
};
struct ib_qp_open_attr {
@@ -1546,6 +1551,10 @@ enum ib_raw_packet_caps {
IB_RAW_PACKET_CAP_SCATTER_FCS = (1 << 1),
/* Checksum offloads are supported (for both send and receive). */
IB_RAW_PACKET_CAP_IP_CSUM = (1 << 2),
+ /* When a packet is received for an RQ with no receive WQEs, the
+ * packet processing is delayed.
+ */
+ IB_RAW_PACKET_CAP_DELAY_DROP = (1 << 3),
};
enum ib_wq_type {
@@ -1574,6 +1583,7 @@ struct ib_wq {
enum ib_wq_flags {
IB_WQ_FLAGS_CVLAN_STRIPPING = 1 << 0,
IB_WQ_FLAGS_SCATTER_FCS = 1 << 1,
+ IB_WQ_FLAGS_DELAY_DROP = 1 << 2,
};
struct ib_wq_init_attr {
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 55af692..22fb15f 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -75,6 +75,7 @@ struct rvt_ibport {
__be64 mkey;
u64 tid;
u32 port_cap_flags;
+ u16 port_cap3_flags;
u32 pma_sample_start;
u32 pma_sample_interval;
__be16 pma_counter_select[5];
@@ -514,7 +515,8 @@ int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey);
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
u32 len, u64 vaddr, u32 rkey, int acc);
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
- struct rvt_sge *isge, struct ib_sge *sge, int acc);
+ struct rvt_sge *isge, struct rvt_sge *last_sge,
+ struct ib_sge *sge, int acc);
struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid,
u16 lid);
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index d664d2e..07e2fff 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -396,7 +396,7 @@ struct rvt_srq {
#define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
#define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
#define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1)
-#define RVT_QPN_MASK 0xFFFFFF
+#define RVT_QPN_MASK IB_QPN_MASK
/*
* QPN-map pages start out as NULL, they get allocated upon
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 270c350..63656d2 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -578,7 +578,7 @@ struct ib_uverbs_ex_create_qp {
__u32 comp_mask;
__u32 create_flags;
__u32 rwq_ind_tbl_handle;
- __u32 reserved1;
+ __u32 source_qpn;
};
struct ib_uverbs_open_qp {
diff --git a/include/uapi/rdma/mlx4-abi.h b/include/uapi/rdma/mlx4-abi.h
index af43175..d915cab 100644
--- a/include/uapi/rdma/mlx4-abi.h
+++ b/include/uapi/rdma/mlx4-abi.h
@@ -95,13 +95,65 @@ struct mlx4_ib_create_srq_resp {
__u32 reserved;
};
+struct mlx4_ib_create_qp_rss {
+ __u64 rx_hash_fields_mask;
+ __u8 rx_hash_function;
+ __u8 rx_key_len;
+ __u8 reserved[6];
+ __u8 rx_hash_key[40];
+ __u32 comp_mask;
+ __u32 reserved1;
+};
+
struct mlx4_ib_create_qp {
__u64 buf_addr;
__u64 db_addr;
__u8 log_sq_bb_count;
__u8 log_sq_stride;
__u8 sq_no_prefetch;
- __u8 reserved[5];
+ __u32 inl_recv_sz;
+ __u8 reserved;
+};
+
+struct mlx4_ib_create_wq {
+ __u64 buf_addr;
+ __u64 db_addr;
+ __u8 log_range_size;
+ __u8 reserved[3];
+ __u32 comp_mask;
+ __u32 reserved1;
+};
+
+struct mlx4_ib_modify_wq {
+ __u32 comp_mask;
+ __u32 reserved;
+};
+
+struct mlx4_ib_create_rwq_ind_tbl_resp {
+ __u32 response_length;
+ __u32 reserved;
+};
+
+/* RX Hash function flags */
+enum mlx4_ib_rx_hash_function_flags {
+ MLX4_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
+};
+
+/*
+ * RX Hash flags, these flags allows to set which incoming packet's field should
+ * participates in RX Hash. Each flag represent certain packet's field,
+ * when the flag is set the field that is represented by the flag will
+ * participate in RX Hash calculation.
+ */
+enum mlx4_ib_rx_hash_fields {
+ MLX4_IB_RX_HASH_SRC_IPV4 = 1 << 0,
+ MLX4_IB_RX_HASH_DST_IPV4 = 1 << 1,
+ MLX4_IB_RX_HASH_SRC_IPV6 = 1 << 2,
+ MLX4_IB_RX_HASH_DST_IPV6 = 1 << 3,
+ MLX4_IB_RX_HASH_SRC_PORT_TCP = 1 << 4,
+ MLX4_IB_RX_HASH_DST_PORT_TCP = 1 << 5,
+ MLX4_IB_RX_HASH_SRC_PORT_UDP = 1 << 6,
+ MLX4_IB_RX_HASH_DST_PORT_UDP = 1 << 7
};
#endif /* MLX4_ABI_USER_H */