net/mlx4_core: Implement resource quota enforcement
Implements resource quota grant decision when resources are requested,
for the following resources: QPs, CQs, SRQs, MPTs, MTTs, vlans, MACs,
and Counters.
When granting a resource, the quota system increases the allocated-count
for that slave.
When the slave later frees the resource, its allocated-count is reduced.
A spinlock is used to protect the integrity of each resource's free-pool counter.
(One slave may be in the process of being granted a resource while another
slave has crashed, initiating cleanup of that slave's resource quotas).
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index e7eb86e..e582a41 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -505,6 +505,7 @@
};
struct resource_allocator {
+ spinlock_t alloc_lock; /* protect quotas */
union {
int res_reserved;
int res_port_rsvd[MLX4_MAX_PORTS];
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index cc5d6d0..b1603e2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -284,6 +284,85 @@
}
static void rem_slave_vlans(struct mlx4_dev *dev, int slave);
+static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource res_type, int count,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[res_type];
+ int err = -EINVAL;
+ int allocated, free, reserved, guaranteed, from_free;
+
+ if (slave > dev->num_vfs)
+ return -EINVAL;
+
+ spin_lock(&res_alloc->alloc_lock);
+ allocated = (port > 0) ?
+ res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] :
+ res_alloc->allocated[slave];
+ free = (port > 0) ? res_alloc->res_port_free[port - 1] :
+ res_alloc->res_free;
+ reserved = (port > 0) ? res_alloc->res_port_rsvd[port - 1] :
+ res_alloc->res_reserved;
+ guaranteed = res_alloc->guaranteed[slave];
+
+ if (allocated + count > res_alloc->quota[slave])
+ goto out;
+
+ if (allocated + count <= guaranteed) {
+ err = 0;
+ } else {
+ /* portion may need to be obtained from free area */
+ if (guaranteed - allocated > 0)
+ from_free = count - (guaranteed - allocated);
+ else
+ from_free = count;
+
+ if (free - from_free > reserved)
+ err = 0;
+ }
+
+ if (!err) {
+ /* grant the request */
+ if (port > 0) {
+ res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count;
+ res_alloc->res_port_free[port - 1] -= count;
+ } else {
+ res_alloc->allocated[slave] += count;
+ res_alloc->res_free -= count;
+ }
+ }
+
+out:
+ spin_unlock(&res_alloc->alloc_lock);
+ return err;
+}
+
+static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave,
+ enum mlx4_resource res_type, int count,
+ int port)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct resource_allocator *res_alloc =
+ &priv->mfunc.master.res_tracker.res_alloc[res_type];
+
+ if (slave > dev->num_vfs)
+ return;
+
+ spin_lock(&res_alloc->alloc_lock);
+ if (port > 0) {
+ res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count;
+ res_alloc->res_port_free[port - 1] += count;
+ } else {
+ res_alloc->allocated[slave] -= count;
+ res_alloc->res_free += count;
+ }
+
+ spin_unlock(&res_alloc->alloc_lock);
+ return;
+}
+
static inline void initialize_res_quotas(struct mlx4_dev *dev,
struct resource_allocator *res_alloc,
enum mlx4_resource res_type,
@@ -373,6 +452,7 @@
!res_alloc->allocated)
goto no_mem_err;
+ spin_lock_init(&res_alloc->alloc_lock);
for (t = 0; t < dev->num_vfs + 1; t++) {
switch (i) {
case RES_QP:
@@ -1399,12 +1479,19 @@
case RES_OP_RESERVE:
count = get_param_l(&in_param);
align = get_param_h(&in_param);
- err = __mlx4_qp_reserve_range(dev, count, align, &base);
+ err = mlx4_grant_resource(dev, slave, RES_QP, count, 0);
if (err)
return err;
+ err = __mlx4_qp_reserve_range(dev, count, align, &base);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
+ return err;
+ }
+
err = add_res_range(dev, slave, base, count, RES_QP, 0);
if (err) {
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
__mlx4_qp_release_range(dev, base, count);
return err;
}
@@ -1452,15 +1539,24 @@
return err;
order = get_param_l(&in_param);
+
+ err = mlx4_grant_resource(dev, slave, RES_MTT, 1 << order, 0);
+ if (err)
+ return err;
+
base = __mlx4_alloc_mtt_range(dev, order);
- if (base == -1)
+ if (base == -1) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
return -ENOMEM;
+ }
err = add_res_range(dev, slave, base, 1, RES_MTT, order);
- if (err)
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
__mlx4_free_mtt_range(dev, base, order);
- else
+ } else {
set_param_l(out_param, base);
+ }
return err;
}
@@ -1475,13 +1571,20 @@
switch (op) {
case RES_OP_RESERVE:
- index = __mlx4_mpt_reserve(dev);
- if (index == -1)
+ err = mlx4_grant_resource(dev, slave, RES_MPT, 1, 0);
+ if (err)
break;
+
+ index = __mlx4_mpt_reserve(dev);
+ if (index == -1) {
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
+ break;
+ }
id = index & mpt_mask(dev);
err = add_res_range(dev, slave, id, 1, RES_MPT, index);
if (err) {
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
__mlx4_mpt_release(dev, index);
break;
}
@@ -1515,12 +1618,19 @@
switch (op) {
case RES_OP_RESERVE_AND_MAP:
- err = __mlx4_cq_alloc_icm(dev, &cqn);
+ err = mlx4_grant_resource(dev, slave, RES_CQ, 1, 0);
if (err)
break;
+ err = __mlx4_cq_alloc_icm(dev, &cqn);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
+ break;
+ }
+
err = add_res_range(dev, slave, cqn, 1, RES_CQ, 0);
if (err) {
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
__mlx4_cq_free_icm(dev, cqn);
break;
}
@@ -1543,12 +1653,19 @@
switch (op) {
case RES_OP_RESERVE_AND_MAP:
- err = __mlx4_srq_alloc_icm(dev, &srqn);
+ err = mlx4_grant_resource(dev, slave, RES_SRQ, 1, 0);
if (err)
break;
+ err = __mlx4_srq_alloc_icm(dev, &srqn);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
+ break;
+ }
+
err = add_res_range(dev, slave, srqn, 1, RES_SRQ, 0);
if (err) {
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
__mlx4_srq_free_icm(dev, srqn);
break;
}
@@ -1569,9 +1686,13 @@
struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
struct mac_res *res;
+ if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port))
+ return -EINVAL;
res = kzalloc(sizeof *res, GFP_KERNEL);
- if (!res)
+ if (!res) {
+ mlx4_release_resource(dev, slave, RES_MAC, 1, port);
return -ENOMEM;
+ }
res->mac = mac;
res->port = (u8) port;
list_add_tail(&res->list,
@@ -1591,6 +1712,7 @@
list_for_each_entry_safe(res, tmp, mac_list, list) {
if (res->mac == mac && res->port == (u8) port) {
list_del(&res->list);
+ mlx4_release_resource(dev, slave, RES_MAC, 1, port);
kfree(res);
break;
}
@@ -1608,6 +1730,7 @@
list_for_each_entry_safe(res, tmp, mac_list, list) {
list_del(&res->list);
__mlx4_unregister_mac(dev, res->port, res->mac);
+ mlx4_release_resource(dev, slave, RES_MAC, 1, res->port);
kfree(res);
}
}
@@ -1656,9 +1779,13 @@
}
}
+ if (mlx4_grant_resource(dev, slave, RES_VLAN, 1, port))
+ return -EINVAL;
res = kzalloc(sizeof(*res), GFP_KERNEL);
- if (!res)
+ if (!res) {
+ mlx4_release_resource(dev, slave, RES_VLAN, 1, port);
return -ENOMEM;
+ }
res->vlan = vlan;
res->port = (u8) port;
res->vlan_index = vlan_index;
@@ -1682,6 +1809,8 @@
if (res->vlan == vlan && res->port == (u8) port) {
if (!--res->ref_count) {
list_del(&res->list);
+ mlx4_release_resource(dev, slave, RES_VLAN,
+ 1, port);
kfree(res);
}
break;
@@ -1703,6 +1832,7 @@
/* dereference the vlan the num times the slave referenced it */
for (i = 0; i < res->ref_count; i++)
__mlx4_unregister_vlan(dev, res->port, res->vlan);
+ mlx4_release_resource(dev, slave, RES_VLAN, 1, res->port);
kfree(res);
}
}
@@ -1749,15 +1879,23 @@
if (op != RES_OP_RESERVE)
return -EINVAL;
- err = __mlx4_counter_alloc(dev, &index);
+ err = mlx4_grant_resource(dev, slave, RES_COUNTER, 1, 0);
if (err)
return err;
+ err = __mlx4_counter_alloc(dev, &index);
+ if (err) {
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
+ return err;
+ }
+
err = add_res_range(dev, slave, index, 1, RES_COUNTER, 0);
- if (err)
+ if (err) {
__mlx4_counter_free(dev, index);
- else
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
+ } else {
set_param_l(out_param, index);
+ }
return err;
}
@@ -1864,6 +2002,7 @@
err = rem_res_range(dev, slave, base, count, RES_QP, 0);
if (err)
break;
+ mlx4_release_resource(dev, slave, RES_QP, count, 0);
__mlx4_qp_release_range(dev, base, count);
break;
case RES_OP_MAP_ICM:
@@ -1901,8 +2040,10 @@
base = get_param_l(&in_param);
order = get_param_h(&in_param);
err = rem_res_range(dev, slave, base, 1, RES_MTT, order);
- if (!err)
+ if (!err) {
+ mlx4_release_resource(dev, slave, RES_MTT, 1 << order, 0);
__mlx4_free_mtt_range(dev, base, order);
+ }
return err;
}
@@ -1927,6 +2068,7 @@
err = rem_res_range(dev, slave, id, 1, RES_MPT, 0);
if (err)
break;
+ mlx4_release_resource(dev, slave, RES_MPT, 1, 0);
__mlx4_mpt_release(dev, index);
break;
case RES_OP_MAP_ICM:
@@ -1961,6 +2103,7 @@
if (err)
break;
+ mlx4_release_resource(dev, slave, RES_CQ, 1, 0);
__mlx4_cq_free_icm(dev, cqn);
break;
@@ -1985,6 +2128,7 @@
if (err)
break;
+ mlx4_release_resource(dev, slave, RES_SRQ, 1, 0);
__mlx4_srq_free_icm(dev, srqn);
break;
@@ -2056,6 +2200,7 @@
return err;
__mlx4_counter_free(dev, index);
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
return err;
}
@@ -3785,6 +3930,11 @@
&tracker->res_tree[RES_QP]);
list_del(&qp->com.list);
spin_unlock_irq(mlx4_tlock(dev));
+ if (!valid_reserved(dev, slave, qpn)) {
+ __mlx4_qp_release_range(dev, qpn, 1);
+ mlx4_release_resource(dev, slave,
+ RES_QP, 1, 0);
+ }
kfree(qp);
state = 0;
break;
@@ -3856,6 +4006,8 @@
&tracker->res_tree[RES_SRQ]);
list_del(&srq->com.list);
spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave,
+ RES_SRQ, 1, 0);
kfree(srq);
state = 0;
break;
@@ -3922,6 +4074,8 @@
&tracker->res_tree[RES_CQ]);
list_del(&cq->com.list);
spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave,
+ RES_CQ, 1, 0);
kfree(cq);
state = 0;
break;
@@ -3985,6 +4139,8 @@
&tracker->res_tree[RES_MPT]);
list_del(&mpt->com.list);
spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave,
+ RES_MPT, 1, 0);
kfree(mpt);
state = 0;
break;
@@ -4054,6 +4210,8 @@
&tracker->res_tree[RES_MTT]);
list_del(&mtt->com.list);
spin_unlock_irq(mlx4_tlock(dev));
+ mlx4_release_resource(dev, slave, RES_MTT,
+ 1 << mtt->order, 0);
kfree(mtt);
state = 0;
break;
@@ -4212,6 +4370,7 @@
list_del(&counter->com.list);
kfree(counter);
__mlx4_counter_free(dev, index);
+ mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
}
}
spin_unlock_irq(mlx4_tlock(dev));