[PATCH] ibmveth change buffer pools dynamically
This patch provides a sysfs interface to change some properties of the
ibmveth buffer pools (size of the buffers, number of buffers per pool,
and whether a pool is active). Ethernet drivers use ethtool to provide
this type of functionality. However, the buffers in the ibmveth driver
can have an arbitrary size (not only regular, mini, and jumbo which are
the only sizes that ethtool can change), and also ibmveth can have an
arbitrary number of buffer pools.
Under heavy load we have seen dropped packets, which obviously kills TCP
performance. We have created several fixes that mitigate this issue,
but we definitely need a way of changing the number of buffers for an
adapter dynamically. Also, changing the size of the buffers allows
users to change the MTU to something big (bigger than a jumbo frame)
greatly improving performance on partition to partition transfers.
The patch creates directories pool0...pool4 in the device directory in
sysfs, each with files: num, size, and active (which default to the
values in the mainline version).
Comments and suggestions are welcome...
--
Santiago A. Leon
Power Linux Development
IBM Linux Technology Center
Signed-off-by: Jeff Garzik <jeff@garzik.org>
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 52d0102..71c74fb 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -96,6 +96,7 @@
static void ibmveth_proc_unregister_adapter(struct ibmveth_adapter *adapter);
static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
static inline void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter);
+static struct kobj_type ktype_veth_pool;
#ifdef CONFIG_PROC_FS
#define IBMVETH_PROC_DIR "net/ibmveth"
@@ -133,12 +134,13 @@
}
/* setup the initial settings for a buffer pool */
-static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool, u32 pool_index, u32 pool_size, u32 buff_size)
+static void ibmveth_init_buffer_pool(struct ibmveth_buff_pool *pool, u32 pool_index, u32 pool_size, u32 buff_size, u32 pool_active)
{
pool->size = pool_size;
pool->index = pool_index;
pool->buff_size = buff_size;
pool->threshold = pool_size / 2;
+ pool->active = pool_active;
}
/* allocate and setup an buffer pool - called during open */
@@ -180,7 +182,6 @@
atomic_set(&pool->available, 0);
pool->producer_index = 0;
pool->consumer_index = 0;
- pool->active = 0;
return 0;
}
@@ -301,7 +302,6 @@
kfree(pool->skbuff);
pool->skbuff = NULL;
}
- pool->active = 0;
}
/* remove a buffer from a pool */
@@ -433,7 +433,9 @@
}
for(i = 0; i<IbmVethNumBufferPools; i++)
- ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[i]);
+ if (adapter->rx_buff_pool[i].active)
+ ibmveth_free_buffer_pool(adapter,
+ &adapter->rx_buff_pool[i]);
}
static int ibmveth_open(struct net_device *netdev)
@@ -489,9 +491,6 @@
adapter->rx_queue.num_slots = rxq_entries;
adapter->rx_queue.toggle = 1;
- /* call change_mtu to init the buffer pools based in initial mtu */
- ibmveth_change_mtu(netdev, netdev->mtu);
-
memcpy(&mac_address, netdev->dev_addr, netdev->addr_len);
mac_address = mac_address >> 16;
@@ -522,6 +521,17 @@
return -ENONET;
}
+ for(i = 0; i<IbmVethNumBufferPools; i++) {
+ if(!adapter->rx_buff_pool[i].active)
+ continue;
+ if (ibmveth_alloc_buffer_pool(&adapter->rx_buff_pool[i])) {
+ ibmveth_error_printk("unable to alloc pool\n");
+ adapter->rx_buff_pool[i].active = 0;
+ ibmveth_cleanup(adapter);
+ return -ENOMEM ;
+ }
+ }
+
ibmveth_debug_printk("registering irq 0x%x\n", netdev->irq);
if((rc = request_irq(netdev->irq, &ibmveth_interrupt, 0, netdev->name, netdev)) != 0) {
ibmveth_error_printk("unable to request irq 0x%x, rc %d\n", netdev->irq, rc);
@@ -550,7 +560,8 @@
ibmveth_debug_printk("close starting\n");
- netif_stop_queue(netdev);
+ if (!adapter->pool_config)
+ netif_stop_queue(netdev);
free_irq(netdev->irq, netdev);
@@ -876,46 +887,22 @@
static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
{
struct ibmveth_adapter *adapter = dev->priv;
+ int new_mtu_oh = new_mtu + IBMVETH_BUFF_OH;
int i;
- int prev_smaller = 1;
- if ((new_mtu < 68) ||
- (new_mtu > (pool_size[IbmVethNumBufferPools-1]) - IBMVETH_BUFF_OH))
+ if (new_mtu < IBMVETH_MAX_MTU)
return -EINVAL;
+ /* Look for an active buffer pool that can hold the new MTU */
for(i = 0; i<IbmVethNumBufferPools; i++) {
- int activate = 0;
- if (new_mtu > (pool_size[i] - IBMVETH_BUFF_OH)) {
- activate = 1;
- prev_smaller= 1;
- } else {
- if (prev_smaller)
- activate = 1;
- prev_smaller= 0;
+ if (!adapter->rx_buff_pool[i].active)
+ continue;
+ if (new_mtu_oh < adapter->rx_buff_pool[i].buff_size) {
+ dev->mtu = new_mtu;
+ return 0;
}
-
- if (activate && !adapter->rx_buff_pool[i].active) {
- struct ibmveth_buff_pool *pool =
- &adapter->rx_buff_pool[i];
- if(ibmveth_alloc_buffer_pool(pool)) {
- ibmveth_error_printk("unable to alloc pool\n");
- return -ENOMEM;
- }
- adapter->rx_buff_pool[i].active = 1;
- } else if (!activate && adapter->rx_buff_pool[i].active) {
- adapter->rx_buff_pool[i].active = 0;
- h_free_logical_lan_buffer(adapter->vdev->unit_address,
- (u64)pool_size[i]);
- }
-
}
-
- /* kick the interrupt handler so that the new buffer pools get
- replenished or deallocated */
- ibmveth_interrupt(dev->irq, dev, NULL);
-
- dev->mtu = new_mtu;
- return 0;
+ return -EINVAL;
}
static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
@@ -960,6 +947,7 @@
adapter->vdev = dev;
adapter->netdev = netdev;
adapter->mcastFilterSize= *mcastFilterSize_p;
+ adapter->pool_config = 0;
/* Some older boxes running PHYP non-natively have an OF that
returns a 8-byte local-mac-address field (and the first
@@ -994,9 +982,16 @@
memcpy(&netdev->dev_addr, &adapter->mac_addr, netdev->addr_len);
- for(i = 0; i<IbmVethNumBufferPools; i++)
+ for(i = 0; i<IbmVethNumBufferPools; i++) {
+ struct kobject *kobj = &adapter->rx_buff_pool[i].kobj;
ibmveth_init_buffer_pool(&adapter->rx_buff_pool[i], i,
- pool_count[i], pool_size[i]);
+ pool_count[i], pool_size[i],
+ pool_active[i]);
+ kobj->parent = &dev->dev.kobj;
+ sprintf(kobj->name, "pool%d", i);
+ kobj->ktype = &ktype_veth_pool;
+ kobject_register(kobj);
+ }
ibmveth_debug_printk("adapter @ 0x%p\n", adapter);
@@ -1025,6 +1020,10 @@
{
struct net_device *netdev = dev->dev.driver_data;
struct ibmveth_adapter *adapter = netdev->priv;
+ int i;
+
+ for(i = 0; i<IbmVethNumBufferPools; i++)
+ kobject_unregister(&adapter->rx_buff_pool[i].kobj);
unregister_netdev(netdev);
@@ -1169,6 +1168,132 @@
}
#endif /* CONFIG_PROC_FS */
+static struct attribute veth_active_attr;
+static struct attribute veth_num_attr;
+static struct attribute veth_size_attr;
+
+static ssize_t veth_pool_show(struct kobject * kobj,
+ struct attribute * attr, char * buf)
+{
+ struct ibmveth_buff_pool *pool = container_of(kobj,
+ struct ibmveth_buff_pool,
+ kobj);
+
+ if (attr == &veth_active_attr)
+ return sprintf(buf, "%d\n", pool->active);
+ else if (attr == &veth_num_attr)
+ return sprintf(buf, "%d\n", pool->size);
+ else if (attr == &veth_size_attr)
+ return sprintf(buf, "%d\n", pool->buff_size);
+ return 0;
+}
+
+static ssize_t veth_pool_store(struct kobject * kobj, struct attribute * attr,
+const char * buf, size_t count)
+{
+ struct ibmveth_buff_pool *pool = container_of(kobj,
+ struct ibmveth_buff_pool,
+ kobj);
+ struct net_device *netdev =
+ container_of(kobj->parent, struct device, kobj)->driver_data;
+ struct ibmveth_adapter *adapter = netdev->priv;
+ long value = simple_strtol(buf, NULL, 10);
+ long rc;
+
+ if (attr == &veth_active_attr) {
+ if (value && !pool->active) {
+ if(ibmveth_alloc_buffer_pool(pool)) {
+ ibmveth_error_printk("unable to alloc pool\n");
+ return -ENOMEM;
+ }
+ pool->active = 1;
+ adapter->pool_config = 1;
+ ibmveth_close(netdev);
+ adapter->pool_config = 0;
+ if ((rc = ibmveth_open(netdev)))
+ return rc;
+ } else if (!value && pool->active) {
+ int mtu = netdev->mtu + IBMVETH_BUFF_OH;
+ int i;
+ /* Make sure there is a buffer pool with buffers that
+ can hold a packet of the size of the MTU */
+ for(i = 0; i<IbmVethNumBufferPools; i++) {
+ if (pool == &adapter->rx_buff_pool[i])
+ continue;
+ if (!adapter->rx_buff_pool[i].active)
+ continue;
+ if (mtu < adapter->rx_buff_pool[i].buff_size) {
+ pool->active = 0;
+ h_free_logical_lan_buffer(adapter->
+ vdev->
+ unit_address,
+ pool->
+ buff_size);
+ }
+ }
+ if (pool->active) {
+ ibmveth_error_printk("no active pool >= MTU\n");
+ return -EPERM;
+ }
+ }
+ } else if (attr == &veth_num_attr) {
+ if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
+ return -EINVAL;
+ else {
+ adapter->pool_config = 1;
+ ibmveth_close(netdev);
+ adapter->pool_config = 0;
+ pool->size = value;
+ if ((rc = ibmveth_open(netdev)))
+ return rc;
+ }
+ } else if (attr == &veth_size_attr) {
+ if (value <= IBMVETH_BUFF_OH || value > IBMVETH_MAX_BUF_SIZE)
+ return -EINVAL;
+ else {
+ adapter->pool_config = 1;
+ ibmveth_close(netdev);
+ adapter->pool_config = 0;
+ pool->buff_size = value;
+ if ((rc = ibmveth_open(netdev)))
+ return rc;
+ }
+ }
+
+ /* kick the interrupt handler to allocate/deallocate pools */
+ ibmveth_interrupt(netdev->irq, netdev, NULL);
+ return count;
+}
+
+
+#define ATTR(_name, _mode) \
+ struct attribute veth_##_name##_attr = { \
+ .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE \
+ };
+
+static ATTR(active, 0644);
+static ATTR(num, 0644);
+static ATTR(size, 0644);
+
+static struct attribute * veth_pool_attrs[] = {
+ &veth_active_attr,
+ &veth_num_attr,
+ &veth_size_attr,
+ NULL,
+};
+
+static struct sysfs_ops veth_pool_ops = {
+ .show = veth_pool_show,
+ .store = veth_pool_store,
+};
+
+static struct kobj_type ktype_veth_pool = {
+ .release = NULL,
+ .sysfs_ops = &veth_pool_ops,
+ .default_attrs = veth_pool_attrs,
+};
+
+
static struct vio_device_id ibmveth_device_table[] __devinitdata= {
{ "network", "IBM,l-lan"},
{ "", "" }