[NET]: reduce per cpu ram used for loopback stats

We dont need a full struct net_device_stats (currently 23 long : 184 bytes on 
x86_64) per possible CPU, but only two counters : bytes and packets

We save few CPU cycles too in loopback_xmit() not updating 4 fields, but 2. 

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 4178b4b1..93fbea1 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -58,7 +58,11 @@
 #include <linux/tcp.h>
 #include <linux/percpu.h>
 
-static DEFINE_PER_CPU(struct net_device_stats, loopback_stats);
+struct pcpu_lstats {
+	unsigned long packets;
+	unsigned long bytes;
+};
+static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats);
 
 #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)
 
@@ -128,7 +132,7 @@
  */
 static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device_stats *lb_stats;
+	struct pcpu_lstats *lb_stats;
 
 	skb_orphan(skb);
 
@@ -149,11 +153,9 @@
 #endif
 	dev->last_rx = jiffies;
 
-	lb_stats = &per_cpu(loopback_stats, get_cpu());
-	lb_stats->rx_bytes += skb->len;
-	lb_stats->tx_bytes = lb_stats->rx_bytes;
-	lb_stats->rx_packets++;
-	lb_stats->tx_packets = lb_stats->rx_packets;
+	lb_stats = &per_cpu(pcpu_lstats, get_cpu());
+	lb_stats->bytes += skb->len;
+	lb_stats->packets++;
 	put_cpu();
 
 	netif_rx(skb);
@@ -166,20 +168,21 @@
 static struct net_device_stats *get_stats(struct net_device *dev)
 {
 	struct net_device_stats *stats = &loopback_stats;
+	unsigned long bytes = 0;
+	unsigned long packets = 0;
 	int i;
 
-	memset(stats, 0, sizeof(struct net_device_stats));
-
 	for_each_possible_cpu(i) {
-		struct net_device_stats *lb_stats;
+		const struct pcpu_lstats *lb_stats;
 
-		lb_stats = &per_cpu(loopback_stats, i);
-		stats->rx_bytes   += lb_stats->rx_bytes;
-		stats->tx_bytes   += lb_stats->tx_bytes;
-		stats->rx_packets += lb_stats->rx_packets;
-		stats->tx_packets += lb_stats->tx_packets;
+		lb_stats = &per_cpu(pcpu_lstats, i);
+		bytes   += lb_stats->bytes;
+		packets += lb_stats->packets;
 	}
-
+	stats->rx_packets = packets;
+	stats->tx_packets = packets;
+	stats->rx_bytes = bytes;
+	stats->tx_bytes = bytes;
 	return stats;
 }