openvswitch: Per cpu flow stats.

With the megaflow implementation, an OVS flow can be shared between
multiple CPUs, which makes updating its statistics a highly contended
operation. This patch uses per-CPU stats in cases where a flow is
likely to be shared across CPUs (i.e. when there is a wildcard in the
5-tuple, so packets are likely to be spread by RSS). Otherwise it
keeps the current single-instance strategy, saving memory and
allocation time.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
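---
Note for reviewers: the hunks below rely on a two-level stats layout
defined in flow.h, which is not part of this excerpt. A minimal sketch
of the assumed definitions (field and struct names inferred from the
accesses below; the companion flow.h change may differ in detail):

/* One stats instance, updated under its own lock. */
struct flow_stats {
	u64 packet_count;	/* Number of packets matched. */
	u64 byte_count;		/* Number of bytes matched. */
	unsigned long used;	/* Last used time (in jiffies). */
	spinlock_t lock;	/* Lock for atomic stats update. */
	__be16 tcp_flags;	/* Union of seen TCP flags. */
};

/* Either one shared instance or one instance per possible CPU. */
struct sw_flow_stats {
	bool is_percpu;
	union {
		struct flow_stats *stat;
		struct flow_stats __percpu *cpu_stats;
	};
};

Callers outside this excerpt (e.g. the flow dump path in datapath.c)
are expected to use ovs_flow_stats_get() to fold the per-CPU instances
into a single ovs_flow_stats.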
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index b409f52..16f4b46 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -35,6 +35,7 @@
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/sctp.h>
+#include <linux/smp.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
@@ -60,23 +61,116 @@
 
 #define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & htons(0x0FFF))
 
-void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
+void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
 {
+	struct flow_stats *stats;
 	__be16 tcp_flags = 0;
 
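+	/* Flows likely to be shared across CPUs got per-CPU stats at
+	 * allocation time; all other flows use a single shared instance.
+	 */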
+	if (!flow->stats.is_percpu)
+		stats = flow->stats.stat;
+	else
+		stats = this_cpu_ptr(flow->stats.cpu_stats);
+
 	if ((flow->key.eth.type == htons(ETH_P_IP) ||
 	     flow->key.eth.type == htons(ETH_P_IPV6)) &&
 	    flow->key.ip.proto == IPPROTO_TCP &&
 	    likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
 		tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
 	}
 
-	spin_lock(&flow->lock);
-	flow->used = jiffies;
-	flow->packet_count++;
-	flow->byte_count += skb->len;
-	flow->tcp_flags |= tcp_flags;
-	spin_unlock(&flow->lock);
+	spin_lock(&stats->lock);
+	stats->used = jiffies;
+	stats->packet_count++;
+	stats->byte_count += skb->len;
+	stats->tcp_flags |= tcp_flags;
+	spin_unlock(&stats->lock);
+}
+
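+/* Fold one stats instance into the running totals under its lock. */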
+static void stats_read(struct flow_stats *stats,
+		       struct ovs_flow_stats *ovs_stats,
+		       unsigned long *used, __be16 *tcp_flags)
+{
+	spin_lock(&stats->lock);
+	if (time_after(stats->used, *used))
+		*used = stats->used;
+	*tcp_flags |= stats->tcp_flags;
+	ovs_stats->n_packets += stats->packet_count;
+	ovs_stats->n_bytes += stats->byte_count;
+	spin_unlock(&stats->lock);
+}
+
+void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
+			unsigned long *used, __be16 *tcp_flags)
+{
+	int cpu, cur_cpu;
+
+	*used = 0;
+	*tcp_flags = 0;
+	memset(ovs_stats, 0, sizeof(*ovs_stats));
+
+	if (!flow->stats.is_percpu) {
+		stats_read(flow->stats.stat, ovs_stats, used, tcp_flags);
+	} else {
+		cur_cpu = get_cpu();
+		for_each_possible_cpu(cpu) {
+			struct flow_stats *stats;
+
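+			/* On the local CPU this lock is also taken from
+			 * softirq context by ovs_flow_stats_update(), so
+			 * block BHs to avoid deadlocking against it.
+			 */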
+			if (cpu == cur_cpu)
+				local_bh_disable();
+
+			stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
+			stats_read(stats, ovs_stats, used, tcp_flags);
+
+			if (cpu == cur_cpu)
+				local_bh_enable();
+		}
+		put_cpu();
+	}
+}
+
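+/* Zero one stats instance under its lock. */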
+static void stats_reset(struct flow_stats *stats)
+{
+	spin_lock(&stats->lock);
+	stats->used = 0;
+	stats->packet_count = 0;
+	stats->byte_count = 0;
+	stats->tcp_flags = 0;
+	spin_unlock(&stats->lock);
+}
+
+void ovs_flow_stats_clear(struct sw_flow *flow)
+{
+	int cpu, cur_cpu;
+
+	if (!flow->stats.is_percpu) {
+		stats_reset(flow->stats.stat);
+	} else {
+		cur_cpu = get_cpu();
+
+		for_each_possible_cpu(cpu) {
+
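+			/* Same bottom-half deadlock consideration as in
+			 * ovs_flow_stats_get() above.
+			 */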
+			if (cpu == cur_cpu)
+				local_bh_disable();
+
+			stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu));
+
+			if (cpu == cur_cpu)
+				local_bh_enable();
+		}
+		put_cpu();
+	}
 }
 
 static int check_header(struct sk_buff *skb, int len)