perf_counter: Optimize perf_counter_alloc()'s inherit case

We don't need to bump the swcounter and attribute usage counts for
inherited counters: the parent counter always holds one already, and
that is enough to generate the needed output.

This avoids up to 3 global atomic increments per inherited
counter.
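As a rough illustration of the pattern (a minimal user-space sketch,
not the kernel code; struct counter, counter_alloc() and counter_free()
are made-up names), global accounting is only taken for a top-level
counter, and the teardown path mirrors the same test so the count stays
balanced; a small usage check follows the patch below:

#include <stdatomic.h>
#include <stdlib.h>

/* Stand-in for the global bookkeeping the patch avoids touching. */
static atomic_int nr_counters;

struct counter {
	struct counter *parent;	/* non-NULL for inherited counters */
};

static struct counter *counter_alloc(struct counter *parent)
{
	struct counter *c = calloc(1, sizeof(*c));

	if (!c)
		return NULL;
	c->parent = parent;

	/*
	 * Only a top-level counter bumps the global count; an inherited
	 * counter relies on its parent's contribution, so cloning it
	 * into many children costs no global atomic operations.
	 */
	if (!c->parent)
		atomic_fetch_add(&nr_counters, 1);
	return c;
}

static void counter_free(struct counter *c)
{
	/* Mirror the allocation-side test to keep the count balanced. */
	if (!c->parent)
		atomic_fetch_sub(&nr_counters, 1);
	free(c);
}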

LKML-Reference: <new-submission>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0a45490..c2b19c1 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1508,11 +1508,13 @@
 {
 	perf_pending_sync(counter);
 
-	atomic_dec(&nr_counters);
-	if (counter->attr.mmap)
-		atomic_dec(&nr_mmap_counters);
-	if (counter->attr.comm)
-		atomic_dec(&nr_comm_counters);
+	if (!counter->parent) {
+		atomic_dec(&nr_counters);
+		if (counter->attr.mmap)
+			atomic_dec(&nr_mmap_counters);
+		if (counter->attr.comm)
+			atomic_dec(&nr_comm_counters);
+	}
 
 	if (counter->destroy)
 		counter->destroy(counter);
@@ -3515,6 +3517,8 @@
 {
 	u64 event = counter->attr.config;
 
+	WARN_ON(counter->parent);
+
 	atomic_dec(&perf_swcounter_enabled[event]);
 }
 
@@ -3551,8 +3555,10 @@
 	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
 	case PERF_COUNT_SW_CONTEXT_SWITCHES:
 	case PERF_COUNT_SW_CPU_MIGRATIONS:
-		atomic_inc(&perf_swcounter_enabled[event]);
-		counter->destroy = sw_perf_counter_destroy;
+		if (!counter->parent) {
+			atomic_inc(&perf_swcounter_enabled[event]);
+			counter->destroy = sw_perf_counter_destroy;
+		}
 		pmu = &perf_ops_generic;
 		break;
 	}
@@ -3663,11 +3669,13 @@
 
 	counter->pmu = pmu;
 
-	atomic_inc(&nr_counters);
-	if (counter->attr.mmap)
-		atomic_inc(&nr_mmap_counters);
-	if (counter->attr.comm)
-		atomic_inc(&nr_comm_counters);
+	if (!counter->parent) {
+		atomic_inc(&nr_counters);
+		if (counter->attr.mmap)
+			atomic_inc(&nr_mmap_counters);
+		if (counter->attr.comm)
+			atomic_inc(&nr_comm_counters);
+	}
 
 	return counter;
 }
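
A quick check of the sketch above (still purely illustrative, reusing
the counter_alloc()/counter_free() helpers defined before the patch):
inheriting a counter into several children leaves the global count at
one, and freeing everything brings it back to zero.

#include <assert.h>

int main(void)
{
	struct counter *parent = counter_alloc(NULL);
	struct counter *child1 = counter_alloc(parent);
	struct counter *child2 = counter_alloc(parent);

	/* Only the parent contributed to the global count. */
	assert(atomic_load(&nr_counters) == 1);

	counter_free(child1);
	counter_free(child2);
	counter_free(parent);
	assert(atomic_load(&nr_counters) == 0);

	return 0;
}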