sched: update aggregate when holding the RQs
It was observed that, in __update_group_shares_cpu():
rq_weight > aggregate()->rq_weight
This is caused by forks/wakeups in between the initial aggregate pass and
the locking of the RQs for load balance. To avoid this situation, partially
re-do the aggregation once we have the RQs locked (which prevents new tasks
from appearing).
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 160d3c2..dae2019 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1721,6 +1721,11 @@
aggregate_group_set_shares(tg, cpu, sd);
}
+static void
+aggregate_get_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
+{
+}
+
static DEFINE_PER_CPU(spinlock_t, aggregate_lock);
static void __init init_aggregate(void)
@@ -1740,6 +1745,11 @@
return 1;
}
+static void update_aggregate(int cpu, struct sched_domain *sd)
+{
+ aggregate_walk_tree(aggregate_get_down, aggregate_get_nop, cpu, sd);
+}
+
static void put_aggregate(int cpu, struct sched_domain *sd)
{
spin_unlock(&per_cpu(aggregate_lock, cpu));
@@ -1761,6 +1771,10 @@
return 0;
}
+static inline void update_aggregate(int cpu, struct sched_domain *sd)
+{
+}
+
static inline void put_aggregate(int cpu, struct sched_domain *sd)
{
}
@@ -2192,6 +2206,12 @@
int load_idx = sd->forkexec_idx;
int imbalance = 100 + (sd->imbalance_pct-100)/2;
+ /*
+ * now that we have both rqs locked the rq weight won't change
+ * anymore - so update the stats.
+ */
+ update_aggregate(this_cpu, sd);
+
do {
unsigned long load, avg_load;
int local_group;