sched/topology: Make Energy Aware Scheduling depend on schedutil
Energy Aware Scheduling (EAS) is designed with the assumption that
frequencies of CPUs follow their utilization value. When using a CPUFreq
governor other than schedutil, the chances of this assumption being true
are small, if any. When schedutil is being used, EAS' predictions are at
least consistent with the frequency requests. Although those requests
have no guarantees to be honored by the hardware, they should at least
guide DVFS in the right direction and provide some hope in regards to the
EAS model being accurate.
To make sure EAS is only used in a sane configuration, create a strong
dependency on schedutil being used. Since having sugov compiled-in does
not provide that guarantee, make CPUFreq call a scheduler function on
governor changes hence letting it rebuild the scheduling domains, check
the governors of the online CPUs, and enable/disable EAS accordingly.
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: adharmap@codeaurora.org
Cc: chris.redpath@arm.com
Cc: currojerez@riseup.net
Cc: dietmar.eggemann@arm.com
Cc: edubezval@gmail.com
Cc: gregkh@linuxfoundation.org
Cc: javi.merino@kernel.org
Cc: joel@joelfernandes.org
Cc: juri.lelli@redhat.com
Cc: morten.rasmussen@arm.com
Cc: patrick.bellasi@arm.com
Cc: pkondeti@codeaurora.org
Cc: skannan@codeaurora.org
Cc: smuckle@google.com
Cc: srinivas.pandruvada@linux.intel.com
Cc: thara.gopinath@linaro.org
Cc: tkjos@google.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Cc: viresh.kumar@linaro.org
Link: https://lkml.kernel.org/r/20181203095628.11858-9-quentin.perret@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 6ddb804..0a5a1d3a 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -201,7 +201,10 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
return 1;
}
-#ifdef CONFIG_ENERGY_MODEL
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+DEFINE_MUTEX(sched_energy_mutex);
+bool sched_energy_update;
+
static void free_pd(struct perf_domain *pd)
{
struct perf_domain *tmp;
@@ -275,6 +278,7 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp)
* 1. an Energy Model (EM) is available;
* 2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy.
* 3. the EM complexity is low enough to keep scheduling overheads low;
+ * 4. schedutil is driving the frequency of all CPUs of the rd;
*
* The complexity of the Energy Model is defined as:
*
@@ -294,12 +298,15 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp)
*/
#define EM_MAX_COMPLEXITY 2048
+extern struct cpufreq_governor schedutil_gov;
static void build_perf_domains(const struct cpumask *cpu_map)
{
int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
struct perf_domain *pd = NULL, *tmp;
int cpu = cpumask_first(cpu_map);
struct root_domain *rd = cpu_rq(cpu)->rd;
+ struct cpufreq_policy *policy;
+ struct cpufreq_governor *gov;
/* EAS is enabled for asymmetric CPU capacity topologies. */
if (!per_cpu(sd_asym_cpucapacity, cpu)) {
@@ -315,6 +322,19 @@ static void build_perf_domains(const struct cpumask *cpu_map)
if (find_pd(pd, i))
continue;
+ /* Do not attempt EAS if schedutil is not being used. */
+ policy = cpufreq_cpu_get(i);
+ if (!policy)
+ goto free;
+ gov = policy->governor;
+ cpufreq_cpu_put(policy);
+ if (gov != &schedutil_gov) {
+ if (rd->pd)
+ pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n",
+ cpumask_pr_args(cpu_map));
+ goto free;
+ }
+
/* Create the new pd and add it to the local list. */
tmp = pd_init(i);
if (!tmp)
@@ -356,7 +376,7 @@ static void build_perf_domains(const struct cpumask *cpu_map)
}
#else
static void free_pd(struct perf_domain *pd) { }
-#endif /* CONFIG_ENERGY_MODEL */
+#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/
static void free_rootdomain(struct rcu_head *rcu)
{
@@ -2152,10 +2172,10 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
;
}
-#ifdef CONFIG_ENERGY_MODEL
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
/* Build perf. domains: */
for (i = 0; i < ndoms_new; i++) {
- for (j = 0; j < n; j++) {
+ for (j = 0; j < n && !sched_energy_update; j++) {
if (cpumask_equal(doms_new[i], doms_cur[j]) &&
cpu_rq(cpumask_first(doms_cur[j]))->rd->pd)
goto match3;