Merge tag 'pm-5.2-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki:
"These fix a recent regression causing kernels built with CONFIG_PM
unset to crash on systems that support the Performance and Energy Bias
Hint (EPB), clean up the cpufreq core and some users of transition
notifiers and introduce a new power domain flag into the generic power
domains framework (genpd).
Specifics:
- Fix recent regression causing kernels built with CONFIG_PM unset to
crash on systems that support the Performance and Energy Bias Hint
(EPB) by avoiding to compile the EPB-related code depending on
CONFIG_PM when it is unset (Rafael Wysocki).
- Clean up the transition notifier invocation code in the cpufreq
core and change some users of cpufreq transition notifiers
accordingly (Viresh Kumar).
- Change MAINTAINERS to cover the schedutil governor as part of
cpufreq (Viresh Kumar).
- Simplify cpufreq_init_policy() to avoid redundant computations (Yue
Hu).
- Add explanatory comment to the cpufreq core (Rafael Wysocki).
- Introduce a new flag, GENPD_FLAG_RPM_ALWAYS_ON, to the generic
power domains (genpd) framework along with the first user of it
(Leonard Crestez)"
* tag 'pm-5.2-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
soc: imx: gpc: Use GENPD_FLAG_RPM_ALWAYS_ON for ERR009619
PM / Domains: Add GENPD_FLAG_RPM_ALWAYS_ON flag
cpufreq: Update MAINTAINERS to include schedutil governor
cpufreq: Don't find governor for setpolicy drivers in cpufreq_init_policy()
cpufreq: Explain the kobject_put() in cpufreq_policy_alloc()
cpufreq: Call transition notifier only once for each policy
x86: intel_epb: Take CONFIG_PM into account
diff --git a/MAINTAINERS b/MAINTAINERS
index 0d31d2b..ee6cf4d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4133,7 +4133,9 @@
F: Documentation/cpu-freq/
F: Documentation/devicetree/bindings/cpufreq/
F: drivers/cpufreq/
+F: kernel/sched/cpufreq*.c
F: include/linux/cpufreq.h
+F: include/linux/sched/cpufreq.h
F: tools/testing/selftests/cpufreq/
CPU FREQUENCY DRIVERS - ARM BIG LITTLE
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index c93fe0f..ebc5380 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -758,15 +758,20 @@ static int cpufreq_callback(struct notifier_block *nb,
unsigned long val, void *data)
{
struct cpufreq_freqs *freq = data;
- int cpu = freq->cpu;
+ struct cpumask *cpus = freq->policy->cpus;
+ int cpu, first = cpumask_first(cpus);
+ unsigned int lpj;
if (freq->flags & CPUFREQ_CONST_LOOPS)
return NOTIFY_OK;
- if (!per_cpu(l_p_j_ref, cpu)) {
- per_cpu(l_p_j_ref, cpu) =
- per_cpu(cpu_data, cpu).loops_per_jiffy;
- per_cpu(l_p_j_ref_freq, cpu) = freq->old;
+ if (!per_cpu(l_p_j_ref, first)) {
+ for_each_cpu(cpu, cpus) {
+ per_cpu(l_p_j_ref, cpu) =
+ per_cpu(cpu_data, cpu).loops_per_jiffy;
+ per_cpu(l_p_j_ref_freq, cpu) = freq->old;
+ }
+
if (!global_l_p_j_ref) {
global_l_p_j_ref = loops_per_jiffy;
global_l_p_j_ref_freq = freq->old;
@@ -778,10 +783,11 @@ static int cpufreq_callback(struct notifier_block *nb,
loops_per_jiffy = cpufreq_scale(global_l_p_j_ref,
global_l_p_j_ref_freq,
freq->new);
- per_cpu(cpu_data, cpu).loops_per_jiffy =
- cpufreq_scale(per_cpu(l_p_j_ref, cpu),
- per_cpu(l_p_j_ref_freq, cpu),
- freq->new);
+
+ lpj = cpufreq_scale(per_cpu(l_p_j_ref, first),
+ per_cpu(l_p_j_ref_freq, first), freq->new);
+ for_each_cpu(cpu, cpus)
+ per_cpu(cpu_data, cpu).loops_per_jiffy = lpj;
}
return NOTIFY_OK;
}
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 3eb7794..89fb05f 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -653,19 +653,23 @@ static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val
void *data)
{
struct cpufreq_freqs *freq = data;
- unsigned int cpu = freq->cpu;
- struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu);
+ unsigned int cpu;
+ struct freq_table *ft;
- if (!ft->ref_freq) {
- ft->ref_freq = freq->old;
- ft->clock_tick_ref = cpu_data(cpu).clock_tick;
- }
- if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
- cpu_data(cpu).clock_tick =
- cpufreq_scale(ft->clock_tick_ref,
- ft->ref_freq,
- freq->new);
+ for_each_cpu(cpu, freq->policy->cpus) {
+ ft = &per_cpu(sparc64_freq_table, cpu);
+
+ if (!ft->ref_freq) {
+ ft->ref_freq = freq->old;
+ ft->clock_tick_ref = cpu_data(cpu).clock_tick;
+ }
+
+ if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
+ cpu_data(cpu).clock_tick =
+ cpufreq_scale(ft->clock_tick_ref, ft->ref_freq,
+ freq->new);
+ }
}
return 0;
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
index f4dd733..ebb14a2 100644
--- a/arch/x86/kernel/cpu/intel_epb.c
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -97,6 +97,7 @@ static void intel_epb_restore(void)
wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
}
+#ifdef CONFIG_PM
static struct syscore_ops intel_epb_syscore_ops = {
.suspend = intel_epb_save,
.resume = intel_epb_restore,
@@ -193,6 +194,25 @@ static int intel_epb_offline(unsigned int cpu)
return 0;
}
+static inline void register_intel_ebp_syscore_ops(void)
+{
+ register_syscore_ops(&intel_epb_syscore_ops);
+}
+#else /* !CONFIG_PM */
+static int intel_epb_online(unsigned int cpu)
+{
+ intel_epb_restore();
+ return 0;
+}
+
+static int intel_epb_offline(unsigned int cpu)
+{
+ return intel_epb_save();
+}
+
+static inline void register_intel_ebp_syscore_ops(void) {}
+#endif
+
static __init int intel_epb_init(void)
{
int ret;
@@ -206,7 +226,7 @@ static __init int intel_epb_init(void)
if (ret < 0)
goto err_out_online;
- register_syscore_ops(&intel_epb_syscore_ops);
+ register_intel_ebp_syscore_ops();
return 0;
err_out_online:
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 15b5e98..356dfc5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -979,7 +979,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
mark_tsc_unstable("cpufreq changes");
- set_cyc2ns_scale(tsc_khz, freq->cpu, rdtsc());
+ set_cyc2ns_scale(tsc_khz, freq->policy->cpu, rdtsc());
}
return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d75bb97..b9591ab 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6698,10 +6698,8 @@ static void kvm_hyperv_tsc_notifier(void)
}
#endif
-static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
- void *data)
+static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
{
- struct cpufreq_freqs *freq = data;
struct kvm *kvm;
struct kvm_vcpu *vcpu;
int i, send_ipi = 0;
@@ -6745,17 +6743,12 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
*
*/
- if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
- return 0;
- if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
- return 0;
-
- smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+ smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
- if (vcpu->cpu != freq->cpu)
+ if (vcpu->cpu != cpu)
continue;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != smp_processor_id())
@@ -6777,8 +6770,24 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va
* guest context is entered kvmclock will be updated,
* so the guest will not see stale values.
*/
- smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1);
+ smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
}
+}
+
+static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
+ void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ int cpu;
+
+ if (val == CPUFREQ_PRECHANGE && freq->old > freq->new)
+ return 0;
+ if (val == CPUFREQ_POSTCHANGE && freq->old < freq->new)
+ return 0;
+
+ for_each_cpu(cpu, freq->policy->cpus)
+ __kvmclock_cpufreq_notifier(freq, cpu);
+
return 0;
}
diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 7a6aa23..33c30c1 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -128,6 +128,7 @@ static const struct genpd_lock_ops genpd_spin_ops = {
#define genpd_is_always_on(genpd) (genpd->flags & GENPD_FLAG_ALWAYS_ON)
#define genpd_is_active_wakeup(genpd) (genpd->flags & GENPD_FLAG_ACTIVE_WAKEUP)
#define genpd_is_cpu_domain(genpd) (genpd->flags & GENPD_FLAG_CPU_DOMAIN)
+#define genpd_is_rpm_always_on(genpd) (genpd->flags & GENPD_FLAG_RPM_ALWAYS_ON)
static inline bool irq_safe_dev_in_no_sleep_domain(struct device *dev,
const struct generic_pm_domain *genpd)
@@ -515,7 +516,9 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
* (1) The domain is configured as always on.
* (2) When the domain has a subdomain being powered on.
*/
- if (genpd_is_always_on(genpd) || atomic_read(&genpd->sd_count) > 0)
+ if (genpd_is_always_on(genpd) ||
+ genpd_is_rpm_always_on(genpd) ||
+ atomic_read(&genpd->sd_count) > 0)
return -EBUSY;
list_for_each_entry(pdd, &genpd->dev_list, list_node) {
@@ -1812,7 +1815,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
}
/* Always-on domains must be powered on at initialization. */
- if (genpd_is_always_on(genpd) && !genpd_status_on(genpd))
+ if ((genpd_is_always_on(genpd) || genpd_is_rpm_always_on(genpd)) &&
+ !genpd_status_on(genpd))
return -EINVAL;
if (genpd_is_cpu_domain(genpd) &&
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index db779b6..85ff958 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -340,11 +340,14 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy,
struct cpufreq_freqs *freqs,
unsigned int state)
{
+ int cpu;
+
BUG_ON(irqs_disabled());
if (cpufreq_disabled())
return;
+ freqs->policy = policy;
freqs->flags = cpufreq_driver->flags;
pr_debug("notification %u of frequency transition to %u kHz\n",
state, freqs->new);
@@ -364,10 +367,8 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy,
}
}
- for_each_cpu(freqs->cpu, policy->cpus) {
- srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
- CPUFREQ_PRECHANGE, freqs);
- }
+ srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
+ CPUFREQ_PRECHANGE, freqs);
adjust_jiffies(CPUFREQ_PRECHANGE, freqs);
break;
@@ -377,11 +378,11 @@ static void cpufreq_notify_transition(struct cpufreq_policy *policy,
pr_debug("FREQ: %u - CPUs: %*pbl\n", freqs->new,
cpumask_pr_args(policy->cpus));
- for_each_cpu(freqs->cpu, policy->cpus) {
- trace_cpu_frequency(freqs->new, freqs->cpu);
- srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
- CPUFREQ_POSTCHANGE, freqs);
- }
+ for_each_cpu(cpu, policy->cpus)
+ trace_cpu_frequency(freqs->new, cpu);
+
+ srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
+ CPUFREQ_POSTCHANGE, freqs);
cpufreq_stats_record_transition(policy, freqs->new);
policy->cur = freqs->new;
@@ -618,50 +619,52 @@ static struct cpufreq_governor *find_governor(const char *str_governor)
return NULL;
}
+static int cpufreq_parse_policy(char *str_governor,
+ struct cpufreq_policy *policy)
+{
+ if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
+ policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+ return 0;
+ }
+ if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) {
+ policy->policy = CPUFREQ_POLICY_POWERSAVE;
+ return 0;
+ }
+ return -EINVAL;
+}
+
/**
- * cpufreq_parse_governor - parse a governor string
+ * cpufreq_parse_governor - parse a governor string only for !setpolicy
*/
static int cpufreq_parse_governor(char *str_governor,
struct cpufreq_policy *policy)
{
- if (cpufreq_driver->setpolicy) {
- if (!strncasecmp(str_governor, "performance", CPUFREQ_NAME_LEN)) {
- policy->policy = CPUFREQ_POLICY_PERFORMANCE;
- return 0;
- }
+ struct cpufreq_governor *t;
- if (!strncasecmp(str_governor, "powersave", CPUFREQ_NAME_LEN)) {
- policy->policy = CPUFREQ_POLICY_POWERSAVE;
- return 0;
- }
- } else {
- struct cpufreq_governor *t;
+ mutex_lock(&cpufreq_governor_mutex);
+
+ t = find_governor(str_governor);
+ if (!t) {
+ int ret;
+
+ mutex_unlock(&cpufreq_governor_mutex);
+
+ ret = request_module("cpufreq_%s", str_governor);
+ if (ret)
+ return -EINVAL;
mutex_lock(&cpufreq_governor_mutex);
t = find_governor(str_governor);
- if (!t) {
- int ret;
+ }
+ if (t && !try_module_get(t->owner))
+ t = NULL;
- mutex_unlock(&cpufreq_governor_mutex);
+ mutex_unlock(&cpufreq_governor_mutex);
- ret = request_module("cpufreq_%s", str_governor);
- if (ret)
- return -EINVAL;
-
- mutex_lock(&cpufreq_governor_mutex);
-
- t = find_governor(str_governor);
- }
- if (t && !try_module_get(t->owner))
- t = NULL;
-
- mutex_unlock(&cpufreq_governor_mutex);
-
- if (t) {
- policy->governor = t;
- return 0;
- }
+ if (t) {
+ policy->governor = t;
+ return 0;
}
return -EINVAL;
@@ -783,8 +786,13 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
if (ret != 1)
return -EINVAL;
- if (cpufreq_parse_governor(str_governor, &new_policy))
- return -EINVAL;
+ if (cpufreq_driver->setpolicy) {
+ if (cpufreq_parse_policy(str_governor, &new_policy))
+ return -EINVAL;
+ } else {
+ if (cpufreq_parse_governor(str_governor, &new_policy))
+ return -EINVAL;
+ }
ret = cpufreq_set_policy(policy, &new_policy);
@@ -1050,32 +1058,39 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void)
static int cpufreq_init_policy(struct cpufreq_policy *policy)
{
- struct cpufreq_governor *gov = NULL;
+ struct cpufreq_governor *gov = NULL, *def_gov = NULL;
struct cpufreq_policy new_policy;
memcpy(&new_policy, policy, sizeof(*policy));
- /* Update governor of new_policy to the governor used before hotplug */
- gov = find_governor(policy->last_governor);
- if (gov) {
- pr_debug("Restoring governor %s for cpu %d\n",
+ def_gov = cpufreq_default_governor();
+
+ if (has_target()) {
+ /*
+ * Update governor of new_policy to the governor used before
+ * hotplug
+ */
+ gov = find_governor(policy->last_governor);
+ if (gov) {
+ pr_debug("Restoring governor %s for cpu %d\n",
policy->governor->name, policy->cpu);
+ } else {
+ if (!def_gov)
+ return -ENODATA;
+ gov = def_gov;
+ }
+ new_policy.governor = gov;
} else {
- gov = cpufreq_default_governor();
- if (!gov)
- return -ENODATA;
- }
-
- new_policy.governor = gov;
-
- /* Use the default policy if there is no last_policy. */
- if (cpufreq_driver->setpolicy) {
- if (policy->last_policy)
+ /* Use the default policy if there is no last_policy. */
+ if (policy->last_policy) {
new_policy.policy = policy->last_policy;
- else
- cpufreq_parse_governor(gov->name, &new_policy);
+ } else {
+ if (!def_gov)
+ return -ENODATA;
+ cpufreq_parse_policy(def_gov->name, &new_policy);
+ }
}
- /* set default policy */
+
return cpufreq_set_policy(policy, &new_policy);
}
@@ -1133,6 +1148,11 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
cpufreq_global_kobject, "policy%u", cpu);
if (ret) {
pr_err("%s: failed to init policy->kobj: %d\n", __func__, ret);
+ /*
+ * The entire policy object will be freed below, but the extra
+ * memory allocated for the kobject name needs to be freed by
+ * releasing the kobject.
+ */
kobject_put(&policy->kobj);
goto err_free_real_cpus;
}
diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c
index 7d14a4b..29b4365 100644
--- a/drivers/soc/imx/gpc.c
+++ b/drivers/soc/imx/gpc.c
@@ -431,10 +431,19 @@ static int imx_gpc_probe(struct platform_device *pdev)
return ret;
}
- /* Disable PU power down in normal operation if ERR009619 is present */
+ /*
+ * Disable PU power down by runtime PM if ERR009619 is present.
+ *
+ * The PRE clock will be paused for several cycles when turning on the
+ * PU domain LDO from power down state. If PRE is in use at that time,
+ * the IPU/PRG cannot get the correct display data from the PRE.
+ *
+ * This is not a concern when the whole system enters suspend state, so
+ * it's safe to power down PU in this case.
+ */
if (of_id_data->err009619_present)
imx_gpc_domains[GPC_PGC_DOMAIN_PU].base.flags |=
- GENPD_FLAG_ALWAYS_ON;
+ GENPD_FLAG_RPM_ALWAYS_ON;
/* Keep DISP always on if ERR006287 is present */
if (of_id_data->err006287_present)
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 684caf0..d01a74f 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -42,13 +42,6 @@ enum cpufreq_table_sorting {
CPUFREQ_TABLE_SORTED_DESCENDING
};
-struct cpufreq_freqs {
- unsigned int cpu; /* cpu nr */
- unsigned int old;
- unsigned int new;
- u8 flags; /* flags of cpufreq_driver, see below. */
-};
-
struct cpufreq_cpuinfo {
unsigned int max_freq;
unsigned int min_freq;
@@ -156,6 +149,13 @@ struct cpufreq_policy {
struct thermal_cooling_device *cdev;
};
+struct cpufreq_freqs {
+ struct cpufreq_policy *policy;
+ unsigned int old;
+ unsigned int new;
+ u8 flags; /* flags of cpufreq_driver, see below. */
+};
+
/* Only for ACPI */
#define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
#define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 0e8e356..b21f35f 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -53,12 +53,16 @@
* driver must then comply with the so called,
* last-man-standing algorithm, for the CPUs in the
* PM domain.
+ *
+ * GENPD_FLAG_RPM_ALWAYS_ON: Instructs genpd to always keep the PM domain
+ * powered on except for system suspend.
*/
#define GENPD_FLAG_PM_CLK (1U << 0)
#define GENPD_FLAG_IRQ_SAFE (1U << 1)
#define GENPD_FLAG_ALWAYS_ON (1U << 2)
#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3)
#define GENPD_FLAG_CPU_DOMAIN (1U << 4)
+#define GENPD_FLAG_RPM_ALWAYS_ON (1U << 5)
enum gpd_status {
GPD_STATE_ACTIVE = 0, /* PM domain is active */