blob: 0153b0ca7b23e86fa65025f24bf659d241e9bfc8 [file] [log] [blame]
Quentin Perret27871f72018-12-03 09:56:16 +00001// SPDX-License-Identifier: GPL-2.0
2/*
Lukasz Luba1bc138c2020-06-10 11:12:23 +01003 * Energy Model of devices
Quentin Perret27871f72018-12-03 09:56:16 +00004 *
Vincent Donnefortc8ed9952021-09-08 15:05:23 +01005 * Copyright (c) 2018-2021, Arm ltd.
Quentin Perret27871f72018-12-03 09:56:16 +00006 * Written by: Quentin Perret, Arm ltd.
Lukasz Luba1bc138c2020-06-10 11:12:23 +01007 * Improvements provided by: Lukasz Luba, Arm ltd.
Quentin Perret27871f72018-12-03 09:56:16 +00008 */
9
10#define pr_fmt(fmt) "energy_model: " fmt
11
12#include <linux/cpu.h>
Vincent Donneforte4587162021-09-08 15:05:30 +010013#include <linux/cpufreq.h>
Quentin Perret27871f72018-12-03 09:56:16 +000014#include <linux/cpumask.h>
Quentin Perret9cac42d2019-01-22 16:42:47 +000015#include <linux/debugfs.h>
Quentin Perret27871f72018-12-03 09:56:16 +000016#include <linux/energy_model.h>
17#include <linux/sched/topology.h>
18#include <linux/slab.h>
19
Quentin Perret27871f72018-12-03 09:56:16 +000020/*
21 * Mutex serializing the registrations of performance domains and letting
22 * callbacks defined by drivers sleep.
23 */
24static DEFINE_MUTEX(em_pd_mutex);
25
Lukasz Luba1bc138c2020-06-10 11:12:23 +010026static bool _is_cpu_device(struct device *dev)
27{
28 return (dev->bus == &cpu_subsys);
29}
30
Quentin Perret9cac42d2019-01-22 16:42:47 +000031#ifdef CONFIG_DEBUG_FS
32static struct dentry *rootdir;
33
Lukasz Luba521b5122020-05-27 10:58:47 +010034static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
Quentin Perret9cac42d2019-01-22 16:42:47 +000035{
36 struct dentry *d;
37 char name[24];
38
Lukasz Luba521b5122020-05-27 10:58:47 +010039 snprintf(name, sizeof(name), "ps:%lu", ps->frequency);
Quentin Perret9cac42d2019-01-22 16:42:47 +000040
Lukasz Luba521b5122020-05-27 10:58:47 +010041 /* Create per-ps directory */
Quentin Perret9cac42d2019-01-22 16:42:47 +000042 d = debugfs_create_dir(name, pd);
Lukasz Luba521b5122020-05-27 10:58:47 +010043 debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
44 debugfs_create_ulong("power", 0444, d, &ps->power);
45 debugfs_create_ulong("cost", 0444, d, &ps->cost);
Vincent Donnefortc8ed9952021-09-08 15:05:23 +010046 debugfs_create_ulong("inefficient", 0444, d, &ps->flags);
Quentin Perret9cac42d2019-01-22 16:42:47 +000047}
48
49static int em_debug_cpus_show(struct seq_file *s, void *unused)
50{
51 seq_printf(s, "%*pbl\n", cpumask_pr_args(to_cpumask(s->private)));
52
53 return 0;
54}
55DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
56
Lukasz Lubac250d502020-11-05 12:50:01 +000057static int em_debug_units_show(struct seq_file *s, void *unused)
58{
59 struct em_perf_domain *pd = s->private;
Vincent Donnefort88f7a8952021-09-08 15:05:24 +010060 char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
61 "milliWatts" : "bogoWatts";
Lukasz Lubac250d502020-11-05 12:50:01 +000062
63 seq_printf(s, "%s\n", units);
64
65 return 0;
66}
67DEFINE_SHOW_ATTRIBUTE(em_debug_units);
68
Vincent Donnefort8354eb92021-09-08 15:05:25 +010069static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
70{
71 struct em_perf_domain *pd = s->private;
72 int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
73
74 seq_printf(s, "%d\n", enabled);
75
76 return 0;
77}
78DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
79
Lukasz Luba1bc138c2020-06-10 11:12:23 +010080static void em_debug_create_pd(struct device *dev)
Quentin Perret9cac42d2019-01-22 16:42:47 +000081{
82 struct dentry *d;
Quentin Perret9cac42d2019-01-22 16:42:47 +000083 int i;
84
Quentin Perret9cac42d2019-01-22 16:42:47 +000085 /* Create the directory of the performance domain */
Lukasz Luba1bc138c2020-06-10 11:12:23 +010086 d = debugfs_create_dir(dev_name(dev), rootdir);
Quentin Perret9cac42d2019-01-22 16:42:47 +000087
Lukasz Luba1bc138c2020-06-10 11:12:23 +010088 if (_is_cpu_device(dev))
89 debugfs_create_file("cpus", 0444, d, dev->em_pd->cpus,
90 &em_debug_cpus_fops);
Quentin Perret9cac42d2019-01-22 16:42:47 +000091
Lukasz Lubac250d502020-11-05 12:50:01 +000092 debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
Vincent Donnefort8354eb92021-09-08 15:05:25 +010093 debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
94 &em_debug_skip_inefficiencies_fops);
Lukasz Lubac250d502020-11-05 12:50:01 +000095
Lukasz Luba521b5122020-05-27 10:58:47 +010096 /* Create a sub-directory for each performance state */
Lukasz Luba1bc138c2020-06-10 11:12:23 +010097 for (i = 0; i < dev->em_pd->nr_perf_states; i++)
98 em_debug_create_ps(&dev->em_pd->table[i], d);
99
100}
101
102static void em_debug_remove_pd(struct device *dev)
103{
104 struct dentry *debug_dir;
105
106 debug_dir = debugfs_lookup(dev_name(dev), rootdir);
107 debugfs_remove_recursive(debug_dir);
Quentin Perret9cac42d2019-01-22 16:42:47 +0000108}
109
/* One-time setup of the debugfs root for all performance domains. */
static int __init em_debug_init(void)
{
	/* Create /sys/kernel/debug/energy_model directory */
	rootdir = debugfs_create_dir("energy_model", NULL);

	return 0;
}
/*
 * fs_initcall so rootdir exists before device/driver initcalls that may
 * register performance domains — presumably why it is not device_initcall;
 * NOTE(review): confirm against the commit that introduced fs_initcall here.
 */
fs_initcall(em_debug_init);
Quentin Perret9cac42d2019-01-22 16:42:47 +0000118#else /* CONFIG_DEBUG_FS */
/* Without CONFIG_DEBUG_FS the EM debug hierarchy is simply not built. */
static void em_debug_create_pd(struct device *dev) {}
static void em_debug_remove_pd(struct device *dev) {}
Quentin Perret9cac42d2019-01-22 16:42:47 +0000121#endif
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100122
123static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
124 int nr_states, struct em_data_callback *cb)
Quentin Perret27871f72018-12-03 09:56:16 +0000125{
Vincent Donnefortaa1a4322021-09-08 15:05:22 +0100126 unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
Lukasz Luba521b5122020-05-27 10:58:47 +0100127 struct em_perf_state *table;
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100128 int i, ret;
Quentin Perret27871f72018-12-03 09:56:16 +0000129 u64 fmax;
130
Quentin Perret27871f72018-12-03 09:56:16 +0000131 table = kcalloc(nr_states, sizeof(*table), GFP_KERNEL);
132 if (!table)
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100133 return -ENOMEM;
Quentin Perret27871f72018-12-03 09:56:16 +0000134
Lukasz Luba521b5122020-05-27 10:58:47 +0100135 /* Build the list of performance states for this performance domain */
Quentin Perret27871f72018-12-03 09:56:16 +0000136 for (i = 0, freq = 0; i < nr_states; i++, freq++) {
137 /*
138 * active_power() is a driver callback which ceils 'freq' to
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100139 * lowest performance state of 'dev' above 'freq' and updates
Quentin Perret27871f72018-12-03 09:56:16 +0000140 * 'power' and 'freq' accordingly.
141 */
Lukasz Lubad0351cc2020-05-27 10:58:49 +0100142 ret = cb->active_power(&power, &freq, dev);
Quentin Perret27871f72018-12-03 09:56:16 +0000143 if (ret) {
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100144 dev_err(dev, "EM: invalid perf. state: %d\n",
145 ret);
Lukasz Luba521b5122020-05-27 10:58:47 +0100146 goto free_ps_table;
Quentin Perret27871f72018-12-03 09:56:16 +0000147 }
148
149 /*
150 * We expect the driver callback to increase the frequency for
Lukasz Luba521b5122020-05-27 10:58:47 +0100151 * higher performance states.
Quentin Perret27871f72018-12-03 09:56:16 +0000152 */
153 if (freq <= prev_freq) {
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100154 dev_err(dev, "EM: non-increasing freq: %lu\n",
155 freq);
Lukasz Luba521b5122020-05-27 10:58:47 +0100156 goto free_ps_table;
Quentin Perret27871f72018-12-03 09:56:16 +0000157 }
158
159 /*
160 * The power returned by active_state() is expected to be
Lukasz Lubaf2c90b12020-11-03 09:05:59 +0000161 * positive and to fit into 16 bits.
Quentin Perret27871f72018-12-03 09:56:16 +0000162 */
Lukasz Luba7d9895c2020-05-27 10:58:48 +0100163 if (!power || power > EM_MAX_POWER) {
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100164 dev_err(dev, "EM: invalid power: %lu\n",
165 power);
Lukasz Luba521b5122020-05-27 10:58:47 +0100166 goto free_ps_table;
Quentin Perret27871f72018-12-03 09:56:16 +0000167 }
168
169 table[i].power = power;
170 table[i].frequency = prev_freq = freq;
Quentin Perret27871f72018-12-03 09:56:16 +0000171 }
172
Lukasz Luba521b5122020-05-27 10:58:47 +0100173 /* Compute the cost of each performance state. */
Quentin Perret27871f72018-12-03 09:56:16 +0000174 fmax = (u64) table[nr_states - 1].frequency;
Vincent Donnefortaa1a4322021-09-08 15:05:22 +0100175 for (i = nr_states - 1; i >= 0; i--) {
Lukasz Luba7fcc17d2021-08-03 11:27:43 +0100176 unsigned long power_res = em_scale_power(table[i].power);
177
178 table[i].cost = div64_u64(fmax * power_res,
Quentin Perret27871f72018-12-03 09:56:16 +0000179 table[i].frequency);
Vincent Donnefortaa1a4322021-09-08 15:05:22 +0100180 if (table[i].cost >= prev_cost) {
Vincent Donnefortc8ed9952021-09-08 15:05:23 +0100181 table[i].flags = EM_PERF_STATE_INEFFICIENT;
Vincent Donnefortaa1a4322021-09-08 15:05:22 +0100182 dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
183 table[i].frequency);
184 } else {
185 prev_cost = table[i].cost;
186 }
Quentin Perret27871f72018-12-03 09:56:16 +0000187 }
188
189 pd->table = table;
Lukasz Luba521b5122020-05-27 10:58:47 +0100190 pd->nr_perf_states = nr_states;
Quentin Perret27871f72018-12-03 09:56:16 +0000191
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100192 return 0;
Quentin Perret27871f72018-12-03 09:56:16 +0000193
Lukasz Luba521b5122020-05-27 10:58:47 +0100194free_ps_table:
Quentin Perret27871f72018-12-03 09:56:16 +0000195 kfree(table);
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100196 return -EINVAL;
Quentin Perret27871f72018-12-03 09:56:16 +0000197}
198
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100199static int em_create_pd(struct device *dev, int nr_states,
200 struct em_data_callback *cb, cpumask_t *cpus)
201{
202 struct em_perf_domain *pd;
203 struct device *cpu_dev;
204 int cpu, ret;
205
206 if (_is_cpu_device(dev)) {
207 pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL);
208 if (!pd)
209 return -ENOMEM;
210
211 cpumask_copy(em_span_cpus(pd), cpus);
212 } else {
213 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
214 if (!pd)
215 return -ENOMEM;
216 }
217
218 ret = em_create_perf_table(dev, pd, nr_states, cb);
219 if (ret) {
220 kfree(pd);
221 return ret;
222 }
223
224 if (_is_cpu_device(dev))
225 for_each_cpu(cpu, cpus) {
226 cpu_dev = get_cpu_device(cpu);
227 cpu_dev->em_pd = pd;
228 }
229
230 dev->em_pd = pd;
231
232 return 0;
233}
234
Vincent Donneforte4587162021-09-08 15:05:30 +0100235static void em_cpufreq_update_efficiencies(struct device *dev)
236{
237 struct em_perf_domain *pd = dev->em_pd;
238 struct em_perf_state *table;
239 struct cpufreq_policy *policy;
240 int found = 0;
241 int i;
242
243 if (!_is_cpu_device(dev) || !pd)
244 return;
245
246 policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd)));
247 if (!policy) {
248 dev_warn(dev, "EM: Access to CPUFreq policy failed");
249 return;
250 }
251
252 table = pd->table;
253
254 for (i = 0; i < pd->nr_perf_states; i++) {
255 if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT))
256 continue;
257
258 if (!cpufreq_table_set_inefficient(policy, table[i].frequency))
259 found++;
260 }
261
262 if (!found)
263 return;
264
265 /*
266 * Efficiencies have been installed in CPUFreq, inefficient frequencies
267 * will be skipped. The EM can do the same.
268 */
269 pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES;
270}
271
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100272/**
273 * em_pd_get() - Return the performance domain for a device
274 * @dev : Device to find the performance domain for
275 *
276 * Returns the performance domain to which @dev belongs, or NULL if it doesn't
277 * exist.
278 */
279struct em_perf_domain *em_pd_get(struct device *dev)
280{
281 if (IS_ERR_OR_NULL(dev))
282 return NULL;
283
284 return dev->em_pd;
285}
286EXPORT_SYMBOL_GPL(em_pd_get);
287
Quentin Perret27871f72018-12-03 09:56:16 +0000288/**
289 * em_cpu_get() - Return the performance domain for a CPU
290 * @cpu : CPU to find the performance domain for
291 *
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100292 * Returns the performance domain to which @cpu belongs, or NULL if it doesn't
Quentin Perret27871f72018-12-03 09:56:16 +0000293 * exist.
294 */
295struct em_perf_domain *em_cpu_get(int cpu)
296{
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100297 struct device *cpu_dev;
298
299 cpu_dev = get_cpu_device(cpu);
300 if (!cpu_dev)
301 return NULL;
302
303 return em_pd_get(cpu_dev);
Quentin Perret27871f72018-12-03 09:56:16 +0000304}
305EXPORT_SYMBOL_GPL(em_cpu_get);
306
/**
 * em_dev_register_perf_domain() - Register the Energy Model (EM) for a device
 * @dev : Device for which the EM is to register
 * @nr_states : Number of performance states to register
 * @cb : Callback functions providing the data of the Energy Model
 * @cpus : Pointer to cpumask_t, which in case of a CPU device is
 *		obligatory. It can be taken from i.e. 'policy->cpus'. For other
 *		type of devices this should be set to NULL.
 * @milliwatts : Flag indicating that the power values are in milliWatts or
 *		in some other scale. It must be set properly.
 *
 * Create Energy Model tables for a performance domain using the callbacks
 * defined in cb.
 *
 * The @milliwatts is important to set with correct value. Some kernel
 * sub-systems might rely on this flag and check if all devices in the EM are
 * using the same scale.
 *
 * If multiple clients register the same performance domain, all but the first
 * registration will be ignored.
 *
 * Return 0 on success
 */
int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
				struct em_data_callback *cb, cpumask_t *cpus,
				bool milliwatts)
{
	unsigned long cap, prev_cap = 0;
	int cpu, ret;

	if (!dev || !nr_states || !cb)
		return -EINVAL;

	/*
	 * Use a mutex to serialize the registration of performance domains and
	 * let the driver-defined callback functions sleep.
	 */
	mutex_lock(&em_pd_mutex);

	/* Only the first registration for a device takes effect. */
	if (dev->em_pd) {
		ret = -EEXIST;
		goto unlock;
	}

	/* CPU devices require a valid cpumask and a free, uniform domain. */
	if (_is_cpu_device(dev)) {
		if (!cpus) {
			dev_err(dev, "EM: invalid CPU mask\n");
			ret = -EINVAL;
			goto unlock;
		}

		for_each_cpu(cpu, cpus) {
			if (em_cpu_get(cpu)) {
				dev_err(dev, "EM: exists for CPU%d\n", cpu);
				ret = -EEXIST;
				goto unlock;
			}
			/*
			 * All CPUs of a domain must have the same
			 * micro-architecture since they all share the same
			 * table.
			 */
			cap = arch_scale_cpu_capacity(cpu);
			if (prev_cap && prev_cap != cap) {
				dev_err(dev, "EM: CPUs of %*pbl must have the same capacity\n",
					cpumask_pr_args(cpus));

				ret = -EINVAL;
				goto unlock;
			}
			prev_cap = cap;
		}
	}

	ret = em_create_pd(dev, nr_states, cb, cpus);
	if (ret)
		goto unlock;

	if (milliwatts)
		dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;

	/* Mirror inefficient states into CPUFreq (no-op for non-CPU devs). */
	em_cpufreq_update_efficiencies(dev);

	em_debug_create_pd(dev);
	dev_info(dev, "EM: created perf domain\n");

unlock:
	mutex_unlock(&em_pd_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(em_dev_register_perf_domain);
398
399/**
Lukasz Luba1bc138c2020-06-10 11:12:23 +0100400 * em_dev_unregister_perf_domain() - Unregister Energy Model (EM) for a device
401 * @dev : Device for which the EM is registered
402 *
403 * Unregister the EM for the specified @dev (but not a CPU device).
404 */
405void em_dev_unregister_perf_domain(struct device *dev)
406{
407 if (IS_ERR_OR_NULL(dev) || !dev->em_pd)
408 return;
409
410 if (_is_cpu_device(dev))
411 return;
412
413 /*
414 * The mutex separates all register/unregister requests and protects
415 * from potential clean-up/setup issues in the debugfs directories.
416 * The debugfs directory name is the same as device's name.
417 */
418 mutex_lock(&em_pd_mutex);
419 em_debug_remove_pd(dev);
420
421 kfree(dev->em_pd->table);
422 kfree(dev->em_pd);
423 dev->em_pd = NULL;
424 mutex_unlock(&em_pd_mutex);
425}
426EXPORT_SYMBOL_GPL(em_dev_unregister_perf_domain);