#include <linux/cgroup.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/printk.h>
#include <linux/reciprocal_div.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include <trace/events/sched.h>

#include "sched.h"

unsigned int sysctl_sched_cfs_boost __read_mostly;

/*
 * System energy normalization constants
 */
static struct target_nrg {
	unsigned long min_power;
	unsigned long max_power;
	struct reciprocal_value rdiv;
} schedtune_target_nrg;

/* Performance Boost region (B) threshold params */
static int perf_boost_idx;

/* Performance Constraint region (C) threshold params */
static int perf_constrain_idx;

/*
 * Performance-Energy (P-E) space threshold constants
 */
struct threshold_params {
	int nrg_gain;
	int cap_gain;
};

/*
 * System-specific P-E space threshold constants
 */
static struct threshold_params
threshold_gains[] = {
	{ 0, 4 }, /* >=  0% */
	{ 0, 4 }, /* >= 10% */
	{ 1, 4 }, /* >= 20% */
	{ 2, 4 }, /* >= 30% */
	{ 3, 4 }, /* >= 40% */
	{ 4, 3 }, /* >= 50% */
	{ 4, 2 }, /* >= 60% */
	{ 4, 1 }, /* >= 70% */
	{ 4, 0 }, /* >= 80% */
	{ 4, 0 }  /* >= 90% */
};
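/*
 * Illustrative mapping (not additional code): each row above corresponds to
 * a 10% bucket of sysctl_sched_cfs_boost, selected by the integer division
 * in sysctl_sched_cfs_boost_handler() below. For example, a hypothetical
 * boost of 25% selects index 2 for the boost region (nrg_gain = 1,
 * cap_gain = 4) and index 7 for the constraint region (nrg_gain = 4,
 * cap_gain = 1).
 */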

static int
__schedtune_accept_deltas(int nrg_delta, int cap_delta,
			  int perf_boost_idx, int perf_constrain_idx)
{
	int payoff = -INT_MAX;

	/* Performance Boost (B) region */
	if (nrg_delta > 0 && cap_delta > 0) {
		/*
		 * Evaluate "Performance Boost" vs "Energy Increase"
		 * payoff criteria:
		 *    cap_delta / nrg_delta < cap_gain / nrg_gain
		 * which is:
		 *    nrg_delta * cap_gain > cap_delta * nrg_gain
		 */
		payoff = nrg_delta * threshold_gains[perf_boost_idx].cap_gain;
		payoff -= cap_delta * threshold_gains[perf_boost_idx].nrg_gain;
		return payoff;
	}

	/* Performance Constraint (C) region */
	if (nrg_delta < 0 && cap_delta < 0) {
		/*
		 * Evaluate "Performance Decrease" vs "Energy Savings"
		 * payoff criteria:
		 *    cap_delta / nrg_delta > cap_gain / nrg_gain
		 * which is:
		 *    cap_delta * nrg_gain > nrg_delta * cap_gain
		 */
		payoff = cap_delta * threshold_gains[perf_constrain_idx].nrg_gain;
		payoff -= nrg_delta * threshold_gains[perf_constrain_idx].cap_gain;
		return payoff;
	}

	/* Default: reject the scheduling candidate */
	return payoff;
}
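/*
 * Worked example (hypothetical deltas, for illustration only): with
 * perf_boost_idx = 2 (nrg_gain = 1, cap_gain = 4), a candidate with
 * nrg_delta = 10 and cap_delta = 30 yields
 *    payoff = 10 * 4 - 30 * 1 = 10,
 * while nrg_delta = 10 and cap_delta = 50 yields 40 - 50 = -10.
 * The caller interprets the sign of the returned payoff.
 */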

#ifdef CONFIG_CGROUP_SCHEDTUNE

/*
 * EAS scheduler tunables for task groups.
 */

/* SchedTune tunables for a group of tasks */
struct schedtune {
	/* SchedTune CGroup subsystem */
	struct cgroup_subsys_state css;

	/* Boost group allocated ID */
	int idx;

	/* Boost value for tasks on that SchedTune CGroup */
	int boost;

	/* Performance Boost (B) region threshold params */
	int perf_boost_idx;

	/* Performance Constraint (C) region threshold params */
	int perf_constrain_idx;
};

static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct schedtune, css) : NULL;
}

static inline struct schedtune *task_schedtune(struct task_struct *tsk)
{
	return css_st(task_css(tsk, schedtune_cgrp_id));
}

static inline struct schedtune *parent_st(struct schedtune *st)
{
	return css_st(st->css.parent);
}

/*
 * SchedTune root control group
 * The root control group is used to define a system-wide boosting tuning,
 * which is applied to all tasks in the system.
 * Task-specific boost tuning can be specified by creating and
 * configuring a child control group under the root one.
 * By default, system-wide boosting is disabled, i.e. no boosting is applied
 * to tasks which are not in a child control group.
 */
static struct schedtune
root_schedtune = {
	.boost = 0,
	.perf_boost_idx = 0,
	.perf_constrain_idx = 0,
};

int
schedtune_accept_deltas(int nrg_delta, int cap_delta,
			struct task_struct *task)
{
	struct schedtune *ct;
	int perf_boost_idx;
	int perf_constrain_idx;

	/* Optimal (O) region */
	if (nrg_delta < 0 && cap_delta > 0)
		return INT_MAX;

	/* Suboptimal (S) region */
	if (nrg_delta > 0 && cap_delta < 0)
		return -INT_MAX;

	/* Get the task-specific perf Boost/Constraint indexes */
	rcu_read_lock();
	ct = task_schedtune(task);
	perf_boost_idx = ct->perf_boost_idx;
	perf_constrain_idx = ct->perf_constrain_idx;
	rcu_read_unlock();

	return __schedtune_accept_deltas(nrg_delta, cap_delta,
			perf_boost_idx, perf_constrain_idx);
}

/*
 * Maximum number of boost groups to support.
 * When per-task boosting is used we still allow only a limited number of
 * boost groups, for two main reasons:
 * 1. on a real system we usually have only a few classes of workloads which
 *    it makes sense to boost with different values (e.g. background vs
 *    foreground tasks, interactive vs low-priority tasks)
 * 2. a limited number allows for a simpler and more memory/time efficient
 *    implementation, especially for the computation of the per-CPU boost
 *    value
 */
#define BOOSTGROUPS_COUNT 4

/* Array of configured boost groups */
static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
	&root_schedtune,
	NULL,
};

/*
 * SchedTune boost groups
 * Keep track of all the boost groups which impact a CPU, for example when a
 * CPU has two RUNNABLE tasks belonging to two different boost groups and
 * thus likely with different boost values.
 * Since on each system we expect only a limited number of boost groups, here
 * we use a simple array to keep track of the metrics required to compute the
 * maximum per-CPU boosting value.
 */
struct boost_groups {
	/* Maximum boost value for all RUNNABLE tasks on a CPU */
	unsigned boost_max;
	struct {
		/* The boost for tasks on that boost group */
		unsigned boost;
		/* Count of RUNNABLE tasks on that boost group */
		unsigned tasks;
	} group[BOOSTGROUPS_COUNT];
};

/* Boost groups affecting each CPU in the system */
DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);

static void
schedtune_cpu_update(int cpu)
{
	struct boost_groups *bg;
	unsigned boost_max;
	int idx;

	bg = &per_cpu(cpu_boost_groups, cpu);

	/* The root boost group is always active */
	boost_max = bg->group[0].boost;
	for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {
		/*
		 * A boost group affects a CPU only if it has
		 * RUNNABLE tasks on that CPU
		 */
		if (bg->group[idx].tasks == 0)
			continue;
		boost_max = max(boost_max, bg->group[idx].boost);
	}

	bg->boost_max = boost_max;
}
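/*
 * For example (hypothetical values): if a CPU currently has one RUNNABLE
 * task in a group boosted to 10 and another in a group boosted to 50, the
 * loop above settles on boost_max = 50; once the second task is dequeued,
 * a later update drops boost_max back to the remaining active group's 10
 * (or to the root group's boost if no other group has RUNNABLE tasks).
 */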

static int
schedtune_boostgroup_update(int idx, int boost)
{
	struct boost_groups *bg;
	int cur_boost_max;
	int old_boost;
	int cpu;

	/* Update per CPU boost groups */
	for_each_possible_cpu(cpu) {
		bg = &per_cpu(cpu_boost_groups, cpu);

		/*
		 * Keep track of the current boost values so that the per-CPU
		 * maximum is recomputed only when it has been affected by
		 * the new value of the updated boost group
		 */
		cur_boost_max = bg->boost_max;
		old_boost = bg->group[idx].boost;

		/* Update the boost value of this boost group */
		bg->group[idx].boost = boost;

		/* Check if this update increases the current max */
		if (boost > cur_boost_max && bg->group[idx].tasks) {
			bg->boost_max = boost;
			continue;
		}

		/* Check if this update has decreased the current max */
		if (cur_boost_max == old_boost && old_boost > boost)
			schedtune_cpu_update(cpu);
	}

	return 0;
}

static inline void
schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
{
	struct boost_groups *bg;
	int tasks;

	bg = &per_cpu(cpu_boost_groups, cpu);

	/* Update the boosted-tasks count without letting it become negative */
	if (task_count < 0 && bg->group[idx].tasks <= -task_count)
		bg->group[idx].tasks = 0;
	else
		bg->group[idx].tasks += task_count;

	/* Boost group activation or deactivation on that RQ */
	tasks = bg->group[idx].tasks;
	if (tasks == 1 || tasks == 0)
		schedtune_cpu_update(cpu);
}
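/*
 * Illustration (not additional code): the per-CPU maximum only needs to be
 * recomputed when a boost group changes its active state on a runqueue.
 * The first enqueue of a group's task on a CPU moves its tasks count from
 * 0 to 1 (activation) and the last dequeue moves it from 1 to 0
 * (deactivation); both cases trigger schedtune_cpu_update() above.
 */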

/*
 * NOTE: This function must be called while holding the lock on the CPU RQ
 */
void schedtune_enqueue_task(struct task_struct *p, int cpu)
{
	struct schedtune *st;
	int idx;

	/*
	 * When a task is marked PF_EXITING by do_exit() it's going to be
	 * dequeued and enqueued multiple times in the exit path.
	 * Thus we avoid any further update, since we do not want to change
	 * CPU boosting while the task is exiting.
	 */
	if (p->flags & PF_EXITING)
		return;

	/* Get task boost group */
	rcu_read_lock();
	st = task_schedtune(p);
	idx = st->idx;
	rcu_read_unlock();

	schedtune_tasks_update(p, cpu, idx, 1);
}

/*
 * NOTE: This function must be called while holding the lock on the CPU RQ
 */
void schedtune_dequeue_task(struct task_struct *p, int cpu)
{
	struct schedtune *st;
	int idx;

	/*
	 * When a task is marked PF_EXITING by do_exit() it's going to be
	 * dequeued and enqueued multiple times in the exit path.
	 * Thus we avoid any further update, since we do not want to change
	 * CPU boosting while the task is exiting.
	 * The last dequeue will be done by the cgroup exit() callback.
	 */
	if (p->flags & PF_EXITING)
		return;

	/* Get task boost group */
	rcu_read_lock();
	st = task_schedtune(p);
	idx = st->idx;
	rcu_read_unlock();

	schedtune_tasks_update(p, cpu, idx, -1);
}

int schedtune_cpu_boost(int cpu)
{
	struct boost_groups *bg;

	bg = &per_cpu(cpu_boost_groups, cpu);
	return bg->boost_max;
}

int schedtune_task_boost(struct task_struct *p)
{
	struct schedtune *st;
	int task_boost;

	/* Get task boost value */
	rcu_read_lock();
	st = task_schedtune(p);
	task_boost = st->boost;
	rcu_read_unlock();

	return task_boost;
}

static u64
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct schedtune *st = css_st(css);

	return st->boost;
}

static int
boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
	    u64 boost)
{
	struct schedtune *st = css_st(css);

	if (boost > 100)
		return -EINVAL;

	st->boost = boost;
	if (css == &root_schedtune.css)
		sysctl_sched_cfs_boost = boost;

	/* Update CPU boost */
	schedtune_boostgroup_update(st->idx, st->boost);

	trace_sched_tune_config(st->boost);

	return 0;
}

static struct cftype files[] = {
	{
		.name = "boost",
		.read_u64 = boost_read,
		.write_u64 = boost_write,
	},
	{ }	/* terminate */
};
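/*
 * Userspace usage sketch (paths and group names are setup-dependent and
 * shown only as an assumption for illustration): with the legacy cgroup
 * hierarchy this knob is exposed as "schedtune.boost". Assuming the
 * controller is mounted at /dev/stune, a 25% boosted group could be
 * configured with something like:
 *
 *   mkdir /dev/stune/foreground
 *   echo 25 > /dev/stune/foreground/schedtune.boost
 *   echo <pid> > /dev/stune/foreground/tasks
 */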

static int
schedtune_boostgroup_init(struct schedtune *st)
{
	struct boost_groups *bg;
	int cpu;

	/* Keep track of allocated boost groups */
	allocated_group[st->idx] = st;

	/* Initialize the per CPU boost groups */
	for_each_possible_cpu(cpu) {
		bg = &per_cpu(cpu_boost_groups, cpu);
		bg->group[st->idx].boost = 0;
		bg->group[st->idx].tasks = 0;
	}

	return 0;
}

static int
schedtune_init(void)
{
	struct boost_groups *bg;
	int cpu;

	/* Initialize the per CPU boost groups */
	for_each_possible_cpu(cpu) {
		bg = &per_cpu(cpu_boost_groups, cpu);
		memset(bg, 0, sizeof(struct boost_groups));
	}

	pr_info(" schedtune configured to support %d boost groups\n",
		BOOSTGROUPS_COUNT);
	return 0;
}

static struct cgroup_subsys_state *
schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct schedtune *st;
	int idx;

	if (!parent_css) {
		schedtune_init();
		return &root_schedtune.css;
	}

	/* Allow only single-level hierarchies */
	if (parent_css != &root_schedtune.css) {
		pr_err("Nested SchedTune boosting groups not allowed\n");
		return ERR_PTR(-ENOMEM);
	}

	/* Allow only a limited number of boosting groups */
	for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx)
		if (!allocated_group[idx])
			break;
	if (idx == BOOSTGROUPS_COUNT) {
		pr_err("Trying to create more than %d SchedTune boosting groups\n",
		       BOOSTGROUPS_COUNT);
		return ERR_PTR(-ENOSPC);
	}

	st = kzalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto out;

	/* Initialize per-CPU boost group support */
	st->idx = idx;
	if (schedtune_boostgroup_init(st))
		goto release;

	return &st->css;

release:
	kfree(st);
out:
	return ERR_PTR(-ENOMEM);
}

static void
schedtune_boostgroup_release(struct schedtune *st)
{
	/* Reset this boost group */
	schedtune_boostgroup_update(st->idx, 0);

	/* Keep track of allocated boost groups */
	allocated_group[st->idx] = NULL;
}

static void
schedtune_css_free(struct cgroup_subsys_state *css)
{
	struct schedtune *st = css_st(css);

	schedtune_boostgroup_release(st);
	kfree(st);
}

struct cgroup_subsys schedtune_cgrp_subsys = {
	.css_alloc = schedtune_css_alloc,
	.css_free = schedtune_css_free,
	.legacy_cftypes = files,
	.early_init = 1,
};

#else /* CONFIG_CGROUP_SCHEDTUNE */

int
schedtune_accept_deltas(int nrg_delta, int cap_delta,
			struct task_struct *task)
{
	/* Optimal (O) region */
	if (nrg_delta < 0 && cap_delta > 0)
		return INT_MAX;

	/* Suboptimal (S) region */
	if (nrg_delta > 0 && cap_delta < 0)
		return -INT_MAX;

	return __schedtune_accept_deltas(nrg_delta, cap_delta,
			perf_boost_idx, perf_constrain_idx);
}

#endif /* CONFIG_CGROUP_SCHEDTUNE */

int
sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
			       void __user *buffer, size_t *lenp,
			       loff_t *ppos)
{
	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (ret || !write)
		return ret;

	/* Performance Boost (B) region threshold params */
	perf_boost_idx = sysctl_sched_cfs_boost;
	perf_boost_idx /= 10;

	/* Performance Constraint (C) region threshold params */
	perf_constrain_idx = 100 - sysctl_sched_cfs_boost;
	perf_constrain_idx /= 10;

	return 0;
}
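/*
 * Example of the mapping above (illustrative numbers): writing 25 to the
 * sysctl gives perf_boost_idx = 25 / 10 = 2 and
 * perf_constrain_idx = (100 - 25) / 10 = 7, i.e. the ">= 20%" and ">= 70%"
 * rows of threshold_gains respectively.
 */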

/*
 * System energy normalization
 * Returns the normalized value, in the range [0..SCHED_CAPACITY_SCALE],
 * corresponding to the specified energy variation.
 */
int
schedtune_normalize_energy(int energy_diff)
{
	u32 normalized_nrg;
	int max_delta;

#ifdef CONFIG_SCHED_DEBUG
	/* Check for boundaries */
	max_delta = schedtune_target_nrg.max_power;
	max_delta -= schedtune_target_nrg.min_power;
	WARN_ON(abs(energy_diff) >= max_delta);
#endif

	/* Do scaling using positive numbers to increase the range */
	normalized_nrg = (energy_diff < 0) ? -energy_diff : energy_diff;

	/* Scale by energy magnitude */
	normalized_nrg <<= SCHED_CAPACITY_SHIFT;

	/* Normalize on max energy for target platform */
	normalized_nrg = reciprocal_divide(
			normalized_nrg, schedtune_target_nrg.rdiv);

	return (energy_diff < 0) ? -normalized_nrg : normalized_nrg;
}
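/*
 * Worked example (hypothetical platform numbers): with
 * max_power - min_power = 4096 and energy_diff = 1024, the scaling above
 * effectively computes (1024 << SCHED_CAPACITY_SHIFT) / 4096 =
 * 1048576 / 4096 = 256, i.e. roughly a quarter of the full normalized
 * range; a negative energy_diff of the same magnitude returns -256.
 */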

#ifdef CONFIG_SCHED_DEBUG
static void
schedtune_test_nrg(unsigned long delta_pwr)
{
	unsigned long test_delta_pwr;
	unsigned long test_norm_pwr;
	int idx;

	/*
	 * Check normalization constants using some constant system
	 * energy values
	 */
	pr_info("schedtune: verify normalization constants...\n");
	for (idx = 0; idx < 6; ++idx) {
		test_delta_pwr = delta_pwr >> idx;

		/* Normalize on max energy for target platform */
		test_norm_pwr = reciprocal_divide(
			test_delta_pwr << SCHED_CAPACITY_SHIFT,
			schedtune_target_nrg.rdiv);

		pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n",
			idx, test_delta_pwr, test_norm_pwr);
	}
}
#else
#define schedtune_test_nrg(delta_pwr)
#endif

/*
 * Compute the min/max power consumption of a cluster and all its CPUs
 */
static void
schedtune_add_cluster_nrg(
		struct sched_domain *sd,
		struct sched_group *sg,
		struct target_nrg *ste)
{
	struct sched_domain *sd2;
	struct sched_group *sg2;

	struct cpumask *cluster_cpus;
	char str[32];

	unsigned long min_pwr;
	unsigned long max_pwr;
	int cpu;

	/* Get Cluster energy using EM data for the first CPU */
	cluster_cpus = sched_group_cpus(sg);
	snprintf(str, 32, "CLUSTER[%*pbl]",
		 cpumask_pr_args(cluster_cpus));

	min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power;
	max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power;
	pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
		str, min_pwr, max_pwr);

	/*
	 * Keep track of this cluster's energy in the computation of the
	 * overall system energy
	 */
	ste->min_power += min_pwr;
	ste->max_power += max_pwr;

	/* Get CPU energy using EM data for each CPU in the group */
	for_each_cpu(cpu, cluster_cpus) {
		/* Get a SD view for the specific CPU */
		for_each_domain(cpu, sd2) {
			/* Get the CPU group */
			sg2 = sd2->groups;
			min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power;
			max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power;

			ste->min_power += min_pwr;
			ste->max_power += max_pwr;

			snprintf(str, 32, "CPU[%d]", cpu);
			pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
				str, min_pwr, max_pwr);

			/*
			 * Assume we have EM data only at the CPU and
			 * the upper CLUSTER level
			 */
			BUG_ON(!cpumask_equal(
				sched_group_cpus(sg),
				sched_group_cpus(sd2->parent->groups)
				));
			break;
		}
	}
}
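/*
 * Accumulation example (entirely hypothetical EM numbers): for a cluster
 * whose deepest idle state consumes 10 and whose highest cap state
 * consumes 500, with four CPUs each contributing 5 (idle) and 300 (max),
 * this function adds 10 + 4 * 5 = 30 to ste->min_power and
 * 500 + 4 * 300 = 1700 to ste->max_power.
 */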

/*
 * Initialize the constants required to compute normalized energy.
 * The values of these constants depend on the EM data for the specific
 * target system and topology.
 * Thus, this function is expected to be called by the code that binds
 * the EM to the topology information.
 */
static int
schedtune_init_late(void)
{
	struct target_nrg *ste = &schedtune_target_nrg;
	unsigned long delta_pwr = 0;
	struct sched_domain *sd;
	struct sched_group *sg;

	pr_info("schedtune: init normalization constants...\n");
	ste->max_power = 0;
	ste->min_power = 0;

	rcu_read_lock();

	/*
	 * When EAS is in use, we always have a pointer to the highest SD
	 * which provides EM data.
	 */
	sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask)));
	if (!sd) {
		pr_info("schedtune: no energy model data\n");
		goto nodata;
	}

	sg = sd->groups;
	do {
		schedtune_add_cluster_nrg(sd, sg, ste);
	} while (sg = sg->next, sg != sd->groups);

	rcu_read_unlock();

	pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
		"SYSTEM", ste->min_power, ste->max_power);

	/* Compute normalization constants */
	delta_pwr = ste->max_power - ste->min_power;
	ste->rdiv = reciprocal_value(delta_pwr);
	pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n",
		ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2);

	schedtune_test_nrg(delta_pwr);
	return 0;

nodata:
	rcu_read_unlock();
	return -EINVAL;
}
late_initcall(schedtune_init_late);