blob: f71bcbe1a00c18e6cf43233fca5e052bfc1e0f51 [file] [log] [blame]
Greg Kroah-Hartmanb2441312017-11-01 15:07:57 +01001// SPDX-License-Identifier: GPL-2.0
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002/*
3 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
4 * policies)
5 */
Peter Zijlstra029632f2011-10-25 10:00:11 +02006#include "sched.h"
7
Vincent Guittot371bf422018-06-28 17:45:05 +02008#include "pelt.h"
9
Clark Williamsce0dbbb2013-02-07 09:47:04 -060010int sched_rr_timeslice = RR_TIMESLICE;
Shile Zhang975e1552017-01-28 22:00:49 +080011int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
Clark Williamsce0dbbb2013-02-07 09:47:04 -060012
Peter Zijlstra029632f2011-10-25 10:00:11 +020013static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
14
15struct rt_bandwidth def_rt_bandwidth;
16
17static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
18{
19 struct rt_bandwidth *rt_b =
20 container_of(timer, struct rt_bandwidth, rt_period_timer);
Peter Zijlstra029632f2011-10-25 10:00:11 +020021 int idle = 0;
Peter Zijlstra77a4d1a2015-04-15 11:41:57 +020022 int overrun;
Peter Zijlstra029632f2011-10-25 10:00:11 +020023
Peter Zijlstra77a4d1a2015-04-15 11:41:57 +020024 raw_spin_lock(&rt_b->rt_runtime_lock);
Peter Zijlstra029632f2011-10-25 10:00:11 +020025 for (;;) {
Peter Zijlstra77a4d1a2015-04-15 11:41:57 +020026 overrun = hrtimer_forward_now(timer, rt_b->rt_period);
Peter Zijlstra029632f2011-10-25 10:00:11 +020027 if (!overrun)
28 break;
29
Peter Zijlstra77a4d1a2015-04-15 11:41:57 +020030 raw_spin_unlock(&rt_b->rt_runtime_lock);
Peter Zijlstra029632f2011-10-25 10:00:11 +020031 idle = do_sched_rt_period_timer(rt_b, overrun);
Peter Zijlstra77a4d1a2015-04-15 11:41:57 +020032 raw_spin_lock(&rt_b->rt_runtime_lock);
Peter Zijlstra029632f2011-10-25 10:00:11 +020033 }
Peter Zijlstra4cfafd32015-05-14 12:23:11 +020034 if (idle)
35 rt_b->rt_period_active = 0;
Peter Zijlstra77a4d1a2015-04-15 11:41:57 +020036 raw_spin_unlock(&rt_b->rt_runtime_lock);
Peter Zijlstra029632f2011-10-25 10:00:11 +020037
38 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
39}
40
41void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
42{
43 rt_b->rt_period = ns_to_ktime(period);
44 rt_b->rt_runtime = runtime;
45
46 raw_spin_lock_init(&rt_b->rt_runtime_lock);
47
48 hrtimer_init(&rt_b->rt_period_timer,
49 CLOCK_MONOTONIC, HRTIMER_MODE_REL);
50 rt_b->rt_period_timer.function = sched_rt_period_timer;
51}
52
53static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
54{
55 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
56 return;
57
Peter Zijlstra029632f2011-10-25 10:00:11 +020058 raw_spin_lock(&rt_b->rt_runtime_lock);
Peter Zijlstra4cfafd32015-05-14 12:23:11 +020059 if (!rt_b->rt_period_active) {
60 rt_b->rt_period_active = 1;
Steven Rostedtc3a990d2016-02-16 18:37:46 -050061 /*
62 * SCHED_DEADLINE updates the bandwidth, as a run away
63 * RT task with a DL task could hog a CPU. But DL does
64 * not reset the period. If a deadline task was running
65 * without an RT task running, it can cause RT tasks to
66 * throttle when they start up. Kick the timer right away
67 * to update the period.
68 */
69 hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
Peter Zijlstra4cfafd32015-05-14 12:23:11 +020070 hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
71 }
Peter Zijlstra029632f2011-10-25 10:00:11 +020072 raw_spin_unlock(&rt_b->rt_runtime_lock);
73}
74
Abel Vesa07c54f72015-03-03 13:50:27 +020075void init_rt_rq(struct rt_rq *rt_rq)
Peter Zijlstra029632f2011-10-25 10:00:11 +020076{
77 struct rt_prio_array *array;
78 int i;
79
80 array = &rt_rq->active;
81 for (i = 0; i < MAX_RT_PRIO; i++) {
82 INIT_LIST_HEAD(array->queue + i);
83 __clear_bit(i, array->bitmap);
84 }
85 /* delimiter for bitsearch: */
86 __set_bit(MAX_RT_PRIO, array->bitmap);
87
88#if defined CONFIG_SMP
89 rt_rq->highest_prio.curr = MAX_RT_PRIO;
90 rt_rq->highest_prio.next = MAX_RT_PRIO;
91 rt_rq->rt_nr_migratory = 0;
92 rt_rq->overloaded = 0;
93 plist_head_init(&rt_rq->pushable_tasks);
Steven Rostedtb6366f02015-03-18 14:49:46 -040094#endif /* CONFIG_SMP */
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +040095 /* We start is dequeued state, because no RT tasks are queued */
96 rt_rq->rt_queued = 0;
Peter Zijlstra029632f2011-10-25 10:00:11 +020097
98 rt_rq->rt_time = 0;
99 rt_rq->rt_throttled = 0;
100 rt_rq->rt_runtime = 0;
101 raw_spin_lock_init(&rt_rq->rt_runtime_lock);
102}
103
Gregory Haskins398a1532009-01-14 09:10:04 -0500104#ifdef CONFIG_RT_GROUP_SCHED
Peter Zijlstra029632f2011-10-25 10:00:11 +0200105static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
106{
107 hrtimer_cancel(&rt_b->rt_period_timer);
108}
Gregory Haskins398a1532009-01-14 09:10:04 -0500109
/* An entity without a group runqueue of its own (my_q) represents a task. */
#define rt_entity_is_task(rt_se) ((rt_se)->my_q == NULL)
111
Peter Zijlstra8f488942009-07-24 12:25:30 +0200112static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
113{
114#ifdef CONFIG_SCHED_DEBUG
115 WARN_ON_ONCE(!rt_entity_is_task(rt_se));
116#endif
117 return container_of(rt_se, struct task_struct, rt);
118}
119
Gregory Haskins398a1532009-01-14 09:10:04 -0500120static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
121{
122 return rt_rq->rq;
123}
124
125static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
126{
127 return rt_se->rt_rq;
128}
129
Kirill Tkhai653d07a2014-03-15 02:14:55 +0400130static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
131{
132 struct rt_rq *rt_rq = rt_se->rt_rq;
133
134 return rt_rq->rq;
135}
136
Peter Zijlstra029632f2011-10-25 10:00:11 +0200137void free_rt_sched_group(struct task_group *tg)
138{
139 int i;
140
141 if (tg->rt_se)
142 destroy_rt_bandwidth(&tg->rt_bandwidth);
143
144 for_each_possible_cpu(i) {
145 if (tg->rt_rq)
146 kfree(tg->rt_rq[i]);
147 if (tg->rt_se)
148 kfree(tg->rt_se[i]);
149 }
150
151 kfree(tg->rt_rq);
152 kfree(tg->rt_se);
153}
154
155void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
156 struct sched_rt_entity *rt_se, int cpu,
157 struct sched_rt_entity *parent)
158{
159 struct rq *rq = cpu_rq(cpu);
160
161 rt_rq->highest_prio.curr = MAX_RT_PRIO;
162 rt_rq->rt_nr_boosted = 0;
163 rt_rq->rq = rq;
164 rt_rq->tg = tg;
165
166 tg->rt_rq[cpu] = rt_rq;
167 tg->rt_se[cpu] = rt_se;
168
169 if (!rt_se)
170 return;
171
172 if (!parent)
173 rt_se->rt_rq = &rq->rt;
174 else
175 rt_se->rt_rq = parent->my_q;
176
177 rt_se->my_q = rt_rq;
178 rt_se->parent = parent;
179 INIT_LIST_HEAD(&rt_se->run_list);
180}
181
182int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
183{
184 struct rt_rq *rt_rq;
185 struct sched_rt_entity *rt_se;
186 int i;
187
Kees Cook6396bb22018-06-12 14:03:40 -0700188 tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
Peter Zijlstra029632f2011-10-25 10:00:11 +0200189 if (!tg->rt_rq)
190 goto err;
Kees Cook6396bb22018-06-12 14:03:40 -0700191 tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
Peter Zijlstra029632f2011-10-25 10:00:11 +0200192 if (!tg->rt_se)
193 goto err;
194
195 init_rt_bandwidth(&tg->rt_bandwidth,
196 ktime_to_ns(def_rt_bandwidth.rt_period), 0);
197
198 for_each_possible_cpu(i) {
199 rt_rq = kzalloc_node(sizeof(struct rt_rq),
200 GFP_KERNEL, cpu_to_node(i));
201 if (!rt_rq)
202 goto err;
203
204 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
205 GFP_KERNEL, cpu_to_node(i));
206 if (!rt_se)
207 goto err_free_rq;
208
Abel Vesa07c54f72015-03-03 13:50:27 +0200209 init_rt_rq(rt_rq);
Peter Zijlstra029632f2011-10-25 10:00:11 +0200210 rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
211 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
212 }
213
214 return 1;
215
216err_free_rq:
217 kfree(rt_rq);
218err:
219 return 0;
220}
221
Gregory Haskins398a1532009-01-14 09:10:04 -0500222#else /* CONFIG_RT_GROUP_SCHED */
223
/* Without RT group scheduling every RT entity is a task. */
#define rt_entity_is_task(rt_se) (1)
225
Peter Zijlstra8f488942009-07-24 12:25:30 +0200226static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
227{
228 return container_of(rt_se, struct task_struct, rt);
229}
230
Gregory Haskins398a1532009-01-14 09:10:04 -0500231static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
232{
233 return container_of(rt_rq, struct rq, rt);
234}
235
/* Return the runqueue of the task that owns @rt_se. */
static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	return task_rq(rt_task_of(rt_se));
}
242
243static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
244{
245 struct rq *rq = rq_of_rt_se(rt_se);
Gregory Haskins398a1532009-01-14 09:10:04 -0500246
247 return &rq->rt;
248}
249
/* No per-group RT state exists without CONFIG_RT_GROUP_SCHED. */
void free_rt_sched_group(struct task_group *tg)
{
}
251
/* Nothing to allocate without CONFIG_RT_GROUP_SCHED; always reports success. */
int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	/* 1 is the success value of the group-scheduling variant as well. */
	return 1;
}
Gregory Haskins398a1532009-01-14 09:10:04 -0500256#endif /* CONFIG_RT_GROUP_SCHED */
257
Steven Rostedt4fd29172008-01-25 21:08:06 +0100258#ifdef CONFIG_SMP
Ingo Molnar84de4272008-01-25 21:08:15 +0100259
/* Forward declaration; the definition is not in this part of the file. */
static void pull_rt_task(struct rq *this_rq);
Peter Zijlstra38033c32014-01-23 20:32:21 +0100261
Peter Zijlstradc877342014-02-12 15:47:29 +0100262static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
263{
264 /* Try to pull RT tasks here if we lower this rq's prio */
265 return rq->rt.highest_prio.curr > prev->prio;
266}
267
Gregory Haskins637f5082008-01-25 21:08:18 +0100268static inline int rt_overloaded(struct rq *rq)
Steven Rostedt4fd29172008-01-25 21:08:06 +0100269{
Gregory Haskins637f5082008-01-25 21:08:18 +0100270 return atomic_read(&rq->rd->rto_count);
Steven Rostedt4fd29172008-01-25 21:08:06 +0100271}
Ingo Molnar84de4272008-01-25 21:08:15 +0100272
Steven Rostedt4fd29172008-01-25 21:08:06 +0100273static inline void rt_set_overload(struct rq *rq)
274{
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -0400275 if (!rq->online)
276 return;
277
Rusty Russellc6c49272008-11-25 02:35:05 +1030278 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
Steven Rostedt4fd29172008-01-25 21:08:06 +0100279 /*
280 * Make sure the mask is visible before we set
281 * the overload count. That is checked to determine
282 * if we should look at the mask. It would be a shame
283 * if we looked at the mask, but the mask was not
284 * updated yet.
Peter Zijlstra7c3f2ab2013-10-15 12:35:07 +0200285 *
286 * Matched by the barrier in pull_rt_task().
Steven Rostedt4fd29172008-01-25 21:08:06 +0100287 */
Peter Zijlstra7c3f2ab2013-10-15 12:35:07 +0200288 smp_wmb();
Gregory Haskins637f5082008-01-25 21:08:18 +0100289 atomic_inc(&rq->rd->rto_count);
Steven Rostedt4fd29172008-01-25 21:08:06 +0100290}
Ingo Molnar84de4272008-01-25 21:08:15 +0100291
Steven Rostedt4fd29172008-01-25 21:08:06 +0100292static inline void rt_clear_overload(struct rq *rq)
293{
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -0400294 if (!rq->online)
295 return;
296
Steven Rostedt4fd29172008-01-25 21:08:06 +0100297 /* the order here really doesn't matter */
Gregory Haskins637f5082008-01-25 21:08:18 +0100298 atomic_dec(&rq->rd->rto_count);
Rusty Russellc6c49272008-11-25 02:35:05 +1030299 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
Steven Rostedt4fd29172008-01-25 21:08:06 +0100300}
Gregory Haskins73fe6aa2008-01-25 21:08:07 +0100301
Gregory Haskins398a1532009-01-14 09:10:04 -0500302static void update_rt_migration(struct rt_rq *rt_rq)
Gregory Haskins73fe6aa2008-01-25 21:08:07 +0100303{
Peter Zijlstraa1ba4d82009-04-01 18:40:15 +0200304 if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
Gregory Haskins398a1532009-01-14 09:10:04 -0500305 if (!rt_rq->overloaded) {
306 rt_set_overload(rq_of_rt_rq(rt_rq));
307 rt_rq->overloaded = 1;
Gregory Haskinscdc8eb92008-01-25 21:08:23 +0100308 }
Gregory Haskins398a1532009-01-14 09:10:04 -0500309 } else if (rt_rq->overloaded) {
310 rt_clear_overload(rq_of_rt_rq(rt_rq));
311 rt_rq->overloaded = 0;
Gregory Haskins637f5082008-01-25 21:08:18 +0100312 }
Gregory Haskins73fe6aa2008-01-25 21:08:07 +0100313}
Steven Rostedt4fd29172008-01-25 21:08:06 +0100314
Gregory Haskins398a1532009-01-14 09:10:04 -0500315static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
Peter Zijlstrafa85ae22008-01-25 21:08:29 +0100316{
Peter Zijlstra29baa742012-04-23 12:11:21 +0200317 struct task_struct *p;
318
Peter Zijlstraa1ba4d82009-04-01 18:40:15 +0200319 if (!rt_entity_is_task(rt_se))
320 return;
321
Peter Zijlstra29baa742012-04-23 12:11:21 +0200322 p = rt_task_of(rt_se);
Peter Zijlstraa1ba4d82009-04-01 18:40:15 +0200323 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
324
325 rt_rq->rt_nr_total++;
Ingo Molnar4b53a342017-02-05 15:41:03 +0100326 if (p->nr_cpus_allowed > 1)
Gregory Haskins398a1532009-01-14 09:10:04 -0500327 rt_rq->rt_nr_migratory++;
328
329 update_rt_migration(rt_rq);
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100330}
331
Gregory Haskins398a1532009-01-14 09:10:04 -0500332static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
333{
Peter Zijlstra29baa742012-04-23 12:11:21 +0200334 struct task_struct *p;
335
Peter Zijlstraa1ba4d82009-04-01 18:40:15 +0200336 if (!rt_entity_is_task(rt_se))
337 return;
338
Peter Zijlstra29baa742012-04-23 12:11:21 +0200339 p = rt_task_of(rt_se);
Peter Zijlstraa1ba4d82009-04-01 18:40:15 +0200340 rt_rq = &rq_of_rt_rq(rt_rq)->rt;
341
342 rt_rq->rt_nr_total--;
Ingo Molnar4b53a342017-02-05 15:41:03 +0100343 if (p->nr_cpus_allowed > 1)
Gregory Haskins398a1532009-01-14 09:10:04 -0500344 rt_rq->rt_nr_migratory--;
345
346 update_rt_migration(rt_rq);
347}
348
Steven Rostedt5181f4a42011-06-16 21:55:23 -0400349static inline int has_pushable_tasks(struct rq *rq)
350{
351 return !plist_head_empty(&rq->rt.pushable_tasks);
352}
353
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +0200354static DEFINE_PER_CPU(struct callback_head, rt_push_head);
355static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
Peter Zijlstrae3fca9e2015-06-11 14:46:37 +0200356
357static void push_rt_tasks(struct rq *);
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +0200358static void pull_rt_task(struct rq *);
Peter Zijlstrae3fca9e2015-06-11 14:46:37 +0200359
Ingo Molnar02d8ec92018-03-03 16:27:54 +0100360static inline void rt_queue_push_tasks(struct rq *rq)
Peter Zijlstradc877342014-02-12 15:47:29 +0100361{
Peter Zijlstrae3fca9e2015-06-11 14:46:37 +0200362 if (!has_pushable_tasks(rq))
363 return;
364
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +0200365 queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
366}
367
Ingo Molnar02d8ec92018-03-03 16:27:54 +0100368static inline void rt_queue_pull_task(struct rq *rq)
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +0200369{
370 queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
Peter Zijlstradc877342014-02-12 15:47:29 +0100371}
372
Gregory Haskins917b6272008-12-29 09:39:53 -0500373static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
374{
375 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
376 plist_node_init(&p->pushable_tasks, p->prio);
377 plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
Steven Rostedt5181f4a42011-06-16 21:55:23 -0400378
379 /* Update the highest prio pushable task */
380 if (p->prio < rq->rt.highest_prio.next)
381 rq->rt.highest_prio.next = p->prio;
Gregory Haskins917b6272008-12-29 09:39:53 -0500382}
383
384static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
385{
386 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
Gregory Haskins917b6272008-12-29 09:39:53 -0500387
Steven Rostedt5181f4a42011-06-16 21:55:23 -0400388 /* Update the new highest prio pushable task */
389 if (has_pushable_tasks(rq)) {
390 p = plist_first_entry(&rq->rt.pushable_tasks,
391 struct task_struct, pushable_tasks);
392 rq->rt.highest_prio.next = p->prio;
393 } else
394 rq->rt.highest_prio.next = MAX_RT_PRIO;
Ingo Molnarbcf08df2008-04-19 12:11:10 +0200395}
396
Gregory Haskins917b6272008-12-29 09:39:53 -0500397#else
398
/* !CONFIG_SMP: there is no pushable list, so this is a no-op. */
static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
	/* intentionally empty */
}
402
/* !CONFIG_SMP: there is no pushable list, so this is a no-op. */
static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
	/* intentionally empty */
}
406
/* !CONFIG_SMP: migration accounting is compiled out. */
static inline void
inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	/* intentionally empty */
}
411
/* !CONFIG_SMP: migration accounting is compiled out. */
static inline void
dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	/* intentionally empty */
}
Gregory Haskins917b6272008-12-29 09:39:53 -0500416
Peter Zijlstradc877342014-02-12 15:47:29 +0100417static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
418{
419 return false;
420}
421
/* !CONFIG_SMP: pulling tasks is a no-op. */
static inline void pull_rt_task(struct rq *this_rq)
{
	/* intentionally empty */
}
425
/* !CONFIG_SMP: queueing a push callback is a no-op. */
static inline void rt_queue_push_tasks(struct rq *rq)
{
	/* intentionally empty */
}
Ingo Molnarbb44e5d2007-07-09 18:51:58 +0200429#endif /* CONFIG_SMP */
430
/* Top-level rt_rq queueing helpers; both are defined later in this file. */
static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
433
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100434static inline int on_rt_rq(struct sched_rt_entity *rt_se)
435{
Peter Zijlstraff77e462016-01-18 15:27:07 +0100436 return rt_se->on_rq;
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100437}
438
Peter Zijlstra052f1dc2008-02-13 15:45:40 +0100439#ifdef CONFIG_RT_GROUP_SCHED
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100440
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100441static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100442{
443 if (!rt_rq->tg)
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100444 return RUNTIME_INF;
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100445
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200446 return rt_rq->rt_runtime;
447}
448
449static inline u64 sched_rt_period(struct rt_rq *rt_rq)
450{
451 return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100452}
453
/* Cursor type for for_each_rt_rq(): with group scheduling it is a task group. */
typedef struct task_group *rt_rq_iter_t;
455
Yong Zhang1c09ab02011-06-28 10:51:31 +0800456static inline struct task_group *next_task_group(struct task_group *tg)
457{
458 do {
459 tg = list_entry_rcu(tg->list.next,
460 typeof(struct task_group), list);
461 } while (&tg->list != &task_groups && task_group_is_autogroup(tg));
462
463 if (&tg->list == &task_groups)
464 tg = NULL;
465
466 return tg;
467}
468
/*
 * Iterate over the RT runqueue that each task group (as returned by
 * next_task_group(), i.e. autogroups skipped) owns on the CPU of @_rq.
 *
 * @_rt_rq: struct rt_rq * loop variable
 * @_iter:  rt_rq_iter_t cursor
 * @_rq:    runqueue whose CPU selects the per-group rt_rq
 *
 * The walk stops at the end of the task_groups list, or early if a group
 * has a NULL rt_rq pointer for that CPU.
 *
 * The parameters carry a leading underscore on purpose: a parameter named
 * "rt_rq" would also be substituted into the ->rt_rq member access below,
 * making the macro work only for callers whose variable happens to be
 * called rt_rq. All arguments are parenthesised for the same reason.
 */
#define for_each_rt_rq(_rt_rq, _iter, _rq)				\
	for ((_iter) = container_of(&task_groups, typeof(*(_iter)), list); \
	     ((_iter) = next_task_group(_iter)) &&			\
	     ((_rt_rq) = (_iter)->rt_rq[cpu_of(_rq)]);)
Cheng Xuec514c42011-05-14 14:20:02 +0800473
/*
 * Walk from @rt_se up through its ->parent chain; @rt_se itself is used
 * as the cursor and is NULL when the loop finishes.
 */
#define for_each_sched_rt_entity(rt_se) \
	for (; (rt_se); (rt_se) = (rt_se)->parent)
476
477static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
478{
479 return rt_se->my_q;
480}
481
/* Entity queueing primitives; both are defined later in this file. */
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100484
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100485static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100486{
Dario Faggiolif6121f42008-10-03 17:40:46 +0200487 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
Kirill Tkhai88751252014-06-29 00:03:57 +0400488 struct rq *rq = rq_of_rt_rq(rt_rq);
Yong Zhang74b7eb52010-01-29 14:57:52 +0800489 struct sched_rt_entity *rt_se;
490
Kirill Tkhai88751252014-06-29 00:03:57 +0400491 int cpu = cpu_of(rq);
Balbir Singh0c3b9162011-03-03 17:04:35 +0530492
493 rt_se = rt_rq->tg->rt_se[cpu];
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100494
Dario Faggiolif6121f42008-10-03 17:40:46 +0200495 if (rt_rq->rt_nr_running) {
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +0400496 if (!rt_se)
497 enqueue_top_rt_rq(rt_rq);
498 else if (!on_rt_rq(rt_se))
Peter Zijlstraff77e462016-01-18 15:27:07 +0100499 enqueue_rt_entity(rt_se, 0);
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +0400500
Gregory Haskinse864c492008-12-29 09:39:49 -0500501 if (rt_rq->highest_prio.curr < curr->prio)
Kirill Tkhai88751252014-06-29 00:03:57 +0400502 resched_curr(rq);
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100503 }
504}
505
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100506static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100507{
Yong Zhang74b7eb52010-01-29 14:57:52 +0800508 struct sched_rt_entity *rt_se;
Balbir Singh0c3b9162011-03-03 17:04:35 +0530509 int cpu = cpu_of(rq_of_rt_rq(rt_rq));
Yong Zhang74b7eb52010-01-29 14:57:52 +0800510
Balbir Singh0c3b9162011-03-03 17:04:35 +0530511 rt_se = rt_rq->tg->rt_se[cpu];
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100512
Vincent Guittot296b2ff2018-06-26 15:53:22 +0200513 if (!rt_se) {
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +0400514 dequeue_top_rt_rq(rt_rq);
Vincent Guittot296b2ff2018-06-26 15:53:22 +0200515 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
516 cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
517 }
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +0400518 else if (on_rt_rq(rt_se))
Peter Zijlstraff77e462016-01-18 15:27:07 +0100519 dequeue_rt_entity(rt_se, 0);
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100520}
521
Kirill Tkhai46383642014-03-15 02:15:07 +0400522static inline int rt_rq_throttled(struct rt_rq *rt_rq)
523{
524 return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
525}
526
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +0100527static int rt_se_boosted(struct sched_rt_entity *rt_se)
528{
529 struct rt_rq *rt_rq = group_rt_rq(rt_se);
530 struct task_struct *p;
531
532 if (rt_rq)
533 return !!rt_rq->rt_nr_boosted;
534
535 p = rt_task_of(rt_se);
536 return p->prio != p->normal_prio;
537}
538
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200539#ifdef CONFIG_SMP
Rusty Russellc6c49272008-11-25 02:35:05 +1030540static inline const struct cpumask *sched_rt_period_mask(void)
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200541{
Nathan Zimmer424c93f2013-05-09 11:24:03 -0500542 return this_rq()->rd->span;
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200543}
544#else
Rusty Russellc6c49272008-11-25 02:35:05 +1030545static inline const struct cpumask *sched_rt_period_mask(void)
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200546{
Rusty Russellc6c49272008-11-25 02:35:05 +1030547 return cpu_online_mask;
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200548}
549#endif
550
551static inline
552struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
553{
554 return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
555}
556
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200557static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
558{
559 return &rt_rq->tg->rt_bandwidth;
560}
561
Dhaval Giani55e12e52008-06-24 23:39:43 +0530562#else /* !CONFIG_RT_GROUP_SCHED */
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100563
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100564static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100565{
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200566 return rt_rq->rt_runtime;
567}
568
569static inline u64 sched_rt_period(struct rt_rq *rt_rq)
570{
571 return ktime_to_ns(def_rt_bandwidth.rt_period);
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100572}
573
/* Cursor type for for_each_rt_rq(); unused by the non-group iteration. */
typedef struct rt_rq *rt_rq_iter_t;
575
/*
 * Without group scheduling there is exactly one RT runqueue per CPU, so
 * the loop body runs once with @_rt_rq pointing at @_rq's embedded rt_rq.
 *
 * @_rt_rq: struct rt_rq * loop variable
 * @_iter:  rt_rq_iter_t cursor; only evaluated (cast to void) so that it
 *          is not reported as unused
 * @_rq:    runqueue to visit
 *
 * Arguments are parenthesised so that an expression argument cannot bind
 * incorrectly against the & and -> operators.
 */
#define for_each_rt_rq(_rt_rq, _iter, _rq) \
	for ((void)(_iter), (_rt_rq) = &(_rq)->rt; (_rt_rq); (_rt_rq) = NULL)
578
/*
 * Entities have no parents without group scheduling: the body runs once
 * for a non-NULL @rt_se, which is then set to NULL.
 */
#define for_each_sched_rt_entity(rt_se) \
	for (; (rt_se); (rt_se) = NULL)
581
582static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
583{
584 return NULL;
585}
586
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100587static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100588{
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +0400589 struct rq *rq = rq_of_rt_rq(rt_rq);
590
591 if (!rt_rq->rt_nr_running)
592 return;
593
594 enqueue_top_rt_rq(rt_rq);
Kirill Tkhai88751252014-06-29 00:03:57 +0400595 resched_curr(rq);
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100596}
597
/* Take @rt_rq out of the runnable state; it is always the top-level one here. */
static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	/* No group entity to consider without CONFIG_RT_GROUP_SCHED. */
	dequeue_top_rt_rq(rt_rq);
}
602
Kirill Tkhai46383642014-03-15 02:15:07 +0400603static inline int rt_rq_throttled(struct rt_rq *rt_rq)
604{
605 return rt_rq->rt_throttled;
606}
607
Rusty Russellc6c49272008-11-25 02:35:05 +1030608static inline const struct cpumask *sched_rt_period_mask(void)
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200609{
Rusty Russellc6c49272008-11-25 02:35:05 +1030610 return cpu_online_mask;
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200611}
612
613static inline
614struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
615{
616 return &cpu_rq(cpu)->rt;
617}
618
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200619static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
620{
621 return &def_rt_bandwidth;
622}
623
Dhaval Giani55e12e52008-06-24 23:39:43 +0530624#endif /* CONFIG_RT_GROUP_SCHED */
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100625
Juri Lellifaa59932014-02-21 11:37:15 +0100626bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
627{
628 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
629
630 return (hrtimer_active(&rt_b->rt_period_timer) ||
631 rt_rq->rt_time < rt_b->rt_runtime);
632}
633
Peter Zijlstrab79f3832008-06-19 14:22:25 +0200634#ifdef CONFIG_SMP
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200635/*
636 * We ran out of runtime, see if we can borrow some from our neighbours.
637 */
Juri Lelli269b26a2015-09-02 11:01:36 +0100638static void do_balance_runtime(struct rt_rq *rt_rq)
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200639{
640 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
Shawn Bohreraa7f6732013-01-14 11:55:31 -0600641 struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
Juri Lelli269b26a2015-09-02 11:01:36 +0100642 int i, weight;
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200643 u64 rt_period;
644
Rusty Russellc6c49272008-11-25 02:35:05 +1030645 weight = cpumask_weight(rd->span);
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200646
Thomas Gleixner0986b112009-11-17 15:32:06 +0100647 raw_spin_lock(&rt_b->rt_runtime_lock);
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200648 rt_period = ktime_to_ns(rt_b->rt_period);
Rusty Russellc6c49272008-11-25 02:35:05 +1030649 for_each_cpu(i, rd->span) {
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200650 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
651 s64 diff;
652
653 if (iter == rt_rq)
654 continue;
655
Thomas Gleixner0986b112009-11-17 15:32:06 +0100656 raw_spin_lock(&iter->rt_runtime_lock);
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200657 /*
658 * Either all rqs have inf runtime and there's nothing to steal
659 * or __disable_runtime() below sets a specific rq to inf to
660 * indicate its been disabled and disalow stealing.
661 */
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200662 if (iter->rt_runtime == RUNTIME_INF)
663 goto next;
664
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200665 /*
666 * From runqueues with spare time, take 1/n part of their
667 * spare time, but no more than our period.
668 */
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200669 diff = iter->rt_runtime - iter->rt_time;
670 if (diff > 0) {
Peter Zijlstra58838cf2008-07-24 12:43:13 +0200671 diff = div_u64((u64)diff, weight);
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200672 if (rt_rq->rt_runtime + diff > rt_period)
673 diff = rt_period - rt_rq->rt_runtime;
674 iter->rt_runtime -= diff;
675 rt_rq->rt_runtime += diff;
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200676 if (rt_rq->rt_runtime == rt_period) {
Thomas Gleixner0986b112009-11-17 15:32:06 +0100677 raw_spin_unlock(&iter->rt_runtime_lock);
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200678 break;
679 }
680 }
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200681next:
Thomas Gleixner0986b112009-11-17 15:32:06 +0100682 raw_spin_unlock(&iter->rt_runtime_lock);
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200683 }
Thomas Gleixner0986b112009-11-17 15:32:06 +0100684 raw_spin_unlock(&rt_b->rt_runtime_lock);
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200685}
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200686
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200687/*
688 * Ensure this RQ takes back all the runtime it lend to its neighbours.
689 */
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200690static void __disable_runtime(struct rq *rq)
691{
692 struct root_domain *rd = rq->rd;
Cheng Xuec514c42011-05-14 14:20:02 +0800693 rt_rq_iter_t iter;
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200694 struct rt_rq *rt_rq;
695
696 if (unlikely(!scheduler_running))
697 return;
698
Cheng Xuec514c42011-05-14 14:20:02 +0800699 for_each_rt_rq(rt_rq, iter, rq) {
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200700 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
701 s64 want;
702 int i;
703
Thomas Gleixner0986b112009-11-17 15:32:06 +0100704 raw_spin_lock(&rt_b->rt_runtime_lock);
705 raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200706 /*
707 * Either we're all inf and nobody needs to borrow, or we're
708 * already disabled and thus have nothing to do, or we have
709 * exactly the right amount of runtime to take out.
710 */
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200711 if (rt_rq->rt_runtime == RUNTIME_INF ||
712 rt_rq->rt_runtime == rt_b->rt_runtime)
713 goto balanced;
Thomas Gleixner0986b112009-11-17 15:32:06 +0100714 raw_spin_unlock(&rt_rq->rt_runtime_lock);
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200715
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200716 /*
717 * Calculate the difference between what we started out with
718 * and what we current have, that's the amount of runtime
719 * we lend and now have to reclaim.
720 */
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200721 want = rt_b->rt_runtime - rt_rq->rt_runtime;
722
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200723 /*
724 * Greedy reclaim, take back as much as we can.
725 */
Rusty Russellc6c49272008-11-25 02:35:05 +1030726 for_each_cpu(i, rd->span) {
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200727 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
728 s64 diff;
729
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200730 /*
731 * Can't reclaim from ourselves or disabled runqueues.
732 */
Peter Zijlstraf1679d02008-08-14 15:49:00 +0200733 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200734 continue;
735
Thomas Gleixner0986b112009-11-17 15:32:06 +0100736 raw_spin_lock(&iter->rt_runtime_lock);
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200737 if (want > 0) {
738 diff = min_t(s64, iter->rt_runtime, want);
739 iter->rt_runtime -= diff;
740 want -= diff;
741 } else {
742 iter->rt_runtime -= want;
743 want -= want;
744 }
Thomas Gleixner0986b112009-11-17 15:32:06 +0100745 raw_spin_unlock(&iter->rt_runtime_lock);
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200746
747 if (!want)
748 break;
749 }
750
Thomas Gleixner0986b112009-11-17 15:32:06 +0100751 raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200752 /*
753 * We cannot be left wanting - that would mean some runtime
754 * leaked out of the system.
755 */
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200756 BUG_ON(want);
757balanced:
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200758 /*
759 * Disable all the borrow logic by pretending we have inf
760 * runtime - in which case borrowing doesn't make sense.
761 */
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200762 rt_rq->rt_runtime = RUNTIME_INF;
Peter Boonstoppela4c96ae2012-08-09 15:34:47 -0700763 rt_rq->rt_throttled = 0;
Thomas Gleixner0986b112009-11-17 15:32:06 +0100764 raw_spin_unlock(&rt_rq->rt_runtime_lock);
765 raw_spin_unlock(&rt_b->rt_runtime_lock);
Kirill Tkhai99b62562014-06-25 12:19:48 +0400766
767 /* Make rt_rq available for pick_next_task() */
768 sched_rt_rq_enqueue(rt_rq);
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200769 }
770}
771
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200772static void __enable_runtime(struct rq *rq)
773{
Cheng Xuec514c42011-05-14 14:20:02 +0800774 rt_rq_iter_t iter;
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200775 struct rt_rq *rt_rq;
776
777 if (unlikely(!scheduler_running))
778 return;
779
Peter Zijlstra78333cd2008-09-23 15:33:43 +0200780 /*
781 * Reset each runqueue's bandwidth settings
782 */
Cheng Xuec514c42011-05-14 14:20:02 +0800783 for_each_rt_rq(rt_rq, iter, rq) {
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200784 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
785
Thomas Gleixner0986b112009-11-17 15:32:06 +0100786 raw_spin_lock(&rt_b->rt_runtime_lock);
787 raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200788 rt_rq->rt_runtime = rt_b->rt_runtime;
789 rt_rq->rt_time = 0;
Zhang, Yanminbaf25732008-09-09 11:26:33 +0800790 rt_rq->rt_throttled = 0;
Thomas Gleixner0986b112009-11-17 15:32:06 +0100791 raw_spin_unlock(&rt_rq->rt_runtime_lock);
792 raw_spin_unlock(&rt_b->rt_runtime_lock);
Peter Zijlstra7def2be2008-06-05 14:49:58 +0200793 }
794}
795
Juri Lelli269b26a2015-09-02 11:01:36 +0100796static void balance_runtime(struct rt_rq *rt_rq)
Peter Zijlstraeff65492008-06-19 14:22:26 +0200797{
Peter Zijlstra4a6184c2011-10-06 22:39:14 +0200798 if (!sched_feat(RT_RUNTIME_SHARE))
Juri Lelli269b26a2015-09-02 11:01:36 +0100799 return;
Peter Zijlstra4a6184c2011-10-06 22:39:14 +0200800
Peter Zijlstraeff65492008-06-19 14:22:26 +0200801 if (rt_rq->rt_time > rt_rq->rt_runtime) {
Thomas Gleixner0986b112009-11-17 15:32:06 +0100802 raw_spin_unlock(&rt_rq->rt_runtime_lock);
Juri Lelli269b26a2015-09-02 11:01:36 +0100803 do_balance_runtime(rt_rq);
Thomas Gleixner0986b112009-11-17 15:32:06 +0100804 raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstraeff65492008-06-19 14:22:26 +0200805 }
Peter Zijlstraeff65492008-06-19 14:22:26 +0200806}
Dhaval Giani55e12e52008-06-24 23:39:43 +0530807#else /* !CONFIG_SMP */
/* !CONFIG_SMP: runtime balancing is compiled out, this is a no-op stub. */
static inline void balance_runtime(struct rt_rq *rt_rq)
{
}
Dhaval Giani55e12e52008-06-24 23:39:43 +0530809#endif /* CONFIG_SMP */
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100810
811static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
812{
Peter Zijlstra42c62a52011-10-18 22:03:48 +0200813 int i, idle = 1, throttled = 0;
Rusty Russellc6c49272008-11-25 02:35:05 +1030814 const struct cpumask *span;
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200815
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200816 span = sched_rt_period_mask();
Mike Galbraithe221d022012-08-07 10:02:38 +0200817#ifdef CONFIG_RT_GROUP_SCHED
818 /*
819 * FIXME: isolated CPUs should really leave the root task group,
820 * whether they are isolcpus or were isolated via cpusets, lest
821 * the timer run on a CPU which does not service all runqueues,
822 * potentially leaving other CPUs indefinitely throttled. If
823 * isolation is really required, the user will turn the throttle
824 * off to kill the perturbations it causes anyway. Meanwhile,
825 * this maintains functionality for boot and/or troubleshooting.
826 */
827 if (rt_b == &root_task_group.rt_bandwidth)
828 span = cpu_online_mask;
829#endif
Rusty Russellc6c49272008-11-25 02:35:05 +1030830 for_each_cpu(i, span) {
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200831 int enqueue = 0;
832 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
833 struct rq *rq = rq_of_rt_rq(rt_rq);
Dave Kleikampc249f252017-05-15 14:14:13 -0500834 int skip;
835
836 /*
837 * When span == cpu_online_mask, taking each rq->lock
838 * can be time-consuming. Try to avoid it when possible.
839 */
840 raw_spin_lock(&rt_rq->rt_runtime_lock);
Hailong Liuf3d133e2018-07-18 08:46:55 +0800841 if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
842 rt_rq->rt_runtime = rt_b->rt_runtime;
Dave Kleikampc249f252017-05-15 14:14:13 -0500843 skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
844 raw_spin_unlock(&rt_rq->rt_runtime_lock);
845 if (skip)
846 continue;
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200847
Thomas Gleixner05fa7852009-11-17 14:28:38 +0100848 raw_spin_lock(&rq->lock);
Davidlohr Buesod29a2062018-04-02 09:49:54 -0700849 update_rq_clock(rq);
850
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200851 if (rt_rq->rt_time) {
852 u64 runtime;
853
Thomas Gleixner0986b112009-11-17 15:32:06 +0100854 raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstraeff65492008-06-19 14:22:26 +0200855 if (rt_rq->rt_throttled)
856 balance_runtime(rt_rq);
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200857 runtime = rt_rq->rt_runtime;
858 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
859 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
860 rt_rq->rt_throttled = 0;
861 enqueue = 1;
Mike Galbraith61eadef2011-04-29 08:36:50 +0200862
863 /*
Peter Zijlstra9edfbfe2015-01-05 11:18:11 +0100864 * When we're idle and a woken (rt) task is
865 * throttled check_preempt_curr() will set
866 * skip_update and the time between the wakeup
867 * and this unthrottle will get accounted as
868 * 'runtime'.
Mike Galbraith61eadef2011-04-29 08:36:50 +0200869 */
870 if (rt_rq->rt_nr_running && rq->curr == rq->idle)
Davidlohr Buesoadcc8da2018-04-04 09:15:39 -0700871 rq_clock_cancel_skipupdate(rq);
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200872 }
873 if (rt_rq->rt_time || rt_rq->rt_nr_running)
874 idle = 0;
Thomas Gleixner0986b112009-11-17 15:32:06 +0100875 raw_spin_unlock(&rt_rq->rt_runtime_lock);
Balbir Singh0c3b9162011-03-03 17:04:35 +0530876 } else if (rt_rq->rt_nr_running) {
Peter Zijlstra8a8cde12008-06-19 14:22:28 +0200877 idle = 0;
Balbir Singh0c3b9162011-03-03 17:04:35 +0530878 if (!rt_rq_throttled(rt_rq))
879 enqueue = 1;
880 }
Peter Zijlstra42c62a52011-10-18 22:03:48 +0200881 if (rt_rq->rt_throttled)
882 throttled = 1;
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200883
884 if (enqueue)
885 sched_rt_rq_enqueue(rt_rq);
Thomas Gleixner05fa7852009-11-17 14:28:38 +0100886 raw_spin_unlock(&rq->lock);
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200887 }
888
Peter Zijlstra42c62a52011-10-18 22:03:48 +0200889 if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
890 return 1;
891
Peter Zijlstrad0b27fa2008-04-19 19:44:57 +0200892 return idle;
893}
894
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100895static inline int rt_se_prio(struct sched_rt_entity *rt_se)
896{
Peter Zijlstra052f1dc2008-02-13 15:45:40 +0100897#ifdef CONFIG_RT_GROUP_SCHED
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100898 struct rt_rq *rt_rq = group_rt_rq(rt_se);
899
900 if (rt_rq)
Gregory Haskinse864c492008-12-29 09:39:49 -0500901 return rt_rq->highest_prio.curr;
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100902#endif
903
904 return rt_task_of(rt_se)->prio;
905}
906
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100907static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100908{
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100909 u64 runtime = sched_rt_runtime(rt_rq);
Peter Zijlstrafa85ae22008-01-25 21:08:29 +0100910
Peter Zijlstrafa85ae22008-01-25 21:08:29 +0100911 if (rt_rq->rt_throttled)
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +0100912 return rt_rq_throttled(rt_rq);
Peter Zijlstrafa85ae22008-01-25 21:08:29 +0100913
Shan Hai5b680fd2011-11-29 11:03:56 +0800914 if (runtime >= sched_rt_period(rt_rq))
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200915 return 0;
916
Peter Zijlstrab79f3832008-06-19 14:22:25 +0200917 balance_runtime(rt_rq);
918 runtime = sched_rt_runtime(rt_rq);
919 if (runtime == RUNTIME_INF)
920 return 0;
Peter Zijlstraac086bc2008-04-19 19:44:58 +0200921
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100922 if (rt_rq->rt_time > runtime) {
Peter Zijlstra7abc63b2011-10-18 22:03:48 +0200923 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
924
925 /*
926 * Don't actually throttle groups that have no runtime assigned
927 * but accrue some time due to boosting.
928 */
929 if (likely(rt_b->rt_runtime)) {
930 rt_rq->rt_throttled = 1;
John Stultzc2248152014-06-04 16:11:41 -0700931 printk_deferred_once("sched: RT throttling activated\n");
Peter Zijlstra7abc63b2011-10-18 22:03:48 +0200932 } else {
933 /*
934 * In case we did anyway, make it go away,
935 * replenishment is a joke, since it will replenish us
936 * with exactly 0 ns.
937 */
938 rt_rq->rt_time = 0;
939 }
940
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +0100941 if (rt_rq_throttled(rt_rq)) {
Peter Zijlstra9f0c1e52008-02-13 15:45:39 +0100942 sched_rt_rq_dequeue(rt_rq);
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +0100943 return 1;
944 }
Peter Zijlstrafa85ae22008-01-25 21:08:29 +0100945 }
946
947 return 0;
948}
949
Ingo Molnarbb44e5d2007-07-09 18:51:58 +0200950/*
951 * Update the current task's runtime statistics. Skip current tasks that
952 * are not in our scheduling class.
953 */
Alexey Dobriyana9957442007-10-15 17:00:13 +0200954static void update_curr_rt(struct rq *rq)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +0200955{
956 struct task_struct *curr = rq->curr;
Peter Zijlstra6f505b12008-01-25 21:08:30 +0100957 struct sched_rt_entity *rt_se = &curr->rt;
Ingo Molnarbb44e5d2007-07-09 18:51:58 +0200958 u64 delta_exec;
Wen Yanga7711602018-02-06 09:53:28 +0800959 u64 now;
Ingo Molnarbb44e5d2007-07-09 18:51:58 +0200960
Peter Zijlstra06c3bc62011-02-02 13:19:48 +0100961 if (curr->sched_class != &rt_sched_class)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +0200962 return;
963
Wen Yanga7711602018-02-06 09:53:28 +0800964 now = rq_clock_task(rq);
Wen Yange7ad2032018-02-05 11:18:41 +0800965 delta_exec = now - curr->se.exec_start;
Kirill Tkhaifc79e242013-01-30 16:50:36 +0400966 if (unlikely((s64)delta_exec <= 0))
967 return;
Ingo Molnar6cfb0d52007-08-02 17:41:40 +0200968
Peter Zijlstra42c62a52011-10-18 22:03:48 +0200969 schedstat_set(curr->se.statistics.exec_max,
970 max(curr->se.statistics.exec_max, delta_exec));
Ingo Molnarbb44e5d2007-07-09 18:51:58 +0200971
972 curr->se.sum_exec_runtime += delta_exec;
Frank Mayharf06febc2008-09-12 09:54:39 -0700973 account_group_exec_runtime(curr, delta_exec);
974
Wen Yange7ad2032018-02-05 11:18:41 +0800975 curr->se.exec_start = now;
Tejun Heod2cc5ed2017-09-25 08:12:04 -0700976 cgroup_account_cputime(curr, delta_exec);
Peter Zijlstrafa85ae22008-01-25 21:08:29 +0100977
Peter Zijlstra0b148fa2008-08-19 12:33:04 +0200978 if (!rt_bandwidth_enabled())
979 return;
980
Dhaval Giani354d60c2008-04-19 19:44:59 +0200981 for_each_sched_rt_entity(rt_se) {
Giedrius Rekasius0b079392014-05-25 15:23:31 +0100982 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
Dhaval Giani354d60c2008-04-19 19:44:59 +0200983
Peter Zijlstracc2991c2008-08-19 12:33:03 +0200984 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
Thomas Gleixner0986b112009-11-17 15:32:06 +0100985 raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstracc2991c2008-08-19 12:33:03 +0200986 rt_rq->rt_time += delta_exec;
987 if (sched_rt_runtime_exceeded(rt_rq))
Kirill Tkhai88751252014-06-29 00:03:57 +0400988 resched_curr(rq);
Thomas Gleixner0986b112009-11-17 15:32:06 +0100989 raw_spin_unlock(&rt_rq->rt_runtime_lock);
Peter Zijlstracc2991c2008-08-19 12:33:03 +0200990 }
Dhaval Giani354d60c2008-04-19 19:44:59 +0200991 }
Ingo Molnarbb44e5d2007-07-09 18:51:58 +0200992}
993
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +0400994static void
995dequeue_top_rt_rq(struct rt_rq *rt_rq)
996{
997 struct rq *rq = rq_of_rt_rq(rt_rq);
998
999 BUG_ON(&rq->rt != rt_rq);
1000
1001 if (!rt_rq->rt_queued)
1002 return;
1003
1004 BUG_ON(!rq->nr_running);
1005
Kirill Tkhai72465442014-05-09 03:00:14 +04001006 sub_nr_running(rq, rt_rq->rt_nr_running);
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001007 rt_rq->rt_queued = 0;
Peter Zijlstra8f111bc2017-12-20 16:26:12 +01001008
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001009}
1010
1011static void
1012enqueue_top_rt_rq(struct rt_rq *rt_rq)
1013{
1014 struct rq *rq = rq_of_rt_rq(rt_rq);
1015
1016 BUG_ON(&rq->rt != rt_rq);
1017
1018 if (rt_rq->rt_queued)
1019 return;
Vincent Guittot296b2ff2018-06-26 15:53:22 +02001020
1021 if (rt_rq_throttled(rt_rq))
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001022 return;
1023
Vincent Guittot296b2ff2018-06-26 15:53:22 +02001024 if (rt_rq->rt_nr_running) {
1025 add_nr_running(rq, rt_rq->rt_nr_running);
1026 rt_rq->rt_queued = 1;
1027 }
Peter Zijlstra8f111bc2017-12-20 16:26:12 +01001028
1029 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
1030 cpufreq_update_util(rq, 0);
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001031}
1032
Gregory Haskins398a1532009-01-14 09:10:04 -05001033#if defined CONFIG_SMP
Gregory Haskinse864c492008-12-29 09:39:49 -05001034
Gregory Haskins398a1532009-01-14 09:10:04 -05001035static void
1036inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
Steven Rostedt63489e42008-01-25 21:08:03 +01001037{
Gregory Haskins4d984272008-12-29 09:39:49 -05001038 struct rq *rq = rq_of_rt_rq(rt_rq);
Gregory Haskins4d984272008-12-29 09:39:49 -05001039
Kirill Tkhai757dfca2013-11-27 19:59:13 +04001040#ifdef CONFIG_RT_GROUP_SCHED
1041 /*
1042 * Change rq's cpupri only if rt_rq is the top queue.
1043 */
1044 if (&rq->rt != rt_rq)
1045 return;
1046#endif
Steven Rostedt5181f4a42011-06-16 21:55:23 -04001047 if (rq->online && prio < prev_prio)
1048 cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
Steven Rostedt63489e42008-01-25 21:08:03 +01001049}
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001050
Gregory Haskins398a1532009-01-14 09:10:04 -05001051static void
1052dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
Steven Rostedt63489e42008-01-25 21:08:03 +01001053{
Gregory Haskins4d984272008-12-29 09:39:49 -05001054 struct rq *rq = rq_of_rt_rq(rt_rq);
Gregory Haskins6e0534f2008-05-12 21:21:01 +02001055
Kirill Tkhai757dfca2013-11-27 19:59:13 +04001056#ifdef CONFIG_RT_GROUP_SCHED
1057 /*
1058 * Change rq's cpupri only if rt_rq is the top queue.
1059 */
1060 if (&rq->rt != rt_rq)
1061 return;
1062#endif
Gregory Haskins398a1532009-01-14 09:10:04 -05001063 if (rq->online && rt_rq->highest_prio.curr != prev_prio)
1064 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
1065}
1066
1067#else /* CONFIG_SMP */
1068
/* !CONFIG_SMP: no-op stub. */
static inline void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
}
/* !CONFIG_SMP: no-op stub. */
static inline void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
}
1073
1074#endif /* CONFIG_SMP */
1075
Steven Rostedt63489e42008-01-25 21:08:03 +01001076#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
Gregory Haskins398a1532009-01-14 09:10:04 -05001077static void
1078inc_rt_prio(struct rt_rq *rt_rq, int prio)
1079{
1080 int prev_prio = rt_rq->highest_prio.curr;
Steven Rostedt63489e42008-01-25 21:08:03 +01001081
Gregory Haskins398a1532009-01-14 09:10:04 -05001082 if (prio < prev_prio)
1083 rt_rq->highest_prio.curr = prio;
1084
1085 inc_rt_prio_smp(rt_rq, prio, prev_prio);
1086}
1087
1088static void
1089dec_rt_prio(struct rt_rq *rt_rq, int prio)
1090{
1091 int prev_prio = rt_rq->highest_prio.curr;
1092
1093 if (rt_rq->rt_nr_running) {
1094
1095 WARN_ON(prio < prev_prio);
Gregory Haskinse864c492008-12-29 09:39:49 -05001096
1097 /*
Gregory Haskins398a1532009-01-14 09:10:04 -05001098 * This may have been our highest task, and therefore
1099 * we may have some recomputation to do
Gregory Haskinse864c492008-12-29 09:39:49 -05001100 */
Gregory Haskins398a1532009-01-14 09:10:04 -05001101 if (prio == prev_prio) {
Gregory Haskinse864c492008-12-29 09:39:49 -05001102 struct rt_prio_array *array = &rt_rq->active;
1103
1104 rt_rq->highest_prio.curr =
Steven Rostedt764a9d62008-01-25 21:08:04 +01001105 sched_find_first_bit(array->bitmap);
Gregory Haskinse864c492008-12-29 09:39:49 -05001106 }
1107
Steven Rostedt764a9d62008-01-25 21:08:04 +01001108 } else
Gregory Haskinse864c492008-12-29 09:39:49 -05001109 rt_rq->highest_prio.curr = MAX_RT_PRIO;
Gregory Haskins73fe6aa2008-01-25 21:08:07 +01001110
Gregory Haskins398a1532009-01-14 09:10:04 -05001111 dec_rt_prio_smp(rt_rq, prio, prev_prio);
1112}
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -04001113
Gregory Haskins398a1532009-01-14 09:10:04 -05001114#else
1115
/* Neither CONFIG_SMP nor CONFIG_RT_GROUP_SCHED: no-op stub. */
static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
}
/* Neither CONFIG_SMP nor CONFIG_RT_GROUP_SCHED: no-op stub. */
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
}
1118
1119#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
1120
Gregory Haskins73fe6aa2008-01-25 21:08:07 +01001121#ifdef CONFIG_RT_GROUP_SCHED
Gregory Haskins398a1532009-01-14 09:10:04 -05001122
1123static void
1124inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1125{
Gregory Haskins73fe6aa2008-01-25 21:08:07 +01001126 if (rt_se_boosted(rt_se))
Steven Rostedt764a9d62008-01-25 21:08:04 +01001127 rt_rq->rt_nr_boosted++;
Peter Zijlstra052f1dc2008-02-13 15:45:40 +01001128
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +01001129 if (rt_rq->tg)
1130 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
Gregory Haskins398a1532009-01-14 09:10:04 -05001131}
1132
1133static void
1134dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1135{
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +01001136 if (rt_se_boosted(rt_se))
1137 rt_rq->rt_nr_boosted--;
1138
1139 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
Gregory Haskins398a1532009-01-14 09:10:04 -05001140}
1141
1142#else /* CONFIG_RT_GROUP_SCHED */
1143
1144static void
1145inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1146{
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001147 start_rt_bandwidth(&def_rt_bandwidth);
Gregory Haskins398a1532009-01-14 09:10:04 -05001148}
1149
/* !CONFIG_RT_GROUP_SCHED: no-op stub. */
static inline void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}
1152
1153#endif /* CONFIG_RT_GROUP_SCHED */
1154
1155static inline
Kirill Tkhai22abdef2014-03-15 02:14:49 +04001156unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
1157{
1158 struct rt_rq *group_rq = group_rt_rq(rt_se);
1159
1160 if (group_rq)
1161 return group_rq->rt_nr_running;
1162 else
1163 return 1;
1164}
1165
1166static inline
Frederic Weisbecker01d36d02015-11-04 18:17:10 +01001167unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
1168{
1169 struct rt_rq *group_rq = group_rt_rq(rt_se);
1170 struct task_struct *tsk;
1171
1172 if (group_rq)
1173 return group_rq->rr_nr_running;
1174
1175 tsk = rt_task_of(rt_se);
1176
1177 return (tsk->policy == SCHED_RR) ? 1 : 0;
1178}
1179
1180static inline
Gregory Haskins398a1532009-01-14 09:10:04 -05001181void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1182{
1183 int prio = rt_se_prio(rt_se);
1184
1185 WARN_ON(!rt_prio(prio));
Kirill Tkhai22abdef2014-03-15 02:14:49 +04001186 rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
Frederic Weisbecker01d36d02015-11-04 18:17:10 +01001187 rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
Gregory Haskins398a1532009-01-14 09:10:04 -05001188
1189 inc_rt_prio(rt_rq, prio);
1190 inc_rt_migration(rt_se, rt_rq);
1191 inc_rt_group(rt_se, rt_rq);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001192}
1193
Peter Zijlstra23b0fdf2008-02-13 15:45:39 +01001194static inline
1195void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
1196{
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001197 WARN_ON(!rt_prio(rt_se_prio(rt_se)));
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001198 WARN_ON(!rt_rq->rt_nr_running);
Kirill Tkhai22abdef2014-03-15 02:14:49 +04001199 rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
Frederic Weisbecker01d36d02015-11-04 18:17:10 +01001200 rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001201
Gregory Haskins398a1532009-01-14 09:10:04 -05001202 dec_rt_prio(rt_rq, rt_se_prio(rt_se));
1203 dec_rt_migration(rt_se, rt_rq);
1204 dec_rt_group(rt_se, rt_rq);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001205}
1206
Peter Zijlstraff77e462016-01-18 15:27:07 +01001207/*
1208 * Change rt_se->run_list location unless SAVE && !MOVE
1209 *
1210 * assumes ENQUEUE/DEQUEUE flags match
1211 */
1212static inline bool move_entity(unsigned int flags)
1213{
1214 if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
1215 return false;
1216
1217 return true;
1218}
1219
1220static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
1221{
1222 list_del_init(&rt_se->run_list);
1223
1224 if (list_empty(array->queue + rt_se_prio(rt_se)))
1225 __clear_bit(rt_se_prio(rt_se), array->bitmap);
1226
1227 rt_se->on_list = 0;
1228}
1229
1230static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001231{
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001232 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1233 struct rt_prio_array *array = &rt_rq->active;
1234 struct rt_rq *group_rq = group_rt_rq(rt_se);
Dmitry Adamushko20b63312008-06-11 00:58:30 +02001235 struct list_head *queue = array->queue + rt_se_prio(rt_se);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001236
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001237 /*
1238 * Don't enqueue the group if its throttled, or when empty.
1239 * The latter is a consequence of the former when a child group
1240 * get throttled and the current group doesn't have any other
1241 * active members.
1242 */
Peter Zijlstraff77e462016-01-18 15:27:07 +01001243 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
1244 if (rt_se->on_list)
1245 __delist_rt_entity(rt_se, array);
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001246 return;
Peter Zijlstraff77e462016-01-18 15:27:07 +01001247 }
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001248
Peter Zijlstraff77e462016-01-18 15:27:07 +01001249 if (move_entity(flags)) {
1250 WARN_ON_ONCE(rt_se->on_list);
1251 if (flags & ENQUEUE_HEAD)
1252 list_add(&rt_se->run_list, queue);
1253 else
1254 list_add_tail(&rt_se->run_list, queue);
1255
1256 __set_bit(rt_se_prio(rt_se), array->bitmap);
1257 rt_se->on_list = 1;
1258 }
1259 rt_se->on_rq = 1;
Peter Zijlstra78f2c7d2008-01-25 21:08:27 +01001260
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001261 inc_rt_tasks(rt_se, rt_rq);
1262}
1263
Peter Zijlstraff77e462016-01-18 15:27:07 +01001264static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001265{
1266 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
1267 struct rt_prio_array *array = &rt_rq->active;
1268
Peter Zijlstraff77e462016-01-18 15:27:07 +01001269 if (move_entity(flags)) {
1270 WARN_ON_ONCE(!rt_se->on_list);
1271 __delist_rt_entity(rt_se, array);
1272 }
1273 rt_se->on_rq = 0;
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001274
1275 dec_rt_tasks(rt_se, rt_rq);
1276}
1277
1278/*
1279 * Because the prio of an upper entry depends on the lower
1280 * entries, we must remove entries top - down.
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001281 */
Peter Zijlstraff77e462016-01-18 15:27:07 +01001282static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001283{
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001284 struct sched_rt_entity *back = NULL;
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001285
Peter Zijlstra58d6c2d2008-04-19 19:45:00 +02001286 for_each_sched_rt_entity(rt_se) {
1287 rt_se->back = back;
1288 back = rt_se;
1289 }
1290
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001291 dequeue_top_rt_rq(rt_rq_of_se(back));
1292
Peter Zijlstra58d6c2d2008-04-19 19:45:00 +02001293 for (rt_se = back; rt_se; rt_se = rt_se->back) {
1294 if (on_rt_rq(rt_se))
Peter Zijlstraff77e462016-01-18 15:27:07 +01001295 __dequeue_rt_entity(rt_se, flags);
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001296 }
1297}
1298
Peter Zijlstraff77e462016-01-18 15:27:07 +01001299static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001300{
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001301 struct rq *rq = rq_of_rt_se(rt_se);
1302
Peter Zijlstraff77e462016-01-18 15:27:07 +01001303 dequeue_rt_stack(rt_se, flags);
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001304 for_each_sched_rt_entity(rt_se)
Peter Zijlstraff77e462016-01-18 15:27:07 +01001305 __enqueue_rt_entity(rt_se, flags);
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001306 enqueue_top_rt_rq(&rq->rt);
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001307}
1308
Peter Zijlstraff77e462016-01-18 15:27:07 +01001309static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001310{
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001311 struct rq *rq = rq_of_rt_se(rt_se);
1312
Peter Zijlstraff77e462016-01-18 15:27:07 +01001313 dequeue_rt_stack(rt_se, flags);
Peter Zijlstraad2a3f12008-06-19 09:06:57 +02001314
1315 for_each_sched_rt_entity(rt_se) {
1316 struct rt_rq *rt_rq = group_rt_rq(rt_se);
1317
1318 if (rt_rq && rt_rq->rt_nr_running)
Peter Zijlstraff77e462016-01-18 15:27:07 +01001319 __enqueue_rt_entity(rt_se, flags);
Peter Zijlstra58d6c2d2008-04-19 19:45:00 +02001320 }
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001321 enqueue_top_rt_rq(&rq->rt);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001322}
1323
1324/*
1325 * Adding/removing a task to/from a priority array:
1326 */
Thomas Gleixnerea87bb72010-01-20 20:58:57 +00001327static void
Peter Zijlstra371fd7e2010-03-24 16:38:48 +01001328enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001329{
1330 struct sched_rt_entity *rt_se = &p->rt;
1331
Peter Zijlstra371fd7e2010-03-24 16:38:48 +01001332 if (flags & ENQUEUE_WAKEUP)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001333 rt_se->timeout = 0;
1334
Peter Zijlstraff77e462016-01-18 15:27:07 +01001335 enqueue_rt_entity(rt_se, flags);
Peter Zijlstrac09595f2008-06-27 13:41:14 +02001336
Ingo Molnar4b53a342017-02-05 15:41:03 +01001337 if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
Gregory Haskins917b6272008-12-29 09:39:53 -05001338 enqueue_pushable_task(rq, p);
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001339}
1340
Peter Zijlstra371fd7e2010-03-24 16:38:48 +01001341static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001342{
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001343 struct sched_rt_entity *rt_se = &p->rt;
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001344
1345 update_curr_rt(rq);
Peter Zijlstraff77e462016-01-18 15:27:07 +01001346 dequeue_rt_entity(rt_se, flags);
Peter Zijlstrac09595f2008-06-27 13:41:14 +02001347
Gregory Haskins917b6272008-12-29 09:39:53 -05001348 dequeue_pushable_task(rq, p);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001349}
1350
1351/*
Richard Weinberger60686312011-11-12 18:07:57 +01001352 * Put task to the head or the end of the run list without the overhead of
1353 * dequeue followed by enqueue.
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001354 */
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001355static void
1356requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001357{
Ingo Molnar1cdad712008-06-19 09:09:15 +02001358 if (on_rt_rq(rt_se)) {
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001359 struct rt_prio_array *array = &rt_rq->active;
1360 struct list_head *queue = array->queue + rt_se_prio(rt_se);
1361
1362 if (head)
1363 list_move(&rt_se->run_list, queue);
1364 else
1365 list_move_tail(&rt_se->run_list, queue);
Ingo Molnar1cdad712008-06-19 09:09:15 +02001366 }
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001367}
1368
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001369static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001370{
1371 struct sched_rt_entity *rt_se = &p->rt;
1372 struct rt_rq *rt_rq;
1373
1374 for_each_sched_rt_entity(rt_se) {
1375 rt_rq = rt_rq_of_se(rt_se);
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001376 requeue_rt_entity(rt_rq, rt_se, head);
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001377 }
1378}
1379
1380static void yield_task_rt(struct rq *rq)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001381{
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001382 requeue_task_rt(rq, rq->curr, 0);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001383}
1384
Gregory Haskinse7693a32008-01-25 21:08:09 +01001385#ifdef CONFIG_SMP
Gregory Haskins318e0892008-01-25 21:08:10 +01001386static int find_lowest_rq(struct task_struct *task);
1387
Peter Zijlstra0017d732010-03-24 18:34:10 +01001388static int
Peter Zijlstraac66f542013-10-07 11:29:16 +01001389select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags)
Gregory Haskinse7693a32008-01-25 21:08:09 +01001390{
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001391 struct task_struct *curr;
1392 struct rq *rq;
Steven Rostedtc37495f2011-06-16 21:55:22 -04001393
1394 /* For anything but wake ups, just return the task_cpu */
1395 if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
1396 goto out;
1397
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001398 rq = cpu_rq(cpu);
1399
1400 rcu_read_lock();
Jason Low316c1608d2015-04-28 13:00:20 -07001401 curr = READ_ONCE(rq->curr); /* unlocked access */
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001402
Gregory Haskins318e0892008-01-25 21:08:10 +01001403 /*
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001404 * If the current task on @p's runqueue is an RT task, then
Steven Rostedte1f47d82008-01-25 21:08:12 +01001405 * try to see if we can wake this RT task up on another
1406 * runqueue. Otherwise simply start this RT task
1407 * on its current runqueue.
1408 *
Steven Rostedt43fa5462010-09-20 22:40:03 -04001409 * We want to avoid overloading runqueues. If the woken
1410 * task is a higher priority, then it will stay on this CPU
1411 * and the lower prio task should be moved to another CPU.
1412 * Even though this will probably make the lower prio task
1413 * lose its cache, we do not want to bounce a higher task
1414 * around just because it gave up its CPU, perhaps for a
1415 * lock?
1416 *
1417 * For equal prio tasks, we just let the scheduler sort it out.
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001418 *
Gregory Haskins318e0892008-01-25 21:08:10 +01001419 * Otherwise, just let it ride on the affined RQ and the
1420 * post-schedule router will push the preempted task away
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001421 *
1422 * This test is optimistic, if we get it wrong the load-balancer
1423 * will have to sort it out.
Gregory Haskins318e0892008-01-25 21:08:10 +01001424 */
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001425 if (curr && unlikely(rt_task(curr)) &&
Ingo Molnar4b53a342017-02-05 15:41:03 +01001426 (curr->nr_cpus_allowed < 2 ||
Shawn Bohrer6bfa6872013-10-04 14:24:53 -05001427 curr->prio <= p->prio)) {
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001428 int target = find_lowest_rq(p);
1429
Tim Chen80e3d872014-12-12 15:38:12 -08001430 /*
1431 * Don't bother moving it if the destination CPU is
1432 * not running a lower priority task.
1433 */
1434 if (target != -1 &&
1435 p->prio < cpu_rq(target)->rt.highest_prio.curr)
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001436 cpu = target;
1437 }
1438 rcu_read_unlock();
1439
Steven Rostedtc37495f2011-06-16 21:55:22 -04001440out:
Peter Zijlstra7608dec2011-04-05 17:23:46 +02001441 return cpu;
Gregory Haskinse7693a32008-01-25 21:08:09 +01001442}
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001443
1444static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
1445{
Wanpeng Li308a6232014-10-31 06:39:31 +08001446 /*
1447 * Current can't be migrated, useless to reschedule,
1448 * let's hope p can move out.
1449 */
Ingo Molnar4b53a342017-02-05 15:41:03 +01001450 if (rq->curr->nr_cpus_allowed == 1 ||
Wanpeng Li308a6232014-10-31 06:39:31 +08001451 !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001452 return;
1453
Wanpeng Li308a6232014-10-31 06:39:31 +08001454 /*
1455 * p is migratable, so let's not schedule it and
1456 * see if it is pushed or pulled somewhere else.
1457 */
Ingo Molnar4b53a342017-02-05 15:41:03 +01001458 if (p->nr_cpus_allowed != 1
Rusty Russell13b8bd02009-03-25 15:01:22 +10301459 && cpupri_find(&rq->rd->cpupri, p, NULL))
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001460 return;
1461
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001462 /*
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001463 * There appear to be other CPUs that can accept
1464 * the current task but none can run 'p', so lets reschedule
1465 * to try and push the current task away:
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001466 */
1467 requeue_task_rt(rq, p, 1);
Kirill Tkhai88751252014-06-29 00:03:57 +04001468 resched_curr(rq);
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001469}
1470
Gregory Haskinse7693a32008-01-25 21:08:09 +01001471#endif /* CONFIG_SMP */
1472
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001473/*
1474 * Preempt the current task with a newly woken task if needed:
1475 */
Peter Zijlstra7d478722009-09-14 19:55:44 +02001476static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001477{
Gregory Haskins45c01e82008-05-12 21:20:41 +02001478 if (p->prio < rq->curr->prio) {
Kirill Tkhai88751252014-06-29 00:03:57 +04001479 resched_curr(rq);
Gregory Haskins45c01e82008-05-12 21:20:41 +02001480 return;
1481 }
1482
1483#ifdef CONFIG_SMP
1484 /*
1485 * If:
1486 *
1487 * - the newly woken task is of equal priority to the current task
1488 * - the newly woken task is non-migratable while current is migratable
1489 * - current will be preempted on the next reschedule
1490 *
1491 * we should check to see if current can readily move to a different
1492 * cpu. If so, we will reschedule to allow the push logic to try
1493 * to move current somewhere else, making room for our non-migratable
1494 * task.
1495 */
Hillf Danton8dd0de82011-06-14 18:36:24 -04001496 if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
Dmitry Adamushko7ebefa82008-07-01 23:32:15 +02001497 check_preempt_equal_prio(rq, p);
Gregory Haskins45c01e82008-05-12 21:20:41 +02001498#endif
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001499}
1500
Peter Zijlstraf95d4ea2019-05-29 20:36:40 +00001501static inline void set_next_task_rt(struct rq *rq, struct task_struct *p)
Muchun Songff1cdc92018-10-26 21:17:43 +08001502{
1503 p->se.exec_start = rq_clock_task(rq);
1504
1505 /* The running task is never eligible for pushing */
1506 dequeue_pushable_task(rq, p);
Peter Zijlstraf95d4ea2019-05-29 20:36:40 +00001507
1508 /*
1509 * If prev task was rt, put_prev_task() has already updated the
1510 * utilization. We only care of the case where we start to schedule a
1511 * rt task
1512 */
1513 if (rq->curr->sched_class != &rt_sched_class)
1514 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
1515
1516 rt_queue_push_tasks(rq);
Muchun Songff1cdc92018-10-26 21:17:43 +08001517}
1518
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001519static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
1520 struct rt_rq *rt_rq)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001521{
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001522 struct rt_prio_array *array = &rt_rq->active;
1523 struct sched_rt_entity *next = NULL;
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001524 struct list_head *queue;
1525 int idx;
1526
1527 idx = sched_find_first_bit(array->bitmap);
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001528 BUG_ON(idx >= MAX_RT_PRIO);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001529
1530 queue = array->queue + idx;
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001531 next = list_entry(queue->next, struct sched_rt_entity, run_list);
Dmitry Adamushko326587b2008-01-25 21:08:34 +01001532
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001533 return next;
1534}
1535
Gregory Haskins917b6272008-12-29 09:39:53 -05001536static struct task_struct *_pick_next_task_rt(struct rq *rq)
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001537{
1538 struct sched_rt_entity *rt_se;
Peter Zijlstra606dba22012-02-11 06:05:00 +01001539 struct rt_rq *rt_rq = &rq->rt;
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001540
1541 do {
1542 rt_se = pick_next_rt_entity(rq, rt_rq);
Dmitry Adamushko326587b2008-01-25 21:08:34 +01001543 BUG_ON(!rt_se);
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001544 rt_rq = group_rt_rq(rt_se);
1545 } while (rt_rq);
1546
Muchun Songff1cdc92018-10-26 21:17:43 +08001547 return rt_task_of(rt_se);
Gregory Haskins917b6272008-12-29 09:39:53 -05001548}
1549
Peter Zijlstra606dba22012-02-11 06:05:00 +01001550static struct task_struct *
Matt Flemingd8ac8972016-09-21 14:38:10 +01001551pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
Gregory Haskins917b6272008-12-29 09:39:53 -05001552{
Peter Zijlstra606dba22012-02-11 06:05:00 +01001553 struct task_struct *p;
1554 struct rt_rq *rt_rq = &rq->rt;
1555
Peter Zijlstra37e117c2014-02-14 12:25:08 +01001556 if (need_pull_rt_task(rq, prev)) {
Peter Zijlstracbce1a62015-06-11 14:46:54 +02001557 /*
1558 * This is OK, because current is on_cpu, which avoids it being
1559 * picked for load-balance and preemption/IRQs are still
1560 * disabled avoiding further scheduler activity on it and we're
1561 * being very careful to re-start the picking loop.
1562 */
Matt Flemingd8ac8972016-09-21 14:38:10 +01001563 rq_unpin_lock(rq, rf);
Peter Zijlstra38033c32014-01-23 20:32:21 +01001564 pull_rt_task(rq);
Matt Flemingd8ac8972016-09-21 14:38:10 +01001565 rq_repin_lock(rq, rf);
Peter Zijlstra37e117c2014-02-14 12:25:08 +01001566 /*
1567 * pull_rt_task() can drop (and re-acquire) rq->lock; this
Kirill Tkhaia1d9a322014-04-10 17:38:36 +04001568 * means a dl or stop task can slip in, in which case we need
1569 * to re-start task selection.
Peter Zijlstra37e117c2014-02-14 12:25:08 +01001570 */
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04001571 if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) ||
Kirill Tkhaia1d9a322014-04-10 17:38:36 +04001572 rq->dl.dl_nr_running))
Peter Zijlstra37e117c2014-02-14 12:25:08 +01001573 return RETRY_TASK;
1574 }
Peter Zijlstra38033c32014-01-23 20:32:21 +01001575
Kirill Tkhai734ff2a2014-03-04 19:25:46 +04001576 /*
1577 * We may dequeue prev's rt_rq in put_prev_task().
Muchun Songa68d7502018-10-27 11:05:17 +08001578 * So, we update time before rt_queued check.
Kirill Tkhai734ff2a2014-03-04 19:25:46 +04001579 */
1580 if (prev->sched_class == &rt_sched_class)
1581 update_curr_rt(rq);
1582
Kirill Tkhaif4ebcbc2014-03-15 02:15:00 +04001583 if (!rt_rq->rt_queued)
Peter Zijlstra606dba22012-02-11 06:05:00 +01001584 return NULL;
1585
Peter Zijlstra3f1d2a32014-02-12 10:49:30 +01001586 put_prev_task(rq, prev);
Peter Zijlstra606dba22012-02-11 06:05:00 +01001587
1588 p = _pick_next_task_rt(rq);
Gregory Haskins917b6272008-12-29 09:39:53 -05001589
Peter Zijlstraf95d4ea2019-05-29 20:36:40 +00001590 set_next_task_rt(rq, p);
Vincent Guittot371bf422018-06-28 17:45:05 +02001591
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001592 return p;
1593}
1594
Ingo Molnar31ee5292007-08-09 11:16:49 +02001595static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001596{
Ingo Molnarf1e14ef2007-08-09 11:16:48 +02001597 update_curr_rt(rq);
Gregory Haskins917b6272008-12-29 09:39:53 -05001598
Vincent Guittot23127292019-01-23 16:26:53 +01001599 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
Vincent Guittot371bf422018-06-28 17:45:05 +02001600
Gregory Haskins917b6272008-12-29 09:39:53 -05001601 /*
1602 * The previous task needs to be made eligible for pushing
1603 * if it is still active
1604 */
Ingo Molnar4b53a342017-02-05 15:41:03 +01001605 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
Gregory Haskins917b6272008-12-29 09:39:53 -05001606 enqueue_pushable_task(rq, p);
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02001607}
1608
Peter Williams681f3e62007-10-24 18:23:51 +02001609#ifdef CONFIG_SMP
Peter Zijlstra6f505b12008-01-25 21:08:30 +01001610
Steven Rostedte8fa1362008-01-25 21:08:05 +01001611/* Only try algorithms three times */
1612#define RT_MAX_TRIES 3
1613
Steven Rostedtf65eda42008-01-25 21:08:07 +01001614static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
1615{
1616 if (!task_running(rq, p) &&
Sebastian Andrzej Siewior3bd37062019-04-23 16:26:36 +02001617 cpumask_test_cpu(cpu, p->cpus_ptr))
Steven Rostedtf65eda42008-01-25 21:08:07 +01001618 return 1;
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001619
Steven Rostedtf65eda42008-01-25 21:08:07 +01001620 return 0;
1621}
1622
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001623/*
1624 * Return the highest pushable rq's task, which is suitable to be executed
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001625 * on the CPU, NULL otherwise
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001626 */
1627static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
Steven Rostedte8fa1362008-01-25 21:08:05 +01001628{
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001629 struct plist_head *head = &rq->rt.pushable_tasks;
1630 struct task_struct *p;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001631
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001632 if (!has_pushable_tasks(rq))
1633 return NULL;
Peter Zijlstra3d074672010-03-10 17:07:24 +01001634
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001635 plist_for_each_entry(p, head, pushable_tasks) {
1636 if (pick_rt_task(rq, p, cpu))
1637 return p;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001638 }
1639
Kirill Tkhaie23ee742013-06-07 15:37:43 -04001640 return NULL;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001641}
1642
Rusty Russell0e3900e2008-11-25 02:35:13 +10301643static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001644
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001645static int find_lowest_rq(struct task_struct *task)
1646{
1647 struct sched_domain *sd;
Christoph Lameter4ba29682014-08-26 19:12:21 -05001648 struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001649 int this_cpu = smp_processor_id();
1650 int cpu = task_cpu(task);
1651
Steven Rostedt0da938c2011-06-14 18:36:25 -04001652 /* Make sure the mask is initialized first */
1653 if (unlikely(!lowest_mask))
1654 return -1;
1655
Ingo Molnar4b53a342017-02-05 15:41:03 +01001656 if (task->nr_cpus_allowed == 1)
Gregory Haskins6e0534f2008-05-12 21:21:01 +02001657 return -1; /* No other targets possible */
1658
1659 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
Gregory Haskins06f90db2008-01-25 21:08:13 +01001660 return -1; /* No targets found */
1661
1662 /*
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001663 * At this point we have built a mask of CPUs representing the
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001664 * lowest priority tasks in the system. Now we want to elect
1665 * the best one based on our affinity and topology.
1666 *
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001667 * We prioritize the last CPU that the task executed on since
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001668 * it is most likely cache-hot in that location.
1669 */
Rusty Russell96f874e22008-11-25 02:35:14 +10301670 if (cpumask_test_cpu(cpu, lowest_mask))
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001671 return cpu;
1672
1673 /*
1674 * Otherwise, we consult the sched_domains span maps to figure
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001675 * out which CPU is logically closest to our hot cache data.
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001676 */
Rusty Russelle2c88062009-11-03 14:53:15 +10301677 if (!cpumask_test_cpu(this_cpu, lowest_mask))
1678 this_cpu = -1; /* Skip this_cpu opt if not among lowest */
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001679
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001680 rcu_read_lock();
Rusty Russelle2c88062009-11-03 14:53:15 +10301681 for_each_domain(cpu, sd) {
1682 if (sd->flags & SD_WAKE_AFFINE) {
1683 int best_cpu;
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001684
Rusty Russelle2c88062009-11-03 14:53:15 +10301685 /*
1686 * "this_cpu" is cheaper to preempt than a
1687 * remote processor.
1688 */
1689 if (this_cpu != -1 &&
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001690 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
1691 rcu_read_unlock();
Rusty Russelle2c88062009-11-03 14:53:15 +10301692 return this_cpu;
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001693 }
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001694
Rusty Russelle2c88062009-11-03 14:53:15 +10301695 best_cpu = cpumask_first_and(lowest_mask,
1696 sched_domain_span(sd));
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001697 if (best_cpu < nr_cpu_ids) {
1698 rcu_read_unlock();
Rusty Russelle2c88062009-11-03 14:53:15 +10301699 return best_cpu;
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001700 }
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001701 }
1702 }
Xiaotian Fengcd4ae6a2011-04-22 18:53:54 +08001703 rcu_read_unlock();
Gregory Haskins6e1254d2008-01-25 21:08:11 +01001704
1705 /*
1706 * And finally, if there were no matches within the domains
1707 * just give the caller *something* to work with from the compatible
1708 * locations.
1709 */
Rusty Russelle2c88062009-11-03 14:53:15 +10301710 if (this_cpu != -1)
1711 return this_cpu;
1712
1713 cpu = cpumask_any(lowest_mask);
1714 if (cpu < nr_cpu_ids)
1715 return cpu;
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001716
Rusty Russelle2c88062009-11-03 14:53:15 +10301717 return -1;
Gregory Haskins07b40322008-01-25 21:08:10 +01001718}
1719
Steven Rostedte8fa1362008-01-25 21:08:05 +01001720/* Will lock the rq it finds */
Ingo Molnar4df64c02008-01-25 21:08:15 +01001721static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
Steven Rostedte8fa1362008-01-25 21:08:05 +01001722{
1723 struct rq *lowest_rq = NULL;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001724 int tries;
Ingo Molnar4df64c02008-01-25 21:08:15 +01001725 int cpu;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001726
1727 for (tries = 0; tries < RT_MAX_TRIES; tries++) {
Gregory Haskins07b40322008-01-25 21:08:10 +01001728 cpu = find_lowest_rq(task);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001729
Gregory Haskins2de0b462008-01-25 21:08:10 +01001730 if ((cpu == -1) || (cpu == rq->cpu))
Steven Rostedte8fa1362008-01-25 21:08:05 +01001731 break;
1732
Gregory Haskins07b40322008-01-25 21:08:10 +01001733 lowest_rq = cpu_rq(cpu);
1734
Tim Chen80e3d872014-12-12 15:38:12 -08001735 if (lowest_rq->rt.highest_prio.curr <= task->prio) {
1736 /*
1737 * Target rq has tasks of equal or higher priority,
1738 * retrying does not release any lock and is unlikely
1739 * to yield a different result.
1740 */
1741 lowest_rq = NULL;
1742 break;
1743 }
1744
Steven Rostedte8fa1362008-01-25 21:08:05 +01001745 /* if the prio of this runqueue changed, try again */
Gregory Haskins07b40322008-01-25 21:08:10 +01001746 if (double_lock_balance(rq, lowest_rq)) {
Steven Rostedte8fa1362008-01-25 21:08:05 +01001747 /*
1748 * We had to unlock the run queue. In
1749 * the mean time, task could have
1750 * migrated already or had its affinity changed.
1751 * Also make sure that it wasn't scheduled on its rq.
1752 */
Gregory Haskins07b40322008-01-25 21:08:10 +01001753 if (unlikely(task_rq(task) != rq ||
Sebastian Andrzej Siewior3bd37062019-04-23 16:26:36 +02001754 !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
Gregory Haskins07b40322008-01-25 21:08:10 +01001755 task_running(rq, task) ||
Xunlei Pang13b5ab02016-05-09 12:11:31 +08001756 !rt_task(task) ||
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04001757 !task_on_rq_queued(task))) {
Ingo Molnar4df64c02008-01-25 21:08:15 +01001758
Peter Zijlstra7f1b4392012-05-17 21:19:46 +02001759 double_unlock_balance(rq, lowest_rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001760 lowest_rq = NULL;
1761 break;
1762 }
1763 }
1764
1765 /* If this rq is still suitable use it. */
Gregory Haskinse864c492008-12-29 09:39:49 -05001766 if (lowest_rq->rt.highest_prio.curr > task->prio)
Steven Rostedte8fa1362008-01-25 21:08:05 +01001767 break;
1768
1769 /* try again */
Peter Zijlstra1b12bbc2008-08-11 09:30:22 +02001770 double_unlock_balance(rq, lowest_rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001771 lowest_rq = NULL;
1772 }
1773
1774 return lowest_rq;
1775}
1776
Gregory Haskins917b6272008-12-29 09:39:53 -05001777static struct task_struct *pick_next_pushable_task(struct rq *rq)
1778{
1779 struct task_struct *p;
1780
1781 if (!has_pushable_tasks(rq))
1782 return NULL;
1783
1784 p = plist_first_entry(&rq->rt.pushable_tasks,
1785 struct task_struct, pushable_tasks);
1786
1787 BUG_ON(rq->cpu != task_cpu(p));
1788 BUG_ON(task_current(rq, p));
Ingo Molnar4b53a342017-02-05 15:41:03 +01001789 BUG_ON(p->nr_cpus_allowed <= 1);
Gregory Haskins917b6272008-12-29 09:39:53 -05001790
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04001791 BUG_ON(!task_on_rq_queued(p));
Gregory Haskins917b6272008-12-29 09:39:53 -05001792 BUG_ON(!rt_task(p));
1793
1794 return p;
1795}
1796
Steven Rostedte8fa1362008-01-25 21:08:05 +01001797/*
1798 * If the current CPU has more than one RT task, see if the non
1799 * running task can migrate over to a CPU that is running a task
1800 * of lesser priority.
1801 */
Gregory Haskins697f0a42008-01-25 21:08:09 +01001802static int push_rt_task(struct rq *rq)
Steven Rostedte8fa1362008-01-25 21:08:05 +01001803{
1804 struct task_struct *next_task;
1805 struct rq *lowest_rq;
Hillf Danton311e8002011-06-16 21:55:20 -04001806 int ret = 0;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001807
Gregory Haskinsa22d7fc2008-01-25 21:08:12 +01001808 if (!rq->rt.overloaded)
1809 return 0;
1810
Gregory Haskins917b6272008-12-29 09:39:53 -05001811 next_task = pick_next_pushable_task(rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001812 if (!next_task)
1813 return 0;
1814
Peter Zijlstra49246272010-10-17 21:46:10 +02001815retry:
Yangtao Li9ebc6052018-11-03 13:26:02 -04001816 if (WARN_ON(next_task == rq->curr))
Steven Rostedte8fa1362008-01-25 21:08:05 +01001817 return 0;
1818
1819 /*
1820 * It's possible that the next_task slipped in of
1821 * higher priority than current. If that's the case
1822 * just reschedule current.
1823 */
Gregory Haskins697f0a42008-01-25 21:08:09 +01001824 if (unlikely(next_task->prio < rq->curr->prio)) {
Kirill Tkhai88751252014-06-29 00:03:57 +04001825 resched_curr(rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001826 return 0;
1827 }
1828
Gregory Haskins697f0a42008-01-25 21:08:09 +01001829 /* We might release rq lock */
Steven Rostedte8fa1362008-01-25 21:08:05 +01001830 get_task_struct(next_task);
1831
1832 /* find_lock_lowest_rq locks the rq if found */
Gregory Haskins697f0a42008-01-25 21:08:09 +01001833 lowest_rq = find_lock_lowest_rq(next_task, rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001834 if (!lowest_rq) {
1835 struct task_struct *task;
1836 /*
Hillf Danton311e8002011-06-16 21:55:20 -04001837 * find_lock_lowest_rq releases rq->lock
Gregory Haskins15635132008-12-29 09:39:53 -05001838 * so it is possible that next_task has migrated.
1839 *
1840 * We need to make sure that the task is still on the same
1841 * run-queue and is also still the next task eligible for
1842 * pushing.
Steven Rostedte8fa1362008-01-25 21:08:05 +01001843 */
Gregory Haskins917b6272008-12-29 09:39:53 -05001844 task = pick_next_pushable_task(rq);
Byungchul Parkde16b912017-05-12 10:05:43 +09001845 if (task == next_task) {
Gregory Haskins15635132008-12-29 09:39:53 -05001846 /*
Hillf Danton311e8002011-06-16 21:55:20 -04001847 * The task hasn't migrated, and is still the next
1848 * eligible task, but we failed to find a run-queue
1849 * to push it to. Do not retry in this case, since
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001850 * other CPUs will pull from us when ready.
Gregory Haskins15635132008-12-29 09:39:53 -05001851 */
Gregory Haskins15635132008-12-29 09:39:53 -05001852 goto out;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001853 }
Gregory Haskins917b6272008-12-29 09:39:53 -05001854
Gregory Haskins15635132008-12-29 09:39:53 -05001855 if (!task)
1856 /* No more tasks, just exit */
1857 goto out;
1858
Gregory Haskins917b6272008-12-29 09:39:53 -05001859 /*
Gregory Haskins15635132008-12-29 09:39:53 -05001860 * Something has shifted, try again.
Gregory Haskins917b6272008-12-29 09:39:53 -05001861 */
Gregory Haskins15635132008-12-29 09:39:53 -05001862 put_task_struct(next_task);
1863 next_task = task;
1864 goto retry;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001865 }
1866
Gregory Haskins697f0a42008-01-25 21:08:09 +01001867 deactivate_task(rq, next_task, 0);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001868 set_task_cpu(next_task, lowest_rq->cpu);
1869 activate_task(lowest_rq, next_task, 0);
Hillf Danton311e8002011-06-16 21:55:20 -04001870 ret = 1;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001871
Kirill Tkhai88751252014-06-29 00:03:57 +04001872 resched_curr(lowest_rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001873
Peter Zijlstra1b12bbc2008-08-11 09:30:22 +02001874 double_unlock_balance(rq, lowest_rq);
Steven Rostedte8fa1362008-01-25 21:08:05 +01001875
Steven Rostedte8fa1362008-01-25 21:08:05 +01001876out:
1877 put_task_struct(next_task);
1878
Hillf Danton311e8002011-06-16 21:55:20 -04001879 return ret;
Steven Rostedte8fa1362008-01-25 21:08:05 +01001880}
1881
/* Keep pushing tasks off @rq for as long as push_rt_task() moves one. */
static void push_rt_tasks(struct rq *rq)
{
	int moved;

	do {
		/* Non-zero means an RT task was migrated away. */
		moved = push_rt_task(rq);
	} while (moved);
}
1888
Steven Rostedtb6366f02015-03-18 14:49:46 -04001889#ifdef HAVE_RT_PUSH_IPI
Steven Rostedtb6366f02015-03-18 14:49:46 -04001890
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05001891/*
1892 * When a high priority task schedules out from a CPU and a lower priority
1893 * task is scheduled in, a check is made to see if there's any RT tasks
1894 * on other CPUs that are waiting to run because a higher priority RT task
1895 * is currently running on its CPU. In this case, the CPU with multiple RT
1896 * tasks queued on it (overloaded) needs to be notified that a CPU has opened
1897 * up that may be able to run one of its non-running queued RT tasks.
1898 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001899 * All CPUs with overloaded RT tasks need to be notified as there is currently
1900 * no way to know which of these CPUs have the highest priority task waiting
1901 * to run. Instead of trying to take a spinlock on each of these CPUs,
1902 * which has shown to cause large latency when done on machines with many
1903 * CPUs, sending an IPI to the CPUs to have them push off the overloaded
1904 * RT tasks waiting to run.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05001905 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001906 * Just sending an IPI to each of the CPUs is also an issue, as on large
1907 * count CPU machines, this can cause an IPI storm on a CPU, especially
1908 * if it's the only CPU with multiple RT tasks queued, and a large number
1909 * of CPUs scheduling a lower priority task at the same time.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05001910 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001911 * Each root domain has its own irq work function that can iterate over
1912 * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
1913 * tasks must be checked if there's one or many CPUs that are lowering
1914 * their priority, there's a single irq work iterator that will try to
1915 * push off RT tasks that are waiting to run.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05001916 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001917 * When a CPU schedules a lower priority task, it will kick off the
1918 * irq work iterator that will jump to each CPU with overloaded RT tasks.
1919 * As it only takes the first CPU that schedules a lower priority task
1920 * to start the process, the rto_loop_start variable is atomically switched
1921 * from zero to one, and only the CPU that succeeds will try to take the rto_lock.
1922 * This prevents high contention on the lock as the process handles all
1923 * CPUs scheduling lower priority tasks.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05001924 *
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001925 * All CPUs that are scheduling a lower priority task will increment the
1926 * rto_loop_next variable. This will make sure that the irq work iterator
1927 * checks all RT overloaded CPUs whenever a CPU schedules a new lower
1928 * priority task, even if the iterator is in the middle of a scan. Incrementing
1929 * the rto_loop_next will cause the iterator to perform another scan.
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05001930 *
Steven Rostedt (VMware)3e777f92017-02-28 15:50:30 -05001931 */
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05001932static int rto_next_cpu(struct root_domain *rd)
Steven Rostedtb6366f02015-03-18 14:49:46 -04001933{
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001934 int next;
Steven Rostedtb6366f02015-03-18 14:49:46 -04001935 int cpu;
1936
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001937 /*
1938 * When starting the IPI RT pushing, the rto_cpu is set to -1,
1939 * rt_next_cpu() will simply return the first CPU found in
1940 * the rto_mask.
1941 *
Ingo Molnar97fb7a02018-03-03 14:01:12 +01001942 * If rto_next_cpu() is called with rto_cpu is a valid CPU, it
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001943 * will return the next CPU found in the rto_mask.
1944 *
1945 * If there are no more CPUs left in the rto_mask, then a check is made
1946 * against rto_loop and rto_loop_next. rto_loop is only updated with
1947 * the rto_lock held, but any CPU may increment the rto_loop_next
1948 * without any locking.
1949 */
1950 for (;;) {
1951
1952 /* When rto_cpu is -1 this acts like cpumask_first() */
1953 cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);
1954
1955 rd->rto_cpu = cpu;
1956
1957 if (cpu < nr_cpu_ids)
1958 return cpu;
1959
1960 rd->rto_cpu = -1;
1961
1962 /*
1963 * ACQUIRE ensures we see the @rto_mask changes
1964 * made prior to the @next value observed.
1965 *
1966 * Matches WMB in rt_set_overload().
1967 */
1968 next = atomic_read_acquire(&rd->rto_loop_next);
1969
1970 if (rd->rto_loop == next)
1971 break;
1972
1973 rd->rto_loop = next;
Steven Rostedtb6366f02015-03-18 14:49:46 -04001974 }
1975
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001976 return -1;
1977}
Steven Rostedtb6366f02015-03-18 14:49:46 -04001978
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04001979static inline bool rto_start_trylock(atomic_t *v)
1980{
1981 return !atomic_cmpxchg_acquire(v, 0, 1);
1982}
1983
1984static inline void rto_start_unlock(atomic_t *v)
1985{
1986 atomic_set_release(v, 0);
1987}
1988
1989static void tell_cpu_to_push(struct rq *rq)
1990{
1991 int cpu = -1;
1992
1993 /* Keep the loop going if the IPI is currently active */
1994 atomic_inc(&rq->rd->rto_loop_next);
1995
1996 /* Only one CPU can initiate a loop at a time */
1997 if (!rto_start_trylock(&rq->rd->rto_loop_start))
Steven Rostedtb6366f02015-03-18 14:49:46 -04001998 return;
1999
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002000 raw_spin_lock(&rq->rd->rto_lock);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002001
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002002 /*
Ingo Molnar97fb7a02018-03-03 14:01:12 +01002003 * The rto_cpu is updated under the lock, if it has a valid CPU
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002004 * then the IPI is still running and will continue due to the
2005 * update to loop_next, and nothing needs to be done here.
2006 * Otherwise it is finishing up and an ipi needs to be sent.
2007 */
2008 if (rq->rd->rto_cpu < 0)
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002009 cpu = rto_next_cpu(rq->rd);
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002010
2011 raw_spin_unlock(&rq->rd->rto_lock);
2012
2013 rto_start_unlock(&rq->rd->rto_loop_start);
2014
Steven Rostedt (VMware)364f5662018-01-23 20:45:38 -05002015 if (cpu >= 0) {
2016 /* Make sure the rd does not get freed while pushing */
2017 sched_get_rd(rq->rd);
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002018 irq_work_queue_on(&rq->rd->rto_push_work, cpu);
Steven Rostedt (VMware)364f5662018-01-23 20:45:38 -05002019 }
Steven Rostedtb6366f02015-03-18 14:49:46 -04002020}
2021
2022/* Called from hardirq context */
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002023void rto_push_irq_work_func(struct irq_work *work)
Steven Rostedtb6366f02015-03-18 14:49:46 -04002024{
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002025 struct root_domain *rd =
2026 container_of(work, struct root_domain, rto_push_work);
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002027 struct rq *rq;
Steven Rostedtb6366f02015-03-18 14:49:46 -04002028 int cpu;
2029
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002030 rq = this_rq();
Steven Rostedtb6366f02015-03-18 14:49:46 -04002031
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002032 /*
2033 * We do not need to grab the lock to check for has_pushable_tasks.
2034 * When it gets updated, a check is made if a push is possible.
2035 */
Steven Rostedtb6366f02015-03-18 14:49:46 -04002036 if (has_pushable_tasks(rq)) {
2037 raw_spin_lock(&rq->lock);
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002038 push_rt_tasks(rq);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002039 raw_spin_unlock(&rq->lock);
2040 }
2041
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002042 raw_spin_lock(&rd->rto_lock);
Steven Rostedt (Red Hat)4bdced52017-10-06 14:05:04 -04002043
Steven Rostedtb6366f02015-03-18 14:49:46 -04002044 /* Pass the IPI to the next rt overloaded queue */
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002045 cpu = rto_next_cpu(rd);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002046
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002047 raw_spin_unlock(&rd->rto_lock);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002048
Steven Rostedt (VMware)364f5662018-01-23 20:45:38 -05002049 if (cpu < 0) {
2050 sched_put_rd(rd);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002051 return;
Steven Rostedt (VMware)364f5662018-01-23 20:45:38 -05002052 }
Steven Rostedtb6366f02015-03-18 14:49:46 -04002053
Steven Rostedtb6366f02015-03-18 14:49:46 -04002054 /* Try the next RT overloaded CPU */
Steven Rostedt (VMware)ad0f1d92018-01-23 20:45:37 -05002055 irq_work_queue_on(&rd->rto_push_work, cpu);
Steven Rostedtb6366f02015-03-18 14:49:46 -04002056}
2057#endif /* HAVE_RT_PUSH_IPI */
2058
Peter Zijlstra8046d682015-06-11 14:46:40 +02002059static void pull_rt_task(struct rq *this_rq)
Steven Rostedtf65eda42008-01-25 21:08:07 +01002060{
Peter Zijlstra8046d682015-06-11 14:46:40 +02002061 int this_cpu = this_rq->cpu, cpu;
2062 bool resched = false;
Gregory Haskinsa8728942008-12-29 09:39:49 -05002063 struct task_struct *p;
Steven Rostedtf65eda42008-01-25 21:08:07 +01002064 struct rq *src_rq;
Steven Rostedtf73c52a2017-12-02 13:04:54 -05002065 int rt_overload_count = rt_overloaded(this_rq);
Steven Rostedtf65eda42008-01-25 21:08:07 +01002066
Steven Rostedtf73c52a2017-12-02 13:04:54 -05002067 if (likely(!rt_overload_count))
Peter Zijlstra8046d682015-06-11 14:46:40 +02002068 return;
Steven Rostedtf65eda42008-01-25 21:08:07 +01002069
Peter Zijlstra7c3f2ab2013-10-15 12:35:07 +02002070 /*
2071 * Match the barrier from rt_set_overloaded; this guarantees that if we
2072 * see overloaded we must also see the rto_mask bit.
2073 */
2074 smp_rmb();
2075
Steven Rostedtf73c52a2017-12-02 13:04:54 -05002076 /* If we are the only overloaded CPU do nothing */
2077 if (rt_overload_count == 1 &&
2078 cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
2079 return;
2080
Steven Rostedtb6366f02015-03-18 14:49:46 -04002081#ifdef HAVE_RT_PUSH_IPI
2082 if (sched_feat(RT_PUSH_IPI)) {
2083 tell_cpu_to_push(this_rq);
Peter Zijlstra8046d682015-06-11 14:46:40 +02002084 return;
Steven Rostedtb6366f02015-03-18 14:49:46 -04002085 }
2086#endif
2087
Rusty Russellc6c49272008-11-25 02:35:05 +10302088 for_each_cpu(cpu, this_rq->rd->rto_mask) {
Steven Rostedtf65eda42008-01-25 21:08:07 +01002089 if (this_cpu == cpu)
2090 continue;
2091
2092 src_rq = cpu_rq(cpu);
Gregory Haskins74ab8e42008-12-29 09:39:50 -05002093
2094 /*
2095 * Don't bother taking the src_rq->lock if the next highest
2096 * task is known to be lower-priority than our current task.
2097 * This may look racy, but if this value is about to go
2098 * logically higher, the src_rq will push this task away.
2099 * And if its going logically lower, we do not care
2100 */
2101 if (src_rq->rt.highest_prio.next >=
2102 this_rq->rt.highest_prio.curr)
2103 continue;
2104
Steven Rostedtf65eda42008-01-25 21:08:07 +01002105 /*
2106 * We can potentially drop this_rq's lock in
2107 * double_lock_balance, and another CPU could
Gregory Haskinsa8728942008-12-29 09:39:49 -05002108 * alter this_rq
Steven Rostedtf65eda42008-01-25 21:08:07 +01002109 */
Gregory Haskinsa8728942008-12-29 09:39:49 -05002110 double_lock_balance(this_rq, src_rq);
Steven Rostedtf65eda42008-01-25 21:08:07 +01002111
2112 /*
Kirill Tkhaie23ee742013-06-07 15:37:43 -04002113 * We can pull only a task, which is pushable
2114 * on its rq, and no others.
Steven Rostedtf65eda42008-01-25 21:08:07 +01002115 */
Kirill Tkhaie23ee742013-06-07 15:37:43 -04002116 p = pick_highest_pushable_task(src_rq, this_cpu);
Steven Rostedtf65eda42008-01-25 21:08:07 +01002117
2118 /*
2119 * Do we have an RT task that preempts
2120 * the to-be-scheduled task?
2121 */
Gregory Haskinsa8728942008-12-29 09:39:49 -05002122 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
Steven Rostedtf65eda42008-01-25 21:08:07 +01002123 WARN_ON(p == src_rq->curr);
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04002124 WARN_ON(!task_on_rq_queued(p));
Steven Rostedtf65eda42008-01-25 21:08:07 +01002125
2126 /*
2127 * There's a chance that p is higher in priority
Ingo Molnar97fb7a02018-03-03 14:01:12 +01002128 * than what's currently running on its CPU.
Steven Rostedtf65eda42008-01-25 21:08:07 +01002129 * This is just that p is wakeing up and hasn't
2130 * had a chance to schedule. We only pull
2131 * p if it is lower in priority than the
Gregory Haskinsa8728942008-12-29 09:39:49 -05002132 * current task on the run queue
Steven Rostedtf65eda42008-01-25 21:08:07 +01002133 */
Gregory Haskinsa8728942008-12-29 09:39:49 -05002134 if (p->prio < src_rq->curr->prio)
Mike Galbraith614ee1f2008-01-25 21:08:30 +01002135 goto skip;
Steven Rostedtf65eda42008-01-25 21:08:07 +01002136
Peter Zijlstra8046d682015-06-11 14:46:40 +02002137 resched = true;
Steven Rostedtf65eda42008-01-25 21:08:07 +01002138
2139 deactivate_task(src_rq, p, 0);
2140 set_task_cpu(p, this_cpu);
2141 activate_task(this_rq, p, 0);
2142 /*
2143 * We continue with the search, just in
2144 * case there's an even higher prio task
Lucas De Marchi25985ed2011-03-30 22:57:33 -03002145 * in another runqueue. (low likelihood
Steven Rostedtf65eda42008-01-25 21:08:07 +01002146 * but possible)
Steven Rostedtf65eda42008-01-25 21:08:07 +01002147 */
Steven Rostedtf65eda42008-01-25 21:08:07 +01002148 }
Peter Zijlstra49246272010-10-17 21:46:10 +02002149skip:
Peter Zijlstra1b12bbc2008-08-11 09:30:22 +02002150 double_unlock_balance(this_rq, src_rq);
Steven Rostedtf65eda42008-01-25 21:08:07 +01002151 }
2152
Peter Zijlstra8046d682015-06-11 14:46:40 +02002153 if (resched)
2154 resched_curr(this_rq);
Steven Rostedtf65eda42008-01-25 21:08:07 +01002155}
2156
Gregory Haskins8ae121a2008-04-23 07:13:29 -04002157/*
2158 * If we are not running and we are not going to reschedule soon, we should
2159 * try to push tasks away now
2160 */
Peter Zijlstraefbbd052009-12-16 18:04:40 +01002161static void task_woken_rt(struct rq *rq, struct task_struct *p)
Steven Rostedt4642daf2008-01-25 21:08:07 +01002162{
Steven Rostedt9a897c52008-01-25 21:08:22 +01002163 if (!task_running(rq, p) &&
Gregory Haskins8ae121a2008-04-23 07:13:29 -04002164 !test_tsk_need_resched(rq->curr) &&
Ingo Molnar4b53a342017-02-05 15:41:03 +01002165 p->nr_cpus_allowed > 1 &&
Juri Lelli1baca4c2013-11-07 14:43:38 +01002166 (dl_task(rq->curr) || rt_task(rq->curr)) &&
Ingo Molnar4b53a342017-02-05 15:41:03 +01002167 (rq->curr->nr_cpus_allowed < 2 ||
Shawn Bohrer3be209a2011-09-12 09:28:04 -05002168 rq->curr->prio <= p->prio))
Steven Rostedt4642daf2008-01-25 21:08:07 +01002169 push_rt_tasks(rq);
2170}
2171
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002172/* Assumes rq->lock is held */
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -04002173static void rq_online_rt(struct rq *rq)
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002174{
2175 if (rq->rt.overloaded)
2176 rt_set_overload(rq);
Gregory Haskins6e0534f2008-05-12 21:21:01 +02002177
Peter Zijlstra7def2be2008-06-05 14:49:58 +02002178 __enable_runtime(rq);
2179
Gregory Haskinse864c492008-12-29 09:39:49 -05002180 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002181}
2182
2183/* Assumes rq->lock is held */
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -04002184static void rq_offline_rt(struct rq *rq)
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002185{
2186 if (rq->rt.overloaded)
2187 rt_clear_overload(rq);
Gregory Haskins6e0534f2008-05-12 21:21:01 +02002188
Peter Zijlstra7def2be2008-06-05 14:49:58 +02002189 __disable_runtime(rq);
2190
Gregory Haskins6e0534f2008-05-12 21:21:01 +02002191 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
Ingo Molnarbdd7c812008-01-25 21:08:18 +01002192}
Steven Rostedtcb469842008-01-25 21:08:22 +01002193
2194/*
2195 * When switch from the rt queue, we bring ourselves to a position
2196 * that we might want to pull RT tasks from other runqueues.
2197 */
Peter Zijlstrada7a7352011-01-17 17:03:27 +01002198static void switched_from_rt(struct rq *rq, struct task_struct *p)
Steven Rostedtcb469842008-01-25 21:08:22 +01002199{
2200 /*
2201 * If there are other RT tasks then we will reschedule
2202 * and the scheduling of the other RT tasks will handle
2203 * the balancing. But if we are the last RT task
2204 * we may need to handle the pulling of RT tasks
2205 * now.
2206 */
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04002207 if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
Kirill Tkhai1158ddb2012-11-23 00:02:15 +04002208 return;
2209
Ingo Molnar02d8ec92018-03-03 16:27:54 +01002210 rt_queue_pull_task(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002211}
Rusty Russell3d8cbdf2008-11-25 09:58:41 +10302212
Li Zefan11c785b2014-02-08 14:17:45 +08002213void __init init_sched_rt_class(void)
Rusty Russell3d8cbdf2008-11-25 09:58:41 +10302214{
2215 unsigned int i;
2216
Peter Zijlstra029632f2011-10-25 10:00:11 +02002217 for_each_possible_cpu(i) {
Yinghai Lueaa95842009-06-06 14:51:36 -07002218 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
Mike Travis6ca09df2008-12-31 18:08:45 -08002219 GFP_KERNEL, cpu_to_node(i));
Peter Zijlstra029632f2011-10-25 10:00:11 +02002220 }
Rusty Russell3d8cbdf2008-11-25 09:58:41 +10302221}
Steven Rostedte8fa1362008-01-25 21:08:05 +01002222#endif /* CONFIG_SMP */
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002223
Steven Rostedtcb469842008-01-25 21:08:22 +01002224/*
2225 * When switching a task to RT, we may overload the runqueue
2226 * with RT tasks. In this case we try to push them off to
2227 * other runqueues.
2228 */
Peter Zijlstrada7a7352011-01-17 17:03:27 +01002229static void switched_to_rt(struct rq *rq, struct task_struct *p)
Steven Rostedtcb469842008-01-25 21:08:22 +01002230{
Steven Rostedtcb469842008-01-25 21:08:22 +01002231 /*
2232 * If we are already running, then there's nothing
2233 * that needs to be done. But if we are not running
2234 * we may need to preempt the current running task.
2235 * If that current running task is also an RT task
2236 * then see if we can move to another run queue.
2237 */
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04002238 if (task_on_rq_queued(p) && rq->curr != p) {
Steven Rostedtcb469842008-01-25 21:08:22 +01002239#ifdef CONFIG_SMP
Ingo Molnar4b53a342017-02-05 15:41:03 +01002240 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
Ingo Molnar02d8ec92018-03-03 16:27:54 +01002241 rt_queue_push_tasks(rq);
Sebastian Andrzej Siewior619bd4a2017-01-24 15:40:06 +01002242#endif /* CONFIG_SMP */
Paul E. McKenney2fe25822017-10-13 17:00:18 -07002243 if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
Kirill Tkhai88751252014-06-29 00:03:57 +04002244 resched_curr(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002245 }
2246}
2247
2248/*
2249 * Priority of the task has changed. This may cause
2250 * us to initiate a push or pull.
2251 */
Peter Zijlstrada7a7352011-01-17 17:03:27 +01002252static void
2253prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
Steven Rostedtcb469842008-01-25 21:08:22 +01002254{
Kirill Tkhaida0c1e62014-08-20 13:47:32 +04002255 if (!task_on_rq_queued(p))
Peter Zijlstrada7a7352011-01-17 17:03:27 +01002256 return;
2257
2258 if (rq->curr == p) {
Steven Rostedtcb469842008-01-25 21:08:22 +01002259#ifdef CONFIG_SMP
2260 /*
2261 * If our priority decreases while running, we
2262 * may need to pull tasks to this runqueue.
2263 */
2264 if (oldprio < p->prio)
Ingo Molnar02d8ec92018-03-03 16:27:54 +01002265 rt_queue_pull_task(rq);
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +02002266
Steven Rostedtcb469842008-01-25 21:08:22 +01002267 /*
2268 * If there's a higher priority task waiting to run
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +02002269 * then reschedule.
Steven Rostedtcb469842008-01-25 21:08:22 +01002270 */
Peter Zijlstrafd7a4be2015-06-11 14:46:41 +02002271 if (p->prio > rq->rt.highest_prio.curr)
Kirill Tkhai88751252014-06-29 00:03:57 +04002272 resched_curr(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002273#else
2274 /* For UP simply resched on drop of prio */
2275 if (oldprio < p->prio)
Kirill Tkhai88751252014-06-29 00:03:57 +04002276 resched_curr(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002277#endif /* CONFIG_SMP */
2278 } else {
2279 /*
2280 * This task is not running, but if it is
2281 * greater than the current running task
2282 * then reschedule.
2283 */
2284 if (p->prio < rq->curr->prio)
Kirill Tkhai88751252014-06-29 00:03:57 +04002285 resched_curr(rq);
Steven Rostedtcb469842008-01-25 21:08:22 +01002286 }
2287}
2288
#ifdef CONFIG_POSIX_TIMERS
/*
 * RLIMIT_RTTIME watchdog, called from the RT tick for task @p.
 *
 * Counts the distinct jiffies in which @p was ticked and, once that
 * count exceeds the limit (converted from microseconds to ticks), sets
 * the task's sched_exp expiry to its current sum_exec_runtime.
 */
static void watchdog(struct rq *rq, struct task_struct *p)
{
	unsigned long soft, hard, limit_ticks;

	/* max may change after cur was read, this will be fixed next tick */
	soft = task_rlimit(p, RLIMIT_RTTIME);
	hard = task_rlimit_max(p, RLIMIT_RTTIME);

	/* No soft limit configured: nothing to watch. */
	if (soft == RLIM_INFINITY)
		return;

	/* Bump the counter at most once per jiffy. */
	if (p->rt.watchdog_stamp != jiffies) {
		p->rt.timeout++;
		p->rt.watchdog_stamp = jiffies;
	}

	limit_ticks = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
	if (p->rt.timeout > limit_ticks)
		p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
}
#else
/* Without POSIX timers there is nothing to do. */
static inline void watchdog(struct rq *rq, struct task_struct *p) { }
#endif
Steven Rostedtcb469842008-01-25 21:08:22 +01002314
Frederic Weisbeckerd84b3132018-02-21 05:17:27 +01002315/*
2316 * scheduler tick hitting a task of our scheduling class.
2317 *
2318 * NOTE: This function can be called remotely by the tick offload that
2319 * goes along full dynticks. Therefore no local assumption can be made
2320 * and everything must be accessed through the @rq and @curr passed in
2321 * parameters.
2322 */
Peter Zijlstra8f4d37e2008-01-25 21:08:29 +01002323static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002324{
Colin Cross454c7992012-05-16 21:34:23 -07002325 struct sched_rt_entity *rt_se = &p->rt;
2326
Peter Zijlstra67e2be02007-12-20 15:01:17 +01002327 update_curr_rt(rq);
Vincent Guittot23127292019-01-23 16:26:53 +01002328 update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
Peter Zijlstra67e2be02007-12-20 15:01:17 +01002329
Peter Zijlstra78f2c7d2008-01-25 21:08:27 +01002330 watchdog(rq, p);
2331
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002332 /*
2333 * RR tasks need a special form of timeslice management.
2334 * FIFO tasks have no timeslices.
2335 */
2336 if (p->policy != SCHED_RR)
2337 return;
2338
Peter Zijlstrafa717062008-01-25 21:08:27 +01002339 if (--p->rt.time_slice)
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002340 return;
2341
Clark Williamsce0dbbb2013-02-07 09:47:04 -06002342 p->rt.time_slice = sched_rr_timeslice;
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002343
Dmitry Adamushko98fbc792007-08-24 20:39:10 +02002344 /*
Li Bine9aa39b2013-10-21 20:15:43 +08002345 * Requeue to the end of queue if we (and all of our ancestors) are not
2346 * the only element on the queue
Dmitry Adamushko98fbc792007-08-24 20:39:10 +02002347 */
Colin Cross454c7992012-05-16 21:34:23 -07002348 for_each_sched_rt_entity(rt_se) {
2349 if (rt_se->run_list.prev != rt_se->run_list.next) {
2350 requeue_task_rt(rq, p, 0);
Kirill Tkhai8aa6f0e2014-09-22 22:36:43 +04002351 resched_curr(rq);
Colin Cross454c7992012-05-16 21:34:23 -07002352 return;
2353 }
Dmitry Adamushko98fbc792007-08-24 20:39:10 +02002354 }
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002355}
2356
H Hartley Sweeten6d686f42010-01-13 20:21:52 -07002357static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
Peter Williams0d721ce2009-09-21 01:31:53 +00002358{
2359 /*
2360 * Time slice is 0 for SCHED_FIFO tasks
2361 */
2362 if (task->policy == SCHED_RR)
Clark Williamsce0dbbb2013-02-07 09:47:04 -06002363 return sched_rr_timeslice;
Peter Williams0d721ce2009-09-21 01:31:53 +00002364 else
2365 return 0;
2366}
2367
Peter Zijlstra029632f2011-10-25 10:00:11 +02002368const struct sched_class rt_sched_class = {
Ingo Molnar5522d5d2007-10-15 17:00:12 +02002369 .next = &fair_sched_class,
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002370 .enqueue_task = enqueue_task_rt,
2371 .dequeue_task = dequeue_task_rt,
2372 .yield_task = yield_task_rt,
2373
2374 .check_preempt_curr = check_preempt_curr_rt,
2375
2376 .pick_next_task = pick_next_task_rt,
2377 .put_prev_task = put_prev_task_rt,
Peter Zijlstra03b7fad2019-05-29 20:36:41 +00002378 .set_next_task = set_next_task_rt,
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002379
Peter Williams681f3e62007-10-24 18:23:51 +02002380#ifdef CONFIG_SMP
Li Zefan4ce72a22008-10-22 15:25:26 +08002381 .select_task_rq = select_task_rq_rt,
2382
Peter Zijlstra6c370672015-05-15 17:43:36 +02002383 .set_cpus_allowed = set_cpus_allowed_common,
Gregory Haskins1f11eb6a2008-06-04 15:04:05 -04002384 .rq_online = rq_online_rt,
2385 .rq_offline = rq_offline_rt,
Peter Zijlstraefbbd052009-12-16 18:04:40 +01002386 .task_woken = task_woken_rt,
Steven Rostedtcb469842008-01-25 21:08:22 +01002387 .switched_from = switched_from_rt,
Peter Williams681f3e62007-10-24 18:23:51 +02002388#endif
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002389
2390 .task_tick = task_tick_rt,
Steven Rostedtcb469842008-01-25 21:08:22 +01002391
Peter Williams0d721ce2009-09-21 01:31:53 +00002392 .get_rr_interval = get_rr_interval_rt,
2393
Steven Rostedtcb469842008-01-25 21:08:22 +01002394 .prio_changed = prio_changed_rt,
2395 .switched_to = switched_to_rt,
Stanislaw Gruszka6e998912014-11-12 16:58:44 +01002396
2397 .update_curr = update_curr_rt,
Patrick Bellasi982d9cd2019-06-21 09:42:10 +01002398
2399#ifdef CONFIG_UCLAMP_TASK
2400 .uclamp_enabled = 1,
2401#endif
Ingo Molnarbb44e5d2007-07-09 18:51:58 +02002402};
Peter Zijlstraada18de2008-06-19 14:22:24 +02002403
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002404#ifdef CONFIG_RT_GROUP_SCHED
2405/*
2406 * Ensure that the real time constraints are schedulable.
2407 */
2408static DEFINE_MUTEX(rt_constraints_mutex);
2409
2410/* Must be called with tasklist_lock held */
2411static inline int tg_has_rt_tasks(struct task_group *tg)
2412{
2413 struct task_struct *g, *p;
2414
2415 /*
2416 * Autogroups do not have RT tasks; see autogroup_create().
2417 */
2418 if (task_group_is_autogroup(tg))
2419 return 0;
2420
2421 for_each_process_thread(g, p) {
2422 if (rt_task(p) && task_group(p) == tg)
2423 return 1;
2424 }
2425
2426 return 0;
2427}
2428
2429struct rt_schedulable_data {
2430 struct task_group *tg;
2431 u64 rt_period;
2432 u64 rt_runtime;
2433};
2434
2435static int tg_rt_schedulable(struct task_group *tg, void *data)
2436{
2437 struct rt_schedulable_data *d = data;
2438 struct task_group *child;
2439 unsigned long total, sum = 0;
2440 u64 period, runtime;
2441
2442 period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2443 runtime = tg->rt_bandwidth.rt_runtime;
2444
2445 if (tg == d->tg) {
2446 period = d->rt_period;
2447 runtime = d->rt_runtime;
2448 }
2449
2450 /*
2451 * Cannot have more runtime than the period.
2452 */
2453 if (runtime > period && runtime != RUNTIME_INF)
2454 return -EINVAL;
2455
2456 /*
2457 * Ensure we don't starve existing RT tasks.
2458 */
2459 if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
2460 return -EBUSY;
2461
2462 total = to_ratio(period, runtime);
2463
2464 /*
2465 * Nobody can have more than the global setting allows.
2466 */
2467 if (total > to_ratio(global_rt_period(), global_rt_runtime()))
2468 return -EINVAL;
2469
2470 /*
2471 * The sum of our children's runtime should not exceed our own.
2472 */
2473 list_for_each_entry_rcu(child, &tg->children, siblings) {
2474 period = ktime_to_ns(child->rt_bandwidth.rt_period);
2475 runtime = child->rt_bandwidth.rt_runtime;
2476
2477 if (child == d->tg) {
2478 period = d->rt_period;
2479 runtime = d->rt_runtime;
2480 }
2481
2482 sum += to_ratio(period, runtime);
2483 }
2484
2485 if (sum > total)
2486 return -EINVAL;
2487
2488 return 0;
2489}
2490
2491static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
2492{
2493 int ret;
2494
2495 struct rt_schedulable_data data = {
2496 .tg = tg,
2497 .rt_period = period,
2498 .rt_runtime = runtime,
2499 };
2500
2501 rcu_read_lock();
2502 ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
2503 rcu_read_unlock();
2504
2505 return ret;
2506}
2507
2508static int tg_set_rt_bandwidth(struct task_group *tg,
2509 u64 rt_period, u64 rt_runtime)
2510{
2511 int i, err = 0;
2512
2513 /*
2514 * Disallowing the root group RT runtime is BAD, it would disallow the
2515 * kernel creating (and or operating) RT threads.
2516 */
2517 if (tg == &root_task_group && rt_runtime == 0)
2518 return -EINVAL;
2519
2520 /* No period doesn't make any sense. */
2521 if (rt_period == 0)
2522 return -EINVAL;
2523
2524 mutex_lock(&rt_constraints_mutex);
2525 read_lock(&tasklist_lock);
2526 err = __rt_schedulable(tg, rt_period, rt_runtime);
2527 if (err)
2528 goto unlock;
2529
2530 raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2531 tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
2532 tg->rt_bandwidth.rt_runtime = rt_runtime;
2533
2534 for_each_possible_cpu(i) {
2535 struct rt_rq *rt_rq = tg->rt_rq[i];
2536
2537 raw_spin_lock(&rt_rq->rt_runtime_lock);
2538 rt_rq->rt_runtime = rt_runtime;
2539 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2540 }
2541 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
2542unlock:
2543 read_unlock(&tasklist_lock);
2544 mutex_unlock(&rt_constraints_mutex);
2545
2546 return err;
2547}
2548
2549int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
2550{
2551 u64 rt_runtime, rt_period;
2552
2553 rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
2554 rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
2555 if (rt_runtime_us < 0)
2556 rt_runtime = RUNTIME_INF;
Konstantin Khlebnikov1a010e22019-02-27 11:10:17 +03002557 else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
2558 return -EINVAL;
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002559
2560 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2561}
2562
2563long sched_group_rt_runtime(struct task_group *tg)
2564{
2565 u64 rt_runtime_us;
2566
2567 if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
2568 return -1;
2569
2570 rt_runtime_us = tg->rt_bandwidth.rt_runtime;
2571 do_div(rt_runtime_us, NSEC_PER_USEC);
2572 return rt_runtime_us;
2573}
2574
2575int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
2576{
2577 u64 rt_runtime, rt_period;
2578
Konstantin Khlebnikov1a010e22019-02-27 11:10:17 +03002579 if (rt_period_us > U64_MAX / NSEC_PER_USEC)
2580 return -EINVAL;
2581
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002582 rt_period = rt_period_us * NSEC_PER_USEC;
2583 rt_runtime = tg->rt_bandwidth.rt_runtime;
2584
2585 return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
2586}
2587
2588long sched_group_rt_period(struct task_group *tg)
2589{
2590 u64 rt_period_us;
2591
2592 rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
2593 do_div(rt_period_us, NSEC_PER_USEC);
2594 return rt_period_us;
2595}
2596
2597static int sched_rt_global_constraints(void)
2598{
2599 int ret = 0;
2600
2601 mutex_lock(&rt_constraints_mutex);
2602 read_lock(&tasklist_lock);
2603 ret = __rt_schedulable(NULL, 0, 0);
2604 read_unlock(&tasklist_lock);
2605 mutex_unlock(&rt_constraints_mutex);
2606
2607 return ret;
2608}
2609
2610int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
2611{
2612 /* Don't accept realtime tasks when there is no way for them to run */
2613 if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
2614 return 0;
2615
2616 return 1;
2617}
2618
2619#else /* !CONFIG_RT_GROUP_SCHED */
2620static int sched_rt_global_constraints(void)
2621{
2622 unsigned long flags;
2623 int i;
2624
2625 raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
2626 for_each_possible_cpu(i) {
2627 struct rt_rq *rt_rq = &cpu_rq(i)->rt;
2628
2629 raw_spin_lock(&rt_rq->rt_runtime_lock);
2630 rt_rq->rt_runtime = global_rt_runtime();
2631 raw_spin_unlock(&rt_rq->rt_runtime_lock);
2632 }
2633 raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
2634
2635 return 0;
2636}
2637#endif /* CONFIG_RT_GROUP_SCHED */
2638
2639static int sched_rt_global_validate(void)
2640{
2641 if (sysctl_sched_rt_period <= 0)
2642 return -EINVAL;
2643
2644 if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
2645 (sysctl_sched_rt_runtime > sysctl_sched_rt_period))
2646 return -EINVAL;
2647
2648 return 0;
2649}
2650
2651static void sched_rt_do_global(void)
2652{
2653 def_rt_bandwidth.rt_runtime = global_rt_runtime();
2654 def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
2655}
2656
2657int sched_rt_handler(struct ctl_table *table, int write,
2658 void __user *buffer, size_t *lenp,
2659 loff_t *ppos)
2660{
2661 int old_period, old_runtime;
2662 static DEFINE_MUTEX(mutex);
2663 int ret;
2664
2665 mutex_lock(&mutex);
2666 old_period = sysctl_sched_rt_period;
2667 old_runtime = sysctl_sched_rt_runtime;
2668
2669 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2670
2671 if (!ret && write) {
2672 ret = sched_rt_global_validate();
2673 if (ret)
2674 goto undo;
2675
2676 ret = sched_dl_global_validate();
2677 if (ret)
2678 goto undo;
2679
2680 ret = sched_rt_global_constraints();
2681 if (ret)
2682 goto undo;
2683
2684 sched_rt_do_global();
2685 sched_dl_do_global();
2686 }
2687 if (0) {
2688undo:
2689 sysctl_sched_rt_period = old_period;
2690 sysctl_sched_rt_runtime = old_runtime;
2691 }
2692 mutex_unlock(&mutex);
2693
2694 return ret;
2695}
2696
2697int sched_rr_handler(struct ctl_table *table, int write,
2698 void __user *buffer, size_t *lenp,
2699 loff_t *ppos)
2700{
2701 int ret;
2702 static DEFINE_MUTEX(mutex);
2703
2704 mutex_lock(&mutex);
2705 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2706 /*
2707 * Make sure that internally we keep jiffies.
2708 * Also, writing zero resets the timeslice to default:
2709 */
2710 if (!ret && write) {
2711 sched_rr_timeslice =
2712 sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
2713 msecs_to_jiffies(sysctl_sched_rr_timeslice);
2714 }
2715 mutex_unlock(&mutex);
Ingo Molnar97fb7a02018-03-03 14:01:12 +01002716
Nicolas Pitre8887cd92017-06-21 14:22:02 -04002717 return ret;
2718}
2719
Peter Zijlstraada18de2008-06-19 14:22:24 +02002720#ifdef CONFIG_SCHED_DEBUG
Peter Zijlstra029632f2011-10-25 10:00:11 +02002721void print_rt_stats(struct seq_file *m, int cpu)
Peter Zijlstraada18de2008-06-19 14:22:24 +02002722{
Cheng Xuec514c42011-05-14 14:20:02 +08002723 rt_rq_iter_t iter;
Peter Zijlstraada18de2008-06-19 14:22:24 +02002724 struct rt_rq *rt_rq;
2725
2726 rcu_read_lock();
Cheng Xuec514c42011-05-14 14:20:02 +08002727 for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
Peter Zijlstraada18de2008-06-19 14:22:24 +02002728 print_rt_rq(m, cpu, rt_rq);
2729 rcu_read_unlock();
2730}
Dhaval Giani55e12e52008-06-24 23:39:43 +05302731#endif /* CONFIG_SCHED_DEBUG */