Blame - kernel/sched/deadline.c - SHIFTPHONES/mainline/linux

blob: 1c4bc31eb0f5de0f35194236a732c7c169d62581 [file] [log] [blame]

Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1	/*
				2	* Deadline Scheduling Class (SCHED_DEADLINE)
				3	*
				4	* Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
				5	*
				6	* Tasks that periodically execute their instances for less than their
				7	* runtime won't miss any of their deadlines.
				8	* Tasks that are not periodic or sporadic or that try to execute more
				9	* than their reserved bandwidth will be slowed down (and may potentially
				10	* miss some of their deadlines), and won't affect any other task.
				11	*
				12	* Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	13	* Juri Lelli <juri.lelli@gmail.com>,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	14	* Michael Trimarchi <michael@amarulasolutions.com>,
				15	* Fabio Checconi <fchecconi@gmail.com>
				16	*/
				17	#include "sched.h"
				18
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	19	#include <linux/slab.h>
				20
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	21	struct dl_bandwidth def_dl_bandwidth;
				22
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	23	static inline struct task_struct dl_task_of(struct sched_dl_entity dl_se)
				24	{
				25	return container_of(dl_se, struct task_struct, dl);
				26	}
				27
				28	static inline struct rq rq_of_dl_rq(struct dl_rq dl_rq)
				29	{
				30	return container_of(dl_rq, struct rq, dl);
				31	}
				32
				33	static inline struct dl_rq dl_rq_of_se(struct sched_dl_entity dl_se)
				34	{
				35	struct task_struct *p = dl_task_of(dl_se);
				36	struct rq *rq = task_rq(p);
				37
				38	return &rq->dl;
				39	}
				40
				41	static inline int on_dl_rq(struct sched_dl_entity *dl_se)
				42	{
				43	return !RB_EMPTY_NODE(&dl_se->rb_node);
				44	}
				45
				46	static inline int is_leftmost(struct task_struct p, struct dl_rq dl_rq)
				47	{
				48	struct sched_dl_entity *dl_se = &p->dl;
				49
				50	return dl_rq->rb_leftmost == &dl_se->rb_node;
				51	}
				52
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	53	void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
				54	{
				55	raw_spin_lock_init(&dl_b->dl_runtime_lock);
				56	dl_b->dl_period = period;
				57	dl_b->dl_runtime = runtime;
				58	}
				59
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	60	void init_dl_bw(struct dl_bw *dl_b)
				61	{
				62	raw_spin_lock_init(&dl_b->lock);
				63	raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	64	if (global_rt_runtime() == RUNTIME_INF)
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	65	dl_b->bw = -1;
				66	else
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	67	dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	68	raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
				69	dl_b->total_bw = 0;
				70	}
				71
Abel Vesa	07c54f7	2015-03-03 13:50:27 +0200	[diff] [blame]	72	void init_dl_rq(struct dl_rq *dl_rq)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	73	{
				74	dl_rq->rb_root = RB_ROOT;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	75
				76	#ifdef CONFIG_SMP
				77	/* zero means no -deadline tasks */
				78	dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
				79
				80	dl_rq->dl_nr_migratory = 0;
				81	dl_rq->overloaded = 0;
				82	dl_rq->pushable_dl_tasks_root = RB_ROOT;
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	83	#else
				84	init_dl_bw(&dl_rq->dl_bw);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	85	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	86	}
				87
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	88	#ifdef CONFIG_SMP
				89
				90	static inline int dl_overloaded(struct rq *rq)
				91	{
				92	return atomic_read(&rq->rd->dlo_count);
				93	}
				94
				95	static inline void dl_set_overload(struct rq *rq)
				96	{
				97	if (!rq->online)
				98	return;
				99
				100	cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
				101	/*
				102	* Must be visible before the overload count is
				103	* set (as in sched_rt.c).
				104	*
				105	* Matched by the barrier in pull_dl_task().
				106	*/
				107	smp_wmb();
				108	atomic_inc(&rq->rd->dlo_count);
				109	}
				110
				111	static inline void dl_clear_overload(struct rq *rq)
				112	{
				113	if (!rq->online)
				114	return;
				115
				116	atomic_dec(&rq->rd->dlo_count);
				117	cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
				118	}
				119
				120	static void update_dl_migration(struct dl_rq *dl_rq)
				121	{
Kirill Tkhai	995b9ea	2014-02-18 02:24:13 +0400	[diff] [blame]	122	if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	123	if (!dl_rq->overloaded) {
				124	dl_set_overload(rq_of_dl_rq(dl_rq));
				125	dl_rq->overloaded = 1;
				126	}
				127	} else if (dl_rq->overloaded) {
				128	dl_clear_overload(rq_of_dl_rq(dl_rq));
				129	dl_rq->overloaded = 0;
				130	}
				131	}
				132
				133	static void inc_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				134	{
				135	struct task_struct *p = dl_task_of(dl_se);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	136
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	137	if (p->nr_cpus_allowed > 1)
				138	dl_rq->dl_nr_migratory++;
				139
				140	update_dl_migration(dl_rq);
				141	}
				142
				143	static void dec_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				144	{
				145	struct task_struct *p = dl_task_of(dl_se);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	146
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	147	if (p->nr_cpus_allowed > 1)
				148	dl_rq->dl_nr_migratory--;
				149
				150	update_dl_migration(dl_rq);
				151	}
				152
				153	/*
				154	* The list of pushable -deadline task is not a plist, like in
				155	* sched_rt.c, it is an rb-tree with tasks ordered by deadline.
				156	*/
				157	static void enqueue_pushable_dl_task(struct rq rq, struct task_struct p)
				158	{
				159	struct dl_rq *dl_rq = &rq->dl;
				160	struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node;
				161	struct rb_node *parent = NULL;
				162	struct task_struct *entry;
				163	int leftmost = 1;
				164
				165	BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
				166
				167	while (*link) {
				168	parent = *link;
				169	entry = rb_entry(parent, struct task_struct,
				170	pushable_dl_tasks);
				171	if (dl_entity_preempt(&p->dl, &entry->dl))
				172	link = &parent->rb_left;
				173	else {
				174	link = &parent->rb_right;
				175	leftmost = 0;
				176	}
				177	}
				178
				179	if (leftmost)
				180	dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
				181
				182	rb_link_node(&p->pushable_dl_tasks, parent, link);
				183	rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				184	}
				185
				186	static void dequeue_pushable_dl_task(struct rq rq, struct task_struct p)
				187	{
				188	struct dl_rq *dl_rq = &rq->dl;
				189
				190	if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
				191	return;
				192
				193	if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) {
				194	struct rb_node *next_node;
				195
				196	next_node = rb_next(&p->pushable_dl_tasks);
				197	dl_rq->pushable_dl_tasks_leftmost = next_node;
				198	}
				199
				200	rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				201	RB_CLEAR_NODE(&p->pushable_dl_tasks);
				202	}
				203
				204	static inline int has_pushable_dl_tasks(struct rq *rq)
				205	{
				206	return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root);
				207	}
				208
				209	static int push_dl_task(struct rq *rq);
				210
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	211	static inline bool need_pull_dl_task(struct rq rq, struct task_struct prev)
				212	{
				213	return dl_task(prev);
				214	}
				215
				216	static inline void set_post_schedule(struct rq *rq)
				217	{
				218	rq->post_schedule = has_pushable_dl_tasks(rq);
				219	}
				220
Wanpeng Li	fa9c9d1	2015-03-27 07:08:35 +0800	[diff] [blame]	221	static struct rq find_lock_later_rq(struct task_struct task, struct rq *rq);
				222
				223	static void dl_task_offline_migration(struct rq rq, struct task_struct p)
				224	{
				225	struct rq *later_rq = NULL;
				226	bool fallback = false;
				227
				228	later_rq = find_lock_later_rq(p, rq);
				229
				230	if (!later_rq) {
				231	int cpu;
				232
				233	/*
				234	* If we cannot preempt any rq, fall back to pick any
				235	* online cpu.
				236	*/
				237	fallback = true;
				238	cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
				239	if (cpu >= nr_cpu_ids) {
				240	/*
				241	* Fail to find any suitable cpu.
				242	* The task will never come back!
				243	*/
				244	BUG_ON(dl_bandwidth_enabled());
				245
				246	/*
				247	* If admission control is disabled we
				248	* try a little harder to let the task
				249	* run.
				250	*/
				251	cpu = cpumask_any(cpu_active_mask);
				252	}
				253	later_rq = cpu_rq(cpu);
				254	double_lock_balance(rq, later_rq);
				255	}
				256
				257	deactivate_task(rq, p, 0);
				258	set_task_cpu(p, later_rq->cpu);
				259	activate_task(later_rq, p, ENQUEUE_REPLENISH);
				260
				261	if (!fallback)
				262	resched_curr(later_rq);
				263
				264	double_unlock_balance(rq, later_rq);
				265	}
				266
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	267	#else
				268
				269	static inline
				270	void enqueue_pushable_dl_task(struct rq rq, struct task_struct p)
				271	{
				272	}
				273
				274	static inline
				275	void dequeue_pushable_dl_task(struct rq rq, struct task_struct p)
				276	{
				277	}
				278
				279	static inline
				280	void inc_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				281	{
				282	}
				283
				284	static inline
				285	void dec_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				286	{
				287	}
				288
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	289	static inline bool need_pull_dl_task(struct rq rq, struct task_struct prev)
				290	{
				291	return false;
				292	}
				293
				294	static inline int pull_dl_task(struct rq *rq)
				295	{
				296	return 0;
				297	}
				298
				299	static inline void set_post_schedule(struct rq *rq)
				300	{
				301	}
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	302	#endif /* CONFIG_SMP */
				303
/* Forward declarations for helpers that are used before they are defined. */
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
				  int flags);
				308
				309	/*
				310	* We are being explicitly informed that a new instance is starting,
				311	* and this means that:
				312	* - the absolute deadline of the entity has to be placed at
				313	* current time + relative deadline;
				314	* - the runtime of the entity has to be set to the maximum value.
				315	*
				316	* The capability of specifying such event is useful whenever a -deadline
				317	* entity wants to (try to!) synchronize its behaviour with the scheduler's
				318	* one, and to (try to!) reconcile itself with its own scheduling
				319	* parameters.
				320	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	321	static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
				322	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	323	{
				324	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				325	struct rq *rq = rq_of_dl_rq(dl_rq);
				326
				327	WARN_ON(!dl_se->dl_new \|\| dl_se->dl_throttled);
				328
				329	/*
				330	* We use the regular wall clock time to set deadlines in the
				331	* future; in fact, we must consider execution overheads (time
				332	* spent on hardirq context, etc.).
				333	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	334	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				335	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	336	dl_se->dl_new = 0;
				337	}
				338
				339	/*
				340	* Pure Earliest Deadline First (EDF) scheduling does not deal with the
				341	* possibility of a entity lasting more than what it declared, and thus
				342	* exhausting its runtime.
				343	*
				344	* Here we are interested in making runtime overrun possible, but we do
				345	* not want a entity which is misbehaving to affect the scheduling of all
				346	* other entities.
				347	* Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
				348	* is used, in order to confine each entity within its own bandwidth.
				349	*
				350	* This function deals exactly with that, and ensures that when the runtime
				351	* of a entity is replenished, its deadline is also postponed. That ensures
				352	* the overrunning entity can't interfere with other entity in the system and
				353	* can't make them miss their deadlines. Reasons why this kind of overruns
				354	* could happen are, typically, a entity voluntarily trying to overcome its
xiaofeng.yan	1b09d29	2014-07-07 05:59:04 +0000	[diff] [blame]	355	* runtime, or it just underestimated it during sched_setattr().
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	356	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	357	static void replenish_dl_entity(struct sched_dl_entity *dl_se,
				358	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	359	{
				360	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				361	struct rq *rq = rq_of_dl_rq(dl_rq);
				362
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	363	BUG_ON(pi_se->dl_runtime <= 0);
				364
				365	/*
				366	* This could be the case for a !-dl task that is boosted.
				367	* Just go with full inherited parameters.
				368	*/
				369	if (dl_se->dl_deadline == 0) {
				370	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				371	dl_se->runtime = pi_se->dl_runtime;
				372	}
				373
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	374	/*
				375	* We keep moving the deadline away until we get some
				376	* available runtime for the entity. This ensures correct
				377	* handling of situations where the runtime overrun is
				378	* arbitrary large.
				379	*/
				380	while (dl_se->runtime <= 0) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	381	dl_se->deadline += pi_se->dl_period;
				382	dl_se->runtime += pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	383	}
				384
				385	/*
				386	* At this point, the deadline really should be "in
				387	* the future" with respect to rq->clock. If it's
				388	* not, we are, for some reason, lagging too much!
				389	* Anyway, after having warn userspace abut that,
				390	* we still try to keep the things running by
				391	* resetting the deadline and the budget of the
				392	* entity.
				393	*/
				394	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
John Stultz	c224815	2014-06-04 16:11:41 -0700	[diff] [blame]	395	printk_deferred_once("sched: DL replenish lagged to much\n");
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	396	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				397	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	398	}
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	399
				400	if (dl_se->dl_yielded)
				401	dl_se->dl_yielded = 0;
				402	if (dl_se->dl_throttled)
				403	dl_se->dl_throttled = 0;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	404	}
				405
				406	/*
				407	* Here we check if --at time t-- an entity (which is probably being
				408	* [re]activated or, in general, enqueued) can use its remaining runtime
				409	* and its current deadline _without_ exceeding the bandwidth it is
				410	* assigned (function returns true if it can't). We are in fact applying
				411	* one of the CBS rules: when a task wakes up, if the residual runtime
				412	* over residual deadline fits within the allocated bandwidth, then we
				413	* can keep the current (absolute) deadline and residual budget without
				414	* disrupting the schedulability of the system. Otherwise, we should
				415	* refill the runtime and set the deadline a period in the future,
				416	* because keeping the current (absolute) deadline of the task would
Dario Faggioli	712e5e3	2014-01-27 12:20:15 +0100	[diff] [blame]	417	* result in breaking guarantees promised to other tasks (refer to
				418	* Documentation/scheduler/sched-deadline.txt for more informations).
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	419	*
				420	* This function returns true if:
				421	*
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame]	422	* runtime / (deadline - t) > dl_runtime / dl_period ,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	423	*
				424	* IOW we can't recycle current parameters.
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame]	425	*
				426	* Notice that the bandwidth check is done against the period. For
				427	* task with deadline equal to period this is the same of using
				428	* dl_deadline instead of dl_period in the equation above.
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	429	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	430	static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
				431	struct sched_dl_entity *pi_se, u64 t)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	432	{
				433	u64 left, right;
				434
				435	/*
				436	* left and right are the two sides of the equation above,
				437	* after a bit of shuffling to use multiplications instead
				438	* of divisions.
				439	*
				440	* Note that none of the time values involved in the two
				441	* multiplications are absolute: dl_deadline and dl_runtime
				442	* are the relative deadline and the maximum runtime of each
				443	* instance, runtime is the runtime left for the last instance
				444	* and (deadline - t), since t is rq->clock, is the time left
				445	* to the (absolute) deadline. Even if overflowing the u64 type
				446	* is very unlikely to occur in both cases, here we scale down
				447	* as we want to avoid that risk at all. Scaling down by 10
				448	* means that we reduce granularity to 1us. We are fine with it,
				449	* since this is only a true/false check and, anyway, thinking
				450	* of anything below microseconds resolution is actually fiction
				451	* (but still we want to give the user that illusion >;).
				452	*/
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	453	left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
				454	right = ((dl_se->deadline - t) >> DL_SCALE) *
				455	(pi_se->dl_runtime >> DL_SCALE);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	456
				457	return dl_time_before(right, left);
				458	}
				459
				460	/*
				461	* When a -deadline entity is queued back on the runqueue, its runtime and
				462	* deadline might need updating.
				463	*
				464	* The policy here is that we update the deadline of the entity only if:
				465	* - the current deadline is in the past,
				466	* - using the remaining runtime with the current deadline would make
				467	* the entity exceed its bandwidth.
				468	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	469	static void update_dl_entity(struct sched_dl_entity *dl_se,
				470	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	471	{
				472	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				473	struct rq *rq = rq_of_dl_rq(dl_rq);
				474
				475	/*
				476	* The arrival of a new instance needs special treatment, i.e.,
				477	* the actual scheduling parameters have to be "renewed".
				478	*/
				479	if (dl_se->dl_new) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	480	setup_new_dl_entity(dl_se, pi_se);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	481	return;
				482	}
				483
				484	if (dl_time_before(dl_se->deadline, rq_clock(rq)) \|\|
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	485	dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
				486	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				487	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	488	}
				489	}
				490
				491	/*
				492	* If the entity depleted all its runtime, and if we want it to sleep
				493	* while waiting for some new execution time to become available, we
				494	* set the bandwidth enforcement timer to the replenishment instant
				495	* and try to activate it.
				496	*
				497	* Notice that it is important for the caller to know if the timer
				498	* actually started or not (i.e., the replenishment instant is in
				499	* the future or in the past).
				500	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	501	static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	502	{
				503	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				504	struct rq *rq = rq_of_dl_rq(dl_rq);
				505	ktime_t now, act;
				506	ktime_t soft, hard;
				507	unsigned long range;
				508	s64 delta;
				509
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	510	if (boosted)
				511	return 0;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	512	/*
				513	* We want the timer to fire at the deadline, but considering
				514	* that it is actually coming from rq->clock and not from
				515	* hrtimer's time base reading.
				516	*/
				517	act = ns_to_ktime(dl_se->deadline);
				518	now = hrtimer_cb_get_time(&dl_se->dl_timer);
				519	delta = ktime_to_ns(now) - rq_clock(rq);
				520	act = ktime_add_ns(act, delta);
				521
				522	/*
				523	* If the expiry time already passed, e.g., because the value
				524	* chosen as the deadline is too small, don't even try to
				525	* start the timer in the past!
				526	*/
				527	if (ktime_us_delta(act, now) < 0)
				528	return 0;
				529
				530	hrtimer_set_expires(&dl_se->dl_timer, act);
				531
				532	soft = hrtimer_get_softexpires(&dl_se->dl_timer);
				533	hard = hrtimer_get_expires(&dl_se->dl_timer);
				534	range = ktime_to_ns(ktime_sub(hard, soft));
				535	__hrtimer_start_range_ns(&dl_se->dl_timer, soft,
				536	range, HRTIMER_MODE_ABS, 0);
				537
				538	return hrtimer_active(&dl_se->dl_timer);
				539	}
				540
				541	/*
				542	* This is the bandwidth enforcement timer callback. If here, we know
				543	* a task is not on its dl_rq, since the fact that the timer was running
				544	* means the task is throttled and needs a runtime replenishment.
				545	*
				546	* However, what we actually do depends on the fact the task is active,
				547	* (it is on its rq) or has been removed from there by a call to
				548	* dequeue_task_dl(). In the former case we must issue the runtime
				549	* replenishment and add the task back to the dl_rq; in the latter, we just
				550	* do nothing but clearing dl_throttled, so that runtime and deadline
				551	* updating (and the queueing back to dl_rq) will be done by the
				552	* next call to enqueue_task_dl().
				553	*/
				554	static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
				555	{
				556	struct sched_dl_entity *dl_se = container_of(timer,
				557	struct sched_dl_entity,
				558	dl_timer);
				559	struct task_struct *p = dl_task_of(dl_se);
Peter Zijlstra	3960c8c	2015-02-17 13:22:25 +0100	[diff] [blame]	560	unsigned long flags;
Kirill Tkhai	0f397f2	2014-05-20 13:33:42 +0400	[diff] [blame]	561	struct rq *rq;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	562
Juri Lelli	4cd57f9	2015-03-31 09:53:36 +0100	[diff] [blame]	563	rq = task_rq_lock(p, &flags);
Kirill Tkhai	0f397f2	2014-05-20 13:33:42 +0400	[diff] [blame]	564
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	565	/*
Juri Lelli	aee38ea	2014-10-24 10:16:38 +0100	[diff] [blame]	566	* We need to take care of several possible races here:
				567	*
				568	* - the task might have changed its scheduling policy
				569	* to something different than SCHED_DEADLINE
				570	* - the task might have changed its reservation parameters
				571	* (through sched_setattr())
				572	* - the task might have been boosted by someone else and
				573	* might be in the boosting/deboosting path
				574	*
				575	* In all this cases we bail out, as the task is already
				576	* in the runqueue or is going to be enqueued back anyway.
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	577	*/
Juri Lelli	aee38ea	2014-10-24 10:16:38 +0100	[diff] [blame]	578	if (!dl_task(p) \|\| dl_se->dl_new \|\|
				579	dl_se->dl_boosted \|\| !dl_se->dl_throttled)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	580	goto unlock;
				581
				582	sched_clock_tick();
				583	update_rq_clock(rq);
Kirill Tkhai	a79ec89	2015-02-16 15:38:34 +0300	[diff] [blame]	584
Wanpeng Li	fa9c9d1	2015-03-27 07:08:35 +0800	[diff] [blame]	585	#ifdef CONFIG_SMP
				586	/*
				587	* If we find that the rq the task was on is no longer
				588	* available, we need to select a new rq.
				589	*/
				590	if (unlikely(!rq->online)) {
				591	dl_task_offline_migration(rq, p);
				592	goto unlock;
				593	}
				594	#endif
				595
Kirill Tkhai	a79ec89	2015-02-16 15:38:34 +0300	[diff] [blame]	596	/*
				597	* If the throttle happened during sched-out; like:
				598	*
				599	* schedule()
				600	* deactivate_task()
				601	* dequeue_task_dl()
				602	* update_curr_dl()
				603	* start_dl_timer()
				604	* __dequeue_task_dl()
				605	* prev->on_rq = 0;
				606	*
				607	* We can be both throttled and !queued. Replenish the counter
				608	* but do not enqueue -- wait for our wakeup to do that.
				609	*/
				610	if (!task_on_rq_queued(p)) {
				611	replenish_dl_entity(dl_se, dl_se);
				612	goto unlock;
				613	}
				614
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	615	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
				616	if (dl_task(rq->curr))
				617	check_preempt_curr_dl(rq, p, 0);
				618	else
				619	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	620	#ifdef CONFIG_SMP
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	621	/*
				622	* Queueing this task back might have overloaded rq,
				623	* check if we need to kick someone away.
				624	*/
				625	if (has_pushable_dl_tasks(rq))
				626	push_dl_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	627	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	628	unlock:
Juri Lelli	4cd57f9	2015-03-31 09:53:36 +0100	[diff] [blame]	629	task_rq_unlock(rq, p, &flags);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	630
				631	return HRTIMER_NORESTART;
				632	}
				633
				634	void init_dl_task_timer(struct sched_dl_entity *dl_se)
				635	{
				636	struct hrtimer *timer = &dl_se->dl_timer;
				637
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	638	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
				639	timer->function = dl_task_timer;
				640	}
				641
				642	static
				643	int dl_runtime_exceeded(struct rq rq, struct sched_dl_entity dl_se)
				644	{
Luca Abeni	269ad80	2014-12-17 11:50:32 +0100	[diff] [blame]	645	return (dl_se->runtime <= 0);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	646	}
				647
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	648	extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
				649
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	650	/*
				651	* Update the current task's runtime statistics (provided it is still
				652	* a -deadline task and has not been removed from the dl_rq).
				653	*/
				654	static void update_curr_dl(struct rq *rq)
				655	{
				656	struct task_struct *curr = rq->curr;
				657	struct sched_dl_entity *dl_se = &curr->dl;
				658	u64 delta_exec;
				659
				660	if (!dl_task(curr) \|\| !on_dl_rq(dl_se))
				661	return;
				662
				663	/*
				664	* Consumed budget is computed considering the time as
				665	* observed by schedulable tasks (excluding time spent
				666	* in hardirq context, etc.). Deadlines are instead
				667	* computed using hard walltime. This seems to be the more
				668	* natural solution, but the full ramifications of this
				669	* approach need further study.
				670	*/
				671	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
Kirill Tkhai	734ff2a	2014-03-04 19:25:46 +0400	[diff] [blame]	672	if (unlikely((s64)delta_exec <= 0))
				673	return;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	674
				675	schedstat_set(curr->se.statistics.exec_max,
				676	max(curr->se.statistics.exec_max, delta_exec));
				677
				678	curr->se.sum_exec_runtime += delta_exec;
				679	account_group_exec_runtime(curr, delta_exec);
				680
				681	curr->se.exec_start = rq_clock_task(rq);
				682	cpuacct_charge(curr, delta_exec);
				683
Dario Faggioli	239be4a	2013-11-07 14:43:39 +0100	[diff] [blame]	684	sched_rt_avg_update(rq, delta_exec);
				685
Wanpeng Li	8049688	2014-10-31 06:39:32 +0800	[diff] [blame]	686	dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	687	if (dl_runtime_exceeded(rq, dl_se)) {
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	688	dl_se->dl_throttled = 1;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	689	__dequeue_task_dl(rq, curr, 0);
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	690	if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	691	enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
				692
				693	if (!is_leftmost(curr, &rq->dl))
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	694	resched_curr(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	695	}
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	696
				697	/*
				698	* Because -- for now -- we share the rt bandwidth, we need to
				699	* account our runtime there too, otherwise actual rt tasks
				700	* would be able to exceed the shared quota.
				701	*
				702	* Account to the root rt group for now.
				703	*
				704	* The solution we're working towards is having the RT groups scheduled
				705	* using deadline servers -- however there's a few nasties to figure
				706	* out before that can happen.
				707	*/
				708	if (rt_bandwidth_enabled()) {
				709	struct rt_rq *rt_rq = &rq->rt;
				710
				711	raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	712	/*
				713	* We'll let actual RT tasks worry about the overflow here, we
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	714	* have our own CBS to keep us inline; only account when RT
				715	* bandwidth is relevant.
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	716	*/
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	717	if (sched_rt_bandwidth_account(rt_rq))
				718	rt_rq->rt_time += delta_exec;
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	719	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				720	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	721	}
				722
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	723	#ifdef CONFIG_SMP
				724
				725	static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu);
				726
				727	static inline u64 next_deadline(struct rq *rq)
				728	{
				729	struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);
				730
				731	if (next && dl_prio(next->prio))
				732	return next->dl.deadline;
				733	else
				734	return 0;
				735	}
				736
				737	static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				738	{
				739	struct rq *rq = rq_of_dl_rq(dl_rq);
				740
				741	if (dl_rq->earliest_dl.curr == 0 ||
				742	dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
				743	/*
				744	* If the dl_rq had no -deadline tasks, or if the new task
				745	* has shorter deadline than the current one on dl_rq, we
				746	* know that the previous earliest becomes our next earliest,
				747	* as the new task becomes the earliest itself.
				748	*/
				749	dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
				750	dl_rq->earliest_dl.curr = deadline;
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	751	cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	752	} else if (dl_rq->earliest_dl.next == 0 ||
				753	dl_time_before(deadline, dl_rq->earliest_dl.next)) {
				754	/*
				755	* On the other hand, if the new -deadline task has a
				756	* later deadline than the earliest one on dl_rq, but
				757	* it is earlier than the next (if any), we must
				758	* recompute the next-earliest.
				759	*/
				760	dl_rq->earliest_dl.next = next_deadline(rq);
				761	}
				762	}
				763
				764	static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				765	{
				766	struct rq *rq = rq_of_dl_rq(dl_rq);
				767
				768	/*
				769	* Since we may have removed our earliest (and/or next earliest)
				770	* task we must recompute them.
				771	*/
				772	if (!dl_rq->dl_nr_running) {
				773	dl_rq->earliest_dl.curr = 0;
				774	dl_rq->earliest_dl.next = 0;
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	775	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	776	} else {
				777	struct rb_node *leftmost = dl_rq->rb_leftmost;
				778	struct sched_dl_entity *entry;
				779
				780	entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
				781	dl_rq->earliest_dl.curr = entry->deadline;
				782	dl_rq->earliest_dl.next = next_deadline(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	783	cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	784	}
				785	}
				786
				787	#else
				788
				789	static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				790	static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				791
				792	#endif /* CONFIG_SMP */
				793
				794	static inline
				795	void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
				796	{
				797	int prio = dl_task_of(dl_se)->prio;
				798	u64 deadline = dl_se->deadline;
				799
				800	WARN_ON(!dl_prio(prio));
				801	dl_rq->dl_nr_running++;
Kirill Tkhai	7246544	2014-05-09 03:00:14 +0400	[diff] [blame]	802	add_nr_running(rq_of_dl_rq(dl_rq), 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	803
				804	inc_dl_deadline(dl_rq, deadline);
				805	inc_dl_migration(dl_se, dl_rq);
				806	}
				807
				808	static inline
				809	void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
				810	{
				811	int prio = dl_task_of(dl_se)->prio;
				812
				813	WARN_ON(!dl_prio(prio));
				814	WARN_ON(!dl_rq->dl_nr_running);
				815	dl_rq->dl_nr_running--;
Kirill Tkhai	7246544	2014-05-09 03:00:14 +0400	[diff] [blame]	816	sub_nr_running(rq_of_dl_rq(dl_rq), 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	817
				818	dec_dl_deadline(dl_rq, dl_se->deadline);
				819	dec_dl_migration(dl_se, dl_rq);
				820	}
				821
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	822	static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
				823	{
				824	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				825	struct rb_node **link = &dl_rq->rb_root.rb_node;
				826	struct rb_node *parent = NULL;
				827	struct sched_dl_entity *entry;
				828	int leftmost = 1;
				829
				830	BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
				831
				832	while (*link) {
				833	parent = *link;
				834	entry = rb_entry(parent, struct sched_dl_entity, rb_node);
				835	if (dl_time_before(dl_se->deadline, entry->deadline))
				836	link = &parent->rb_left;
				837	else {
				838	link = &parent->rb_right;
				839	leftmost = 0;
				840	}
				841	}
				842
				843	if (leftmost)
				844	dl_rq->rb_leftmost = &dl_se->rb_node;
				845
				846	rb_link_node(&dl_se->rb_node, parent, link);
				847	rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);
				848
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	849	inc_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	850	}
				851
				852	static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
				853	{
				854	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				855
				856	if (RB_EMPTY_NODE(&dl_se->rb_node))
				857	return;
				858
				859	if (dl_rq->rb_leftmost == &dl_se->rb_node) {
				860	struct rb_node *next_node;
				861
				862	next_node = rb_next(&dl_se->rb_node);
				863	dl_rq->rb_leftmost = next_node;
				864	}
				865
				866	rb_erase(&dl_se->rb_node, &dl_rq->rb_root);
				867	RB_CLEAR_NODE(&dl_se->rb_node);
				868
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	869	dec_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	870	}
				871
				872	static void
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	873	enqueue_dl_entity(struct sched_dl_entity *dl_se,
				874	struct sched_dl_entity *pi_se, int flags)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	875	{
				876	BUG_ON(on_dl_rq(dl_se));
				877
				878	/*
				879	* If this is a wakeup or a new instance, the scheduling
				880	* parameters of the task might need updating. Otherwise,
				881	* we want a replenishment of its runtime.
				882	*/
Luca Abeni	6a503c3	2014-12-17 11:50:31 +0100	[diff] [blame]	883	if (dl_se->dl_new || flags & ENQUEUE_WAKEUP)
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	884	update_dl_entity(dl_se, pi_se);
Luca Abeni	6a503c3	2014-12-17 11:50:31 +0100	[diff] [blame]	885	else if (flags & ENQUEUE_REPLENISH)
				886	replenish_dl_entity(dl_se, pi_se);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	887
				888	__enqueue_dl_entity(dl_se);
				889	}
				890
				891	static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
				892	{
				893	__dequeue_dl_entity(dl_se);
				894	}
				895
				896	static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
				897	{
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	898	struct task_struct *pi_task = rt_mutex_get_top_task(p);
				899	struct sched_dl_entity *pi_se = &p->dl;
				900
				901	/*
				902	* Use the scheduling parameters of the top pi-waiter
				903	* task if we have one and its (relative) deadline is
				904	* smaller than our one... OTW we keep our runtime and
				905	* deadline.
				906	*/
Juri Lelli	64be6f1	2014-10-24 10:16:37 +0100	[diff] [blame]	907	if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	908	pi_se = &pi_task->dl;
Juri Lelli	64be6f1	2014-10-24 10:16:37 +0100	[diff] [blame]	909	} else if (!dl_prio(p->normal_prio)) {
				910	/*
				911	* Special case in which we have a !SCHED_DEADLINE task
				912	* that is going to be deboosted, but exceeds its
				913	* runtime while doing so. No point in replenishing
				914	* it, as it's going to return back to its original
				915	* scheduling class after this.
				916	*/
				917	BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
				918	return;
				919	}
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	920
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	921	/*
				922	* If p is throttled, we do nothing. In fact, if it exhausted
				923	* its budget it needs a replenishment and, since it now is on
				924	* its rq, the bandwidth timer callback (which clearly has not
				925	* run yet) will take care of this.
				926	*/
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame]	927	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	928	return;
				929
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	930	enqueue_dl_entity(&p->dl, pi_se, flags);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	931
				932	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
				933	enqueue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	934	}
				935
				936	static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
				937	{
				938	dequeue_dl_entity(&p->dl);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	939	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	940	}
				941
				942	static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
				943	{
				944	update_curr_dl(rq);
				945	__dequeue_task_dl(rq, p, flags);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	946	}
				947
				948	/*
				949	* Yield task semantic for -deadline tasks is:
				950	*
				951	* get off from the CPU until our next instance, with
				952	* a new runtime. This is of little use now, since we
				953	* don't have a bandwidth reclaiming mechanism. Anyway,
				954	* bandwidth reclaiming is planned for the future, and
				955	* yield_task_dl will indicate that some spare budget
				956	* is available for other task instances to use it.
				957	*/
				958	static void yield_task_dl(struct rq *rq)
				959	{
				960	struct task_struct *p = rq->curr;
				961
				962	/*
				963	* We make the task go to sleep until its current deadline by
				964	* forcing its runtime to zero. This way, update_curr_dl() stops
				965	* it and the bandwidth timer will wake it up and will give it
Juri Lelli	5bfd126	2014-04-15 13:49:04 +0200	[diff] [blame]	966	* new scheduling parameters (thanks to dl_yielded=1).
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	967	*/
				968	if (p->dl.runtime > 0) {
Juri Lelli	5bfd126	2014-04-15 13:49:04 +0200	[diff] [blame]	969	rq->curr->dl.dl_yielded = 1;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	970	p->dl.runtime = 0;
				971	}
Kirill Tkhai	6f1607f	2015-02-04 12:09:32 +0300	[diff] [blame]	972	update_rq_clock(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	973	update_curr_dl(rq);
Wanpeng Li	44fb085	2015-03-10 12:20:00 +0800	[diff] [blame]	974	/*
				975	* Tell update_rq_clock() that we've just updated,
				976	* so we don't do microscopic update in schedule()
				977	* and double the fastpath cost.
				978	*/
				979	rq_clock_skip_update(rq, true);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	980	}
				981
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	982	#ifdef CONFIG_SMP
				983
				984	static int find_later_rq(struct task_struct *task);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	985
				986	static int
				987	select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
				988	{
				989	struct task_struct *curr;
				990	struct rq *rq;
				991
Wanpeng Li	1d7e974	2014-10-14 10:22:39 +0800	[diff] [blame]	992	if (sd_flag != SD_BALANCE_WAKE)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	993	goto out;
				994
				995	rq = cpu_rq(cpu);
				996
				997	rcu_read_lock();
Jason Low	316c1608d	2015-04-28 13:00:20 -0700	[diff] [blame]	998	curr = READ_ONCE(rq->curr); /* unlocked access */
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	999
				1000	/*
				1001	* If we are dealing with a -deadline task, we must
				1002	* decide where to wake it up.
				1003	* If it has a later deadline and the current task
				1004	* on this rq can't move (provided the waking task
				1005	* can!) we prefer to send it somewhere else. On the
				1006	* other hand, if it has a shorter deadline, we
				1007	* try to make it stay here, it might be important.
				1008	*/
				1009	if (unlikely(dl_task(curr)) &&
				1010	(curr->nr_cpus_allowed < 2 ||
				1011	!dl_entity_preempt(&p->dl, &curr->dl)) &&
				1012	(p->nr_cpus_allowed > 1)) {
				1013	int target = find_later_rq(p);
				1014
				1015	if (target != -1)
				1016	cpu = target;
				1017	}
				1018	rcu_read_unlock();
				1019
				1020	out:
				1021	return cpu;
				1022	}
				1023
				1024	static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
				1025	{
				1026	/*
				1027	* Current can't be migrated, useless to reschedule,
				1028	* let's hope p can move out.
				1029	*/
				1030	if (rq->curr->nr_cpus_allowed == 1 ||
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1031	cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1032	return;
				1033
				1034	/*
				1035	* p is migratable, so let's not schedule it and
				1036	* see if it is pushed or pulled somewhere else.
				1037	*/
				1038	if (p->nr_cpus_allowed != 1 &&
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1039	cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1040	return;
				1041
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1042	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1043	}
				1044
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1045	static int pull_dl_task(struct rq *this_rq);
				1046
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1047	#endif /* CONFIG_SMP */
				1048
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1049	/*
				1050	* Only called when both the current and waking task are -deadline
				1051	* tasks.
				1052	*/
				1053	static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
				1054	int flags)
				1055	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1056	if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1057	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1058	return;
				1059	}
				1060
				1061	#ifdef CONFIG_SMP
				1062	/*
				1063	* In the unlikely case current and p have the same deadline
				1064	* let us try to decide what's the best thing to do...
				1065	*/
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1066	if ((p->dl.deadline == rq->curr->dl.deadline) &&
				1067	!test_tsk_need_resched(rq->curr))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1068	check_preempt_equal_dl(rq, p);
				1069	#endif /* CONFIG_SMP */
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1070	}
				1071
				1072	#ifdef CONFIG_SCHED_HRTICK
				1073	static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
				1074	{
xiaofeng.yan	177ef2a	2014-08-26 03:15:41 +0000	[diff] [blame]	1075	hrtick_start(rq, p->dl.runtime);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1076	}
Wanpeng Li	36ce988	2014-11-11 09:52:26 +0800	[diff] [blame]	1077	#else /* !CONFIG_SCHED_HRTICK */
				1078	static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
				1079	{
				1080	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1081	#endif
				1082
				1083	static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
				1084	struct dl_rq *dl_rq)
				1085	{
				1086	struct rb_node *left = dl_rq->rb_leftmost;
				1087
				1088	if (!left)
				1089	return NULL;
				1090
				1091	return rb_entry(left, struct sched_dl_entity, rb_node);
				1092	}
				1093
Peter Zijlstra	606dba2	2012-02-11 06:05:00 +0100	[diff] [blame]	1094	struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1095	{
				1096	struct sched_dl_entity *dl_se;
				1097	struct task_struct *p;
				1098	struct dl_rq *dl_rq;
				1099
				1100	dl_rq = &rq->dl;
				1101
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1102	if (need_pull_dl_task(rq, prev)) {
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1103	pull_dl_task(rq);
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1104	/*
				1105	* pull_dl_task() can drop (and re-acquire) rq->lock; this
				1106	* means a stop task can slip in, in which case we need to
				1107	* re-start task selection.
				1108	*/
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1109	if (rq->stop && task_on_rq_queued(rq->stop))
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1110	return RETRY_TASK;
				1111	}
				1112
Kirill Tkhai	734ff2a	2014-03-04 19:25:46 +0400	[diff] [blame]	1113	/*
				1114	* When prev is DL, we may throttle it in put_prev_task().
				1115	* So, we update time before we check for dl_nr_running.
				1116	*/
				1117	if (prev->sched_class == &dl_sched_class)
				1118	update_curr_dl(rq);
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1119
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1120	if (unlikely(!dl_rq->dl_nr_running))
				1121	return NULL;
				1122
Peter Zijlstra	3f1d2a3	2014-02-12 10:49:30 +0100	[diff] [blame]	1123	put_prev_task(rq, prev);
Peter Zijlstra	606dba2	2012-02-11 06:05:00 +0100	[diff] [blame]	1124
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1125	dl_se = pick_next_dl_entity(rq, dl_rq);
				1126	BUG_ON(!dl_se);
				1127
				1128	p = dl_task_of(dl_se);
				1129	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1130
				1131	/* Running task will never be pushed. */
Juri Lelli	7136265	2014-01-14 12:03:51 +0100	[diff] [blame]	1132	dequeue_pushable_dl_task(rq, p);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1133
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1134	if (hrtick_enabled(rq))
				1135	start_hrtick_dl(rq, p);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1136
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	1137	set_post_schedule(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1138
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1139	return p;
				1140	}
				1141
				1142	static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
				1143	{
				1144	update_curr_dl(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1145
				1146	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
				1147	enqueue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1148	}
				1149
				1150	static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
				1151	{
				1152	update_curr_dl(rq);
				1153
Wanpeng Li	a7bebf4	2014-11-26 08:44:01 +0800	[diff] [blame]	1154	/*
				1155	* Even when we have runtime, update_curr_dl() might have resulted in us
				1156	* not being the leftmost task anymore. In that case NEED_RESCHED will
				1157	* be set and schedule() will start a new hrtick for the next task.
				1158	*/
				1159	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
				1160	is_leftmost(p, &rq->dl))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1161	start_hrtick_dl(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1162	}
				1163
				1164	static void task_fork_dl(struct task_struct *p)
				1165	{
				1166	/*
				1167	* SCHED_DEADLINE tasks cannot fork and this is achieved through
				1168	* sched_fork()
				1169	*/
				1170	}
				1171
				1172	static void task_dead_dl(struct task_struct *p)
				1173	{
				1174	struct hrtimer *timer = &p->dl.dl_timer;
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1175	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
				1176
				1177	/*
				1178	* Since we are TASK_DEAD we won't slip out of the domain!
				1179	*/
				1180	raw_spin_lock_irq(&dl_b->lock);
Peter Zijlstra	40767b0	2015-01-28 15:08:03 +0100	[diff] [blame]	1181	/* XXX we should retain the bw until 0-lag */
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1182	dl_b->total_bw -= p->dl.dl_bw;
				1183	raw_spin_unlock_irq(&dl_b->lock);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1184
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	1185	hrtimer_cancel(timer);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1186	}
				1187
				1188	static void set_curr_task_dl(struct rq *rq)
				1189	{
				1190	struct task_struct *p = rq->curr;
				1191
				1192	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1193
				1194	/* You can't push away the running task */
				1195	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1196	}
				1197
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1198	#ifdef CONFIG_SMP
				1199
				1200	/* Only try algorithms three times */
				1201	#define DL_MAX_TRIES 3
				1202
				1203	static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
				1204	{
				1205	if (!task_running(rq, p) &&
Kirill Tkhai	1ba93d4	2014-09-12 17:42:20 +0400	[diff] [blame]	1206	cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1207	return 1;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1208	return 0;
				1209	}
				1210
				1211	/* Returns the second earliest -deadline task, NULL otherwise */
				1212	static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu)
				1213	{
				1214	struct rb_node *next_node = rq->dl.rb_leftmost;
				1215	struct sched_dl_entity *dl_se;
				1216	struct task_struct *p = NULL;
				1217
				1218	next_node:
				1219	next_node = rb_next(next_node);
				1220	if (next_node) {
				1221	dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
				1222	p = dl_task_of(dl_se);
				1223
				1224	if (pick_dl_task(rq, p, cpu))
				1225	return p;
				1226
				1227	goto next_node;
				1228	}
				1229
				1230	return NULL;
				1231	}
				1232
Wanpeng Li	8b5e770	2015-05-13 14:01:01 +0800	[diff] [blame]	1233	/*
				1234	* Return the earliest pushable rq's task, which is suitable to be executed
				1235	* on the CPU, NULL otherwise:
				1236	*/
				1237	static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
				1238	{
				1239	struct rb_node *next_node = rq->dl.pushable_dl_tasks_leftmost;
				1240	struct task_struct *p = NULL;
				1241
				1242	if (!has_pushable_dl_tasks(rq))
				1243	return NULL;
				1244
				1245	next_node:
				1246	if (next_node) {
				1247	p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);
				1248
				1249	if (pick_dl_task(rq, p, cpu))
				1250	return p;
				1251
				1252	next_node = rb_next(next_node);
				1253	goto next_node;
				1254	}
				1255
				1256	return NULL;
				1257	}
				1258
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1259	static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
				1260
				1261	static int find_later_rq(struct task_struct *task)
				1262	{
				1263	struct sched_domain *sd;
Christoph Lameter	4ba2968	2014-08-26 19:12:21 -0500	[diff] [blame]	1264	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1265	int this_cpu = smp_processor_id();
				1266	int best_cpu, cpu = task_cpu(task);
				1267
				1268	/* Make sure the mask is initialized first */
				1269	if (unlikely(!later_mask))
				1270	return -1;
				1271
				1272	if (task->nr_cpus_allowed == 1)
				1273	return -1;
				1274
Juri Lelli	91ec677	2014-09-19 10:22:41 +0100	[diff] [blame]	1275	/*
				1276	* We have to consider system topology and task affinity
				1277	* first, then we can look for a suitable cpu.
				1278	*/
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1279	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
				1280	task, later_mask);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1281	if (best_cpu == -1)
				1282	return -1;
				1283
				1284	/*
				1285	* If we are here, some target has been found,
				1286	* the most suitable of which is cached in best_cpu.
				1287	* This is, among the runqueues where the current tasks
				1288	* have later deadlines than the task's one, the rq
				1289	* with the latest possible one.
				1290	*
				1291	* Now we check how well this matches with task's
				1292	* affinity and system topology.
				1293	*
				1294	* The last cpu where the task ran is our first
				1295	* guess, since it is most likely cache-hot there.
				1296	*/
				1297	if (cpumask_test_cpu(cpu, later_mask))
				1298	return cpu;
				1299	/*
				1300	* Check if this_cpu is to be skipped (i.e., it is
				1301	* not in the mask) or not.
				1302	*/
				1303	if (!cpumask_test_cpu(this_cpu, later_mask))
				1304	this_cpu = -1;
				1305
				1306	rcu_read_lock();
				1307	for_each_domain(cpu, sd) {
				1308	if (sd->flags & SD_WAKE_AFFINE) {
				1309
				1310	/*
				1311	* If possible, preempting this_cpu is
				1312	* cheaper than migrating.
				1313	*/
				1314	if (this_cpu != -1 &&
				1315	cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				1316	rcu_read_unlock();
				1317	return this_cpu;
				1318	}
				1319
				1320	/*
				1321	* Last chance: if best_cpu is valid and is
				1322	* in the mask, that becomes our choice.
				1323	*/
				1324	if (best_cpu < nr_cpu_ids &&
				1325	cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {
				1326	rcu_read_unlock();
				1327	return best_cpu;
				1328	}
				1329	}
				1330	}
				1331	rcu_read_unlock();
				1332
				1333	/*
				1334	* At this point, all our guesses failed, we just return
				1335	* 'something', and let the caller sort the things out.
				1336	*/
				1337	if (this_cpu != -1)
				1338	return this_cpu;
				1339
				1340	cpu = cpumask_any(later_mask);
				1341	if (cpu < nr_cpu_ids)
				1342	return cpu;
				1343
				1344	return -1;
				1345	}
				1346
				1347	/* Locks the rq it finds */
				1348	static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
				1349	{
				1350	struct rq *later_rq = NULL;
				1351	int tries;
				1352	int cpu;
				1353
				1354	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
				1355	cpu = find_later_rq(task);
				1356
				1357	if ((cpu == -1) || (cpu == rq->cpu))
				1358	break;
				1359
				1360	later_rq = cpu_rq(cpu);
				1361
				1362	/* Retry if something changed. */
				1363	if (double_lock_balance(rq, later_rq)) {
				1364	if (unlikely(task_rq(task) != rq ||
				1365	!cpumask_test_cpu(later_rq->cpu,
				1366	&task->cpus_allowed) ||
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1367	task_running(rq, task) ||
				1368	!task_on_rq_queued(task))) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1369	double_unlock_balance(rq, later_rq);
				1370	later_rq = NULL;
				1371	break;
				1372	}
				1373	}
				1374
				1375	/*
				1376	* If the rq we found has no -deadline task, or
				1377	* its earliest one has a later deadline than our
				1378	* task, the rq is a good one.
				1379	*/
				1380	if (!later_rq->dl.dl_nr_running ||
				1381	dl_time_before(task->dl.deadline,
				1382	later_rq->dl.earliest_dl.curr))
				1383	break;
				1384
				1385	/* Otherwise we try again. */
				1386	double_unlock_balance(rq, later_rq);
				1387	later_rq = NULL;
				1388	}
				1389
				1390	return later_rq;
				1391	}
				1392
				1393	static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
				1394	{
				1395	struct task_struct *p;
				1396
				1397	if (!has_pushable_dl_tasks(rq))
				1398	return NULL;
				1399
				1400	p = rb_entry(rq->dl.pushable_dl_tasks_leftmost,
				1401	struct task_struct, pushable_dl_tasks);
				1402
				1403	BUG_ON(rq->cpu != task_cpu(p));
				1404	BUG_ON(task_current(rq, p));
				1405	BUG_ON(p->nr_cpus_allowed <= 1);
				1406
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1407	BUG_ON(!task_on_rq_queued(p));
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1408	BUG_ON(!dl_task(p));
				1409
				1410	return p;
				1411	}
				1412
				1413	/*
				1414	* See if the non running -deadline tasks on this rq
				1415	* can be sent to some other CPU where they can preempt
				1416	* and start executing.
				1417	*/
				1418	static int push_dl_task(struct rq *rq)
				1419	{
				1420	struct task_struct *next_task;
				1421	struct rq *later_rq;
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1422	int ret = 0;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1423
				1424	if (!rq->dl.overloaded)
				1425	return 0;
				1426
				1427	next_task = pick_next_pushable_dl_task(rq);
				1428	if (!next_task)
				1429	return 0;
				1430
				1431	retry:
				1432	if (unlikely(next_task == rq->curr)) {
				1433	WARN_ON(1);
				1434	return 0;
				1435	}
				1436
				1437	/*
				1438	* If next_task preempts rq->curr, and rq->curr
				1439	* can move away, it makes sense to just reschedule
				1440	* without going further in pushing next_task.
				1441	*/
				1442	if (dl_task(rq->curr) &&
				1443	dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
				1444	rq->curr->nr_cpus_allowed > 1) {
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1445	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1446	return 0;
				1447	}
				1448
				1449	/* We might release rq lock */
				1450	get_task_struct(next_task);
				1451
				1452	/* Will lock the rq it'll find */
				1453	later_rq = find_lock_later_rq(next_task, rq);
				1454	if (!later_rq) {
				1455	struct task_struct *task;
				1456
				1457	/*
				1458	* We must check all this again, since
				1459	* find_lock_later_rq releases rq->lock and it is
				1460	* then possible that next_task has migrated.
				1461	*/
				1462	task = pick_next_pushable_dl_task(rq);
				1463	if (task_cpu(next_task) == rq->cpu && task == next_task) {
				1464	/*
				1465	* The task is still there. We don't try
				1466	* again, some other cpu will pull it when ready.
				1467	*/
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1468	goto out;
				1469	}
				1470
				1471	if (!task)
				1472	/* No more tasks */
				1473	goto out;
				1474
				1475	put_task_struct(next_task);
				1476	next_task = task;
				1477	goto retry;
				1478	}
				1479
				1480	deactivate_task(rq, next_task, 0);
				1481	set_task_cpu(next_task, later_rq->cpu);
				1482	activate_task(later_rq, next_task, 0);
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1483	ret = 1;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1484
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1485	resched_curr(later_rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1486
				1487	double_unlock_balance(rq, later_rq);
				1488
				1489	out:
				1490	put_task_struct(next_task);
				1491
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1492	return ret;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1493	}
				1494
				1495	static void push_dl_tasks(struct rq *rq)
				1496	{
				1497	/* Terminates as it moves a -deadline task */
				1498	while (push_dl_task(rq))
				1499	;
				1500	}
				1501
				1502	static int pull_dl_task(struct rq *this_rq)
				1503	{
				1504	int this_cpu = this_rq->cpu, ret = 0, cpu;
				1505	struct task_struct *p;
				1506	struct rq *src_rq;
				1507	u64 dmin = LONG_MAX;
				1508
				1509	if (likely(!dl_overloaded(this_rq)))
				1510	return 0;
				1511
				1512	/*
				1513	* Match the barrier from dl_set_overloaded; this guarantees that if we
				1514	* see overloaded we must also see the dlo_mask bit.
				1515	*/
				1516	smp_rmb();
				1517
				1518	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
				1519	if (this_cpu == cpu)
				1520	continue;
				1521
				1522	src_rq = cpu_rq(cpu);
				1523
				1524	/*
				1525	* It looks racy, abd it is! However, as in sched_rt.c,
				1526	* we are fine with this.
				1527	*/
				1528	if (this_rq->dl.dl_nr_running &&
				1529	dl_time_before(this_rq->dl.earliest_dl.curr,
				1530	src_rq->dl.earliest_dl.next))
				1531	continue;
				1532
				1533	/* Might drop this_rq->lock */
				1534	double_lock_balance(this_rq, src_rq);
				1535
				1536	/*
				1537	* If there are no more pullable tasks on the
				1538	* rq, we're done with it.
				1539	*/
				1540	if (src_rq->dl.dl_nr_running <= 1)
				1541	goto skip;
				1542
Wanpeng Li	8b5e770	2015-05-13 14:01:01 +0800	[diff] [blame]	1543	p = pick_earliest_pushable_dl_task(src_rq, this_cpu);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1544
				1545	/*
				1546	* We found a task to be pulled if:
				1547	* - it preempts our current (if there's one),
				1548	* - it will preempt the last one we pulled (if any).
				1549	*/
				1550	if (p && dl_time_before(p->dl.deadline, dmin) &&
				1551	(!this_rq->dl.dl_nr_running \|\|
				1552	dl_time_before(p->dl.deadline,
				1553	this_rq->dl.earliest_dl.curr))) {
				1554	WARN_ON(p == src_rq->curr);
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1555	WARN_ON(!task_on_rq_queued(p));
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1556
				1557	/*
				1558	* Then we pull iff p has actually an earlier
				1559	* deadline than the current task of its runqueue.
				1560	*/
				1561	if (dl_time_before(p->dl.deadline,
				1562	src_rq->curr->dl.deadline))
				1563	goto skip;
				1564
				1565	ret = 1;
				1566
				1567	deactivate_task(src_rq, p, 0);
				1568	set_task_cpu(p, this_cpu);
				1569	activate_task(this_rq, p, 0);
				1570	dmin = p->dl.deadline;
				1571
				1572	/* Is there any other task even earlier? */
				1573	}
				1574	skip:
				1575	double_unlock_balance(this_rq, src_rq);
				1576	}
				1577
				1578	return ret;
				1579	}
				1580
/*
 * post_schedule hook of the -deadline class: hand @rq over to
 * push_dl_tasks().
 */
static void post_schedule_dl(struct rq *rq)
{
	push_dl_tasks(rq);
}
				1585
				1586	/*
				1587	* Since the task is not running and a reschedule is not going to happen
				1588	* anytime soon on its runqueue, we try pushing it away now.
				1589	*/
				1590	static void task_woken_dl(struct rq rq, struct task_struct p)
				1591	{
				1592	if (!task_running(rq, p) &&
				1593	!test_tsk_need_resched(rq->curr) &&
				1594	has_pushable_dl_tasks(rq) &&
				1595	p->nr_cpus_allowed > 1 &&
				1596	dl_task(rq->curr) &&
				1597	(rq->curr->nr_cpus_allowed < 2 \|\|
Wanpeng Li	6b0a563	2014-10-31 06:39:34 +0800	[diff] [blame]	1598	!dl_entity_preempt(&p->dl, &rq->curr->dl))) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1599	push_dl_tasks(rq);
				1600	}
				1601	}
				1602
				1603	static void set_cpus_allowed_dl(struct task_struct *p,
				1604	const struct cpumask *new_mask)
				1605	{
				1606	struct rq *rq;
Juri Lelli	7f51412	2014-09-19 10:22:40 +0100	[diff] [blame]	1607	struct root_domain *src_rd;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1608	int weight;
				1609
				1610	BUG_ON(!dl_task(p));
				1611
Juri Lelli	7f51412	2014-09-19 10:22:40 +0100	[diff] [blame]	1612	rq = task_rq(p);
				1613	src_rd = rq->rd;
				1614	/*
				1615	* Migrating a SCHED_DEADLINE task between exclusive
				1616	* cpusets (different root_domains) entails a bandwidth
				1617	* update. We already made space for us in the destination
				1618	* domain (see cpuset_can_attach()).
				1619	*/
				1620	if (!cpumask_intersects(src_rd->span, new_mask)) {
				1621	struct dl_bw *src_dl_b;
				1622
				1623	src_dl_b = dl_bw_of(cpu_of(rq));
				1624	/*
				1625	* We now free resources of the root_domain we are migrating
				1626	* off. In the worst case, sched_setattr() may temporary fail
				1627	* until we complete the update.
				1628	*/
				1629	raw_spin_lock(&src_dl_b->lock);
				1630	__dl_clear(src_dl_b, p->dl.dl_bw);
				1631	raw_spin_unlock(&src_dl_b->lock);
				1632	}
				1633
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1634	/*
				1635	* Update only if the task is actually running (i.e.,
				1636	* it is on the rq AND it is not throttled).
				1637	*/
				1638	if (!on_dl_rq(&p->dl))
				1639	return;
				1640
				1641	weight = cpumask_weight(new_mask);
				1642
				1643	/*
				1644	* Only update if the process changes its state from whether it
				1645	* can migrate or not.
				1646	*/
				1647	if ((p->nr_cpus_allowed > 1) == (weight > 1))
				1648	return;
				1649
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1650	/*
				1651	* The process used to be able to migrate OR it can now migrate
				1652	*/
				1653	if (weight <= 1) {
				1654	if (!task_current(rq, p))
				1655	dequeue_pushable_dl_task(rq, p);
				1656	BUG_ON(!rq->dl.dl_nr_migratory);
				1657	rq->dl.dl_nr_migratory--;
				1658	} else {
				1659	if (!task_current(rq, p))
				1660	enqueue_pushable_dl_task(rq, p);
				1661	rq->dl.dl_nr_migratory++;
				1662	}
				1663
				1664	update_dl_migration(&rq->dl);
				1665	}
				1666
				1667	/* Assumes rq->lock is held */
				1668	static void rq_online_dl(struct rq *rq)
				1669	{
				1670	if (rq->dl.overloaded)
				1671	dl_set_overload(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1672
Xunlei Pang	16b2694	2015-01-19 04:49:36 +0000	[diff] [blame]	1673	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1674	if (rq->dl.dl_nr_running > 0)
				1675	cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1676	}
				1677
				1678	/* Assumes rq->lock is held */
				1679	static void rq_offline_dl(struct rq *rq)
				1680	{
				1681	if (rq->dl.overloaded)
				1682	dl_clear_overload(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1683
				1684	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
Xunlei Pang	16b2694	2015-01-19 04:49:36 +0000	[diff] [blame]	1685	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1686	}
				1687
Wanpeng Li	a6c0e74	2015-05-13 14:01:02 +0800	[diff] [blame^]	1688	void __init init_sched_dl_class(void)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1689	{
				1690	unsigned int i;
				1691
				1692	for_each_possible_cpu(i)
				1693	zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
				1694	GFP_KERNEL, cpu_to_node(i));
				1695	}
				1696
				1697	#endif /* CONFIG_SMP */
				1698
Kirill Tkhai	67dfa1b	2014-10-27 17:40:52 +0300	[diff] [blame]	1699	/*
				1700	* Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
				1701	*/
				1702	static void cancel_dl_timer(struct rq rq, struct task_struct p)
				1703	{
				1704	struct hrtimer *dl_timer = &p->dl.dl_timer;
				1705
				1706	/* Nobody will change task's class if pi_lock is held */
				1707	lockdep_assert_held(&p->pi_lock);
				1708
				1709	if (hrtimer_active(dl_timer)) {
				1710	int ret = hrtimer_try_to_cancel(dl_timer);
				1711
				1712	if (unlikely(ret == -1)) {
				1713	/*
				1714	* Note, p may migrate OR new deadline tasks
				1715	* may appear in rq when we are unlocking it.
				1716	* A caller of us must be fine with that.
				1717	*/
				1718	raw_spin_unlock(&rq->lock);
				1719	hrtimer_cancel(dl_timer);
				1720	raw_spin_lock(&rq->lock);
				1721	}
				1722	}
				1723	}
				1724
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1725	static void switched_from_dl(struct rq rq, struct task_struct p)
				1726	{
Peter Zijlstra	40767b0	2015-01-28 15:08:03 +0100	[diff] [blame]	1727	/* XXX we should retain the bw until 0-lag */
Kirill Tkhai	67dfa1b	2014-10-27 17:40:52 +0300	[diff] [blame]	1728	cancel_dl_timer(rq, p);
Juri Lelli	a5e7be3	2014-09-19 10:22:39 +0100	[diff] [blame]	1729	__dl_clear_params(p);
				1730
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1731	/*
				1732	* Since this might be the only -deadline task on the rq,
				1733	* this is the right place to try to pull some other one
				1734	* from an overloaded cpu, if any.
				1735	*/
Wanpeng Li	cd66091	2014-10-31 06:39:35 +0800	[diff] [blame]	1736	if (!task_on_rq_queued(p) \|\| rq->dl.dl_nr_running)
				1737	return;
				1738
				1739	if (pull_dl_task(rq))
				1740	resched_curr(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1741	}
				1742
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1743	/*
				1744	* When switching to -deadline, we may overload the rq, then
				1745	* we try to push someone off, if possible.
				1746	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1747	static void switched_to_dl(struct rq rq, struct task_struct p)
				1748	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1749	int check_resched = 1;
				1750
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1751	if (task_on_rq_queued(p) && rq->curr != p) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1752	#ifdef CONFIG_SMP
Wanpeng Li	d9aade7a	2014-10-22 08:36:43 +0800	[diff] [blame]	1753	if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
				1754	push_dl_task(rq) && rq != task_rq(p))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1755	/* Only reschedule if pushing failed */
				1756	check_resched = 0;
				1757	#endif /* CONFIG_SMP */
Kirill Tkhai	f3a7e1a	2014-10-21 20:35:56 +0400	[diff] [blame]	1758	if (check_resched) {
				1759	if (dl_task(rq->curr))
				1760	check_preempt_curr_dl(rq, p, 0);
				1761	else
				1762	resched_curr(rq);
				1763	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1764	}
				1765	}
				1766
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1767	/*
				1768	* If the scheduling parameters of a -deadline task changed,
				1769	* a push or pull operation might be needed.
				1770	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1771	static void prio_changed_dl(struct rq rq, struct task_struct p,
				1772	int oldprio)
				1773	{
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1774	if (task_on_rq_queued(p) \|\| rq->curr == p) {
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1775	#ifdef CONFIG_SMP
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1776	/*
				1777	* This might be too much, but unfortunately
				1778	* we don't have the old deadline value, and
				1779	* we can't argue if the task is increasing
				1780	* or lowering its prio, so...
				1781	*/
				1782	if (!rq->dl.overloaded)
				1783	pull_dl_task(rq);
				1784
				1785	/*
				1786	* If we now have a earlier deadline task than p,
				1787	* then reschedule, provided p is still on this
				1788	* runqueue.
				1789	*/
				1790	if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
				1791	rq->curr == p)
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1792	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1793	#else
				1794	/*
				1795	* Again, we don't know if p has a earlier
				1796	* or later deadline, so let's blindly set a
				1797	* (maybe not needed) rescheduling point.
				1798	*/
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1799	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1800	#endif /* CONFIG_SMP */
				1801	} else
				1802	switched_to_dl(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1803	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1804
				1805	const struct sched_class dl_sched_class = {
				1806	.next = &rt_sched_class,
				1807	.enqueue_task = enqueue_task_dl,
				1808	.dequeue_task = dequeue_task_dl,
				1809	.yield_task = yield_task_dl,
				1810
				1811	.check_preempt_curr = check_preempt_curr_dl,
				1812
				1813	.pick_next_task = pick_next_task_dl,
				1814	.put_prev_task = put_prev_task_dl,
				1815
				1816	#ifdef CONFIG_SMP
				1817	.select_task_rq = select_task_rq_dl,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1818	.set_cpus_allowed = set_cpus_allowed_dl,
				1819	.rq_online = rq_online_dl,
				1820	.rq_offline = rq_offline_dl,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1821	.post_schedule = post_schedule_dl,
				1822	.task_woken = task_woken_dl,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1823	#endif
				1824
				1825	.set_curr_task = set_curr_task_dl,
				1826	.task_tick = task_tick_dl,
				1827	.task_fork = task_fork_dl,
				1828	.task_dead = task_dead_dl,
				1829
				1830	.prio_changed = prio_changed_dl,
				1831	.switched_from = switched_from_dl,
				1832	.switched_to = switched_to_dl,
Stanislaw Gruszka	6e99891	2014-11-12 16:58:44 +0100	[diff] [blame]	1833
				1834	.update_curr = update_curr_dl,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1835	};
Wanpeng Li	acb3213	2014-10-31 06:39:33 +0800	[diff] [blame]	1836
				1837	#ifdef CONFIG_SCHED_DEBUG
				1838	extern void print_dl_rq(struct seq_file m, int cpu, struct dl_rq dl_rq);
				1839
				1840	void print_dl_stats(struct seq_file *m, int cpu)
				1841	{
				1842	print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
				1843	}
				1844	#endif /* CONFIG_SCHED_DEBUG */