Blame - kernel/sched/deadline.c - SHIFTPHONES/mainline/linux

blob: a027799ae130d3623ff4351f08c3cf456979bfbc [file] [log] [blame]

Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1	/*
				2	* Deadline Scheduling Class (SCHED_DEADLINE)
				3	*
				4	* Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
				5	*
				6	* Tasks that periodically execute their instances for less than their
				7	* runtime won't miss any of their deadlines.
				8	* Tasks that are not periodic or sporadic or that try to execute more
				9	* than their reserved bandwidth will be slowed down (and may potentially
				10	* miss some of their deadlines), and won't affect any other task.
				11	*
				12	* Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	13	* Juri Lelli <juri.lelli@gmail.com>,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	14	* Michael Trimarchi <michael@amarulasolutions.com>,
				15	* Fabio Checconi <fchecconi@gmail.com>
				16	*/
				17	#include "sched.h"
				18
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	19	#include <linux/slab.h>
				20
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	21	struct dl_bandwidth def_dl_bandwidth;
				22
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	23	static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
				24	{
				25	return container_of(dl_se, struct task_struct, dl);
				26	}
				27
				28	static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
				29	{
				30	return container_of(dl_rq, struct rq, dl);
				31	}
				32
				33	static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
				34	{
				35	struct task_struct *p = dl_task_of(dl_se);
				36	struct rq *rq = task_rq(p);
				37
				38	return &rq->dl;
				39	}
				40
				41	static inline int on_dl_rq(struct sched_dl_entity *dl_se)
				42	{
				43	return !RB_EMPTY_NODE(&dl_se->rb_node);
				44	}
				45
				46	static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
				47	{
				48	struct sched_dl_entity *dl_se = &p->dl;
				49
				50	return dl_rq->rb_leftmost == &dl_se->rb_node;
				51	}
				52
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	53	void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
				54	{
				55	raw_spin_lock_init(&dl_b->dl_runtime_lock);
				56	dl_b->dl_period = period;
				57	dl_b->dl_runtime = runtime;
				58	}
				59
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	60	void init_dl_bw(struct dl_bw *dl_b)
				61	{
				62	raw_spin_lock_init(&dl_b->lock);
				63	raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	64	if (global_rt_runtime() == RUNTIME_INF)
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	65	dl_b->bw = -1;
				66	else
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	67	dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	68	raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
				69	dl_b->total_bw = 0;
				70	}
				71
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	72	void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)
				73	{
				74	dl_rq->rb_root = RB_ROOT;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	75
				76	#ifdef CONFIG_SMP
				77	/* zero means no -deadline tasks */
				78	dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
				79
				80	dl_rq->dl_nr_migratory = 0;
				81	dl_rq->overloaded = 0;
				82	dl_rq->pushable_dl_tasks_root = RB_ROOT;
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	83	#else
				84	init_dl_bw(&dl_rq->dl_bw);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	85	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	86	}
				87
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	88	#ifdef CONFIG_SMP
				89
				90	static inline int dl_overloaded(struct rq *rq)
				91	{
				92	return atomic_read(&rq->rd->dlo_count);
				93	}
				94
				95	static inline void dl_set_overload(struct rq *rq)
				96	{
				97	if (!rq->online)
				98	return;
				99
				100	cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
				101	/*
				102	* Must be visible before the overload count is
				103	* set (as in sched_rt.c).
				104	*
				105	* Matched by the barrier in pull_dl_task().
				106	*/
				107	smp_wmb();
				108	atomic_inc(&rq->rd->dlo_count);
				109	}
				110
				111	static inline void dl_clear_overload(struct rq *rq)
				112	{
				113	if (!rq->online)
				114	return;
				115
				116	atomic_dec(&rq->rd->dlo_count);
				117	cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
				118	}
				119
				120	static void update_dl_migration(struct dl_rq *dl_rq)
				121	{
Kirill Tkhai	995b9ea	2014-02-18 02:24:13 +0400	[diff] [blame]	122	if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	123	if (!dl_rq->overloaded) {
				124	dl_set_overload(rq_of_dl_rq(dl_rq));
				125	dl_rq->overloaded = 1;
				126	}
				127	} else if (dl_rq->overloaded) {
				128	dl_clear_overload(rq_of_dl_rq(dl_rq));
				129	dl_rq->overloaded = 0;
				130	}
				131	}
				132
				133	static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
				134	{
				135	struct task_struct *p = dl_task_of(dl_se);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	136
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	137	if (p->nr_cpus_allowed > 1)
				138	dl_rq->dl_nr_migratory++;
				139
				140	update_dl_migration(dl_rq);
				141	}
				142
				143	static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
				144	{
				145	struct task_struct *p = dl_task_of(dl_se);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	146
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	147	if (p->nr_cpus_allowed > 1)
				148	dl_rq->dl_nr_migratory--;
				149
				150	update_dl_migration(dl_rq);
				151	}
				152
				153	/*
				154	* The list of pushable -deadline task is not a plist, like in
				155	* sched_rt.c, it is an rb-tree with tasks ordered by deadline.
				156	*/
				157	static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
				158	{
				159	struct dl_rq *dl_rq = &rq->dl;
				160	struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node;
				161	struct rb_node *parent = NULL;
				162	struct task_struct *entry;
				163	int leftmost = 1;
				164
				165	BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
				166
				167	while (*link) {
				168	parent = *link;
				169	entry = rb_entry(parent, struct task_struct,
				170	pushable_dl_tasks);
				171	if (dl_entity_preempt(&p->dl, &entry->dl))
				172	link = &parent->rb_left;
				173	else {
				174	link = &parent->rb_right;
				175	leftmost = 0;
				176	}
				177	}
				178
				179	if (leftmost)
				180	dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
				181
				182	rb_link_node(&p->pushable_dl_tasks, parent, link);
				183	rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				184	}
				185
				186	static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
				187	{
				188	struct dl_rq *dl_rq = &rq->dl;
				189
				190	if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
				191	return;
				192
				193	if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) {
				194	struct rb_node *next_node;
				195
				196	next_node = rb_next(&p->pushable_dl_tasks);
				197	dl_rq->pushable_dl_tasks_leftmost = next_node;
				198	}
				199
				200	rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				201	RB_CLEAR_NODE(&p->pushable_dl_tasks);
				202	}
				203
				204	static inline int has_pushable_dl_tasks(struct rq *rq)
				205	{
				206	return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root);
				207	}
				208
				209	static int push_dl_task(struct rq *rq);
				210
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	211	static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
				212	{
				213	return dl_task(prev);
				214	}
				215
				216	static inline void set_post_schedule(struct rq *rq)
				217	{
				218	rq->post_schedule = has_pushable_dl_tasks(rq);
				219	}
				220
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	221	#else
				222
				223	static inline
				224	void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
				225	{
				226	}
				227
				228	static inline
				229	void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
				230	{
				231	}
				232
				233	static inline
				234	void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
				235	{
				236	}
				237
				238	static inline
				239	void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
				240	{
				241	}
				242
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	243	static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
				244	{
				245	return false;
				246	}
				247
				248	static inline int pull_dl_task(struct rq *rq)
				249	{
				250	return 0;
				251	}
				252
				253	static inline void set_post_schedule(struct rq *rq)
				254	{
				255	}
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	256	#endif /* CONFIG_SMP */
				257
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	258	static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
				259	static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
				260	static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
				261	int flags);
				262
				263	/*
				264	* We are being explicitly informed that a new instance is starting,
				265	* and this means that:
				266	* - the absolute deadline of the entity has to be placed at
				267	* current time + relative deadline;
				268	* - the runtime of the entity has to be set to the maximum value.
				269	*
				270	* The capability of specifying such event is useful whenever a -deadline
				271	* entity wants to (try to!) synchronize its behaviour with the scheduler's
				272	* one, and to (try to!) reconcile itself with its own scheduling
				273	* parameters.
				274	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	275	static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
				276	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	277	{
				278	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				279	struct rq *rq = rq_of_dl_rq(dl_rq);
				280
				281	WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
				282
				283	/*
				284	* We use the regular wall clock time to set deadlines in the
				285	* future; in fact, we must consider execution overheads (time
				286	* spent on hardirq context, etc.).
				287	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	288	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				289	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	290	dl_se->dl_new = 0;
				291	}
				292
				293	/*
				294	* Pure Earliest Deadline First (EDF) scheduling does not deal with the
				295	* possibility of a entity lasting more than what it declared, and thus
				296	* exhausting its runtime.
				297	*
				298	* Here we are interested in making runtime overrun possible, but we do
				299	* not want a entity which is misbehaving to affect the scheduling of all
				300	* other entities.
				301	* Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
				302	* is used, in order to confine each entity within its own bandwidth.
				303	*
				304	* This function deals exactly with that, and ensures that when the runtime
				305	* of a entity is replenished, its deadline is also postponed. That ensures
				306	* the overrunning entity can't interfere with other entity in the system and
				307	* can't make them miss their deadlines. Reasons why this kind of overruns
				308	* could happen are, typically, a entity voluntarily trying to overcome its
xiaofeng.yan	1b09d29	2014-07-07 05:59:04 +0000	[diff] [blame]	309	* runtime, or it just underestimated it during sched_setattr().
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	310	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	311	static void replenish_dl_entity(struct sched_dl_entity *dl_se,
				312	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	313	{
				314	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				315	struct rq *rq = rq_of_dl_rq(dl_rq);
				316
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	317	BUG_ON(pi_se->dl_runtime <= 0);
				318
				319	/*
				320	* This could be the case for a !-dl task that is boosted.
				321	* Just go with full inherited parameters.
				322	*/
				323	if (dl_se->dl_deadline == 0) {
				324	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				325	dl_se->runtime = pi_se->dl_runtime;
				326	}
				327
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	328	/*
				329	* We keep moving the deadline away until we get some
				330	* available runtime for the entity. This ensures correct
				331	* handling of situations where the runtime overrun is
				332	* arbitrarily large.
				333	*/
				334	while (dl_se->runtime <= 0) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	335	dl_se->deadline += pi_se->dl_period;
				336	dl_se->runtime += pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	337	}
				338
				339	/*
				340	* At this point, the deadline really should be "in
				341	* the future" with respect to rq->clock. If it's
				342	* not, we are, for some reason, lagging too much!
				343	* Anyway, after having warned userspace about that,
				344	* we still try to keep the things running by
				345	* resetting the deadline and the budget of the
				346	* entity.
				347	*/
				348	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
John Stultz	c224815	2014-06-04 16:11:41 -0700	[diff] [blame]	349	printk_deferred_once("sched: DL replenish lagged to much\n");
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	350	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				351	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	352	}
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame^]	353
				354	if (dl_se->dl_yielded)
				355	dl_se->dl_yielded = 0;
				356	if (dl_se->dl_throttled)
				357	dl_se->dl_throttled = 0;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	358	}
				359
				360	/*
				361	* Here we check if --at time t-- an entity (which is probably being
				362	* [re]activated or, in general, enqueued) can use its remaining runtime
				363	* and its current deadline _without_ exceeding the bandwidth it is
				364	* assigned (function returns true if it can't). We are in fact applying
				365	* one of the CBS rules: when a task wakes up, if the residual runtime
				366	* over residual deadline fits within the allocated bandwidth, then we
				367	* can keep the current (absolute) deadline and residual budget without
				368	* disrupting the schedulability of the system. Otherwise, we should
				369	* refill the runtime and set the deadline a period in the future,
				370	* because keeping the current (absolute) deadline of the task would
Dario Faggioli	712e5e3	2014-01-27 12:20:15 +0100	[diff] [blame]	371	* result in breaking guarantees promised to other tasks (refer to
				372	* Documentation/scheduler/sched-deadline.txt for more information).
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	373	*
				374	* This function returns true if:
				375	*
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame]	376	* runtime / (deadline - t) > dl_runtime / dl_period ,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	377	*
				378	* IOW we can't recycle current parameters.
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame]	379	*
				380	* Notice that the bandwidth check is done against the period. For
				381	* task with deadline equal to period this is the same as using
				382	* dl_deadline instead of dl_period in the equation above.
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	383	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	384	static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
				385	struct sched_dl_entity *pi_se, u64 t)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	386	{
				387	u64 left, right;
				388
				389	/*
				390	* left and right are the two sides of the equation above,
				391	* after a bit of shuffling to use multiplications instead
				392	* of divisions.
				393	*
				394	* Note that none of the time values involved in the two
				395	* multiplications are absolute: dl_deadline and dl_runtime
				396	* are the relative deadline and the maximum runtime of each
				397	* instance, runtime is the runtime left for the last instance
				398	* and (deadline - t), since t is rq->clock, is the time left
				399	* to the (absolute) deadline. Even if overflowing the u64 type
				400	* is very unlikely to occur in both cases, here we scale down
				401	* as we want to avoid that risk at all. Scaling down by 10
				402	* means that we reduce granularity to 1us. We are fine with it,
				403	* since this is only a true/false check and, anyway, thinking
				404	* of anything below microseconds resolution is actually fiction
				405	* (but still we want to give the user that illusion >;).
				406	*/
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	407	left = (pi_se->dl_period >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
				408	right = ((dl_se->deadline - t) >> DL_SCALE) *
				409	(pi_se->dl_runtime >> DL_SCALE);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	410
				411	return dl_time_before(right, left);
				412	}
				413
				414	/*
				415	* When a -deadline entity is queued back on the runqueue, its runtime and
				416	* deadline might need updating.
				417	*
				418	* The policy here is that we update the deadline of the entity only if:
				419	* - the current deadline is in the past,
				420	* - using the remaining runtime with the current deadline would make
				421	* the entity exceed its bandwidth.
				422	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	423	static void update_dl_entity(struct sched_dl_entity *dl_se,
				424	struct sched_dl_entity *pi_se)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	425	{
				426	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				427	struct rq *rq = rq_of_dl_rq(dl_rq);
				428
				429	/*
				430	* The arrival of a new instance needs special treatment, i.e.,
				431	* the actual scheduling parameters have to be "renewed".
				432	*/
				433	if (dl_se->dl_new) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	434	setup_new_dl_entity(dl_se, pi_se);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	435	return;
				436	}
				437
				438	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	439	dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
				440	dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
				441	dl_se->runtime = pi_se->dl_runtime;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	442	}
				443	}
				444
				445	/*
				446	* If the entity depleted all its runtime, and if we want it to sleep
				447	* while waiting for some new execution time to become available, we
				448	* set the bandwidth enforcement timer to the replenishment instant
				449	* and try to activate it.
				450	*
				451	* Notice that it is important for the caller to know if the timer
				452	* actually started or not (i.e., the replenishment instant is in
				453	* the future or in the past).
				454	*/
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	455	static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	456	{
				457	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				458	struct rq *rq = rq_of_dl_rq(dl_rq);
				459	ktime_t now, act;
				460	ktime_t soft, hard;
				461	unsigned long range;
				462	s64 delta;
				463
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	464	if (boosted)
				465	return 0;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	466	/*
				467	* We want the timer to fire at the deadline, but considering
				468	* that it is actually coming from rq->clock and not from
				469	* hrtimer's time base reading.
				470	*/
				471	act = ns_to_ktime(dl_se->deadline);
				472	now = hrtimer_cb_get_time(&dl_se->dl_timer);
				473	delta = ktime_to_ns(now) - rq_clock(rq);
				474	act = ktime_add_ns(act, delta);
				475
				476	/*
				477	* If the expiry time already passed, e.g., because the value
				478	* chosen as the deadline is too small, don't even try to
				479	* start the timer in the past!
				480	*/
				481	if (ktime_us_delta(act, now) < 0)
				482	return 0;
				483
				484	hrtimer_set_expires(&dl_se->dl_timer, act);
				485
				486	soft = hrtimer_get_softexpires(&dl_se->dl_timer);
				487	hard = hrtimer_get_expires(&dl_se->dl_timer);
				488	range = ktime_to_ns(ktime_sub(hard, soft));
				489	__hrtimer_start_range_ns(&dl_se->dl_timer, soft,
				490	range, HRTIMER_MODE_ABS, 0);
				491
				492	return hrtimer_active(&dl_se->dl_timer);
				493	}
				494
				495	/*
				496	* This is the bandwidth enforcement timer callback. If here, we know
				497	* a task is not on its dl_rq, since the fact that the timer was running
				498	* means the task is throttled and needs a runtime replenishment.
				499	*
				500	* However, what we actually do depends on the fact the task is active,
				501	* (it is on its rq) or has been removed from there by a call to
				502	* dequeue_task_dl(). In the former case we must issue the runtime
				503	* replenishment and add the task back to the dl_rq; in the latter, we just
				504	* do nothing but clearing dl_throttled, so that runtime and deadline
				505	* updating (and the queueing back to dl_rq) will be done by the
				506	* next call to enqueue_task_dl().
				507	*/
				508	static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
				509	{
				510	struct sched_dl_entity *dl_se = container_of(timer,
				511	struct sched_dl_entity,
				512	dl_timer);
				513	struct task_struct *p = dl_task_of(dl_se);
Kirill Tkhai	0f397f2	2014-05-20 13:33:42 +0400	[diff] [blame]	514	struct rq *rq;
				515	again:
				516	rq = task_rq(p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	517	raw_spin_lock(&rq->lock);
				518
Kirill Tkhai	0f397f2	2014-05-20 13:33:42 +0400	[diff] [blame]	519	if (rq != task_rq(p)) {
				520	/* Task was moved, retrying. */
				521	raw_spin_unlock(&rq->lock);
				522	goto again;
				523	}
				524
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	525	/*
Juri Lelli	aee38ea	2014-10-24 10:16:38 +0100	[diff] [blame]	526	* We need to take care of several possible races here:
				527	*
				528	* - the task might have changed its scheduling policy
				529	* to something different than SCHED_DEADLINE
				530	* - the task might have changed its reservation parameters
				531	* (through sched_setattr())
				532	* - the task might have been boosted by someone else and
				533	* might be in the boosting/deboosting path
				534	*
				535	* In all this cases we bail out, as the task is already
				536	* in the runqueue or is going to be enqueued back anyway.
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	537	*/
Juri Lelli	aee38ea	2014-10-24 10:16:38 +0100	[diff] [blame]	538	if (!dl_task(p) || dl_se->dl_new ||
				539	dl_se->dl_boosted || !dl_se->dl_throttled)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	540	goto unlock;
				541
				542	sched_clock_tick();
				543	update_rq_clock(rq);
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame^]	544	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
				545	if (dl_task(rq->curr))
				546	check_preempt_curr_dl(rq, p, 0);
				547	else
				548	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	549	#ifdef CONFIG_SMP
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame^]	550	/*
				551	* Queueing this task back might have overloaded rq,
				552	* check if we need to kick someone away.
				553	*/
				554	if (has_pushable_dl_tasks(rq))
				555	push_dl_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	556	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	557	unlock:
				558	raw_spin_unlock(&rq->lock);
				559
				560	return HRTIMER_NORESTART;
				561	}
				562
				563	void init_dl_task_timer(struct sched_dl_entity *dl_se)
				564	{
				565	struct hrtimer *timer = &dl_se->dl_timer;
				566
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	567	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
				568	timer->function = dl_task_timer;
				569	}
				570
				571	static
				572	int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)
				573	{
Luca Abeni	269ad80	2014-12-17 11:50:32 +0100	[diff] [blame]	574	return (dl_se->runtime <= 0);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	575	}
				576
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	577	extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
				578
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	579	/*
				580	* Update the current task's runtime statistics (provided it is still
				581	* a -deadline task and has not been removed from the dl_rq).
				582	*/
				583	static void update_curr_dl(struct rq *rq)
				584	{
				585	struct task_struct *curr = rq->curr;
				586	struct sched_dl_entity *dl_se = &curr->dl;
				587	u64 delta_exec;
				588
				589	if (!dl_task(curr) || !on_dl_rq(dl_se))
				590	return;
				591
				592	/*
				593	* Consumed budget is computed considering the time as
				594	* observed by schedulable tasks (excluding time spent
				595	* in hardirq context, etc.). Deadlines are instead
				596	* computed using hard walltime. This seems to be the more
				597	* natural solution, but the full ramifications of this
				598	* approach need further study.
				599	*/
				600	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
Kirill Tkhai	734ff2a	2014-03-04 19:25:46 +0400	[diff] [blame]	601	if (unlikely((s64)delta_exec <= 0))
				602	return;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	603
				604	schedstat_set(curr->se.statistics.exec_max,
				605	max(curr->se.statistics.exec_max, delta_exec));
				606
				607	curr->se.sum_exec_runtime += delta_exec;
				608	account_group_exec_runtime(curr, delta_exec);
				609
				610	curr->se.exec_start = rq_clock_task(rq);
				611	cpuacct_charge(curr, delta_exec);
				612
Dario Faggioli	239be4a	2013-11-07 14:43:39 +0100	[diff] [blame]	613	sched_rt_avg_update(rq, delta_exec);
				614
Wanpeng Li	8049688	2014-10-31 06:39:32 +0800	[diff] [blame]	615	dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	616	if (dl_runtime_exceeded(rq, dl_se)) {
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame^]	617	dl_se->dl_throttled = 1;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	618	__dequeue_task_dl(rq, curr, 0);
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame^]	619	if (unlikely(!start_dl_timer(dl_se, curr->dl.dl_boosted)))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	620	enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
				621
				622	if (!is_leftmost(curr, &rq->dl))
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	623	resched_curr(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	624	}
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	625
				626	/*
				627	* Because -- for now -- we share the rt bandwidth, we need to
				628	* account our runtime there too, otherwise actual rt tasks
				629	* would be able to exceed the shared quota.
				630	*
				631	* Account to the root rt group for now.
				632	*
				633	* The solution we're working towards is having the RT groups scheduled
				634	* using deadline servers -- however there's a few nasties to figure
				635	* out before that can happen.
				636	*/
				637	if (rt_bandwidth_enabled()) {
				638	struct rt_rq *rt_rq = &rq->rt;
				639
				640	raw_spin_lock(&rt_rq->rt_runtime_lock);
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	641	/*
				642	* We'll let actual RT tasks worry about the overflow here, we
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	643	* have our own CBS to keep us inline; only account when RT
				644	* bandwidth is relevant.
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	645	*/
Juri Lelli	faa5993	2014-02-21 11:37:15 +0100	[diff] [blame]	646	if (sched_rt_bandwidth_account(rt_rq))
				647	rt_rq->rt_time += delta_exec;
Peter Zijlstra	1724813	2013-12-17 12:44:49 +0100	[diff] [blame]	648	raw_spin_unlock(&rt_rq->rt_runtime_lock);
				649	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	650	}
				651
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	652	#ifdef CONFIG_SMP
				653
				654	static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu);
				655
				656	static inline u64 next_deadline(struct rq *rq)
				657	{
				658	struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);
				659
				660	if (next && dl_prio(next->prio))
				661	return next->dl.deadline;
				662	else
				663	return 0;
				664	}
				665
				666	static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				667	{
				668	struct rq *rq = rq_of_dl_rq(dl_rq);
				669
				670	if (dl_rq->earliest_dl.curr == 0 ||
				671	dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
				672	/*
				673	* If the dl_rq had no -deadline tasks, or if the new task
				674	* has shorter deadline than the current one on dl_rq, we
				675	* know that the previous earliest becomes our next earliest,
				676	* as the new task becomes the earliest itself.
				677	*/
				678	dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
				679	dl_rq->earliest_dl.curr = deadline;
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	680	cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	681	} else if (dl_rq->earliest_dl.next == 0 ||
				682	dl_time_before(deadline, dl_rq->earliest_dl.next)) {
				683	/*
				684	* On the other hand, if the new -deadline task has a
				685	* later deadline than the earliest one on dl_rq, but
				686	* it is earlier than the next (if any), we must
				687	* recompute the next-earliest.
				688	*/
				689	dl_rq->earliest_dl.next = next_deadline(rq);
				690	}
				691	}
				692
				693	static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				694	{
				695	struct rq *rq = rq_of_dl_rq(dl_rq);
				696
				697	/*
				698	* Since we may have removed our earliest (and/or next earliest)
				699	* task we must recompute them.
				700	*/
				701	if (!dl_rq->dl_nr_running) {
				702	dl_rq->earliest_dl.curr = 0;
				703	dl_rq->earliest_dl.next = 0;
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	704	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	705	} else {
				706	struct rb_node *leftmost = dl_rq->rb_leftmost;
				707	struct sched_dl_entity *entry;
				708
				709	entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
				710	dl_rq->earliest_dl.curr = entry->deadline;
				711	dl_rq->earliest_dl.next = next_deadline(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	712	cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	713	}
				714	}
				715
				716	#else
				717
				718	static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				719	static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				720
				721	#endif /* CONFIG_SMP */
				722
				723	static inline
				724	void inc_dl_tasks(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				725	{
				726	int prio = dl_task_of(dl_se)->prio;
				727	u64 deadline = dl_se->deadline;
				728
				729	WARN_ON(!dl_prio(prio));
				730	dl_rq->dl_nr_running++;
Kirill Tkhai	7246544	2014-05-09 03:00:14 +0400	[diff] [blame]	731	add_nr_running(rq_of_dl_rq(dl_rq), 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	732
				733	inc_dl_deadline(dl_rq, deadline);
				734	inc_dl_migration(dl_se, dl_rq);
				735	}
				736
				737	static inline
				738	void dec_dl_tasks(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				739	{
				740	int prio = dl_task_of(dl_se)->prio;
				741
				742	WARN_ON(!dl_prio(prio));
				743	WARN_ON(!dl_rq->dl_nr_running);
				744	dl_rq->dl_nr_running--;
Kirill Tkhai	7246544	2014-05-09 03:00:14 +0400	[diff] [blame]	745	sub_nr_running(rq_of_dl_rq(dl_rq), 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	746
				747	dec_dl_deadline(dl_rq, dl_se->deadline);
				748	dec_dl_migration(dl_se, dl_rq);
				749	}
				750
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	751	static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
				752	{
				753	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				754	struct rb_node **link = &dl_rq->rb_root.rb_node;
				755	struct rb_node *parent = NULL;
				756	struct sched_dl_entity *entry;
				757	int leftmost = 1;
				758
				759	BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
				760
				761	while (*link) {
				762	parent = *link;
				763	entry = rb_entry(parent, struct sched_dl_entity, rb_node);
				764	if (dl_time_before(dl_se->deadline, entry->deadline))
				765	link = &parent->rb_left;
				766	else {
				767	link = &parent->rb_right;
				768	leftmost = 0;
				769	}
				770	}
				771
				772	if (leftmost)
				773	dl_rq->rb_leftmost = &dl_se->rb_node;
				774
				775	rb_link_node(&dl_se->rb_node, parent, link);
				776	rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);
				777
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	778	inc_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	779	}
				780
				781	static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
				782	{
				783	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				784
				785	if (RB_EMPTY_NODE(&dl_se->rb_node))
				786	return;
				787
				788	if (dl_rq->rb_leftmost == &dl_se->rb_node) {
				789	struct rb_node *next_node;
				790
				791	next_node = rb_next(&dl_se->rb_node);
				792	dl_rq->rb_leftmost = next_node;
				793	}
				794
				795	rb_erase(&dl_se->rb_node, &dl_rq->rb_root);
				796	RB_CLEAR_NODE(&dl_se->rb_node);
				797
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	798	dec_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	799	}
				800
				801	static void
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	802	enqueue_dl_entity(struct sched_dl_entity *dl_se,
				803	struct sched_dl_entity *pi_se, int flags)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	804	{
				805	BUG_ON(on_dl_rq(dl_se));
				806
				807	/*
				808	* If this is a wakeup or a new instance, the scheduling
				809	* parameters of the task might need updating. Otherwise,
				810	* we want a replenishment of its runtime.
				811	*/
Luca Abeni	6a503c3	2014-12-17 11:50:31 +0100	[diff] [blame]	812	if (dl_se->dl_new \|\| flags & ENQUEUE_WAKEUP)
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	813	update_dl_entity(dl_se, pi_se);
Luca Abeni	6a503c3	2014-12-17 11:50:31 +0100	[diff] [blame]	814	else if (flags & ENQUEUE_REPLENISH)
				815	replenish_dl_entity(dl_se, pi_se);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	816
				817	__enqueue_dl_entity(dl_se);
				818	}
				819
				820	static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
				821	{
				822	__dequeue_dl_entity(dl_se);
				823	}
				824
				825	static void enqueue_task_dl(struct rq rq, struct task_struct p, int flags)
				826	{
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	827	struct task_struct *pi_task = rt_mutex_get_top_task(p);
				828	struct sched_dl_entity *pi_se = &p->dl;
				829
				830	/*
				831	* Use the scheduling parameters of the top pi-waiter
				832	* task if we have one and its (relative) deadline is
				833	* smaller than our one... OTW we keep our runtime and
				834	* deadline.
				835	*/
Juri Lelli	64be6f1	2014-10-24 10:16:37 +0100	[diff] [blame]	836	if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	837	pi_se = &pi_task->dl;
Juri Lelli	64be6f1	2014-10-24 10:16:37 +0100	[diff] [blame]	838	} else if (!dl_prio(p->normal_prio)) {
				839	/*
				840	* Special case in which we have a !SCHED_DEADLINE task
				841	* that is going to be deboosted, but exceedes its
				842	* runtime while doing so. No point in replenishing
				843	* it, as it's going to return back to its original
				844	* scheduling class after this.
				845	*/
				846	BUG_ON(!p->dl.dl_boosted \|\| flags != ENQUEUE_REPLENISH);
				847	return;
				848	}
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	849
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	850	/*
				851	* If p is throttled, we do nothing. In fact, if it exhausted
				852	* its budget it needs a replenishment and, since it now is on
				853	* its rq, the bandwidth timer callback (which clearly has not
				854	* run yet) will take care of this.
				855	*/
Peter Zijlstra	1019a35	2014-11-26 08:44:03 +0800	[diff] [blame^]	856	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	857	return;
				858
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	859	enqueue_dl_entity(&p->dl, pi_se, flags);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	860
				861	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
				862	enqueue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	863	}
				864
				865	static void __dequeue_task_dl(struct rq rq, struct task_struct p, int flags)
				866	{
				867	dequeue_dl_entity(&p->dl);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	868	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	869	}
				870
				871	static void dequeue_task_dl(struct rq rq, struct task_struct p, int flags)
				872	{
				873	update_curr_dl(rq);
				874	__dequeue_task_dl(rq, p, flags);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	875	}
				876
				877	/*
				878	* Yield task semantic for -deadline tasks is:
				879	*
				880	* get off from the CPU until our next instance, with
				881	* a new runtime. This is of little use now, since we
				882	* don't have a bandwidth reclaiming mechanism. Anyway,
				883	* bandwidth reclaiming is planned for the future, and
				884	* yield_task_dl will indicate that some spare budget
				885	* is available for other task instances to use it.
				886	*/
				887	static void yield_task_dl(struct rq *rq)
				888	{
				889	struct task_struct *p = rq->curr;
				890
				891	/*
				892	* We make the task go to sleep until its current deadline by
				893	* forcing its runtime to zero. This way, update_curr_dl() stops
				894	* it and the bandwidth timer will wake it up and will give it
Juri Lelli	5bfd126	2014-04-15 13:49:04 +0200	[diff] [blame]	895	* new scheduling parameters (thanks to dl_yielded=1).
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	896	*/
				897	if (p->dl.runtime > 0) {
Juri Lelli	5bfd126	2014-04-15 13:49:04 +0200	[diff] [blame]	898	rq->curr->dl.dl_yielded = 1;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	899	p->dl.runtime = 0;
				900	}
				901	update_curr_dl(rq);
				902	}
				903
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	904	#ifdef CONFIG_SMP
				905
				906	static int find_later_rq(struct task_struct *task);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	907
				908	static int
				909	select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
				910	{
				911	struct task_struct *curr;
				912	struct rq *rq;
				913
Wanpeng Li	1d7e974	2014-10-14 10:22:39 +0800	[diff] [blame]	914	if (sd_flag != SD_BALANCE_WAKE)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	915	goto out;
				916
				917	rq = cpu_rq(cpu);
				918
				919	rcu_read_lock();
				920	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
				921
				922	/*
				923	* If we are dealing with a -deadline task, we must
				924	* decide where to wake it up.
				925	* If it has a later deadline and the current task
				926	* on this rq can't move (provided the waking task
				927	* can!) we prefer to send it somewhere else. On the
				928	* other hand, if it has a shorter deadline, we
				929	* try to make it stay here, it might be important.
				930	*/
				931	if (unlikely(dl_task(curr)) &&
				932	(curr->nr_cpus_allowed < 2 \|\|
				933	!dl_entity_preempt(&p->dl, &curr->dl)) &&
				934	(p->nr_cpus_allowed > 1)) {
				935	int target = find_later_rq(p);
				936
				937	if (target != -1)
				938	cpu = target;
				939	}
				940	rcu_read_unlock();
				941
				942	out:
				943	return cpu;
				944	}
				945
				946	static void check_preempt_equal_dl(struct rq rq, struct task_struct p)
				947	{
				948	/*
				949	* Current can't be migrated, useless to reschedule,
				950	* let's hope p can move out.
				951	*/
				952	if (rq->curr->nr_cpus_allowed == 1 \|\|
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	953	cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	954	return;
				955
				956	/*
				957	* p is migratable, so let's not schedule it and
				958	* see if it is pushed or pulled somewhere else.
				959	*/
				960	if (p->nr_cpus_allowed != 1 &&
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	961	cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	962	return;
				963
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	964	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	965	}
				966
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	967	static int pull_dl_task(struct rq *this_rq);
				968
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	969	#endif /* CONFIG_SMP */
				970
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	971	/*
				972	* Only called when both the current and waking task are -deadline
				973	* tasks.
				974	*/
				975	static void check_preempt_curr_dl(struct rq rq, struct task_struct p,
				976	int flags)
				977	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	978	if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	979	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	980	return;
				981	}
				982
				983	#ifdef CONFIG_SMP
				984	/*
				985	* In the unlikely case current and p have the same deadline
				986	* let us try to decide what's the best thing to do...
				987	*/
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	988	if ((p->dl.deadline == rq->curr->dl.deadline) &&
				989	!test_tsk_need_resched(rq->curr))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	990	check_preempt_equal_dl(rq, p);
				991	#endif /* CONFIG_SMP */
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	992	}
				993
				994	#ifdef CONFIG_SCHED_HRTICK
				995	static void start_hrtick_dl(struct rq rq, struct task_struct p)
				996	{
xiaofeng.yan	177ef2a	2014-08-26 03:15:41 +0000	[diff] [blame]	997	hrtick_start(rq, p->dl.runtime);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	998	}
Wanpeng Li	36ce988	2014-11-11 09:52:26 +0800	[diff] [blame]	999	#else /* !CONFIG_SCHED_HRTICK */
				1000	static void start_hrtick_dl(struct rq rq, struct task_struct p)
				1001	{
				1002	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1003	#endif
				1004
				1005	static struct sched_dl_entity pick_next_dl_entity(struct rq rq,
				1006	struct dl_rq *dl_rq)
				1007	{
				1008	struct rb_node *left = dl_rq->rb_leftmost;
				1009
				1010	if (!left)
				1011	return NULL;
				1012
				1013	return rb_entry(left, struct sched_dl_entity, rb_node);
				1014	}
				1015
Peter Zijlstra	606dba2	2012-02-11 06:05:00 +0100	[diff] [blame]	1016	struct task_struct pick_next_task_dl(struct rq rq, struct task_struct *prev)
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1017	{
				1018	struct sched_dl_entity *dl_se;
				1019	struct task_struct *p;
				1020	struct dl_rq *dl_rq;
				1021
				1022	dl_rq = &rq->dl;
				1023
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1024	if (need_pull_dl_task(rq, prev)) {
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1025	pull_dl_task(rq);
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1026	/*
				1027	* pull_rt_task() can drop (and re-acquire) rq->lock; this
				1028	* means a stop task can slip in, in which case we need to
				1029	* re-start task selection.
				1030	*/
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1031	if (rq->stop && task_on_rq_queued(rq->stop))
Kirill Tkhai	a1d9a32	2014-04-10 17:38:36 +0400	[diff] [blame]	1032	return RETRY_TASK;
				1033	}
				1034
Kirill Tkhai	734ff2a	2014-03-04 19:25:46 +0400	[diff] [blame]	1035	/*
				1036	* When prev is DL, we may throttle it in put_prev_task().
				1037	* So, we update time before we check for dl_nr_running.
				1038	*/
				1039	if (prev->sched_class == &dl_sched_class)
				1040	update_curr_dl(rq);
Peter Zijlstra	38033c3	2014-01-23 20:32:21 +0100	[diff] [blame]	1041
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1042	if (unlikely(!dl_rq->dl_nr_running))
				1043	return NULL;
				1044
Peter Zijlstra	3f1d2a3	2014-02-12 10:49:30 +0100	[diff] [blame]	1045	put_prev_task(rq, prev);
Peter Zijlstra	606dba2	2012-02-11 06:05:00 +0100	[diff] [blame]	1046
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1047	dl_se = pick_next_dl_entity(rq, dl_rq);
				1048	BUG_ON(!dl_se);
				1049
				1050	p = dl_task_of(dl_se);
				1051	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1052
				1053	/* Running task will never be pushed. */
Juri Lelli	7136265	2014-01-14 12:03:51 +0100	[diff] [blame]	1054	dequeue_pushable_dl_task(rq, p);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1055
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1056	if (hrtick_enabled(rq))
				1057	start_hrtick_dl(rq, p);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1058
Peter Zijlstra	dc87734	2014-02-12 15:47:29 +0100	[diff] [blame]	1059	set_post_schedule(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1060
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1061	return p;
				1062	}
				1063
				1064	static void put_prev_task_dl(struct rq rq, struct task_struct p)
				1065	{
				1066	update_curr_dl(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1067
				1068	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
				1069	enqueue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1070	}
				1071
				1072	static void task_tick_dl(struct rq rq, struct task_struct p, int queued)
				1073	{
				1074	update_curr_dl(rq);
				1075
Wanpeng Li	a7bebf4	2014-11-26 08:44:01 +0800	[diff] [blame]	1076	/*
				1077	* Even when we have runtime, update_curr_dl() might have resulted in us
				1078	* not being the leftmost task anymore. In that case NEED_RESCHED will
				1079	* be set and schedule() will start a new hrtick for the next task.
				1080	*/
				1081	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
				1082	is_leftmost(p, &rq->dl))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1083	start_hrtick_dl(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1084	}
				1085
				1086	static void task_fork_dl(struct task_struct *p)
				1087	{
				1088	/*
				1089	* SCHED_DEADLINE tasks cannot fork and this is achieved through
				1090	* sched_fork()
				1091	*/
				1092	}
				1093
				1094	static void task_dead_dl(struct task_struct *p)
				1095	{
				1096	struct hrtimer *timer = &p->dl.dl_timer;
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1097	struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
				1098
				1099	/*
				1100	* Since we are TASK_DEAD we won't slip out of the domain!
				1101	*/
				1102	raw_spin_lock_irq(&dl_b->lock);
Peter Zijlstra	40767b0	2015-01-28 15:08:03 +0100	[diff] [blame]	1103	/* XXX we should retain the bw until 0-lag */
Dario Faggioli	332ac17	2013-11-07 14:43:45 +0100	[diff] [blame]	1104	dl_b->total_bw -= p->dl.dl_bw;
				1105	raw_spin_unlock_irq(&dl_b->lock);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1106
Dario Faggioli	2d3d891	2013-11-07 14:43:44 +0100	[diff] [blame]	1107	hrtimer_cancel(timer);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1108	}
				1109
				1110	static void set_curr_task_dl(struct rq *rq)
				1111	{
				1112	struct task_struct *p = rq->curr;
				1113
				1114	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1115
				1116	/* You can't push away the running task */
				1117	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1118	}
				1119
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1120	#ifdef CONFIG_SMP
				1121
				1122	/* Only try algorithms three times */
				1123	#define DL_MAX_TRIES 3
				1124
				1125	static int pick_dl_task(struct rq rq, struct task_struct p, int cpu)
				1126	{
				1127	if (!task_running(rq, p) &&
Kirill Tkhai	1ba93d4	2014-09-12 17:42:20 +0400	[diff] [blame]	1128	cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1129	return 1;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1130	return 0;
				1131	}
				1132
				1133	/* Returns the second earliest -deadline task, NULL otherwise */
				1134	static struct task_struct pick_next_earliest_dl_task(struct rq rq, int cpu)
				1135	{
				1136	struct rb_node *next_node = rq->dl.rb_leftmost;
				1137	struct sched_dl_entity *dl_se;
				1138	struct task_struct *p = NULL;
				1139
				1140	next_node:
				1141	next_node = rb_next(next_node);
				1142	if (next_node) {
				1143	dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
				1144	p = dl_task_of(dl_se);
				1145
				1146	if (pick_dl_task(rq, p, cpu))
				1147	return p;
				1148
				1149	goto next_node;
				1150	}
				1151
				1152	return NULL;
				1153	}
				1154
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1155	static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
				1156
				1157	static int find_later_rq(struct task_struct *task)
				1158	{
				1159	struct sched_domain *sd;
Christoph Lameter	4ba2968	2014-08-26 19:12:21 -0500	[diff] [blame]	1160	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1161	int this_cpu = smp_processor_id();
				1162	int best_cpu, cpu = task_cpu(task);
				1163
				1164	/* Make sure the mask is initialized first */
				1165	if (unlikely(!later_mask))
				1166	return -1;
				1167
				1168	if (task->nr_cpus_allowed == 1)
				1169	return -1;
				1170
Juri Lelli	91ec677	2014-09-19 10:22:41 +0100	[diff] [blame]	1171	/*
				1172	* We have to consider system topology and task affinity
				1173	* first, then we can look for a suitable cpu.
				1174	*/
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1175	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
				1176	task, later_mask);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1177	if (best_cpu == -1)
				1178	return -1;
				1179
				1180	/*
				1181	* If we are here, some target has been found,
				1182	* the most suitable of which is cached in best_cpu.
				1183	* This is, among the runqueues where the current tasks
				1184	* have later deadlines than the task's one, the rq
				1185	* with the latest possible one.
				1186	*
				1187	* Now we check how well this matches with task's
				1188	* affinity and system topology.
				1189	*
				1190	* The last cpu where the task run is our first
				1191	* guess, since it is most likely cache-hot there.
				1192	*/
				1193	if (cpumask_test_cpu(cpu, later_mask))
				1194	return cpu;
				1195	/*
				1196	* Check if this_cpu is to be skipped (i.e., it is
				1197	* not in the mask) or not.
				1198	*/
				1199	if (!cpumask_test_cpu(this_cpu, later_mask))
				1200	this_cpu = -1;
				1201
				1202	rcu_read_lock();
				1203	for_each_domain(cpu, sd) {
				1204	if (sd->flags & SD_WAKE_AFFINE) {
				1205
				1206	/*
				1207	* If possible, preempting this_cpu is
				1208	* cheaper than migrating.
				1209	*/
				1210	if (this_cpu != -1 &&
				1211	cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				1212	rcu_read_unlock();
				1213	return this_cpu;
				1214	}
				1215
				1216	/*
				1217	* Last chance: if best_cpu is valid and is
				1218	* in the mask, that becomes our choice.
				1219	*/
				1220	if (best_cpu < nr_cpu_ids &&
				1221	cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {
				1222	rcu_read_unlock();
				1223	return best_cpu;
				1224	}
				1225	}
				1226	}
				1227	rcu_read_unlock();
				1228
				1229	/*
				1230	* At this point, all our guesses failed, we just return
				1231	* 'something', and let the caller sort the things out.
				1232	*/
				1233	if (this_cpu != -1)
				1234	return this_cpu;
				1235
				1236	cpu = cpumask_any(later_mask);
				1237	if (cpu < nr_cpu_ids)
				1238	return cpu;
				1239
				1240	return -1;
				1241	}
				1242
				1243	/* Locks the rq it finds */
				1244	static struct rq find_lock_later_rq(struct task_struct task, struct rq *rq)
				1245	{
				1246	struct rq *later_rq = NULL;
				1247	int tries;
				1248	int cpu;
				1249
				1250	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
				1251	cpu = find_later_rq(task);
				1252
				1253	if ((cpu == -1) \|\| (cpu == rq->cpu))
				1254	break;
				1255
				1256	later_rq = cpu_rq(cpu);
				1257
				1258	/* Retry if something changed. */
				1259	if (double_lock_balance(rq, later_rq)) {
				1260	if (unlikely(task_rq(task) != rq \|\|
				1261	!cpumask_test_cpu(later_rq->cpu,
				1262	&task->cpus_allowed) \|\|
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1263	task_running(rq, task) \|\|
				1264	!task_on_rq_queued(task))) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1265	double_unlock_balance(rq, later_rq);
				1266	later_rq = NULL;
				1267	break;
				1268	}
				1269	}
				1270
				1271	/*
				1272	* If the rq we found has no -deadline task, or
				1273	* its earliest one has a later deadline than our
				1274	* task, the rq is a good one.
				1275	*/
				1276	if (!later_rq->dl.dl_nr_running \|\|
				1277	dl_time_before(task->dl.deadline,
				1278	later_rq->dl.earliest_dl.curr))
				1279	break;
				1280
				1281	/* Otherwise we try again. */
				1282	double_unlock_balance(rq, later_rq);
				1283	later_rq = NULL;
				1284	}
				1285
				1286	return later_rq;
				1287	}
				1288
				1289	static struct task_struct pick_next_pushable_dl_task(struct rq rq)
				1290	{
				1291	struct task_struct *p;
				1292
				1293	if (!has_pushable_dl_tasks(rq))
				1294	return NULL;
				1295
				1296	p = rb_entry(rq->dl.pushable_dl_tasks_leftmost,
				1297	struct task_struct, pushable_dl_tasks);
				1298
				1299	BUG_ON(rq->cpu != task_cpu(p));
				1300	BUG_ON(task_current(rq, p));
				1301	BUG_ON(p->nr_cpus_allowed <= 1);
				1302
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1303	BUG_ON(!task_on_rq_queued(p));
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1304	BUG_ON(!dl_task(p));
				1305
				1306	return p;
				1307	}
				1308
				1309	/*
				1310	* See if the non running -deadline tasks on this rq
				1311	* can be sent to some other CPU where they can preempt
				1312	* and start executing.
				1313	*/
				1314	static int push_dl_task(struct rq *rq)
				1315	{
				1316	struct task_struct *next_task;
				1317	struct rq *later_rq;
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1318	int ret = 0;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1319
				1320	if (!rq->dl.overloaded)
				1321	return 0;
				1322
				1323	next_task = pick_next_pushable_dl_task(rq);
				1324	if (!next_task)
				1325	return 0;
				1326
				1327	retry:
				1328	if (unlikely(next_task == rq->curr)) {
				1329	WARN_ON(1);
				1330	return 0;
				1331	}
				1332
				1333	/*
				1334	* If next_task preempts rq->curr, and rq->curr
				1335	* can move away, it makes sense to just reschedule
				1336	* without going further in pushing next_task.
				1337	*/
				1338	if (dl_task(rq->curr) &&
				1339	dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
				1340	rq->curr->nr_cpus_allowed > 1) {
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1341	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1342	return 0;
				1343	}
				1344
				1345	/* We might release rq lock */
				1346	get_task_struct(next_task);
				1347
				1348	/* Will lock the rq it'll find */
				1349	later_rq = find_lock_later_rq(next_task, rq);
				1350	if (!later_rq) {
				1351	struct task_struct *task;
				1352
				1353	/*
				1354	* We must check all this again, since
				1355	* find_lock_later_rq releases rq->lock and it is
				1356	* then possible that next_task has migrated.
				1357	*/
				1358	task = pick_next_pushable_dl_task(rq);
				1359	if (task_cpu(next_task) == rq->cpu && task == next_task) {
				1360	/*
				1361	* The task is still there. We don't try
				1362	* again, some other cpu will pull it when ready.
				1363	*/
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1364	goto out;
				1365	}
				1366
				1367	if (!task)
				1368	/* No more tasks */
				1369	goto out;
				1370
				1371	put_task_struct(next_task);
				1372	next_task = task;
				1373	goto retry;
				1374	}
				1375
				1376	deactivate_task(rq, next_task, 0);
				1377	set_task_cpu(next_task, later_rq->cpu);
				1378	activate_task(later_rq, next_task, 0);
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1379	ret = 1;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1380
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1381	resched_curr(later_rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1382
				1383	double_unlock_balance(rq, later_rq);
				1384
				1385	out:
				1386	put_task_struct(next_task);
				1387
Wanpeng Li	c51b8ab	2014-11-06 15:22:44 +0800	[diff] [blame]	1388	return ret;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1389	}
				1390
				1391	static void push_dl_tasks(struct rq *rq)
				1392	{
				1393	/* Terminates as it moves a -deadline task */
				1394	while (push_dl_task(rq))
				1395	;
				1396	}
				1397
				1398	static int pull_dl_task(struct rq *this_rq)
				1399	{
				1400	int this_cpu = this_rq->cpu, ret = 0, cpu;
				1401	struct task_struct *p;
				1402	struct rq *src_rq;
				1403	u64 dmin = LONG_MAX;
				1404
				1405	if (likely(!dl_overloaded(this_rq)))
				1406	return 0;
				1407
				1408	/*
				1409	* Match the barrier from dl_set_overloaded; this guarantees that if we
				1410	* see overloaded we must also see the dlo_mask bit.
				1411	*/
				1412	smp_rmb();
				1413
				1414	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
				1415	if (this_cpu == cpu)
				1416	continue;
				1417
				1418	src_rq = cpu_rq(cpu);
				1419
				1420	/*
				1421	* It looks racy, abd it is! However, as in sched_rt.c,
				1422	* we are fine with this.
				1423	*/
				1424	if (this_rq->dl.dl_nr_running &&
				1425	dl_time_before(this_rq->dl.earliest_dl.curr,
				1426	src_rq->dl.earliest_dl.next))
				1427	continue;
				1428
				1429	/* Might drop this_rq->lock */
				1430	double_lock_balance(this_rq, src_rq);
				1431
				1432	/*
				1433	* If there are no more pullable tasks on the
				1434	* rq, we're done with it.
				1435	*/
				1436	if (src_rq->dl.dl_nr_running <= 1)
				1437	goto skip;
				1438
				1439	p = pick_next_earliest_dl_task(src_rq, this_cpu);
				1440
				1441	/*
				1442	* We found a task to be pulled if:
				1443	* - it preempts our current (if there's one),
				1444	* - it will preempt the last one we pulled (if any).
				1445	*/
				1446	if (p && dl_time_before(p->dl.deadline, dmin) &&
				1447	(!this_rq->dl.dl_nr_running \|\|
				1448	dl_time_before(p->dl.deadline,
				1449	this_rq->dl.earliest_dl.curr))) {
				1450	WARN_ON(p == src_rq->curr);
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1451	WARN_ON(!task_on_rq_queued(p));
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1452
				1453	/*
				1454	* Then we pull iff p has actually an earlier
				1455	* deadline than the current task of its runqueue.
				1456	*/
				1457	if (dl_time_before(p->dl.deadline,
				1458	src_rq->curr->dl.deadline))
				1459	goto skip;
				1460
				1461	ret = 1;
				1462
				1463	deactivate_task(src_rq, p, 0);
				1464	set_task_cpu(p, this_cpu);
				1465	activate_task(this_rq, p, 0);
				1466	dmin = p->dl.deadline;
				1467
				1468	/* Is there any other task even earlier? */
				1469	}
				1470	skip:
				1471	double_unlock_balance(this_rq, src_rq);
				1472	}
				1473
				1474	return ret;
				1475	}
				1476
/* Post-schedule hook: try to push away the pushable tasks of @rq. */
static void post_schedule_dl(struct rq *rq)
{
	push_dl_tasks(rq);
}
				1481
				1482	/*
				1483	* Since the task is not running and a reschedule is not going to happen
				1484	* anytime soon on its runqueue, we try pushing it away now.
				1485	*/
				1486	static void task_woken_dl(struct rq rq, struct task_struct p)
				1487	{
				1488	if (!task_running(rq, p) &&
				1489	!test_tsk_need_resched(rq->curr) &&
				1490	has_pushable_dl_tasks(rq) &&
				1491	p->nr_cpus_allowed > 1 &&
				1492	dl_task(rq->curr) &&
				1493	(rq->curr->nr_cpus_allowed < 2 \|\|
Wanpeng Li	6b0a563	2014-10-31 06:39:34 +0800	[diff] [blame]	1494	!dl_entity_preempt(&p->dl, &rq->curr->dl))) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1495	push_dl_tasks(rq);
				1496	}
				1497	}
				1498
				1499	static void set_cpus_allowed_dl(struct task_struct *p,
				1500	const struct cpumask *new_mask)
				1501	{
				1502	struct rq *rq;
Juri Lelli	7f51412	2014-09-19 10:22:40 +0100	[diff] [blame]	1503	struct root_domain *src_rd;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1504	int weight;
				1505
				1506	BUG_ON(!dl_task(p));
				1507
Juri Lelli	7f51412	2014-09-19 10:22:40 +0100	[diff] [blame]	1508	rq = task_rq(p);
				1509	src_rd = rq->rd;
				1510	/*
				1511	* Migrating a SCHED_DEADLINE task between exclusive
				1512	* cpusets (different root_domains) entails a bandwidth
				1513	* update. We already made space for us in the destination
				1514	* domain (see cpuset_can_attach()).
				1515	*/
				1516	if (!cpumask_intersects(src_rd->span, new_mask)) {
				1517	struct dl_bw *src_dl_b;
				1518
				1519	src_dl_b = dl_bw_of(cpu_of(rq));
				1520	/*
				1521	* We now free resources of the root_domain we are migrating
				1522	* off. In the worst case, sched_setattr() may temporary fail
				1523	* until we complete the update.
				1524	*/
				1525	raw_spin_lock(&src_dl_b->lock);
				1526	__dl_clear(src_dl_b, p->dl.dl_bw);
				1527	raw_spin_unlock(&src_dl_b->lock);
				1528	}
				1529
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1530	/*
				1531	* Update only if the task is actually running (i.e.,
				1532	* it is on the rq AND it is not throttled).
				1533	*/
				1534	if (!on_dl_rq(&p->dl))
				1535	return;
				1536
				1537	weight = cpumask_weight(new_mask);
				1538
				1539	/*
				1540	* Only update if the process changes its state from whether it
				1541	* can migrate or not.
				1542	*/
				1543	if ((p->nr_cpus_allowed > 1) == (weight > 1))
				1544	return;
				1545
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1546	/*
				1547	* The process used to be able to migrate OR it can now migrate
				1548	*/
				1549	if (weight <= 1) {
				1550	if (!task_current(rq, p))
				1551	dequeue_pushable_dl_task(rq, p);
				1552	BUG_ON(!rq->dl.dl_nr_migratory);
				1553	rq->dl.dl_nr_migratory--;
				1554	} else {
				1555	if (!task_current(rq, p))
				1556	enqueue_pushable_dl_task(rq, p);
				1557	rq->dl.dl_nr_migratory++;
				1558	}
				1559
				1560	update_dl_migration(&rq->dl);
				1561	}
				1562
				1563	/* Assumes rq->lock is held */
				1564	static void rq_online_dl(struct rq *rq)
				1565	{
				1566	if (rq->dl.overloaded)
				1567	dl_set_overload(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1568
Xunlei Pang	16b2694	2015-01-19 04:49:36 +0000	[diff] [blame]	1569	cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1570	if (rq->dl.dl_nr_running > 0)
				1571	cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1572	}
				1573
				1574	/* Assumes rq->lock is held */
				1575	static void rq_offline_dl(struct rq *rq)
				1576	{
				1577	if (rq->dl.overloaded)
				1578	dl_clear_overload(rq);
Juri Lelli	6bfd6d7	2013-11-07 14:43:47 +0100	[diff] [blame]	1579
				1580	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
Xunlei Pang	16b2694	2015-01-19 04:49:36 +0000	[diff] [blame]	1581	cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1582	}
				1583
				1584	void init_sched_dl_class(void)
				1585	{
				1586	unsigned int i;
				1587
				1588	for_each_possible_cpu(i)
				1589	zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
				1590	GFP_KERNEL, cpu_to_node(i));
				1591	}
				1592
				1593	#endif /* CONFIG_SMP */
				1594
Kirill Tkhai	67dfa1b	2014-10-27 17:40:52 +0300	[diff] [blame]	1595	/*
				1596	* Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
				1597	*/
				1598	static void cancel_dl_timer(struct rq rq, struct task_struct p)
				1599	{
				1600	struct hrtimer *dl_timer = &p->dl.dl_timer;
				1601
				1602	/* Nobody will change task's class if pi_lock is held */
				1603	lockdep_assert_held(&p->pi_lock);
				1604
				1605	if (hrtimer_active(dl_timer)) {
				1606	int ret = hrtimer_try_to_cancel(dl_timer);
				1607
				1608	if (unlikely(ret == -1)) {
				1609	/*
				1610	* Note, p may migrate OR new deadline tasks
				1611	* may appear in rq when we are unlocking it.
				1612	* A caller of us must be fine with that.
				1613	*/
				1614	raw_spin_unlock(&rq->lock);
				1615	hrtimer_cancel(dl_timer);
				1616	raw_spin_lock(&rq->lock);
				1617	}
				1618	}
				1619	}
				1620
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1621	static void switched_from_dl(struct rq rq, struct task_struct p)
				1622	{
Peter Zijlstra	40767b0	2015-01-28 15:08:03 +0100	[diff] [blame]	1623	/* XXX we should retain the bw until 0-lag */
Kirill Tkhai	67dfa1b	2014-10-27 17:40:52 +0300	[diff] [blame]	1624	cancel_dl_timer(rq, p);
Juri Lelli	a5e7be3	2014-09-19 10:22:39 +0100	[diff] [blame]	1625	__dl_clear_params(p);
				1626
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1627	/*
				1628	* Since this might be the only -deadline task on the rq,
				1629	* this is the right place to try to pull some other one
				1630	* from an overloaded cpu, if any.
				1631	*/
Wanpeng Li	cd66091	2014-10-31 06:39:35 +0800	[diff] [blame]	1632	if (!task_on_rq_queued(p) \|\| rq->dl.dl_nr_running)
				1633	return;
				1634
				1635	if (pull_dl_task(rq))
				1636	resched_curr(rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1637	}
				1638
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1639	/*
				1640	* When switching to -deadline, we may overload the rq, then
				1641	* we try to push someone off, if possible.
				1642	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1643	static void switched_to_dl(struct rq rq, struct task_struct p)
				1644	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1645	int check_resched = 1;
				1646
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1647	/*
				1648	* If p is throttled, don't consider the possibility
				1649	* of preempting rq->curr, the check will be done right
				1650	* after its runtime will get replenished.
				1651	*/
				1652	if (unlikely(p->dl.dl_throttled))
				1653	return;
				1654
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1655	if (task_on_rq_queued(p) && rq->curr != p) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1656	#ifdef CONFIG_SMP
Wanpeng Li	d9aade7a	2014-10-22 08:36:43 +0800	[diff] [blame]	1657	if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
				1658	push_dl_task(rq) && rq != task_rq(p))
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1659	/* Only reschedule if pushing failed */
				1660	check_resched = 0;
				1661	#endif /* CONFIG_SMP */
Kirill Tkhai	f3a7e1a	2014-10-21 20:35:56 +0400	[diff] [blame]	1662	if (check_resched) {
				1663	if (dl_task(rq->curr))
				1664	check_preempt_curr_dl(rq, p, 0);
				1665	else
				1666	resched_curr(rq);
				1667	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1668	}
				1669	}
				1670
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1671	/*
				1672	* If the scheduling parameters of a -deadline task changed,
				1673	* a push or pull operation might be needed.
				1674	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1675	static void prio_changed_dl(struct rq rq, struct task_struct p,
				1676	int oldprio)
				1677	{
Kirill Tkhai	da0c1e6	2014-08-20 13:47:32 +0400	[diff] [blame]	1678	if (task_on_rq_queued(p) \|\| rq->curr == p) {
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1679	#ifdef CONFIG_SMP
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1680	/*
				1681	* This might be too much, but unfortunately
				1682	* we don't have the old deadline value, and
				1683	* we can't argue if the task is increasing
				1684	* or lowering its prio, so...
				1685	*/
				1686	if (!rq->dl.overloaded)
				1687	pull_dl_task(rq);
				1688
				1689	/*
				1690	* If we now have a earlier deadline task than p,
				1691	* then reschedule, provided p is still on this
				1692	* runqueue.
				1693	*/
				1694	if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
				1695	rq->curr == p)
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1696	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1697	#else
				1698	/*
				1699	* Again, we don't know if p has a earlier
				1700	* or later deadline, so let's blindly set a
				1701	* (maybe not needed) rescheduling point.
				1702	*/
Kirill Tkhai	8875125	2014-06-29 00:03:57 +0400	[diff] [blame]	1703	resched_curr(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1704	#endif /* CONFIG_SMP */
				1705	} else
				1706	switched_to_dl(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1707	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1708
				1709	const struct sched_class dl_sched_class = {
				1710	.next = &rt_sched_class,
				1711	.enqueue_task = enqueue_task_dl,
				1712	.dequeue_task = dequeue_task_dl,
				1713	.yield_task = yield_task_dl,
				1714
				1715	.check_preempt_curr = check_preempt_curr_dl,
				1716
				1717	.pick_next_task = pick_next_task_dl,
				1718	.put_prev_task = put_prev_task_dl,
				1719
				1720	#ifdef CONFIG_SMP
				1721	.select_task_rq = select_task_rq_dl,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1722	.set_cpus_allowed = set_cpus_allowed_dl,
				1723	.rq_online = rq_online_dl,
				1724	.rq_offline = rq_offline_dl,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1725	.post_schedule = post_schedule_dl,
				1726	.task_woken = task_woken_dl,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1727	#endif
				1728
				1729	.set_curr_task = set_curr_task_dl,
				1730	.task_tick = task_tick_dl,
				1731	.task_fork = task_fork_dl,
				1732	.task_dead = task_dead_dl,
				1733
				1734	.prio_changed = prio_changed_dl,
				1735	.switched_from = switched_from_dl,
				1736	.switched_to = switched_to_dl,
Stanislaw Gruszka	6e99891	2014-11-12 16:58:44 +0100	[diff] [blame]	1737
				1738	.update_curr = update_curr_dl,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1739	};
Wanpeng Li	acb3213	2014-10-31 06:39:33 +0800	[diff] [blame]	1740
				#ifdef CONFIG_SCHED_DEBUG
				/* Declared here, defined elsewhere. */
				extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);

				/*
				 * print_dl_stats - hand the -deadline runqueue of @cpu to print_dl_rq().
				 * @m:   seq_file passed through to print_dl_rq()
				 * @cpu: CPU whose dl_rq is reported
				 */
				void print_dl_stats(struct seq_file *m, int cpu)
				{
					print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
				}
				#endif /* CONFIG_SCHED_DEBUG */