Blame - kernel/sched/deadline.c - SHIFTPHONES/mainline/linux

blob: 3958bc576d679398ac2dbd77389165e7d5e74cb6 [file] [log] [blame]

Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1	/*
				2	* Deadline Scheduling Class (SCHED_DEADLINE)
				3	*
				4	* Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
				5	*
				6	* Tasks that periodically execute their instances for less than their
				7	* runtime won't miss any of their deadlines.
				8	* Tasks that are not periodic or sporadic or that try to execute more
				9	* than their reserved bandwidth will be slowed down (and may potentially
				10	* miss some of their deadlines), and won't affect any other task.
				11	*
				12	* Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	13	* Juri Lelli <juri.lelli@gmail.com>,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	14	* Michael Trimarchi <michael@amarulasolutions.com>,
				15	* Fabio Checconi <fchecconi@gmail.com>
				16	*/
				17	#include "sched.h"
				18
				19	static inline int dl_time_before(u64 a, u64 b)
				20	{
				21	return (s64)(a - b) < 0;
				22	}
				23
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	24	/*
				25	* Tells if entity @a should preempt entity @b.
				26	*/
				27	static inline
				28	int dl_entity_preempt(struct sched_dl_entity a, struct sched_dl_entity b)
				29	{
				30	return dl_time_before(a->deadline, b->deadline);
				31	}
				32
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	33	static inline struct task_struct dl_task_of(struct sched_dl_entity dl_se)
				34	{
				35	return container_of(dl_se, struct task_struct, dl);
				36	}
				37
				38	static inline struct rq rq_of_dl_rq(struct dl_rq dl_rq)
				39	{
				40	return container_of(dl_rq, struct rq, dl);
				41	}
				42
				43	static inline struct dl_rq dl_rq_of_se(struct sched_dl_entity dl_se)
				44	{
				45	struct task_struct *p = dl_task_of(dl_se);
				46	struct rq *rq = task_rq(p);
				47
				48	return &rq->dl;
				49	}
				50
				51	static inline int on_dl_rq(struct sched_dl_entity *dl_se)
				52	{
				53	return !RB_EMPTY_NODE(&dl_se->rb_node);
				54	}
				55
				56	static inline int is_leftmost(struct task_struct p, struct dl_rq dl_rq)
				57	{
				58	struct sched_dl_entity *dl_se = &p->dl;
				59
				60	return dl_rq->rb_leftmost == &dl_se->rb_node;
				61	}
				62
				63	void init_dl_rq(struct dl_rq dl_rq, struct rq rq)
				64	{
				65	dl_rq->rb_root = RB_ROOT;
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	66
				67	#ifdef CONFIG_SMP
				68	/* zero means no -deadline tasks */
				69	dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;
				70
				71	dl_rq->dl_nr_migratory = 0;
				72	dl_rq->overloaded = 0;
				73	dl_rq->pushable_dl_tasks_root = RB_ROOT;
				74	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	75	}
				76
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	77	#ifdef CONFIG_SMP
				78
				79	static inline int dl_overloaded(struct rq *rq)
				80	{
				81	return atomic_read(&rq->rd->dlo_count);
				82	}
				83
				84	static inline void dl_set_overload(struct rq *rq)
				85	{
				86	if (!rq->online)
				87	return;
				88
				89	cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
				90	/*
				91	* Must be visible before the overload count is
				92	* set (as in sched_rt.c).
				93	*
				94	* Matched by the barrier in pull_dl_task().
				95	*/
				96	smp_wmb();
				97	atomic_inc(&rq->rd->dlo_count);
				98	}
				99
				100	static inline void dl_clear_overload(struct rq *rq)
				101	{
				102	if (!rq->online)
				103	return;
				104
				105	atomic_dec(&rq->rd->dlo_count);
				106	cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
				107	}
				108
				109	static void update_dl_migration(struct dl_rq *dl_rq)
				110	{
				111	if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_total > 1) {
				112	if (!dl_rq->overloaded) {
				113	dl_set_overload(rq_of_dl_rq(dl_rq));
				114	dl_rq->overloaded = 1;
				115	}
				116	} else if (dl_rq->overloaded) {
				117	dl_clear_overload(rq_of_dl_rq(dl_rq));
				118	dl_rq->overloaded = 0;
				119	}
				120	}
				121
				122	static void inc_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				123	{
				124	struct task_struct *p = dl_task_of(dl_se);
				125	dl_rq = &rq_of_dl_rq(dl_rq)->dl;
				126
				127	dl_rq->dl_nr_total++;
				128	if (p->nr_cpus_allowed > 1)
				129	dl_rq->dl_nr_migratory++;
				130
				131	update_dl_migration(dl_rq);
				132	}
				133
				134	static void dec_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				135	{
				136	struct task_struct *p = dl_task_of(dl_se);
				137	dl_rq = &rq_of_dl_rq(dl_rq)->dl;
				138
				139	dl_rq->dl_nr_total--;
				140	if (p->nr_cpus_allowed > 1)
				141	dl_rq->dl_nr_migratory--;
				142
				143	update_dl_migration(dl_rq);
				144	}
				145
				146	/*
				147	* The list of pushable -deadline task is not a plist, like in
				148	* sched_rt.c, it is an rb-tree with tasks ordered by deadline.
				149	*/
				150	static void enqueue_pushable_dl_task(struct rq rq, struct task_struct p)
				151	{
				152	struct dl_rq *dl_rq = &rq->dl;
				153	struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node;
				154	struct rb_node *parent = NULL;
				155	struct task_struct *entry;
				156	int leftmost = 1;
				157
				158	BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));
				159
				160	while (*link) {
				161	parent = *link;
				162	entry = rb_entry(parent, struct task_struct,
				163	pushable_dl_tasks);
				164	if (dl_entity_preempt(&p->dl, &entry->dl))
				165	link = &parent->rb_left;
				166	else {
				167	link = &parent->rb_right;
				168	leftmost = 0;
				169	}
				170	}
				171
				172	if (leftmost)
				173	dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
				174
				175	rb_link_node(&p->pushable_dl_tasks, parent, link);
				176	rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				177	}
				178
				179	static void dequeue_pushable_dl_task(struct rq rq, struct task_struct p)
				180	{
				181	struct dl_rq *dl_rq = &rq->dl;
				182
				183	if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
				184	return;
				185
				186	if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) {
				187	struct rb_node *next_node;
				188
				189	next_node = rb_next(&p->pushable_dl_tasks);
				190	dl_rq->pushable_dl_tasks_leftmost = next_node;
				191	}
				192
				193	rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
				194	RB_CLEAR_NODE(&p->pushable_dl_tasks);
				195	}
				196
				197	static inline int has_pushable_dl_tasks(struct rq *rq)
				198	{
				199	return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root);
				200	}
				201
				202	static int push_dl_task(struct rq *rq);
				203
				204	#else
				205
				206	static inline
				207	void enqueue_pushable_dl_task(struct rq rq, struct task_struct p)
				208	{
				209	}
				210
				211	static inline
				212	void dequeue_pushable_dl_task(struct rq rq, struct task_struct p)
				213	{
				214	}
				215
				216	static inline
				217	void inc_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				218	{
				219	}
				220
				221	static inline
				222	void dec_dl_migration(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				223	{
				224	}
				225
				226	#endif /* CONFIG_SMP */
				227
/* Forward declarations for helpers used before their definitions. */
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
				  int flags);
				232
				233	/*
				234	* We are being explicitly informed that a new instance is starting,
				235	* and this means that:
				236	* - the absolute deadline of the entity has to be placed at
				237	* current time + relative deadline;
				238	* - the runtime of the entity has to be set to the maximum value.
				239	*
				240	* The capability of specifying such event is useful whenever a -deadline
				241	* entity wants to (try to!) synchronize its behaviour with the scheduler's
				242	* one, and to (try to!) reconcile itself with its own scheduling
				243	* parameters.
				244	*/
				245	static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
				246	{
				247	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				248	struct rq *rq = rq_of_dl_rq(dl_rq);
				249
				250	WARN_ON(!dl_se->dl_new \|\| dl_se->dl_throttled);
				251
				252	/*
				253	* We use the regular wall clock time to set deadlines in the
				254	* future; in fact, we must consider execution overheads (time
				255	* spent on hardirq context, etc.).
				256	*/
				257	dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
				258	dl_se->runtime = dl_se->dl_runtime;
				259	dl_se->dl_new = 0;
				260	}
				261
				262	/*
				263	* Pure Earliest Deadline First (EDF) scheduling does not deal with the
				264	* possibility of a entity lasting more than what it declared, and thus
				265	* exhausting its runtime.
				266	*
				267	* Here we are interested in making runtime overrun possible, but we do
				268	* not want a entity which is misbehaving to affect the scheduling of all
				269	* other entities.
				270	* Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
				271	* is used, in order to confine each entity within its own bandwidth.
				272	*
				273	* This function deals exactly with that, and ensures that when the runtime
				274	* of a entity is replenished, its deadline is also postponed. That ensures
				275	* the overrunning entity can't interfere with other entity in the system and
				276	* can't make them miss their deadlines. Reasons why this kind of overruns
				277	* could happen are, typically, a entity voluntarily trying to overcome its
				278	* runtime, or it just underestimated it during sched_setscheduler_ex().
				279	*/
				280	static void replenish_dl_entity(struct sched_dl_entity *dl_se)
				281	{
				282	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				283	struct rq *rq = rq_of_dl_rq(dl_rq);
				284
				285	/*
				286	* We keep moving the deadline away until we get some
				287	* available runtime for the entity. This ensures correct
				288	* handling of situations where the runtime overrun is
				289	* arbitrary large.
				290	*/
				291	while (dl_se->runtime <= 0) {
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame^]	292	dl_se->deadline += dl_se->dl_period;
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	293	dl_se->runtime += dl_se->dl_runtime;
				294	}
				295
				296	/*
				297	* At this point, the deadline really should be "in
				298	* the future" with respect to rq->clock. If it's
				299	* not, we are, for some reason, lagging too much!
				300	* Anyway, after having warn userspace abut that,
				301	* we still try to keep the things running by
				302	* resetting the deadline and the budget of the
				303	* entity.
				304	*/
				305	if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
				306	static bool lag_once = false;
				307
				308	if (!lag_once) {
				309	lag_once = true;
				310	printk_sched("sched: DL replenish lagged to much\n");
				311	}
				312	dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
				313	dl_se->runtime = dl_se->dl_runtime;
				314	}
				315	}
				316
				317	/*
				318	* Here we check if --at time t-- an entity (which is probably being
				319	* [re]activated or, in general, enqueued) can use its remaining runtime
				320	* and its current deadline _without_ exceeding the bandwidth it is
				321	* assigned (function returns true if it can't). We are in fact applying
				322	* one of the CBS rules: when a task wakes up, if the residual runtime
				323	* over residual deadline fits within the allocated bandwidth, then we
				324	* can keep the current (absolute) deadline and residual budget without
				325	* disrupting the schedulability of the system. Otherwise, we should
				326	* refill the runtime and set the deadline a period in the future,
				327	* because keeping the current (absolute) deadline of the task would
				328	* result in breaking guarantees promised to other tasks.
				329	*
				330	* This function returns true if:
				331	*
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame^]	332	* runtime / (deadline - t) > dl_runtime / dl_period ,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	333	*
				334	* IOW we can't recycle current parameters.
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame^]	335	*
				336	* Notice that the bandwidth check is done against the period. For
				337	* task with deadline equal to period this is the same of using
				338	* dl_deadline instead of dl_period in the equation above.
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	339	*/
				340	static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
				341	{
				342	u64 left, right;
				343
				344	/*
				345	* left and right are the two sides of the equation above,
				346	* after a bit of shuffling to use multiplications instead
				347	* of divisions.
				348	*
				349	* Note that none of the time values involved in the two
				350	* multiplications are absolute: dl_deadline and dl_runtime
				351	* are the relative deadline and the maximum runtime of each
				352	* instance, runtime is the runtime left for the last instance
				353	* and (deadline - t), since t is rq->clock, is the time left
				354	* to the (absolute) deadline. Even if overflowing the u64 type
				355	* is very unlikely to occur in both cases, here we scale down
				356	* as we want to avoid that risk at all. Scaling down by 10
				357	* means that we reduce granularity to 1us. We are fine with it,
				358	* since this is only a true/false check and, anyway, thinking
				359	* of anything below microseconds resolution is actually fiction
				360	* (but still we want to give the user that illusion >;).
				361	*/
Harald Gustafsson	755378a	2013-11-07 14:43:40 +0100	[diff] [blame^]	362	left = (dl_se->dl_period >> 10) * (dl_se->runtime >> 10);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	363	right = ((dl_se->deadline - t) >> 10) * (dl_se->dl_runtime >> 10);
				364
				365	return dl_time_before(right, left);
				366	}
				367
				368	/*
				369	* When a -deadline entity is queued back on the runqueue, its runtime and
				370	* deadline might need updating.
				371	*
				372	* The policy here is that we update the deadline of the entity only if:
				373	* - the current deadline is in the past,
				374	* - using the remaining runtime with the current deadline would make
				375	* the entity exceed its bandwidth.
				376	*/
				377	static void update_dl_entity(struct sched_dl_entity *dl_se)
				378	{
				379	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				380	struct rq *rq = rq_of_dl_rq(dl_rq);
				381
				382	/*
				383	* The arrival of a new instance needs special treatment, i.e.,
				384	* the actual scheduling parameters have to be "renewed".
				385	*/
				386	if (dl_se->dl_new) {
				387	setup_new_dl_entity(dl_se);
				388	return;
				389	}
				390
				391	if (dl_time_before(dl_se->deadline, rq_clock(rq)) \|\|
				392	dl_entity_overflow(dl_se, rq_clock(rq))) {
				393	dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
				394	dl_se->runtime = dl_se->dl_runtime;
				395	}
				396	}
				397
				398	/*
				399	* If the entity depleted all its runtime, and if we want it to sleep
				400	* while waiting for some new execution time to become available, we
				401	* set the bandwidth enforcement timer to the replenishment instant
				402	* and try to activate it.
				403	*
				404	* Notice that it is important for the caller to know if the timer
				405	* actually started or not (i.e., the replenishment instant is in
				406	* the future or in the past).
				407	*/
				408	static int start_dl_timer(struct sched_dl_entity *dl_se)
				409	{
				410	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				411	struct rq *rq = rq_of_dl_rq(dl_rq);
				412	ktime_t now, act;
				413	ktime_t soft, hard;
				414	unsigned long range;
				415	s64 delta;
				416
				417	/*
				418	* We want the timer to fire at the deadline, but considering
				419	* that it is actually coming from rq->clock and not from
				420	* hrtimer's time base reading.
				421	*/
				422	act = ns_to_ktime(dl_se->deadline);
				423	now = hrtimer_cb_get_time(&dl_se->dl_timer);
				424	delta = ktime_to_ns(now) - rq_clock(rq);
				425	act = ktime_add_ns(act, delta);
				426
				427	/*
				428	* If the expiry time already passed, e.g., because the value
				429	* chosen as the deadline is too small, don't even try to
				430	* start the timer in the past!
				431	*/
				432	if (ktime_us_delta(act, now) < 0)
				433	return 0;
				434
				435	hrtimer_set_expires(&dl_se->dl_timer, act);
				436
				437	soft = hrtimer_get_softexpires(&dl_se->dl_timer);
				438	hard = hrtimer_get_expires(&dl_se->dl_timer);
				439	range = ktime_to_ns(ktime_sub(hard, soft));
				440	__hrtimer_start_range_ns(&dl_se->dl_timer, soft,
				441	range, HRTIMER_MODE_ABS, 0);
				442
				443	return hrtimer_active(&dl_se->dl_timer);
				444	}
				445
				446	/*
				447	* This is the bandwidth enforcement timer callback. If here, we know
				448	* a task is not on its dl_rq, since the fact that the timer was running
				449	* means the task is throttled and needs a runtime replenishment.
				450	*
				451	* However, what we actually do depends on the fact the task is active,
				452	* (it is on its rq) or has been removed from there by a call to
				453	* dequeue_task_dl(). In the former case we must issue the runtime
				454	* replenishment and add the task back to the dl_rq; in the latter, we just
				455	* do nothing but clearing dl_throttled, so that runtime and deadline
				456	* updating (and the queueing back to dl_rq) will be done by the
				457	* next call to enqueue_task_dl().
				458	*/
				459	static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
				460	{
				461	struct sched_dl_entity *dl_se = container_of(timer,
				462	struct sched_dl_entity,
				463	dl_timer);
				464	struct task_struct *p = dl_task_of(dl_se);
				465	struct rq *rq = task_rq(p);
				466	raw_spin_lock(&rq->lock);
				467
				468	/*
				469	* We need to take care of a possible races here. In fact, the
				470	* task might have changed its scheduling policy to something
				471	* different from SCHED_DEADLINE or changed its reservation
				472	* parameters (through sched_setscheduler()).
				473	*/
				474	if (!dl_task(p) \|\| dl_se->dl_new)
				475	goto unlock;
				476
				477	sched_clock_tick();
				478	update_rq_clock(rq);
				479	dl_se->dl_throttled = 0;
				480	if (p->on_rq) {
				481	enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
				482	if (task_has_dl_policy(rq->curr))
				483	check_preempt_curr_dl(rq, p, 0);
				484	else
				485	resched_task(rq->curr);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	486	#ifdef CONFIG_SMP
				487	/*
				488	* Queueing this task back might have overloaded rq,
				489	* check if we need to kick someone away.
				490	*/
				491	if (has_pushable_dl_tasks(rq))
				492	push_dl_task(rq);
				493	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	494	}
				495	unlock:
				496	raw_spin_unlock(&rq->lock);
				497
				498	return HRTIMER_NORESTART;
				499	}
				500
				501	void init_dl_task_timer(struct sched_dl_entity *dl_se)
				502	{
				503	struct hrtimer *timer = &dl_se->dl_timer;
				504
				505	if (hrtimer_active(timer)) {
				506	hrtimer_try_to_cancel(timer);
				507	return;
				508	}
				509
				510	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
				511	timer->function = dl_task_timer;
				512	}
				513
				514	static
				515	int dl_runtime_exceeded(struct rq rq, struct sched_dl_entity dl_se)
				516	{
				517	int dmiss = dl_time_before(dl_se->deadline, rq_clock(rq));
				518	int rorun = dl_se->runtime <= 0;
				519
				520	if (!rorun && !dmiss)
				521	return 0;
				522
				523	/*
				524	* If we are beyond our current deadline and we are still
				525	* executing, then we have already used some of the runtime of
				526	* the next instance. Thus, if we do not account that, we are
				527	* stealing bandwidth from the system at each deadline miss!
				528	*/
				529	if (dmiss) {
				530	dl_se->runtime = rorun ? dl_se->runtime : 0;
				531	dl_se->runtime -= rq_clock(rq) - dl_se->deadline;
				532	}
				533
				534	return 1;
				535	}
				536
				537	/*
				538	* Update the current task's runtime statistics (provided it is still
				539	* a -deadline task and has not been removed from the dl_rq).
				540	*/
				541	static void update_curr_dl(struct rq *rq)
				542	{
				543	struct task_struct *curr = rq->curr;
				544	struct sched_dl_entity *dl_se = &curr->dl;
				545	u64 delta_exec;
				546
				547	if (!dl_task(curr) \|\| !on_dl_rq(dl_se))
				548	return;
				549
				550	/*
				551	* Consumed budget is computed considering the time as
				552	* observed by schedulable tasks (excluding time spent
				553	* in hardirq context, etc.). Deadlines are instead
				554	* computed using hard walltime. This seems to be the more
				555	* natural solution, but the full ramifications of this
				556	* approach need further study.
				557	*/
				558	delta_exec = rq_clock_task(rq) - curr->se.exec_start;
				559	if (unlikely((s64)delta_exec < 0))
				560	delta_exec = 0;
				561
				562	schedstat_set(curr->se.statistics.exec_max,
				563	max(curr->se.statistics.exec_max, delta_exec));
				564
				565	curr->se.sum_exec_runtime += delta_exec;
				566	account_group_exec_runtime(curr, delta_exec);
				567
				568	curr->se.exec_start = rq_clock_task(rq);
				569	cpuacct_charge(curr, delta_exec);
				570
Dario Faggioli	239be4a	2013-11-07 14:43:39 +0100	[diff] [blame]	571	sched_rt_avg_update(rq, delta_exec);
				572
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	573	dl_se->runtime -= delta_exec;
				574	if (dl_runtime_exceeded(rq, dl_se)) {
				575	__dequeue_task_dl(rq, curr, 0);
				576	if (likely(start_dl_timer(dl_se)))
				577	dl_se->dl_throttled = 1;
				578	else
				579	enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
				580
				581	if (!is_leftmost(curr, &rq->dl))
				582	resched_task(curr);
				583	}
				584	}
				585
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	586	#ifdef CONFIG_SMP
				587
				588	static struct task_struct pick_next_earliest_dl_task(struct rq rq, int cpu);
				589
				590	static inline u64 next_deadline(struct rq *rq)
				591	{
				592	struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);
				593
				594	if (next && dl_prio(next->prio))
				595	return next->dl.deadline;
				596	else
				597	return 0;
				598	}
				599
				600	static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				601	{
				602	struct rq *rq = rq_of_dl_rq(dl_rq);
				603
				604	if (dl_rq->earliest_dl.curr == 0 \|\|
				605	dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
				606	/*
				607	* If the dl_rq had no -deadline tasks, or if the new task
				608	* has shorter deadline than the current one on dl_rq, we
				609	* know that the previous earliest becomes our next earliest,
				610	* as the new task becomes the earliest itself.
				611	*/
				612	dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
				613	dl_rq->earliest_dl.curr = deadline;
				614	} else if (dl_rq->earliest_dl.next == 0 \|\|
				615	dl_time_before(deadline, dl_rq->earliest_dl.next)) {
				616	/*
				617	* On the other hand, if the new -deadline task has a
				618	* a later deadline than the earliest one on dl_rq, but
				619	* it is earlier than the next (if any), we must
				620	* recompute the next-earliest.
				621	*/
				622	dl_rq->earliest_dl.next = next_deadline(rq);
				623	}
				624	}
				625
				626	static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
				627	{
				628	struct rq *rq = rq_of_dl_rq(dl_rq);
				629
				630	/*
				631	* Since we may have removed our earliest (and/or next earliest)
				632	* task we must recompute them.
				633	*/
				634	if (!dl_rq->dl_nr_running) {
				635	dl_rq->earliest_dl.curr = 0;
				636	dl_rq->earliest_dl.next = 0;
				637	} else {
				638	struct rb_node *leftmost = dl_rq->rb_leftmost;
				639	struct sched_dl_entity *entry;
				640
				641	entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
				642	dl_rq->earliest_dl.curr = entry->deadline;
				643	dl_rq->earliest_dl.next = next_deadline(rq);
				644	}
				645	}
				646
				647	#else
				648
				649	static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				650	static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
				651
				652	#endif /* CONFIG_SMP */
				653
				654	static inline
				655	void inc_dl_tasks(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				656	{
				657	int prio = dl_task_of(dl_se)->prio;
				658	u64 deadline = dl_se->deadline;
				659
				660	WARN_ON(!dl_prio(prio));
				661	dl_rq->dl_nr_running++;
				662
				663	inc_dl_deadline(dl_rq, deadline);
				664	inc_dl_migration(dl_se, dl_rq);
				665	}
				666
				667	static inline
				668	void dec_dl_tasks(struct sched_dl_entity dl_se, struct dl_rq dl_rq)
				669	{
				670	int prio = dl_task_of(dl_se)->prio;
				671
				672	WARN_ON(!dl_prio(prio));
				673	WARN_ON(!dl_rq->dl_nr_running);
				674	dl_rq->dl_nr_running--;
				675
				676	dec_dl_deadline(dl_rq, dl_se->deadline);
				677	dec_dl_migration(dl_se, dl_rq);
				678	}
				679
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	680	static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
				681	{
				682	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				683	struct rb_node **link = &dl_rq->rb_root.rb_node;
				684	struct rb_node *parent = NULL;
				685	struct sched_dl_entity *entry;
				686	int leftmost = 1;
				687
				688	BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));
				689
				690	while (*link) {
				691	parent = *link;
				692	entry = rb_entry(parent, struct sched_dl_entity, rb_node);
				693	if (dl_time_before(dl_se->deadline, entry->deadline))
				694	link = &parent->rb_left;
				695	else {
				696	link = &parent->rb_right;
				697	leftmost = 0;
				698	}
				699	}
				700
				701	if (leftmost)
				702	dl_rq->rb_leftmost = &dl_se->rb_node;
				703
				704	rb_link_node(&dl_se->rb_node, parent, link);
				705	rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);
				706
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	707	inc_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	708	}
				709
				710	static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
				711	{
				712	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
				713
				714	if (RB_EMPTY_NODE(&dl_se->rb_node))
				715	return;
				716
				717	if (dl_rq->rb_leftmost == &dl_se->rb_node) {
				718	struct rb_node *next_node;
				719
				720	next_node = rb_next(&dl_se->rb_node);
				721	dl_rq->rb_leftmost = next_node;
				722	}
				723
				724	rb_erase(&dl_se->rb_node, &dl_rq->rb_root);
				725	RB_CLEAR_NODE(&dl_se->rb_node);
				726
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	727	dec_dl_tasks(dl_se, dl_rq);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	728	}
				729
				730	static void
				731	enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
				732	{
				733	BUG_ON(on_dl_rq(dl_se));
				734
				735	/*
				736	* If this is a wakeup or a new instance, the scheduling
				737	* parameters of the task might need updating. Otherwise,
				738	* we want a replenishment of its runtime.
				739	*/
				740	if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH)
				741	replenish_dl_entity(dl_se);
				742	else
				743	update_dl_entity(dl_se);
				744
				745	__enqueue_dl_entity(dl_se);
				746	}
				747
				748	static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
				749	{
				750	__dequeue_dl_entity(dl_se);
				751	}
				752
				753	static void enqueue_task_dl(struct rq rq, struct task_struct p, int flags)
				754	{
				755	/*
				756	* If p is throttled, we do nothing. In fact, if it exhausted
				757	* its budget it needs a replenishment and, since it now is on
				758	* its rq, the bandwidth timer callback (which clearly has not
				759	* run yet) will take care of this.
				760	*/
				761	if (p->dl.dl_throttled)
				762	return;
				763
				764	enqueue_dl_entity(&p->dl, flags);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	765
				766	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
				767	enqueue_pushable_dl_task(rq, p);
				768
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	769	inc_nr_running(rq);
				770	}
				771
				772	static void __dequeue_task_dl(struct rq rq, struct task_struct p, int flags)
				773	{
				774	dequeue_dl_entity(&p->dl);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	775	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	776	}
				777
				778	static void dequeue_task_dl(struct rq rq, struct task_struct p, int flags)
				779	{
				780	update_curr_dl(rq);
				781	__dequeue_task_dl(rq, p, flags);
				782
				783	dec_nr_running(rq);
				784	}
				785
				786	/*
				787	* Yield task semantic for -deadline tasks is:
				788	*
				789	* get off from the CPU until our next instance, with
				790	* a new runtime. This is of little use now, since we
				791	* don't have a bandwidth reclaiming mechanism. Anyway,
				792	* bandwidth reclaiming is planned for the future, and
				793	* yield_task_dl will indicate that some spare budget
				794	* is available for other task instances to use it.
				795	*/
				796	static void yield_task_dl(struct rq *rq)
				797	{
				798	struct task_struct *p = rq->curr;
				799
				800	/*
				801	* We make the task go to sleep until its current deadline by
				802	* forcing its runtime to zero. This way, update_curr_dl() stops
				803	* it and the bandwidth timer will wake it up and will give it
				804	* new scheduling parameters (thanks to dl_new=1).
				805	*/
				806	if (p->dl.runtime > 0) {
				807	rq->curr->dl.dl_new = 1;
				808	p->dl.runtime = 0;
				809	}
				810	update_curr_dl(rq);
				811	}
				812
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	813	#ifdef CONFIG_SMP
				814
				815	static int find_later_rq(struct task_struct *task);
				816	static int latest_cpu_find(struct cpumask *span,
				817	struct task_struct *task,
				818	struct cpumask *later_mask);
				819
				820	static int
				821	select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
				822	{
				823	struct task_struct *curr;
				824	struct rq *rq;
				825
				826	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
				827	goto out;
				828
				829	rq = cpu_rq(cpu);
				830
				831	rcu_read_lock();
				832	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
				833
				834	/*
				835	* If we are dealing with a -deadline task, we must
				836	* decide where to wake it up.
				837	* If it has a later deadline and the current task
				838	* on this rq can't move (provided the waking task
				839	* can!) we prefer to send it somewhere else. On the
				840	* other hand, if it has a shorter deadline, we
				841	* try to make it stay here, it might be important.
				842	*/
				843	if (unlikely(dl_task(curr)) &&
				844	(curr->nr_cpus_allowed < 2 \|\|
				845	!dl_entity_preempt(&p->dl, &curr->dl)) &&
				846	(p->nr_cpus_allowed > 1)) {
				847	int target = find_later_rq(p);
				848
				849	if (target != -1)
				850	cpu = target;
				851	}
				852	rcu_read_unlock();
				853
				854	out:
				855	return cpu;
				856	}
				857
				858	static void check_preempt_equal_dl(struct rq rq, struct task_struct p)
				859	{
				860	/*
				861	* Current can't be migrated, useless to reschedule,
				862	* let's hope p can move out.
				863	*/
				864	if (rq->curr->nr_cpus_allowed == 1 \|\|
				865	latest_cpu_find(rq->rd->span, rq->curr, NULL) == -1)
				866	return;
				867
				868	/*
				869	* p is migratable, so let's not schedule it and
				870	* see if it is pushed or pulled somewhere else.
				871	*/
				872	if (p->nr_cpus_allowed != 1 &&
				873	latest_cpu_find(rq->rd->span, p, NULL) != -1)
				874	return;
				875
				876	resched_task(rq->curr);
				877	}
				878
				879	#endif /* CONFIG_SMP */
				880
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	881	/*
				882	* Only called when both the current and waking task are -deadline
				883	* tasks.
				884	*/
				885	static void check_preempt_curr_dl(struct rq rq, struct task_struct p,
				886	int flags)
				887	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	888	if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	889	resched_task(rq->curr);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	890	return;
				891	}
				892
				893	#ifdef CONFIG_SMP
				894	/*
				895	* In the unlikely case current and p have the same deadline
				896	* let us try to decide what's the best thing to do...
				897	*/
				898	if ((s64)(p->dl.deadline - rq->curr->dl.deadline) == 0 &&
				899	!need_resched())
				900	check_preempt_equal_dl(rq, p);
				901	#endif /* CONFIG_SMP */
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	902	}
				903
				904	#ifdef CONFIG_SCHED_HRTICK
				905	static void start_hrtick_dl(struct rq rq, struct task_struct p)
				906	{
				907	s64 delta = p->dl.dl_runtime - p->dl.runtime;
				908
				909	if (delta > 10000)
				910	hrtick_start(rq, p->dl.runtime);
				911	}
				912	#endif
				913
				914	static struct sched_dl_entity pick_next_dl_entity(struct rq rq,
				915	struct dl_rq *dl_rq)
				916	{
				917	struct rb_node *left = dl_rq->rb_leftmost;
				918
				919	if (!left)
				920	return NULL;
				921
				922	return rb_entry(left, struct sched_dl_entity, rb_node);
				923	}
				924
				925	struct task_struct pick_next_task_dl(struct rq rq)
				926	{
				927	struct sched_dl_entity *dl_se;
				928	struct task_struct *p;
				929	struct dl_rq *dl_rq;
				930
				931	dl_rq = &rq->dl;
				932
				933	if (unlikely(!dl_rq->dl_nr_running))
				934	return NULL;
				935
				936	dl_se = pick_next_dl_entity(rq, dl_rq);
				937	BUG_ON(!dl_se);
				938
				939	p = dl_task_of(dl_se);
				940	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	941
				942	/* Running task will never be pushed. */
				943	if (p)
				944	dequeue_pushable_dl_task(rq, p);
				945
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	946	#ifdef CONFIG_SCHED_HRTICK
				947	if (hrtick_enabled(rq))
				948	start_hrtick_dl(rq, p);
				949	#endif
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	950
				951	#ifdef CONFIG_SMP
				952	rq->post_schedule = has_pushable_dl_tasks(rq);
				953	#endif /* CONFIG_SMP */
				954
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	955	return p;
				956	}
				957
				958	static void put_prev_task_dl(struct rq rq, struct task_struct p)
				959	{
				960	update_curr_dl(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	961
				962	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
				963	enqueue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	964	}
				965
				966	static void task_tick_dl(struct rq rq, struct task_struct p, int queued)
				967	{
				968	update_curr_dl(rq);
				969
				970	#ifdef CONFIG_SCHED_HRTICK
				971	if (hrtick_enabled(rq) && queued && p->dl.runtime > 0)
				972	start_hrtick_dl(rq, p);
				973	#endif
				974	}
				975
				976	static void task_fork_dl(struct task_struct *p)
				977	{
				978	/*
				979	* SCHED_DEADLINE tasks cannot fork and this is achieved through
				980	* sched_fork()
				981	*/
				982	}
				983
				984	static void task_dead_dl(struct task_struct *p)
				985	{
				986	struct hrtimer *timer = &p->dl.dl_timer;
				987
				988	if (hrtimer_active(timer))
				989	hrtimer_try_to_cancel(timer);
				990	}
				991
				992	static void set_curr_task_dl(struct rq *rq)
				993	{
				994	struct task_struct *p = rq->curr;
				995
				996	p->se.exec_start = rq_clock_task(rq);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	997
				998	/* You can't push away the running task */
				999	dequeue_pushable_dl_task(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1000	}
				1001
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1002	#ifdef CONFIG_SMP
				1003
				1004	/* Only try algorithms three times */
				1005	#define DL_MAX_TRIES 3
				1006
				1007	static int pick_dl_task(struct rq rq, struct task_struct p, int cpu)
				1008	{
				1009	if (!task_running(rq, p) &&
				1010	(cpu < 0 \|\| cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
				1011	(p->nr_cpus_allowed > 1))
				1012	return 1;
				1013
				1014	return 0;
				1015	}
				1016
				1017	/* Returns the second earliest -deadline task, NULL otherwise */
				1018	static struct task_struct pick_next_earliest_dl_task(struct rq rq, int cpu)
				1019	{
				1020	struct rb_node *next_node = rq->dl.rb_leftmost;
				1021	struct sched_dl_entity *dl_se;
				1022	struct task_struct *p = NULL;
				1023
				1024	next_node:
				1025	next_node = rb_next(next_node);
				1026	if (next_node) {
				1027	dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
				1028	p = dl_task_of(dl_se);
				1029
				1030	if (pick_dl_task(rq, p, cpu))
				1031	return p;
				1032
				1033	goto next_node;
				1034	}
				1035
				1036	return NULL;
				1037	}
				1038
				1039	static int latest_cpu_find(struct cpumask *span,
				1040	struct task_struct *task,
				1041	struct cpumask *later_mask)
				1042	{
				1043	const struct sched_dl_entity *dl_se = &task->dl;
				1044	int cpu, found = -1, best = 0;
				1045	u64 max_dl = 0;
				1046
				1047	for_each_cpu(cpu, span) {
				1048	struct rq *rq = cpu_rq(cpu);
				1049	struct dl_rq *dl_rq = &rq->dl;
				1050
				1051	if (cpumask_test_cpu(cpu, &task->cpus_allowed) &&
				1052	(!dl_rq->dl_nr_running \|\| dl_time_before(dl_se->deadline,
				1053	dl_rq->earliest_dl.curr))) {
				1054	if (later_mask)
				1055	cpumask_set_cpu(cpu, later_mask);
				1056	if (!best && !dl_rq->dl_nr_running) {
				1057	best = 1;
				1058	found = cpu;
				1059	} else if (!best &&
				1060	dl_time_before(max_dl,
				1061	dl_rq->earliest_dl.curr)) {
				1062	max_dl = dl_rq->earliest_dl.curr;
				1063	found = cpu;
				1064	}
				1065	} else if (later_mask)
				1066	cpumask_clear_cpu(cpu, later_mask);
				1067	}
				1068
				1069	return found;
				1070	}
				1071
				1072	static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
				1073
				1074	static int find_later_rq(struct task_struct *task)
				1075	{
				1076	struct sched_domain *sd;
				1077	struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl);
				1078	int this_cpu = smp_processor_id();
				1079	int best_cpu, cpu = task_cpu(task);
				1080
				1081	/* Make sure the mask is initialized first */
				1082	if (unlikely(!later_mask))
				1083	return -1;
				1084
				1085	if (task->nr_cpus_allowed == 1)
				1086	return -1;
				1087
				1088	best_cpu = latest_cpu_find(task_rq(task)->rd->span, task, later_mask);
				1089	if (best_cpu == -1)
				1090	return -1;
				1091
				1092	/*
				1093	* If we are here, some target has been found,
				1094	* the most suitable of which is cached in best_cpu.
				1095	* This is, among the runqueues where the current tasks
				1096	* have later deadlines than the task's one, the rq
				1097	* with the latest possible one.
				1098	*
				1099	* Now we check how well this matches with task's
				1100	* affinity and system topology.
				1101	*
				1102	* The last cpu where the task run is our first
				1103	* guess, since it is most likely cache-hot there.
				1104	*/
				1105	if (cpumask_test_cpu(cpu, later_mask))
				1106	return cpu;
				1107	/*
				1108	* Check if this_cpu is to be skipped (i.e., it is
				1109	* not in the mask) or not.
				1110	*/
				1111	if (!cpumask_test_cpu(this_cpu, later_mask))
				1112	this_cpu = -1;
				1113
				1114	rcu_read_lock();
				1115	for_each_domain(cpu, sd) {
				1116	if (sd->flags & SD_WAKE_AFFINE) {
				1117
				1118	/*
				1119	* If possible, preempting this_cpu is
				1120	* cheaper than migrating.
				1121	*/
				1122	if (this_cpu != -1 &&
				1123	cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				1124	rcu_read_unlock();
				1125	return this_cpu;
				1126	}
				1127
				1128	/*
				1129	* Last chance: if best_cpu is valid and is
				1130	* in the mask, that becomes our choice.
				1131	*/
				1132	if (best_cpu < nr_cpu_ids &&
				1133	cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {
				1134	rcu_read_unlock();
				1135	return best_cpu;
				1136	}
				1137	}
				1138	}
				1139	rcu_read_unlock();
				1140
				1141	/*
				1142	* At this point, all our guesses failed, we just return
				1143	* 'something', and let the caller sort the things out.
				1144	*/
				1145	if (this_cpu != -1)
				1146	return this_cpu;
				1147
				1148	cpu = cpumask_any(later_mask);
				1149	if (cpu < nr_cpu_ids)
				1150	return cpu;
				1151
				1152	return -1;
				1153	}
				1154
				1155	/* Locks the rq it finds */
				1156	static struct rq find_lock_later_rq(struct task_struct task, struct rq *rq)
				1157	{
				1158	struct rq *later_rq = NULL;
				1159	int tries;
				1160	int cpu;
				1161
				1162	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
				1163	cpu = find_later_rq(task);
				1164
				1165	if ((cpu == -1) \|\| (cpu == rq->cpu))
				1166	break;
				1167
				1168	later_rq = cpu_rq(cpu);
				1169
				1170	/* Retry if something changed. */
				1171	if (double_lock_balance(rq, later_rq)) {
				1172	if (unlikely(task_rq(task) != rq \|\|
				1173	!cpumask_test_cpu(later_rq->cpu,
				1174	&task->cpus_allowed) \|\|
				1175	task_running(rq, task) \|\| !task->on_rq)) {
				1176	double_unlock_balance(rq, later_rq);
				1177	later_rq = NULL;
				1178	break;
				1179	}
				1180	}
				1181
				1182	/*
				1183	* If the rq we found has no -deadline task, or
				1184	* its earliest one has a later deadline than our
				1185	* task, the rq is a good one.
				1186	*/
				1187	if (!later_rq->dl.dl_nr_running \|\|
				1188	dl_time_before(task->dl.deadline,
				1189	later_rq->dl.earliest_dl.curr))
				1190	break;
				1191
				1192	/* Otherwise we try again. */
				1193	double_unlock_balance(rq, later_rq);
				1194	later_rq = NULL;
				1195	}
				1196
				1197	return later_rq;
				1198	}
				1199
				1200	static struct task_struct pick_next_pushable_dl_task(struct rq rq)
				1201	{
				1202	struct task_struct *p;
				1203
				1204	if (!has_pushable_dl_tasks(rq))
				1205	return NULL;
				1206
				1207	p = rb_entry(rq->dl.pushable_dl_tasks_leftmost,
				1208	struct task_struct, pushable_dl_tasks);
				1209
				1210	BUG_ON(rq->cpu != task_cpu(p));
				1211	BUG_ON(task_current(rq, p));
				1212	BUG_ON(p->nr_cpus_allowed <= 1);
				1213
				1214	BUG_ON(!p->se.on_rq);
				1215	BUG_ON(!dl_task(p));
				1216
				1217	return p;
				1218	}
				1219
				1220	/*
				1221	* See if the non running -deadline tasks on this rq
				1222	* can be sent to some other CPU where they can preempt
				1223	* and start executing.
				1224	*/
				1225	static int push_dl_task(struct rq *rq)
				1226	{
				1227	struct task_struct *next_task;
				1228	struct rq *later_rq;
				1229
				1230	if (!rq->dl.overloaded)
				1231	return 0;
				1232
				1233	next_task = pick_next_pushable_dl_task(rq);
				1234	if (!next_task)
				1235	return 0;
				1236
				1237	retry:
				1238	if (unlikely(next_task == rq->curr)) {
				1239	WARN_ON(1);
				1240	return 0;
				1241	}
				1242
				1243	/*
				1244	* If next_task preempts rq->curr, and rq->curr
				1245	* can move away, it makes sense to just reschedule
				1246	* without going further in pushing next_task.
				1247	*/
				1248	if (dl_task(rq->curr) &&
				1249	dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
				1250	rq->curr->nr_cpus_allowed > 1) {
				1251	resched_task(rq->curr);
				1252	return 0;
				1253	}
				1254
				1255	/* We might release rq lock */
				1256	get_task_struct(next_task);
				1257
				1258	/* Will lock the rq it'll find */
				1259	later_rq = find_lock_later_rq(next_task, rq);
				1260	if (!later_rq) {
				1261	struct task_struct *task;
				1262
				1263	/*
				1264	* We must check all this again, since
				1265	* find_lock_later_rq releases rq->lock and it is
				1266	* then possible that next_task has migrated.
				1267	*/
				1268	task = pick_next_pushable_dl_task(rq);
				1269	if (task_cpu(next_task) == rq->cpu && task == next_task) {
				1270	/*
				1271	* The task is still there. We don't try
				1272	* again, some other cpu will pull it when ready.
				1273	*/
				1274	dequeue_pushable_dl_task(rq, next_task);
				1275	goto out;
				1276	}
				1277
				1278	if (!task)
				1279	/* No more tasks */
				1280	goto out;
				1281
				1282	put_task_struct(next_task);
				1283	next_task = task;
				1284	goto retry;
				1285	}
				1286
				1287	deactivate_task(rq, next_task, 0);
				1288	set_task_cpu(next_task, later_rq->cpu);
				1289	activate_task(later_rq, next_task, 0);
				1290
				1291	resched_task(later_rq->curr);
				1292
				1293	double_unlock_balance(rq, later_rq);
				1294
				1295	out:
				1296	put_task_struct(next_task);
				1297
				1298	return 1;
				1299	}
				1300
				1301	static void push_dl_tasks(struct rq *rq)
				1302	{
				1303	/* Terminates as it moves a -deadline task */
				1304	while (push_dl_task(rq))
				1305	;
				1306	}
				1307
				1308	static int pull_dl_task(struct rq *this_rq)
				1309	{
				1310	int this_cpu = this_rq->cpu, ret = 0, cpu;
				1311	struct task_struct *p;
				1312	struct rq *src_rq;
				1313	u64 dmin = LONG_MAX;
				1314
				1315	if (likely(!dl_overloaded(this_rq)))
				1316	return 0;
				1317
				1318	/*
				1319	* Match the barrier from dl_set_overloaded; this guarantees that if we
				1320	* see overloaded we must also see the dlo_mask bit.
				1321	*/
				1322	smp_rmb();
				1323
				1324	for_each_cpu(cpu, this_rq->rd->dlo_mask) {
				1325	if (this_cpu == cpu)
				1326	continue;
				1327
				1328	src_rq = cpu_rq(cpu);
				1329
				1330	/*
				1331	* It looks racy, abd it is! However, as in sched_rt.c,
				1332	* we are fine with this.
				1333	*/
				1334	if (this_rq->dl.dl_nr_running &&
				1335	dl_time_before(this_rq->dl.earliest_dl.curr,
				1336	src_rq->dl.earliest_dl.next))
				1337	continue;
				1338
				1339	/* Might drop this_rq->lock */
				1340	double_lock_balance(this_rq, src_rq);
				1341
				1342	/*
				1343	* If there are no more pullable tasks on the
				1344	* rq, we're done with it.
				1345	*/
				1346	if (src_rq->dl.dl_nr_running <= 1)
				1347	goto skip;
				1348
				1349	p = pick_next_earliest_dl_task(src_rq, this_cpu);
				1350
				1351	/*
				1352	* We found a task to be pulled if:
				1353	* - it preempts our current (if there's one),
				1354	* - it will preempt the last one we pulled (if any).
				1355	*/
				1356	if (p && dl_time_before(p->dl.deadline, dmin) &&
				1357	(!this_rq->dl.dl_nr_running \|\|
				1358	dl_time_before(p->dl.deadline,
				1359	this_rq->dl.earliest_dl.curr))) {
				1360	WARN_ON(p == src_rq->curr);
				1361	WARN_ON(!p->se.on_rq);
				1362
				1363	/*
				1364	* Then we pull iff p has actually an earlier
				1365	* deadline than the current task of its runqueue.
				1366	*/
				1367	if (dl_time_before(p->dl.deadline,
				1368	src_rq->curr->dl.deadline))
				1369	goto skip;
				1370
				1371	ret = 1;
				1372
				1373	deactivate_task(src_rq, p, 0);
				1374	set_task_cpu(p, this_cpu);
				1375	activate_task(this_rq, p, 0);
				1376	dmin = p->dl.deadline;
				1377
				1378	/* Is there any other task even earlier? */
				1379	}
				1380	skip:
				1381	double_unlock_balance(this_rq, src_rq);
				1382	}
				1383
				1384	return ret;
				1385	}
				1386
				1387	static void pre_schedule_dl(struct rq rq, struct task_struct prev)
				1388	{
				1389	/* Try to pull other tasks here */
				1390	if (dl_task(prev))
				1391	pull_dl_task(rq);
				1392	}
				1393
				1394	static void post_schedule_dl(struct rq *rq)
				1395	{
				1396	push_dl_tasks(rq);
				1397	}
				1398
				1399	/*
				1400	* Since the task is not running and a reschedule is not going to happen
				1401	* anytime soon on its runqueue, we try pushing it away now.
				1402	*/
				1403	static void task_woken_dl(struct rq rq, struct task_struct p)
				1404	{
				1405	if (!task_running(rq, p) &&
				1406	!test_tsk_need_resched(rq->curr) &&
				1407	has_pushable_dl_tasks(rq) &&
				1408	p->nr_cpus_allowed > 1 &&
				1409	dl_task(rq->curr) &&
				1410	(rq->curr->nr_cpus_allowed < 2 \|\|
				1411	dl_entity_preempt(&rq->curr->dl, &p->dl))) {
				1412	push_dl_tasks(rq);
				1413	}
				1414	}
				1415
				1416	static void set_cpus_allowed_dl(struct task_struct *p,
				1417	const struct cpumask *new_mask)
				1418	{
				1419	struct rq *rq;
				1420	int weight;
				1421
				1422	BUG_ON(!dl_task(p));
				1423
				1424	/*
				1425	* Update only if the task is actually running (i.e.,
				1426	* it is on the rq AND it is not throttled).
				1427	*/
				1428	if (!on_dl_rq(&p->dl))
				1429	return;
				1430
				1431	weight = cpumask_weight(new_mask);
				1432
				1433	/*
				1434	* Only update if the process changes its state from whether it
				1435	* can migrate or not.
				1436	*/
				1437	if ((p->nr_cpus_allowed > 1) == (weight > 1))
				1438	return;
				1439
				1440	rq = task_rq(p);
				1441
				1442	/*
				1443	* The process used to be able to migrate OR it can now migrate
				1444	*/
				1445	if (weight <= 1) {
				1446	if (!task_current(rq, p))
				1447	dequeue_pushable_dl_task(rq, p);
				1448	BUG_ON(!rq->dl.dl_nr_migratory);
				1449	rq->dl.dl_nr_migratory--;
				1450	} else {
				1451	if (!task_current(rq, p))
				1452	enqueue_pushable_dl_task(rq, p);
				1453	rq->dl.dl_nr_migratory++;
				1454	}
				1455
				1456	update_dl_migration(&rq->dl);
				1457	}
				1458
				1459	/* Assumes rq->lock is held */
				1460	static void rq_online_dl(struct rq *rq)
				1461	{
				1462	if (rq->dl.overloaded)
				1463	dl_set_overload(rq);
				1464	}
				1465
				1466	/* Assumes rq->lock is held */
				1467	static void rq_offline_dl(struct rq *rq)
				1468	{
				1469	if (rq->dl.overloaded)
				1470	dl_clear_overload(rq);
				1471	}
				1472
				1473	void init_sched_dl_class(void)
				1474	{
				1475	unsigned int i;
				1476
				1477	for_each_possible_cpu(i)
				1478	zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
				1479	GFP_KERNEL, cpu_to_node(i));
				1480	}
				1481
				1482	#endif /* CONFIG_SMP */
				1483
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1484	static void switched_from_dl(struct rq rq, struct task_struct p)
				1485	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1486	if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1487	hrtimer_try_to_cancel(&p->dl.dl_timer);
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1488
				1489	#ifdef CONFIG_SMP
				1490	/*
				1491	* Since this might be the only -deadline task on the rq,
				1492	* this is the right place to try to pull some other one
				1493	* from an overloaded cpu, if any.
				1494	*/
				1495	if (!rq->dl.dl_nr_running)
				1496	pull_dl_task(rq);
				1497	#endif
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1498	}
				1499
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1500	/*
				1501	* When switching to -deadline, we may overload the rq, then
				1502	* we try to push someone off, if possible.
				1503	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1504	static void switched_to_dl(struct rq rq, struct task_struct p)
				1505	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1506	int check_resched = 1;
				1507
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1508	/*
				1509	* If p is throttled, don't consider the possibility
				1510	* of preempting rq->curr, the check will be done right
				1511	* after its runtime will get replenished.
				1512	*/
				1513	if (unlikely(p->dl.dl_throttled))
				1514	return;
				1515
				1516	if (p->on_rq \|\| rq->curr != p) {
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1517	#ifdef CONFIG_SMP
				1518	if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p))
				1519	/* Only reschedule if pushing failed */
				1520	check_resched = 0;
				1521	#endif /* CONFIG_SMP */
				1522	if (check_resched && task_has_dl_policy(rq->curr))
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1523	check_preempt_curr_dl(rq, p, 0);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1524	}
				1525	}
				1526
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1527	/*
				1528	* If the scheduling parameters of a -deadline task changed,
				1529	* a push or pull operation might be needed.
				1530	*/
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1531	static void prio_changed_dl(struct rq rq, struct task_struct p,
				1532	int oldprio)
				1533	{
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1534	if (p->on_rq \|\| rq->curr == p) {
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1535	#ifdef CONFIG_SMP
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1536	/*
				1537	* This might be too much, but unfortunately
				1538	* we don't have the old deadline value, and
				1539	* we can't argue if the task is increasing
				1540	* or lowering its prio, so...
				1541	*/
				1542	if (!rq->dl.overloaded)
				1543	pull_dl_task(rq);
				1544
				1545	/*
				1546	* If we now have a earlier deadline task than p,
				1547	* then reschedule, provided p is still on this
				1548	* runqueue.
				1549	*/
				1550	if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline) &&
				1551	rq->curr == p)
				1552	resched_task(p);
				1553	#else
				1554	/*
				1555	* Again, we don't know if p has a earlier
				1556	* or later deadline, so let's blindly set a
				1557	* (maybe not needed) rescheduling point.
				1558	*/
				1559	resched_task(p);
				1560	#endif /* CONFIG_SMP */
				1561	} else
				1562	switched_to_dl(rq, p);
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1563	}
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1564
				1565	const struct sched_class dl_sched_class = {
				1566	.next = &rt_sched_class,
				1567	.enqueue_task = enqueue_task_dl,
				1568	.dequeue_task = dequeue_task_dl,
				1569	.yield_task = yield_task_dl,
				1570
				1571	.check_preempt_curr = check_preempt_curr_dl,
				1572
				1573	.pick_next_task = pick_next_task_dl,
				1574	.put_prev_task = put_prev_task_dl,
				1575
				1576	#ifdef CONFIG_SMP
				1577	.select_task_rq = select_task_rq_dl,
Juri Lelli	1baca4c	2013-11-07 14:43:38 +0100	[diff] [blame]	1578	.set_cpus_allowed = set_cpus_allowed_dl,
				1579	.rq_online = rq_online_dl,
				1580	.rq_offline = rq_offline_dl,
				1581	.pre_schedule = pre_schedule_dl,
				1582	.post_schedule = post_schedule_dl,
				1583	.task_woken = task_woken_dl,
Dario Faggioli	aab03e0	2013-11-28 11:14:43 +0100	[diff] [blame]	1584	#endif
				1585
				1586	.set_curr_task = set_curr_task_dl,
				1587	.task_tick = task_tick_dl,
				1588	.task_fork = task_fork_dl,
				1589	.task_dead = task_dead_dl,
				1590
				1591	.prio_changed = prio_changed_dl,
				1592	.switched_from = switched_from_dl,
				1593	.switched_to = switched_to_dl,
				1594	};