sched/deadline: Fix the update of the total -deadline utilization
Now that the inactive timer can be armed to fire at the 0-lag time,
it is possible to use inactive_task_timer() to update the total
-deadline utilization (dl_b->total_bw) at the correct time, fixing
dl_overflow() and __setparam_dl().
Tested-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Luca Abeni <luca.abeni@santannapisa.it>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Claudio Scordino <claudio@evidence.eu.com>
Cc: Joel Fernandes <joelaf@google.com>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Link: http://lkml.kernel.org/r/1495138417-6203-4-git-send-email-luca.abeni@santannapisa.it
Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 968c655..126339d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2475,9 +2475,6 @@ static inline int dl_bw_cpus(int i)
* allocated bandwidth to reflect the new situation.
*
* This function is called while holding p's rq->lock.
- *
- * XXX we should delay bw change until the task's 0-lag point, see
- * __setparam_dl().
*/
static int dl_overflow(struct task_struct *p, int policy,
const struct sched_attr *attr)
@@ -2502,16 +2499,29 @@ static int dl_overflow(struct task_struct *p, int policy,
cpus = dl_bw_cpus(task_cpu(p));
if (dl_policy(policy) && !task_has_dl_policy(p) &&
!__dl_overflow(dl_b, cpus, 0, new_bw)) {
+ if (hrtimer_active(&p->dl.inactive_timer))
+ __dl_clear(dl_b, p->dl.dl_bw);
__dl_add(dl_b, new_bw);
err = 0;
} else if (dl_policy(policy) && task_has_dl_policy(p) &&
!__dl_overflow(dl_b, cpus, p->dl.dl_bw, new_bw)) {
+ /*
+ * XXX this is slightly incorrect: when the task
+ * utilization decreases, we should delay the total
+ * utilization change until the task's 0-lag point.
+ * But this would require to set the task's "inactive
+ * timer" when the task is not inactive.
+ */
__dl_clear(dl_b, p->dl.dl_bw);
__dl_add(dl_b, new_bw);
dl_change_utilization(p, new_bw);
err = 0;
} else if (!dl_policy(policy) && task_has_dl_policy(p)) {
- __dl_clear(dl_b, p->dl.dl_bw);
+ /*
+ * Do not decrease the total deadline utilization here,
+ * switched_from_dl() will take care to do it at the correct
+ * (0-lag) time.
+ */
err = 0;
}
raw_spin_unlock(&dl_b->lock);
@@ -4020,26 +4030,6 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
dl_se->flags = attr->sched_flags;
dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
-
- /*
- * Changing the parameters of a task is 'tricky' and we're not doing
- * the correct thing -- also see task_dead_dl() and switched_from_dl().
- *
- * What we SHOULD do is delay the bandwidth release until the 0-lag
- * point. This would include retaining the task_struct until that time
- * and change dl_overflow() to not immediately decrement the current
- * amount.
- *
- * Instead we retain the current runtime/deadline and let the new
- * parameters take effect after the current reservation period lapses.
- * This is safe (albeit pessimistic) because the 0-lag point is always
- * before the current scheduling deadline.
- *
- * We can still have temporary overloads because we do not delay the
- * change in bandwidth until that time; so admission control is
- * not on the safe side. It does however guarantee tasks will never
- * consume more than promised.
- */
}
/*
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6480a92..add9cba 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -175,8 +175,14 @@ static void task_non_contending(struct task_struct *p)
if (zerolag_time < 0) {
if (dl_task(p))
sub_running_bw(dl_se->dl_bw, dl_rq);
- if (!dl_task(p) || p->state == TASK_DEAD)
+ if (!dl_task(p) || p->state == TASK_DEAD) {
+ struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+
+ raw_spin_lock(&dl_b->lock);
+ __dl_clear(dl_b, p->dl.dl_bw);
__dl_clear_params(p);
+ raw_spin_unlock(&dl_b->lock);
+ }
return;
}
@@ -1004,10 +1010,16 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
rq = task_rq_lock(p, &rf);
if (!dl_task(p) || p->state == TASK_DEAD) {
+ struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
+
if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl));
dl_se->dl_non_contending = 0;
}
+
+ raw_spin_lock(&dl_b->lock);
+ __dl_clear(dl_b, p->dl.dl_bw);
+ raw_spin_unlock(&dl_b->lock);
__dl_clear_params(p);
goto unlock;
@@ -1534,19 +1546,6 @@ static void task_fork_dl(struct task_struct *p)
*/
}
-static void task_dead_dl(struct task_struct *p)
-{
- struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
-
- /*
- * Since we are TASK_DEAD we won't slip out of the domain!
- */
- raw_spin_lock_irq(&dl_b->lock);
- /* XXX we should retain the bw until 0-lag */
- dl_b->total_bw -= p->dl.dl_bw;
- raw_spin_unlock_irq(&dl_b->lock);
-}
-
static void set_curr_task_dl(struct rq *rq)
{
struct task_struct *p = rq->curr;
@@ -2141,7 +2140,6 @@ const struct sched_class dl_sched_class = {
.set_curr_task = set_curr_task_dl,
.task_tick = task_tick_dl,
.task_fork = task_fork_dl,
- .task_dead = task_dead_dl,
.prio_changed = prio_changed_dl,
.switched_from = switched_from_dl,