sched: Add a timer to handle CFS bandwidth refresh
This patch adds a per-task_group timer which handles the refresh of the global
CFS bandwidth pool.
Since the RT pool is using a similar timer there's some small refactoring to
share this support.
Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.277271273@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/sched.c b/kernel/sched.c
index 35561c6..34bf8e6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -196,10 +196,28 @@
return sysctl_sched_rt_runtime >= 0;
}
+static void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
+{
+ unsigned long delta;
+ ktime_t soft, hard, now;
+
+ for (;;) {
+ if (hrtimer_active(period_timer))
+ break;
+
+ now = hrtimer_cb_get_time(period_timer);
+ hrtimer_forward(period_timer, now, period);
+
+ soft = hrtimer_get_softexpires(period_timer);
+ hard = hrtimer_get_expires(period_timer);
+ delta = ktime_to_ns(ktime_sub(hard, soft));
+ __hrtimer_start_range_ns(period_timer, soft, delta,
+ HRTIMER_MODE_ABS_PINNED, 0);
+ }
+}
+
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
- ktime_t now;
-
if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
return;
@@ -207,22 +225,7 @@
return;
raw_spin_lock(&rt_b->rt_runtime_lock);
- for (;;) {
- unsigned long delta;
- ktime_t soft, hard;
-
- if (hrtimer_active(&rt_b->rt_period_timer))
- break;
-
- now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
- hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
-
- soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
- hard = hrtimer_get_expires(&rt_b->rt_period_timer);
- delta = ktime_to_ns(ktime_sub(hard, soft));
- __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
- HRTIMER_MODE_ABS_PINNED, 0);
- }
+ start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
raw_spin_unlock(&rt_b->rt_runtime_lock);
}
@@ -253,6 +256,9 @@
ktime_t period;
u64 quota, runtime;
s64 hierarchal_quota;
+
+ int idle, timer_active;
+ struct hrtimer period_timer;
#endif
};
@@ -403,6 +409,28 @@
}
static inline u64 default_cfs_period(void);
+static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
+
+static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
+{
+ struct cfs_bandwidth *cfs_b =
+ container_of(timer, struct cfs_bandwidth, period_timer);
+ ktime_t now;
+ int overrun;
+ int idle = 0;
+
+ for (;;) {
+ now = hrtimer_cb_get_time(timer);
+ overrun = hrtimer_forward(timer, now, cfs_b->period);
+
+ if (!overrun)
+ break;
+
+ idle = do_sched_cfs_period_timer(cfs_b, overrun);
+ }
+
+ return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
+}
static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
{
@@ -410,6 +438,9 @@
cfs_b->runtime = 0;
cfs_b->quota = RUNTIME_INF;
cfs_b->period = ns_to_ktime(default_cfs_period());
+
+ hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ cfs_b->period_timer.function = sched_cfs_period_timer;
}
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
@@ -417,8 +448,34 @@
cfs_rq->runtime_enabled = 0;
}
+/* requires cfs_b->lock, may release to reprogram timer */
+static void __start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+ /*
+ * The timer may be active because we're trying to set a new bandwidth
+ * period or because we're racing with the tear-down path
+ * (timer_active==0 becomes visible before the hrtimer call-back
+ * terminates). In either case we ensure that it's re-programmed
+ */
+ while (unlikely(hrtimer_active(&cfs_b->period_timer))) {
+ raw_spin_unlock(&cfs_b->lock);
+ /* ensure cfs_b->lock is available while we wait */
+ hrtimer_cancel(&cfs_b->period_timer);
+
+ raw_spin_lock(&cfs_b->lock);
+ /* if someone else restarted the timer then we're done */
+ if (cfs_b->timer_active)
+ return;
+ }
+
+ cfs_b->timer_active = 1;
+ start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+}
+
static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
-{}
+{
+ hrtimer_cancel(&cfs_b->period_timer);
+}
#else
static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
@@ -9078,7 +9135,7 @@
static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
{
- int i, ret = 0;
+ int i, ret = 0, runtime_enabled;
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
if (tg == &root_task_group)
@@ -9105,10 +9162,18 @@
if (ret)
goto out_unlock;
+ runtime_enabled = quota != RUNTIME_INF;
raw_spin_lock_irq(&cfs_b->lock);
cfs_b->period = ns_to_ktime(period);
cfs_b->quota = quota;
cfs_b->runtime = quota;
+
+ /* restart the period timer (if active) to handle new period expiry */
+ if (runtime_enabled && cfs_b->timer_active) {
+ /* force a reprogram */
+ cfs_b->timer_active = 0;
+ __start_cfs_bandwidth(cfs_b);
+ }
raw_spin_unlock_irq(&cfs_b->lock);
for_each_possible_cpu(i) {
@@ -9116,7 +9181,7 @@
struct rq *rq = rq_of(cfs_rq);
raw_spin_lock_irq(&rq->lock);
- cfs_rq->runtime_enabled = quota != RUNTIME_INF;
+ cfs_rq->runtime_enabled = runtime_enabled;
cfs_rq->runtime_remaining = 0;
raw_spin_unlock_irq(&rq->lock);
}