blob: 59371549ddf0786aaf6076324df58b8f71cc65c0 [file] [log] [blame]
Peter Zijlstra3e51f332008-05-03 18:29:28 +02001/*
2 * sched_clock for unstable cpu clocks
3 *
4 * Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
5 *
Steven Rostedtc300ba22008-07-09 00:15:33 -04006 * Updates and enhancements:
7 * Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
8 *
Peter Zijlstra3e51f332008-05-03 18:29:28 +02009 * Based on code by:
10 * Ingo Molnar <mingo@redhat.com>
11 * Guillaume Chazarain <guichaz@gmail.com>
12 *
Peter Zijlstrac6763292010-05-25 10:48:51 +020013 *
14 * What:
15 *
16 * cpu_clock(i) provides a fast (execution time) high resolution
17 * clock with bounded drift between CPUs. The value of cpu_clock(i)
18 * is monotonic for constant i. The timestamp returned is in nanoseconds.
19 *
20 * ######################### BIG FAT WARNING ##########################
21 * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
22 * # go backwards !! #
23 * ####################################################################
24 *
25 * There is no strict promise about the base, although it tends to start
26 * at 0 on boot (but people really shouldn't rely on that).
27 *
28 * cpu_clock(i) -- can be used from any context, including NMI.
Peter Zijlstrac6763292010-05-25 10:48:51 +020029 * local_clock() -- is cpu_clock() on the current cpu.
30 *
Peter Zijlstraef08f0f2013-11-28 19:31:23 +010031 * sched_clock_cpu(i)
32 *
Peter Zijlstrac6763292010-05-25 10:48:51 +020033 * How:
34 *
35 * The implementation either uses sched_clock() when
36 * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
37 * sched_clock() is assumed to provide these properties (mostly it means
38 * the architecture provides a globally synchronized highres time source).
39 *
40 * Otherwise it tries to create a semi stable clock from a mixture of other
41 * clocks, including:
42 *
43 * - GTOD (clock monotonic)
Peter Zijlstra3e51f332008-05-03 18:29:28 +020044 * - sched_clock()
45 * - explicit idle events
46 *
Peter Zijlstrac6763292010-05-25 10:48:51 +020047 * We use GTOD as base and use sched_clock() deltas to improve resolution. The
48 * deltas are filtered to provide monotonicity and keeping it within an
49 * expected window.
Peter Zijlstra3e51f332008-05-03 18:29:28 +020050 *
51 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
52 * that is otherwise invisible (TSC gets stopped).
53 *
Peter Zijlstra3e51f332008-05-03 18:29:28 +020054 */
Peter Zijlstra3e51f332008-05-03 18:29:28 +020055#include <linux/spinlock.h>
Ingo Molnar6409c4d2008-05-12 21:21:14 +020056#include <linux/hardirq.h>
Paul Gortmaker9984de12011-05-23 14:51:41 -040057#include <linux/export.h>
Ingo Molnarb3425012009-02-26 20:20:29 +010058#include <linux/percpu.h>
59#include <linux/ktime.h>
60#include <linux/sched.h>
Peter Zijlstra3e51f332008-05-03 18:29:28 +020061
Hugh Dickins2c3d1032008-07-25 19:45:00 +010062/*
63 * Scheduler clock - returns current time in nanosec units.
64 * This is default implementation.
65 * Architectures and sub-architectures can override this.
66 */
67unsigned long long __attribute__((weak)) sched_clock(void)
68{
Ron92d23f72009-05-08 22:54:49 +093069 return (unsigned long long)(jiffies - INITIAL_JIFFIES)
70 * (NSEC_PER_SEC / HZ);
Hugh Dickins2c3d1032008-07-25 19:45:00 +010071}
Divyesh Shahb6ac23af2010-04-15 08:54:59 +020072EXPORT_SYMBOL_GPL(sched_clock);
Peter Zijlstra3e51f332008-05-03 18:29:28 +020073
Peter Zijlstra5bb6b1e2010-11-19 21:11:09 +010074__read_mostly int sched_clock_running;
Peter Zijlstrac1955a32008-08-11 08:59:03 +020075
Peter Zijlstra3e51f332008-05-03 18:29:28 +020076#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
Ingo Molnarb3425012009-02-26 20:20:29 +010077__read_mostly int sched_clock_stable;
Peter Zijlstra3e51f332008-05-03 18:29:28 +020078
79struct sched_clock_data {
Peter Zijlstra3e51f332008-05-03 18:29:28 +020080 u64 tick_raw;
81 u64 tick_gtod;
82 u64 clock;
83};
84
85static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
86
87static inline struct sched_clock_data *this_scd(void)
88{
89 return &__get_cpu_var(sched_clock_data);
90}
91
92static inline struct sched_clock_data *cpu_sdc(int cpu)
93{
94 return &per_cpu(sched_clock_data, cpu);
95}
96
97void sched_clock_init(void)
98{
99 u64 ktime_now = ktime_to_ns(ktime_get());
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200100 int cpu;
101
102 for_each_possible_cpu(cpu) {
103 struct sched_clock_data *scd = cpu_sdc(cpu);
104
Peter Zijlstraa3817592008-05-29 10:07:15 +0200105 scd->tick_raw = 0;
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200106 scd->tick_gtod = ktime_now;
107 scd->clock = ktime_now;
108 }
Peter Zijlstraa3817592008-05-29 10:07:15 +0200109
110 sched_clock_running = 1;
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200111}
112
113/*
Ingo Molnarb3425012009-02-26 20:20:29 +0100114 * min, max except they take wrapping into account
Peter Zijlstra354879b2008-08-25 17:15:34 +0200115 */
116
117static inline u64 wrap_min(u64 x, u64 y)
118{
119 return (s64)(x - y) < 0 ? x : y;
120}
121
122static inline u64 wrap_max(u64 x, u64 y)
123{
124 return (s64)(x - y) > 0 ? x : y;
125}
126
127/*
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200128 * update the percpu scd from the raw @now value
129 *
130 * - filter out backward motion
Peter Zijlstra354879b2008-08-25 17:15:34 +0200131 * - use the GTOD tick value to create a window to filter crazy TSC values
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200132 */
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200133static u64 sched_clock_local(struct sched_clock_data *scd)
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200134{
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200135 u64 now, clock, old_clock, min_clock, max_clock;
136 s64 delta;
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200137
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200138again:
139 now = sched_clock();
140 delta = now - scd->tick_raw;
Peter Zijlstra354879b2008-08-25 17:15:34 +0200141 if (unlikely(delta < 0))
142 delta = 0;
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200143
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200144 old_clock = scd->clock;
145
Peter Zijlstra354879b2008-08-25 17:15:34 +0200146 /*
147 * scd->clock = clamp(scd->tick_gtod + delta,
Ingo Molnarb3425012009-02-26 20:20:29 +0100148 * max(scd->tick_gtod, scd->clock),
149 * scd->tick_gtod + TICK_NSEC);
Peter Zijlstra354879b2008-08-25 17:15:34 +0200150 */
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200151
Peter Zijlstra354879b2008-08-25 17:15:34 +0200152 clock = scd->tick_gtod + delta;
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200153 min_clock = wrap_max(scd->tick_gtod, old_clock);
154 max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200155
Peter Zijlstra354879b2008-08-25 17:15:34 +0200156 clock = wrap_max(clock, min_clock);
157 clock = wrap_min(clock, max_clock);
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200158
Eric Dumazet152f9d02009-09-30 20:36:19 +0200159 if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock)
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200160 goto again;
Ingo Molnar56b90612008-07-30 10:15:55 +0200161
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200162 return clock;
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200163}
164
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200165static u64 sched_clock_remote(struct sched_clock_data *scd)
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200166{
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200167 struct sched_clock_data *my_scd = this_scd();
168 u64 this_clock, remote_clock;
169 u64 *ptr, old_val, val;
170
Thomas Gleixnera1cbcaa2013-04-06 10:10:27 +0200171#if BITS_PER_LONG != 64
172again:
173 /*
174 * Careful here: The local and the remote clock values need to
175 * be read out atomic as we need to compare the values and
176 * then update either the local or the remote side. So the
177 * cmpxchg64 below only protects one readout.
178 *
179 * We must reread via sched_clock_local() in the retry case on
180 * 32bit as an NMI could use sched_clock_local() via the
181 * tracer and hit between the readout of
182 * the low32bit and the high 32bit portion.
183 */
184 this_clock = sched_clock_local(my_scd);
185 /*
186 * We must enforce atomic readout on 32bit, otherwise the
187 * update on the remote cpu can hit inbetween the readout of
188 * the low32bit and the high 32bit portion.
189 */
190 remote_clock = cmpxchg64(&scd->clock, 0, 0);
191#else
192 /*
193 * On 64bit the read of [my]scd->clock is atomic versus the
194 * update, so we can avoid the above 32bit dance.
195 */
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200196 sched_clock_local(my_scd);
197again:
198 this_clock = my_scd->clock;
199 remote_clock = scd->clock;
Thomas Gleixnera1cbcaa2013-04-06 10:10:27 +0200200#endif
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200201
202 /*
203 * Use the opportunity that we have both locks
204 * taken to couple the two clocks: we take the
205 * larger time as the latest time for both
206 * runqueues. (this creates monotonic movement)
207 */
208 if (likely((s64)(remote_clock - this_clock) < 0)) {
209 ptr = &scd->clock;
210 old_val = remote_clock;
211 val = this_clock;
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200212 } else {
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200213 /*
214 * Should be rare, but possible:
215 */
216 ptr = &my_scd->clock;
217 old_val = this_clock;
218 val = remote_clock;
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200219 }
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200220
Eric Dumazet152f9d02009-09-30 20:36:19 +0200221 if (cmpxchg64(ptr, old_val, val) != old_val)
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200222 goto again;
223
224 return val;
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200225}
226
Peter Zijlstrac6763292010-05-25 10:48:51 +0200227/*
228 * Similar to cpu_clock(), but requires local IRQs to be disabled.
229 *
230 * See cpu_clock().
231 */
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200232u64 sched_clock_cpu(int cpu)
233{
Ingo Molnarb3425012009-02-26 20:20:29 +0100234 struct sched_clock_data *scd;
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200235 u64 clock;
236
Ingo Molnarb3425012009-02-26 20:20:29 +0100237 if (sched_clock_stable)
238 return sched_clock();
Peter Zijlstraa3817592008-05-29 10:07:15 +0200239
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200240 if (unlikely(!sched_clock_running))
241 return 0ull;
242
Peter Zijlstraef08f0f2013-11-28 19:31:23 +0100243 preempt_disable();
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200244 scd = cpu_sdc(cpu);
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200245
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200246 if (cpu != smp_processor_id())
247 clock = sched_clock_remote(scd);
248 else
249 clock = sched_clock_local(scd);
Peter Zijlstraef08f0f2013-11-28 19:31:23 +0100250 preempt_enable();
Ingo Molnare4e4e532008-04-14 08:50:02 +0200251
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200252 return clock;
253}
254
255void sched_clock_tick(void)
256{
Peter Zijlstra8325d9c2009-02-26 21:40:16 +0100257 struct sched_clock_data *scd;
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200258 u64 now, now_gtod;
259
Peter Zijlstra8325d9c2009-02-26 21:40:16 +0100260 if (sched_clock_stable)
261 return;
262
Peter Zijlstraa3817592008-05-29 10:07:15 +0200263 if (unlikely(!sched_clock_running))
264 return;
265
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200266 WARN_ON_ONCE(!irqs_disabled());
267
Peter Zijlstra8325d9c2009-02-26 21:40:16 +0100268 scd = this_scd();
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200269 now_gtod = ktime_to_ns(ktime_get());
Steven Rostedta83bc472008-07-09 00:15:32 -0400270 now = sched_clock();
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200271
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200272 scd->tick_raw = now;
273 scd->tick_gtod = now_gtod;
Peter Zijlstradef0a9b2009-09-18 20:14:01 +0200274 sched_clock_local(scd);
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200275}
276
/*
 * Hook for entering deep idle (the original note says irqs are disabled
 * at this point): bring the current cpu's clock up to date.
 */
void sched_clock_idle_sleep_event(void)
{
	int this_cpu = smp_processor_id();

	sched_clock_cpu(this_cpu);
}
EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
285
286/*
287 * We just idled delta nanoseconds (called with irqs disabled):
288 */
289void sched_clock_idle_wakeup_event(u64 delta_ns)
290{
Thomas Gleixner1c5745a2008-12-22 23:05:28 +0100291 if (timekeeping_suspended)
292 return;
293
Peter Zijlstra354879b2008-08-25 17:15:34 +0200294 sched_clock_tick();
Peter Zijlstra3e51f332008-05-03 18:29:28 +0200295 touch_softlockup_watchdog();
296}
297EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
298
Peter Zijlstrac6763292010-05-25 10:48:51 +0200299/*
300 * As outlined at the top, provides a fast, high resolution, nanosecond
301 * time source that is monotonic per cpu argument and has bounded drift
302 * between cpus.
303 *
304 * ######################### BIG FAT WARNING ##########################
305 * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
306 * # go backwards !! #
307 * ####################################################################
308 */
309u64 cpu_clock(int cpu)
David Millerb9f8fcd2009-12-13 18:25:02 -0800310{
Peter Zijlstraef08f0f2013-11-28 19:31:23 +0100311 return sched_clock_cpu(cpu);
David Millerb9f8fcd2009-12-13 18:25:02 -0800312}
313
Peter Zijlstrac6763292010-05-25 10:48:51 +0200314/*
315 * Similar to cpu_clock() for the current cpu. Time will only be observed
316 * to be monotonic if care is taken to only compare timestamps taken on the
317 * same CPU.
318 *
319 * See cpu_clock().
320 */
321u64 local_clock(void)
322{
Peter Zijlstraef08f0f2013-11-28 19:31:23 +0100323 return sched_clock_cpu(raw_smp_processor_id());
Peter Zijlstrac6763292010-05-25 10:48:51 +0200324}
325
Peter Zijlstra8325d9c2009-02-26 21:40:16 +0100326#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
327
328void sched_clock_init(void)
329{
330 sched_clock_running = 1;
331}
332
333u64 sched_clock_cpu(int cpu)
334{
335 if (unlikely(!sched_clock_running))
336 return 0;
337
338 return sched_clock();
339}
340
Peter Zijlstrac6763292010-05-25 10:48:51 +0200341u64 cpu_clock(int cpu)
Peter Zijlstra76a2a6e2008-06-27 13:41:15 +0200342{
David Millerb9f8fcd2009-12-13 18:25:02 -0800343 return sched_clock_cpu(cpu);
Peter Zijlstra76a2a6e2008-06-27 13:41:15 +0200344}
David Millerb9f8fcd2009-12-13 18:25:02 -0800345
Peter Zijlstrac6763292010-05-25 10:48:51 +0200346u64 local_clock(void)
347{
348 return sched_clock_cpu(0);
349}
350
David Millerb9f8fcd2009-12-13 18:25:02 -0800351#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
352
Ingo Molnar4c9fe8a2008-06-27 14:49:35 +0200353EXPORT_SYMBOL_GPL(cpu_clock);
Peter Zijlstrac6763292010-05-25 10:48:51 +0200354EXPORT_SYMBOL_GPL(local_clock);