// SPDX-License-Identifier: GPL-2.0-only
/*
 * Simple CPU accounting cgroup controller
 */
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and can
 * race with irq/vtime_account on this CPU. We would either get the old
 * or the new value, with the side effect of accounting a slice of irq time
 * to the wrong task when an irq is in progress while we read rq->clock.
 * That is a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);

static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}

static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
                                  enum cpu_usage_stat idx)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        u64_stats_update_begin(&irqtime->sync);
        cpustat[idx] += delta;
        irqtime->total += delta;
        irqtime->tick_delta += delta;
        u64_stats_update_end(&irqtime->sync);
}

/*
 * Called after incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
{
        struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
        unsigned int pc;
        s64 delta;
        int cpu;

        if (!sched_clock_irqtime)
                return;

        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
        irqtime->irq_start_time += delta;
        pc = irq_count() - offset;
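        /*
         * @offset subtracts whatever the {soft,}irq transition in progress
         * has already contributed to preempt_count, so pc describes the
         * context the just-elapsed delta was actually spent in; the checks
         * below use it to attribute that delta to hardirq or softirq time.
         */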

        /*
         * We do not account for softirq time from ksoftirqd here.
         * We want to continue accounting softirq time to the ksoftirqd thread
         * in that case, so as not to confuse the scheduler with a special task
         * that does not consume any time, but still wants to run.
         */
        if (pc & HARDIRQ_MASK)
                irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
        else if ((pc & SOFTIRQ_OFFSET) && curr != this_cpu_ksoftirqd())
                irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
}

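/*
 * Hand back up to @maxtime of the irq/softirq time accumulated since the
 * last tick and drop it from tick_delta, so that the tick accounting path
 * does not charge that time to the interrupted task a second time.
 */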
static u64 irqtime_tick_accounted(u64 maxtime)
{
        struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
        u64 delta;

        delta = min(irqtime->tick_delta, maxtime);
        irqtime->tick_delta -= delta;

        return delta;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime     (0)

static u64 irqtime_tick_accounted(u64 dummy)
{
        return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
        /*
         * Since all updates are sure to touch the root cgroup, we
         * get ourselves ahead and touch it first. If the root cgroup
         * is the only cgroup, then nothing else should be necessary.
         */
        __this_cpu_add(kernel_cpustat.cpustat[index], tmp);

        cgroup_account_cputime_field(p, index, tmp);
}

/*
 * Account user CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in user space since the last update
 */
void account_user_time(struct task_struct *p, u64 cputime)
{
        int index;

        /* Add user time to process. */
        p->utime += cputime;
        account_group_user_time(p, cputime);

        index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

        /* Add user time to cpustat. */
        task_group_account_field(p, index, cputime);

        /* Account for user time used */
        acct_account_cputime(p);
}

/*
 * Account guest CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in a virtual machine since the last update
 */
void account_guest_time(struct task_struct *p, u64 cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        /* Add guest time to process. */
        p->utime += cputime;
        account_group_user_time(p, cputime);
        p->gtime += cputime;

        /* Add guest time to cpustat. */
        if (task_nice(p) > 0) {
                cpustat[CPUTIME_NICE] += cputime;
                cpustat[CPUTIME_GUEST_NICE] += cputime;
        } else {
                cpustat[CPUTIME_USER] += cputime;
                cpustat[CPUTIME_GUEST] += cputime;
        }
}

/*
 * Account system CPU time to a process and the desired cpustat field.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in kernel space since the last update
 * @index: the cpustat field that has to be updated
 */
void account_system_index_time(struct task_struct *p,
                               u64 cputime, enum cpu_usage_stat index)
{
        /* Add system time to process. */
        p->stime += cputime;
        account_group_system_time(p, cputime);

        /* Add system time to cpustat. */
        task_group_account_field(p, index, cputime);

        /* Account for system time used */
        acct_account_cputime(p);
}

/*
 * Account system CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the CPU time spent in kernel space since the last update
 */
void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
{
        int index;

        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                account_guest_time(p, cputime);
                return;
        }

        if (hardirq_count() - hardirq_offset)
                index = CPUTIME_IRQ;
        else if (in_serving_softirq())
                index = CPUTIME_SOFTIRQ;
        else
                index = CPUTIME_SYSTEM;

        account_system_index_time(p, cputime, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the CPU time spent in involuntary wait
 */
void account_steal_time(u64 cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        cpustat[CPUTIME_STEAL] += cputime;
}

/*
 * Account for idle time.
 * @cputime: the CPU time spent in idle wait
 */
void account_idle_time(u64 cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        struct rq *rq = this_rq();

        if (atomic_read(&rq->nr_iowait) > 0)
                cpustat[CPUTIME_IOWAIT] += cputime;
        else
                cpustat[CPUTIME_IDLE] += cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think elapsed.
 */
static __always_inline u64 steal_account_process_time(u64 maxtime)
{
#ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
                u64 steal;

                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;
                steal = min(steal, maxtime);
                account_steal_time(steal);
                this_rq()->prev_steal_time += steal;

                return steal;
        }
#endif
        return 0;
}

/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline u64 account_other_time(u64 max)
{
        u64 accounted;

        lockdep_assert_irqs_disabled();

        accounted = steal_account_process_time(max);

        if (accounted < max)
                accounted += irqtime_tick_accounted(max - accounted);

        return accounted;
}

#ifdef CONFIG_64BIT
static inline u64 read_sum_exec_runtime(struct task_struct *t)
{
        return t->se.sum_exec_runtime;
}
#else
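/*
 * On 32-bit kernels the 64-bit sum_exec_runtime cannot be read atomically,
 * so take the rq lock to avoid observing a torn value while the task is
 * running on another CPU.
 */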
static u64 read_sum_exec_runtime(struct task_struct *t)
{
        u64 ns;
        struct rq_flags rf;
        struct rq *rq;

        rq = task_rq_lock(t, &rf);
        ns = t->se.sum_exec_runtime;
        task_rq_unlock(rq, t, &rf);

        return ns;
}
#endif

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
        struct signal_struct *sig = tsk->signal;
        u64 utime, stime;
        struct task_struct *t;
        unsigned int seq, nextseq;
        unsigned long flags;

        /*
         * Update current task runtime to account pending time since last
         * scheduler action or thread_group_cputime() call. This thread group
         * might have other running tasks on different CPUs, but updating
         * their runtime can affect syscall performance, so we skip accounting
         * those pending times and rely only on values updated on tick or
         * other scheduler action.
         */
        if (same_thread_group(current, tsk))
                (void) task_sched_runtime(current);

        rcu_read_lock();
        /* Attempt a lockless read on the first round. */
        nextseq = 0;
        do {
                seq = nextseq;
                flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
                times->utime = sig->utime;
                times->stime = sig->stime;
                times->sum_exec_runtime = sig->sum_sched_runtime;

                for_each_thread(tsk, t) {
                        task_cputime(t, &utime, &stime);
                        times->utime += utime;
                        times->stime += stime;
                        times->sum_exec_runtime += read_sum_exec_runtime(t);
                }
                /* If lockless access failed, take the lock. */
                nextseq = 1;
        } while (need_seqretry(&sig->stats_lock, seq));
        done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
        rcu_read_unlock();
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the CPU time gets accounted to
 * @user_tick: is the tick from userspace
 * @ticks: number of ticks being accounted
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there is
 * no timer going off while we are on hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on irq or
 * softirq time, as those do not count in task exec_runtime any more.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         int ticks)
{
        u64 other, cputime = TICK_NSEC * ticks;

        /*
         * When returning from idle, many ticks can get accounted at
         * once, including some ticks of steal, irq, and softirq time.
         * Subtract those ticks from the amount of time accounted to
         * idle, or potentially user or system time. Due to rounding,
         * other time can exceed ticks occasionally.
         */
        other = account_other_time(ULONG_MAX);
        if (other >= cputime)
                return;

        cputime -= other;

        if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time does not get accounted in cpu_softirq_time.
                 * So, we have to handle it separately here.
                 * Also, p->stime needs to be updated for ksoftirqd.
                 */
                account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime);
        } else if (p == this_rq()->idle) {
                account_idle_time(cputime);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime);
        } else {
                account_system_index_time(p, cputime, CPUTIME_SYSTEM);
        }
}

static void irqtime_account_idle_ticks(int ticks)
{
        irqtime_account_process_tick(current, 0, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) { }
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                                int nr_ticks) { }
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_task_switch(struct task_struct *prev)
{
        if (is_idle_task(prev))
                vtime_account_idle(prev);
        else
                vtime_account_kernel(prev);

        vtime_flush(prev);
        arch_vtime_task_switch(prev);
}
# endif

void vtime_account_irq(struct task_struct *tsk, unsigned int offset)
{
        unsigned int pc = irq_count() - offset;

        if (pc & HARDIRQ_OFFSET) {
                vtime_account_hardirq(tsk);
        } else if (pc & SOFTIRQ_OFFSET) {
                vtime_account_softirq(tsk);
        } else if (!IS_ENABLED(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE) &&
                   is_idle_task(tsk)) {
                vtime_account_idle(tsk);
        } else {
                vtime_account_kernel(tsk);
        }
}

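/*
 * With architecture-native vtime accounting, utime and stime are already
 * measured precisely, so no tick-based adjustment is needed: report the
 * raw values.
 */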
void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
                    u64 *ut, u64 *st)
{
        *ut = curr->utime;
        *st = curr->stime;
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        *ut = p->utime;
        *st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);

        *ut = cputime.utime;
        *st = cputime.stime;
}

#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */

/*
 * Account a single tick of CPU time.
 * @p: the process that the CPU time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
        u64 cputime, steal;

        if (vtime_accounting_enabled_this_cpu())
                return;

        if (sched_clock_irqtime) {
                irqtime_account_process_tick(p, user_tick, 1);
                return;
        }

        cputime = TICK_NSEC;
        steal = steal_account_process_time(ULONG_MAX);

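        /*
         * Stolen time was already accounted by steal_account_process_time();
         * only the remainder of this tick is charged to the task or to idle
         * below.
         */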
        if (steal >= cputime)
                return;

        cputime -= steal;

        if (user_tick)
                account_user_time(p, cputime);
        else if ((p != this_rq()->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime);
        else
                account_idle_time(cputime);
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of stolen ticks
 */
void account_idle_ticks(unsigned long ticks)
{
        u64 cputime, steal;

        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }

        cputime = ticks * TICK_NSEC;
        steal = steal_account_process_time(ULONG_MAX);

        if (steal >= cputime)
                return;

        cputime -= steal;
        account_idle_time(cputime);
}

/*
 * Adjust the tick based cputime's random precision against scheduler runtime
 * accounting.
 *
 * Tick based cputime accounting depends on whether the random scheduling
 * timeslices of a task happen to be interrupted by the timer or not.
 * Depending on these circumstances, the number of such interrupts may over-
 * or under-estimate the real user and system cputime, matching it only with
 * a variable precision.
 *
 * Fix this by scaling these tick based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
 */
void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
                    u64 *ut, u64 *st)
{
        u64 rtime, stime, utime;
        unsigned long flags;

        /* Serialize concurrent callers such that we can honour our guarantees */
        raw_spin_lock_irqsave(&prev->lock, flags);
        rtime = curr->sum_exec_runtime;

        /*
         * This is possible under two circumstances:
         *   - rtime isn't monotonic after all (a bug);
         *   - we got reordered by the lock.
         *
         * In both cases this acts as a filter such that the rest of the code
         * can assume it is monotonic regardless of anything else.
         */
        if (prev->stime + prev->utime >= rtime)
                goto out;

        stime = curr->stime;
        utime = curr->utime;

        /*
         * If stime is 0, attribute all of rtime to userspace; if utime is 0,
         * attribute all of it to kernel time. Once a task gets some ticks,
         * the monotonicity code at 'update:' will ensure things converge to
         * the observed ratio.
         */
        if (stime == 0) {
                utime = rtime;
                goto update;
        }

        if (utime == 0) {
                stime = rtime;
                goto update;
        }

        stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);
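        /*
         * Worked example (any time unit): with stime = 2, utime = 6 and
         * rtime = 12, the observed 1:3 ratio is applied to rtime, giving
         * stime = 12 * 2 / (2 + 6) = 3 and, below, utime = 12 - 3 = 9.
         */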

update:
        /*
         * Make sure stime doesn't go backwards; this preserves monotonicity
         * for utime because rtime is monotonic.
         *
         *  utime_i+1 = rtime_i+1 - stime_i
         *            = rtime_i+1 - (rtime_i - utime_i)
         *            = (rtime_i+1 - rtime_i) + utime_i
         *            >= utime_i
         */
        if (stime < prev->stime)
                stime = prev->stime;
        utime = rtime - stime;

        /*
         * Make sure utime doesn't go backwards; this still preserves
         * monotonicity for stime, analogous argument to above.
         */
        if (utime < prev->utime) {
                utime = prev->utime;
                stime = rtime - utime;
        }

        prev->stime = stime;
        prev->utime = utime;
out:
        *ut = prev->utime;
        *st = prev->stime;
        raw_spin_unlock_irqrestore(&prev->lock, flags);
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        struct task_cputime cputime = {
                .sum_exec_runtime = p->se.sum_exec_runtime,
        };

        task_cputime(p, &cputime.utime, &cputime.stime);
        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);
        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static u64 vtime_delta(struct vtime *vtime)
{
        unsigned long long clock;

        clock = sched_clock();
        if (clock < vtime->starttime)
                return 0;

        return clock - vtime->starttime;
}

static u64 get_vtime_delta(struct vtime *vtime)
{
        u64 delta = vtime_delta(vtime);
        u64 other;

        /*
         * Unlike tick based timing, vtime based timing never has lost
         * ticks, and no need for steal time accounting to make up for
         * lost ticks. Vtime accounts a rounded version of actual
         * elapsed time. Limit account_other_time to prevent rounding
         * errors from causing elapsed vtime to go negative.
         */
        other = account_other_time(delta);
        WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
        vtime->starttime += delta;

        return delta - other;
}

static void vtime_account_system(struct task_struct *tsk,
                                 struct vtime *vtime)
{
        vtime->stime += get_vtime_delta(vtime);
        if (vtime->stime >= TICK_NSEC) {
                account_system_time(tsk, irq_count(), vtime->stime);
                vtime->stime = 0;
        }
}

static void vtime_account_guest(struct task_struct *tsk,
                                struct vtime *vtime)
{
        vtime->gtime += get_vtime_delta(vtime);
        if (vtime->gtime >= TICK_NSEC) {
                account_guest_time(tsk, vtime->gtime);
                vtime->gtime = 0;
        }
}

static void __vtime_account_kernel(struct task_struct *tsk,
                                   struct vtime *vtime)
{
        /* We might have scheduled out from guest path */
        if (vtime->state == VTIME_GUEST)
                vtime_account_guest(tsk, vtime);
        else
                vtime_account_system(tsk, vtime);
}

void vtime_account_kernel(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        if (!vtime_delta(vtime))
                return;

        write_seqcount_begin(&vtime->seqcount);
        __vtime_account_kernel(tsk, vtime);
        write_seqcount_end(&vtime->seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime_account_system(tsk, vtime);
        vtime->state = VTIME_USER;
        write_seqcount_end(&vtime->seqcount);
}

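/*
 * Accumulate the user time elapsed since vtime_user_enter(); once at least
 * TICK_NSEC is pending, fold it into the task's utime. The state then
 * returns to VTIME_SYS.
 */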
void vtime_user_exit(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime->utime += get_vtime_delta(vtime);
        if (vtime->utime >= TICK_NSEC) {
                account_user_time(tsk, vtime->utime);
                vtime->utime = 0;
        }
        vtime->state = VTIME_SYS;
        write_seqcount_end(&vtime->seqcount);
}

void vtime_guest_enter(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;
        /*
         * The flags must be updated under the lock with
         * the vtime_starttime flush and update.
         * That enforces the right ordering and update sequence
         * synchronization against the reader (task_gtime())
         * that can thus safely catch up with a tickless delta.
         */
        write_seqcount_begin(&vtime->seqcount);
        vtime_account_system(tsk, vtime);
        tsk->flags |= PF_VCPU;
        vtime->state = VTIME_GUEST;
        write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime_account_guest(tsk, vtime);
        tsk->flags &= ~PF_VCPU;
        vtime->state = VTIME_SYS;
        write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
        account_idle_time(get_vtime_delta(&tsk->vtime));
}

void vtime_task_switch_generic(struct task_struct *prev)
{
        struct vtime *vtime = &prev->vtime;

        write_seqcount_begin(&vtime->seqcount);
        if (vtime->state == VTIME_IDLE)
                vtime_account_idle(prev);
        else
                __vtime_account_kernel(prev, vtime);
        vtime->state = VTIME_INACTIVE;
        vtime->cpu = -1;
        write_seqcount_end(&vtime->seqcount);

        vtime = &current->vtime;

        write_seqcount_begin(&vtime->seqcount);
        if (is_idle_task(current))
                vtime->state = VTIME_IDLE;
        else if (current->flags & PF_VCPU)
                vtime->state = VTIME_GUEST;
        else
                vtime->state = VTIME_SYS;
        vtime->starttime = sched_clock();
        vtime->cpu = smp_processor_id();
        write_seqcount_end(&vtime->seqcount);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
        struct vtime *vtime = &t->vtime;
        unsigned long flags;

        local_irq_save(flags);
        write_seqcount_begin(&vtime->seqcount);
        vtime->state = VTIME_IDLE;
        vtime->starttime = sched_clock();
        vtime->cpu = cpu;
        write_seqcount_end(&vtime->seqcount);
        local_irq_restore(flags);
}

u64 task_gtime(struct task_struct *t)
{
        struct vtime *vtime = &t->vtime;
        unsigned int seq;
        u64 gtime;

        if (!vtime_accounting_enabled())
                return t->gtime;

        do {
                seq = read_seqcount_begin(&vtime->seqcount);

                gtime = t->gtime;
                if (vtime->state == VTIME_GUEST)
                        gtime += vtime->gtime + vtime_delta(vtime);

        } while (read_seqcount_retry(&vtime->seqcount, seq));

        return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
{
        struct vtime *vtime = &t->vtime;
        unsigned int seq;
        u64 delta;

        if (!vtime_accounting_enabled()) {
                *utime = t->utime;
                *stime = t->stime;
                return;
        }

        do {
                seq = read_seqcount_begin(&vtime->seqcount);

                *utime = t->utime;
                *stime = t->stime;

                /* Task is sleeping or idle, nothing to add */
                if (vtime->state < VTIME_SYS)
                        continue;

                delta = vtime_delta(vtime);

                /*
                 * Task runs either in user (including guest) or kernel space,
                 * add pending nohz time to the right place.
                 */
                if (vtime->state == VTIME_SYS)
                        *stime += vtime->stime + delta;
                else
                        *utime += vtime->utime + delta;
        } while (read_seqcount_retry(&vtime->seqcount, seq));
}

static int vtime_state_fetch(struct vtime *vtime, int cpu)
{
        int state = READ_ONCE(vtime->state);

        /*
         * We raced against a context switch, fetch the
         * kcpustat task again.
         */
        if (vtime->cpu != cpu && vtime->cpu != -1)
                return -EAGAIN;

        /*
         * Two possible things here:
         * 1) We are seeing the scheduling out task (prev) or any past one.
         * 2) We are seeing the scheduling in task (next) but it hasn't
         *    passed through vtime_task_switch() yet, so the pending
         *    cputime of the prev task may not be flushed yet.
         *
         * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
         */
        if (state == VTIME_INACTIVE)
                return -EAGAIN;

        return state;
}

static u64 kcpustat_user_vtime(struct vtime *vtime)
{
        if (vtime->state == VTIME_USER)
                return vtime->utime + vtime_delta(vtime);
        else if (vtime->state == VTIME_GUEST)
                return vtime->gtime + vtime_delta(vtime);
        return 0;
}

static int kcpustat_field_vtime(u64 *cpustat,
                                struct task_struct *tsk,
                                enum cpu_usage_stat usage,
                                int cpu, u64 *val)
{
        struct vtime *vtime = &tsk->vtime;
        unsigned int seq;

        do {
                int state;

                seq = read_seqcount_begin(&vtime->seqcount);

                state = vtime_state_fetch(vtime, cpu);
                if (state < 0)
                        return state;

                *val = cpustat[usage];

                /*
                 * Nice vs un-niced cputime accounting may be inaccurate if
                 * the nice value has changed since the last vtime update.
                 * But a proper fix would involve interrupting the target on
                 * nice updates, which is a no go on nohz_full (although the
                 * scheduler may still interrupt the target if rescheduling
                 * is needed...)
                 */
                switch (usage) {
                case CPUTIME_SYSTEM:
                        if (state == VTIME_SYS)
                                *val += vtime->stime + vtime_delta(vtime);
                        break;
                case CPUTIME_USER:
                        if (task_nice(tsk) <= 0)
                                *val += kcpustat_user_vtime(vtime);
                        break;
                case CPUTIME_NICE:
                        if (task_nice(tsk) > 0)
                                *val += kcpustat_user_vtime(vtime);
                        break;
                case CPUTIME_GUEST:
                        if (state == VTIME_GUEST && task_nice(tsk) <= 0)
                                *val += vtime->gtime + vtime_delta(vtime);
                        break;
                case CPUTIME_GUEST_NICE:
                        if (state == VTIME_GUEST && task_nice(tsk) > 0)
                                *val += vtime->gtime + vtime_delta(vtime);
                        break;
                default:
                        break;
                }
        } while (read_seqcount_retry(&vtime->seqcount, seq));

        return 0;
}

u64 kcpustat_field(struct kernel_cpustat *kcpustat,
                   enum cpu_usage_stat usage, int cpu)
{
        u64 *cpustat = kcpustat->cpustat;
        u64 val = cpustat[usage];
        struct rq *rq;
        int err;

        if (!vtime_accounting_enabled_cpu(cpu))
                return val;

        rq = cpu_rq(cpu);

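        /*
         * Retry until a consistent snapshot is observed:
         * kcpustat_field_vtime() returns -EAGAIN whenever rq->curr is
         * caught in the middle of a context switch (VTIME_INACTIVE or a
         * stale vtime->cpu), in which case we spin and look again.
         */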
        for (;;) {
                struct task_struct *curr;

                rcu_read_lock();
                curr = rcu_dereference(rq->curr);
                if (WARN_ON_ONCE(!curr)) {
                        rcu_read_unlock();
                        return cpustat[usage];
                }

                err = kcpustat_field_vtime(cpustat, curr, usage, cpu, &val);
                rcu_read_unlock();

                if (!err)
                        return val;

                cpu_relax();
        }
}
EXPORT_SYMBOL_GPL(kcpustat_field);

static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
                                    const struct kernel_cpustat *src,
                                    struct task_struct *tsk, int cpu)
{
        struct vtime *vtime = &tsk->vtime;
        unsigned int seq;

        do {
                u64 *cpustat;
                u64 delta;
                int state;

                seq = read_seqcount_begin(&vtime->seqcount);

                state = vtime_state_fetch(vtime, cpu);
                if (state < 0)
                        return state;

                *dst = *src;
                cpustat = dst->cpustat;

                /* Task is sleeping, dead or idle, nothing to add */
                if (state < VTIME_SYS)
                        continue;

                delta = vtime_delta(vtime);

                /*
                 * Task runs either in user (including guest) or kernel space,
                 * add pending nohz time to the right place.
                 */
                if (state == VTIME_SYS) {
                        cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
                } else if (state == VTIME_USER) {
                        if (task_nice(tsk) > 0)
                                cpustat[CPUTIME_NICE] += vtime->utime + delta;
                        else
                                cpustat[CPUTIME_USER] += vtime->utime + delta;
                } else {
                        WARN_ON_ONCE(state != VTIME_GUEST);
                        if (task_nice(tsk) > 0) {
                                cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
                                cpustat[CPUTIME_NICE] += vtime->gtime + delta;
                        } else {
                                cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
                                cpustat[CPUTIME_USER] += vtime->gtime + delta;
                        }
                }
        } while (read_seqcount_retry(&vtime->seqcount, seq));

        return 0;
}

void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
{
        const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
        struct rq *rq;
        int err;

        if (!vtime_accounting_enabled_cpu(cpu)) {
                *dst = *src;
                return;
        }

        rq = cpu_rq(cpu);

        for (;;) {
                struct task_struct *curr;

                rcu_read_lock();
                curr = rcu_dereference(rq->curr);
                if (WARN_ON_ONCE(!curr)) {
                        rcu_read_unlock();
                        *dst = *src;
                        return;
                }

                err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
                rcu_read_unlock();

                if (!err)
                        return;

                cpu_relax();
        }
}
EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);

#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */