// SPDX-License-Identifier: GPL-2.0-only
/*
 * Simple CPU accounting cgroup controller
 */
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU,
 * with interrupts disabled, so writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and racing
 * with irq/vtime_account on this CPU. We would then get either the old or
 * the new value, with the side effect of accounting a slice of irq time to
 * the wrong task when an irq is in progress while we read rq->clock. That
 * is a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);

static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}

static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
                                  enum cpu_usage_stat idx)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        u64_stats_update_begin(&irqtime->sync);
        cpustat[idx] += delta;
        irqtime->total += delta;
        irqtime->tick_delta += delta;
        u64_stats_update_end(&irqtime->sync);
}

/*
 * Called after incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
{
        struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
        unsigned int pc;
        s64 delta;
        int cpu;

        if (!sched_clock_irqtime)
                return;

        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
        irqtime->irq_start_time += delta;
        pc = irq_count() - offset;

        /*
         * We do not account for softirq time from ksoftirqd here.
         * We want to keep accounting that softirq time to the ksoftirqd
         * thread itself, so as not to confuse the scheduler with a special
         * task that does not consume any time but still wants to run.
         */
        if (pc & HARDIRQ_MASK)
                irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
        else if ((pc & SOFTIRQ_OFFSET) && curr != this_cpu_ksoftirqd())
                irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
}
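/*
 * Note on the classification above: subtracting "offset" from irq_count()
 * removes any preempt count increment that does not belong to the slice just
 * measured (e.g. the increment already done for a hardirq being entered), so
 * "pc" describes the context that was running while the delta elapsed.
 * Hardirq time is charged to CPUTIME_IRQ, softirq time to CPUTIME_SOFTIRQ,
 * and softirq time spent in ksoftirqd is deliberately left to be accounted
 * to the ksoftirqd task itself (see the comment above).
 */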

static u64 irqtime_tick_accounted(u64 maxtime)
{
        struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
        u64 delta;

        delta = min(irqtime->tick_delta, maxtime);
        irqtime->tick_delta -= delta;

        return delta;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime     (0)

static u64 irqtime_tick_accounted(u64 dummy)
{
        return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
        /*
         * Since every update is sure to touch the root cgroup, update it
         * first. If the root cgroup is the only cgroup, then nothing else
         * should be necessary.
         */
        __this_cpu_add(kernel_cpustat.cpustat[index], tmp);

        cgroup_account_cputime_field(p, index, tmp);
}

/*
 * Account user CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in user space since the last update
 */
void account_user_time(struct task_struct *p, u64 cputime)
{
        int index;

        /* Add user time to process. */
        p->utime += cputime;
        account_group_user_time(p, cputime);

        index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

        /* Add user time to cpustat. */
        task_group_account_field(p, index, cputime);

        /* Account for user time used */
        acct_account_cputime(p);
}

/*
 * Account guest CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in virtual machine since the last update
 */
void account_guest_time(struct task_struct *p, u64 cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        /* Add guest time to process. */
        p->utime += cputime;
        account_group_user_time(p, cputime);
        p->gtime += cputime;

        /* Add guest time to cpustat. */
        if (task_nice(p) > 0) {
                task_group_account_field(p, CPUTIME_NICE, cputime);
                cpustat[CPUTIME_GUEST_NICE] += cputime;
        } else {
                task_group_account_field(p, CPUTIME_USER, cputime);
                cpustat[CPUTIME_GUEST] += cputime;
        }
}

/*
 * Account system CPU time to a process and the desired cpustat field
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in kernel space since the last update
 * @index: the cpustat field that has to be updated
 */
void account_system_index_time(struct task_struct *p,
                               u64 cputime, enum cpu_usage_stat index)
{
        /* Add system time to process. */
        p->stime += cputime;
        account_group_system_time(p, cputime);

        /* Add system time to cpustat. */
        task_group_account_field(p, index, cputime);

        /* Account for system time used */
        acct_account_cputime(p);
}

/*
 * Account system CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the CPU time spent in kernel space since the last update
 */
void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
{
        int index;

        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                account_guest_time(p, cputime);
                return;
        }

        if (hardirq_count() - hardirq_offset)
                index = CPUTIME_IRQ;
        else if (in_serving_softirq())
                index = CPUTIME_SOFTIRQ;
        else
                index = CPUTIME_SYSTEM;

        account_system_index_time(p, cputime, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the CPU time spent in involuntary wait
 */
void account_steal_time(u64 cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        cpustat[CPUTIME_STEAL] += cputime;
}

/*
 * Account for idle time.
 * @cputime: the CPU time spent in idle wait
 */
void account_idle_time(u64 cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        struct rq *rq = this_rq();

        if (atomic_read(&rq->nr_iowait) > 0)
                cpustat[CPUTIME_IOWAIT] += cputime;
        else
                cpustat[CPUTIME_IDLE] += cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think has elapsed.
 */
static __always_inline u64 steal_account_process_time(u64 maxtime)
{
#ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
                u64 steal;

                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;
                steal = min(steal, maxtime);
                account_steal_time(steal);
                this_rq()->prev_steal_time += steal;

                return steal;
        }
#endif
        return 0;
}

/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline u64 account_other_time(u64 max)
{
        u64 accounted;

        lockdep_assert_irqs_disabled();

        accounted = steal_account_process_time(max);

        if (accounted < max)
                accounted += irqtime_tick_accounted(max - accounted);

        return accounted;
}
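/*
 * account_other_time() consumes pending steal time first and then fills any
 * remaining budget (up to @max) with hardirq/softirq time that has not yet
 * been folded into the tick; callers subtract the returned amount from the
 * time they are about to account as user, system or idle time.
 */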

#ifdef CONFIG_64BIT
static inline u64 read_sum_exec_runtime(struct task_struct *t)
{
        return t->se.sum_exec_runtime;
}
#else
static u64 read_sum_exec_runtime(struct task_struct *t)
{
        u64 ns;
        struct rq_flags rf;
        struct rq *rq;

        rq = task_rq_lock(t, &rf);
        ns = t->se.sum_exec_runtime;
        task_rq_unlock(rq, t, &rf);

        return ns;
}
#endif

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
        struct signal_struct *sig = tsk->signal;
        u64 utime, stime;
        struct task_struct *t;
        unsigned int seq, nextseq;
        unsigned long flags;

        /*
         * Update the current task's runtime to account for pending time since
         * the last scheduler action or thread_group_cputime() call. This
         * thread group might have other running tasks on different CPUs, but
         * updating their runtime can affect syscall performance, so we skip
         * accounting those pending times and rely only on values updated on
         * tick or other scheduler action.
         */
        if (same_thread_group(current, tsk))
                (void) task_sched_runtime(current);

        rcu_read_lock();
        /* Attempt a lockless read on the first round. */
        nextseq = 0;
        do {
                seq = nextseq;
                flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
                times->utime = sig->utime;
                times->stime = sig->stime;
                times->sum_exec_runtime = sig->sum_sched_runtime;

                for_each_thread(tsk, t) {
                        task_cputime(t, &utime, &stime);
                        times->utime += utime;
                        times->stime += stime;
                        times->sum_exec_runtime += read_sum_exec_runtime(t);
                }
                /* If lockless access failed, take the lock. */
                nextseq = 1;
        } while (need_seqretry(&sig->stats_lock, seq));
        done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
        rcu_read_unlock();
}
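/*
 * A note on the loop above: read_seqbegin_or_lock_irqsave() performs a
 * lockless seqcount read on the first pass (nextseq == 0) and takes
 * sig->stats_lock on the retry pass (nextseq == 1), so a concurrent writer
 * can force at most one retry before the sums are taken under the lock.
 */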
335
Frederic Weisbecker73fbec62012-06-16 15:57:37 +0200336#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the CPU time gets accounted to
 * @user_tick: is the tick from userspace
 * @ticks: number of ticks to account
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there is
 * no timer going off while we are in a hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on irq or
 * softirq time, as those no longer count in the task's exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         int ticks)
{
        u64 other, cputime = TICK_NSEC * ticks;

        /*
         * When returning from idle, many ticks can get accounted at
         * once, including some ticks of steal, irq, and softirq time.
         * Subtract those ticks from the amount of time accounted to
         * idle, or potentially user or system time. Due to rounding,
         * other time can exceed ticks occasionally.
         */
        other = account_other_time(ULONG_MAX);
        if (other >= cputime)
                return;

        cputime -= other;

        if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time does not get accounted in cpu_softirq_time.
                 * So, we have to handle it separately here.
                 * Also, p->stime needs to be updated for ksoftirqd.
                 */
                account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime);
        } else if (p == this_rq()->idle) {
                account_idle_time(cputime);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime);
        } else {
                account_system_index_time(p, cputime, CPUTIME_SYSTEM);
        }
}

static void irqtime_account_idle_ticks(int ticks)
{
        irqtime_account_process_tick(current, 0, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) { }
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                                int nr_ticks) { }
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_task_switch(struct task_struct *prev)
{
        if (is_idle_task(prev))
                vtime_account_idle(prev);
        else
                vtime_account_kernel(prev);

        vtime_flush(prev);
        arch_vtime_task_switch(prev);
}
# endif

void vtime_account_irq(struct task_struct *tsk, unsigned int offset)
{
        unsigned int pc = irq_count() - offset;

        if (pc & HARDIRQ_OFFSET) {
                vtime_account_hardirq(tsk);
        } else if (pc & SOFTIRQ_OFFSET) {
                vtime_account_softirq(tsk);
        } else if (!IS_ENABLED(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE) &&
                   is_idle_task(tsk)) {
                vtime_account_idle(tsk);
        } else {
                vtime_account_kernel(tsk);
        }
}

void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
                    u64 *ut, u64 *st)
{
        *ut = curr->utime;
        *st = curr->stime;
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        *ut = p->utime;
        *st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);

        *ut = cputime.utime;
        *st = cputime.stime;
}

#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */

/*
 * Account a single tick of CPU time.
 * @p: the process that the CPU time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
        u64 cputime, steal;

        if (vtime_accounting_enabled_this_cpu())
                return;

        if (sched_clock_irqtime) {
                irqtime_account_process_tick(p, user_tick, 1);
                return;
        }

        cputime = TICK_NSEC;
        steal = steal_account_process_time(ULONG_MAX);

        if (steal >= cputime)
                return;

        cputime -= steal;

        if (user_tick)
                account_user_time(p, cputime);
        else if ((p != this_rq()->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime);
        else
                account_idle_time(cputime);
}
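/*
 * Note on the final branch above: the tick itself runs in hardirq context, so
 * for the idle task irq_count() equals HARDIRQ_OFFSET unless the tick
 * interrupted another irq or a softirq. In that nested case the tick is
 * passed to account_system_time(), which classifies it as irq/softirq time
 * rather than idle time.
 */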

/*
 * Account multiple ticks of idle time.
 * @ticks: number of ticks to account as idle time
 */
void account_idle_ticks(unsigned long ticks)
{
        u64 cputime, steal;

        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }

        cputime = ticks * TICK_NSEC;
        steal = steal_account_process_time(ULONG_MAX);

        if (steal >= cputime)
                return;

        cputime -= steal;
        account_idle_time(cputime);
}

/*
 * Adjust the imprecise tick based cputime split against the precise scheduler
 * runtime accounting.
 *
 * Tick based cputime accounting depends on whether the random scheduling
 * timeslices of a task happen to be interrupted by the timer or not.
 * Depending on these circumstances, the number of these interrupts may be
 * over- or under-estimated, matching the real user and system cputime with a
 * variable precision.
 *
 * Fix this by scaling these tick based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
 */
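/*
 * Worked example with illustrative numbers: if tick sampling observed
 * 2 ticks of stime and 6 ticks of utime while the scheduler reports
 * rtime = 10,000,000 ns, the scaling below yields
 * stime = 10,000,000 * 2 / 8 = 2,500,000 ns and
 * utime = rtime - stime = 7,500,000 ns, before the monotonicity clamps
 * against prev->stime and prev->utime are applied.
 */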
void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
                    u64 *ut, u64 *st)
{
        u64 rtime, stime, utime;
        unsigned long flags;

        /* Serialize concurrent callers such that we can honour our guarantees */
        raw_spin_lock_irqsave(&prev->lock, flags);
        rtime = curr->sum_exec_runtime;

        /*
         * This is possible under two circumstances:
         *   - rtime isn't monotonic after all (a bug);
         *   - we got reordered by the lock.
         *
         * In both cases this acts as a filter such that the rest of the code
         * can assume it is monotonic regardless of anything else.
         */
        if (prev->stime + prev->utime >= rtime)
                goto out;

        stime = curr->stime;
        utime = curr->utime;

        /*
         * If either stime or utime are 0, assume all runtime is userspace.
         * Once a task gets some ticks, the monotonicity code at 'update:'
         * will ensure things converge to the observed ratio.
         */
        if (stime == 0) {
                utime = rtime;
                goto update;
        }

        if (utime == 0) {
                stime = rtime;
                goto update;
        }

        stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);

update:
        /*
         * Make sure stime doesn't go backwards; this preserves monotonicity
         * for utime because rtime is monotonic.
         *
         *  utime_i+1 = rtime_i+1 - stime_i
         *            = rtime_i+1 - (rtime_i - utime_i)
         *            = (rtime_i+1 - rtime_i) + utime_i
         *            >= utime_i
         */
        if (stime < prev->stime)
                stime = prev->stime;
        utime = rtime - stime;

        /*
         * Make sure utime doesn't go backwards; this still preserves
         * monotonicity for stime, analogous argument to above.
         */
        if (utime < prev->utime) {
                utime = prev->utime;
                stime = rtime - utime;
        }

        prev->stime = stime;
        prev->utime = utime;
out:
        *ut = prev->utime;
        *st = prev->stime;
        raw_spin_unlock_irqrestore(&prev->lock, flags);
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        struct task_cputime cputime = {
                .sum_exec_runtime = p->se.sum_exec_runtime,
        };

        if (task_cputime(p, &cputime.utime, &cputime.stime))
                cputime.sum_exec_runtime = task_sched_runtime(p);
        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);
        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static u64 vtime_delta(struct vtime *vtime)
{
        unsigned long long clock;

        clock = sched_clock();
        if (clock < vtime->starttime)
                return 0;

        return clock - vtime->starttime;
}

static u64 get_vtime_delta(struct vtime *vtime)
{
        u64 delta = vtime_delta(vtime);
        u64 other;

        /*
         * Unlike tick based timing, vtime based timing never has lost
         * ticks, and therefore has no need for steal time accounting to
         * make up for lost ticks. Vtime accounts a rounded version of the
         * actual elapsed time. Limit account_other_time to prevent rounding
         * errors from causing the elapsed vtime to go negative.
         */
        other = account_other_time(delta);
        WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
        vtime->starttime += delta;

        return delta - other;
}

static void vtime_account_system(struct task_struct *tsk,
                                 struct vtime *vtime)
{
        vtime->stime += get_vtime_delta(vtime);
        if (vtime->stime >= TICK_NSEC) {
                account_system_time(tsk, irq_count(), vtime->stime);
                vtime->stime = 0;
        }
}

static void vtime_account_guest(struct task_struct *tsk,
                                struct vtime *vtime)
{
        vtime->gtime += get_vtime_delta(vtime);
        if (vtime->gtime >= TICK_NSEC) {
                account_guest_time(tsk, vtime->gtime);
                vtime->gtime = 0;
        }
}
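/*
 * In the two helpers above (and in vtime_user_exit() below), the pending
 * delta is accumulated in vtime->stime/gtime and only flushed into the
 * regular account_*_time() paths once at least TICK_NSEC has built up,
 * presumably to batch the comparatively expensive cpustat/cgroup accounting
 * so it runs at most roughly once per tick's worth of CPU time rather than
 * at every context boundary.
 */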

static void __vtime_account_kernel(struct task_struct *tsk,
                                   struct vtime *vtime)
{
        /* We might have scheduled out from guest path */
        if (vtime->state == VTIME_GUEST)
                vtime_account_guest(tsk, vtime);
        else
                vtime_account_system(tsk, vtime);
}

void vtime_account_kernel(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        if (!vtime_delta(vtime))
                return;

        write_seqcount_begin(&vtime->seqcount);
        __vtime_account_kernel(tsk, vtime);
        write_seqcount_end(&vtime->seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime_account_system(tsk, vtime);
        vtime->state = VTIME_USER;
        write_seqcount_end(&vtime->seqcount);
}

void vtime_user_exit(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime->utime += get_vtime_delta(vtime);
        if (vtime->utime >= TICK_NSEC) {
                account_user_time(tsk, vtime->utime);
                vtime->utime = 0;
        }
        vtime->state = VTIME_SYS;
        write_seqcount_end(&vtime->seqcount);
}

void vtime_guest_enter(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;
        /*
         * The flags must be updated under the seqcount, together with
         * the vtime starttime flush and update.
         * That enforces the right ordering and update sequence
         * synchronization against the reader (task_gtime()),
         * which can thus safely catch up with a tickless delta.
         */
        write_seqcount_begin(&vtime->seqcount);
        vtime_account_system(tsk, vtime);
        tsk->flags |= PF_VCPU;
        vtime->state = VTIME_GUEST;
        write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
        struct vtime *vtime = &tsk->vtime;

        write_seqcount_begin(&vtime->seqcount);
        vtime_account_guest(tsk, vtime);
        tsk->flags &= ~PF_VCPU;
        vtime->state = VTIME_SYS;
        write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
        account_idle_time(get_vtime_delta(&tsk->vtime));
}

void vtime_task_switch_generic(struct task_struct *prev)
{
        struct vtime *vtime = &prev->vtime;

        write_seqcount_begin(&vtime->seqcount);
        if (vtime->state == VTIME_IDLE)
                vtime_account_idle(prev);
        else
                __vtime_account_kernel(prev, vtime);
        vtime->state = VTIME_INACTIVE;
        vtime->cpu = -1;
        write_seqcount_end(&vtime->seqcount);

        vtime = &current->vtime;

        write_seqcount_begin(&vtime->seqcount);
        if (is_idle_task(current))
                vtime->state = VTIME_IDLE;
        else if (current->flags & PF_VCPU)
                vtime->state = VTIME_GUEST;
        else
                vtime->state = VTIME_SYS;
        vtime->starttime = sched_clock();
        vtime->cpu = smp_processor_id();
        write_seqcount_end(&vtime->seqcount);
}
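/*
 * Summary of the context switch above: the outgoing task's pending vtime is
 * flushed and its vtime marked VTIME_INACTIVE with cpu = -1, then the
 * incoming task's vtime is restarted at the current sched_clock() with a
 * state derived from what it is about to run as (idle, guest or kernel).
 * Remote readers use the VTIME_INACTIVE state to detect that they raced with
 * this switch (see vtime_state_fetch() below).
 */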

void vtime_init_idle(struct task_struct *t, int cpu)
{
        struct vtime *vtime = &t->vtime;
        unsigned long flags;

        local_irq_save(flags);
        write_seqcount_begin(&vtime->seqcount);
        vtime->state = VTIME_IDLE;
        vtime->starttime = sched_clock();
        vtime->cpu = cpu;
        write_seqcount_end(&vtime->seqcount);
        local_irq_restore(flags);
}

u64 task_gtime(struct task_struct *t)
{
        struct vtime *vtime = &t->vtime;
        unsigned int seq;
        u64 gtime;

        if (!vtime_accounting_enabled())
                return t->gtime;

        do {
                seq = read_seqcount_begin(&vtime->seqcount);

                gtime = t->gtime;
                if (vtime->state == VTIME_GUEST)
                        gtime += vtime->gtime + vtime_delta(vtime);

        } while (read_seqcount_retry(&vtime->seqcount, seq));

        return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
{
        struct vtime *vtime = &t->vtime;
        unsigned int seq;
        u64 delta;
        int ret;

        if (!vtime_accounting_enabled()) {
                *utime = t->utime;
                *stime = t->stime;
                return false;
        }

        do {
                ret = false;
                seq = read_seqcount_begin(&vtime->seqcount);

                *utime = t->utime;
                *stime = t->stime;

                /* Task is sleeping or idle, nothing to add */
                if (vtime->state < VTIME_SYS)
                        continue;

                ret = true;
                delta = vtime_delta(vtime);

                /*
                 * Task runs either in user (including guest) or kernel space,
                 * add pending nohz time to the right place.
                 */
                if (vtime->state == VTIME_SYS)
                        *stime += vtime->stime + delta;
                else
                        *utime += vtime->utime + delta;
        } while (read_seqcount_retry(&vtime->seqcount, seq));

        return ret;
}
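/*
 * Callers of task_cputime() can use the return value to tell whether the
 * snapshot already includes a pending nohz delta (true: the task is running
 * in a vtime accounted state, so sum_exec_runtime should be refreshed as
 * well, as task_cputime_adjusted() does) or whether the raw utime/stime
 * fields were returned unchanged (false).
 */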

static int vtime_state_fetch(struct vtime *vtime, int cpu)
{
        int state = READ_ONCE(vtime->state);

        /*
         * We raced against a context switch, fetch the
         * kcpustat task again.
         */
        if (vtime->cpu != cpu && vtime->cpu != -1)
                return -EAGAIN;

        /*
         * Two possible things here:
         * 1) We are seeing the scheduling out task (prev) or any past one.
         * 2) We are seeing the scheduling in task (next) but it hasn't
         *    passed through vtime_task_switch() yet, so the pending
         *    cputime of the prev task may not be flushed yet.
         *
         * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
         */
        if (state == VTIME_INACTIVE)
                return -EAGAIN;

        return state;
}

static u64 kcpustat_user_vtime(struct vtime *vtime)
{
        if (vtime->state == VTIME_USER)
                return vtime->utime + vtime_delta(vtime);
        else if (vtime->state == VTIME_GUEST)
                return vtime->gtime + vtime_delta(vtime);
        return 0;
}

static int kcpustat_field_vtime(u64 *cpustat,
                                struct task_struct *tsk,
                                enum cpu_usage_stat usage,
                                int cpu, u64 *val)
{
        struct vtime *vtime = &tsk->vtime;
        unsigned int seq;

        do {
                int state;

                seq = read_seqcount_begin(&vtime->seqcount);

                state = vtime_state_fetch(vtime, cpu);
                if (state < 0)
                        return state;

                *val = cpustat[usage];

                /*
                 * Nice vs. unnice cputime accounting may be inaccurate if
                 * the nice value has changed since the last vtime update.
                 * But a proper fix would involve interrupting the target on
                 * nice updates, which is a no-go on nohz_full (although the
                 * scheduler may still interrupt the target if rescheduling
                 * is needed...)
                 */
                switch (usage) {
                case CPUTIME_SYSTEM:
                        if (state == VTIME_SYS)
                                *val += vtime->stime + vtime_delta(vtime);
                        break;
                case CPUTIME_USER:
                        if (task_nice(tsk) <= 0)
                                *val += kcpustat_user_vtime(vtime);
                        break;
                case CPUTIME_NICE:
                        if (task_nice(tsk) > 0)
                                *val += kcpustat_user_vtime(vtime);
                        break;
                case CPUTIME_GUEST:
                        if (state == VTIME_GUEST && task_nice(tsk) <= 0)
                                *val += vtime->gtime + vtime_delta(vtime);
                        break;
                case CPUTIME_GUEST_NICE:
                        if (state == VTIME_GUEST && task_nice(tsk) > 0)
                                *val += vtime->gtime + vtime_delta(vtime);
                        break;
                default:
                        break;
                }
        } while (read_seqcount_retry(&vtime->seqcount, seq));

        return 0;
}

u64 kcpustat_field(struct kernel_cpustat *kcpustat,
                   enum cpu_usage_stat usage, int cpu)
{
        u64 *cpustat = kcpustat->cpustat;
        u64 val = cpustat[usage];
        struct rq *rq;
        int err;

        if (!vtime_accounting_enabled_cpu(cpu))
                return val;

        rq = cpu_rq(cpu);

        for (;;) {
                struct task_struct *curr;

                rcu_read_lock();
                curr = rcu_dereference(rq->curr);
                if (WARN_ON_ONCE(!curr)) {
                        rcu_read_unlock();
                        return cpustat[usage];
                }

                err = kcpustat_field_vtime(cpustat, curr, usage, cpu, &val);
                rcu_read_unlock();

                if (!err)
                        return val;

                cpu_relax();
        }
}
EXPORT_SYMBOL_GPL(kcpustat_field);
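/*
 * kcpustat_field() thus returns a single cpustat field for a remote CPU with
 * the currently running task's not-yet-flushed vtime folded in when that CPU
 * is running in nohz_full/vtime mode; if it races with a context switch on
 * the target CPU (kcpustat_field_vtime() returns -EAGAIN), it simply retries
 * with cpu_relax() until a stable snapshot is obtained.
 */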

static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
                                    const struct kernel_cpustat *src,
                                    struct task_struct *tsk, int cpu)
{
        struct vtime *vtime = &tsk->vtime;
        unsigned int seq;

        do {
                u64 *cpustat;
                u64 delta;
                int state;

                seq = read_seqcount_begin(&vtime->seqcount);

                state = vtime_state_fetch(vtime, cpu);
                if (state < 0)
                        return state;

                *dst = *src;
                cpustat = dst->cpustat;

                /* Task is sleeping, dead or idle, nothing to add */
                if (state < VTIME_SYS)
                        continue;

                delta = vtime_delta(vtime);

                /*
                 * Task runs either in user (including guest) or kernel space,
                 * add pending nohz time to the right place.
                 */
                if (state == VTIME_SYS) {
                        cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
                } else if (state == VTIME_USER) {
                        if (task_nice(tsk) > 0)
                                cpustat[CPUTIME_NICE] += vtime->utime + delta;
                        else
                                cpustat[CPUTIME_USER] += vtime->utime + delta;
                } else {
                        WARN_ON_ONCE(state != VTIME_GUEST);
                        if (task_nice(tsk) > 0) {
                                cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
                                cpustat[CPUTIME_NICE] += vtime->gtime + delta;
                        } else {
                                cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
                                cpustat[CPUTIME_USER] += vtime->gtime + delta;
                        }
                }
        } while (read_seqcount_retry(&vtime->seqcount, seq));

        return 0;
}

void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
{
        const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
        struct rq *rq;
        int err;

        if (!vtime_accounting_enabled_cpu(cpu)) {
                *dst = *src;
                return;
        }

        rq = cpu_rq(cpu);

        for (;;) {
                struct task_struct *curr;

                rcu_read_lock();
                curr = rcu_dereference(rq->curr);
                if (WARN_ON_ONCE(!curr)) {
                        rcu_read_unlock();
                        *dst = *src;
                        return;
                }

                err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
                rcu_read_unlock();

                if (!err)
                        return;

                cpu_relax();
        }
}
EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);

#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */