// SPDX-License-Identifier: GPL-2.0
/*
 * Implement CPU time clocks for the POSIX clock interface.
 */

#include <linux/sched/signal.h>
#include <linux/sched/cputime.h>
#include <linux/posix-timers.h>
#include <linux/errno.h>
#include <linux/math64.h>
#include <linux/uaccess.h>
#include <linux/kernel_stat.h>
#include <trace/events/timer.h>
#include <linux/tick.h>
#include <linux/workqueue.h>
#include <linux/compat.h>
#include <linux/sched/deadline.h>

#include "posix-timers.h"

static void posix_cpu_timer_rearm(struct k_itimer *timer);

void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
{
	posix_cputimers_init(pct);
	if (cpu_limit != RLIM_INFINITY) {
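		/* RLIMIT_CPU is given in seconds, the expiry cache is in nanoseconds */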
		pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
		pct->timers_active = true;
	}
}

/*
 * Called after updating RLIMIT_CPU to run cpu timer and update
 * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if
 * necessary. Needs siglock protection since other code may update the
 * expiration cache as well.
 */
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
	u64 nsecs = rlim_new * NSEC_PER_SEC;

	spin_lock_irq(&task->sighand->siglock);
	set_process_cpu_timer(task, CPUCLOCK_PROF, &nsecs, NULL);
	spin_unlock_irq(&task->sighand->siglock);
}

/*
 * Functions for validating access to tasks.
 */
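/*
 * The clockid encodes the target PID (CPUCLOCK_PID), whether it refers to a
 * thread or a process (CPUCLOCK_PERTHREAD) and the clock type
 * (CPUCLOCK_WHICH: PROF, VIRT or SCHED).
 */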
static struct pid *pid_for_clock(const clockid_t clock, bool gettime)
{
	const bool thread = !!CPUCLOCK_PERTHREAD(clock);
	const pid_t upid = CPUCLOCK_PID(clock);
	struct pid *pid;

	if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
		return NULL;

	/*
	 * If the encoded PID is 0, then the timer is targeted at current
	 * or the process to which current belongs.
	 */
	if (upid == 0)
		return thread ? task_pid(current) : task_tgid(current);

	pid = find_vpid(upid);
	if (!pid)
		return NULL;

	if (thread) {
		struct task_struct *tsk = pid_task(pid, PIDTYPE_PID);
		return (tsk && same_thread_group(tsk, current)) ? pid : NULL;
	}

	/*
	 * For clock_gettime(PROCESS) allow finding the process with the
	 * PID of the current task. The code needs the tgid of the
	 * process so that pid_task(pid, PIDTYPE_TGID) can be used to
	 * find the process.
	 */
	if (gettime && (pid == task_pid(current)))
		return task_tgid(current);

	/*
	 * For process clocks, require that the PID identifies a process.
	 */
	return pid_has_task(pid, PIDTYPE_TGID) ? pid : NULL;
}

static inline int validate_clock_permissions(const clockid_t clock)
{
	int ret;

	rcu_read_lock();
	ret = pid_for_clock(clock, false) ? 0 : -EINVAL;
	rcu_read_unlock();

	return ret;
}

static inline enum pid_type clock_pid_type(const clockid_t clock)
{
	return CPUCLOCK_PERTHREAD(clock) ? PIDTYPE_PID : PIDTYPE_TGID;
}

static inline struct task_struct *cpu_timer_task_rcu(struct k_itimer *timer)
{
	return pid_task(timer->it.cpu.pid, clock_pid_type(timer->it_clock));
}

/*
 * Update expiry time from increment, and increase overrun count,
 * given the current clock sample.
 */
static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
{
	u64 delta, incr, expires = timer->it.cpu.node.expires;
	int i;

	if (!timer->it_interval)
		return expires;

	if (now < expires)
		return expires;

	incr = timer->it_interval;
	delta = now + incr - expires;

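	/*
	 * Catch up in O(log(delta/incr)) steps: double the increment until
	 * it covers the missed time, then walk back down, adding each
	 * power-of-two multiple that still fits and accounting the
	 * corresponding number of overruns.
	 */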
	/* Don't use (incr*2 < delta), incr*2 might overflow. */
	for (i = 0; incr < delta - incr; i++)
		incr = incr << 1;

	for (; i >= 0; incr >>= 1, i--) {
		if (delta < incr)
			continue;

		timer->it.cpu.node.expires += incr;
		timer->it_overrun += 1LL << i;
		delta -= incr;
	}
	return timer->it.cpu.node.expires;
}

/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)
{
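	/*
	 * ~x is 0 only for x == U64_MAX, so the OR of the complements is 0
	 * iff all three entries are U64_MAX.
	 */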
	return !(~pct->bases[CPUCLOCK_PROF].nextevt |
		 ~pct->bases[CPUCLOCK_VIRT].nextevt |
		 ~pct->bases[CPUCLOCK_SCHED].nextevt);
}

static int
posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
{
	int error = validate_clock_permissions(which_clock);

	if (!error) {
		tp->tv_sec = 0;
		tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
		if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
			/*
			 * If sched_clock is using a cycle counter, we
			 * don't have any idea of its true resolution
			 * exported, but it is much more than 1s/HZ.
			 */
			tp->tv_nsec = 1;
		}
	}
	return error;
}

static int
posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)
{
	int error = validate_clock_permissions(clock);

	/*
	 * You can never reset a CPU clock, but we check for other errors
	 * in the call before failing with EPERM.
	 */
	return error ? : -EPERM;
}

/*
 * Sample a per-thread clock for the given task. clkid is validated.
 */
static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p)
{
	u64 utime, stime;

	if (clkid == CPUCLOCK_SCHED)
		return task_sched_runtime(p);

	task_cputime(p, &utime, &stime);

	switch (clkid) {
	case CPUCLOCK_PROF:
		return utime + stime;
	case CPUCLOCK_VIRT:
		return utime;
	default:
		WARN_ON_ONCE(1);
	}
	return 0;
}

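/*
 * store_samples() fills the array indexed by clock id (CPUCLOCK_PROF,
 * CPUCLOCK_VIRT, CPUCLOCK_SCHED), so callers can pick samples[clkid].
 */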
static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime)
{
	samples[CPUCLOCK_PROF] = stime + utime;
	samples[CPUCLOCK_VIRT] = utime;
	samples[CPUCLOCK_SCHED] = rtime;
}

static void task_sample_cputime(struct task_struct *p, u64 *samples)
{
	u64 stime, utime;

	task_cputime(p, &utime, &stime);
	store_samples(samples, stime, utime, p->se.sum_exec_runtime);
}

static void proc_sample_cputime_atomic(struct task_cputime_atomic *at,
				       u64 *samples)
{
	u64 stime, utime, rtime;

	utime = atomic64_read(&at->utime);
	stime = atomic64_read(&at->stime);
	rtime = atomic64_read(&at->sum_exec_runtime);
	store_samples(samples, stime, utime, rtime);
}

/*
 * Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg
 * to avoid race conditions with concurrent updates to cputime.
 */
static inline void __update_gt_cputime(atomic64_t *cputime, u64 sum_cputime)
{
	u64 curr_cputime;
retry:
	curr_cputime = atomic64_read(cputime);
	if (sum_cputime > curr_cputime) {
		if (atomic64_cmpxchg(cputime, curr_cputime, sum_cputime) != curr_cputime)
			goto retry;
	}
}

static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
			      struct task_cputime *sum)
{
	__update_gt_cputime(&cputime_atomic->utime, sum->utime);
	__update_gt_cputime(&cputime_atomic->stime, sum->stime);
	__update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
}

/**
 * thread_group_sample_cputime - Sample cputime for a given task
 * @tsk:	Task for which cputime needs to be sampled
 * @samples:	Storage for time samples
 *
 * Called from sys_getitimer() to calculate the expiry time of an active
 * timer. That means group cputime accounting is already active. Called
 * with task sighand lock held.
 *
 * Updates @samples with an up-to-date sample of the thread group cputimes.
 */
void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)
{
	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;

	WARN_ON_ONCE(!pct->timers_active);

	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
}

/**
 * thread_group_start_cputime - Start cputime and return a sample
 * @tsk:	Task for which cputime needs to be started
 * @samples:	Storage for time samples
 *
 * The thread group cputime accounting is avoided when there are no posix
 * CPU timers armed. Before starting a timer it's required to check whether
 * the time accounting is active. If not, a full update of the atomic
 * accounting store needs to be done and the accounting enabled.
 *
 * Updates @samples with an up-to-date sample of the thread group cputimes.
 */
static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
{
	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
	struct posix_cputimers *pct = &tsk->signal->posix_cputimers;

	lockdep_assert_task_sighand_held(tsk);

	/* Check if cputimer isn't running. This is accessed without locking. */
	if (!READ_ONCE(pct->timers_active)) {
		struct task_cputime sum;

		/*
		 * The POSIX timer interface allows for absolute time expiry
		 * values through the TIMER_ABSTIME flag, therefore we have
		 * to synchronize the timer to the clock every time we start it.
		 */
		thread_group_cputime(tsk, &sum);
		update_gt_cputime(&cputimer->cputime_atomic, &sum);

		/*
		 * We're setting timers_active without a lock. Ensure this
		 * only gets written to in one operation. We set it after
		 * update_gt_cputime() as a small optimization, but
		 * barriers are not required because update_gt_cputime()
		 * can handle concurrent updates.
		 */
		WRITE_ONCE(pct->timers_active, true);
	}
	proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
}

static void __thread_group_cputime(struct task_struct *tsk, u64 *samples)
{
	struct task_cputime ct;

	thread_group_cputime(tsk, &ct);
	store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime);
}

/*
 * Sample a process (thread group) clock for the given task clkid. If the
 * group's cputime accounting is already enabled, read the atomic
 * store. Otherwise a full update is required. clkid is already validated.
 */
static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p,
				  bool start)
{
	struct thread_group_cputimer *cputimer = &p->signal->cputimer;
	struct posix_cputimers *pct = &p->signal->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];

	if (!READ_ONCE(pct->timers_active)) {
		if (start)
			thread_group_start_cputime(p, samples);
		else
			__thread_group_cputime(p, samples);
	} else {
		proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
	}

	return samples[clkid];
}

static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
{
	const clockid_t clkid = CPUCLOCK_WHICH(clock);
	struct task_struct *tsk;
	u64 t;

	rcu_read_lock();
	tsk = pid_task(pid_for_clock(clock, true), clock_pid_type(clock));
	if (!tsk) {
		rcu_read_unlock();
		return -EINVAL;
	}

	if (CPUCLOCK_PERTHREAD(clock))
		t = cpu_clock_sample(clkid, tsk);
	else
		t = cpu_clock_sample_group(clkid, tsk, false);
	rcu_read_unlock();

	*tp = ns_to_timespec64(t);
	return 0;
}

/*
 * Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
 * This is called from sys_timer_create() and do_cpu_nanosleep() with the
 * new timer already all-zeros initialized.
 */
static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
	static struct lock_class_key posix_cpu_timers_key;
	struct pid *pid;

	rcu_read_lock();
	pid = pid_for_clock(new_timer->it_clock, false);
	if (!pid) {
		rcu_read_unlock();
		return -EINVAL;
	}

	/*
	 * If posix timer expiry is handled in task work context then
	 * timer::it_lock can be taken without disabling interrupts as all
	 * other locking happens in task context. This requires a separate
	 * lock class key otherwise regular posix timer expiry would record
	 * the lock class being taken in interrupt context and generate a
	 * false positive warning.
	 */
	if (IS_ENABLED(CONFIG_POSIX_CPU_TIMERS_TASK_WORK))
		lockdep_set_class(&new_timer->it_lock, &posix_cpu_timers_key);

	new_timer->kclock = &clock_posix_cpu;
	timerqueue_init(&new_timer->it.cpu.node);
	new_timer->it.cpu.pid = get_pid(pid);
	rcu_read_unlock();
	return 0;
}

static struct posix_cputimer_base *timer_base(struct k_itimer *timer,
					      struct task_struct *tsk)
{
	int clkidx = CPUCLOCK_WHICH(timer->it_clock);

	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		return tsk->posix_cputimers.bases + clkidx;
	else
		return tsk->signal->posix_cputimers.bases + clkidx;
}

/*
 * Force recalculating the base earliest expiration on the next tick.
 * This will also re-evaluate the need to keep around the process wide
 * cputime counter and tick dependency and eventually shut these down
 * if necessary.
 */
static void trigger_base_recalc_expires(struct k_itimer *timer,
					struct task_struct *tsk)
{
	struct posix_cputimer_base *base = timer_base(timer, tsk);

	base->nextevt = 0;
}

/*
 * Dequeue the timer and reset the base if it was its earliest expiration.
 * It makes sure the next tick recalculates the base next expiration so we
 * don't keep the costly process wide cputime counter around for a random
 * amount of time, along with the tick dependency.
 *
 * If another timer gets queued between this and the next tick, its
 * expiration will update the base next event if necessary on the next
 * tick.
 */
static void disarm_timer(struct k_itimer *timer, struct task_struct *p)
{
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct posix_cputimer_base *base;

	if (!cpu_timer_dequeue(ctmr))
		return;

	base = timer_base(timer, p);
	if (cpu_timer_getexpires(ctmr) == base->nextevt)
		trigger_base_recalc_expires(timer, p);
}


/*
 * Clean up a CPU-clock timer that is about to be destroyed.
 * This is called from timer deletion with the timer already locked.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again. (This happens when the timer is in the middle of firing.)
 */
static int posix_cpu_timer_del(struct k_itimer *timer)
{
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct sighand_struct *sighand;
	struct task_struct *p;
	unsigned long flags;
	int ret = 0;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/*
	 * Protect against sighand release/switch in exit/exec and process/
	 * thread timer list entry concurrent read/writes.
	 */
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL)) {
		/*
		 * This raced with the reaping of the task. The exit cleanup
		 * should have removed this timer from the timer queue.
		 */
		WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
	} else {
		if (timer->it.cpu.firing)
			ret = TIMER_RETRY;
		else
			disarm_timer(timer, p);

		unlock_task_sighand(p, &flags);
	}

out:
	rcu_read_unlock();
	if (!ret)
		put_pid(ctmr->pid);

	return ret;
}

static void cleanup_timerqueue(struct timerqueue_head *head)
{
	struct timerqueue_node *node;
	struct cpu_timer *ctmr;

	while ((node = timerqueue_getnext(head))) {
		timerqueue_del(head, node);
		ctmr = container_of(node, struct cpu_timer, node);
		ctmr->head = NULL;
	}
}

/*
 * Clean out CPU timers which are still armed when a thread exits. The
 * timers are only removed from the list. No other updates are done. The
 * corresponding posix timers are still accessible, but cannot be rearmed.
 *
 * This must be called with the siglock held.
 */
static void cleanup_timers(struct posix_cputimers *pct)
{
	cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);
	cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);
	cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);
}

/*
 * These are both called with the siglock held, when the current thread
 * is being reaped. When the final (leader) thread in the group is reaped,
 * posix_cpu_timers_exit_group will be called after posix_cpu_timers_exit.
 */
void posix_cpu_timers_exit(struct task_struct *tsk)
{
	cleanup_timers(&tsk->posix_cputimers);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
	cleanup_timers(&tsk->signal->posix_cputimers);
}

/*
 * Insert the timer on the appropriate list before any timers that
 * expire later. This must be called with the sighand lock held.
 */
static void arm_timer(struct k_itimer *timer, struct task_struct *p)
{
	struct posix_cputimer_base *base = timer_base(timer, p);
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 newexp = cpu_timer_getexpires(ctmr);

	if (!cpu_timer_enqueue(&base->tqhead, ctmr))
		return;

	/*
	 * We are the new earliest-expiring POSIX 1.b timer, hence
	 * need to update expiration cache. Take into account that
	 * for process timers we share expiration cache with itimers
	 * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
	 */
	if (newexp < base->nextevt)
		base->nextevt = newexp;

	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
	else
		tick_dep_set_signal(p, TICK_DEP_BIT_POSIX_TIMER);
}

/*
 * The timer is locked, fire it and arrange for its reload.
 */
static void cpu_timer_fire(struct k_itimer *timer)
{
	struct cpu_timer *ctmr = &timer->it.cpu;

	if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
		/*
		 * User doesn't want any signal.
		 */
		cpu_timer_setexpires(ctmr, 0);
	} else if (unlikely(timer->sigq == NULL)) {
		/*
		 * This is a special case for clock_nanosleep,
		 * not a normal timer from sys_timer_create.
		 */
		wake_up_process(timer->it_process);
		cpu_timer_setexpires(ctmr, 0);
	} else if (!timer->it_interval) {
		/*
		 * One-shot timer. Clear it as soon as it's fired.
		 */
		posix_timer_event(timer, 0);
		cpu_timer_setexpires(ctmr, 0);
	} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
		/*
		 * The signal did not get queued because the signal
		 * was ignored, so we won't get any callback to
		 * reload the timer. But we need to keep it
		 * ticking in case the signal is deliverable next time.
		 */
		posix_cpu_timer_rearm(timer);
		++timer->it_requeue_pending;
	}
}

/*
 * Guts of sys_timer_settime for CPU timers.
 * This is called with the timer locked and interrupts disabled.
 * If we return TIMER_RETRY, it's necessary to release the timer's lock
 * and try again. (This happens when the timer is in the middle of firing.)
 */
static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
			       struct itimerspec64 *new, struct itimerspec64 *old)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	u64 old_expires, new_expires, old_incr, val;
	struct cpu_timer *ctmr = &timer->it.cpu;
	struct sighand_struct *sighand;
	struct task_struct *p;
	unsigned long flags;
	int ret = 0;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p) {
		/*
		 * If p has just been reaped, we can no
		 * longer get any information about it at all.
		 */
		rcu_read_unlock();
		return -ESRCH;
	}

	/*
	 * Use the to_ktime conversion because that clamps the maximum
	 * value to KTIME_MAX and avoid multiplication overflows.
	 */
	new_expires = ktime_to_ns(timespec64_to_ktime(new->it_value));

	/*
	 * Protect against sighand release/switch in exit/exec and p->cpu_timers
	 * and p->signal->cpu_timers read/write in arm_timer()
	 */
	sighand = lock_task_sighand(p, &flags);
	/*
	 * If p has just been reaped, we can no
	 * longer get any information about it at all.
	 */
	if (unlikely(sighand == NULL)) {
		rcu_read_unlock();
		return -ESRCH;
	}

	/*
	 * Disarm any old timer after extracting its expiry time.
	 */
	old_incr = timer->it_interval;
	old_expires = cpu_timer_getexpires(ctmr);

	if (unlikely(timer->it.cpu.firing)) {
		timer->it.cpu.firing = -1;
		ret = TIMER_RETRY;
	} else {
		cpu_timer_dequeue(ctmr);
	}

	/*
	 * We need to sample the current value to convert the new
	 * value from relative to absolute, and to convert the
	 * old value from absolute to relative. To set a process
	 * timer, we need a sample to balance the thread expiry
	 * times (in arm_timer). With an absolute time, we must
	 * check if it's already passed. In short, we need a sample.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		val = cpu_clock_sample(clkid, p);
	else
		val = cpu_clock_sample_group(clkid, p, true);

	if (old) {
		if (old_expires == 0) {
			old->it_value.tv_sec = 0;
			old->it_value.tv_nsec = 0;
		} else {
			/*
			 * Update the timer in case it has overrun already.
			 * If it has, we'll report it as having overrun and
			 * with the next reloaded timer already ticking,
			 * though we are swallowing that pending
			 * notification here to install the new setting.
			 */
			u64 exp = bump_cpu_timer(timer, val);

			if (val < exp) {
				old_expires = exp - val;
				old->it_value = ns_to_timespec64(old_expires);
			} else {
				old->it_value.tv_nsec = 1;
				old->it_value.tv_sec = 0;
			}
		}
	}

	if (unlikely(ret)) {
		/*
		 * We are colliding with the timer actually firing.
		 * Punt after filling in the timer's old value, and
		 * disable this firing since we are already reporting
		 * it as an overrun (thanks to bump_cpu_timer above).
		 */
		unlock_task_sighand(p, &flags);
		goto out;
	}

	if (new_expires != 0 && !(timer_flags & TIMER_ABSTIME)) {
		new_expires += val;
	}

	/*
	 * Install the new expiry time (or zero).
	 * For a timer with no notification action, we don't actually
	 * arm the timer (we'll just fake it for timer_gettime).
	 */
	cpu_timer_setexpires(ctmr, new_expires);
	if (new_expires != 0 && val < new_expires) {
		arm_timer(timer, p);
	}

	unlock_task_sighand(p, &flags);
	/*
	 * Install the new reload setting, and
	 * set up the signal and overrun bookkeeping.
	 */
	timer->it_interval = timespec64_to_ktime(new->it_interval);

	/*
	 * This acts as a modification timestamp for the timer,
	 * so any automatic reload attempt will punt on seeing
	 * that we have reset the timer manually.
	 */
	timer->it_requeue_pending = (timer->it_requeue_pending + 2) &
		~REQUEUE_PENDING;
	timer->it_overrun_last = 0;
	timer->it_overrun = -1;

	if (val >= new_expires) {
		if (new_expires != 0) {
			/*
			 * The designated time already passed, so we notify
			 * immediately, even if the thread never runs to
			 * accumulate more time on this clock.
			 */
			cpu_timer_fire(timer);
		}

		/*
		 * Make sure we don't keep around the process wide cputime
		 * counter or the tick dependency if they are not necessary.
		 */
		sighand = lock_task_sighand(p, &flags);
		if (!sighand)
			goto out;

		if (!cpu_timer_queued(ctmr))
			trigger_base_recalc_expires(timer, p);

		unlock_task_sighand(p, &flags);
	}
 out:
	rcu_read_unlock();
	if (old)
		old->it_interval = ns_to_timespec64(old_incr);

	return ret;
}

static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct cpu_timer *ctmr = &timer->it.cpu;
	u64 now, expires = cpu_timer_getexpires(ctmr);
	struct task_struct *p;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/*
	 * Easy part: convert the reload time.
	 */
	itp->it_interval = ktime_to_timespec64(timer->it_interval);

	if (!expires)
		goto out;

	/*
	 * Sample the clock to take the difference with the expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		now = cpu_clock_sample(clkid, p);
	else
		now = cpu_clock_sample_group(clkid, p, false);

	if (now < expires) {
		itp->it_value = ns_to_timespec64(expires - now);
	} else {
		/*
		 * The timer should have expired already, but the firing
		 * hasn't taken place yet. Say it's just about to expire.
		 */
		itp->it_value.tv_nsec = 1;
		itp->it_value.tv_sec = 0;
	}
out:
	rcu_read_unlock();
}

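/*
 * Upper bound on the number of expired timers collected from one timer
 * queue in a single collect_timerqueue() pass.
 */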
#define MAX_COLLECTED	20

static u64 collect_timerqueue(struct timerqueue_head *head,
			      struct list_head *firing, u64 now)
{
	struct timerqueue_node *next;
	int i = 0;

	while ((next = timerqueue_getnext(head))) {
		struct cpu_timer *ctmr;
		u64 expires;

		ctmr = container_of(next, struct cpu_timer, node);
		expires = cpu_timer_getexpires(ctmr);
		/* Limit the number of timers to expire at once */
		if (++i == MAX_COLLECTED || now < expires)
			return expires;

		ctmr->firing = 1;
		cpu_timer_dequeue(ctmr);
		list_add_tail(&ctmr->elist, firing);
	}

	return U64_MAX;
}

static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
				    struct list_head *firing)
{
	struct posix_cputimer_base *base = pct->bases;
	int i;

	for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
		base->nextevt = collect_timerqueue(&base->tqhead, firing,
						   samples[i]);
	}
}

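/*
 * The deadline scheduler sets dl.dl_overrun when a SCHED_DEADLINE task
 * overruns its runtime and has requested notification via
 * SCHED_FLAG_DL_OVERRUN. Deliver SIGXCPU and clear the flag here.
 */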
static inline void check_dl_overrun(struct task_struct *tsk)
{
	if (tsk->dl.dl_overrun) {
		tsk->dl.dl_overrun = 0;
		__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
	}
}

static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
{
	if (time < limit)
		return false;

	if (print_fatal_signals) {
		pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
			rt ? "RT" : "CPU", hard ? "hard" : "soft",
			current->comm, task_pid_nr(current));
	}
	__group_send_sig_info(signo, SEND_SIG_PRIV, current);
	return true;
}

/*
 * Check for any per-thread CPU timers that have fired and move them off
 * the tsk->cpu_timers[N] list onto the firing list. Here we update the
 * tsk->it_*_expires values to reflect the remaining thread CPU timers.
 */
static void check_thread_timers(struct task_struct *tsk,
				struct list_head *firing)
{
	struct posix_cputimers *pct = &tsk->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	if (dl_task(tsk))
		check_dl_overrun(tsk);

	if (expiry_cache_is_inactive(pct))
		return;

	task_sample_cputime(tsk, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case thread timers.
	 */
	soft = task_rlimit(tsk, RLIMIT_RTTIME);
	if (soft != RLIM_INFINITY) {
		/* Task RT timeout is accounted in jiffies. RTTIME is usec */
		unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ);
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);

		/* At the hard limit, send SIGKILL. No further action. */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(rttime, hard, SIGKILL, true, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
			soft += USEC_PER_SEC;
			tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
		}
	}

	if (expiry_cache_is_inactive(pct))
		tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}

static inline void stop_process_timers(struct signal_struct *sig)
{
	struct posix_cputimers *pct = &sig->posix_cputimers;

	/* Turn off the active flag. This is done without locking. */
	WRITE_ONCE(pct->timers_active, false);
	tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
}

static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
			     u64 *expires, u64 cur_time, int signo)
{
	if (!it->expires)
		return;

	if (cur_time >= it->expires) {
		if (it->incr)
			it->expires += it->incr;
		else
			it->expires = 0;

		trace_itimer_expire(signo == SIGPROF ?
				    ITIMER_PROF : ITIMER_VIRTUAL,
				    task_tgid(tsk), cur_time);
		__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
	}

	if (it->expires && it->expires < *expires)
		*expires = it->expires;
}

/*
 * Check for any process-wide CPU timers that have fired and move them
 * off the process' timer queues onto the firing list. Per-thread timers
 * have already been taken off.
 */
static void check_process_timers(struct task_struct *tsk,
				 struct list_head *firing)
{
	struct signal_struct *const sig = tsk->signal;
	struct posix_cputimers *pct = &sig->posix_cputimers;
	u64 samples[CPUCLOCK_MAX];
	unsigned long soft;

	/*
	 * If there are no active process wide timers (POSIX 1.b, itimers,
	 * RLIMIT_CPU) nothing to check. Also skip the process wide timer
	 * processing when there is already another task handling them.
	 */
	if (!READ_ONCE(pct->timers_active) || pct->expiry_active)
		return;

	/*
	 * Signify that a thread is checking for process timers.
	 * Write access to this field is protected by the sighand lock.
	 */
	pct->expiry_active = true;

	/*
	 * Collect the current process totals. Group accounting is active
	 * so the sample can be taken directly.
	 */
	proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);
	collect_posix_cputimers(pct, samples, firing);

	/*
	 * Check for the special case process timers.
	 */
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF],
			 &pct->bases[CPUCLOCK_PROF].nextevt,
			 samples[CPUCLOCK_PROF], SIGPROF);
	check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT],
			 &pct->bases[CPUCLOCK_VIRT].nextevt,
			 samples[CPUCLOCK_VIRT], SIGVTALRM);

	soft = task_rlimit(tsk, RLIMIT_CPU);
	if (soft != RLIM_INFINITY) {
		/* RLIMIT_CPU is in seconds. Samples are nanoseconds */
		unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
		u64 ptime = samples[CPUCLOCK_PROF];
		u64 softns = (u64)soft * NSEC_PER_SEC;
		u64 hardns = (u64)hard * NSEC_PER_SEC;

		/* At the hard limit, send SIGKILL. No further action. */
		if (hard != RLIM_INFINITY &&
		    check_rlimit(ptime, hardns, SIGKILL, false, true))
			return;

		/* At the soft limit, send a SIGXCPU every second */
		if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
			sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
			softns += NSEC_PER_SEC;
		}

		/* Update the expiry cache */
		if (softns < pct->bases[CPUCLOCK_PROF].nextevt)
			pct->bases[CPUCLOCK_PROF].nextevt = softns;
	}

	if (expiry_cache_is_inactive(pct))
		stop_process_timers(sig);

	pct->expiry_active = false;
}

/*
 * This is called from the signal code (via posixtimer_rearm)
 * when the last timer signal was delivered and we have to reload the timer.
 */
static void posix_cpu_timer_rearm(struct k_itimer *timer)
{
	clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
	struct task_struct *p;
	struct sighand_struct *sighand;
	unsigned long flags;
	u64 now;

	rcu_read_lock();
	p = cpu_timer_task_rcu(timer);
	if (!p)
		goto out;

	/* Protect timer list r/w in arm_timer() */
	sighand = lock_task_sighand(p, &flags);
	if (unlikely(sighand == NULL))
		goto out;

	/*
	 * Fetch the current sample and update the timer's expiry time.
	 */
	if (CPUCLOCK_PERTHREAD(timer->it_clock))
		now = cpu_clock_sample(clkid, p);
	else
		now = cpu_clock_sample_group(clkid, p, true);

	bump_cpu_timer(timer, now);

	/*
	 * Now re-arm for the new expiry time.
	 */
	arm_timer(timer, p);
	unlock_task_sighand(p, &flags);
out:
	rcu_read_unlock();
}

Frank Mayharf06febc2008-09-12 09:54:39 -07001076/**
Thomas Gleixner87dc6442019-08-26 20:22:24 +02001077 * task_cputimers_expired - Check whether posix CPU timers are expired
Frank Mayharf06febc2008-09-12 09:54:39 -07001078 *
Thomas Gleixner001f7972019-08-21 21:09:13 +02001079 * @samples: Array of current samples for the CPUCLOCK clocks
Thomas Gleixner87dc6442019-08-26 20:22:24 +02001080 * @pct: Pointer to a posix_cputimers container
Frank Mayharf06febc2008-09-12 09:54:39 -07001081 *
Thomas Gleixner87dc6442019-08-26 20:22:24 +02001082 * Returns true if any member of @samples is greater than or equal to the
1083 * corresponding member of @pct->bases[CLK].nextevt. False otherwise.
Frank Mayharf06febc2008-09-12 09:54:39 -07001084 */
Thomas Gleixner87dc6442019-08-26 20:22:24 +02001085static inline bool
Yi Wang7f2cbcb2019-10-21 15:44:12 +08001086task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)
Frank Mayharf06febc2008-09-12 09:54:39 -07001087{
Thomas Gleixner001f7972019-08-21 21:09:13 +02001088 int i;
1089
1090 for (i = 0; i < CPUCLOCK_MAX; i++) {
Yi Wang7f2cbcb2019-10-21 15:44:12 +08001091 if (samples[i] >= pct->bases[i].nextevt)
Thomas Gleixner001f7972019-08-21 21:09:13 +02001092 return true;
1093 }
1094 return false;
Frank Mayharf06febc2008-09-12 09:54:39 -07001095}
1096
1097/**
1098 * fastpath_timer_check - POSIX CPU timers fast path.
1099 *
1100 * @tsk: The task (thread) being checked.
Frank Mayharf06febc2008-09-12 09:54:39 -07001101 *
Frank Mayharbb34d922008-09-12 09:54:39 -07001102 * Check the task and thread group timers. If both are zero (there are no
1103 * timers set) return false. Otherwise snapshot the task and thread group
1104 * timers and compare them with the corresponding expiration times. Return
1105 * true if a timer has expired, else return false.
Frank Mayharf06febc2008-09-12 09:54:39 -07001106 */
Thomas Gleixner001f7972019-08-21 21:09:13 +02001107static inline bool fastpath_timer_check(struct task_struct *tsk)
Frank Mayharf06febc2008-09-12 09:54:39 -07001108{
Thomas Gleixner244d49e2019-08-21 21:09:24 +02001109 struct posix_cputimers *pct = &tsk->posix_cputimers;
Oleg Nesterovad133ba2008-11-17 15:39:47 +01001110 struct signal_struct *sig;
Frank Mayharf06febc2008-09-12 09:54:39 -07001111
Thomas Gleixner244d49e2019-08-21 21:09:24 +02001112 if (!expiry_cache_is_inactive(pct)) {
Thomas Gleixner001f7972019-08-21 21:09:13 +02001113 u64 samples[CPUCLOCK_MAX];
Frank Mayharbb34d922008-09-12 09:54:39 -07001114
Thomas Gleixner001f7972019-08-21 21:09:13 +02001115 task_sample_cputime(tsk, samples);
Thomas Gleixner244d49e2019-08-21 21:09:24 +02001116 if (task_cputimers_expired(samples, pct))
Thomas Gleixner001f7972019-08-21 21:09:13 +02001117 return true;
Frank Mayharbb34d922008-09-12 09:54:39 -07001118 }
Oleg Nesterovad133ba2008-11-17 15:39:47 +01001119
1120 sig = tsk->signal;
Thomas Gleixner244d49e2019-08-21 21:09:24 +02001121 pct = &sig->posix_cputimers;
Jason Lowc8d75aa2015-10-14 12:07:56 -07001122 /*
Thomas Gleixner244d49e2019-08-21 21:09:24 +02001123 * Check if thread group timers expired when timers are active and
1124 * no other thread in the group is already handling expiry for
1125 * thread group cputimers. These fields are read without the
1126 * sighand lock. However, this is fine because this is meant to be
1127 * a fastpath heuristic to determine whether we should try to
1128 * acquire the sighand lock to handle timer expiry.
Jason Lowc8d75aa2015-10-14 12:07:56 -07001129 *
Thomas Gleixner244d49e2019-08-21 21:09:24 +02001130 * In the worst case scenario, if concurrently timers_active is set
1131 * or expiry_active is cleared, but the current thread doesn't see
1132 * the change yet, the timer checks are delayed until the next
1133 * thread in the group gets a scheduler interrupt to handle the
1134 * timer. This isn't an issue in practice because these types of
1135 * delays with signals actually getting sent are expected.
Jason Lowc8d75aa2015-10-14 12:07:56 -07001136 */
Thomas Gleixner244d49e2019-08-21 21:09:24 +02001137 if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) {
Thomas Gleixner001f7972019-08-21 21:09:13 +02001138 u64 samples[CPUCLOCK_MAX];
Frank Mayharbb34d922008-09-12 09:54:39 -07001139
Thomas Gleixner001f7972019-08-21 21:09:13 +02001140 proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,
1141 samples);
Oleg Nesterov8d1f4312010-06-11 20:04:46 +02001142
Thomas Gleixner244d49e2019-08-21 21:09:24 +02001143 if (task_cputimers_expired(samples, pct))
Thomas Gleixner001f7972019-08-21 21:09:13 +02001144 return true;
Frank Mayharbb34d922008-09-12 09:54:39 -07001145 }
Oleg Nesterov37bebc72009-03-23 20:34:11 +01001146
Juri Lelli34be3932017-12-12 12:10:24 +01001147 if (dl_task(tsk) && tsk->dl.dl_overrun)
Thomas Gleixner001f7972019-08-21 21:09:13 +02001148 return true;
Juri Lelli34be3932017-12-12 12:10:24 +01001149
Thomas Gleixner001f7972019-08-21 21:09:13 +02001150 return false;
Frank Mayharf06febc2008-09-12 09:54:39 -07001151}
1152
Thomas Gleixner1fb497d2020-07-30 12:14:06 +02001153static void handle_posix_cpu_timers(struct task_struct *tsk);
1154
1155#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
1156static void posix_cpu_timers_work(struct callback_head *work)
1157{
1158 handle_posix_cpu_timers(current);
1159}
1160
1161/*
Michael Prattca7752c2021-11-01 17:06:15 -04001162 * Clear existing posix CPU timers task work.
1163 */
1164void clear_posix_cputimers_work(struct task_struct *p)
1165{
1166 /*
1167 * A copied work entry from the old task is not meaningful, clear it.
1168 * N.B. init_task_work will not do this.
1169 */
1170 memset(&p->posix_cputimers_work.work, 0,
1171 sizeof(p->posix_cputimers_work.work));
1172 init_task_work(&p->posix_cputimers_work.work,
1173 posix_cpu_timers_work);
1174 p->posix_cputimers_work.scheduled = false;
1175}
1176
1177/*
Thomas Gleixner1fb497d2020-07-30 12:14:06 +02001178 * Initialize posix CPU timers task work in init task. Out of line to
1179 * keep the callback static and to avoid header recursion hell.
1180 */
1181void __init posix_cputimers_init_work(void)
1182{
Michael Prattca7752c2021-11-01 17:06:15 -04001183 clear_posix_cputimers_work(current);
Thomas Gleixner1fb497d2020-07-30 12:14:06 +02001184}
1185
1186/*
1187 * Note: All operations on tsk->posix_cputimers_work.scheduled happen either
1188 * in hard interrupt context or in task context with interrupts
1189 * disabled. Aside from that, the writer/reader interaction is always in the
1190 * context of the current task, which means they are strictly per CPU.
1191 */
1192static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
1193{
1194 return tsk->posix_cputimers_work.scheduled;
1195}
1196
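/*
 * Defer expiry to task work: mark the work as scheduled and queue it so
 * that handle_posix_cpu_timers() runs in task context on the way back
 * to user space (TWA_RESUME).
 */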
1197static inline void __run_posix_cpu_timers(struct task_struct *tsk)
1198{
1199 if (WARN_ON_ONCE(tsk->posix_cputimers_work.scheduled))
1200 return;
1201
1202 /* Schedule task work to actually expire the timers */
1203 tsk->posix_cputimers_work.scheduled = true;
1204 task_work_add(tsk, &tsk->posix_cputimers_work.work, TWA_RESUME);
1205}
1206
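/*
 * Returns true when the collection of expired timers is complete and the
 * fast path check has been reenabled, false when newly expired timers
 * were found and the collection loop in handle_posix_cpu_timers() has to
 * be repeated (RT only).
 */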
1207static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
1208 unsigned long start)
1209{
1210 bool ret = true;
1211
1212 /*
1213 * On !RT kernels interrupts are disabled while collecting expired
1214 * timers, so no tick can happen and the fast path check can be
1215 * reenabled without further checks.
1216 */
1217 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
1218 tsk->posix_cputimers_work.scheduled = false;
1219 return true;
1220 }
1221
1222 /*
1223 * On RT enabled kernels ticks can happen while the expired timers
1224 * are collected under sighand lock. But any tick which observes
1225	 * the CPUTIMERS_WORK_SCHEDULED bit set does not run the fastpath
1226	 * checks. So reenabling the tick work has to be done carefully:
1227 *
1228 * Disable interrupts and run the fast path check if jiffies have
1229 * advanced since the collecting of expired timers started. If
1230 * jiffies have not advanced or the fast path check did not find
1231 * newly expired timers, reenable the fast path check in the timer
1232 * interrupt. If there are newly expired timers, return false and
1233 * let the collection loop repeat.
1234 */
1235 local_irq_disable();
1236 if (start != jiffies && fastpath_timer_check(tsk))
1237 ret = false;
1238 else
1239 tsk->posix_cputimers_work.scheduled = false;
1240 local_irq_enable();
1241
1242 return ret;
1243}
1244#else /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
1245static inline void __run_posix_cpu_timers(struct task_struct *tsk)
1246{
1247 lockdep_posixtimer_enter();
1248 handle_posix_cpu_timers(tsk);
1249 lockdep_posixtimer_exit();
1250}
1251
1252static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
1253{
1254 return false;
1255}
1256
1257static inline bool posix_cpu_timers_enable_work(struct task_struct *tsk,
1258 unsigned long start)
1259{
1260 return true;
1261}
1262#endif /* CONFIG_POSIX_CPU_TIMERS_TASK_WORK */
1263
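/*
 * Collect all expired thread and process wide timers under sighand lock,
 * drop the lock and then deliver the expired timers via cpu_timer_fire().
 */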
1264static void handle_posix_cpu_timers(struct task_struct *tsk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266 struct k_itimer *timer, *next;
Thomas Gleixner1fb497d2020-07-30 12:14:06 +02001267 unsigned long flags, start;
Thomas Gleixnerdce3e8f2019-08-19 16:31:47 +02001268 LIST_HEAD(firing);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269
Thomas Gleixner820903c2020-07-30 12:14:05 +02001270 if (!lock_task_sighand(tsk, &flags))
Frank Mayharf06febc2008-09-12 09:54:39 -07001271 return;
Ingo Molnar5ce73a42008-09-14 17:11:46 +02001272
Thomas Gleixner1fb497d2020-07-30 12:14:06 +02001273 do {
1274 /*
1275 * On RT locking sighand lock does not disable interrupts,
1276 * so this needs to be careful vs. ticks. Store the current
1277 * jiffies value.
1278 */
1279 start = READ_ONCE(jiffies);
1280 barrier();
Jason Low934715a2015-10-14 12:07:54 -07001281
Thomas Gleixner1fb497d2020-07-30 12:14:06 +02001282 /*
1283		 * Here we remove from tsk->signal->cpu_timers[N] and
1284		 * tsk->cpu_timers[N] all the timers that are firing, and
1285		 * put them on the firing list.
1286 */
1287 check_thread_timers(tsk, &firing);
1288
1289 check_process_timers(tsk, &firing);
1290
1291 /*
Ingo Molnar4bf07f62021-03-22 22:39:03 +01001292		 * The above timer checks have updated the expiry cache and,
1293		 * because nothing can have queued or modified timers after
1294		 * sighand lock was taken above, it is guaranteed to be
1295 * consistent. So the next timer interrupt fastpath check
1296 * will find valid data.
1297 *
1298 * If timer expiry runs in the timer interrupt context then
1299 * the loop is not relevant as timers will be directly
1300 * expired in interrupt context. The stub function below
1301		 * always returns true, which allows the compiler to
1302 * optimize the loop out.
1303 *
1304 * If timer expiry is deferred to task work context then
1305 * the following rules apply:
1306 *
1307 * - On !RT kernels no tick can have happened on this CPU
1308 * after sighand lock was acquired because interrupts are
1309 * disabled. So reenabling task work before dropping
1310 * sighand lock and reenabling interrupts is race free.
1311 *
1312 * - On RT kernels ticks might have happened but the tick
1313 * work ignored posix CPU timer handling because the
1314 * CPUTIMERS_WORK_SCHEDULED bit is set. Reenabling work
1315 * must be done very carefully including a check whether
1316 * ticks have happened since the start of the timer
1317 * expiry checks. posix_cpu_timers_enable_work() takes
1318 * care of that and eventually lets the expiry checks
1319 * run again.
1320 */
1321 } while (!posix_cpu_timers_enable_work(tsk, start));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322
Frank Mayharbb34d922008-09-12 09:54:39 -07001323 /*
Thomas Gleixner1fb497d2020-07-30 12:14:06 +02001324 * We must release sighand lock before taking any timer's lock.
Frank Mayharbb34d922008-09-12 09:54:39 -07001325 * There is a potential race with timer deletion here, as the
1326 * siglock now protects our private firing list. We have set
1327 * the firing flag in each timer, so that a deletion attempt
1328 * that gets the timer lock before we do will give it up and
1329 * spin until we've taken care of that timer below.
1330 */
Oleg Nesterov0bdd2ed2010-06-11 01:10:18 +02001331 unlock_task_sighand(tsk, &flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001332
1333 /*
1334 * Now that all the timers on our list have the firing flag,
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001335 * no one will touch their list entries but us. We'll take
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 * each timer's lock before clearing its firing flag, so no
1337 * timer call will interfere.
1338 */
Thomas Gleixner60bda032019-08-27 21:31:02 +02001339 list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
H Hartley Sweeten6e85c5b2009-04-29 19:14:32 -04001340 int cpu_firing;
1341
Thomas Gleixner1fb497d2020-07-30 12:14:06 +02001342 /*
1343 * spin_lock() is sufficient here even independent of the
1344 * expiry context. If expiry happens in hard interrupt
1345 * context it's obvious. For task work context it's safe
1346 * because all other operations on timer::it_lock happen in
1347 * task context (syscall or exit).
1348 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001349 spin_lock(&timer->it_lock);
Thomas Gleixner60bda032019-08-27 21:31:02 +02001350 list_del_init(&timer->it.cpu.elist);
H Hartley Sweeten6e85c5b2009-04-29 19:14:32 -04001351 cpu_firing = timer->it.cpu.firing;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 timer->it.cpu.firing = 0;
1353 /*
1354 * The firing flag is -1 if we collided with a reset
1355 * of the timer, which already reported this
1356 * almost-firing as an overrun. So don't generate an event.
1357 */
H Hartley Sweeten6e85c5b2009-04-29 19:14:32 -04001358 if (likely(cpu_firing >= 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001359 cpu_timer_fire(timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001360 spin_unlock(&timer->it_lock);
1361 }
Thomas Gleixner820903c2020-07-30 12:14:05 +02001362}
1363
1364/*
1365 * This is called from the timer interrupt handler. The irq handler has
1366 * already updated our counts. We need to check if any timers fire now.
1367 * Interrupts are disabled.
1368 */
1369void run_posix_cpu_timers(void)
1370{
1371 struct task_struct *tsk = current;
1372
1373 lockdep_assert_irqs_disabled();
1374
1375 /*
Thomas Gleixner1fb497d2020-07-30 12:14:06 +02001376 * If the actual expiry is deferred to task work context and the
1377	 * work is already scheduled, there is no point in doing anything here.
1378 */
1379 if (posix_cpu_timers_work_scheduled(tsk))
1380 return;
1381
1382 /*
Thomas Gleixner820903c2020-07-30 12:14:05 +02001383 * The fast path checks that there are no expired thread or thread
1384 * group timers. If that's so, just return.
1385 */
1386 if (!fastpath_timer_check(tsk))
1387 return;
1388
Thomas Gleixner820903c2020-07-30 12:14:05 +02001389 __run_posix_cpu_timers(tsk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390}
1391
1392/*
Stanislaw Gruszkaf55db602010-03-11 14:04:37 -08001393 * Set one of the process-wide special case CPU timers or RLIMIT_CPU.
Frank Mayharf06febc2008-09-12 09:54:39 -07001394 * The tsk->sighand->siglock must be held by the caller.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001395 */
Thomas Gleixner1b0dd962019-08-21 21:09:09 +02001396void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
Frederic Weisbecker858cf3a2017-01-31 04:09:35 +01001397 u64 *newval, u64 *oldval)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001398{
Thomas Gleixner87dc6442019-08-26 20:22:24 +02001399 u64 now, *nextevt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001400
Thomas Gleixner1b0dd962019-08-21 21:09:09 +02001401 if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED))
Thomas Gleixner692117c2019-08-19 16:31:46 +02001402 return;
1403
Thomas Gleixner87dc6442019-08-26 20:22:24 +02001404 nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt;
Thomas Gleixner1b0dd962019-08-21 21:09:09 +02001405 now = cpu_clock_sample_group(clkid, tsk, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001406
Thomas Gleixner5405d002019-08-21 21:08:59 +02001407 if (oldval) {
Stanislaw Gruszkaf55db602010-03-11 14:04:37 -08001408 /*
1409		 * We are setting the itimer. The *oldval is absolute and we update
1410		 * it to be relative; the *newval argument is relative and we update
1411		 * it to be absolute.
1412 */
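		/*
		 * For example, with now = 30s of accumulated CPU time an old
		 * absolute expiry of 40s is reported back as 10s remaining,
		 * and a new relative value of 5s is stored as an absolute
		 * expiry of 35s.
		 */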
Martin Schwidefsky64861632011-12-15 14:56:09 +01001413 if (*oldval) {
Frederic Weisbecker858cf3a2017-01-31 04:09:35 +01001414 if (*oldval <= now) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415 /* Just about to fire. */
Frederic Weisbecker858cf3a2017-01-31 04:09:35 +01001416 *oldval = TICK_NSEC;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001417 } else {
Frederic Weisbecker858cf3a2017-01-31 04:09:35 +01001418 *oldval -= now;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001419 }
1420 }
1421
Frederic Weisbecker8cd9da82021-09-13 16:53:32 +02001422 if (*newval)
1423 *newval += now;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001424 }
1425
1426 /*
Thomas Gleixner1b0dd962019-08-21 21:09:09 +02001427	 * Update the expiration cache if this is the earliest timer. The
1428	 * CPUCLOCK_PROF expiry cache is also used by RLIMIT_CPU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001429 */
Thomas Gleixner2bbdbda2019-08-21 21:09:19 +02001430 if (*newval < *nextevt)
Thomas Gleixner87dc6442019-08-26 20:22:24 +02001431 *nextevt = *newval;
Frederic Weisbeckerb7878302015-07-17 22:25:49 +02001432
Marcelo Tosatti1e4ca262021-05-13 01:29:21 +02001433 tick_dep_set_signal(tsk, TICK_DEP_BIT_POSIX_TIMER);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001434}
1435
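/*
 * Sleep on a CPU time clock: arm a temporary posix CPU timer for the
 * requested expiry, block until it fires or a signal is delivered, and
 * on interruption report the time still remaining so the sleep can be
 * restarted.
 */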
Toyo Abee4b76552006-09-29 02:00:29 -07001436static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
Thomas Gleixner343d8fc2017-06-13 23:29:14 +02001437 const struct timespec64 *rqtp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438{
Al Viro86a9c442017-06-07 09:42:26 +01001439 struct itimerspec64 it;
Thomas Gleixner343d8fc2017-06-13 23:29:14 +02001440 struct k_itimer timer;
1441 u64 expires;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442 int error;
1443
1444 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445 * Set up a temporary timer and then wait for it to go off.
1446 */
1447	memset(&timer, 0, sizeof(timer));
1448 spin_lock_init(&timer.it_lock);
1449 timer.it_clock = which_clock;
1450 timer.it_overrun = -1;
1451 error = posix_cpu_timer_create(&timer);
1452 timer.it_process = current;
Thomas Gleixner60bda032019-08-27 21:31:02 +02001453
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454 if (!error) {
Deepa Dinamani5f252b32017-03-26 12:04:17 -07001455 static struct itimerspec64 zero_it;
Al Viroedbeda42017-06-07 09:42:31 +01001456 struct restart_block *restart;
Toyo Abee4b76552006-09-29 02:00:29 -07001457
Al Viroedbeda42017-06-07 09:42:31 +01001458 memset(&it, 0, sizeof(it));
Al Viro86a9c442017-06-07 09:42:26 +01001459 it.it_value = *rqtp;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460
1461 spin_lock_irq(&timer.it_lock);
Al Viro86a9c442017-06-07 09:42:26 +01001462 error = posix_cpu_timer_set(&timer, flags, &it, NULL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001463 if (error) {
1464 spin_unlock_irq(&timer.it_lock);
1465 return error;
1466 }
1467
1468 while (!signal_pending(current)) {
Thomas Gleixner60bda032019-08-27 21:31:02 +02001469 if (!cpu_timer_getexpires(&timer.it.cpu)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470 /*
Stanislaw Gruszkae6c42c22013-02-15 11:08:11 +01001471				 * Our timer fired and was reset; the
1472				 * deletion below cannot fail.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001473 */
Stanislaw Gruszkae6c42c22013-02-15 11:08:11 +01001474 posix_cpu_timer_del(&timer);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001475 spin_unlock_irq(&timer.it_lock);
1476 return 0;
1477 }
1478
1479 /*
1480 * Block until cpu_timer_fire (or a signal) wakes us.
1481 */
1482 __set_current_state(TASK_INTERRUPTIBLE);
1483 spin_unlock_irq(&timer.it_lock);
1484 schedule();
1485 spin_lock_irq(&timer.it_lock);
1486 }
1487
1488 /*
1489 * We were interrupted by a signal.
1490 */
Thomas Gleixner60bda032019-08-27 21:31:02 +02001491 expires = cpu_timer_getexpires(&timer.it.cpu);
Al Viro86a9c442017-06-07 09:42:26 +01001492 error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
Stanislaw Gruszkae6c42c22013-02-15 11:08:11 +01001493 if (!error) {
1494 /*
1495			 * The timer is now unarmed; deletion cannot fail.
1496 */
1497 posix_cpu_timer_del(&timer);
1498 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 spin_unlock_irq(&timer.it_lock);
1500
Stanislaw Gruszkae6c42c22013-02-15 11:08:11 +01001501 while (error == TIMER_RETRY) {
1502 /*
1503			 * We need to handle the case when the timer was or is in
1504			 * the middle of firing. In other cases we have already
1505			 * freed the resources.
1506 */
1507 spin_lock_irq(&timer.it_lock);
1508 error = posix_cpu_timer_del(&timer);
1509 spin_unlock_irq(&timer.it_lock);
1510 }
1511
Al Viro86a9c442017-06-07 09:42:26 +01001512 if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513 /*
1514 * It actually did fire already.
1515 */
1516 return 0;
1517 }
1518
Toyo Abee4b76552006-09-29 02:00:29 -07001519 error = -ERESTART_RESTARTBLOCK;
Al Viro86a9c442017-06-07 09:42:26 +01001520 /*
1521 * Report back to the user the time still remaining.
1522 */
Al Viroedbeda42017-06-07 09:42:31 +01001523 restart = &current->restart_block;
Thomas Gleixner343d8fc2017-06-13 23:29:14 +02001524 restart->nanosleep.expires = expires;
Deepa Dinamanic0edd7c2017-06-24 11:45:06 -07001525 if (restart->nanosleep.type != TT_NONE)
1526 error = nanosleep_copyout(restart, &it.it_value);
Toyo Abee4b76552006-09-29 02:00:29 -07001527 }
1528
1529 return error;
1530}
1531
Thomas Gleixnerbc2c8ea2011-02-01 13:52:12 +00001532static long posix_cpu_nsleep_restart(struct restart_block *restart_block);
1533
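/*
 * Sleeping on the current thread's own CPU clock can never complete
 * because the thread accumulates no CPU time while it is blocked, hence
 * the -EINVAL check below.
 */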
1534static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
Thomas Gleixner938e7cf2017-06-13 23:34:33 +02001535 const struct timespec64 *rqtp)
Toyo Abee4b76552006-09-29 02:00:29 -07001536{
Andy Lutomirskif56141e2015-02-12 15:01:14 -08001537 struct restart_block *restart_block = &current->restart_block;
Toyo Abee4b76552006-09-29 02:00:29 -07001538 int error;
1539
1540 /*
1541 * Diagnose required errors first.
1542 */
1543 if (CPUCLOCK_PERTHREAD(which_clock) &&
1544 (CPUCLOCK_PID(which_clock) == 0 ||
Eric W. Biederman01a21972017-04-13 10:32:16 -05001545 CPUCLOCK_PID(which_clock) == task_pid_vnr(current)))
Toyo Abee4b76552006-09-29 02:00:29 -07001546 return -EINVAL;
1547
Al Viro86a9c442017-06-07 09:42:26 +01001548 error = do_cpu_nanosleep(which_clock, flags, rqtp);
Toyo Abee4b76552006-09-29 02:00:29 -07001549
1550 if (error == -ERESTART_RESTARTBLOCK) {
1551
Thomas Gleixner3751f9f2011-02-01 13:51:20 +00001552 if (flags & TIMER_ABSTIME)
Toyo Abee4b76552006-09-29 02:00:29 -07001553 return -ERESTARTNOHAND;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554
Thomas Gleixnerab8177b2011-05-20 13:05:15 +02001555 restart_block->nanosleep.clockid = which_clock;
Oleg Nesterov5abbe512021-02-01 18:46:41 +01001556 set_restart_fn(restart_block, posix_cpu_nsleep_restart);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 return error;
1559}
1560
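/*
 * Restart a CPU clock nanosleep that was interrupted by a signal, using
 * the absolute expiry time stashed in the restart block.
 */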
Thomas Gleixnerbc2c8ea2011-02-01 13:52:12 +00001561static long posix_cpu_nsleep_restart(struct restart_block *restart_block)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001562{
Thomas Gleixnerab8177b2011-05-20 13:05:15 +02001563 clockid_t which_clock = restart_block->nanosleep.clockid;
Deepa Dinamaniad196382017-03-26 12:04:18 -07001564 struct timespec64 t;
Thomas Gleixner97735f22006-01-09 20:52:37 -08001565
Deepa Dinamaniad196382017-03-26 12:04:18 -07001566 t = ns_to_timespec64(restart_block->nanosleep.expires);
Thomas Gleixner97735f22006-01-09 20:52:37 -08001567
Al Viro86a9c442017-06-07 09:42:26 +01001568 return do_cpu_nanosleep(which_clock, TIMER_ABSTIME, &t);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569}
1570
Nick Desaulniers29f1b2b2017-12-28 22:11:36 -05001571#define PROCESS_CLOCK make_process_cpuclock(0, CPUCLOCK_SCHED)
1572#define THREAD_CLOCK make_thread_cpuclock(0, CPUCLOCK_SCHED)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001573
Thomas Gleixnera924b042006-01-09 20:52:27 -08001574static int process_cpu_clock_getres(const clockid_t which_clock,
Deepa Dinamanid2e3e0c2017-03-26 12:04:15 -07001575 struct timespec64 *tp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576{
1577 return posix_cpu_clock_getres(PROCESS_CLOCK, tp);
1578}
Thomas Gleixnera924b042006-01-09 20:52:27 -08001579static int process_cpu_clock_get(const clockid_t which_clock,
Deepa Dinamani3c9c12f2017-03-26 12:04:14 -07001580 struct timespec64 *tp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581{
1582 return posix_cpu_clock_get(PROCESS_CLOCK, tp);
1583}
1584static int process_cpu_timer_create(struct k_itimer *timer)
1585{
1586 timer->it_clock = PROCESS_CLOCK;
1587 return posix_cpu_timer_create(timer);
1588}
Thomas Gleixnera924b042006-01-09 20:52:27 -08001589static int process_cpu_nsleep(const clockid_t which_clock, int flags,
Thomas Gleixner938e7cf2017-06-13 23:34:33 +02001590 const struct timespec64 *rqtp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591{
Al Viro99e6c0e2017-06-07 09:42:30 +01001592 return posix_cpu_nsleep(PROCESS_CLOCK, flags, rqtp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001593}
Thomas Gleixnera924b042006-01-09 20:52:27 -08001594static int thread_cpu_clock_getres(const clockid_t which_clock,
Deepa Dinamanid2e3e0c2017-03-26 12:04:15 -07001595 struct timespec64 *tp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596{
1597 return posix_cpu_clock_getres(THREAD_CLOCK, tp);
1598}
Thomas Gleixnera924b042006-01-09 20:52:27 -08001599static int thread_cpu_clock_get(const clockid_t which_clock,
Deepa Dinamani3c9c12f2017-03-26 12:04:14 -07001600 struct timespec64 *tp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601{
1602 return posix_cpu_clock_get(THREAD_CLOCK, tp);
1603}
1604static int thread_cpu_timer_create(struct k_itimer *timer)
1605{
1606 timer->it_clock = THREAD_CLOCK;
1607 return posix_cpu_timer_create(timer);
1608}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609
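/*
 * clock_posix_cpu handles the dynamically encoded per-process and
 * per-thread CPU clockids, while clock_process and clock_thread back the
 * fixed CLOCK_PROCESS_CPUTIME_ID and CLOCK_THREAD_CPUTIME_ID clocks.
 */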
Christoph Hellwigd3ba5a92017-05-26 12:03:11 +03001610const struct k_clock clock_posix_cpu = {
Andrei Vagin819a95f2019-11-12 01:26:54 +00001611 .clock_getres = posix_cpu_clock_getres,
1612 .clock_set = posix_cpu_clock_set,
1613 .clock_get_timespec = posix_cpu_clock_get,
1614 .timer_create = posix_cpu_timer_create,
1615 .nsleep = posix_cpu_nsleep,
1616 .timer_set = posix_cpu_timer_set,
1617 .timer_del = posix_cpu_timer_del,
1618 .timer_get = posix_cpu_timer_get,
1619 .timer_rearm = posix_cpu_timer_rearm,
Thomas Gleixner19769452011-02-01 13:51:06 +00001620};
1621
Christoph Hellwigd3ba5a92017-05-26 12:03:11 +03001622const struct k_clock clock_process = {
Andrei Vagin819a95f2019-11-12 01:26:54 +00001623 .clock_getres = process_cpu_clock_getres,
1624 .clock_get_timespec = process_cpu_clock_get,
1625 .timer_create = process_cpu_timer_create,
1626 .nsleep = process_cpu_nsleep,
Christoph Hellwigd3ba5a92017-05-26 12:03:11 +03001627};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628
Christoph Hellwigd3ba5a92017-05-26 12:03:11 +03001629const struct k_clock clock_thread = {
Andrei Vagin819a95f2019-11-12 01:26:54 +00001630 .clock_getres = thread_cpu_clock_getres,
1631 .clock_get_timespec = thread_cpu_clock_get,
1632 .timer_create = thread_cpu_timer_create,
Christoph Hellwigd3ba5a92017-05-26 12:03:11 +03001633};
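
/*
 * Illustrative userspace sketch (not part of this file): how these CPU
 * clocks are typically consumed through the standard POSIX API. Error
 * handling is omitted and the 500ms value is only an example.
 *
 *	#include <time.h>
 *	#include <signal.h>
 *
 *	struct timespec ts;
 *	timer_t tid;
 *	struct sigevent sev = {
 *		.sigev_notify = SIGEV_SIGNAL,
 *		.sigev_signo  = SIGALRM,
 *	};
 *	struct itimerspec its = {
 *		.it_value = { .tv_sec = 0, .tv_nsec = 500 * 1000 * 1000 },
 *	};
 *
 *	// CPU time consumed by the whole process resp. the calling thread
 *	clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
 *	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
 *
 *	// One-shot timer that fires after 500ms of process CPU time
 *	timer_create(CLOCK_PROCESS_CPUTIME_ID, &sev, &tid);
 *	timer_settime(tid, 0, &its, NULL);
 */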