Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 1 | //SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <linux/cgroup.h> |
| 3 | #include <linux/sched.h> |
| 4 | #include <linux/sched/task.h> |
| 5 | #include <linux/sched/signal.h> |
| 6 | |
| 7 | #include "cgroup-internal.h" |
| 8 | |
Roman Gushchin | 4c476d8 | 2019-04-19 10:03:08 -0700 | [diff] [blame] | 9 | #include <trace/events/cgroup.h> |
| 10 | |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 11 | /* |
| 12 | * Propagate the cgroup frozen state upwards by the cgroup tree. |
| 13 | */ |
| 14 | static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen) |
| 15 | { |
| 16 | int desc = 1; |
| 17 | |
| 18 | /* |
| 19 | * If the new state is frozen, some freezing ancestor cgroups may change |
| 20 | * their state too, depending on if all their descendants are frozen. |
| 21 | * |
| 22 | * Otherwise, all ancestor cgroups are forced into the non-frozen state. |
| 23 | */ |
| 24 | while ((cgrp = cgroup_parent(cgrp))) { |
| 25 | if (frozen) { |
| 26 | cgrp->freezer.nr_frozen_descendants += desc; |
| 27 | if (!test_bit(CGRP_FROZEN, &cgrp->flags) && |
| 28 | test_bit(CGRP_FREEZE, &cgrp->flags) && |
| 29 | cgrp->freezer.nr_frozen_descendants == |
| 30 | cgrp->nr_descendants) { |
| 31 | set_bit(CGRP_FROZEN, &cgrp->flags); |
| 32 | cgroup_file_notify(&cgrp->events_file); |
Roman Gushchin | 4c476d8 | 2019-04-19 10:03:08 -0700 | [diff] [blame] | 33 | TRACE_CGROUP_PATH(notify_frozen, cgrp, 1); |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 34 | desc++; |
| 35 | } |
| 36 | } else { |
| 37 | cgrp->freezer.nr_frozen_descendants -= desc; |
| 38 | if (test_bit(CGRP_FROZEN, &cgrp->flags)) { |
| 39 | clear_bit(CGRP_FROZEN, &cgrp->flags); |
| 40 | cgroup_file_notify(&cgrp->events_file); |
Roman Gushchin | 4c476d8 | 2019-04-19 10:03:08 -0700 | [diff] [blame] | 41 | TRACE_CGROUP_PATH(notify_frozen, cgrp, 0); |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 42 | desc++; |
| 43 | } |
| 44 | } |
| 45 | } |
| 46 | } |
| 47 | |
| 48 | /* |
| 49 | * Revisit the cgroup frozen state. |
| 50 | * Checks if the cgroup is really frozen and perform all state transitions. |
| 51 | */ |
| 52 | void cgroup_update_frozen(struct cgroup *cgrp) |
| 53 | { |
| 54 | bool frozen; |
| 55 | |
| 56 | lockdep_assert_held(&css_set_lock); |
| 57 | |
| 58 | /* |
| 59 | * If the cgroup has to be frozen (CGRP_FREEZE bit set), |
| 60 | * and all tasks are frozen and/or stopped, let's consider |
| 61 | * the cgroup frozen. Otherwise it's not frozen. |
| 62 | */ |
| 63 | frozen = test_bit(CGRP_FREEZE, &cgrp->flags) && |
| 64 | cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp); |
| 65 | |
| 66 | if (frozen) { |
| 67 | /* Already there? */ |
| 68 | if (test_bit(CGRP_FROZEN, &cgrp->flags)) |
| 69 | return; |
| 70 | |
| 71 | set_bit(CGRP_FROZEN, &cgrp->flags); |
| 72 | } else { |
| 73 | /* Already there? */ |
| 74 | if (!test_bit(CGRP_FROZEN, &cgrp->flags)) |
| 75 | return; |
| 76 | |
| 77 | clear_bit(CGRP_FROZEN, &cgrp->flags); |
| 78 | } |
| 79 | cgroup_file_notify(&cgrp->events_file); |
Roman Gushchin | 4c476d8 | 2019-04-19 10:03:08 -0700 | [diff] [blame] | 80 | TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen); |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 81 | |
| 82 | /* Update the state of ancestor cgroups. */ |
| 83 | cgroup_propagate_frozen(cgrp, frozen); |
| 84 | } |
| 85 | |
| 86 | /* |
| 87 | * Increment cgroup's nr_frozen_tasks. |
| 88 | */ |
| 89 | static void cgroup_inc_frozen_cnt(struct cgroup *cgrp) |
| 90 | { |
| 91 | cgrp->freezer.nr_frozen_tasks++; |
| 92 | } |
| 93 | |
| 94 | /* |
| 95 | * Decrement cgroup's nr_frozen_tasks. |
| 96 | */ |
| 97 | static void cgroup_dec_frozen_cnt(struct cgroup *cgrp) |
| 98 | { |
| 99 | cgrp->freezer.nr_frozen_tasks--; |
| 100 | WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0); |
| 101 | } |
| 102 | |
| 103 | /* |
| 104 | * Enter frozen/stopped state, if not yet there. Update cgroup's counters, |
| 105 | * and revisit the state of the cgroup, if necessary. |
| 106 | */ |
| 107 | void cgroup_enter_frozen(void) |
| 108 | { |
| 109 | struct cgroup *cgrp; |
| 110 | |
| 111 | if (current->frozen) |
| 112 | return; |
| 113 | |
| 114 | spin_lock_irq(&css_set_lock); |
| 115 | current->frozen = true; |
| 116 | cgrp = task_dfl_cgroup(current); |
| 117 | cgroup_inc_frozen_cnt(cgrp); |
| 118 | cgroup_update_frozen(cgrp); |
| 119 | spin_unlock_irq(&css_set_lock); |
| 120 | } |
| 121 | |
| 122 | /* |
| 123 | * Conditionally leave frozen/stopped state. Update cgroup's counters, |
| 124 | * and revisit the state of the cgroup, if necessary. |
| 125 | * |
| 126 | * If always_leave is not set, and the cgroup is freezing, |
| 127 | * we're racing with the cgroup freezing. In this case, we don't |
| 128 | * drop the frozen counter to avoid a transient switch to |
| 129 | * the unfrozen state. |
| 130 | */ |
| 131 | void cgroup_leave_frozen(bool always_leave) |
| 132 | { |
| 133 | struct cgroup *cgrp; |
| 134 | |
| 135 | spin_lock_irq(&css_set_lock); |
| 136 | cgrp = task_dfl_cgroup(current); |
| 137 | if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) { |
| 138 | cgroup_dec_frozen_cnt(cgrp); |
| 139 | cgroup_update_frozen(cgrp); |
| 140 | WARN_ON_ONCE(!current->frozen); |
| 141 | current->frozen = false; |
Roman Gushchin | cb2c4cd | 2019-04-26 10:59:44 -0700 | [diff] [blame] | 142 | } else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) { |
| 143 | spin_lock(¤t->sighand->siglock); |
| 144 | current->jobctl |= JOBCTL_TRAP_FREEZE; |
| 145 | set_thread_flag(TIF_SIGPENDING); |
| 146 | spin_unlock(¤t->sighand->siglock); |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 147 | } |
| 148 | spin_unlock_irq(&css_set_lock); |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 149 | } |
| 150 | |
| 151 | /* |
| 152 | * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE |
| 153 | * jobctl bit. |
| 154 | */ |
| 155 | static void cgroup_freeze_task(struct task_struct *task, bool freeze) |
| 156 | { |
| 157 | unsigned long flags; |
| 158 | |
| 159 | /* If the task is about to die, don't bother with freezing it. */ |
| 160 | if (!lock_task_sighand(task, &flags)) |
| 161 | return; |
| 162 | |
| 163 | if (freeze) { |
| 164 | task->jobctl |= JOBCTL_TRAP_FREEZE; |
| 165 | signal_wake_up(task, false); |
| 166 | } else { |
| 167 | task->jobctl &= ~JOBCTL_TRAP_FREEZE; |
| 168 | wake_up_process(task); |
| 169 | } |
| 170 | |
| 171 | unlock_task_sighand(task, &flags); |
| 172 | } |
| 173 | |
| 174 | /* |
| 175 | * Freeze or unfreeze all tasks in the given cgroup. |
| 176 | */ |
| 177 | static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) |
| 178 | { |
| 179 | struct css_task_iter it; |
| 180 | struct task_struct *task; |
| 181 | |
| 182 | lockdep_assert_held(&cgroup_mutex); |
| 183 | |
| 184 | spin_lock_irq(&css_set_lock); |
| 185 | if (freeze) |
| 186 | set_bit(CGRP_FREEZE, &cgrp->flags); |
| 187 | else |
| 188 | clear_bit(CGRP_FREEZE, &cgrp->flags); |
| 189 | spin_unlock_irq(&css_set_lock); |
| 190 | |
Roman Gushchin | 4c476d8 | 2019-04-19 10:03:08 -0700 | [diff] [blame] | 191 | if (freeze) |
| 192 | TRACE_CGROUP_PATH(freeze, cgrp); |
| 193 | else |
| 194 | TRACE_CGROUP_PATH(unfreeze, cgrp); |
| 195 | |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 196 | css_task_iter_start(&cgrp->self, 0, &it); |
| 197 | while ((task = css_task_iter_next(&it))) { |
| 198 | /* |
| 199 | * Ignore kernel threads here. Freezing cgroups containing |
| 200 | * kthreads isn't supported. |
| 201 | */ |
| 202 | if (task->flags & PF_KTHREAD) |
| 203 | continue; |
| 204 | cgroup_freeze_task(task, freeze); |
| 205 | } |
| 206 | css_task_iter_end(&it); |
| 207 | |
| 208 | /* |
| 209 | * Cgroup state should be revisited here to cover empty leaf cgroups |
| 210 | * and cgroups which descendants are already in the desired state. |
| 211 | */ |
| 212 | spin_lock_irq(&css_set_lock); |
| 213 | if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants) |
| 214 | cgroup_update_frozen(cgrp); |
| 215 | spin_unlock_irq(&css_set_lock); |
| 216 | } |
| 217 | |
| 218 | /* |
| 219 | * Adjust the task state (freeze or unfreeze) and revisit the state of |
| 220 | * source and destination cgroups. |
| 221 | */ |
| 222 | void cgroup_freezer_migrate_task(struct task_struct *task, |
| 223 | struct cgroup *src, struct cgroup *dst) |
| 224 | { |
| 225 | lockdep_assert_held(&css_set_lock); |
| 226 | |
| 227 | /* |
| 228 | * Kernel threads are not supposed to be frozen at all. |
| 229 | */ |
| 230 | if (task->flags & PF_KTHREAD) |
| 231 | return; |
| 232 | |
| 233 | /* |
Honglei Wang | 742e8cd | 2019-10-30 16:18:10 +0800 | [diff] [blame^] | 234 | * It's not necessary to do changes if both of the src and dst cgroups |
| 235 | * are not freezing and task is not frozen. |
| 236 | */ |
| 237 | if (!test_bit(CGRP_FREEZE, &src->flags) && |
| 238 | !test_bit(CGRP_FREEZE, &dst->flags) && |
| 239 | !task->frozen) |
| 240 | return; |
| 241 | |
| 242 | /* |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 243 | * Adjust counters of freezing and frozen tasks. |
| 244 | * Note, that if the task is frozen, but the destination cgroup is not |
| 245 | * frozen, we bump both counters to keep them balanced. |
| 246 | */ |
| 247 | if (task->frozen) { |
| 248 | cgroup_inc_frozen_cnt(dst); |
| 249 | cgroup_dec_frozen_cnt(src); |
| 250 | } |
| 251 | cgroup_update_frozen(dst); |
| 252 | cgroup_update_frozen(src); |
| 253 | |
| 254 | /* |
| 255 | * Force the task to the desired state. |
| 256 | */ |
| 257 | cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags)); |
| 258 | } |
| 259 | |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 260 | void cgroup_freeze(struct cgroup *cgrp, bool freeze) |
| 261 | { |
| 262 | struct cgroup_subsys_state *css; |
| 263 | struct cgroup *dsct; |
| 264 | bool applied = false; |
| 265 | |
| 266 | lockdep_assert_held(&cgroup_mutex); |
| 267 | |
| 268 | /* |
| 269 | * Nothing changed? Just exit. |
| 270 | */ |
| 271 | if (cgrp->freezer.freeze == freeze) |
| 272 | return; |
| 273 | |
| 274 | cgrp->freezer.freeze = freeze; |
| 275 | |
| 276 | /* |
| 277 | * Propagate changes downwards the cgroup tree. |
| 278 | */ |
| 279 | css_for_each_descendant_pre(css, &cgrp->self) { |
| 280 | dsct = css->cgroup; |
| 281 | |
| 282 | if (cgroup_is_dead(dsct)) |
| 283 | continue; |
| 284 | |
| 285 | if (freeze) { |
| 286 | dsct->freezer.e_freeze++; |
| 287 | /* |
| 288 | * Already frozen because of ancestor's settings? |
| 289 | */ |
| 290 | if (dsct->freezer.e_freeze > 1) |
| 291 | continue; |
| 292 | } else { |
| 293 | dsct->freezer.e_freeze--; |
| 294 | /* |
| 295 | * Still frozen because of ancestor's settings? |
| 296 | */ |
| 297 | if (dsct->freezer.e_freeze > 0) |
| 298 | continue; |
| 299 | |
| 300 | WARN_ON_ONCE(dsct->freezer.e_freeze < 0); |
| 301 | } |
| 302 | |
| 303 | /* |
| 304 | * Do change actual state: freeze or unfreeze. |
| 305 | */ |
| 306 | cgroup_do_freeze(dsct, freeze); |
| 307 | applied = true; |
| 308 | } |
| 309 | |
| 310 | /* |
| 311 | * Even if the actual state hasn't changed, let's notify a user. |
| 312 | * The state can be enforced by an ancestor cgroup: the cgroup |
| 313 | * can already be in the desired state or it can be locked in the |
| 314 | * opposite state, so that the transition will never happen. |
| 315 | * In both cases it's better to notify a user, that there is |
| 316 | * nothing to wait for. |
| 317 | */ |
Roman Gushchin | 4c476d8 | 2019-04-19 10:03:08 -0700 | [diff] [blame] | 318 | if (!applied) { |
| 319 | TRACE_CGROUP_PATH(notify_frozen, cgrp, |
| 320 | test_bit(CGRP_FROZEN, &cgrp->flags)); |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 321 | cgroup_file_notify(&cgrp->events_file); |
Roman Gushchin | 4c476d8 | 2019-04-19 10:03:08 -0700 | [diff] [blame] | 322 | } |
Roman Gushchin | 76f969e | 2019-04-19 10:03:04 -0700 | [diff] [blame] | 323 | } |