Thomas Gleixner | f85d208 | 2019-06-04 10:10:45 +0200 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0-only |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 2 | /* |
| 3 | * RDMA resource limiting controller for cgroups. |
| 4 | * |
| 5 | * Used to allow a cgroup hierarchy to stop processes from consuming |
| 6 | * additional RDMA resources after a certain limit is reached. |
| 7 | * |
| 8 | * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com> |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 9 | */ |
| 10 | |
| 11 | #include <linux/bitops.h> |
| 12 | #include <linux/slab.h> |
| 13 | #include <linux/seq_file.h> |
| 14 | #include <linux/cgroup.h> |
| 15 | #include <linux/parser.h> |
| 16 | #include <linux/cgroup_rdma.h> |
| 17 | |
/* Keyword written and printed for a resource limit equal to S32_MAX. */
#define RDMACG_MAX_STR "max"

/*
 * rdmacg_mutex serializes access to the per cgroup resource pool lists
 * and to rdmacg_devices, the list of registered rdma devices.
 */
static DEFINE_MUTEX(rdmacg_mutex);
static LIST_HEAD(rdmacg_devices);
| 26 | |
/*
 * Selects which per resource value a cgroup control file reports:
 * the configured limit or the current usage.
 */
enum rdmacg_file_type {
	RDMACG_RESOURCE_TYPE_MAX = 0,
	RDMACG_RESOURCE_TYPE_STAT = 1,
};
| 31 | |
| 32 | /* |
| 33 | * resource table definition as to be seen by the user. |
| 34 | * Need to add entries to it when more resources are |
| 35 | * added/defined at IB verb/core layer. |
| 36 | */ |
| 37 | static char const *rdmacg_resource_names[] = { |
| 38 | [RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle", |
| 39 | [RDMACG_RESOURCE_HCA_OBJECT] = "hca_object", |
| 40 | }; |
| 41 | |
/*
 * Accounting state of a single resource type within one resource pool.
 */
struct rdmacg_resource {
	int max;	/* configured limit; S32_MAX is shown as "max" */
	int usage;	/* number of charges currently held */
};
| 47 | |
| 48 | /* |
| 49 | * resource pool object which represents per cgroup, per device |
| 50 | * resources. There are multiple instances of this object per cgroup, |
| 51 | * therefore it cannot be embedded within rdma_cgroup structure. It |
| 52 | * is maintained as list. |
| 53 | */ |
| 54 | struct rdmacg_resource_pool { |
| 55 | struct rdmacg_device *device; |
| 56 | struct rdmacg_resource resources[RDMACG_RESOURCE_MAX]; |
| 57 | |
| 58 | struct list_head cg_node; |
| 59 | struct list_head dev_node; |
| 60 | |
| 61 | /* count active user tasks of this pool */ |
| 62 | u64 usage_sum; |
| 63 | /* total number counts which are set to max */ |
| 64 | int num_max_cnt; |
| 65 | }; |
| 66 | |
| 67 | static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css) |
| 68 | { |
| 69 | return container_of(css, struct rdma_cgroup, css); |
| 70 | } |
| 71 | |
| 72 | static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg) |
| 73 | { |
| 74 | return css_rdmacg(cg->css.parent); |
| 75 | } |
| 76 | |
| 77 | static inline struct rdma_cgroup *get_current_rdmacg(void) |
| 78 | { |
| 79 | return css_rdmacg(task_get_css(current, rdma_cgrp_id)); |
| 80 | } |
| 81 | |
| 82 | static void set_resource_limit(struct rdmacg_resource_pool *rpool, |
| 83 | int index, int new_max) |
| 84 | { |
| 85 | if (new_max == S32_MAX) { |
| 86 | if (rpool->resources[index].max != S32_MAX) |
| 87 | rpool->num_max_cnt++; |
| 88 | } else { |
| 89 | if (rpool->resources[index].max == S32_MAX) |
| 90 | rpool->num_max_cnt--; |
| 91 | } |
| 92 | rpool->resources[index].max = new_max; |
| 93 | } |
| 94 | |
| 95 | static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool) |
| 96 | { |
| 97 | int i; |
| 98 | |
| 99 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) |
| 100 | set_resource_limit(rpool, i, S32_MAX); |
| 101 | } |
| 102 | |
| 103 | static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool) |
| 104 | { |
| 105 | lockdep_assert_held(&rdmacg_mutex); |
| 106 | |
| 107 | list_del(&rpool->cg_node); |
| 108 | list_del(&rpool->dev_node); |
| 109 | kfree(rpool); |
| 110 | } |
| 111 | |
| 112 | static struct rdmacg_resource_pool * |
| 113 | find_cg_rpool_locked(struct rdma_cgroup *cg, |
| 114 | struct rdmacg_device *device) |
| 115 | |
| 116 | { |
| 117 | struct rdmacg_resource_pool *pool; |
| 118 | |
| 119 | lockdep_assert_held(&rdmacg_mutex); |
| 120 | |
| 121 | list_for_each_entry(pool, &cg->rpools, cg_node) |
| 122 | if (pool->device == device) |
| 123 | return pool; |
| 124 | |
| 125 | return NULL; |
| 126 | } |
| 127 | |
| 128 | static struct rdmacg_resource_pool * |
| 129 | get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device) |
| 130 | { |
| 131 | struct rdmacg_resource_pool *rpool; |
| 132 | |
| 133 | rpool = find_cg_rpool_locked(cg, device); |
| 134 | if (rpool) |
| 135 | return rpool; |
| 136 | |
| 137 | rpool = kzalloc(sizeof(*rpool), GFP_KERNEL); |
| 138 | if (!rpool) |
| 139 | return ERR_PTR(-ENOMEM); |
| 140 | |
| 141 | rpool->device = device; |
| 142 | set_all_resource_max_limit(rpool); |
| 143 | |
| 144 | INIT_LIST_HEAD(&rpool->cg_node); |
| 145 | INIT_LIST_HEAD(&rpool->dev_node); |
| 146 | list_add_tail(&rpool->cg_node, &cg->rpools); |
| 147 | list_add_tail(&rpool->dev_node, &device->rpools); |
| 148 | return rpool; |
| 149 | } |
| 150 | |
| 151 | /** |
| 152 | * uncharge_cg_locked - uncharge resource for rdma cgroup |
| 153 | * @cg: pointer to cg to uncharge and all parents in hierarchy |
| 154 | * @device: pointer to rdmacg device |
| 155 | * @index: index of the resource to uncharge in cg (resource pool) |
| 156 | * |
| 157 | * It also frees the resource pool which was created as part of |
| 158 | * charging operation when there are no resources attached to |
| 159 | * resource pool. |
| 160 | */ |
| 161 | static void |
| 162 | uncharge_cg_locked(struct rdma_cgroup *cg, |
| 163 | struct rdmacg_device *device, |
| 164 | enum rdmacg_resource_type index) |
| 165 | { |
| 166 | struct rdmacg_resource_pool *rpool; |
| 167 | |
| 168 | rpool = find_cg_rpool_locked(cg, device); |
| 169 | |
| 170 | /* |
| 171 | * rpool cannot be null at this stage. Let kernel operate in case |
| 172 | * if there a bug in IB stack or rdma controller, instead of crashing |
| 173 | * the system. |
| 174 | */ |
| 175 | if (unlikely(!rpool)) { |
| 176 | pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device); |
| 177 | return; |
| 178 | } |
| 179 | |
| 180 | rpool->resources[index].usage--; |
| 181 | |
| 182 | /* |
| 183 | * A negative count (or overflow) is invalid, |
| 184 | * it indicates a bug in the rdma controller. |
| 185 | */ |
| 186 | WARN_ON_ONCE(rpool->resources[index].usage < 0); |
| 187 | rpool->usage_sum--; |
| 188 | if (rpool->usage_sum == 0 && |
| 189 | rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { |
| 190 | /* |
| 191 | * No user of the rpool and all entries are set to max, so |
| 192 | * safe to delete this rpool. |
| 193 | */ |
| 194 | free_cg_rpool_locked(rpool); |
| 195 | } |
| 196 | } |
| 197 | |
| 198 | /** |
| 199 | * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count |
| 200 | * @device: pointer to rdmacg device |
| 201 | * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup |
| 202 | * stop uncharging |
| 203 | * @index: index of the resource to uncharge in cg in given resource pool |
| 204 | */ |
| 205 | static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg, |
| 206 | struct rdmacg_device *device, |
| 207 | struct rdma_cgroup *stop_cg, |
| 208 | enum rdmacg_resource_type index) |
| 209 | { |
| 210 | struct rdma_cgroup *p; |
| 211 | |
| 212 | mutex_lock(&rdmacg_mutex); |
| 213 | |
| 214 | for (p = cg; p != stop_cg; p = parent_rdmacg(p)) |
| 215 | uncharge_cg_locked(p, device, index); |
| 216 | |
| 217 | mutex_unlock(&rdmacg_mutex); |
| 218 | |
| 219 | css_put(&cg->css); |
| 220 | } |
| 221 | |
| 222 | /** |
| 223 | * rdmacg_uncharge - hierarchically uncharge rdma resource count |
| 224 | * @device: pointer to rdmacg device |
| 225 | * @index: index of the resource to uncharge in cgroup in given resource pool |
| 226 | */ |
| 227 | void rdmacg_uncharge(struct rdma_cgroup *cg, |
| 228 | struct rdmacg_device *device, |
| 229 | enum rdmacg_resource_type index) |
| 230 | { |
| 231 | if (index >= RDMACG_RESOURCE_MAX) |
| 232 | return; |
| 233 | |
| 234 | rdmacg_uncharge_hierarchy(cg, device, NULL, index); |
| 235 | } |
| 236 | EXPORT_SYMBOL(rdmacg_uncharge); |
| 237 | |
| 238 | /** |
| 239 | * rdmacg_try_charge - hierarchically try to charge the rdma resource |
| 240 | * @rdmacg: pointer to rdma cgroup which will own this resource |
| 241 | * @device: pointer to rdmacg device |
| 242 | * @index: index of the resource to charge in cgroup (resource pool) |
| 243 | * |
| 244 | * This function follows charging resource in hierarchical way. |
| 245 | * It will fail if the charge would cause the new value to exceed the |
| 246 | * hierarchical limit. |
Zhen Lei | 08b2b6f | 2021-05-24 16:29:43 +0800 | [diff] [blame^] | 247 | * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 248 | * Returns pointer to rdmacg for this resource when charging is successful. |
| 249 | * |
| 250 | * Charger needs to account resources on two criteria. |
| 251 | * (a) per cgroup & (b) per device resource usage. |
| 252 | * Per cgroup resource usage ensures that tasks of cgroup doesn't cross |
| 253 | * the configured limits. Per device provides granular configuration |
| 254 | * in multi device usage. It allocates resource pool in the hierarchy |
| 255 | * for each parent it come across for first resource. Later on resource |
| 256 | * pool will be available. Therefore it will be much faster thereon |
| 257 | * to charge/uncharge. |
| 258 | */ |
| 259 | int rdmacg_try_charge(struct rdma_cgroup **rdmacg, |
| 260 | struct rdmacg_device *device, |
| 261 | enum rdmacg_resource_type index) |
| 262 | { |
| 263 | struct rdma_cgroup *cg, *p; |
| 264 | struct rdmacg_resource_pool *rpool; |
| 265 | s64 new; |
| 266 | int ret = 0; |
| 267 | |
| 268 | if (index >= RDMACG_RESOURCE_MAX) |
| 269 | return -EINVAL; |
| 270 | |
| 271 | /* |
| 272 | * hold on to css, as cgroup can be removed but resource |
| 273 | * accounting happens on css. |
| 274 | */ |
| 275 | cg = get_current_rdmacg(); |
| 276 | |
| 277 | mutex_lock(&rdmacg_mutex); |
| 278 | for (p = cg; p; p = parent_rdmacg(p)) { |
| 279 | rpool = get_cg_rpool_locked(p, device); |
| 280 | if (IS_ERR(rpool)) { |
| 281 | ret = PTR_ERR(rpool); |
| 282 | goto err; |
| 283 | } else { |
| 284 | new = rpool->resources[index].usage + 1; |
| 285 | if (new > rpool->resources[index].max) { |
| 286 | ret = -EAGAIN; |
| 287 | goto err; |
| 288 | } else { |
| 289 | rpool->resources[index].usage = new; |
| 290 | rpool->usage_sum++; |
| 291 | } |
| 292 | } |
| 293 | } |
| 294 | mutex_unlock(&rdmacg_mutex); |
| 295 | |
| 296 | *rdmacg = cg; |
| 297 | return 0; |
| 298 | |
| 299 | err: |
| 300 | mutex_unlock(&rdmacg_mutex); |
| 301 | rdmacg_uncharge_hierarchy(cg, device, p, index); |
| 302 | return ret; |
| 303 | } |
| 304 | EXPORT_SYMBOL(rdmacg_try_charge); |
| 305 | |
| 306 | /** |
| 307 | * rdmacg_register_device - register rdmacg device to rdma controller. |
| 308 | * @device: pointer to rdmacg device whose resources need to be accounted. |
| 309 | * |
| 310 | * If IB stack wish a device to participate in rdma cgroup resource |
| 311 | * tracking, it must invoke this API to register with rdma cgroup before |
| 312 | * any user space application can start using the RDMA resources. |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 313 | */ |
Parav Pandit | 7527a7b | 2019-01-17 20:14:15 +0200 | [diff] [blame] | 314 | void rdmacg_register_device(struct rdmacg_device *device) |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 315 | { |
| 316 | INIT_LIST_HEAD(&device->dev_node); |
| 317 | INIT_LIST_HEAD(&device->rpools); |
| 318 | |
| 319 | mutex_lock(&rdmacg_mutex); |
| 320 | list_add_tail(&device->dev_node, &rdmacg_devices); |
| 321 | mutex_unlock(&rdmacg_mutex); |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 322 | } |
| 323 | EXPORT_SYMBOL(rdmacg_register_device); |
| 324 | |
| 325 | /** |
| 326 | * rdmacg_unregister_device - unregister rdmacg device from rdma controller. |
| 327 | * @device: pointer to rdmacg device which was previously registered with rdma |
| 328 | * controller using rdmacg_register_device(). |
| 329 | * |
| 330 | * IB stack must invoke this after all the resources of the IB device |
| 331 | * are destroyed and after ensuring that no more resources will be created |
| 332 | * when this API is invoked. |
| 333 | */ |
| 334 | void rdmacg_unregister_device(struct rdmacg_device *device) |
| 335 | { |
| 336 | struct rdmacg_resource_pool *rpool, *tmp; |
| 337 | |
| 338 | /* |
| 339 | * Synchronize with any active resource settings, |
| 340 | * usage query happening via configfs. |
| 341 | */ |
| 342 | mutex_lock(&rdmacg_mutex); |
| 343 | list_del_init(&device->dev_node); |
| 344 | |
| 345 | /* |
| 346 | * Now that this device is off the cgroup list, its safe to free |
| 347 | * all the rpool resources. |
| 348 | */ |
| 349 | list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node) |
| 350 | free_cg_rpool_locked(rpool); |
| 351 | |
| 352 | mutex_unlock(&rdmacg_mutex); |
| 353 | } |
| 354 | EXPORT_SYMBOL(rdmacg_unregister_device); |
| 355 | |
| 356 | static int parse_resource(char *c, int *intval) |
| 357 | { |
| 358 | substring_t argstr; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 359 | char *name, *value = c; |
| 360 | size_t len; |
Andy Shevchenko | cc659e76 | 2018-05-04 17:42:16 +0300 | [diff] [blame] | 361 | int ret, i; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 362 | |
| 363 | name = strsep(&value, "="); |
| 364 | if (!name || !value) |
| 365 | return -EINVAL; |
| 366 | |
Andy Shevchenko | cc659e76 | 2018-05-04 17:42:16 +0300 | [diff] [blame] | 367 | i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name); |
| 368 | if (i < 0) |
| 369 | return i; |
| 370 | |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 371 | len = strlen(value); |
| 372 | |
Andy Shevchenko | cc659e76 | 2018-05-04 17:42:16 +0300 | [diff] [blame] | 373 | argstr.from = value; |
| 374 | argstr.to = value + len; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 375 | |
Andy Shevchenko | cc659e76 | 2018-05-04 17:42:16 +0300 | [diff] [blame] | 376 | ret = match_int(&argstr, intval); |
| 377 | if (ret >= 0) { |
| 378 | if (*intval < 0) |
| 379 | return -EINVAL; |
| 380 | return i; |
| 381 | } |
| 382 | if (strncmp(value, RDMACG_MAX_STR, len) == 0) { |
| 383 | *intval = S32_MAX; |
| 384 | return i; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 385 | } |
| 386 | return -EINVAL; |
| 387 | } |
| 388 | |
| 389 | static int rdmacg_parse_limits(char *options, |
| 390 | int *new_limits, unsigned long *enables) |
| 391 | { |
| 392 | char *c; |
| 393 | int err = -EINVAL; |
| 394 | |
| 395 | /* parse resource options */ |
| 396 | while ((c = strsep(&options, " ")) != NULL) { |
| 397 | int index, intval; |
| 398 | |
| 399 | index = parse_resource(c, &intval); |
| 400 | if (index < 0) |
| 401 | goto err; |
| 402 | |
| 403 | new_limits[index] = intval; |
| 404 | *enables |= BIT(index); |
| 405 | } |
| 406 | return 0; |
| 407 | |
| 408 | err: |
| 409 | return err; |
| 410 | } |
| 411 | |
| 412 | static struct rdmacg_device *rdmacg_get_device_locked(const char *name) |
| 413 | { |
| 414 | struct rdmacg_device *device; |
| 415 | |
| 416 | lockdep_assert_held(&rdmacg_mutex); |
| 417 | |
| 418 | list_for_each_entry(device, &rdmacg_devices, dev_node) |
| 419 | if (!strcmp(name, device->name)) |
| 420 | return device; |
| 421 | |
| 422 | return NULL; |
| 423 | } |
| 424 | |
| 425 | static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, |
| 426 | char *buf, size_t nbytes, loff_t off) |
| 427 | { |
| 428 | struct rdma_cgroup *cg = css_rdmacg(of_css(of)); |
| 429 | const char *dev_name; |
| 430 | struct rdmacg_resource_pool *rpool; |
| 431 | struct rdmacg_device *device; |
| 432 | char *options = strstrip(buf); |
| 433 | int *new_limits; |
| 434 | unsigned long enables = 0; |
| 435 | int i = 0, ret = 0; |
| 436 | |
| 437 | /* extract the device name first */ |
| 438 | dev_name = strsep(&options, " "); |
| 439 | if (!dev_name) { |
| 440 | ret = -EINVAL; |
| 441 | goto err; |
| 442 | } |
| 443 | |
| 444 | new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL); |
| 445 | if (!new_limits) { |
| 446 | ret = -ENOMEM; |
| 447 | goto err; |
| 448 | } |
| 449 | |
| 450 | ret = rdmacg_parse_limits(options, new_limits, &enables); |
| 451 | if (ret) |
| 452 | goto parse_err; |
| 453 | |
| 454 | /* acquire lock to synchronize with hot plug devices */ |
| 455 | mutex_lock(&rdmacg_mutex); |
| 456 | |
| 457 | device = rdmacg_get_device_locked(dev_name); |
| 458 | if (!device) { |
| 459 | ret = -ENODEV; |
| 460 | goto dev_err; |
| 461 | } |
| 462 | |
| 463 | rpool = get_cg_rpool_locked(cg, device); |
| 464 | if (IS_ERR(rpool)) { |
| 465 | ret = PTR_ERR(rpool); |
| 466 | goto dev_err; |
| 467 | } |
| 468 | |
| 469 | /* now set the new limits of the rpool */ |
| 470 | for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX) |
| 471 | set_resource_limit(rpool, i, new_limits[i]); |
| 472 | |
| 473 | if (rpool->usage_sum == 0 && |
| 474 | rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { |
| 475 | /* |
| 476 | * No user of the rpool and all entries are set to max, so |
| 477 | * safe to delete this rpool. |
| 478 | */ |
| 479 | free_cg_rpool_locked(rpool); |
| 480 | } |
| 481 | |
| 482 | dev_err: |
| 483 | mutex_unlock(&rdmacg_mutex); |
| 484 | |
| 485 | parse_err: |
| 486 | kfree(new_limits); |
| 487 | |
| 488 | err: |
| 489 | return ret ?: nbytes; |
| 490 | } |
| 491 | |
| 492 | static void print_rpool_values(struct seq_file *sf, |
| 493 | struct rdmacg_resource_pool *rpool) |
| 494 | { |
| 495 | enum rdmacg_file_type sf_type; |
| 496 | int i; |
| 497 | u32 value; |
| 498 | |
| 499 | sf_type = seq_cft(sf)->private; |
| 500 | |
| 501 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { |
| 502 | seq_puts(sf, rdmacg_resource_names[i]); |
| 503 | seq_putc(sf, '='); |
| 504 | if (sf_type == RDMACG_RESOURCE_TYPE_MAX) { |
| 505 | if (rpool) |
| 506 | value = rpool->resources[i].max; |
| 507 | else |
| 508 | value = S32_MAX; |
| 509 | } else { |
| 510 | if (rpool) |
| 511 | value = rpool->resources[i].usage; |
Parav Pandit | 7896dfb | 2017-01-10 17:51:48 +0000 | [diff] [blame] | 512 | else |
| 513 | value = 0; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 514 | } |
| 515 | |
| 516 | if (value == S32_MAX) |
| 517 | seq_puts(sf, RDMACG_MAX_STR); |
| 518 | else |
| 519 | seq_printf(sf, "%d", value); |
| 520 | seq_putc(sf, ' '); |
| 521 | } |
| 522 | } |
| 523 | |
| 524 | static int rdmacg_resource_read(struct seq_file *sf, void *v) |
| 525 | { |
| 526 | struct rdmacg_device *device; |
| 527 | struct rdmacg_resource_pool *rpool; |
| 528 | struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); |
| 529 | |
| 530 | mutex_lock(&rdmacg_mutex); |
| 531 | |
| 532 | list_for_each_entry(device, &rdmacg_devices, dev_node) { |
| 533 | seq_printf(sf, "%s ", device->name); |
| 534 | |
| 535 | rpool = find_cg_rpool_locked(cg, device); |
| 536 | print_rpool_values(sf, rpool); |
| 537 | |
| 538 | seq_putc(sf, '\n'); |
| 539 | } |
| 540 | |
| 541 | mutex_unlock(&rdmacg_mutex); |
| 542 | return 0; |
| 543 | } |
| 544 | |
| 545 | static struct cftype rdmacg_files[] = { |
| 546 | { |
| 547 | .name = "max", |
| 548 | .write = rdmacg_resource_set_max, |
| 549 | .seq_show = rdmacg_resource_read, |
| 550 | .private = RDMACG_RESOURCE_TYPE_MAX, |
| 551 | .flags = CFTYPE_NOT_ON_ROOT, |
| 552 | }, |
| 553 | { |
| 554 | .name = "current", |
| 555 | .seq_show = rdmacg_resource_read, |
| 556 | .private = RDMACG_RESOURCE_TYPE_STAT, |
| 557 | .flags = CFTYPE_NOT_ON_ROOT, |
| 558 | }, |
| 559 | { } /* terminate */ |
| 560 | }; |
| 561 | |
| 562 | static struct cgroup_subsys_state * |
| 563 | rdmacg_css_alloc(struct cgroup_subsys_state *parent) |
| 564 | { |
| 565 | struct rdma_cgroup *cg; |
| 566 | |
| 567 | cg = kzalloc(sizeof(*cg), GFP_KERNEL); |
| 568 | if (!cg) |
| 569 | return ERR_PTR(-ENOMEM); |
| 570 | |
| 571 | INIT_LIST_HEAD(&cg->rpools); |
| 572 | return &cg->css; |
| 573 | } |
| 574 | |
/* css_free callback: free the rdma cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
| 581 | |
| 582 | /** |
| 583 | * rdmacg_css_offline - cgroup css_offline callback |
| 584 | * @css: css of interest |
| 585 | * |
| 586 | * This function is called when @css is about to go away and responsible |
| 587 | * for shooting down all rdmacg associated with @css. As part of that it |
| 588 | * marks all the resource pool entries to max value, so that when resources are |
| 589 | * uncharged, associated resource pool can be freed as well. |
| 590 | */ |
| 591 | static void rdmacg_css_offline(struct cgroup_subsys_state *css) |
| 592 | { |
| 593 | struct rdma_cgroup *cg = css_rdmacg(css); |
| 594 | struct rdmacg_resource_pool *rpool; |
| 595 | |
| 596 | mutex_lock(&rdmacg_mutex); |
| 597 | |
| 598 | list_for_each_entry(rpool, &cg->rpools, cg_node) |
| 599 | set_all_resource_max_limit(rpool); |
| 600 | |
| 601 | mutex_unlock(&rdmacg_mutex); |
| 602 | } |
| 603 | |
| 604 | struct cgroup_subsys rdma_cgrp_subsys = { |
| 605 | .css_alloc = rdmacg_css_alloc, |
| 606 | .css_free = rdmacg_css_free, |
| 607 | .css_offline = rdmacg_css_offline, |
| 608 | .legacy_cftypes = rdmacg_files, |
| 609 | .dfl_cftypes = rdmacg_files, |
| 610 | }; |