/*
 * RDMA resource limiting controller for cgroups.
 *
 * Used to allow a cgroup hierarchy to stop processes from consuming
 * additional RDMA resources after a certain limit is reached.
 *
 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
 *
 * This file is subject to the terms and conditions of version 2 of the GNU
 * General Public License. See the file COPYING in the main directory of the
 * Linux distribution for more details.
 */
| 13 | |
| 14 | #include <linux/bitops.h> |
| 15 | #include <linux/slab.h> |
| 16 | #include <linux/seq_file.h> |
| 17 | #include <linux/cgroup.h> |
| 18 | #include <linux/parser.h> |
| 19 | #include <linux/cgroup_rdma.h> |
| 20 | |
/* Token used in the interface files to read and write an unlimited (S32_MAX) value. */
#define RDMACG_MAX_STR "max"

/*
 * Protects list of resource pools maintained on per cgroup basis
 * and rdma device list.
 */
static DEFINE_MUTEX(rdmacg_mutex);
/* All registered rdmacg devices, linked through rdmacg_device.dev_node. */
static LIST_HEAD(rdmacg_devices);
| 29 | |
/*
 * Selects which value of a resource is shown by a control file; stored in
 * the .private member of the corresponding cftype entry.
 */
enum rdmacg_file_type {
	/* show the configured limit of each resource */
	RDMACG_RESOURCE_TYPE_MAX,
	/* show the current usage of each resource */
	RDMACG_RESOURCE_TYPE_STAT,
};
| 34 | |
| 35 | /* |
| 36 | * resource table definition as to be seen by the user. |
| 37 | * Need to add entries to it when more resources are |
| 38 | * added/defined at IB verb/core layer. |
| 39 | */ |
| 40 | static char const *rdmacg_resource_names[] = { |
| 41 | [RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle", |
| 42 | [RDMACG_RESOURCE_HCA_OBJECT] = "hca_object", |
| 43 | }; |
| 44 | |
/* resource tracker for each resource of rdma cgroup */
struct rdmacg_resource {
	/* configured limit; S32_MAX is the "max" (unlimited) setting */
	int max;
	/* number of charges currently held against this resource */
	int usage;
};
| 50 | |
/*
 * resource pool object which represents per cgroup, per device
 * resources. There are multiple instances of this object per cgroup,
 * therefore it cannot be embedded within rdma_cgroup structure. It
 * is maintained as list.
 */
struct rdmacg_resource_pool {
	/* device whose resources this pool accounts for */
	struct rdmacg_device *device;
	/* limit and usage of every resource, indexed by resource type */
	struct rdmacg_resource resources[RDMACG_RESOURCE_MAX];

	/* entry in the owning rdma_cgroup's rpools list */
	struct list_head cg_node;
	/* entry in the owning rdmacg_device's rpools list */
	struct list_head dev_node;

	/* total number of charges held in this pool, over all resources */
	u64 usage_sum;
	/* total number counts which are set to max */
	int num_max_cnt;
};
| 69 | |
| 70 | static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css) |
| 71 | { |
| 72 | return container_of(css, struct rdma_cgroup, css); |
| 73 | } |
| 74 | |
| 75 | static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg) |
| 76 | { |
| 77 | return css_rdmacg(cg->css.parent); |
| 78 | } |
| 79 | |
| 80 | static inline struct rdma_cgroup *get_current_rdmacg(void) |
| 81 | { |
| 82 | return css_rdmacg(task_get_css(current, rdma_cgrp_id)); |
| 83 | } |
| 84 | |
| 85 | static void set_resource_limit(struct rdmacg_resource_pool *rpool, |
| 86 | int index, int new_max) |
| 87 | { |
| 88 | if (new_max == S32_MAX) { |
| 89 | if (rpool->resources[index].max != S32_MAX) |
| 90 | rpool->num_max_cnt++; |
| 91 | } else { |
| 92 | if (rpool->resources[index].max == S32_MAX) |
| 93 | rpool->num_max_cnt--; |
| 94 | } |
| 95 | rpool->resources[index].max = new_max; |
| 96 | } |
| 97 | |
| 98 | static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool) |
| 99 | { |
| 100 | int i; |
| 101 | |
| 102 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) |
| 103 | set_resource_limit(rpool, i, S32_MAX); |
| 104 | } |
| 105 | |
| 106 | static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool) |
| 107 | { |
| 108 | lockdep_assert_held(&rdmacg_mutex); |
| 109 | |
| 110 | list_del(&rpool->cg_node); |
| 111 | list_del(&rpool->dev_node); |
| 112 | kfree(rpool); |
| 113 | } |
| 114 | |
| 115 | static struct rdmacg_resource_pool * |
| 116 | find_cg_rpool_locked(struct rdma_cgroup *cg, |
| 117 | struct rdmacg_device *device) |
| 118 | |
| 119 | { |
| 120 | struct rdmacg_resource_pool *pool; |
| 121 | |
| 122 | lockdep_assert_held(&rdmacg_mutex); |
| 123 | |
| 124 | list_for_each_entry(pool, &cg->rpools, cg_node) |
| 125 | if (pool->device == device) |
| 126 | return pool; |
| 127 | |
| 128 | return NULL; |
| 129 | } |
| 130 | |
| 131 | static struct rdmacg_resource_pool * |
| 132 | get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device) |
| 133 | { |
| 134 | struct rdmacg_resource_pool *rpool; |
| 135 | |
| 136 | rpool = find_cg_rpool_locked(cg, device); |
| 137 | if (rpool) |
| 138 | return rpool; |
| 139 | |
| 140 | rpool = kzalloc(sizeof(*rpool), GFP_KERNEL); |
| 141 | if (!rpool) |
| 142 | return ERR_PTR(-ENOMEM); |
| 143 | |
| 144 | rpool->device = device; |
| 145 | set_all_resource_max_limit(rpool); |
| 146 | |
| 147 | INIT_LIST_HEAD(&rpool->cg_node); |
| 148 | INIT_LIST_HEAD(&rpool->dev_node); |
| 149 | list_add_tail(&rpool->cg_node, &cg->rpools); |
| 150 | list_add_tail(&rpool->dev_node, &device->rpools); |
| 151 | return rpool; |
| 152 | } |
| 153 | |
| 154 | /** |
| 155 | * uncharge_cg_locked - uncharge resource for rdma cgroup |
| 156 | * @cg: pointer to cg to uncharge and all parents in hierarchy |
| 157 | * @device: pointer to rdmacg device |
| 158 | * @index: index of the resource to uncharge in cg (resource pool) |
| 159 | * |
| 160 | * It also frees the resource pool which was created as part of |
| 161 | * charging operation when there are no resources attached to |
| 162 | * resource pool. |
| 163 | */ |
| 164 | static void |
| 165 | uncharge_cg_locked(struct rdma_cgroup *cg, |
| 166 | struct rdmacg_device *device, |
| 167 | enum rdmacg_resource_type index) |
| 168 | { |
| 169 | struct rdmacg_resource_pool *rpool; |
| 170 | |
| 171 | rpool = find_cg_rpool_locked(cg, device); |
| 172 | |
| 173 | /* |
| 174 | * rpool cannot be null at this stage. Let kernel operate in case |
| 175 | * if there a bug in IB stack or rdma controller, instead of crashing |
| 176 | * the system. |
| 177 | */ |
| 178 | if (unlikely(!rpool)) { |
| 179 | pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device); |
| 180 | return; |
| 181 | } |
| 182 | |
| 183 | rpool->resources[index].usage--; |
| 184 | |
| 185 | /* |
| 186 | * A negative count (or overflow) is invalid, |
| 187 | * it indicates a bug in the rdma controller. |
| 188 | */ |
| 189 | WARN_ON_ONCE(rpool->resources[index].usage < 0); |
| 190 | rpool->usage_sum--; |
| 191 | if (rpool->usage_sum == 0 && |
| 192 | rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { |
| 193 | /* |
| 194 | * No user of the rpool and all entries are set to max, so |
| 195 | * safe to delete this rpool. |
| 196 | */ |
| 197 | free_cg_rpool_locked(rpool); |
| 198 | } |
| 199 | } |
| 200 | |
| 201 | /** |
| 202 | * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count |
| 203 | * @device: pointer to rdmacg device |
| 204 | * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup |
| 205 | * stop uncharging |
| 206 | * @index: index of the resource to uncharge in cg in given resource pool |
| 207 | */ |
| 208 | static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg, |
| 209 | struct rdmacg_device *device, |
| 210 | struct rdma_cgroup *stop_cg, |
| 211 | enum rdmacg_resource_type index) |
| 212 | { |
| 213 | struct rdma_cgroup *p; |
| 214 | |
| 215 | mutex_lock(&rdmacg_mutex); |
| 216 | |
| 217 | for (p = cg; p != stop_cg; p = parent_rdmacg(p)) |
| 218 | uncharge_cg_locked(p, device, index); |
| 219 | |
| 220 | mutex_unlock(&rdmacg_mutex); |
| 221 | |
| 222 | css_put(&cg->css); |
| 223 | } |
| 224 | |
| 225 | /** |
| 226 | * rdmacg_uncharge - hierarchically uncharge rdma resource count |
| 227 | * @device: pointer to rdmacg device |
| 228 | * @index: index of the resource to uncharge in cgroup in given resource pool |
| 229 | */ |
| 230 | void rdmacg_uncharge(struct rdma_cgroup *cg, |
| 231 | struct rdmacg_device *device, |
| 232 | enum rdmacg_resource_type index) |
| 233 | { |
| 234 | if (index >= RDMACG_RESOURCE_MAX) |
| 235 | return; |
| 236 | |
| 237 | rdmacg_uncharge_hierarchy(cg, device, NULL, index); |
| 238 | } |
| 239 | EXPORT_SYMBOL(rdmacg_uncharge); |
| 240 | |
| 241 | /** |
| 242 | * rdmacg_try_charge - hierarchically try to charge the rdma resource |
| 243 | * @rdmacg: pointer to rdma cgroup which will own this resource |
| 244 | * @device: pointer to rdmacg device |
| 245 | * @index: index of the resource to charge in cgroup (resource pool) |
| 246 | * |
| 247 | * This function follows charging resource in hierarchical way. |
| 248 | * It will fail if the charge would cause the new value to exceed the |
| 249 | * hierarchical limit. |
| 250 | * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. |
| 251 | * Returns pointer to rdmacg for this resource when charging is successful. |
| 252 | * |
| 253 | * Charger needs to account resources on two criteria. |
| 254 | * (a) per cgroup & (b) per device resource usage. |
| 255 | * Per cgroup resource usage ensures that tasks of cgroup doesn't cross |
| 256 | * the configured limits. Per device provides granular configuration |
| 257 | * in multi device usage. It allocates resource pool in the hierarchy |
| 258 | * for each parent it come across for first resource. Later on resource |
| 259 | * pool will be available. Therefore it will be much faster thereon |
| 260 | * to charge/uncharge. |
| 261 | */ |
| 262 | int rdmacg_try_charge(struct rdma_cgroup **rdmacg, |
| 263 | struct rdmacg_device *device, |
| 264 | enum rdmacg_resource_type index) |
| 265 | { |
| 266 | struct rdma_cgroup *cg, *p; |
| 267 | struct rdmacg_resource_pool *rpool; |
| 268 | s64 new; |
| 269 | int ret = 0; |
| 270 | |
| 271 | if (index >= RDMACG_RESOURCE_MAX) |
| 272 | return -EINVAL; |
| 273 | |
| 274 | /* |
| 275 | * hold on to css, as cgroup can be removed but resource |
| 276 | * accounting happens on css. |
| 277 | */ |
| 278 | cg = get_current_rdmacg(); |
| 279 | |
| 280 | mutex_lock(&rdmacg_mutex); |
| 281 | for (p = cg; p; p = parent_rdmacg(p)) { |
| 282 | rpool = get_cg_rpool_locked(p, device); |
| 283 | if (IS_ERR(rpool)) { |
| 284 | ret = PTR_ERR(rpool); |
| 285 | goto err; |
| 286 | } else { |
| 287 | new = rpool->resources[index].usage + 1; |
| 288 | if (new > rpool->resources[index].max) { |
| 289 | ret = -EAGAIN; |
| 290 | goto err; |
| 291 | } else { |
| 292 | rpool->resources[index].usage = new; |
| 293 | rpool->usage_sum++; |
| 294 | } |
| 295 | } |
| 296 | } |
| 297 | mutex_unlock(&rdmacg_mutex); |
| 298 | |
| 299 | *rdmacg = cg; |
| 300 | return 0; |
| 301 | |
| 302 | err: |
| 303 | mutex_unlock(&rdmacg_mutex); |
| 304 | rdmacg_uncharge_hierarchy(cg, device, p, index); |
| 305 | return ret; |
| 306 | } |
| 307 | EXPORT_SYMBOL(rdmacg_try_charge); |
| 308 | |
| 309 | /** |
| 310 | * rdmacg_register_device - register rdmacg device to rdma controller. |
| 311 | * @device: pointer to rdmacg device whose resources need to be accounted. |
| 312 | * |
| 313 | * If IB stack wish a device to participate in rdma cgroup resource |
| 314 | * tracking, it must invoke this API to register with rdma cgroup before |
| 315 | * any user space application can start using the RDMA resources. |
| 316 | * Returns 0 on success or EINVAL when table length given is beyond |
| 317 | * supported size. |
| 318 | */ |
| 319 | int rdmacg_register_device(struct rdmacg_device *device) |
| 320 | { |
| 321 | INIT_LIST_HEAD(&device->dev_node); |
| 322 | INIT_LIST_HEAD(&device->rpools); |
| 323 | |
| 324 | mutex_lock(&rdmacg_mutex); |
| 325 | list_add_tail(&device->dev_node, &rdmacg_devices); |
| 326 | mutex_unlock(&rdmacg_mutex); |
| 327 | return 0; |
| 328 | } |
| 329 | EXPORT_SYMBOL(rdmacg_register_device); |
| 330 | |
| 331 | /** |
| 332 | * rdmacg_unregister_device - unregister rdmacg device from rdma controller. |
| 333 | * @device: pointer to rdmacg device which was previously registered with rdma |
| 334 | * controller using rdmacg_register_device(). |
| 335 | * |
| 336 | * IB stack must invoke this after all the resources of the IB device |
| 337 | * are destroyed and after ensuring that no more resources will be created |
| 338 | * when this API is invoked. |
| 339 | */ |
| 340 | void rdmacg_unregister_device(struct rdmacg_device *device) |
| 341 | { |
| 342 | struct rdmacg_resource_pool *rpool, *tmp; |
| 343 | |
| 344 | /* |
| 345 | * Synchronize with any active resource settings, |
| 346 | * usage query happening via configfs. |
| 347 | */ |
| 348 | mutex_lock(&rdmacg_mutex); |
| 349 | list_del_init(&device->dev_node); |
| 350 | |
| 351 | /* |
| 352 | * Now that this device is off the cgroup list, its safe to free |
| 353 | * all the rpool resources. |
| 354 | */ |
| 355 | list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node) |
| 356 | free_cg_rpool_locked(rpool); |
| 357 | |
| 358 | mutex_unlock(&rdmacg_mutex); |
| 359 | } |
| 360 | EXPORT_SYMBOL(rdmacg_unregister_device); |
| 361 | |
| 362 | static int parse_resource(char *c, int *intval) |
| 363 | { |
| 364 | substring_t argstr; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 365 | char *name, *value = c; |
| 366 | size_t len; |
Andy Shevchenko | cc659e76 | 2018-05-04 17:42:16 +0300 | [diff] [blame^] | 367 | int ret, i; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 368 | |
| 369 | name = strsep(&value, "="); |
| 370 | if (!name || !value) |
| 371 | return -EINVAL; |
| 372 | |
Andy Shevchenko | cc659e76 | 2018-05-04 17:42:16 +0300 | [diff] [blame^] | 373 | i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name); |
| 374 | if (i < 0) |
| 375 | return i; |
| 376 | |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 377 | len = strlen(value); |
| 378 | |
Andy Shevchenko | cc659e76 | 2018-05-04 17:42:16 +0300 | [diff] [blame^] | 379 | argstr.from = value; |
| 380 | argstr.to = value + len; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 381 | |
Andy Shevchenko | cc659e76 | 2018-05-04 17:42:16 +0300 | [diff] [blame^] | 382 | ret = match_int(&argstr, intval); |
| 383 | if (ret >= 0) { |
| 384 | if (*intval < 0) |
| 385 | return -EINVAL; |
| 386 | return i; |
| 387 | } |
| 388 | if (strncmp(value, RDMACG_MAX_STR, len) == 0) { |
| 389 | *intval = S32_MAX; |
| 390 | return i; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 391 | } |
| 392 | return -EINVAL; |
| 393 | } |
| 394 | |
| 395 | static int rdmacg_parse_limits(char *options, |
| 396 | int *new_limits, unsigned long *enables) |
| 397 | { |
| 398 | char *c; |
| 399 | int err = -EINVAL; |
| 400 | |
| 401 | /* parse resource options */ |
| 402 | while ((c = strsep(&options, " ")) != NULL) { |
| 403 | int index, intval; |
| 404 | |
| 405 | index = parse_resource(c, &intval); |
| 406 | if (index < 0) |
| 407 | goto err; |
| 408 | |
| 409 | new_limits[index] = intval; |
| 410 | *enables |= BIT(index); |
| 411 | } |
| 412 | return 0; |
| 413 | |
| 414 | err: |
| 415 | return err; |
| 416 | } |
| 417 | |
| 418 | static struct rdmacg_device *rdmacg_get_device_locked(const char *name) |
| 419 | { |
| 420 | struct rdmacg_device *device; |
| 421 | |
| 422 | lockdep_assert_held(&rdmacg_mutex); |
| 423 | |
| 424 | list_for_each_entry(device, &rdmacg_devices, dev_node) |
| 425 | if (!strcmp(name, device->name)) |
| 426 | return device; |
| 427 | |
| 428 | return NULL; |
| 429 | } |
| 430 | |
| 431 | static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, |
| 432 | char *buf, size_t nbytes, loff_t off) |
| 433 | { |
| 434 | struct rdma_cgroup *cg = css_rdmacg(of_css(of)); |
| 435 | const char *dev_name; |
| 436 | struct rdmacg_resource_pool *rpool; |
| 437 | struct rdmacg_device *device; |
| 438 | char *options = strstrip(buf); |
| 439 | int *new_limits; |
| 440 | unsigned long enables = 0; |
| 441 | int i = 0, ret = 0; |
| 442 | |
| 443 | /* extract the device name first */ |
| 444 | dev_name = strsep(&options, " "); |
| 445 | if (!dev_name) { |
| 446 | ret = -EINVAL; |
| 447 | goto err; |
| 448 | } |
| 449 | |
| 450 | new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL); |
| 451 | if (!new_limits) { |
| 452 | ret = -ENOMEM; |
| 453 | goto err; |
| 454 | } |
| 455 | |
| 456 | ret = rdmacg_parse_limits(options, new_limits, &enables); |
| 457 | if (ret) |
| 458 | goto parse_err; |
| 459 | |
| 460 | /* acquire lock to synchronize with hot plug devices */ |
| 461 | mutex_lock(&rdmacg_mutex); |
| 462 | |
| 463 | device = rdmacg_get_device_locked(dev_name); |
| 464 | if (!device) { |
| 465 | ret = -ENODEV; |
| 466 | goto dev_err; |
| 467 | } |
| 468 | |
| 469 | rpool = get_cg_rpool_locked(cg, device); |
| 470 | if (IS_ERR(rpool)) { |
| 471 | ret = PTR_ERR(rpool); |
| 472 | goto dev_err; |
| 473 | } |
| 474 | |
| 475 | /* now set the new limits of the rpool */ |
| 476 | for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX) |
| 477 | set_resource_limit(rpool, i, new_limits[i]); |
| 478 | |
| 479 | if (rpool->usage_sum == 0 && |
| 480 | rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { |
| 481 | /* |
| 482 | * No user of the rpool and all entries are set to max, so |
| 483 | * safe to delete this rpool. |
| 484 | */ |
| 485 | free_cg_rpool_locked(rpool); |
| 486 | } |
| 487 | |
| 488 | dev_err: |
| 489 | mutex_unlock(&rdmacg_mutex); |
| 490 | |
| 491 | parse_err: |
| 492 | kfree(new_limits); |
| 493 | |
| 494 | err: |
| 495 | return ret ?: nbytes; |
| 496 | } |
| 497 | |
| 498 | static void print_rpool_values(struct seq_file *sf, |
| 499 | struct rdmacg_resource_pool *rpool) |
| 500 | { |
| 501 | enum rdmacg_file_type sf_type; |
| 502 | int i; |
| 503 | u32 value; |
| 504 | |
| 505 | sf_type = seq_cft(sf)->private; |
| 506 | |
| 507 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { |
| 508 | seq_puts(sf, rdmacg_resource_names[i]); |
| 509 | seq_putc(sf, '='); |
| 510 | if (sf_type == RDMACG_RESOURCE_TYPE_MAX) { |
| 511 | if (rpool) |
| 512 | value = rpool->resources[i].max; |
| 513 | else |
| 514 | value = S32_MAX; |
| 515 | } else { |
| 516 | if (rpool) |
| 517 | value = rpool->resources[i].usage; |
Parav Pandit | 7896dfb | 2017-01-10 17:51:48 +0000 | [diff] [blame] | 518 | else |
| 519 | value = 0; |
Parav Pandit | 39d3e75 | 2017-01-10 00:02:13 +0000 | [diff] [blame] | 520 | } |
| 521 | |
| 522 | if (value == S32_MAX) |
| 523 | seq_puts(sf, RDMACG_MAX_STR); |
| 524 | else |
| 525 | seq_printf(sf, "%d", value); |
| 526 | seq_putc(sf, ' '); |
| 527 | } |
| 528 | } |
| 529 | |
| 530 | static int rdmacg_resource_read(struct seq_file *sf, void *v) |
| 531 | { |
| 532 | struct rdmacg_device *device; |
| 533 | struct rdmacg_resource_pool *rpool; |
| 534 | struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); |
| 535 | |
| 536 | mutex_lock(&rdmacg_mutex); |
| 537 | |
| 538 | list_for_each_entry(device, &rdmacg_devices, dev_node) { |
| 539 | seq_printf(sf, "%s ", device->name); |
| 540 | |
| 541 | rpool = find_cg_rpool_locked(cg, device); |
| 542 | print_rpool_values(sf, rpool); |
| 543 | |
| 544 | seq_putc(sf, '\n'); |
| 545 | } |
| 546 | |
| 547 | mutex_unlock(&rdmacg_mutex); |
| 548 | return 0; |
| 549 | } |
| 550 | |
/*
 * Control files of the rdma controller. .private carries the
 * enum rdmacg_file_type that print_rpool_values() uses to choose between
 * limits and usage.
 */
static struct cftype rdmacg_files[] = {
	{
		/* readable and writable: per device resource limits */
		.name = "max",
		.write = rdmacg_resource_set_max,
		.seq_show = rdmacg_resource_read,
		.private = RDMACG_RESOURCE_TYPE_MAX,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{
		/* no .write handler: per device resource usage */
		.name = "current",
		.seq_show = rdmacg_resource_read,
		.private = RDMACG_RESOURCE_TYPE_STAT,
		.flags = CFTYPE_NOT_ON_ROOT,
	},
	{ } /* terminate */
};
| 567 | |
| 568 | static struct cgroup_subsys_state * |
| 569 | rdmacg_css_alloc(struct cgroup_subsys_state *parent) |
| 570 | { |
| 571 | struct rdma_cgroup *cg; |
| 572 | |
| 573 | cg = kzalloc(sizeof(*cg), GFP_KERNEL); |
| 574 | if (!cg) |
| 575 | return ERR_PTR(-ENOMEM); |
| 576 | |
| 577 | INIT_LIST_HEAD(&cg->rpools); |
| 578 | return &cg->css; |
| 579 | } |
| 580 | |
/* css_free callback: release the rdma_cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
| 587 | |
| 588 | /** |
| 589 | * rdmacg_css_offline - cgroup css_offline callback |
| 590 | * @css: css of interest |
| 591 | * |
| 592 | * This function is called when @css is about to go away and responsible |
| 593 | * for shooting down all rdmacg associated with @css. As part of that it |
| 594 | * marks all the resource pool entries to max value, so that when resources are |
| 595 | * uncharged, associated resource pool can be freed as well. |
| 596 | */ |
| 597 | static void rdmacg_css_offline(struct cgroup_subsys_state *css) |
| 598 | { |
| 599 | struct rdma_cgroup *cg = css_rdmacg(css); |
| 600 | struct rdmacg_resource_pool *rpool; |
| 601 | |
| 602 | mutex_lock(&rdmacg_mutex); |
| 603 | |
| 604 | list_for_each_entry(rpool, &cg->rpools, cg_node) |
| 605 | set_all_resource_max_limit(rpool); |
| 606 | |
| 607 | mutex_unlock(&rdmacg_mutex); |
| 608 | } |
| 609 | |
/*
 * Subsystem descriptor of the rdma controller. Both .legacy_cftypes and
 * .dfl_cftypes point at the same rdmacg_files table.
 */
struct cgroup_subsys rdma_cgrp_subsys = {
	.css_alloc = rdmacg_css_alloc,
	.css_free = rdmacg_css_free,
	.css_offline = rdmacg_css_offline,
	.legacy_cftypes = rdmacg_files,
	.dfl_cftypes = rdmacg_files,
};