/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 *
 */

#include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>

enum hugetlb_memory_event {
	HUGETLB_MAX,
	HUGETLB_NR_MEMORY_EVENTS,
};

struct hugetlb_cgroup {
	struct cgroup_subsys_state css;

	/*
	 * the counter to account for hugepages from hugetlb.
	 */
	struct page_counter hugepage[HUGE_MAX_HSTATE];

	/*
	 * the counter to account for hugepage reservations from hugetlb.
	 */
	struct page_counter rsvd_hugepage[HUGE_MAX_HSTATE];

	atomic_long_t events[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];
	atomic_long_t events_local[HUGE_MAX_HSTATE][HUGETLB_NR_MEMORY_EVENTS];

	/* Handle for "hugetlb.events" */
	struct cgroup_file events_file[HUGE_MAX_HSTATE];

	/* Handle for "hugetlb.events.local" */
	struct cgroup_file events_local_file[HUGE_MAX_HSTATE];
};

#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)
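
/*
 * Worked example of the encoding above (editor's note, not upstream
 * documentation): cft->private packs an hstate index in the upper 16 bits
 * and a RES_* attribute (defined further down) in the lower 16 bits, so
 * MEMFILE_PRIVATE(1, RES_LIMIT) stores (1 << 16) | RES_LIMIT, from which
 * MEMFILE_IDX() recovers 1 and MEMFILE_ATTR() recovers RES_LIMIT.
 */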

#define hugetlb_cgroup_from_counter(counter, idx)                   \
	container_of(counter, struct hugetlb_cgroup, hugepage[idx])

static struct hugetlb_cgroup *root_h_cgroup __read_mostly;

static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
				   bool rsvd)
{
	if (rsvd)
		return &h_cg->rsvd_hugepage[idx];
	return &h_cg->hugepage[idx];
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
{
	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
}

static inline
struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
{
	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
}

static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
{
	return (h_cg == root_h_cgroup);
}

static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
{
	return hugetlb_cgroup_from_css(h_cg->css.parent);
}

static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
{
	int idx;

	for (idx = 0; idx < hugetlb_max_hstate; idx++) {
		if (page_counter_read(&h_cg->hugepage[idx]))
			return true;
	}
	return false;
}

static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
				struct hugetlb_cgroup *parent_h_cgroup)
{
	int idx;

	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
		struct page_counter *counter = &h_cgroup->hugepage[idx];
		struct page_counter *parent = NULL;
		unsigned long limit;
		int ret;

		if (parent_h_cgroup)
			parent = &parent_h_cgroup->hugepage[idx];
		page_counter_init(counter, parent);

		limit = round_down(PAGE_COUNTER_MAX,
				   1 << huge_page_order(&hstates[idx]));
		ret = page_counter_set_max(counter, limit);
		VM_BUG_ON(ret);
	}
}
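
/*
 * Note (added for clarity): the default limit is PAGE_COUNTER_MAX rounded
 * down to a whole number of huge pages; hugetlb_cgroup_read_u64_max()
 * recomputes this exact value and prints "max" when the limit still
 * matches it.
 */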

static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
	struct hugetlb_cgroup *h_cgroup;

	h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
	if (!h_cgroup)
		return ERR_PTR(-ENOMEM);

	if (!parent_h_cgroup)
		root_h_cgroup = h_cgroup;

	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
	return &h_cgroup->css;
}

static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cgroup;

	h_cgroup = hugetlb_cgroup_from_css(css);
	kfree(h_cgroup);
}

/*
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved from the
 * active list or uncharged from the cgroup, so there is no need to take
 * a page reference or test whether the page is active here. This
 * function cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct page *page)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *page_hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

	page_hcg = hugetlb_cgroup_from_page(page);
	/*
	 * We can have pages on the active list that belong to no cgroup,
	 * i.e. hugepages with fewer than 3 base pages. We can safely
	 * ignore those pages.
	 */
	if (!page_hcg || page_hcg != h_cg)
		goto out;

	nr_pages = compound_nr(page);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/* Take the pages off the local counter */
	page_counter_cancel(counter, nr_pages);

	set_hugetlb_cgroup(page, parent);
out:
	return;
}

/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct page *page;
	int idx = 0;

	do {
		for_each_hstate(h) {
			spin_lock(&hugetlb_lock);
			list_for_each_entry(page, &h->hugepage_activelist, lru)
				hugetlb_cgroup_move_parent(idx, h_cg, page);

			spin_unlock(&hugetlb_lock);
			idx++;
		}
		cond_resched();
	} while (hugetlb_cgroup_have_usage(h_cg));
}
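
/*
 * Note (added for clarity): the "local" counter below records only events
 * that happened in this cgroup, while the hierarchical counter is bumped
 * in this cgroup and in every ancestor up to, but not including, the root.
 */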
static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
				 enum hugetlb_memory_event event)
{
	atomic_long_inc(&hugetlb->events_local[idx][event]);
	cgroup_file_notify(&hugetlb->events_local_file[idx]);

	do {
		atomic_long_inc(&hugetlb->events[idx][event]);
		cgroup_file_notify(&hugetlb->events_file[idx]);
	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
		 !hugetlb_cgroup_is_root(hugetlb));
}

int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
	/*
	 * We don't charge any cgroup if the compound page has fewer
	 * than 3 base pages.
	 */
	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(&h_cg->hugepage[idx], nr_pages,
				     &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
	}
	css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}
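
/*
 * Typical caller-side sequence (a sketch added for illustration; the real
 * call sites live in mm/hugetlb.c, e.g. alloc_huge_page()):
 *
 *	ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
 *	if (ret)
 *		return ERR_PTR(-ENOSPC);
 *	spin_lock(&hugetlb_lock);
 *	... dequeue or allocate the huge page ...
 *	hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
 *	spin_unlock(&hugetlb_lock);
 */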

/* Should be called with hugetlb_lock held */
void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct page *page)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	set_hugetlb_cgroup(page, h_cg);
	return;
}

/*
 * Should be called with hugetlb_lock held
 */
void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
				  struct page *page)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_page(page);
	if (unlikely(!h_cg))
		return;
	set_hugetlb_cgroup(page, NULL);
	page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
	return;
}

void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
		return;

	page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
	return;
}

enum {
	RES_USAGE,
	RES_RSVD_USAGE,
	RES_LIMIT,
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,
	RES_RSVD_FAILCNT,
};

static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
				   struct cftype *cft)
{
	struct page_counter *counter;
	struct page_counter *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);

	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_USAGE:
		return (u64)page_counter_read(counter) * PAGE_SIZE;
	case RES_RSVD_USAGE:
		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
	case RES_LIMIT:
		return (u64)counter->max * PAGE_SIZE;
	case RES_RSVD_LIMIT:
		return (u64)rsvd_counter->max * PAGE_SIZE;
	case RES_MAX_USAGE:
		return (u64)counter->watermark * PAGE_SIZE;
	case RES_RSVD_MAX_USAGE:
		return (u64)rsvd_counter->watermark * PAGE_SIZE;
	case RES_FAILCNT:
		return counter->failcnt;
	case RES_RSVD_FAILCNT:
		return rsvd_counter->failcnt;
	default:
		BUG();
	}
}

static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
{
	int idx;
	u64 val;
	struct cftype *cft = seq_cft(seq);
	unsigned long limit;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);
	counter = &h_cg->hugepage[idx];

	limit = round_down(PAGE_COUNTER_MAX,
			   1 << huge_page_order(&hstates[idx]));

	switch (MEMFILE_ATTR(cft->private)) {
	case RES_RSVD_USAGE:
		counter = &h_cg->rsvd_hugepage[idx];
		/* Fall through. */
	case RES_USAGE:
		val = (u64)page_counter_read(counter);
		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	case RES_RSVD_LIMIT:
		counter = &h_cg->rsvd_hugepage[idx];
		/* Fall through. */
	case RES_LIMIT:
		val = (u64)counter->max;
		if (val == limit)
			seq_puts(seq, "max\n");
		else
			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
		break;
	default:
		BUG();
	}

	return 0;
}

static DEFINE_MUTEX(hugetlb_limit_mutex);

static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off,
				    const char *max)
{
	int ret, idx;
	unsigned long nr_pages;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
	bool rsvd = false;

	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
		return -EINVAL;

	buf = strstrip(buf);
	ret = page_counter_memparse(buf, max, &nr_pages);
	if (ret)
		return ret;

	idx = MEMFILE_IDX(of_cft(of)->private);
	nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx]));

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_RSVD_LIMIT:
		rsvd = true;
		/* Fall through. */
	case RES_LIMIT:
		mutex_lock(&hugetlb_limit_mutex);
		ret = page_counter_set_max(
			hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
			nr_pages);
		mutex_unlock(&hugetlb_limit_mutex);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}
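
/*
 * Worked example (added for illustration, assuming 4KiB base pages):
 * writing "1G" to hugetlb.2MB.limit_in_bytes is parsed by
 * page_counter_memparse() into 262144 base pages, which round_down()
 * keeps as a whole number of 2MB huge pages (512 base pages each) before
 * it becomes the new page_counter limit.
 */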

static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
					   char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
}

static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
					char *buf, size_t nbytes, loff_t off)
{
	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
}

static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	int ret = 0;
	struct page_counter *counter, *rsvd_counter;
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));

	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];

	switch (MEMFILE_ATTR(of_cft(of)->private)) {
	case RES_MAX_USAGE:
		page_counter_reset_watermark(counter);
		break;
	case RES_RSVD_MAX_USAGE:
		page_counter_reset_watermark(rsvd_counter);
		break;
	case RES_FAILCNT:
		counter->failcnt = 0;
		break;
	case RES_RSVD_FAILCNT:
		rsvd_counter->failcnt = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret ?: nbytes;
}

static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= (1UL << 30))
		snprintf(buf, size, "%luGB", hsize >> 30);
	else if (hsize >= (1UL << 20))
		snprintf(buf, size, "%luMB", hsize >> 20);
	else
		snprintf(buf, size, "%luKB", hsize >> 10);
	return buf;
}
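
/*
 * e.g. (added for illustration) mem_fmt(buf, sizeof(buf), 2UL << 20)
 * yields "2MB" and mem_fmt(buf, sizeof(buf), 1UL << 30) yields "1GB";
 * the string becomes the size part of the per-hstate cgroup file names
 * created below.
 */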

static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}

static int hugetlb_events_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, false);
}

static int hugetlb_events_local_show(struct seq_file *seq, void *v)
{
	return __hugetlb_events_show(seq, true);
}

static void __init __hugetlb_cgroup_file_dfl_init(int idx)
{
	char buf[32];
	struct cftype *cft;
	struct hstate *h = &hstates[idx];

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	/* Add the limit file */
	cft = &h->cgroup_files_dfl[0];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the reservation limit file */
	cft = &h->cgroup_files_dfl[1];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->write = hugetlb_cgroup_write_dfl;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current usage file */
	cft = &h->cgroup_files_dfl[2];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the current reservation usage file */
	cft = &h->cgroup_files_dfl[3];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.current", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
	cft->seq_show = hugetlb_cgroup_read_u64_max;
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events file */
	cft = &h->cgroup_files_dfl[4];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup, events_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* Add the events.local file */
	cft = &h->cgroup_files_dfl[5];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.events.local", buf);
	cft->private = MEMFILE_PRIVATE(idx, 0);
	cft->seq_show = hugetlb_events_local_show;
	cft->file_offset = offsetof(struct hugetlb_cgroup,
				    events_local_file[idx]);
	cft->flags = CFTYPE_NOT_ON_ROOT;

	/* NULL terminate the last cft */
	cft = &h->cgroup_files_dfl[6];
	memset(cft, 0, sizeof(*cft));

	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
				       h->cgroup_files_dfl));
}
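
/*
 * Note (added for clarity): with a 2MB hstate the above is expected to
 * expose hugetlb.2MB.max, hugetlb.2MB.rsvd.max, hugetlb.2MB.current,
 * hugetlb.2MB.rsvd.current, hugetlb.2MB.events and
 * hugetlb.2MB.events.local on cgroup v2 mounts; the "hugetlb." prefix is
 * added by the cgroup core.
 */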

static void __init __hugetlb_cgroup_file_legacy_init(int idx)
{
	char buf[32];
	struct cftype *cft;
	struct hstate *h = &hstates[idx];

	/* format the size */
	mem_fmt(buf, sizeof(buf), huge_page_size(h));

	/* Add the limit file */
	cft = &h->cgroup_files_legacy[0];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.limit_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_LIMIT);
	cft->read_u64 = hugetlb_cgroup_read_u64;
	cft->write = hugetlb_cgroup_write_legacy;

	/* Add the reservation limit file */
	cft = &h->cgroup_files_legacy[1];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.limit_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_LIMIT);
	cft->read_u64 = hugetlb_cgroup_read_u64;
	cft->write = hugetlb_cgroup_write_legacy;

	/* Add the usage file */
	cft = &h->cgroup_files_legacy[2];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_USAGE);
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the reservation usage file */
	cft = &h->cgroup_files_legacy[3];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_USAGE);
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the MAX usage file */
	cft = &h->cgroup_files_legacy[4];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.max_usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_MAX_USAGE);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the MAX reservation usage file */
	cft = &h->cgroup_files_legacy[5];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.max_usage_in_bytes", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_MAX_USAGE);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the failcnt file */
	cft = &h->cgroup_files_legacy[6];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.failcnt", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_FAILCNT);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* Add the reservation failcnt file */
	cft = &h->cgroup_files_legacy[7];
	snprintf(cft->name, MAX_CFTYPE_NAME, "%s.rsvd.failcnt", buf);
	cft->private = MEMFILE_PRIVATE(idx, RES_RSVD_FAILCNT);
	cft->write = hugetlb_cgroup_reset;
	cft->read_u64 = hugetlb_cgroup_read_u64;

	/* NULL terminate the last cft */
	cft = &h->cgroup_files_legacy[8];
	memset(cft, 0, sizeof(*cft));

	WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
					  h->cgroup_files_legacy));
}

static void __init __hugetlb_cgroup_file_init(int idx)
{
	__hugetlb_cgroup_file_dfl_init(idx);
	__hugetlb_cgroup_file_legacy_init(idx);
}

void __init hugetlb_cgroup_file_init(void)
{
	struct hstate *h;

	for_each_hstate(h) {
		/*
		 * Add cgroup control files only if the huge page consists
		 * of more than two normal pages. This is because we use
		 * page[2].private for storing cgroup details.
		 */
		if (huge_page_order(h) >= HUGETLB_CGROUP_MIN_ORDER)
			__hugetlb_cgroup_file_init(hstate_index(h));
	}
}

/*
 * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
 * when we migrate hugepages
 */
void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
{
	struct hugetlb_cgroup *h_cg;
	struct hstate *h = page_hstate(oldhpage);

	if (hugetlb_cgroup_disabled())
		return;

	VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
	spin_lock(&hugetlb_lock);
	h_cg = hugetlb_cgroup_from_page(oldhpage);
	set_hugetlb_cgroup(oldhpage, NULL);

	/* move the h_cg details to new cgroup */
	set_hugetlb_cgroup(newhpage, h_cg);
	list_move(&newhpage->lru, &h->hugepage_activelist);
	spin_unlock(&hugetlb_lock);
	return;
}

static struct cftype hugetlb_files[] = {
	{} /* terminate */
};

struct cgroup_subsys hugetlb_cgrp_subsys = {
	.css_alloc	= hugetlb_cgroup_css_alloc,
	.css_offline	= hugetlb_cgroup_css_offline,
	.css_free	= hugetlb_cgroup_css_free,
	.dfl_cftypes	= hugetlb_files,
	.legacy_cftypes	= hugetlb_files,
};