/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *                    Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *                    Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/percpu.h>
#include <linux/percpu_counter.h>
#include <linux/u64_stats_sync.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/fs.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP

enum blkg_iostat_type {
	BLKG_IOSTAT_READ,
	BLKG_IOSTAT_WRITE,
	BLKG_IOSTAT_DISCARD,

	BLKG_IOSTAT_NR,
};

struct blkcg_gq;

struct blkcg {
	struct cgroup_subsys_state css;
	spinlock_t lock;
	refcount_t online_pin;

	struct radix_tree_root blkg_tree;
	struct blkcg_gq __rcu *blkg_hint;
	struct hlist_head blkg_list;

	struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];

	struct list_head all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head cgwb_list;
#endif
};

struct blkg_iostat {
	u64 bytes[BLKG_IOSTAT_NR];
	u64 ios[BLKG_IOSTAT_NR];
};

struct blkg_iostat_set {
	struct u64_stats_sync sync;
	struct blkg_iostat cur;
	struct blkg_iostat last;
};

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate a private data
 * area by allocating a larger data structure which embeds blkg_policy_data
 * at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq *blkg;
	int plid;
};

/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated to it should implement cpd_alloc/free_fn()
 * methods.  A policy can allocate a private data area by allocating a larger
 * data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg *blkcg;
	int plid;
};

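/*
 * Illustrative sketch (not part of this header): a policy typically embeds
 * blkg_policy_data / blkcg_policy_data at the beginning of its own
 * structures, as described above, and converts back with container_of().
 * The names example_pd, example_cpd and pd_to_example are hypothetical.
 *
 *	struct example_pd {
 *		struct blkg_policy_data pd;	// must be the first member
 *		u64 ios_issued;			// policy-private state
 *	};
 *
 *	struct example_cpd {
 *		struct blkcg_policy_data cpd;	// must be the first member
 *		u64 default_weight;
 *	};
 *
 *	static struct example_pd *pd_to_example(struct blkg_policy_data *pd)
 *	{
 *		return pd ? container_of(pd, struct example_pd, pd) : NULL;
 *	}
 */
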
/* association between a blk cgroup and a request queue */
struct blkcg_gq {
	/* Pointer to the associated request_queue */
	struct request_queue *q;
	struct list_head q_node;
	struct hlist_node blkcg_node;
	struct blkcg *blkcg;

	/*
	 * Each blkg gets congested separately and the congestion state is
	 * propagated to the matching bdi_writeback_congested.
	 */
	struct bdi_writeback_congested *wb_congested;

	/* all non-root blkcg_gq's are guaranteed to have access to parent */
	struct blkcg_gq *parent;

	/* reference count */
	struct percpu_ref refcnt;

	/* is this blkg online? protected by both blkcg and q locks */
	bool online;

	struct blkg_iostat_set __percpu *iostat_cpu;
	struct blkg_iostat_set iostat;

	struct blkg_policy_data *pd[BLKCG_MAX_POLS];

	spinlock_t async_bio_lock;
	struct bio_list async_bios;
	struct work_struct async_bio_work;

	atomic_t use_delay;
	atomic64_t delay_nsec;
	atomic64_t delay_start;
	u64 last_delay;
	int last_use;

	struct rcu_head rcu_head;
};

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
		struct request_queue *q, struct blkcg *blkcg);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
		size_t size);

struct blkcg_policy {
	int plid;
	/* cgroup files for the policy */
	struct cftype *dfl_cftypes;
	struct cftype *legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
	blkcg_pol_init_cpd_fn *cpd_init_fn;
	blkcg_pol_free_cpd_fn *cpd_free_fn;
	blkcg_pol_bind_cpd_fn *cpd_bind_fn;

	blkcg_pol_alloc_pd_fn *pd_alloc_fn;
	blkcg_pol_init_pd_fn *pd_init_fn;
	blkcg_pol_online_pd_fn *pd_online_fn;
	blkcg_pol_offline_pd_fn *pd_offline_fn;
	blkcg_pol_free_pd_fn *pd_free_fn;
	blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn *pd_stat_fn;
};

extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;
extern bool blkcg_debug_stats;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
		struct request_queue *q, bool update_hint);
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
		struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
		struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
		const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
		const struct blkcg_policy *pol);

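/*
 * Illustrative sketch (hypothetical policy, not part of this header): a
 * policy fills in a blkcg_policy with its callbacks, registers it once
 * (typically from an initcall) and activates it per request_queue.  The
 * example_* names are placeholders.
 *
 *	static struct blkcg_policy blkcg_policy_example = {
 *		.pd_alloc_fn	= example_pd_alloc,
 *		.pd_init_fn	= example_pd_init,
 *		.pd_free_fn	= example_pd_free,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return blkcg_policy_register(&blkcg_policy_example);
 *	}
 *
 * blkcg_activate_policy(q, &blkcg_policy_example) then allocates and
 * initializes the per-blkg policy data on @q, and blkcg_deactivate_policy()
 * undoes it.
 */
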
const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);

struct blkg_conf_ctx {
	struct gendisk *disk;
	struct blkcg_gq *blkg;
	char *body;
};

struct gendisk *blkcg_conf_get_disk(char **inputp);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);

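/*
 * Illustrative sketch (hypothetical cftype write handler, not part of this
 * header): per-device configuration writes of the form "MAJ:MIN <value>"
 * are usually parsed with blkg_conf_prep()/blkg_conf_finish(), which look
 * up the disk and the blkg for the caller.  example_set_limit and
 * blkcg_policy_example are placeholders.
 *
 *	static ssize_t example_set_limit(struct kernfs_open_file *of,
 *					 char *buf, size_t nbytes, loff_t off)
 *	{
 *		struct blkcg *blkcg = css_to_blkcg(of_css(of));
 *		struct blkg_conf_ctx ctx;
 *		int ret;
 *
 *		ret = blkg_conf_prep(blkcg, &blkcg_policy_example, buf, &ctx);
 *		if (ret)
 *			return ret;
 *		// ctx.blkg and ctx.body are valid here; apply the limit
 *		blkg_conf_finish(&ctx);
 *		return nbytes;
 *	}
 */
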
/**
 * blkcg_css - find the current css
 *
 * Find the css associated with either the kthread or the current task.
 * This may return a dying css, so it is up to the caller to use tryget logic
 * to confirm it is alive and well.
 */
static inline struct cgroup_subsys_state *blkcg_css(void)
{
	struct cgroup_subsys_state *css;

	css = kthread_blkcg();
	if (css)
		return css;
	return task_css(current, io_cgrp_id);
}

static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct blkcg, css) : NULL;
}

/**
 * __bio_blkcg - internal, inconsistent version to get blkcg
 *
 * DO NOT USE.
 * This function is inconsistent and consequently is dangerous to use.  The
 * first part of the function returns a blkcg where a reference is owned by
 * the bio.  This means it does not need to be rcu protected as it cannot go
 * away with the bio owning a reference to it.  However, the latter
 * potentially gets it from task_css().  This can race against task migration
 * and the cgroup dying.  It is also semantically different as it must be
 * called rcu protected and is susceptible to failure when trying to get a
 * reference to it.  Therefore, it is not ok to assume that *_get() will
 * always succeed on the blkcg returned here.
 */
static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_blkg)
		return bio->bi_blkg->blkcg;
	return css_to_blkcg(blkcg_css());
}

/**
 * bio_blkcg - grab the blkcg associated with a bio
 * @bio: target bio
 *
 * This returns the blkcg associated with a bio, %NULL if not associated.
 * Callers are expected to either handle %NULL or know association has been
 * done prior to calling this.
 */
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_blkg)
		return bio->bi_blkg->blkcg;
	return NULL;
}

static inline bool blk_cgroup_congested(void)
{
	struct cgroup_subsys_state *css;
	bool ret = false;

	rcu_read_lock();
	css = kthread_blkcg();
	if (!css)
		css = task_css(current, io_cgrp_id);
	while (css) {
		if (atomic_read(&css->cgroup->congestion_count)) {
			ret = true;
			break;
		}
		css = css->parent;
	}
	rcu_read_unlock();
	return ret;
}

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @return: true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as
 * if it were attached to the root blkg, and then backcharge to the actual
 * owning blkg.  The idea is we do bio_blkcg() to look up the actual context
 * for the bio and attach the appropriate blkg to the bio.  Then we call this
 * helper and if it is true run with the root blkg for that queue and then do
 * any backcharging to the originating cgroup once the io is complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}

/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg.  Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
	return css_to_blkcg(blkcg->css.parent);
}

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and the lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return __blkg_lookup(blkcg, q, false);
}

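/*
 * Illustrative sketch (not part of this header): a typical lookup runs under
 * the RCU read lock and must not assume the blkg outlives the critical
 * section unless a reference is taken:
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(blkcg, q);
 *	if (blkg) {
 *		// use blkg here, or take a reference with blkg_tryget()
 *	}
 *	rcu_read_unlock();
 */
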
/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level.  See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
	return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data.  Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}

extern void blkcg_destroy_blkgs(struct blkcg *blkcg);

/**
 * blkcg_pin_online - pin online state
 * @blkcg: blkcg of interest
 *
 * While pinned, a blkcg is kept online.  This is primarily used to
 * impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
 * while an associated cgwb is still active.
 */
static inline void blkcg_pin_online(struct blkcg *blkcg)
{
	refcount_inc(&blkcg->online_pin);
}

/**
 * blkcg_unpin_online - unpin online state
 * @blkcg: blkcg of interest
 *
 * This is primarily used to impedance-match blkg and cgwb lifetimes so
 * that blkg doesn't go offline while an associated cgwb is still active.
 * When this count goes to zero, all active cgwbs have finished so the
 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
 */
static inline void blkcg_unpin_online(struct blkcg *blkcg)
{
	if (refcount_dec_and_test(&blkcg->online_pin))
		blkcg_destroy_blkgs(blkcg);
}

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	percpu_ref_get(&blkg->refcnt);
}

/**
 * blkg_tryget - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg.  We may be in the
 * midst of freeing this blkg, so we can only use it if the refcnt is not
 * zero.
 */
static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
	return blkg && percpu_ref_tryget(&blkg->refcnt);
}

/**
 * blkg_tryget_closest - try and get a blkg ref on the closest blkg
 * @blkg: blkg to get
 *
 * This needs to be called rcu protected.  As the failure mode here is to
 * walk up the blkg tree, this ensures that the blkg->parent pointers are
 * always valid.  This returns the blkg that it ended up taking a reference
 * on or %NULL if no reference was taken.
 */
static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
{
	struct blkcg_gq *ret_blkg = NULL;

	WARN_ON_ONCE(!rcu_read_lock_held());

	while (blkg) {
		if (blkg_tryget(blkg)) {
			ret_blkg = blkg;
			break;
		}
		blkg = blkg->parent;
	}

	return ret_blkg;
}

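/*
 * Illustrative sketch (not part of this header): blkg_tryget_closest() suits
 * RCU-protected paths that want some usable blkg even if the looked-up one
 * is already being torn down, e.g. when associating a bio with a blkg:
 *
 *	rcu_read_lock();
 *	blkg = blkg_tryget_closest(blkg_lookup(blkcg, q));
 *	rcu_read_unlock();
 *	// a non-NULL blkg holds a reference that is dropped with blkg_put()
 */
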
/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	percpu_ref_put(&blkg->refcnt);
}

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip a subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

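/*
 * Illustrative sketch (not part of this header): walking all blkgs hanging
 * off a queue's root blkg, with the RCU read lock held (hold the blkcg or
 * queue lock as well if only online blkgs must be seen):
 *
 *	struct blkcg_gq *blkg;
 *	struct cgroup_subsys_state *pos_css;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(blkg, pos_css, q->root_blkg) {
 *		// q->root_blkg is visited first, then its descendants
 *	}
 *	rcu_read_unlock();
 */
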
/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
			   struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
				  struct bio *bio) { return false; }
#endif

bool __blkcg_punt_bio_submit(struct bio *bio);

static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
	if (bio->bi_opf & REQ_CGROUP_PUNT)
		return __blkcg_punt_bio_submit(bio);
	else
		return false;
}

static inline void blkcg_bio_issue_init(struct bio *bio)
{
	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio)
{
	struct blkcg_gq *blkg;
	bool throtl = false;

	rcu_read_lock();

	if (!bio->bi_blkg) {
		char b[BDEVNAME_SIZE];

		WARN_ONCE(1,
			  "no blkg associated for bio on block-device: %s\n",
			  bio_devname(bio, b));
		bio_associate_blkg(bio);
	}

	blkg = bio->bi_blkg;

	throtl = blk_throtl_bio(q, blkg, bio);

	if (!throtl) {
		struct blkg_iostat_set *bis;
		int rwd, cpu;

		if (op_is_discard(bio->bi_opf))
			rwd = BLKG_IOSTAT_DISCARD;
		else if (op_is_write(bio->bi_opf))
			rwd = BLKG_IOSTAT_WRITE;
		else
			rwd = BLKG_IOSTAT_READ;

		cpu = get_cpu();
		bis = per_cpu_ptr(blkg->iostat_cpu, cpu);
		u64_stats_update_begin(&bis->sync);

		/*
		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
		 * is a split bio and we would have already accounted for the
		 * size of the bio.
		 */
		if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
			bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
		bis->cur.ios[rwd]++;

		u64_stats_update_end(&bis->sync);
		if (cgroup_subsys_on_dfl(io_cgrp_subsys))
			cgroup_rstat_updated(blkg->blkcg->css.cgroup, cpu);
		put_cpu();
	}

	blkcg_bio_issue_init(bio);

	rcu_read_unlock();
	return !throtl;
}

static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay.  If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble.  We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
		if (cur == old)
			break;
		old = cur;
	}

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}

static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);
	if (!old)
		return;
	/* We only want 1 person clearing the congestion count for this blkg. */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
		if (cur == old) {
			atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
			break;
		}
		old = cur;
	}
}

void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
#else	/* CONFIG_BLK_CGROUP */

struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

#define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }

#ifdef CONFIG_BLOCK

static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }

static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q) \
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLOCK */
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */