Bart Van Assche | 46d6ae0 | 2021-06-03 14:49:29 -0700 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Block rq-qos policy for assigning an I/O priority class to requests. |
| 4 | * |
| 5 | * Using an rq-qos policy for assigning I/O priority class has two advantages |
| 6 | * over using the ioprio_set() system call: |
| 7 | * |
| 8 | * - This policy is cgroup based so it has all the advantages of cgroups. |
| 9 | * - While ioprio_set() does not affect page cache writeback I/O, this rq-qos |
| 10 | * controller affects page cache writeback I/O for filesystems that support |
| 11 | * associating a cgroup with writeback I/O. See also |
| 12 | * Documentation/admin-guide/cgroup-v2.rst. |
| 13 | */ |
| 14 | |
| 15 | #include <linux/blk-cgroup.h> |
| 16 | #include <linux/blk-mq.h> |
| 17 | #include <linux/blk_types.h> |
| 18 | #include <linux/kernel.h> |
| 19 | #include <linux/module.h> |
| 20 | #include "blk-ioprio.h" |
| 21 | #include "blk-rq-qos.h" |
| 22 | |
/**
 * enum prio_policy - Per-cgroup rule for rewriting the I/O priority class.
 * @POLICY_NO_CHANGE: (default) leave the I/O priority class untouched.
 * @POLICY_NONE_TO_RT: turn IOPRIO_CLASS_NONE into IOPRIO_CLASS_RT.
 * @POLICY_RESTRICT_TO_BE: turn IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into
 *		IOPRIO_CLASS_BE.
 * @POLICY_ALL_TO_IDLE: turn every I/O priority class into IOPRIO_CLASS_IDLE.
 *
 * The numeric values matter: they index policy_name[] and are handed to
 * IOPRIO_PRIO_VALUE() as the priority class. See also <linux/ioprio.h>.
 */
enum prio_policy {
	POLICY_NO_CHANGE	= 0,
	POLICY_NONE_TO_RT	= 1,
	POLICY_RESTRICT_TO_BE	= 2,
	POLICY_ALL_TO_IDLE	= 3,
};
| 39 | |
| 40 | static const char *policy_name[] = { |
| 41 | [POLICY_NO_CHANGE] = "no-change", |
| 42 | [POLICY_NONE_TO_RT] = "none-to-rt", |
| 43 | [POLICY_RESTRICT_TO_BE] = "restrict-to-be", |
| 44 | [POLICY_ALL_TO_IDLE] = "idle", |
| 45 | }; |
| 46 | |
| 47 | static struct blkcg_policy ioprio_policy; |
| 48 | |
| 49 | /** |
| 50 | * struct ioprio_blkg - Per (cgroup, request queue) data. |
| 51 | * @pd: blkg_policy_data structure. |
| 52 | */ |
| 53 | struct ioprio_blkg { |
| 54 | struct blkg_policy_data pd; |
| 55 | }; |
| 56 | |
| 57 | /** |
| 58 | * struct ioprio_blkcg - Per cgroup data. |
| 59 | * @cpd: blkcg_policy_data structure. |
| 60 | * @prio_policy: One of the IOPRIO_CLASS_* values. See also <linux/ioprio.h>. |
| 61 | */ |
| 62 | struct ioprio_blkcg { |
| 63 | struct blkcg_policy_data cpd; |
| 64 | enum prio_policy prio_policy; |
| 65 | }; |
| 66 | |
| 67 | static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd) |
| 68 | { |
| 69 | return pd ? container_of(pd, struct ioprio_blkg, pd) : NULL; |
| 70 | } |
| 71 | |
| 72 | static struct ioprio_blkcg *blkcg_to_ioprio_blkcg(struct blkcg *blkcg) |
| 73 | { |
| 74 | return container_of(blkcg_to_cpd(blkcg, &ioprio_policy), |
| 75 | struct ioprio_blkcg, cpd); |
| 76 | } |
| 77 | |
/* Look up the ioprio per-cgroup data from a cgroup subsystem state. */
static struct ioprio_blkcg *
ioprio_blkcg_from_css(struct cgroup_subsys_state *css)
{
	struct blkcg *blkcg = css_to_blkcg(css);

	return blkcg_to_ioprio_blkcg(blkcg);
}
| 83 | |
| 84 | static struct ioprio_blkcg *ioprio_blkcg_from_bio(struct bio *bio) |
| 85 | { |
| 86 | struct blkg_policy_data *pd = blkg_to_pd(bio->bi_blkg, &ioprio_policy); |
| 87 | |
| 88 | if (!pd) |
| 89 | return NULL; |
| 90 | |
| 91 | return blkcg_to_ioprio_blkcg(pd->blkg->blkcg); |
| 92 | } |
| 93 | |
| 94 | static int ioprio_show_prio_policy(struct seq_file *sf, void *v) |
| 95 | { |
| 96 | struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(seq_css(sf)); |
| 97 | |
| 98 | seq_printf(sf, "%s\n", policy_name[blkcg->prio_policy]); |
| 99 | return 0; |
| 100 | } |
| 101 | |
| 102 | static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf, |
| 103 | size_t nbytes, loff_t off) |
| 104 | { |
| 105 | struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(of_css(of)); |
| 106 | int ret; |
| 107 | |
| 108 | if (off != 0) |
| 109 | return -EIO; |
| 110 | /* kernfs_fop_write_iter() terminates 'buf' with '\0'. */ |
| 111 | ret = sysfs_match_string(policy_name, buf); |
| 112 | if (ret < 0) |
| 113 | return ret; |
| 114 | blkcg->prio_policy = ret; |
| 115 | |
| 116 | return nbytes; |
| 117 | } |
| 118 | |
| 119 | static struct blkg_policy_data * |
| 120 | ioprio_alloc_pd(gfp_t gfp, struct request_queue *q, struct blkcg *blkcg) |
| 121 | { |
| 122 | struct ioprio_blkg *ioprio_blkg; |
| 123 | |
| 124 | ioprio_blkg = kzalloc(sizeof(*ioprio_blkg), gfp); |
| 125 | if (!ioprio_blkg) |
| 126 | return NULL; |
| 127 | |
| 128 | return &ioprio_blkg->pd; |
| 129 | } |
| 130 | |
/* pd_free_fn callback: release what ioprio_alloc_pd() allocated. */
static void ioprio_free_pd(struct blkg_policy_data *pd)
{
	kfree(pd_to_ioprio(pd));
}
| 137 | |
| 138 | static struct blkcg_policy_data *ioprio_alloc_cpd(gfp_t gfp) |
| 139 | { |
| 140 | struct ioprio_blkcg *blkcg; |
| 141 | |
| 142 | blkcg = kzalloc(sizeof(*blkcg), gfp); |
| 143 | if (!blkcg) |
| 144 | return NULL; |
| 145 | blkcg->prio_policy = POLICY_NO_CHANGE; |
| 146 | return &blkcg->cpd; |
| 147 | } |
| 148 | |
| 149 | static void ioprio_free_cpd(struct blkcg_policy_data *cpd) |
| 150 | { |
| 151 | struct ioprio_blkcg *blkcg = container_of(cpd, typeof(*blkcg), cpd); |
| 152 | |
| 153 | kfree(blkcg); |
| 154 | } |
| 155 | |
| 156 | #define IOPRIO_ATTRS \ |
| 157 | { \ |
| 158 | .name = "prio.class", \ |
| 159 | .seq_show = ioprio_show_prio_policy, \ |
| 160 | .write = ioprio_set_prio_policy, \ |
| 161 | }, \ |
| 162 | { } /* sentinel */ |
| 163 | |
| 164 | /* cgroup v2 attributes */ |
| 165 | static struct cftype ioprio_files[] = { |
| 166 | IOPRIO_ATTRS |
| 167 | }; |
| 168 | |
| 169 | /* cgroup v1 attributes */ |
| 170 | static struct cftype ioprio_legacy_files[] = { |
| 171 | IOPRIO_ATTRS |
| 172 | }; |
| 173 | |
| 174 | static struct blkcg_policy ioprio_policy = { |
| 175 | .dfl_cftypes = ioprio_files, |
| 176 | .legacy_cftypes = ioprio_legacy_files, |
| 177 | |
| 178 | .cpd_alloc_fn = ioprio_alloc_cpd, |
| 179 | .cpd_free_fn = ioprio_free_cpd, |
| 180 | |
| 181 | .pd_alloc_fn = ioprio_alloc_pd, |
| 182 | .pd_free_fn = ioprio_free_pd, |
| 183 | }; |
| 184 | |
| 185 | struct blk_ioprio { |
| 186 | struct rq_qos rqos; |
| 187 | }; |
| 188 | |
| 189 | static void blkcg_ioprio_track(struct rq_qos *rqos, struct request *rq, |
| 190 | struct bio *bio) |
| 191 | { |
| 192 | struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio); |
| 193 | |
| 194 | /* |
| 195 | * Except for IOPRIO_CLASS_NONE, higher I/O priority numbers |
| 196 | * correspond to a lower priority. Hence, the max_t() below selects |
| 197 | * the lower priority of bi_ioprio and the cgroup I/O priority class. |
| 198 | * If the cgroup policy has been set to POLICY_NO_CHANGE == 0, the |
| 199 | * bio I/O priority is not modified. If the bio I/O priority equals |
| 200 | * IOPRIO_CLASS_NONE, the cgroup I/O priority is assigned to the bio. |
| 201 | */ |
| 202 | bio->bi_ioprio = max_t(u16, bio->bi_ioprio, |
| 203 | IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0)); |
| 204 | } |
| 205 | |
| 206 | static void blkcg_ioprio_exit(struct rq_qos *rqos) |
| 207 | { |
| 208 | struct blk_ioprio *blkioprio_blkg = |
| 209 | container_of(rqos, typeof(*blkioprio_blkg), rqos); |
| 210 | |
| 211 | blkcg_deactivate_policy(rqos->q, &ioprio_policy); |
| 212 | kfree(blkioprio_blkg); |
| 213 | } |
| 214 | |
| 215 | static struct rq_qos_ops blkcg_ioprio_ops = { |
| 216 | .track = blkcg_ioprio_track, |
| 217 | .exit = blkcg_ioprio_exit, |
| 218 | }; |
| 219 | |
| 220 | int blk_ioprio_init(struct request_queue *q) |
| 221 | { |
| 222 | struct blk_ioprio *blkioprio_blkg; |
| 223 | struct rq_qos *rqos; |
| 224 | int ret; |
| 225 | |
| 226 | blkioprio_blkg = kzalloc(sizeof(*blkioprio_blkg), GFP_KERNEL); |
| 227 | if (!blkioprio_blkg) |
| 228 | return -ENOMEM; |
| 229 | |
| 230 | ret = blkcg_activate_policy(q, &ioprio_policy); |
| 231 | if (ret) { |
| 232 | kfree(blkioprio_blkg); |
| 233 | return ret; |
| 234 | } |
| 235 | |
| 236 | rqos = &blkioprio_blkg->rqos; |
| 237 | rqos->id = RQ_QOS_IOPRIO; |
| 238 | rqos->ops = &blkcg_ioprio_ops; |
| 239 | rqos->q = q; |
| 240 | |
| 241 | /* |
| 242 | * Registering the rq-qos policy after activating the blk-cgroup |
| 243 | * policy guarantees that ioprio_blkcg_from_bio(bio) != NULL in the |
| 244 | * rq-qos callbacks. |
| 245 | */ |
| 246 | rq_qos_add(q, rqos); |
| 247 | |
| 248 | return 0; |
| 249 | } |
| 250 | |
| 251 | static int __init ioprio_init(void) |
| 252 | { |
| 253 | return blkcg_policy_register(&ioprio_policy); |
| 254 | } |
| 255 | |
| 256 | static void __exit ioprio_exit(void) |
| 257 | { |
| 258 | blkcg_policy_unregister(&ioprio_policy); |
| 259 | } |
| 260 | |
| 261 | module_init(ioprio_init); |
| 262 | module_exit(ioprio_exit); |