#ifndef BLK_THROTTLE_H
#define BLK_THROTTLE_H

#include "blk-cgroup-rwstat.h"

/*
 * To implement hierarchical throttling, throtl_grps form a tree and bios
 * are dispatched upwards level by level until they reach the top and get
 * issued.  When dispatching bios from the children and local group at each
 * level, if the bios are dispatched into a single bio_list, there's a risk
 * that a local or child group which can queue many bios at once fills up
 * the list, starving others.
 *
 * To avoid such starvation, dispatched bios are queued separately
 * according to where they came from.  When they are again dispatched to
 * the parent, they're popped in round-robin order so that no single source
 * hogs the dispatch window.
 *
 * throtl_qnode is used to keep the queued bios separated by their sources.
 * Bios are queued to throtl_qnode which in turn is queued to
 * throtl_service_queue and then dispatched in round-robin order.
 *
 * It's also used to track the reference counts on blkg's.  A qnode always
 * belongs to a throtl_grp and gets queued on itself or the parent, so
 * incrementing the reference of the associated throtl_grp when a qnode is
 * queued and decrementing when dequeued is enough to keep the whole blkg
 * tree pinned while bios are in flight.
 */
struct throtl_qnode {
	struct list_head	node;		/* service_queue->queued[] */
	struct bio_list		bios;		/* queued bios */
	struct throtl_grp	*tg;		/* tg this qnode belongs to */
};
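
/*
 * Illustrative sketch, not part of the real interface: the helper below
 * is a hypothetical simplification of the qnode queueing logic in
 * blk-throttle.c.  A qnode starts empty and unlinked; the first bio from
 * a source links its qnode onto the service queue's queued[] list, which
 * keeps that source distinguishable for round-robin dispatch.
 */
static inline void example_qnode_add_bio(struct bio *bio,
					 struct throtl_qnode *qn,
					 struct list_head *queued)
{
	bio_list_add(&qn->bios, bio);		/* FIFO within one source */
	if (list_empty(&qn->node)) {
		/* first bio from this source: make the qnode visible */
		list_add_tail(&qn->node, queued);
		/* the real code also takes a reference on qn->tg here */
	}
}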

struct throtl_service_queue {
	struct throtl_service_queue *parent_sq;	/* the parent service_queue */

	/*
	 * Bios queued directly to this service_queue or dispatched from
	 * children throtl_grp's.
	 */
	struct list_head	queued[2];	/* throtl_qnode [READ/WRITE] */
	unsigned int		nr_queued[2];	/* number of queued bios */

	/*
	 * RB tree of active children throtl_grp's, which are sorted by
	 * their ->disptime.
	 */
	struct rb_root_cached	pending_tree;	/* RB tree of active tgs */
	unsigned int		nr_pending;	/* # queued in the tree */
	unsigned long		first_pending_disptime;	/* disptime of the first tg */
	struct timer_list	pending_timer;	/* fires on first_pending_disptime */
};
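
/*
 * Dispatch-side sketch (hypothetical helper, modeled on the round-robin
 * pop in blk-throttle.c): take one bio from the head qnode of a queued[]
 * list, then rotate that qnode to the tail so no single source hogs the
 * dispatch window.  Reference dropping and nr_queued bookkeeping are
 * omitted for brevity.
 */
static inline struct bio *example_pop_queued(struct list_head *queued)
{
	struct throtl_qnode *qn;
	struct bio *bio;

	if (list_empty(queued))
		return NULL;

	qn = list_first_entry(queued, struct throtl_qnode, node);
	bio = bio_list_pop(&qn->bios);	/* a queued qnode holds >= 1 bio */

	if (bio_list_empty(&qn->bios))
		list_del_init(&qn->node);	/* source fully drained */
	else
		list_move_tail(&qn->node, queued);	/* round-robin */

	return bio;
}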

enum {
	LIMIT_LOW,
	LIMIT_MAX,
	LIMIT_CNT,
};
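
/*
 * LIMIT_LOW backs the best-effort io.low limit (only active with
 * CONFIG_BLK_DEV_THROTTLING_LOW) and LIMIT_MAX backs the hard io.max
 * limit; both index the per-direction bps/iops arrays below.
 */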

struct throtl_grp {
	/* must be the first member */
	struct blkg_policy_data pd;

	/* active throtl group service_queue member */
	struct rb_node rb_node;

	/* throtl_data this group belongs to */
	struct throtl_data *td;

	/* this group's service queue */
	struct throtl_service_queue service_queue;

	/*
	 * qnode_on_self is used when bios are directly queued to this
	 * throtl_grp so that local bios compete fairly with bios
	 * dispatched from children.  qnode_on_parent is used when bios are
	 * dispatched from this throtl_grp into its parent and will compete
	 * with the sibling qnode_on_parents and the parent's
	 * qnode_on_self.
	 */
	struct throtl_qnode qnode_on_self[2];
	struct throtl_qnode qnode_on_parent[2];

	/*
	 * Dispatch time in jiffies.  This is the estimated time when the
	 * group will unthrottle and be ready to dispatch more bios.  It is
	 * used as the key to sort active groups in the service tree.
	 */
	unsigned long disptime;

	unsigned int flags;

	/* are there any throtl rules between this group and td? */
	bool has_rules[2];

	/* internally used bytes per second rate limits */
	uint64_t bps[2][LIMIT_CNT];
	/* user configured bps limits */
	uint64_t bps_conf[2][LIMIT_CNT];

	/* internally used IOPS limits */
	unsigned int iops[2][LIMIT_CNT];
	/* user configured IOPS limits */
	unsigned int iops_conf[2][LIMIT_CNT];

	/* Number of bytes dispatched in current slice */
	uint64_t bytes_disp[2];
	/* Number of bios dispatched in current slice */
	unsigned int io_disp[2];

	unsigned long last_low_overflow_time[2];

	uint64_t last_bytes_disp[2];
	unsigned int last_io_disp[2];

	unsigned long last_check_time;

	unsigned long latency_target;		/* us */
	unsigned long latency_target_conf;	/* us */
	/* When did we start a new slice */
	unsigned long slice_start[2];
	unsigned long slice_end[2];

	unsigned long last_finish_time;		/* ns / 1024 */
	unsigned long checked_last_finish_time;	/* ns / 1024 */
	unsigned long avg_idletime;		/* ns / 1024 */
	unsigned long idletime_threshold;	/* us */
	unsigned long idletime_threshold_conf;	/* us */

	unsigned int bio_cnt;		/* total bios */
	unsigned int bad_bio_cnt;	/* bios exceeding latency threshold */
	unsigned long bio_cnt_reset_time;

	atomic_t io_split_cnt[2];
	atomic_t last_io_split_cnt[2];

	struct blkg_rwstat stat_bytes;
	struct blkg_rwstat stat_ios;
};
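
/*
 * Indexing sketch (hypothetical helper, not part of this header): the
 * limit arrays are indexed by direction first (READ/WRITE, as returned
 * by bio_data_dir()) and by limit level second.  The real slice
 * accounting in blk-throttle.c is more careful about rounding and
 * overflow; this only shows the shape of the data.
 */
static inline bool example_tg_within_bps(struct throtl_grp *tg, int rw,
					 unsigned long slice_jiffies)
{
	u64 bps_limit = tg->bps[rw][LIMIT_MAX];

	if (bps_limit == U64_MAX)	/* "max" means unlimited */
		return true;

	/* bytes the slice has allowed so far vs. bytes dispatched in it */
	return tg->bytes_disp[rw] <= div_u64(bps_limit * slice_jiffies, HZ);
}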

extern struct blkcg_policy blkcg_policy_throtl;

static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct throtl_grp, pd) : NULL;
}

static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg)
{
	return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl));
}

/*
 * Internal throttling interface
 */
#ifndef CONFIG_BLK_DEV_THROTTLING
static inline int blk_throtl_init(struct request_queue *q) { return 0; }
static inline void blk_throtl_exit(struct request_queue *q) { }
static inline void blk_throtl_register_queue(struct request_queue *q) { }
static inline void blk_throtl_charge_bio_split(struct bio *bio) { }
static inline bool blk_throtl_bio(struct bio *bio) { return false; }
#else /* CONFIG_BLK_DEV_THROTTLING */
int blk_throtl_init(struct request_queue *q);
void blk_throtl_exit(struct request_queue *q);
void blk_throtl_register_queue(struct request_queue *q);
void blk_throtl_charge_bio_split(struct bio *bio);
bool __blk_throtl_bio(struct bio *bio);
static inline bool blk_throtl_bio(struct bio *bio)
{
	struct throtl_grp *tg = blkg_to_tg(bio->bi_blkg);

	if (bio_flagged(bio, BIO_THROTTLED))
		return false;
	if (!tg->has_rules[bio_data_dir(bio)])
		return false;

	return __blk_throtl_bio(bio);
}
#endif /* CONFIG_BLK_DEV_THROTTLING */
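
/*
 * Caller-side sketch (hypothetical, modeled on the bio submission path):
 * a true return from blk_throtl_bio() means the bio was throttled and
 * queued on a throtl_qnode for later dispatch, so the caller must stop
 * processing it.
 */
static inline bool example_submit_checks(struct bio *bio)
{
	if (blk_throtl_bio(bio))
		return false;	/* consumed: will be dispatched later */
	return true;		/* not throttled: continue submission */
}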

#endif /* BLK_THROTTLE_H */