Tejun Heo | 6954ff1 | 2019-08-28 15:05:59 -0700 | [diff] [blame] | 1 | #!/usr/bin/env drgn |
| 2 | # |
| 3 | # Copyright (C) 2019 Tejun Heo <tj@kernel.org> |
| 4 | # Copyright (C) 2019 Facebook |
| 5 | |
# Description text handed to argparse below; shown verbatim in --help.
desc = """
This is a drgn script to monitor the blk-iocost cgroup controller.
See the comment at the top of block/blk-iocost.c for more details.
For drgn, visit https://github.com/osandov/drgn.
"""
| 11 | |
| 12 | import sys |
| 13 | import re |
| 14 | import time |
| 15 | import json |
Tejun Heo | b06f2d3 | 2019-09-04 12:45:55 -0700 | [diff] [blame] | 16 | import math |
Tejun Heo | 6954ff1 | 2019-08-28 15:05:59 -0700 | [diff] [blame] | 17 | |
| 18 | import drgn |
| 19 | from drgn import container_of |
| 20 | from drgn.helpers.linux.list import list_for_each_entry,list_empty |
| 21 | from drgn.helpers.linux.radixtree import radix_tree_for_each,radix_tree_lookup |
| 22 | |
| 23 | import argparse |
# Command-line interface.  RawTextHelpFormatter keeps the line breaks of the
# multi-line description intact in --help output.
parser = argparse.ArgumentParser(description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('devname', metavar='DEV',
                    help='Target block device name (e.g. sda)')
# action='append': every occurrence adds one pattern; the patterns are later
# combined into a single alternation.
parser.add_argument('--cgroup', action='append', metavar='REGEX',
                    help='Regex for target cgroups, may be specified multiple times')
parser.add_argument('--interval', '-i', metavar='SECONDS', type=float, default=1,
                    help='Monitoring interval in seconds')
parser.add_argument('--json', action='store_true',
                    help='Output in json')
args = parser.parse_args()
| 35 | |
def err(s):
    """Report a fatal error and terminate the script.

    Writes ``s`` followed by a newline to standard error, flushes it, and
    exits with status 1.  This function never returns.
    """
    print(str(s), file=sys.stderr)
    sys.stderr.flush()
    raise SystemExit(1)
| 39 | |
# Look up the root blkcg and the policy id of the iocost policy.  `prog` is
# not defined in this file; presumably it is injected by the drgn launcher
# named in the shebang.  Any failure here is reported as iocost being
# unavailable.
try:
    blkcg_root = prog['blkcg_root']
    plid = prog['blkcg_policy_iocost'].plid.value_()
except Exception:
    # Deliberately not a bare "except:" so that KeyboardInterrupt and
    # SystemExit still propagate instead of being reported as missing iocost.
    err('The kernel does not have iocost enabled')
| 45 | |
# Constants read once from the debugged program, looked up by name so the
# script uses whatever values that kernel was built with.
IOC_RUNNING, NR_USAGE_SLOTS, HWEIGHT_WHOLE = (
    prog[sym].value_()
    for sym in ('IOC_RUNNING', 'NR_USAGE_SLOTS', 'HWEIGHT_WHOLE'))
VTIME_PER_SEC, VTIME_PER_USEC = (
    prog[sym].value_()
    for sym in ('VTIME_PER_SEC', 'VTIME_PER_USEC'))
AUTOP_SSD_FAST, AUTOP_SSD_DFL, AUTOP_SSD_QD1, AUTOP_HDD = (
    prog[sym].value_()
    for sym in ('AUTOP_SSD_FAST', 'AUTOP_SSD_DFL', 'AUTOP_SSD_QD1', 'AUTOP_HDD'))

# Maps an ioc's autop_idx value to the short label printed after "params=".
autop_names = dict((
    (AUTOP_SSD_FAST, 'ssd_fast'),
    (AUTOP_SSD_DFL, 'ssd_dfl'),
    (AUTOP_SSD_QD1, 'ssd_qd1'),
    (AUTOP_HDD, 'hdd'),
))
| 62 | |
class BlkgIterator:
    """Collect the blkgs of one request queue across the blkcg hierarchy.

    On construction the blkcg tree is walked depth-first from ``root_blkcg``.
    For every visited blkcg that has a blkg registered under ``q_id`` a
    ``(path, blkg)`` tuple is recorded; iterating the object yields those
    tuples in visiting order.
    """

    def __init__(self, root_blkcg, q_id, include_dying=False):
        """Walk the hierarchy below ``root_blkcg`` for queue id ``q_id``.

        Unless ``include_dying`` is true, blkcgs whose css does not have the
        CSS_ONLINE flag set are skipped together with their descendants.
        """
        self.include_dying = include_dying
        self.blkgs = []
        self.walk(root_blkcg, q_id, '')

    @staticmethod
    def blkcg_name(blkcg):
        """Return the kernfs node name of ``blkcg``'s cgroup as a str.

        Declared static because it takes no ``self``; without the decorator
        it could only be called through the class, not through an instance.
        """
        return blkcg.css.cgroup.kn.name.string_().decode('utf-8')

    def walk(self, blkcg, q_id, parent_path):
        """Record ``blkcg``'s blkg for ``q_id`` and recurse into its children.

        ``parent_path`` is the slash-separated path of the parent, or '' at
        the top.  Nothing is recorded, and the children are not visited,
        when the blkcg is filtered out as offline or has no blkg for
        ``q_id``.
        """
        if not self.include_dying and \
           not (blkcg.css.flags.value_() & prog['CSS_ONLINE'].value_()):
            return

        name = BlkgIterator.blkcg_name(blkcg)
        path = f'{parent_path}/{name}' if parent_path else name
        blkg = drgn.Object(prog, 'struct blkcg_gq',
                           address=radix_tree_lookup(blkcg.blkg_tree.address_of_(), q_id))
        # A failed radix tree lookup leaves the object at address 0.
        if not blkg.address_:
            return

        # An empty path is reported as '/'.
        self.blkgs.append((path or '/', blkg))

        for c in list_for_each_entry('struct blkcg',
                                     blkcg.css.children.address_of_(), 'css.sibling'):
            self.walk(c, q_id, path)

    def __iter__(self):
        """Iterate over the collected ``(path, blkg)`` tuples."""
        return iter(self.blkgs)
| 92 | |
class IocStat:
    """Snapshot of the device-wide iocost state taken from an ioc object."""

    def __init__(self, ioc):
        """Read the fields of interest from the drgn object ``ioc``."""
        self.enabled = ioc.enabled.value_()
        self.running = ioc.running.value_() == IOC_RUNNING
        # period_us is scaled from microseconds to milliseconds.
        self.period_ms = ioc.period_us.value_() / 1_000
        # Scaled down by 1e6 (presumably usecs to seconds -- confirm).
        self.period_at = ioc.period_at.value_() / 1_000_000
        self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC
        # Current vtime rate as a percentage of VTIME_PER_USEC.
        self.vrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC
        self.busy_level = ioc.busy_level.value_()
        self.autop_idx = ioc.autop_idx.value_()
        self.user_cost_model = ioc.user_cost_model.value_()
        self.user_qos_params = ioc.user_qos_params.value_()

        # Unknown indices are shown as '?'.
        self.autop_name = autop_names.get(self.autop_idx, '?')

    def dict(self, now):
        """Return the snapshot as a flat dict for JSON output.

        ``now`` is used as the timestamp.  Every value except the device
        name is converted with str().
        """
        out = {'device': devname}
        fields = (
            ('timestamp', now),
            ('enabled', int(self.enabled)),
            ('running', int(self.running)),
            ('period_ms', self.period_ms),
            ('period_at', self.period_at),
            ('period_vtime_at', self.vperiod_at),
            ('busy_level', self.busy_level),
            ('vrate_pct', self.vrate_pct),
        )
        out.update((key, str(val)) for key, val in fields)
        return out

    def table_preamble_str(self):
        """Return the one-line device summary printed above the table."""
        if not self.enabled:
            state = 'OFF'
        elif self.running:
            state = 'RUN'
        else:
            state = 'IDLE'

        line = ' '.join((
            f'{devname} {state:4}',
            f'per={self.period_ms}ms',
            f'cur_per={self.period_at:.3f}:v{self.vperiod_at:.3f}',
            f'busy={self.busy_level:+3}',
            f'vrate={self.vrate_pct:6.2f}%',
            f'params={self.autop_name}',
        ))

        # "(C)", "(Q)" or "(CQ)" is appended when user_cost_model and/or
        # user_qos_params are set.
        flags = ('C' if self.user_cost_model else '') + \
                ('Q' if self.user_qos_params else '')
        if flags:
            line += f'({flags})'
        return line

    def table_header_str(self):
        """Return the column header line of the per-cgroup table."""
        columns = (
            ' ' * 25,
            'active',
            'weight'.rjust(9),
            'hweight%'.rjust(13),
            'inflt%'.rjust(6),
            'dbt'.rjust(3),
            'delay'.rjust(6),
            'usages%',
        )
        return ' '.join(columns)
Tejun Heo | 6954ff1 | 2019-08-28 15:05:59 -0700 | [diff] [blame] | 139 | |
class IocgStat:
    """Snapshot of the iocost state of one cgroup on the monitored device."""

    def __init__(self, iocg):
        """Read the fields of interest from the drgn object ``iocg``."""
        ioc = iocg.ioc
        blkg = iocg.pd.blkg

        def hweight_pct(raw):
            # Hierarchical weights are expressed as a share of HWEIGHT_WHOLE.
            return raw * 100 / HWEIGHT_WHOLE

        # Counted as active while it sits on a (non-empty) active_list.
        self.is_active = not list_empty(iocg.active_list.address_of_())
        self.weight = iocg.weight.value_()
        self.active = iocg.active.value_()
        self.inuse = iocg.inuse.value_()
        self.hwa_pct = hweight_pct(iocg.hweight_active.value_())
        self.hwi_pct = hweight_pct(iocg.hweight_inuse.value_())
        self.address = iocg.value_()

        # In-flight vtime (issued minus completed) as a percentage of the
        # vtime covered by one period at the current rate.
        vdone = iocg.done_vtime.counter.value_()
        vtime = iocg.vtime.counter.value_()
        vrate = ioc.vtime_rate.counter.value_()
        period_vtime = ioc.period_us.value_() * vrate
        self.inflight_pct = \
            (vtime - vdone) * 100 / period_vtime if period_vtime else 0

        # vtime -> microseconds -> milliseconds.
        self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
        self.use_delay = blkg.use_delay.counter.value_()
        self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000

        # Usage slots are read starting at usage_idx, wrapping around.
        first = iocg.usage_idx.value_()
        self.usages = [
            hweight_pct(iocg.usages[(first + off) % NR_USAGE_SLOTS].value_())
            for off in range(NR_USAGE_SLOTS)
        ]
        # Peak over all slots, never below the integer 0 seed.
        self.usage = max([0, *self.usages])

    def dict(self, now, path):
        """Return the snapshot as a flat dict for JSON output.

        ``path`` is stored under 'cgroup' as given and ``now`` is used as
        the timestamp; all other values are converted to strings.  One
        ``usage_pct_<i>`` entry is added per usage slot.
        """
        out = {'cgroup': path}
        fields = (
            ('timestamp', now),
            ('is_active', int(self.is_active)),
            ('weight', self.weight),
            ('weight_active', self.active),
            ('weight_inuse', self.inuse),
            ('hweight_active_pct', self.hwa_pct),
            ('hweight_inuse_pct', self.hwi_pct),
            ('inflight_pct', self.inflight_pct),
            ('debt_ms', self.debt_ms),
            ('use_delay', self.use_delay),
            ('delay_ms', self.delay_ms),
            ('usage_pct', self.usage),
            ('address', hex(self.address)),
        )
        out.update((key, str(val)) for key, val in fields)
        for slot, pct in enumerate(self.usages):
            out[f'usage_pct_{slot}'] = str(pct)
        return out

    def table_row_str(self, path):
        """Return this cgroup's row of the table.

        Only the last 28 characters of ``path`` are shown.  Debt and delay
        are rounded up and capped at 999, the use_delay count at 99, and
        each usage percentage is rounded and capped at 999.
        """
        marker = '*' if self.is_active else ' '
        debt = min(math.ceil(self.debt_ms), 999)
        delay_cnt = min(self.use_delay, 99)
        delay = min(math.ceil(self.delay_ms), 999)
        usages = ':'.join(f'{min(round(u), 999):03d}' for u in self.usages)

        cells = (
            f'{path[-28:]:28}',
            marker,
            f'{self.inuse:5}/{self.active:5}',
            f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f}',
            f'{self.inflight_pct:6.2f}',
            f'{debt:3}',
            f'{delay_cnt:2}*{delay:03}',
            usages,
        )
        return ' '.join(cells)
| 207 | |
# Settings derived from the command line.
devname = args.devname
interval = args.interval
# Tabular output unless --json was given.
table_fmt = not args.json

# Fold all --cgroup patterns into one alternation.  No pattern at all, or a
# resulting empty pattern, disables filtering.
re_str = '|'.join(args.cgroup) if args.cgroup else None
filter_re = re.compile(re_str) if re_str else None
| 225 | |
# Locate the roots: scan the root blkcg's blkgs for the one whose queue
# belongs to the requested device, remember its queue id, and take the root
# iocg and its ioc from that blkg's iocost policy data.
q_id = None
root_iocg = None
ioc = None

for _, ptr in radix_tree_for_each(blkcg_root.blkg_tree.address_of_()):
    blkg = drgn.Object(prog, 'struct blkcg_gq', address=ptr)
    try:
        if devname != blkg.q.kobj.parent.name.string_().decode('utf-8'):
            continue
        q_id = blkg.q.id.value_()
        # The policy data slot is empty when iocost is not attached here;
        # ioc then stays None and the error below is reported.
        if blkg.pd[plid]:
            root_iocg = container_of(blkg.pd[plid], 'struct ioc_gq', 'pd')
            ioc = root_iocg.ioc
        break
    except Exception:
        # Best effort: an entry that cannot be read is skipped.  Unlike a
        # bare "except:", this lets KeyboardInterrupt/SystemExit through.
        continue

if ioc is None:
    err(f'Could not find ioc for {devname}')
| 245 | |
# Keep printing: emit one report per interval, forever.
while True:
    now = time.time()
    iocstat = IocStat(ioc)

    if table_fmt:
        # The leading empty entry produces the blank line between reports.
        lines = ['', iocstat.table_preamble_str(), iocstat.table_header_str()]
    else:
        lines = [json.dumps(iocstat.dict(now))]

    for path, blkg in BlkgIterator(blkcg_root, q_id):
        if filter_re and not filter_re.match(path):
            continue
        # Skip blkgs without iocost policy data.
        if not blkg.pd[plid]:
            continue

        iocg_stat = IocgStat(container_of(blkg.pd[plid], 'struct ioc_gq', 'pd'))

        # Without a filter only active cgroups are listed; with a filter
        # every matching cgroup is shown, active or not.
        if not (filter_re or iocg_stat.is_active):
            continue

        if table_fmt:
            lines.append(iocg_stat.table_row_str(path))
        else:
            lines.append(json.dumps(iocg_stat.dict(now, path)))

    print('\n'.join(lines), flush=True)
    time.sleep(interval)