/*
 * Block stat tracking code
 *
 * Copyright (C) 2016 Jens Axboe
 */
#include <linux/kernel.h>
#include <linux/blk-mq.h>

#include "blk-stat.h"
#include "blk-mq.h"

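/*
 * Samples are accumulated in a batch first (sum in ->batch, count in
 * ->nr_batch) and only folded into ->mean and ->nr_samples here, so the
 * per-IO accounting cost stays at simple additions rather than a division
 * per sample.
 */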
static void blk_stat_flush_batch(struct blk_rq_stat *stat)
{
	const s32 nr_batch = READ_ONCE(stat->nr_batch);
	const s32 nr_samples = READ_ONCE(stat->nr_samples);

	if (!nr_batch)
		return;
	if (!nr_samples)
		stat->mean = div64_s64(stat->batch, nr_batch);
	else {
		stat->mean = div64_s64((stat->mean * nr_samples) +
					stat->batch,
					nr_batch + nr_samples);
	}

	stat->nr_samples += nr_batch;
	stat->nr_batch = stat->batch = 0;
}

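/*
 * Merge @src into @dst: flush @src's pending batch so batched-only samples
 * are included, then combine min/max and compute a sample-count weighted
 * mean.
 */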
static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
{
	blk_stat_flush_batch(src);

	if (!src->nr_samples)
		return;

	dst->min = min(dst->min, src->min);
	dst->max = max(dst->max, src->max);

	if (!dst->nr_samples)
		dst->mean = src->mean;
	else {
		dst->mean = div64_s64((src->mean * src->nr_samples) +
					(dst->mean * dst->nr_samples),
					dst->nr_samples + src->nr_samples);
	}
	dst->nr_samples += src->nr_samples;
}

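/*
 * Collect the per-ctx read/write stats of a blk-mq queue into @dst, summing
 * only contexts whose stats belong to the newest time window so that stale
 * windows are not mixed into the result.
 */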
static void blk_mq_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
{
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *ctx;
	uint64_t latest = 0;
	int i, j, nr;

	blk_stat_init(&dst[BLK_STAT_READ]);
	blk_stat_init(&dst[BLK_STAT_WRITE]);

	nr = 0;
	do {
		uint64_t newest = 0;

		queue_for_each_hw_ctx(q, hctx, i) {
			hctx_for_each_ctx(hctx, ctx, j) {
				if (!ctx->stat[BLK_STAT_READ].nr_samples &&
				    !ctx->stat[BLK_STAT_WRITE].nr_samples)
					continue;
				if (ctx->stat[BLK_STAT_READ].time > newest)
					newest = ctx->stat[BLK_STAT_READ].time;
				if (ctx->stat[BLK_STAT_WRITE].time > newest)
					newest = ctx->stat[BLK_STAT_WRITE].time;
			}
		}

		/*
		 * No samples
		 */
		if (!newest)
			break;

		if (newest > latest)
			latest = newest;

		queue_for_each_hw_ctx(q, hctx, i) {
			hctx_for_each_ctx(hctx, ctx, j) {
				if (ctx->stat[BLK_STAT_READ].time == newest) {
					blk_stat_sum(&dst[BLK_STAT_READ],
						&ctx->stat[BLK_STAT_READ]);
					nr++;
				}
				if (ctx->stat[BLK_STAT_WRITE].time == newest) {
					blk_stat_sum(&dst[BLK_STAT_WRITE],
						&ctx->stat[BLK_STAT_WRITE]);
					nr++;
				}
			}
		}
		/*
		 * If we race on finding an entry, just loop back again.
		 * Should be very rare.
		 */
	} while (!nr);

	dst[BLK_STAT_READ].time = dst[BLK_STAT_WRITE].time = latest;
}

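/*
 * Fill @dst[BLK_STAT_READ] and @dst[BLK_STAT_WRITE] with the current stats
 * for @q, for both the blk-mq and the legacy request path.
 */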
void blk_queue_stat_get(struct request_queue *q, struct blk_rq_stat *dst)
{
	if (q->mq_ops)
		blk_mq_stat_get(q, dst);
	else {
		memcpy(&dst[BLK_STAT_READ], &q->rq_stats[BLK_STAT_READ],
				sizeof(struct blk_rq_stat));
		memcpy(&dst[BLK_STAT_WRITE], &q->rq_stats[BLK_STAT_WRITE],
				sizeof(struct blk_rq_stat));
	}
}

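/*
 * Like blk_mq_stat_get(), but for a single hardware queue. @dst is not
 * reinitialized here, so the caller can accumulate results across hctxs.
 */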
void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_rq_stat *dst)
{
	struct blk_mq_ctx *ctx;
	unsigned int i, nr;

	nr = 0;
	do {
		uint64_t newest = 0;

		hctx_for_each_ctx(hctx, ctx, i) {
			if (!ctx->stat[BLK_STAT_READ].nr_samples &&
			    !ctx->stat[BLK_STAT_WRITE].nr_samples)
				continue;

			if (ctx->stat[BLK_STAT_READ].time > newest)
				newest = ctx->stat[BLK_STAT_READ].time;
			if (ctx->stat[BLK_STAT_WRITE].time > newest)
				newest = ctx->stat[BLK_STAT_WRITE].time;
		}

		if (!newest)
			break;

		hctx_for_each_ctx(hctx, ctx, i) {
			if (ctx->stat[BLK_STAT_READ].time == newest) {
				blk_stat_sum(&dst[BLK_STAT_READ],
						&ctx->stat[BLK_STAT_READ]);
				nr++;
			}
			if (ctx->stat[BLK_STAT_WRITE].time == newest) {
				blk_stat_sum(&dst[BLK_STAT_WRITE],
						&ctx->stat[BLK_STAT_WRITE]);
				nr++;
			}
		}
		/*
		 * If we race on finding an entry, just loop back again.
		 * Should be very rare, as the window is only updated
		 * occasionally
		 */
	} while (!nr);
}

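/*
 * Reset @stat and stamp it with the time window that @time_now falls into;
 * masking with BLK_STAT_NSEC_MASK rounds the timestamp down to the window
 * boundary.
 */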
static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
{
	stat->min = -1ULL;
	stat->max = stat->nr_samples = stat->mean = 0;
	stat->batch = stat->nr_batch = 0;
	stat->time = time_now & BLK_STAT_NSEC_MASK;
}

void blk_stat_init(struct blk_rq_stat *stat)
{
	__blk_stat_init(stat, ktime_to_ns(ktime_get()));
}

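/*
 * A stat is current if it was last initialized in the same time window as
 * @now, i.e. its window has not expired yet.
 */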
static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
{
	return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK);
}

bool blk_stat_is_current(struct blk_rq_stat *stat)
{
	return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
}

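/*
 * Account the completion latency of @rq in @stat. The stat is reset if its
 * time window has expired, and the pending batch is flushed before it can
 * overflow the sum or reach BLK_RQ_STAT_BATCH entries.
 */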
void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
{
	s64 now, value;

	now = __blk_stat_time(ktime_to_ns(ktime_get()));
	if (now < blk_stat_time(&rq->issue_stat))
		return;

	if (!__blk_stat_is_current(stat, now))
		__blk_stat_init(stat, now);

	value = now - blk_stat_time(&rq->issue_stat);
	if (value > stat->max)
		stat->max = value;
	if (value < stat->min)
		stat->min = value;

	if (stat->batch + value < stat->batch ||
	    stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
		blk_stat_flush_batch(stat);

	stat->batch += value;
	stat->nr_batch++;
}

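/*
 * Reset all stats for @q, covering every software queue on blk-mq as well
 * as the legacy request path.
 */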
void blk_stat_clear(struct request_queue *q)
{
	if (q->mq_ops) {
		struct blk_mq_hw_ctx *hctx;
		struct blk_mq_ctx *ctx;
		int i, j;

		queue_for_each_hw_ctx(q, hctx, i) {
			hctx_for_each_ctx(hctx, ctx, j) {
				blk_stat_init(&ctx->stat[BLK_STAT_READ]);
				blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
			}
		}
	} else {
		blk_stat_init(&q->rq_stats[BLK_STAT_READ]);
		blk_stat_init(&q->rq_stats[BLK_STAT_WRITE]);
	}
}

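/*
 * Stamp the issue time of a request with the current time, keeping the
 * non-time bits (BLK_STAT_MASK) of ->time intact.
 */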
void blk_stat_set_issue_time(struct blk_issue_stat *stat)
{
	stat->time = (stat->time & BLK_STAT_MASK) |
			(ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK);
}

/*
 * Enable stat tracking, return whether it was already enabled
 */
bool blk_stat_enable(struct request_queue *q)
{
	if (!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
		set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
		return false;
	}

	return true;
}