blk-wbt: add general throttling mechanism
We can hook this up to the block layer, to help throttle buffered
writes.
wbt registers a few trace points that can be used to track what is
happening in the system:
wbt_lat: 259:0: latency 2446318
wbt_stat: 259:0: rmean=2446318, rmin=2446318, rmax=2446318, rsamples=1,
wmean=518866, wmin=15522, wmax=5330353, wsamples=57
wbt_step: 259:0: step down: step=1, window=72727272, background=8, normal=16, max=32
This shows a sync issue event (wbt_lat) that exceeded it's time. wbt_stat
dumps the current read/write stats for that window, and wbt_step shows a
step down event where we now scale back writes. Each trace includes the
device, 259:0 in this case.
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
new file mode 100644
index 0000000..e577003
--- /dev/null
+++ b/block/blk-wbt.h
@@ -0,0 +1,165 @@
+#ifndef WB_THROTTLE_H
+#define WB_THROTTLE_H
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
+#include <linux/timer.h>
+#include <linux/ktime.h>
+
+#include "blk-stat.h"
+
+enum wbt_flags {
+ WBT_TRACKED = 1, /* write, tracked for throttling */
+ WBT_READ = 2, /* read */
+ WBT_KSWAPD = 4, /* write, from kswapd */
+
+ WBT_NR_BITS = 3, /* number of bits */
+};
+
+enum {
+ WBT_NUM_RWQ = 2,
+};
+
+static inline void wbt_clear_state(struct blk_issue_stat *stat)
+{
+ stat->time &= BLK_STAT_TIME_MASK;
+}
+
+static inline enum wbt_flags wbt_stat_to_mask(struct blk_issue_stat *stat)
+{
+ return (stat->time & BLK_STAT_MASK) >> BLK_STAT_SHIFT;
+}
+
+static inline void wbt_track(struct blk_issue_stat *stat, enum wbt_flags wb_acct)
+{
+ stat->time |= ((u64) wb_acct) << BLK_STAT_SHIFT;
+}
+
+static inline bool wbt_is_tracked(struct blk_issue_stat *stat)
+{
+ return (stat->time >> BLK_STAT_SHIFT) & WBT_TRACKED;
+}
+
+static inline bool wbt_is_read(struct blk_issue_stat *stat)
+{
+ return (stat->time >> BLK_STAT_SHIFT) & WBT_READ;
+}
+
+struct wb_stat_ops {
+ void (*get)(void *, struct blk_rq_stat *);
+ bool (*is_current)(struct blk_rq_stat *);
+ void (*clear)(void *);
+};
+
+struct rq_wait {
+ wait_queue_head_t wait;
+ atomic_t inflight;
+};
+
+struct rq_wb {
+ /*
+ * Settings that govern how we throttle
+ */
+ unsigned int wb_background; /* background writeback */
+ unsigned int wb_normal; /* normal writeback */
+ unsigned int wb_max; /* max throughput writeback */
+ int scale_step;
+ bool scaled_max;
+
+ /*
+ * Number of consecutive periods where we don't have enough
+ * information to make a firm scale up/down decision.
+ */
+ unsigned int unknown_cnt;
+
+ u64 win_nsec; /* default window size */
+ u64 cur_win_nsec; /* current window size */
+
+ struct timer_list window_timer;
+
+ s64 sync_issue;
+ void *sync_cookie;
+
+ unsigned int wc;
+ unsigned int queue_depth;
+
+ unsigned long last_issue; /* last non-throttled issue */
+ unsigned long last_comp; /* last non-throttled comp */
+ unsigned long min_lat_nsec;
+ struct backing_dev_info *bdi;
+ struct rq_wait rq_wait[WBT_NUM_RWQ];
+
+ struct wb_stat_ops *stat_ops;
+ void *ops_data;
+};
+
+static inline unsigned int wbt_inflight(struct rq_wb *rwb)
+{
+ unsigned int i, ret = 0;
+
+ for (i = 0; i < WBT_NUM_RWQ; i++)
+ ret += atomic_read(&rwb->rq_wait[i].inflight);
+
+ return ret;
+}
+
+struct backing_dev_info;
+
+#ifdef CONFIG_BLK_WBT
+
+void __wbt_done(struct rq_wb *, enum wbt_flags);
+void wbt_done(struct rq_wb *, struct blk_issue_stat *);
+enum wbt_flags wbt_wait(struct rq_wb *, struct bio *, spinlock_t *);
+int wbt_init(struct request_queue *, struct wb_stat_ops *);
+void wbt_exit(struct request_queue *);
+void wbt_update_limits(struct rq_wb *);
+void wbt_requeue(struct rq_wb *, struct blk_issue_stat *);
+void wbt_issue(struct rq_wb *, struct blk_issue_stat *);
+void wbt_disable(struct rq_wb *);
+
+void wbt_set_queue_depth(struct rq_wb *, unsigned int);
+void wbt_set_write_cache(struct rq_wb *, bool);
+
+#else
+
+static inline void __wbt_done(struct rq_wb *rwb, enum wbt_flags flags)
+{
+}
+static inline void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
+{
+}
+static inline enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio,
+ spinlock_t *lock)
+{
+ return 0;
+}
+static inline int wbt_init(struct request_queue *q, struct wb_stat_ops *ops)
+{
+ return -EINVAL;
+}
+static inline void wbt_exit(struct request_queue *q)
+{
+}
+static inline void wbt_update_limits(struct rq_wb *rwb)
+{
+}
+static inline void wbt_requeue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+{
+}
+static inline void wbt_issue(struct rq_wb *rwb, struct blk_issue_stat *stat)
+{
+}
+static inline void wbt_disable(struct rq_wb *rwb)
+{
+}
+static inline void wbt_set_queue_depth(struct rq_wb *rwb, unsigned int depth)
+{
+}
+static inline void wbt_set_write_cache(struct rq_wb *rwb, bool wc)
+{
+}
+
+#endif /* CONFIG_BLK_WBT */
+
+#endif