Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block:
block: only force kblockd unplugging from the schedule() path
block: cleanup the block plug helper functions
block, blk-sysfs: Use the variable directly instead of a function call
block: move queue run on unplug to kblockd
block: kill queue_sync_plugs()
block: readd plug trace event
block: add callback function for unplug notification
block: add comment on why we save and disable interrupts in flush_plug_list()
block: fixup block IO unplug trace call
block: remove block_unplug_timer() trace point
block: splice plug list to local context
diff --git a/block/blk-core.c b/block/blk-core.c
index 90f22cc..3c81210 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -198,19 +198,6 @@
}
EXPORT_SYMBOL(blk_dump_rq_flags);
-/*
- * Make sure that plugs that were pending when this function was entered,
- * are now complete and requests pushed to the queue.
-*/
-static inline void queue_sync_plugs(struct request_queue *q)
-{
- /*
- * If the current process is plugged and has barriers submitted,
- * we will livelock if we don't unplug first.
- */
- blk_flush_plug(current);
-}
-
static void blk_delay_work(struct work_struct *work)
{
struct request_queue *q;
@@ -298,7 +285,6 @@
{
del_timer_sync(&q->timeout);
cancel_delayed_work_sync(&q->delay_work);
- queue_sync_plugs(q);
}
EXPORT_SYMBOL(blk_sync_queue);
@@ -1311,7 +1297,15 @@
plug = current->plug;
if (plug) {
- if (!plug->should_sort && !list_empty(&plug->list)) {
+ /*
+ * If this is the first request added after a plug, fire
+ * off a plug trace. If others have been added before, check
+ * if we have multiple devices in this plug. If so, make a
+ * note to sort the list before dispatch.
+ */
+ if (list_empty(&plug->list))
+ trace_block_plug(q);
+ else if (!plug->should_sort) {
struct request *__rq;
__rq = list_entry_rq(plug->list.prev);
@@ -2668,33 +2662,56 @@
return !(rqa->q <= rqb->q);
}
-static void flush_plug_list(struct blk_plug *plug)
+static void queue_unplugged(struct request_queue *q, unsigned int depth,
+ bool force_kblockd)
+{
+ trace_block_unplug_io(q, depth);
+ __blk_run_queue(q, force_kblockd);
+
+ if (q->unplugged_fn)
+ q->unplugged_fn(q);
+}
+
+void blk_flush_plug_list(struct blk_plug *plug, bool force_kblockd)
{
struct request_queue *q;
unsigned long flags;
struct request *rq;
+ LIST_HEAD(list);
+ unsigned int depth;
BUG_ON(plug->magic != PLUG_MAGIC);
if (list_empty(&plug->list))
return;
- if (plug->should_sort)
- list_sort(NULL, &plug->list, plug_rq_cmp);
+ list_splice_init(&plug->list, &list);
+
+ if (plug->should_sort) {
+ list_sort(NULL, &list, plug_rq_cmp);
+ plug->should_sort = 0;
+ }
q = NULL;
+ depth = 0;
+
+ /*
+ * Save and disable interrupts here, to avoid doing it for every
+ * queue lock we have to take.
+ */
local_irq_save(flags);
- while (!list_empty(&plug->list)) {
- rq = list_entry_rq(plug->list.next);
+ while (!list_empty(&list)) {
+ rq = list_entry_rq(list.next);
list_del_init(&rq->queuelist);
BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG));
BUG_ON(!rq->q);
if (rq->q != q) {
if (q) {
- __blk_run_queue(q, false);
+ queue_unplugged(q, depth, force_kblockd);
spin_unlock(q->queue_lock);
}
q = rq->q;
+ depth = 0;
spin_lock(q->queue_lock);
}
rq->cmd_flags &= ~REQ_ON_PLUG;
@@ -2706,39 +2723,28 @@
__elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
else
__elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
+
+ depth++;
}
if (q) {
- __blk_run_queue(q, false);
+ queue_unplugged(q, depth, force_kblockd);
spin_unlock(q->queue_lock);
}
- BUG_ON(!list_empty(&plug->list));
local_irq_restore(flags);
}
-
-static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
- flush_plug_list(plug);
-
- if (plug == tsk->plug)
- tsk->plug = NULL;
-}
+EXPORT_SYMBOL(blk_flush_plug_list);
void blk_finish_plug(struct blk_plug *plug)
{
- if (plug)
- __blk_finish_plug(current, plug);
+ blk_flush_plug_list(plug, false);
+
+ if (plug == current->plug)
+ current->plug = NULL;
}
EXPORT_SYMBOL(blk_finish_plug);
-void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug)
-{
- __blk_finish_plug(tsk, plug);
- tsk->plug = plug;
-}
-EXPORT_SYMBOL(__blk_flush_plug);
-
int __init blk_dev_init(void)
{
BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 1fa7692..eb94904 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -790,6 +790,22 @@
}
EXPORT_SYMBOL_GPL(blk_queue_flush);
+/**
+ * blk_queue_unplugged - register a callback for an unplug event
+ * @q: the request queue for the device
+ * @fn: the function to call
+ *
+ * Some stacked drivers may need to know when IO is dispatched on an
+ * unplug event. By registering a callback here, they will be notified
+ * when someone flushes their on-stack queue plug. The function will be
+ * called with the queue lock held.
+ */
+void blk_queue_unplugged(struct request_queue *q, unplugged_fn *fn)
+{
+ q->unplugged_fn = fn;
+}
+EXPORT_SYMBOL(blk_queue_unplugged);
+
static int __init blk_settings_init(void)
{
blk_max_low_pfn = max_low_pfn - 1;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 261c75c..6d73512 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -498,7 +498,6 @@
{
int ret;
struct device *dev = disk_to_dev(disk);
-
struct request_queue *q = disk->queue;
if (WARN_ON(!q))
@@ -521,7 +520,7 @@
if (ret) {
kobject_uevent(&q->kobj, KOBJ_REMOVE);
kobject_del(&q->kobj);
- blk_trace_remove_sysfs(disk_to_dev(disk));
+ blk_trace_remove_sysfs(dev);
kobject_put(&dev->kobj);
return ret;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 32176cc..1c76506 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -196,6 +196,7 @@
typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
+typedef void (unplugged_fn) (struct request_queue *);
struct bio_vec;
struct bvec_merge_data {
@@ -283,6 +284,7 @@
rq_timed_out_fn *rq_timed_out_fn;
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
+ unplugged_fn *unplugged_fn;
/*
* Dispatch queue sorting
@@ -841,6 +843,7 @@
extern void blk_queue_update_dma_alignment(struct request_queue *, int);
extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
+extern void blk_queue_unplugged(struct request_queue *, unplugged_fn *);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
@@ -862,14 +865,14 @@
extern void blk_start_plug(struct blk_plug *);
extern void blk_finish_plug(struct blk_plug *);
-extern void __blk_flush_plug(struct task_struct *, struct blk_plug *);
+extern void blk_flush_plug_list(struct blk_plug *, bool);
static inline void blk_flush_plug(struct task_struct *tsk)
{
struct blk_plug *plug = tsk->plug;
- if (unlikely(plug))
- __blk_flush_plug(tsk, plug);
+ if (plug)
+ blk_flush_plug_list(plug, true);
}
static inline bool blk_needs_flush_plug(struct task_struct *tsk)
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 78f18ad..006e60b 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -401,9 +401,9 @@
DECLARE_EVENT_CLASS(block_unplug,
- TP_PROTO(struct request_queue *q),
+ TP_PROTO(struct request_queue *q, unsigned int depth),
- TP_ARGS(q),
+ TP_ARGS(q, depth),
TP_STRUCT__entry(
__field( int, nr_rq )
@@ -411,7 +411,7 @@
),
TP_fast_assign(
- __entry->nr_rq = q->rq.count[READ] + q->rq.count[WRITE];
+ __entry->nr_rq = depth;
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -419,31 +419,18 @@
);
/**
- * block_unplug_timer - timed release of operations requests in queue to device driver
- * @q: request queue to unplug
- *
- * Unplug the request queue @q because a timer expired and allow block
- * operation requests to be sent to the device driver.
- */
-DEFINE_EVENT(block_unplug, block_unplug_timer,
-
- TP_PROTO(struct request_queue *q),
-
- TP_ARGS(q)
-);
-
-/**
* block_unplug_io - release of operations requests in request queue
* @q: request queue to unplug
+ * @depth: number of requests just added to the queue
*
* Unplug request queue @q because device driver is scheduled to work
* on elements in the request queue.
*/
DEFINE_EVENT(block_unplug, block_unplug_io,
- TP_PROTO(struct request_queue *q),
+ TP_PROTO(struct request_queue *q, unsigned int depth),
- TP_ARGS(q)
+ TP_ARGS(q, depth)
);
/**
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7aa40f8..3e3970d 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -850,32 +850,19 @@
__blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL);
}
-static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q)
+static void blk_add_trace_unplug_io(void *ignore, struct request_queue *q,
+ unsigned int depth)
{
struct blk_trace *bt = q->blk_trace;
if (bt) {
- unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
- __be64 rpdu = cpu_to_be64(pdu);
+ __be64 rpdu = cpu_to_be64(depth);
__blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0,
sizeof(rpdu), &rpdu);
}
}
-static void blk_add_trace_unplug_timer(void *ignore, struct request_queue *q)
-{
- struct blk_trace *bt = q->blk_trace;
-
- if (bt) {
- unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE];
- __be64 rpdu = cpu_to_be64(pdu);
-
- __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0,
- sizeof(rpdu), &rpdu);
- }
-}
-
static void blk_add_trace_split(void *ignore,
struct request_queue *q, struct bio *bio,
unsigned int pdu)
@@ -1015,8 +1002,6 @@
WARN_ON(ret);
ret = register_trace_block_plug(blk_add_trace_plug, NULL);
WARN_ON(ret);
- ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
- WARN_ON(ret);
ret = register_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
WARN_ON(ret);
ret = register_trace_block_split(blk_add_trace_split, NULL);
@@ -1033,7 +1018,6 @@
unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL);
unregister_trace_block_split(blk_add_trace_split, NULL);
unregister_trace_block_unplug_io(blk_add_trace_unplug_io, NULL);
- unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer, NULL);
unregister_trace_block_plug(blk_add_trace_plug, NULL);
unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL);
unregister_trace_block_getrq(blk_add_trace_getrq, NULL);
@@ -1348,7 +1332,6 @@
[__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error },
[__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug },
[__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug },
- [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug },
[__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic },
[__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split },
[__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic },