writeback: optimize periodic bdi thread wakeups
When the first inode for a bdi is marked dirty, we wake up the bdi thread which
should take care of the periodic background write-out. However, the write-out
will actually start only 'dirty_writeback_interval' centisecs later, so we can
delay the wake-up.
This change was requested by Nick Piggin who pointed out that if we delay the
wake-up, we weed out 2 unnecessary context switches, which matters because
'__mark_inode_dirty()' is a hot-path function.
This patch introduces a new function - 'bdi_wakeup_thread_delayed()', which
sets up a timer to wake up the bdi thread and returns. So the wake-up is
delayed.
We also delete the timer in bdi threads just before writing-back. And
synchronously delete it when unregistering bdi. At the unregister point the bdi
does not have any users, so no one can arm it again.
Since now we take 'bdi->wb_lock' in the timer, which can execute in softirq
context, we have to use 'spin_lock_bh()' for 'bdi->wb_lock'. This patch makes
this change as well.
This patch also moves the 'bdi_wb_init()' function down in the file to avoid
forward-declaration of 'bdi_wakeup_thread_delayed()'.
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 55f6e46..bfa2df2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -76,7 +76,7 @@
{
trace_writeback_queue(bdi, work);
- spin_lock(&bdi->wb_lock);
+ spin_lock_bh(&bdi->wb_lock);
list_add_tail(&work->list, &bdi->work_list);
if (bdi->wb.task) {
wake_up_process(bdi->wb.task);
@@ -88,7 +88,7 @@
trace_writeback_nothread(bdi, work);
wake_up_process(default_backing_dev_info.wb.task);
}
- spin_unlock(&bdi->wb_lock);
+ spin_unlock_bh(&bdi->wb_lock);
}
static void
@@ -704,13 +704,13 @@
{
struct wb_writeback_work *work = NULL;
- spin_lock(&bdi->wb_lock);
+ spin_lock_bh(&bdi->wb_lock);
if (!list_empty(&bdi->work_list)) {
work = list_entry(bdi->work_list.next,
struct wb_writeback_work, list);
list_del_init(&work->list);
}
- spin_unlock(&bdi->wb_lock);
+ spin_unlock_bh(&bdi->wb_lock);
return work;
}
@@ -810,6 +810,12 @@
trace_writeback_thread_start(bdi);
while (!kthread_should_stop()) {
+ /*
+ * Remove own delayed wake-up timer, since we are already awake
+ * and we'll take care of the periodic write-back.
+ */
+ del_timer(&wb->wakeup_timer);
+
pages_written = wb_do_writeback(wb, 0);
trace_writeback_pages_written(pages_written);
@@ -868,26 +874,6 @@
rcu_read_unlock();
}
-/*
- * This function is used when the first inode for this bdi is marked dirty. It
- * wakes-up the corresponding bdi thread which should then take care of the
- * periodic background write-out of dirty inodes.
- */
-static void wakeup_bdi_thread(struct backing_dev_info *bdi)
-{
- spin_lock(&bdi->wb_lock);
- if (bdi->wb.task)
- wake_up_process(bdi->wb.task);
- else
- /*
- * When bdi tasks are inactive for long time, they are killed.
- * In this case we have to wake-up the forker thread which
- * should create and run the bdi thread.
- */
- wake_up_process(default_backing_dev_info.wb.task);
- spin_unlock(&bdi->wb_lock);
-}
-
static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1019,7 +1005,7 @@
spin_unlock(&inode_lock);
if (wakeup_bdi)
- wakeup_bdi_thread(bdi);
+ bdi_wakeup_thread_delayed(bdi);
}
EXPORT_SYMBOL(__mark_inode_dirty);