f2fs: fix to avoid REQ_TIME and CP_TIME collision
Lei Li reported a issue: if foreground operations are frequent, background
checkpoint may be always skipped due to below check, result in losing more
data after sudden power-cut.
f2fs_balance_fs_bg()
...
if (!is_idle(sbi, REQ_TIME) &&
(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
return;
E.g:
cp_interval = 5 second
idle_interval = 2 second
foreground operation interval = 1 second (append 1 byte per second into file)
In such case, no matter when it calls f2fs_balance_fs_bg(), is_idle(, REQ_TIME)
returns false, result in skipping background checkpoint.
This patch changes as below to make trigger condition being more reasonable:
- trigger sync_fs() if dirty_{nats,nodes} and prefree segs exceeds threshold;
- skip triggering sync_fs() if there is any background inflight IO or there is
foreground operation recently and meanwhile cp_rwsem is being held by someone;
Reported-by: Lei Li <noctis.akm@gmail.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 273f068..0d25f5c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2400,24 +2400,31 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
return entry;
}
-static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
+static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type)
{
- if (sbi->gc_mode == GC_URGENT_HIGH)
- return true;
-
if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
get_pages(sbi, F2FS_WB_CP_DATA) ||
get_pages(sbi, F2FS_DIO_READ) ||
get_pages(sbi, F2FS_DIO_WRITE))
- return false;
+ return true;
if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info &&
atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
- return false;
+ return true;
if (SM_I(sbi) && SM_I(sbi)->fcc_info &&
atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
+ return true;
+ return false;
+}
+
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
+{
+ if (sbi->gc_mode == GC_URGENT_HIGH)
+ return true;
+
+ if (is_inflight_io(sbi, type))
return false;
if (sbi->gc_mode == GC_URGENT_LOW &&
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1596502..d9e2e65 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -529,31 +529,38 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
else
f2fs_build_free_nids(sbi, false, false);
- if (!is_idle(sbi, REQ_TIME) &&
- (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
+ if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) ||
+ excess_prefree_segs(sbi))
+ goto do_sync;
+
+ /* there is background inflight IO or foreground operation recently */
+ if (is_inflight_io(sbi, REQ_TIME) ||
+ (!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem)))
return;
+ /* exceed periodical checkpoint timeout threshold */
+ if (f2fs_time_over(sbi, CP_TIME))
+ goto do_sync;
+
/* checkpoint is the only way to shrink partial cached entries */
- if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
- !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
- excess_prefree_segs(sbi) ||
- excess_dirty_nats(sbi) ||
- excess_dirty_nodes(sbi) ||
- f2fs_time_over(sbi, CP_TIME)) {
- if (test_opt(sbi, DATA_FLUSH) && from_bg) {
- struct blk_plug plug;
+ if (f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
+ f2fs_available_free_memory(sbi, INO_ENTRIES))
+ return;
- mutex_lock(&sbi->flush_lock);
+do_sync:
+ if (test_opt(sbi, DATA_FLUSH) && from_bg) {
+ struct blk_plug plug;
- blk_start_plug(&plug);
- f2fs_sync_dirty_inodes(sbi, FILE_INODE);
- blk_finish_plug(&plug);
+ mutex_lock(&sbi->flush_lock);
- mutex_unlock(&sbi->flush_lock);
- }
- f2fs_sync_fs(sbi->sb, true);
- stat_inc_bg_cp_count(sbi->stat_info);
+ blk_start_plug(&plug);
+ f2fs_sync_dirty_inodes(sbi, FILE_INODE);
+ blk_finish_plug(&plug);
+
+ mutex_unlock(&sbi->flush_lock);
}
+ f2fs_sync_fs(sbi->sb, true);
+ stat_inc_bg_cp_count(sbi->stat_info);
}
static int __submit_flush_wait(struct f2fs_sb_info *sbi,