f2fs: use rw_sem instead of fs_lock(locks mutex) The fs_locks is used to block other ops(ex, recovery) when doing checkpoint. And each other operate routine(besides checkpoint) needs to acquire a fs_lock, there is a terrible problem here, if these are too many concurrency threads acquiring fs_lock, so that they will block each other and may lead to some performance problem, but this is not the phenomenon we want to see. Though there are some optimization patches introduced to enhance the usage of fs_lock, but the thorough solution is using a *rw_sem* to replace the fs_lock. Checkpoint routine takes write_sem, and other ops take read_sem, so that we can block other ops(ex, recovery) when doing checkpoint, and other ops will not disturb each other, this can avoid the problem described above completely. Because of the weakness of rw_sem, the above change may introduce a potential problem that the checkpoint thread might get starved if other threads are intensively locking the read semaphore for I/O.(Pointed out by Xu Jin) In order to avoid this, a wait_list is introduced, the appending read semaphore ops will be dropped into the wait_list if checkpoint thread is waiting for write semaphore, and will be waked up when checkpoint thread gives up write semaphore. Thanks to Kim's previous review and test, and will be very glad to see other guys' performance tests about this patch. V2: -fix the potential starvation problem. -use more suitable func name suggested by Xu Jin. Signed-off-by: Gu Zheng <guz.fnst@cn.fujitsu.com> [Jaegeuk Kim: adjust minor coding standard] Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>

commit: e479556bfdd136669854292eb57ed0139d7253d5 [log] [tgz]
author: Gu Zheng <guz.fnst@cn.fujitsu.com> Fri Sep 27 18:08:30 2013 +0800
committer: Jaegeuk Kim <jaegeuk.kim@samsung.com> Mon Oct 07 11:33:05 2013 +0900
tree: 95772ba1ac8cf1e79c89145daf40c417814896da
parent: 2e5558f4a5cf16a7394fd5770087303db8912c66 [diff] [blame]
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 02c9069..c80faa2 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c

@@ -35,18 +35,18 @@
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	block_t old_blk_addr;
 	struct dnode_of_data dn;
-	int err, ilock;
+	int err;
 
 	f2fs_balance_fs(sbi);
 
 	sb_start_pagefault(inode->i_sb);
 
 	/* block allocation */
-	ilock = mutex_lock_op(sbi);
+	f2fs_lock_op(sbi);
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	err = get_dnode_of_data(&dn, page->index, ALLOC_NODE);
 	if (err) {
-		mutex_unlock_op(sbi, ilock);
+		f2fs_unlock_op(sbi);
 		goto out;
 	}
 
@@ -56,12 +56,12 @@
 		err = reserve_new_block(&dn);
 		if (err) {
 			f2fs_put_dnode(&dn);
-			mutex_unlock_op(sbi, ilock);
+			f2fs_unlock_op(sbi);
 			goto out;
 		}
 	}
 	f2fs_put_dnode(&dn);
-	mutex_unlock_op(sbi, ilock);
+	f2fs_unlock_op(sbi);
 
 	file_update_time(vma->vm_file);
 	lock_page(page);
@@ -270,7 +270,7 @@
 	unsigned int blocksize = inode->i_sb->s_blocksize;
 	struct dnode_of_data dn;
 	pgoff_t free_from;
-	int count = 0, ilock = -1;
+	int count = 0;
 	int err;
 
 	trace_f2fs_truncate_blocks_enter(inode, from);
@@ -278,13 +278,13 @@
 	free_from = (pgoff_t)
 			((from + blocksize - 1) >> (sbi->log_blocksize));
 
-	ilock = mutex_lock_op(sbi);
+	f2fs_lock_op(sbi);
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE);
 	if (err) {
 		if (err == -ENOENT)
 			goto free_next;
-		mutex_unlock_op(sbi, ilock);
+		f2fs_unlock_op(sbi);
 		trace_f2fs_truncate_blocks_exit(inode, err);
 		return err;
 	}
@@ -305,7 +305,7 @@
 	f2fs_put_dnode(&dn);
 free_next:
 	err = truncate_inode_blocks(inode, free_from);
-	mutex_unlock_op(sbi, ilock);
+	f2fs_unlock_op(sbi);
 
 	/* lastly zero out the first data page */
 	truncate_partial_data_page(inode, from);
@@ -416,16 +416,15 @@
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
 	struct page *page;
-	int ilock;
 
 	if (!len)
 		return;
 
 	f2fs_balance_fs(sbi);
 
-	ilock = mutex_lock_op(sbi);
+	f2fs_lock_op(sbi);
 	page = get_new_data_page(inode, NULL, index, false);
-	mutex_unlock_op(sbi, ilock);
+	f2fs_unlock_op(sbi);
 
 	if (!IS_ERR(page)) {
 		wait_on_page_writeback(page);
@@ -484,7 +483,6 @@
 			struct address_space *mapping = inode->i_mapping;
 			loff_t blk_start, blk_end;
 			struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
-			int ilock;
 
 			f2fs_balance_fs(sbi);
 
@@ -493,9 +491,9 @@
 			truncate_inode_pages_range(mapping, blk_start,
 					blk_end - 1);
 
-			ilock = mutex_lock_op(sbi);
+			f2fs_lock_op(sbi);
 			ret = truncate_hole(inode, pg_start, pg_end);
-			mutex_unlock_op(sbi, ilock);
+			f2fs_unlock_op(sbi);
 		}
 	}
 
@@ -529,13 +527,12 @@
 
 	for (index = pg_start; index <= pg_end; index++) {
 		struct dnode_of_data dn;
-		int ilock;
 
-		ilock = mutex_lock_op(sbi);
+		f2fs_lock_op(sbi);
 		set_new_dnode(&dn, inode, NULL, NULL, 0);
 		ret = get_dnode_of_data(&dn, index, ALLOC_NODE);
 		if (ret) {
-			mutex_unlock_op(sbi, ilock);
+			f2fs_unlock_op(sbi);
 			break;
 		}
 
@@ -543,12 +540,12 @@
 			ret = reserve_new_block(&dn);
 			if (ret) {
 				f2fs_put_dnode(&dn);
-				mutex_unlock_op(sbi, ilock);
+				f2fs_unlock_op(sbi);
 				break;
 			}
 		}
 		f2fs_put_dnode(&dn);
-		mutex_unlock_op(sbi, ilock);
+		f2fs_unlock_op(sbi);
 
 		if (pg_start == pg_end)
 			new_size = offset + len;
commit	e479556bfdd136669854292eb57ed0139d7253d5	[log] [tgz]
author	Gu Zheng <guz.fnst@cn.fujitsu.com>	Fri Sep 27 18:08:30 2013 +0800
committer	Jaegeuk Kim <jaegeuk.kim@samsung.com>	Mon Oct 07 11:33:05 2013 +0900
tree	95772ba1ac8cf1e79c89145daf40c417814896da
parent	2e5558f4a5cf16a7394fd5770087303db8912c66 [diff] [blame]