Revert "f2fs: use percpu_rw_semaphore"
LKP reported a -36.3% regression in fsmark.files_per_sec caused by the reverted patch.
I've also confirmed that fxmark [1] shows a slight regression for DWAL.
[1] https://github.com/sslab-gatech/fxmark
This reverts commit ec795418c41850056feb956534edf059dc1155d4.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 675fa79..14f5fe2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -538,7 +538,7 @@
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
struct radix_tree_root nat_set_root;/* root of the nat set cache */
- struct percpu_rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
+ struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
struct list_head nat_entries; /* cached nat entry list (clean) */
unsigned int nat_cnt; /* the # of cached nat entries */
unsigned int dirty_nat_cnt; /* total num of nat entries in set */
@@ -787,7 +787,7 @@
struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */
struct inode *meta_inode; /* cache meta blocks */
struct mutex cp_mutex; /* checkpoint procedure lock */
- struct percpu_rw_semaphore cp_rwsem; /* blocking FS operations */
+ struct rw_semaphore cp_rwsem; /* blocking FS operations */
struct rw_semaphore node_write; /* locking node writes */
wait_queue_head_t cp_wait;
unsigned long last_time[MAX_TIME]; /* to store time in jiffies */
@@ -1074,22 +1074,22 @@
static inline void f2fs_lock_op(struct f2fs_sb_info *sbi)
{
- percpu_down_read(&sbi->cp_rwsem);
+ down_read(&sbi->cp_rwsem);
}
static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
{
- percpu_up_read(&sbi->cp_rwsem);
+ up_read(&sbi->cp_rwsem);
}
static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
{
- percpu_down_write(&sbi->cp_rwsem);
+ down_write(&sbi->cp_rwsem);
}
static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
{
- percpu_up_write(&sbi->cp_rwsem);
+ up_write(&sbi->cp_rwsem);
}
static inline int __get_cp_reason(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b2fa4b6..f75d197 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -206,14 +206,14 @@
struct nat_entry *e;
bool need = false;
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
if (!get_nat_flag(e, IS_CHECKPOINTED) &&
!get_nat_flag(e, HAS_FSYNCED_INODE))
need = true;
}
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return need;
}
@@ -223,11 +223,11 @@
struct nat_entry *e;
bool is_cp = true;
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e && !get_nat_flag(e, IS_CHECKPOINTED))
is_cp = false;
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return is_cp;
}
@@ -237,13 +237,13 @@
struct nat_entry *e;
bool need_update = true;
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ino);
if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
(get_nat_flag(e, IS_CHECKPOINTED) ||
get_nat_flag(e, HAS_FSYNCED_INODE)))
need_update = false;
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return need_update;
}
@@ -284,7 +284,7 @@
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
- percpu_down_write(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
e = grab_nat_entry(nm_i, ni->nid);
@@ -334,7 +334,7 @@
set_nat_flag(e, HAS_FSYNCED_INODE, true);
set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
}
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -342,7 +342,8 @@
struct f2fs_nm_info *nm_i = NM_I(sbi);
int nr = nr_shrink;
- percpu_down_write(&nm_i->nat_tree_lock);
+ if (!down_write_trylock(&nm_i->nat_tree_lock))
+ return 0;
while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
struct nat_entry *ne;
@@ -351,7 +352,7 @@
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
}
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
@@ -373,13 +374,13 @@
ni->nid = nid;
/* Check nat cache */
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
return;
}
@@ -403,11 +404,11 @@
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
/* cache nat entry */
- percpu_down_write(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
cache_nat_entry(sbi, nid, &ne);
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
}
/*
@@ -1788,7 +1789,7 @@
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
- percpu_down_read(&nm_i->nat_tree_lock);
+ down_read(&nm_i->nat_tree_lock);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
@@ -1820,7 +1821,7 @@
remove_free_nid(nm_i, nid);
}
up_read(&curseg->journal_rwsem);
- percpu_up_read(&nm_i->nat_tree_lock);
+ up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
@@ -2209,7 +2210,7 @@
if (!nm_i->dirty_nat_cnt)
return;
- percpu_down_write(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
/*
* if there are no enough space in journal to store dirty nat
@@ -2232,7 +2233,7 @@
list_for_each_entry_safe(set, tmp, &sets, set_list)
__flush_nat_entry_set(sbi, set);
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
@@ -2268,8 +2269,7 @@
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
- if (percpu_init_rwsem(&nm_i->nat_tree_lock))
- return -ENOMEM;
+ init_rwsem(&nm_i->nat_tree_lock);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2326,7 +2326,7 @@
spin_unlock(&nm_i->free_nid_list_lock);
/* destroy nat cache */
- percpu_down_write(&nm_i->nat_tree_lock);
+ down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
@@ -2351,9 +2351,8 @@
kmem_cache_free(nat_entry_set_slab, setvec[idx]);
}
}
- percpu_up_write(&nm_i->nat_tree_lock);
+ up_write(&nm_i->nat_tree_lock);
- percpu_free_rwsem(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
sbi->nm_info = NULL;
kfree(nm_i);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 1b86d3f..7f863a6 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -706,8 +706,6 @@
percpu_counter_destroy(&sbi->nr_pages[i]);
percpu_counter_destroy(&sbi->alloc_valid_block_count);
percpu_counter_destroy(&sbi->total_valid_inode_count);
-
- percpu_free_rwsem(&sbi->cp_rwsem);
}
static void f2fs_put_super(struct super_block *sb)
@@ -1483,9 +1481,6 @@
{
int i, err;
- if (percpu_init_rwsem(&sbi->cp_rwsem))
- return -ENOMEM;
-
for (i = 0; i < NR_COUNT_TYPE; i++) {
err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL);
if (err)
@@ -1686,6 +1681,7 @@
sbi->write_io[i].bio = NULL;
}
+ init_rwsem(&sbi->cp_rwsem);
init_waitqueue_head(&sbi->cp_wait);
init_sb_info(sbi);