f2fs: throttle the memory footprint with a sysfs entry
This patch introduces ram_thresh, a sysfs entry, which controls the memory
footprint used by the free nid list and the nat cache.
Previously, the free nid list was controlled by MAX_FREE_NIDS, while the nat
cache was managed by NM_WOUT_THRESHOLD.
However, these fixed limits cannot adapt to the amount of memory in the system.
So, this patch adds ram_thresh, which lets users specify the threshold in units
of 1/1024 of the total ram size.
For example, if the total ram size is 4GB and the value is left at its default
of 10, f2fs tries to keep the free nids and nat caches from consuming more than
10 * (4GB / 1024) = 40MB in total, i.e. 10MB per 1GB of ram.
Signed-off-by: Jaegeuk Kim <jaegeuk.kim@samsung.com>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 59fac1a..05c6524 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -242,6 +242,7 @@
block_t nat_blkaddr; /* base disk address of NAT */
nid_t max_nid; /* maximum possible node ids */
nid_t next_scan_nid; /* the next nid to be scanned */
+ unsigned int ram_thresh; /* control the memory footprint */
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d27e65a..fec4967 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -26,6 +26,22 @@
static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
+/*
+ * available_free_memory - check whether a cache is still under its RAM budget
+ * @nm_i: node manager info; supplies fcnt, nat_cnt and ram_thresh
+ * @type: FREE_NIDS or NAT_ENTRIES, selecting which cache to measure
+ *
+ * Returns true while the selected cache occupies fewer pages than its share
+ * of the budget. The total budget is ram_thresh / 1024 of the total RAM and
+ * each of the two caches gets half of it, i.e. (totalram * ram_thresh) >> 11
+ * pages. For any other @type the measured size is zero.
+ */
+static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type)
+{
+	struct sysinfo val;
+	unsigned long mem_size = 0;
+
+	si_meminfo(&val);
+	if (type == FREE_NIDS)
+		mem_size = nm_i->fcnt * sizeof(struct free_nid);
+	else if (type == NAT_ENTRIES)
+		mem_size = nm_i->nat_cnt * sizeof(struct nat_entry);
+
+	/*
+	 * si_meminfo() reports totalram in units of PAGE_SIZE, so convert the
+	 * byte count with PAGE_SHIFT instead of assuming 4KB pages.
+	 */
+	mem_size >>= PAGE_SHIFT;
+
+	/* give 50:50 memory for free nids and nat caches respectively */
+	return mem_size < ((val.totalram * nm_i->ram_thresh) >> 11);
+}
+
static void clear_node_page_dirty(struct page *page)
{
struct address_space *mapping = page->mapping;
@@ -208,7 +224,7 @@
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
- if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD || nr_shrink <= 0)
+ if (available_free_memory(nm_i, NAT_ENTRIES) || nr_shrink <= 0)
return 0;
write_lock(&nm_i->nat_tree_lock);
@@ -1288,7 +1304,7 @@
struct nat_entry *ne;
bool allocated = false;
- if (nm_i->fcnt > 2 * MAX_FREE_NIDS)
+ if (!available_free_memory(nm_i, FREE_NIDS))
return -1;
/* 0 nid should not be used */
@@ -1473,7 +1489,7 @@
spin_lock(&nm_i->free_nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
f2fs_bug_on(!i || i->state != NID_ALLOC);
- if (nm_i->fcnt > 2 * MAX_FREE_NIDS) {
+ if (!available_free_memory(nm_i, FREE_NIDS)) {
__del_from_free_nid_list(nm_i, i);
} else {
i->state = NID_NEW;
@@ -1836,6 +1852,7 @@
nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3;
nm_i->fcnt = 0;
nm_i->nat_cnt = 0;
+ nm_i->ram_thresh = DEF_RAM_THRESHOLD;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index ee6d286..c972154 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -17,15 +17,15 @@
/* # of pages to perform readahead before building free nids */
#define FREE_NID_PAGES 4
-/* maximum # of free node ids to produce during build_free_nids */
-#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES)
-
/* maximum readahead size for node during getting data blocks */
#define MAX_RA_NODE 128
/* maximum cached nat entries to manage memory footprint */
#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK)
+/* control the memory footprint threshold (10MB per 1GB ram) */
+#define DEF_RAM_THRESHOLD 10
+
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
@@ -77,6 +77,11 @@
ni->version = raw_ne->version;
}
+/* Selects which cache available_free_memory() measures against ram_thresh. */
+enum nid_type {
+ FREE_NIDS, /* the free nid list: fcnt entries of struct free_nid */
+ NAT_ENTRIES /* the cached nat entries: nat_cnt entries of struct nat_entry */
+};
+
/*
* For free nid mangement
*/
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index dbe402b..34c47b2 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -74,6 +74,7 @@
enum {
GC_THREAD, /* struct f2fs_gc_thread */
SM_INFO, /* struct f2fs_sm_info */
+ NM_INFO, /* struct f2fs_nm_info */
F2FS_SBI, /* struct f2fs_sb_info */
};
@@ -92,6 +93,8 @@
return (unsigned char *)sbi->gc_thread;
else if (struct_type == SM_INFO)
return (unsigned char *)SM_I(sbi);
+ else if (struct_type == NM_INFO)
+ return (unsigned char *)NM_I(sbi);
else if (struct_type == F2FS_SBI)
return (unsigned char *)sbi;
return NULL;
@@ -183,6 +186,7 @@
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
@@ -198,6 +202,7 @@
ATTR_LIST(min_ipu_util),
ATTR_LIST(max_victim_search),
ATTR_LIST(dir_level),
+ ATTR_LIST(ram_thresh),
NULL,
};