[PATCH] zoned vm counters: conversion of nr_pagecache to per zone counter
Currently a single atomic variable is used to establish the size of the page
cache in the whole machine. The zoned VM counters have the same method of
implementation as the nr_pagecache code but also allow the determination of
the pagecache size per zone.
Remove the special implementation for nr_pagecache and make it a zoned counter
named NR_FILE_PAGES.
Updates of the page cache counters are always performed with interrupts off.
We can therefore use the __ variant here.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 7915a197..180ba79 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -130,7 +130,8 @@
mem_data->totalhigh = P2K(val.totalhigh);
mem_data->freehigh = P2K(val.freehigh);
mem_data->bufferram = P2K(val.bufferram);
- mem_data->cached = P2K(atomic_read(&nr_pagecache) - val.bufferram);
+ mem_data->cached = P2K(global_page_state(NR_FILE_PAGES)
+ - val.bufferram);
si_swapinfo(&val);
mem_data->totalswap = P2K(val.totalswap);
diff --git a/arch/sparc/kernel/sys_sunos.c b/arch/sparc/kernel/sys_sunos.c
index 288de27..aa0fb2e 100644
--- a/arch/sparc/kernel/sys_sunos.c
+++ b/arch/sparc/kernel/sys_sunos.c
@@ -196,7 +196,7 @@
* simple, it hopefully works in most obvious cases.. Easy to
* fool it, but this should catch most mistakes.
*/
- freepages = get_page_cache_size();
+ freepages = global_page_state(NR_FILE_PAGES);
freepages >>= 1;
freepages += nr_free_pages();
freepages += nr_swap_pages;
diff --git a/arch/sparc64/kernel/sys_sunos32.c b/arch/sparc64/kernel/sys_sunos32.c
index ae5b32f..87ebdf8 100644
--- a/arch/sparc64/kernel/sys_sunos32.c
+++ b/arch/sparc64/kernel/sys_sunos32.c
@@ -155,7 +155,7 @@
* simple, it hopefully works in most obvious cases.. Easy to
* fool it, but this should catch most mistakes.
*/
- freepages = get_page_cache_size();
+ freepages = global_page_state(NR_FILE_PAGES);
freepages >>= 1;
freepages += nr_free_pages();
freepages += nr_swap_pages;
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 8b12323..ae9e3fe 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -69,6 +69,7 @@
"Node %d LowFree: %8lu kB\n"
"Node %d Dirty: %8lu kB\n"
"Node %d Writeback: %8lu kB\n"
+ "Node %d FilePages: %8lu kB\n"
"Node %d Mapped: %8lu kB\n"
"Node %d Slab: %8lu kB\n",
nid, K(i.totalram),
@@ -82,6 +83,7 @@
nid, K(i.freeram - i.freehigh),
nid, K(ps.nr_dirty),
nid, K(ps.nr_writeback),
+ nid, K(node_page_state(nid, NR_FILE_PAGES)),
nid, K(node_page_state(nid, NR_FILE_MAPPED)),
nid, K(ps.nr_slab));
n += hugetlb_report_node_meminfo(nid, buf + n);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index bc7d9ab..1af12fd7 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -142,7 +142,8 @@
allowed = ((totalram_pages - hugetlb_total_pages())
* sysctl_overcommit_ratio / 100) + total_swap_pages;
- cached = get_page_cache_size() - total_swapcache_pages - i.bufferram;
+ cached = global_page_state(NR_FILE_PAGES) -
+ total_swapcache_pages - i.bufferram;
if (cached < 0)
cached = 0;
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index eb42c12..08be91e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -49,7 +49,7 @@
enum zone_stat_item {
NR_FILE_MAPPED, /* mapped into pagetables.
only modified from process context */
-
+ NR_FILE_PAGES,
NR_VM_ZONE_STAT_ITEMS };
struct per_cpu_pages {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 1245df7..0a2f5d2 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -113,51 +113,6 @@
extern void remove_from_page_cache(struct page *page);
extern void __remove_from_page_cache(struct page *page);
-extern atomic_t nr_pagecache;
-
-#ifdef CONFIG_SMP
-
-#define PAGECACHE_ACCT_THRESHOLD max(16, NR_CPUS * 2)
-DECLARE_PER_CPU(long, nr_pagecache_local);
-
-/*
- * pagecache_acct implements approximate accounting for pagecache.
- * vm_enough_memory() do not need high accuracy. Writers will keep
- * an offset in their per-cpu arena and will spill that into the
- * global count whenever the absolute value of the local count
- * exceeds the counter's threshold.
- *
- * MUST be protected from preemption.
- * current protection is mapping->page_lock.
- */
-static inline void pagecache_acct(int count)
-{
- long *local;
-
- local = &__get_cpu_var(nr_pagecache_local);
- *local += count;
- if (*local > PAGECACHE_ACCT_THRESHOLD || *local < -PAGECACHE_ACCT_THRESHOLD) {
- atomic_add(*local, &nr_pagecache);
- *local = 0;
- }
-}
-
-#else
-
-static inline void pagecache_acct(int count)
-{
- atomic_add(count, &nr_pagecache);
-}
-#endif
-
-static inline unsigned long get_page_cache_size(void)
-{
- int ret = atomic_read(&nr_pagecache);
- if (unlikely(ret < 0))
- ret = 0;
- return ret;
-}
-
/*
* Return byte-offset into filesystem object for page.
*/
diff --git a/mm/filemap.c b/mm/filemap.c
index 648f2c0..87d62c4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -120,7 +120,7 @@
radix_tree_delete(&mapping->page_tree, page->index);
page->mapping = NULL;
mapping->nrpages--;
- pagecache_acct(-1);
+ __dec_zone_page_state(page, NR_FILE_PAGES);
}
void remove_from_page_cache(struct page *page)
@@ -449,7 +449,7 @@
page->mapping = mapping;
page->index = offset;
mapping->nrpages++;
- pagecache_acct(1);
+ __inc_zone_page_state(page, NR_FILE_PAGES);
}
write_unlock_irq(&mapping->tree_lock);
radix_tree_preload_end();
diff --git a/mm/mmap.c b/mm/mmap.c
index 6446c61..c1868ec 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -96,7 +96,7 @@
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
unsigned long n;
- free = get_page_cache_size();
+ free = global_page_state(NR_FILE_PAGES);
free += nr_swap_pages;
/*
diff --git a/mm/nommu.c b/mm/nommu.c
index 029fada..5151c44 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1122,7 +1122,7 @@
if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
unsigned long n;
- free = get_page_cache_size();
+ free = global_page_state(NR_FILE_PAGES);
free += nr_swap_pages;
/*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 04dd2b0..8350720f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2124,16 +2124,11 @@
unsigned long action, void *hcpu)
{
int cpu = (unsigned long)hcpu;
- long *count;
unsigned long *src, *dest;
if (action == CPU_DEAD) {
int i;
- /* Drain local pagecache count. */
- count = &per_cpu(nr_pagecache_local, cpu);
- atomic_add(*count, &nr_pagecache);
- *count = 0;
local_irq_disable();
__drain_pages(cpu);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 7535211..fccbd9b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -87,7 +87,7 @@
SetPageSwapCache(page);
set_page_private(page, entry.val);
total_swapcache_pages++;
- pagecache_acct(1);
+ __inc_zone_page_state(page, NR_FILE_PAGES);
}
write_unlock_irq(&swapper_space.tree_lock);
radix_tree_preload_end();
@@ -132,7 +132,7 @@
set_page_private(page, 0);
ClearPageSwapCache(page);
total_swapcache_pages--;
- pagecache_acct(-1);
+ __dec_zone_page_state(page, NR_FILE_PAGES);
INC_CACHE_INFO(del_total);
}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4800091..f16b33e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -20,12 +20,6 @@
*/
DEFINE_PER_CPU(struct page_state, page_states) = {0};
-atomic_t nr_pagecache = ATOMIC_INIT(0);
-EXPORT_SYMBOL(nr_pagecache);
-#ifdef CONFIG_SMP
-DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
-#endif
-
static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
{
unsigned cpu;
@@ -402,6 +396,7 @@
static char *vmstat_text[] = {
/* Zoned VM counters */
"nr_mapped",
+ "nr_file_pages",
/* Page state */
"nr_dirty",