[PATCH] Optimize off-node performance of zone reclaim
Ensure that the performance of off-node pages stays the same as before.
Off-node page fault tests showed an 18% drop in performance without this
patch.
- Increase the minimum interval between zone reclaim scans (the timeout) from HZ/2 to 30 seconds to reduce the overhead.
- Move as much code as possible out of the off-node hot path for zone reclaim
(sorry Andrew, the struct initialization had to be sacrificed).
The read_page_state() call in that initializer bit us there.
- Check for the timeout first, before any other checks.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2e34b61..465bfa5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1589,24 +1589,20 @@
/*
* Mininum time between zone reclaim scans
*/
-#define ZONE_RECLAIM_INTERVAL HZ/2
+#define ZONE_RECLAIM_INTERVAL (30*HZ)
/*
* Try to free up some pages from this zone through reclaim.
*/
int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
{
- int nr_pages = 1 << order;
+ int nr_pages;
struct task_struct *p = current;
struct reclaim_state reclaim_state;
- struct scan_control sc = {
- .gfp_mask = gfp_mask,
- .may_writepage = 0,
- .may_swap = 0,
- .nr_mapped = read_page_state(nr_mapped),
- .nr_scanned = 0,
- .nr_reclaimed = 0,
- .priority = 0
- };
+ struct scan_control sc;
+
+ if (time_before(jiffies,
+ zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL))
+ return 0;
if (!(gfp_mask & __GFP_WAIT) ||
zone->zone_pgdat->node_id != numa_node_id() ||
@@ -1614,12 +1610,17 @@
atomic_read(&zone->reclaim_in_progress) > 0)
return 0;
- if (time_before(jiffies,
- zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL))
- return 0;
+ sc.may_writepage = 0;
+ sc.may_swap = 0;
+ sc.nr_scanned = 0;
+ sc.nr_reclaimed = 0;
+ sc.priority = 0;
+ sc.nr_mapped = read_page_state(nr_mapped);
+ sc.gfp_mask = gfp_mask;
disable_swap_token();
+ nr_pages = 1 << order;
if (nr_pages > SWAP_CLUSTER_MAX)
sc.swap_cluster_max = nr_pages;
else