mm: kswapd: use the order that kswapd was reclaiming at for sleeping_prematurely()

Before kswapd goes to sleep, it uses sleeping_prematurely() to check if there was a race pushing a zone below its watermark. If the race happened, it stays awake. However, balance_pgdat() can decide to reclaim at order-0 if it decides that high-order reclaim is not working as expected. This information is not passed back to sleeping_prematurely(). The impact is that kswapd remains awake reclaiming pages long after it should have gone to sleep. This patch passes the adjusted order to sleeping_prematurely and uses the same logic as balance_pgdat to decide if it's ok to go to sleep.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Eric B Munson <emunson@mgebm.net>
Cc: Simon Kirby <sim@hostway.ca>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

commit: 0abdee2bd4118366c62349a304f81537be69af33 [log] [tgz]
author: Mel Gorman <mel@csn.ul.ie> Thu Jan 13 15:46:22 2011 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> Thu Jan 13 17:32:37 2011 -0800
tree: c013abd2dd49b3837d033eb4d32dfb57984d273e
parent: 1741c87757448cedd03224f01586504f9256415d [diff] [blame]
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d348882..46711f0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c

@@ -2227,7 +2227,7 @@
 }
 
 /* is kswapd sleeping prematurely? */
-static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
+static bool sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 {
 	int i;
 	unsigned long balanced = 0;
@@ -2237,7 +2237,7 @@
 	if (remaining)
 		return 1;
 
-	/* If after HZ/10, a zone is below the high mark, it's premature */
+	/* Check the watermark levels */
 	for (i = 0; i < pgdat->nr_zones; i++) {
 		struct zone *zone = pgdat->node_zones + i;
 
@@ -2269,7 +2269,7 @@
  * For kswapd, balance_pgdat() will work across all this node's zones until
  * they are all at high_wmark_pages(zone).
  *
- * Returns the number of pages which were actually freed.
+ * Returns the final order kswapd was reclaiming at
  *
  * There is special handling here for zones which are full of pinned pages.
  * This can happen if the pages are all mlocked, or if they are all used by
@@ -2532,7 +2532,13 @@
 		}
 	}
 
-	return sc.nr_reclaimed;
+	/*
+	 * Return the order we were reclaiming at so sleeping_prematurely()
+	 * makes a decision on the order we were last reclaiming at. However,
+	 * if another caller entered the allocator slow path while kswapd
+	 * was awake, order will remain at the higher level
+	 */
+	return order;
 }
 
 static void kswapd_try_to_sleep(pg_data_t *pgdat, int order)
@@ -2659,7 +2665,7 @@
 		 */
 		if (!ret) {
 			trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
-			balance_pgdat(pgdat, order, classzone_idx);
+			order = balance_pgdat(pgdat, order, classzone_idx);
 		}
 	}
 	return 0;
commit	0abdee2bd4118366c62349a304f81537be69af33	[log] [tgz]
author	Mel Gorman <mel@csn.ul.ie>	Thu Jan 13 15:46:22 2011 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	Thu Jan 13 17:32:37 2011 -0800
tree	c013abd2dd49b3837d033eb4d32dfb57984d273e
parent	1741c87757448cedd03224f01586504f9256415d [diff] [blame]