Blame - mm/compaction.c - SHIFTPHONES/android_kernel_shift_sdm845

blob: 94cce51b0b3535af75c20f29ecb86a11aba32a71 [file] [log] [blame]

Mel Gorman	748446b	2010-05-24 14:32:27 -0700	[diff] [blame]	1	/*
				2	* linux/mm/compaction.c
				3	*
				4	* Memory compaction for the reduction of external fragmentation. Note that
				5	* this heavily depends upon page migration to do all the real heavy
				6	* lifting
				7	*
				8	* Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
				9	*/
				10	#include <linux/swap.h>
				11	#include <linux/migrate.h>
				12	#include <linux/compaction.h>
				13	#include <linux/mm_inline.h>
				14	#include <linux/backing-dev.h>
Mel Gorman	76ab0f5	2010-05-24 14:32:28 -0700	[diff] [blame]	15	#include <linux/sysctl.h>
Mel Gorman	ed4a6d7	2010-05-24 14:32:29 -0700	[diff] [blame]	16	#include <linux/sysfs.h>
Mel Gorman	748446b	2010-05-24 14:32:27 -0700	[diff] [blame]	17	#include "internal.h"
				18
				19	/*
				20	* compact_control is used to track pages being migrated and the free pages
				21	* they are being migrated to during memory compaction. The free_pfn starts
				22	* at the end of a zone and migrate_pfn begins at the start. Movable pages
				23	* are moved to the end of a zone during a compaction run and the run
				24	* completes when free_pfn <= migrate_pfn
				25	*/
				26	struct compact_control {
				27	struct list_head freepages; /* List of free pages to migrate to */
				28	struct list_head migratepages; /* List of pages being migrated */
				29	unsigned long nr_freepages; /* Number of isolated free pages */
				30	unsigned long nr_migratepages; /* Number of pages to migrate */
				31	unsigned long free_pfn; /* isolate_freepages search base */
				32	unsigned long migrate_pfn; /* isolate_migratepages search base */
				33
				34	/* Account for isolated anon and file pages */
				35	unsigned long nr_anon;
				36	unsigned long nr_file;
				37
Mel Gorman	56de726	2010-05-24 14:32:30 -0700	[diff] [blame]	38	unsigned int order; /* order a direct compactor needs */
				39	int migratetype; /* MOVABLE, RECLAIMABLE etc */
Mel Gorman	748446b	2010-05-24 14:32:27 -0700	[diff] [blame]	40	struct zone *zone;
				41	};
				42
				43	static unsigned long release_freepages(struct list_head *freelist)
				44	{
				45	struct page page, next;
				46	unsigned long count = 0;
				47
				48	list_for_each_entry_safe(page, next, freelist, lru) {
				49	list_del(&page->lru);
				50	__free_page(page);
				51	count++;
				52	}
				53
				54	return count;
				55	}
				56
				57	/* Isolate free pages onto a private freelist. Must hold zone->lock */
				58	static unsigned long isolate_freepages_block(struct zone *zone,
				59	unsigned long blockpfn,
				60	struct list_head *freelist)
				61	{
				62	unsigned long zone_end_pfn, end_pfn;
				63	int total_isolated = 0;
				64	struct page *cursor;
				65
				66	/* Get the last PFN we should scan for free pages at */
				67	zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
				68	end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn);
				69
				70	/* Find the first usable PFN in the block to initialse page cursor */
				71	for (; blockpfn < end_pfn; blockpfn++) {
				72	if (pfn_valid_within(blockpfn))
				73	break;
				74	}
				75	cursor = pfn_to_page(blockpfn);
				76
				77	/* Isolate free pages. This assumes the block is valid */
				78	for (; blockpfn < end_pfn; blockpfn++, cursor++) {
				79	int isolated, i;
				80	struct page *page = cursor;
				81
				82	if (!pfn_valid_within(blockpfn))
				83	continue;
				84
				85	if (!PageBuddy(page))
				86	continue;
				87
				88	/* Found a free page, break it into order-0 pages */
				89	isolated = split_free_page(page);
				90	total_isolated += isolated;
				91	for (i = 0; i < isolated; i++) {
				92	list_add(&page->lru, freelist);
				93	page++;
				94	}
				95
				96	/* If a page was split, advance to the end of it */
				97	if (isolated) {
				98	blockpfn += isolated - 1;
				99	cursor += isolated - 1;
				100	}
				101	}
				102
				103	return total_isolated;
				104	}
				105
				106	/* Returns true if the page is within a block suitable for migration to */
				107	static bool suitable_migration_target(struct page *page)
				108	{
				109
				110	int migratetype = get_pageblock_migratetype(page);
				111
				112	/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
				113	if (migratetype == MIGRATE_ISOLATE \|\| migratetype == MIGRATE_RESERVE)
				114	return false;
				115
				116	/* If the page is a large free page, then allow migration */
				117	if (PageBuddy(page) && page_order(page) >= pageblock_order)
				118	return true;
				119
				120	/* If the block is MIGRATE_MOVABLE, allow migration */
				121	if (migratetype == MIGRATE_MOVABLE)
				122	return true;
				123
				124	/* Otherwise skip the block */
				125	return false;
				126	}
				127
				128	/*
				129	* Based on information in the current compact_control, find blocks
				130	* suitable for isolating free pages from and then isolate them.
				131	*/
				132	static void isolate_freepages(struct zone *zone,
				133	struct compact_control *cc)
				134	{
				135	struct page *page;
				136	unsigned long high_pfn, low_pfn, pfn;
				137	unsigned long flags;
				138	int nr_freepages = cc->nr_freepages;
				139	struct list_head *freelist = &cc->freepages;
				140
				141	pfn = cc->free_pfn;
				142	low_pfn = cc->migrate_pfn + pageblock_nr_pages;
				143	high_pfn = low_pfn;
				144
				145	/*
				146	* Isolate free pages until enough are available to migrate the
				147	* pages on cc->migratepages. We stop searching if the migrate
				148	* and free page scanners meet or enough free pages are isolated.
				149	*/
				150	spin_lock_irqsave(&zone->lock, flags);
				151	for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
				152	pfn -= pageblock_nr_pages) {
				153	unsigned long isolated;
				154
				155	if (!pfn_valid(pfn))
				156	continue;
				157
				158	/*
				159	* Check for overlapping nodes/zones. It's possible on some
				160	* configurations to have a setup like
				161	* node0 node1 node0
				162	* i.e. it's possible that all pages within a zones range of
				163	* pages do not belong to a single zone.
				164	*/
				165	page = pfn_to_page(pfn);
				166	if (page_zone(page) != zone)
				167	continue;
				168
				169	/* Check the block is suitable for migration */
				170	if (!suitable_migration_target(page))
				171	continue;
				172
				173	/* Found a block suitable for isolating free pages from */
				174	isolated = isolate_freepages_block(zone, pfn, freelist);
				175	nr_freepages += isolated;
				176
				177	/*
				178	* Record the highest PFN we isolated pages from. When next
				179	* looking for free pages, the search will restart here as
				180	* page migration may have returned some pages to the allocator
				181	*/
				182	if (isolated)
				183	high_pfn = max(high_pfn, pfn);
				184	}
				185	spin_unlock_irqrestore(&zone->lock, flags);
				186
				187	/* split_free_page does not map the pages */
				188	list_for_each_entry(page, freelist, lru) {
				189	arch_alloc_page(page, 0);
				190	kernel_map_pages(page, 1, 1);
				191	}
				192
				193	cc->free_pfn = high_pfn;
				194	cc->nr_freepages = nr_freepages;
				195	}
				196
				197	/* Update the number of anon and file isolated pages in the zone */
				198	static void acct_isolated(struct zone zone, struct compact_control cc)
				199	{
				200	struct page *page;
				201	unsigned int count[NR_LRU_LISTS] = { 0, };
				202
				203	list_for_each_entry(page, &cc->migratepages, lru) {
				204	int lru = page_lru_base_type(page);
				205	count[lru]++;
				206	}
				207
				208	cc->nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
				209	cc->nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
				210	__mod_zone_page_state(zone, NR_ISOLATED_ANON, cc->nr_anon);
				211	__mod_zone_page_state(zone, NR_ISOLATED_FILE, cc->nr_file);
				212	}
				213
				214	/* Similar to reclaim, but different enough that they don't share logic */
				215	static bool too_many_isolated(struct zone *zone)
				216	{
				217
				218	unsigned long inactive, isolated;
				219
				220	inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
				221	zone_page_state(zone, NR_INACTIVE_ANON);
				222	isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
				223	zone_page_state(zone, NR_ISOLATED_ANON);
				224
				225	return isolated > inactive;
				226	}
				227
				228	/*
				229	* Isolate all pages that can be migrated from the block pointed to by
				230	* the migrate scanner within compact_control.
				231	*/
				232	static unsigned long isolate_migratepages(struct zone *zone,
				233	struct compact_control *cc)
				234	{
				235	unsigned long low_pfn, end_pfn;
				236	struct list_head *migratelist = &cc->migratepages;
				237
				238	/* Do not scan outside zone boundaries */
				239	low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
				240
				241	/* Only scan within a pageblock boundary */
				242	end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);
				243
				244	/* Do not cross the free scanner or scan within a memory hole */
				245	if (end_pfn > cc->free_pfn \|\| !pfn_valid(low_pfn)) {
				246	cc->migrate_pfn = end_pfn;
				247	return 0;
				248	}
				249
				250	/*
				251	* Ensure that there are not too many pages isolated from the LRU
				252	* list by either parallel reclaimers or compaction. If there are,
				253	* delay for some time until fewer pages are isolated
				254	*/
				255	while (unlikely(too_many_isolated(zone))) {
				256	congestion_wait(BLK_RW_ASYNC, HZ/10);
				257
				258	if (fatal_signal_pending(current))
				259	return 0;
				260	}
				261
				262	/* Time to isolate some pages for migration */
				263	spin_lock_irq(&zone->lru_lock);
				264	for (; low_pfn < end_pfn; low_pfn++) {
				265	struct page *page;
				266	if (!pfn_valid_within(low_pfn))
				267	continue;
				268
				269	/* Get the page and skip if free */
				270	page = pfn_to_page(low_pfn);
				271	if (PageBuddy(page))
				272	continue;
				273
				274	/* Try isolate the page */
				275	if (__isolate_lru_page(page, ISOLATE_BOTH, 0) != 0)
				276	continue;
				277
				278	/* Successfully isolated */
				279	del_page_from_lru_list(zone, page, page_lru(page));
				280	list_add(&page->lru, migratelist);
				281	mem_cgroup_del_lru(page);
				282	cc->nr_migratepages++;
				283
				284	/* Avoid isolating too much */
				285	if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
				286	break;
				287	}
				288
				289	acct_isolated(zone, cc);
				290
				291	spin_unlock_irq(&zone->lru_lock);
				292	cc->migrate_pfn = low_pfn;
				293
				294	return cc->nr_migratepages;
				295	}
				296
				297	/*
				298	* This is a migrate-callback that "allocates" freepages by taking pages
				299	* from the isolated freelists in the block we are migrating to.
				300	*/
				301	static struct page compaction_alloc(struct page migratepage,
				302	unsigned long data,
				303	int **result)
				304	{
				305	struct compact_control cc = (struct compact_control )data;
				306	struct page *freepage;
				307
				308	/* Isolate free pages if necessary */
				309	if (list_empty(&cc->freepages)) {
				310	isolate_freepages(cc->zone, cc);
				311
				312	if (list_empty(&cc->freepages))
				313	return NULL;
				314	}
				315
				316	freepage = list_entry(cc->freepages.next, struct page, lru);
				317	list_del(&freepage->lru);
				318	cc->nr_freepages--;
				319
				320	return freepage;
				321	}
				322
				323	/*
				324	* We cannot control nr_migratepages and nr_freepages fully when migration is
				325	* running as migrate_pages() has no knowledge of compact_control. When
				326	* migration is complete, we count the number of pages on the lists by hand.
				327	*/
				328	static void update_nr_listpages(struct compact_control *cc)
				329	{
				330	int nr_migratepages = 0;
				331	int nr_freepages = 0;
				332	struct page *page;
				333
				334	list_for_each_entry(page, &cc->migratepages, lru)
				335	nr_migratepages++;
				336	list_for_each_entry(page, &cc->freepages, lru)
				337	nr_freepages++;
				338
				339	cc->nr_migratepages = nr_migratepages;
				340	cc->nr_freepages = nr_freepages;
				341	}
				342
				343	static int compact_finished(struct zone *zone,
				344	struct compact_control *cc)
				345	{
Mel Gorman	56de726	2010-05-24 14:32:30 -0700	[diff] [blame]	346	unsigned int order;
				347	unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
				348
Mel Gorman	748446b	2010-05-24 14:32:27 -0700	[diff] [blame]	349	if (fatal_signal_pending(current))
				350	return COMPACT_PARTIAL;
				351
				352	/* Compaction run completes if the migrate and free scanner meet */
				353	if (cc->free_pfn <= cc->migrate_pfn)
				354	return COMPACT_COMPLETE;
				355
Mel Gorman	56de726	2010-05-24 14:32:30 -0700	[diff] [blame]	356	/* Compaction run is not finished if the watermark is not met */
				357	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
				358	return COMPACT_CONTINUE;
				359
				360	if (cc->order == -1)
				361	return COMPACT_CONTINUE;
				362
				363	/* Direct compactor: Is a suitable page free? */
				364	for (order = cc->order; order < MAX_ORDER; order++) {
				365	/* Job done if page is free of the right migratetype */
				366	if (!list_empty(&zone->free_area[order].free_list[cc->migratetype]))
				367	return COMPACT_PARTIAL;
				368
				369	/* Job done if allocation would set block type */
				370	if (order >= pageblock_order && zone->free_area[order].nr_free)
				371	return COMPACT_PARTIAL;
				372	}
				373
Mel Gorman	748446b	2010-05-24 14:32:27 -0700	[diff] [blame]	374	return COMPACT_CONTINUE;
				375	}
				376
				377	static int compact_zone(struct zone zone, struct compact_control cc)
				378	{
				379	int ret;
				380
				381	/* Setup to move all movable pages to the end of the zone */
				382	cc->migrate_pfn = zone->zone_start_pfn;
				383	cc->free_pfn = cc->migrate_pfn + zone->spanned_pages;
				384	cc->free_pfn &= ~(pageblock_nr_pages-1);
				385
				386	migrate_prep_local();
				387
				388	while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
				389	unsigned long nr_migrate, nr_remaining;
				390
				391	if (!isolate_migratepages(zone, cc))
				392	continue;
				393
				394	nr_migrate = cc->nr_migratepages;
				395	migrate_pages(&cc->migratepages, compaction_alloc,
				396	(unsigned long)cc, 0);
				397	update_nr_listpages(cc);
				398	nr_remaining = cc->nr_migratepages;
				399
				400	count_vm_event(COMPACTBLOCKS);
				401	count_vm_events(COMPACTPAGES, nr_migrate - nr_remaining);
				402	if (nr_remaining)
				403	count_vm_events(COMPACTPAGEFAILED, nr_remaining);
				404
				405	/* Release LRU pages not migrated */
				406	if (!list_empty(&cc->migratepages)) {
				407	putback_lru_pages(&cc->migratepages);
				408	cc->nr_migratepages = 0;
				409	}
				410
				411	}
				412
				413	/* Release free pages and check accounting */
				414	cc->nr_freepages -= release_freepages(&cc->freepages);
				415	VM_BUG_ON(cc->nr_freepages != 0);
				416
				417	return ret;
				418	}
Mel Gorman	76ab0f5	2010-05-24 14:32:28 -0700	[diff] [blame]	419
Mel Gorman	56de726	2010-05-24 14:32:30 -0700	[diff] [blame]	420	static unsigned long compact_zone_order(struct zone *zone,
				421	int order, gfp_t gfp_mask)
				422	{
				423	struct compact_control cc = {
				424	.nr_freepages = 0,
				425	.nr_migratepages = 0,
				426	.order = order,
				427	.migratetype = allocflags_to_migratetype(gfp_mask),
				428	.zone = zone,
				429	};
				430	INIT_LIST_HEAD(&cc.freepages);
				431	INIT_LIST_HEAD(&cc.migratepages);
				432
				433	return compact_zone(zone, &cc);
				434	}
				435
/*
 * try_to_compact_pages() skips a zone whose fragmentation index lies
 * between 0 and this value (inclusive).
 */
int sysctl_extfrag_threshold = 500;
				437
Mel Gorman	56de726	2010-05-24 14:32:30 -0700	[diff] [blame]	438	/**
				439	* try_to_compact_pages - Direct compact to satisfy a high-order allocation
				440	* @zonelist: The zonelist used for the current allocation
				441	* @order: The order of the current allocation
				442	* @gfp_mask: The GFP mask of the current allocation
				443	* @nodemask: The allowed nodes to allocate from
				444	*
				445	* This is the main entry point for direct page compaction.
				446	*/
				447	unsigned long try_to_compact_pages(struct zonelist *zonelist,
				448	int order, gfp_t gfp_mask, nodemask_t *nodemask)
				449	{
				450	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
				451	int may_enter_fs = gfp_mask & __GFP_FS;
				452	int may_perform_io = gfp_mask & __GFP_IO;
				453	unsigned long watermark;
				454	struct zoneref *z;
				455	struct zone *zone;
				456	int rc = COMPACT_SKIPPED;
				457
				458	/*
				459	* Check whether it is worth even starting compaction. The order check is
				460	* made because an assumption is made that the page allocator can satisfy
				461	* the "cheaper" orders without taking special steps
				462	*/
				463	if (order <= PAGE_ALLOC_COSTLY_ORDER \|\| !may_enter_fs \|\| !may_perform_io)
				464	return rc;
				465
				466	count_vm_event(COMPACTSTALL);
				467
				468	/* Compact each zone in the list */
				469	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
				470	nodemask) {
				471	int fragindex;
				472	int status;
				473
				474	/*
				475	* Watermarks for order-0 must be met for compaction. Note
				476	* the 2UL. This is because during migration, copies of
				477	* pages need to be allocated and for a short time, the
				478	* footprint is higher
				479	*/
				480	watermark = low_wmark_pages(zone) + (2UL << order);
				481	if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
				482	continue;
				483
				484	/*
				485	* fragmentation index determines if allocation failures are
				486	* due to low memory or external fragmentation
				487	*
				488	* index of -1 implies allocations might succeed depending
				489	* on watermarks
				490	* index towards 0 implies failure is due to lack of memory
				491	* index towards 1000 implies failure is due to fragmentation
				492	*
				493	* Only compact if a failure would be due to fragmentation.
				494	*/
				495	fragindex = fragmentation_index(zone, order);
Mel Gorman	5e77190	2010-05-24 14:32:31 -0700	[diff] [blame^]	496	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
Mel Gorman	56de726	2010-05-24 14:32:30 -0700	[diff] [blame]	497	continue;
				498
				499	if (fragindex == -1 && zone_watermark_ok(zone, order, watermark, 0, 0)) {
				500	rc = COMPACT_PARTIAL;
				501	break;
				502	}
				503
				504	status = compact_zone_order(zone, order, gfp_mask);
				505	rc = max(status, rc);
				506
				507	if (zone_watermark_ok(zone, order, watermark, 0, 0))
				508	break;
				509	}
				510
				511	return rc;
				512	}
				513
				514
Mel Gorman	76ab0f5	2010-05-24 14:32:28 -0700	[diff] [blame]	515	/* Compact all zones within a node */
				516	static int compact_node(int nid)
				517	{
				518	int zoneid;
				519	pg_data_t *pgdat;
				520	struct zone *zone;
				521
				522	if (nid < 0 \|\| nid >= nr_node_ids \|\| !node_online(nid))
				523	return -EINVAL;
				524	pgdat = NODE_DATA(nid);
				525
				526	/* Flush pending updates to the LRU lists */
				527	lru_add_drain_all();
				528
				529	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
				530	struct compact_control cc = {
				531	.nr_freepages = 0,
				532	.nr_migratepages = 0,
Mel Gorman	56de726	2010-05-24 14:32:30 -0700	[diff] [blame]	533	.order = -1,
Mel Gorman	76ab0f5	2010-05-24 14:32:28 -0700	[diff] [blame]	534	};
				535
				536	zone = &pgdat->node_zones[zoneid];
				537	if (!populated_zone(zone))
				538	continue;
				539
				540	cc.zone = zone;
				541	INIT_LIST_HEAD(&cc.freepages);
				542	INIT_LIST_HEAD(&cc.migratepages);
				543
				544	compact_zone(zone, &cc);
				545
				546	VM_BUG_ON(!list_empty(&cc.freepages));
				547	VM_BUG_ON(!list_empty(&cc.migratepages));
				548	}
				549
				550	return 0;
				551	}
				552
				553	/* Compact all nodes in the system */
				554	static int compact_nodes(void)
				555	{
				556	int nid;
				557
				558	for_each_online_node(nid)
				559	compact_node(nid);
				560
				561	return COMPACT_COMPLETE;
				562	}
				563
				564	/* The written value is actually unused, all memory is compacted */
				565	int sysctl_compact_memory;
				566
				567	/* This is the entry point for compacting all nodes via /proc/sys/vm */
				568	int sysctl_compaction_handler(struct ctl_table *table, int write,
				569	void __user buffer, size_t length, loff_t *ppos)
				570	{
				571	if (write)
				572	return compact_nodes();
				573
				574	return 0;
				575	}
Mel Gorman	ed4a6d7	2010-05-24 14:32:29 -0700	[diff] [blame]	576
Mel Gorman	5e77190	2010-05-24 14:32:31 -0700	[diff] [blame^]	577	int sysctl_extfrag_handler(struct ctl_table *table, int write,
				578	void __user buffer, size_t length, loff_t *ppos)
				579	{
				580	proc_dointvec_minmax(table, write, buffer, length, ppos);
				581
				582	return 0;
				583	}
				584
Mel Gorman	ed4a6d7	2010-05-24 14:32:29 -0700	[diff] [blame]	585	#if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
				586	ssize_t sysfs_compact_node(struct sys_device *dev,
				587	struct sysdev_attribute *attr,
				588	const char *buf, size_t count)
				589	{
				590	compact_node(dev->id);
				591
				592	return count;
				593	}
				594	static SYSDEV_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
				595
				596	int compaction_register_node(struct node *node)
				597	{
				598	return sysdev_create_file(&node->sysdev, &attr_compact);
				599	}
				600
				601	void compaction_unregister_node(struct node *node)
				602	{
				603	return sysdev_remove_file(&node->sysdev, &attr_compact);
				604	}
				605	#endif /* CONFIG_SYSFS && CONFIG_NUMA */