Blame - mm/page_ext.c - SHIFTPHONES/mainline/linux

blob: a3616f7a0e9e923c2add3a8459c8b2a82da524f7 [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	2	#include <linux/mm.h>
				3	#include <linux/mmzone.h>
Mike Rapoport	57c8a66	2018-10-30 15:09:49 -0700	[diff] [blame]	4	#include <linux/memblock.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	5	#include <linux/page_ext.h>
				6	#include <linux/memory.h>
				7	#include <linux/vmalloc.h>
				8	#include <linux/kmemleak.h>
Joonsoo Kim	48c96a3	2014-12-12 16:56:01 -0800	[diff] [blame]	9	#include <linux/page_owner.h>
Vladimir Davydov	33c3fc7	2015-09-09 15:35:45 -0700	[diff] [blame]	10	#include <linux/page_idle.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	11
				12	/*
				13	* struct page extension
				14	*
				15	* This is the feature to manage memory for extended data per page.
				16	*
				17	* Until now, we had to modify struct page itself to store extra data per page.
				18	* This requires rebuilding the kernel, which is a really time-consuming process.
				19	* And, sometimes, a rebuild is impossible due to third-party module dependencies.
				20	* Finally, enlarging struct page could cause unwanted changes in system behaviour.
				21	*
				22	* This feature is intended to overcome above mentioned problems. This feature
				23	* allocates memory for extended data per page in certain place rather than
				24	* the struct page itself. This memory can be accessed by the accessor
				25	* functions provided by this code. During the boot process, it checks whether
				26	* allocation of huge chunk of memory is needed or not. If not, it avoids
				27	* allocating memory at all. With this advantage, we can include this feature
				28	* in the kernel by default, avoiding rebuilds and solving the related problems.
				29	*
				30	* To help these things to work well, there are two callbacks for clients. One
				31	* is the need callback which is mandatory if user wants to avoid useless
				32	* memory allocation at boot-time. The other is optional, init callback, which
				33	* is used to do proper initialization after memory is allocated.
				34	*
				35	* The need callback is used to decide whether extended memory allocation is
				36	* needed or not. Sometimes users want to deactivate some features in this
				37	* boot and extra memory would be unnecessary. In this case, to avoid
				38	* allocating a huge chunk of memory, each client reports its need for
				39	* extra memory through the need callback. If one of the need callbacks
				40	* returns true, it means that someone needs extra memory, so the
				41	* page extension core should allocate memory for page extension. If
				42	* none of the need callbacks returns true, memory isn't needed at all in this
				43	* boot and the page extension core can skip allocating memory. As a result,
				44	* no memory is wasted.
				45	*
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	46	* When need callback returns true, page_ext checks if there is a request for
				47	* extra memory through size in struct page_ext_operations. If it is non-zero,
				48	* extra space is allocated for each page_ext entry and offset is returned to
				49	* user through offset in struct page_ext_operations.
				50	*
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	51	* The init callback is used to do proper initialization after page extension
				52	* is completely initialized. In sparse memory system, extra memory is
				53	* allocated some time later than memmap is allocated. In other words, lifetime
				54	* of memory for page extension isn't same with memmap for struct page.
				55	* Therefore, clients can't store extra data until page extension is
				56	* initialized, even if pages are allocated and used freely. This could
				57	* cause inadequate state of extra data per page, so, to prevent it, client
				58	* can utilize this callback to initialize the state of it correctly.
				59	*/
				60
				61	static struct page_ext_operations *page_ext_ops[] = {
Joonsoo Kim	48c96a3	2014-12-12 16:56:01 -0800	[diff] [blame]	62	#ifdef CONFIG_PAGE_OWNER
				63	&page_owner_ops,
				64	#endif
Vladimir Davydov	33c3fc7	2015-09-09 15:35:45 -0700	[diff] [blame]	65	#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
				66	&page_idle_ops,
				67	#endif
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	68	};
				69
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	70	unsigned long page_ext_size = sizeof(struct page_ext);
				71
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	72	static unsigned long total_usage;
				73
				74	static bool __init invoke_need_callbacks(void)
				75	{
				76	int i;
				77	int entries = ARRAY_SIZE(page_ext_ops);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	78	bool need = false;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	79
				80	for (i = 0; i < entries; i++) {
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	81	if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	82	page_ext_ops[i]->offset = page_ext_size;
				83	page_ext_size += page_ext_ops[i]->size;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	84	need = true;
				85	}
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	86	}
				87
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	88	return need;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	89	}
				90
				91	static void __init invoke_init_callbacks(void)
				92	{
				93	int i;
				94	int entries = ARRAY_SIZE(page_ext_ops);
				95
				96	for (i = 0; i < entries; i++) {
				97	if (page_ext_ops[i]->init)
				98	page_ext_ops[i]->init();
				99	}
				100	}
				101
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	102	static inline struct page_ext get_entry(void base, unsigned long index)
				103	{
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	104	return base + page_ext_size * index;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	105	}
				106
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	107	#if !defined(CONFIG_SPARSEMEM)
				108
				109
				110	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				111	{
				112	pgdat->node_page_ext = NULL;
				113	}
				114
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	115	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	116	{
				117	unsigned long pfn = page_to_pfn(page);
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	118	unsigned long index;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	119	struct page_ext *base;
				120
				121	base = NODE_DATA(page_to_nid(page))->node_page_ext;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	122	/*
				123	* The sanity checks the page allocator does upon freeing a
				124	* page can reach here before the page_ext arrays are
				125	* allocated when feeding a range of pages to the allocator
				126	* for the first time during bootup or memory hotplug.
				127	*/
				128	if (unlikely(!base))
				129	return NULL;
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	130	index = pfn - round_down(node_start_pfn(page_to_nid(page)),
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	131	MAX_ORDER_NR_PAGES);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	132	return get_entry(base, index);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	133	}
				134
				135	static int __init alloc_node_page_ext(int nid)
				136	{
				137	struct page_ext *base;
				138	unsigned long table_size;
				139	unsigned long nr_pages;
				140
				141	nr_pages = NODE_DATA(nid)->node_spanned_pages;
				142	if (!nr_pages)
				143	return 0;
				144
				145	/*
				146	* Need extra space if node range is not aligned with
				147	* MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm
				148	* checks buddy's status, range could be out of exact node range.
				149	*/
				150	if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) \|\|
				151	!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
				152	nr_pages += MAX_ORDER_NR_PAGES;
				153
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	154	table_size = page_ext_size * nr_pages;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	155
Mike Rapoport	26fb3da	2019-03-11 23:30:42 -0700	[diff] [blame]	156	base = memblock_alloc_try_nid(
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	157	table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
Mike Rapoport	97ad108	2018-10-30 15:09:44 -0700	[diff] [blame]	158	MEMBLOCK_ALLOC_ACCESSIBLE, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	159	if (!base)
				160	return -ENOMEM;
				161	NODE_DATA(nid)->node_page_ext = base;
				162	total_usage += table_size;
				163	return 0;
				164	}
				165
				166	void __init page_ext_init_flatmem(void)
				167	{
				168
				169	int nid, fail;
				170
				171	if (!invoke_need_callbacks())
				172	return;
				173
				174	for_each_online_node(nid) {
				175	fail = alloc_node_page_ext(nid);
				176	if (fail)
				177	goto fail;
				178	}
				179	pr_info("allocated %ld bytes of page_ext\n", total_usage);
				180	invoke_init_callbacks();
				181	return;
				182
				183	fail:
				184	pr_crit("allocation of page_ext failed.\n");
				185	panic("Out of memory");
				186	}
				187
				188	#else /* CONFIG_SPARSEMEM */
				189
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	190	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	191	{
				192	unsigned long pfn = page_to_pfn(page);
				193	struct mem_section *section = __pfn_to_section(pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	194	/*
				195	* The sanity checks the page allocator does upon freeing a
				196	* page can reach here before the page_ext arrays are
				197	* allocated when feeding a range of pages to the allocator
				198	* for the first time during bootup or memory hotplug.
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	199	*/
				200	if (!section->page_ext)
				201	return NULL;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	202	return get_entry(section->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	203	}
				204
				205	static void *__meminit alloc_page_ext(size_t size, int nid)
				206	{
				207	gfp_t flags = GFP_KERNEL \| __GFP_ZERO \| __GFP_NOWARN;
				208	void *addr = NULL;
				209
				210	addr = alloc_pages_exact_nid(nid, size, flags);
				211	if (addr) {
				212	kmemleak_alloc(addr, size, 1, flags);
				213	return addr;
				214	}
				215
Michal Hocko	b95046b	2017-09-06 16:20:41 -0700	[diff] [blame]	216	addr = vzalloc_node(size, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	217
				218	return addr;
				219	}
				220
				221	static int __meminit init_section_page_ext(unsigned long pfn, int nid)
				222	{
				223	struct mem_section *section;
				224	struct page_ext *base;
				225	unsigned long table_size;
				226
				227	section = __pfn_to_section(pfn);
				228
				229	if (section->page_ext)
				230	return 0;
				231
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	232	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	233	base = alloc_page_ext(table_size, nid);
				234
				235	/*
				236	* The value stored in section->page_ext is (base - pfn)
				237	* and it does not point to the memory block allocated above,
				238	* causing kmemleak false positives.
				239	*/
				240	kmemleak_not_leak(base);
				241
				242	if (!base) {
				243	pr_err("page ext allocation failure\n");
				244	return -ENOMEM;
				245	}
				246
				247	/*
				248	* The passed "pfn" may not be aligned to SECTION. For the calculation
				249	* we need to apply a mask.
				250	*/
				251	pfn &= PAGE_SECTION_MASK;
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	252	section->page_ext = (void )base - page_ext_size pfn;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	253	total_usage += table_size;
				254	return 0;
				255	}
				256	#ifdef CONFIG_MEMORY_HOTPLUG
				257	static void free_page_ext(void *addr)
				258	{
				259	if (is_vmalloc_addr(addr)) {
				260	vfree(addr);
				261	} else {
				262	struct page *page = virt_to_page(addr);
				263	size_t table_size;
				264
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	265	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	266
				267	BUG_ON(PageReserved(page));
Qian Cai	0c81585	2019-03-05 15:49:46 -0800	[diff] [blame]	268	kmemleak_free(addr);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	269	free_pages_exact(addr, table_size);
				270	}
				271	}
				272
				273	static void __free_page_ext(unsigned long pfn)
				274	{
				275	struct mem_section *ms;
				276	struct page_ext *base;
				277
				278	ms = __pfn_to_section(pfn);
				279	if (!ms \|\| !ms->page_ext)
				280	return;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	281	base = get_entry(ms->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	282	free_page_ext(base);
				283	ms->page_ext = NULL;
				284	}
				285
				286	static int __meminit online_page_ext(unsigned long start_pfn,
				287	unsigned long nr_pages,
				288	int nid)
				289	{
				290	unsigned long start, end, pfn;
				291	int fail = 0;
				292
				293	start = SECTION_ALIGN_DOWN(start_pfn);
				294	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				295
Anshuman Khandual	98fa15f	2019-03-05 15:42:58 -0800	[diff] [blame]	296	if (nid == NUMA_NO_NODE) {
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	297	/*
				298	* In this case, "nid" already exists and contains valid memory.
				299	* "start_pfn" passed to us is a pfn which is an arg for
				300	* online__pages(), and start_pfn should exist.
				301	*/
				302	nid = pfn_to_nid(start_pfn);
				303	VM_BUG_ON(!node_state(nid, N_ONLINE));
				304	}
				305
David Hildenbrand	dccacf8	2020-04-06 20:06:47 -0700	[diff] [blame^]	306	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	307	fail = init_section_page_ext(pfn, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	308	if (!fail)
				309	return 0;
				310
				311	/* rollback */
				312	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				313	__free_page_ext(pfn);
				314
				315	return -ENOMEM;
				316	}
				317
				318	static int __meminit offline_page_ext(unsigned long start_pfn,
				319	unsigned long nr_pages, int nid)
				320	{
				321	unsigned long start, end, pfn;
				322
				323	start = SECTION_ALIGN_DOWN(start_pfn);
				324	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				325
				326	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				327	__free_page_ext(pfn);
				328	return 0;
				329
				330	}
				331
				332	static int __meminit page_ext_callback(struct notifier_block *self,
				333	unsigned long action, void *arg)
				334	{
				335	struct memory_notify *mn = arg;
				336	int ret = 0;
				337
				338	switch (action) {
				339	case MEM_GOING_ONLINE:
				340	ret = online_page_ext(mn->start_pfn,
				341	mn->nr_pages, mn->status_change_nid);
				342	break;
				343	case MEM_OFFLINE:
				344	offline_page_ext(mn->start_pfn,
				345	mn->nr_pages, mn->status_change_nid);
				346	break;
				347	case MEM_CANCEL_ONLINE:
				348	offline_page_ext(mn->start_pfn,
				349	mn->nr_pages, mn->status_change_nid);
				350	break;
				351	case MEM_GOING_OFFLINE:
				352	break;
				353	case MEM_ONLINE:
				354	case MEM_CANCEL_OFFLINE:
				355	break;
				356	}
				357
				358	return notifier_from_errno(ret);
				359	}
				360
				361	#endif
				362
				363	void __init page_ext_init(void)
				364	{
				365	unsigned long pfn;
				366	int nid;
				367
				368	if (!invoke_need_callbacks())
				369	return;
				370
				371	for_each_node_state(nid, N_MEMORY) {
				372	unsigned long start_pfn, end_pfn;
				373
				374	start_pfn = node_start_pfn(nid);
				375	end_pfn = node_end_pfn(nid);
				376	/*
				377	* start_pfn and end_pfn may not be aligned to SECTION and the
				378	* page->flags of out of node pages are not initialized. So we
				379	* scan [start_pfn, the biggest section's pfn < end_pfn) here.
				380	*/
				381	for (pfn = start_pfn; pfn < end_pfn;
				382	pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
				383
				384	if (!pfn_valid(pfn))
				385	continue;
				386	/*
				387	* Nodes's pfns can be overlapping.
				388	* We know some arch can have a nodes layout such as
				389	* -------------pfn-------------->
				390	* N0 \| N1 \| N2 \| N0 \| N1 \| N2\|....
				391	*/
Qian Cai	2f1ee09	2019-02-12 15:36:03 -0800	[diff] [blame]	392	if (pfn_to_nid(pfn) != nid)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	393	continue;
				394	if (init_section_page_ext(pfn, nid))
				395	goto oom;
Vlastimil Babka	0fc542b	2017-09-06 16:20:48 -0700	[diff] [blame]	396	cond_resched();
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	397	}
				398	}
				399	hotplug_memory_notifier(page_ext_callback, 0);
				400	pr_info("allocated %ld bytes of page_ext\n", total_usage);
				401	invoke_init_callbacks();
				402	return;
				403
				404	oom:
				405	panic("Out of memory");
				406	}
				407
				408	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				409	{
				410	}
				411
				412	#endif