Blame - mm/page_ext.c - SHIFTPHONES/mainline/linux

blob: df6f74aac8e155df57be7015f4eec86a91434d33 [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	2	#include <linux/mm.h>
				3	#include <linux/mmzone.h>
Mike Rapoport	57c8a66	2018-10-30 15:09:49 -0700	[diff] [blame]	4	#include <linux/memblock.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	5	#include <linux/page_ext.h>
				6	#include <linux/memory.h>
				7	#include <linux/vmalloc.h>
				8	#include <linux/kmemleak.h>
Joonsoo Kim	48c96a3	2014-12-12 16:56:01 -0800	[diff] [blame]	9	#include <linux/page_owner.h>
Vladimir Davydov	33c3fc7	2015-09-09 15:35:45 -0700	[diff] [blame]	10	#include <linux/page_idle.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	11
				12	/*
				13	* struct page extension
				14	*
				15	* This is the feature to manage memory for extended data per page.
				16	*
				17	* Until now, we must modify struct page itself to store extra data per page.
				18	* This requires rebuilding the kernel and it is really time consuming process.
				19	* And, sometimes, rebuild is impossible due to third party module dependency.
				20	* At last, enlarging struct page could cause un-wanted system behaviour change.
				21	*
				22	* This feature is intended to overcome above mentioned problems. This feature
				23	* allocates memory for extended data per page in certain place rather than
				24	* the struct page itself. This memory can be accessed by the accessor
				25	* functions provided by this code. During the boot process, it checks whether
				26	* allocation of huge chunk of memory is needed or not. If not, it avoids
				27	* allocating memory at all. With this advantage, we can include this feature
				28	* into the kernel in default and can avoid rebuild and solve related problems.
				29	*
				30	* To help these things to work well, there are two callbacks for clients. One
				31	* is the need callback which is mandatory if user wants to avoid useless
				32	* memory allocation at boot-time. The other is optional, init callback, which
				33	* is used to do proper initialization after memory is allocated.
				34	*
				35	* The need callback is used to decide whether extended memory allocation is
				36	* needed or not. Sometimes users want to deactivate some features in this
Haitao Shi	8958b24	2020-12-15 20:47:26 -0800	[diff] [blame]	37	* boot and extra memory would be unnecessary. In this case, to avoid
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	38	* allocating a huge chunk of memory, each client represents its need of
				39	* extra memory through the need callback. If one of the need callbacks
				40	* returns true, it means that someone needs extra memory so that
				41	* page extension core should allocate memory for page extension. If
				42	* none of the need callbacks returns true, memory isn't needed at all in this
				43	* boot and page extension core can skip allocating memory. As a result,
				44	* no memory is wasted.
				45	*
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	46	* When need callback returns true, page_ext checks if there is a request for
				47	* extra memory through size in struct page_ext_operations. If it is non-zero,
				48	* extra space is allocated for each page_ext entry and offset is returned to
				49	* user through offset in struct page_ext_operations.
				50	*
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	51	* The init callback is used to do proper initialization after page extension
				52	* is completely initialized. In sparse memory system, extra memory is
				53	* allocated some time later than memmap is allocated. In other words, lifetime
				54	* of memory for page extension isn't same with memmap for struct page.
				55	* Therefore, clients can't store extra data until page extension is
				56	* initialized, even if pages are allocated and used freely. This could
				57	* cause inadequate state of extra data per page, so, to prevent it, client
				58	* can utilize this callback to initialize the state of it correctly.
				59	*/
				60
				61	static struct page_ext_operations *page_ext_ops[] = {
Joonsoo Kim	48c96a3	2014-12-12 16:56:01 -0800	[diff] [blame]	62	#ifdef CONFIG_PAGE_OWNER
				63	&page_owner_ops,
				64	#endif
Vladimir Davydov	33c3fc7	2015-09-09 15:35:45 -0700	[diff] [blame]	65	#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
				66	&page_idle_ops,
				67	#endif
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	68	};
				69
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	70	unsigned long page_ext_size = sizeof(struct page_ext);
				71
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	72	static unsigned long total_usage;
				73
				74	static bool __init invoke_need_callbacks(void)
				75	{
				76	int i;
				77	int entries = ARRAY_SIZE(page_ext_ops);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	78	bool need = false;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	79
				80	for (i = 0; i < entries; i++) {
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	81	if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	82	page_ext_ops[i]->offset = page_ext_size;
				83	page_ext_size += page_ext_ops[i]->size;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	84	need = true;
				85	}
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	86	}
				87
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	88	return need;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	89	}
				90
				91	static void __init invoke_init_callbacks(void)
				92	{
				93	int i;
				94	int entries = ARRAY_SIZE(page_ext_ops);
				95
				96	for (i = 0; i < entries; i++) {
				97	if (page_ext_ops[i]->init)
				98	page_ext_ops[i]->init();
				99	}
				100	}
				101
Zhenhua Huang	7fb7ab6	2020-12-14 19:04:46 -0800	[diff] [blame]	102	#ifndef CONFIG_SPARSEMEM
				103	void __init page_ext_init_flatmem_late(void)
				104	{
				105	invoke_init_callbacks();
				106	}
				107	#endif
				108
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	109	static inline struct page_ext get_entry(void base, unsigned long index)
				110	{
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	111	return base + page_ext_size * index;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	112	}
				113
Zhenhua Huang	7fb7ab6	2020-12-14 19:04:46 -0800	[diff] [blame]	114	#ifndef CONFIG_SPARSEMEM
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	115
				116
				117	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				118	{
				119	pgdat->node_page_ext = NULL;
				120	}
				121
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	122	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	123	{
				124	unsigned long pfn = page_to_pfn(page);
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	125	unsigned long index;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	126	struct page_ext *base;
				127
				128	base = NODE_DATA(page_to_nid(page))->node_page_ext;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	129	/*
				130	* The sanity checks the page allocator does upon freeing a
				131	* page can reach here before the page_ext arrays are
				132	* allocated when feeding a range of pages to the allocator
				133	* for the first time during bootup or memory hotplug.
				134	*/
				135	if (unlikely(!base))
				136	return NULL;
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	137	index = pfn - round_down(node_start_pfn(page_to_nid(page)),
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	138	MAX_ORDER_NR_PAGES);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	139	return get_entry(base, index);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	140	}
				141
				142	static int __init alloc_node_page_ext(int nid)
				143	{
				144	struct page_ext *base;
				145	unsigned long table_size;
				146	unsigned long nr_pages;
				147
				148	nr_pages = NODE_DATA(nid)->node_spanned_pages;
				149	if (!nr_pages)
				150	return 0;
				151
				152	/*
				153	* Need extra space if node range is not aligned with
				154	* MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm
				155	* checks buddy's status, range could be out of exact node range.
				156	*/
				157	if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) \|\|
				158	!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
				159	nr_pages += MAX_ORDER_NR_PAGES;
				160
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	161	table_size = page_ext_size * nr_pages;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	162
Mike Rapoport	26fb3da	2019-03-11 23:30:42 -0700	[diff] [blame]	163	base = memblock_alloc_try_nid(
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	164	table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
Mike Rapoport	97ad108	2018-10-30 15:09:44 -0700	[diff] [blame]	165	MEMBLOCK_ALLOC_ACCESSIBLE, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	166	if (!base)
				167	return -ENOMEM;
				168	NODE_DATA(nid)->node_page_ext = base;
				169	total_usage += table_size;
				170	return 0;
				171	}
				172
				173	void __init page_ext_init_flatmem(void)
				174	{
				175
				176	int nid, fail;
				177
				178	if (!invoke_need_callbacks())
				179	return;
				180
				181	for_each_online_node(nid) {
				182	fail = alloc_node_page_ext(nid);
				183	if (fail)
				184	goto fail;
				185	}
				186	pr_info("allocated %ld bytes of page_ext\n", total_usage);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	187	return;
				188
				189	fail:
				190	pr_crit("allocation of page_ext failed.\n");
				191	panic("Out of memory");
				192	}
				193
				194	#else /* CONFIG_SPARSEMEM */
				195
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	196	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	197	{
				198	unsigned long pfn = page_to_pfn(page);
				199	struct mem_section *section = __pfn_to_section(pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	200	/*
				201	* The sanity checks the page allocator does upon freeing a
				202	* page can reach here before the page_ext arrays are
				203	* allocated when feeding a range of pages to the allocator
				204	* for the first time during bootup or memory hotplug.
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	205	*/
				206	if (!section->page_ext)
				207	return NULL;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	208	return get_entry(section->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	209	}
				210
				211	static void *__meminit alloc_page_ext(size_t size, int nid)
				212	{
				213	gfp_t flags = GFP_KERNEL \| __GFP_ZERO \| __GFP_NOWARN;
				214	void *addr = NULL;
				215
				216	addr = alloc_pages_exact_nid(nid, size, flags);
				217	if (addr) {
				218	kmemleak_alloc(addr, size, 1, flags);
				219	return addr;
				220	}
				221
Michal Hocko	b95046b	2017-09-06 16:20:41 -0700	[diff] [blame]	222	addr = vzalloc_node(size, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	223
				224	return addr;
				225	}
				226
				227	static int __meminit init_section_page_ext(unsigned long pfn, int nid)
				228	{
				229	struct mem_section *section;
				230	struct page_ext *base;
				231	unsigned long table_size;
				232
				233	section = __pfn_to_section(pfn);
				234
				235	if (section->page_ext)
				236	return 0;
				237
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	238	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	239	base = alloc_page_ext(table_size, nid);
				240
				241	/*
				242	* The value stored in section->page_ext is (base - pfn)
				243	* and it does not point to the memory block allocated above,
				244	* causing kmemleak false positives.
				245	*/
				246	kmemleak_not_leak(base);
				247
				248	if (!base) {
				249	pr_err("page ext allocation failure\n");
				250	return -ENOMEM;
				251	}
				252
				253	/*
				254	* The passed "pfn" may not be aligned to SECTION. For the calculation
				255	* we need to apply a mask.
				256	*/
				257	pfn &= PAGE_SECTION_MASK;
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	258	section->page_ext = (void )base - page_ext_size pfn;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	259	total_usage += table_size;
				260	return 0;
				261	}
				262	#ifdef CONFIG_MEMORY_HOTPLUG
				263	static void free_page_ext(void *addr)
				264	{
				265	if (is_vmalloc_addr(addr)) {
				266	vfree(addr);
				267	} else {
				268	struct page *page = virt_to_page(addr);
				269	size_t table_size;
				270
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	271	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	272
				273	BUG_ON(PageReserved(page));
Qian Cai	0c81585	2019-03-05 15:49:46 -0800	[diff] [blame]	274	kmemleak_free(addr);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	275	free_pages_exact(addr, table_size);
				276	}
				277	}
				278
				279	static void __free_page_ext(unsigned long pfn)
				280	{
				281	struct mem_section *ms;
				282	struct page_ext *base;
				283
				284	ms = __pfn_to_section(pfn);
				285	if (!ms \|\| !ms->page_ext)
				286	return;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	287	base = get_entry(ms->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	288	free_page_ext(base);
				289	ms->page_ext = NULL;
				290	}
				291
				292	static int __meminit online_page_ext(unsigned long start_pfn,
				293	unsigned long nr_pages,
				294	int nid)
				295	{
				296	unsigned long start, end, pfn;
				297	int fail = 0;
				298
				299	start = SECTION_ALIGN_DOWN(start_pfn);
				300	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				301
Anshuman Khandual	98fa15f	2019-03-05 15:42:58 -0800	[diff] [blame]	302	if (nid == NUMA_NO_NODE) {
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	303	/*
				304	* In this case, "nid" already exists and contains valid memory.
				305	* "start_pfn" passed to us is a pfn which is an arg for
				306	* online__pages(), and start_pfn should exist.
				307	*/
				308	nid = pfn_to_nid(start_pfn);
				309	VM_BUG_ON(!node_state(nid, N_ONLINE));
				310	}
				311
David Hildenbrand	dccacf8	2020-04-06 20:06:47 -0700	[diff] [blame]	312	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	313	fail = init_section_page_ext(pfn, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	314	if (!fail)
				315	return 0;
				316
				317	/* rollback */
				318	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				319	__free_page_ext(pfn);
				320
				321	return -ENOMEM;
				322	}
				323
				324	static int __meminit offline_page_ext(unsigned long start_pfn,
				325	unsigned long nr_pages, int nid)
				326	{
				327	unsigned long start, end, pfn;
				328
				329	start = SECTION_ALIGN_DOWN(start_pfn);
				330	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				331
				332	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				333	__free_page_ext(pfn);
				334	return 0;
				335
				336	}
				337
				338	static int __meminit page_ext_callback(struct notifier_block *self,
				339	unsigned long action, void *arg)
				340	{
				341	struct memory_notify *mn = arg;
				342	int ret = 0;
				343
				344	switch (action) {
				345	case MEM_GOING_ONLINE:
				346	ret = online_page_ext(mn->start_pfn,
				347	mn->nr_pages, mn->status_change_nid);
				348	break;
				349	case MEM_OFFLINE:
				350	offline_page_ext(mn->start_pfn,
				351	mn->nr_pages, mn->status_change_nid);
				352	break;
				353	case MEM_CANCEL_ONLINE:
				354	offline_page_ext(mn->start_pfn,
				355	mn->nr_pages, mn->status_change_nid);
				356	break;
				357	case MEM_GOING_OFFLINE:
				358	break;
				359	case MEM_ONLINE:
				360	case MEM_CANCEL_OFFLINE:
				361	break;
				362	}
				363
				364	return notifier_from_errno(ret);
				365	}
				366
				367	#endif
				368
				369	void __init page_ext_init(void)
				370	{
				371	unsigned long pfn;
				372	int nid;
				373
				374	if (!invoke_need_callbacks())
				375	return;
				376
				377	for_each_node_state(nid, N_MEMORY) {
				378	unsigned long start_pfn, end_pfn;
				379
				380	start_pfn = node_start_pfn(nid);
				381	end_pfn = node_end_pfn(nid);
				382	/*
				383	* start_pfn and end_pfn may not be aligned to SECTION and the
				384	* page->flags of out of node pages are not initialized. So we
				385	* scan [start_pfn, the biggest section's pfn < end_pfn) here.
				386	*/
				387	for (pfn = start_pfn; pfn < end_pfn;
				388	pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
				389
				390	if (!pfn_valid(pfn))
				391	continue;
				392	/*
				393	* Nodes's pfns can be overlapping.
				394	* We know some arch can have a nodes layout such as
				395	* -------------pfn-------------->
				396	* N0 \| N1 \| N2 \| N0 \| N1 \| N2\|....
				397	*/
Qian Cai	2f1ee09	2019-02-12 15:36:03 -0800	[diff] [blame]	398	if (pfn_to_nid(pfn) != nid)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	399	continue;
				400	if (init_section_page_ext(pfn, nid))
				401	goto oom;
Vlastimil Babka	0fc542b	2017-09-06 16:20:48 -0700	[diff] [blame]	402	cond_resched();
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	403	}
				404	}
				405	hotplug_memory_notifier(page_ext_callback, 0);
				406	pr_info("allocated %ld bytes of page_ext\n", total_usage);
				407	invoke_init_callbacks();
				408	return;
				409
				410	oom:
				411	panic("Out of memory");
				412	}
				413
				414	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				415	{
				416	}
				417
				418	#endif