Blame - mm/page_ext.c - SHIFTPHONES/mainline/linux

blob: 2e66d934d63f2f25fce2c6a5c32d67e3c58d8608 [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	2	#include <linux/mm.h>
				3	#include <linux/mmzone.h>
Mike Rapoport	57c8a66	2018-10-30 15:09:49 -0700	[diff] [blame]	4	#include <linux/memblock.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	5	#include <linux/page_ext.h>
				6	#include <linux/memory.h>
				7	#include <linux/vmalloc.h>
				8	#include <linux/kmemleak.h>
Joonsoo Kim	48c96a3	2014-12-12 16:56:01 -0800	[diff] [blame]	9	#include <linux/page_owner.h>
Vladimir Davydov	33c3fc7	2015-09-09 15:35:45 -0700	[diff] [blame]	10	#include <linux/page_idle.h>
Pasha Tatashin	df4e817	2022-01-14 14:06:37 -0800	[diff] [blame]	11	#include <linux/page_table_check.h>
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	12
				13	/*
				14	* struct page extension
				15	*
				16	* This is the feature to manage memory for extended data per page.
				17	*
				18	* Until now, we must modify struct page itself to store extra data per page.
				19	* This requires rebuilding the kernel, which is a really time-consuming process.
				20	* And, sometimes, rebuild is impossible due to third party module dependency.
				21	* At last, enlarging struct page could cause un-wanted system behaviour change.
				22	*
				23	* This feature is intended to overcome above mentioned problems. This feature
				24	* allocates memory for extended data per page in certain place rather than
				25	* the struct page itself. This memory can be accessed by the accessor
				26	* functions provided by this code. During the boot process, it checks whether
				27	* allocation of huge chunk of memory is needed or not. If not, it avoids
				28	* allocating memory at all. With this advantage, we can include this feature
				29	* into the kernel in default and can avoid rebuild and solve related problems.
				30	*
				31	* To help these things to work well, there are two callbacks for clients. One
				32	* is the need callback which is mandatory if user wants to avoid useless
				33	* memory allocation at boot-time. The other is optional, init callback, which
				34	* is used to do proper initialization after memory is allocated.
				35	*
				36	* The need callback is used to decide whether extended memory allocation is
				37	* needed or not. Sometimes users want to deactivate some features in this
Haitao Shi	8958b24	2020-12-15 20:47:26 -0800	[diff] [blame]	38	* boot and extra memory would be unnecessary. In this case, to avoid
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	39	* allocating a huge chunk of memory, each client represents its need of
				40	* extra memory through the need callback. If one of the need callbacks
				41	* returns true, it means that someone needs extra memory so that
				42	* page extension core should allocate memory for page extension. If
				43	* none of need callbacks return true, memory isn't needed at all in this boot
				44	* and page extension core can skip allocating memory. As a result,
				45	* no memory is wasted.
				46	*
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	47	* When need callback returns true, page_ext checks if there is a request for
				48	* extra memory through size in struct page_ext_operations. If it is non-zero,
				49	* extra space is allocated for each page_ext entry and offset is returned to
				50	* user through offset in struct page_ext_operations.
				51	*
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	52	* The init callback is used to do proper initialization after page extension
				53	* is completely initialized. In sparse memory system, extra memory is
				54	* allocated some time later than memmap is allocated. In other words, lifetime
				55	* of memory for page extension isn't same with memmap for struct page.
				56	* Therefore, clients can't store extra data until page extension is
				57	* initialized, even if pages are allocated and used freely. This could
				58	* cause inadequate state of extra data per page, so, to prevent it, client
				59	* can utilize this callback to initialize the state of it correctly.
				60	*/
				61
SeongJae Park	1c676e0	2021-09-07 19:56:40 -0700	[diff] [blame]	62	#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
				63	static bool need_page_idle(void)
				64	{
				65	return true;
				66	}
Ting Liu	cab0a7c	2022-01-14 14:09:28 -0800	[diff] [blame]	67	static struct page_ext_operations page_idle_ops __initdata = {
SeongJae Park	1c676e0	2021-09-07 19:56:40 -0700	[diff] [blame]	68	.need = need_page_idle,
				69	};
				70	#endif
				71
Ting Liu	cab0a7c	2022-01-14 14:09:28 -0800	[diff] [blame]	72	static struct page_ext_operations *page_ext_ops[] __initdata = {
Joonsoo Kim	48c96a3	2014-12-12 16:56:01 -0800	[diff] [blame]	73	#ifdef CONFIG_PAGE_OWNER
				74	&page_owner_ops,
				75	#endif
SeongJae Park	1c676e0	2021-09-07 19:56:40 -0700	[diff] [blame]	76	#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
Vladimir Davydov	33c3fc7	2015-09-09 15:35:45 -0700	[diff] [blame]	77	&page_idle_ops,
				78	#endif
Pasha Tatashin	df4e817	2022-01-14 14:06:37 -0800	[diff] [blame]	79	#ifdef CONFIG_PAGE_TABLE_CHECK
				80	&page_table_check_ops,
				81	#endif
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	82	};
				83
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	84	unsigned long page_ext_size = sizeof(struct page_ext);
				85
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	86	static unsigned long total_usage;
				87
				88	static bool __init invoke_need_callbacks(void)
				89	{
				90	int i;
				91	int entries = ARRAY_SIZE(page_ext_ops);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	92	bool need = false;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	93
				94	for (i = 0; i < entries; i++) {
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	95	if (page_ext_ops[i]->need && page_ext_ops[i]->need()) {
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	96	page_ext_ops[i]->offset = page_ext_size;
				97	page_ext_size += page_ext_ops[i]->size;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	98	need = true;
				99	}
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	100	}
				101
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	102	return need;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	103	}
				104
				105	static void __init invoke_init_callbacks(void)
				106	{
				107	int i;
				108	int entries = ARRAY_SIZE(page_ext_ops);
				109
				110	for (i = 0; i < entries; i++) {
				111	if (page_ext_ops[i]->init)
				112	page_ext_ops[i]->init();
				113	}
				114	}
				115
Zhenhua Huang	7fb7ab6	2020-12-14 19:04:46 -0800	[diff] [blame]	116	#ifndef CONFIG_SPARSEMEM
				117	void __init page_ext_init_flatmem_late(void)
				118	{
				119	invoke_init_callbacks();
				120	}
				121	#endif
				122
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	123	static inline struct page_ext get_entry(void base, unsigned long index)
				124	{
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	125	return base + page_ext_size * index;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	126	}
				127
Zhenhua Huang	7fb7ab6	2020-12-14 19:04:46 -0800	[diff] [blame]	128	#ifndef CONFIG_SPARSEMEM
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	129
				130
				131	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				132	{
				133	pgdat->node_page_ext = NULL;
				134	}
				135
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	136	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	137	{
				138	unsigned long pfn = page_to_pfn(page);
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	139	unsigned long index;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	140	struct page_ext *base;
				141
				142	base = NODE_DATA(page_to_nid(page))->node_page_ext;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	143	/*
				144	* The sanity checks the page allocator does upon freeing a
				145	* page can reach here before the page_ext arrays are
				146	* allocated when feeding a range of pages to the allocator
				147	* for the first time during bootup or memory hotplug.
				148	*/
				149	if (unlikely(!base))
				150	return NULL;
Joonsoo Kim	0b06bb3	2016-10-07 16:58:24 -0700	[diff] [blame]	151	index = pfn - round_down(node_start_pfn(page_to_nid(page)),
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	152	MAX_ORDER_NR_PAGES);
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	153	return get_entry(base, index);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	154	}
				155
				156	static int __init alloc_node_page_ext(int nid)
				157	{
				158	struct page_ext *base;
				159	unsigned long table_size;
				160	unsigned long nr_pages;
				161
				162	nr_pages = NODE_DATA(nid)->node_spanned_pages;
				163	if (!nr_pages)
				164	return 0;
				165
				166	/*
				167	* Need extra space if node range is not aligned with
				168	* MAX_ORDER_NR_PAGES. When page allocator's buddy algorithm
				169	* checks buddy's status, range could be out of exact node range.
				170	*/
				171	if (!IS_ALIGNED(node_start_pfn(nid), MAX_ORDER_NR_PAGES) \|\|
				172	!IS_ALIGNED(node_end_pfn(nid), MAX_ORDER_NR_PAGES))
				173	nr_pages += MAX_ORDER_NR_PAGES;
				174
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	175	table_size = page_ext_size * nr_pages;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	176
Mike Rapoport	26fb3da	2019-03-11 23:30:42 -0700	[diff] [blame]	177	base = memblock_alloc_try_nid(
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	178	table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
Mike Rapoport	97ad108	2018-10-30 15:09:44 -0700	[diff] [blame]	179	MEMBLOCK_ALLOC_ACCESSIBLE, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	180	if (!base)
				181	return -ENOMEM;
				182	NODE_DATA(nid)->node_page_ext = base;
				183	total_usage += table_size;
				184	return 0;
				185	}
				186
				187	void __init page_ext_init_flatmem(void)
				188	{
				189
				190	int nid, fail;
				191
				192	if (!invoke_need_callbacks())
				193	return;
				194
				195	for_each_online_node(nid) {
				196	fail = alloc_node_page_ext(nid);
				197	if (fail)
				198	goto fail;
				199	}
				200	pr_info("allocated %ld bytes of page_ext\n", total_usage);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	201	return;
				202
				203	fail:
				204	pr_crit("allocation of page_ext failed.\n");
				205	panic("Out of memory");
				206	}
				207
Yinan Zhang	d1fea15	2021-11-05 13:36:46 -0700	[diff] [blame]	208	#else /* CONFIG_SPARSEMEM */
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	209
Kirill A. Shutemov	10ed634	2018-08-17 15:45:15 -0700	[diff] [blame]	210	struct page_ext lookup_page_ext(const struct page page)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	211	{
				212	unsigned long pfn = page_to_pfn(page);
				213	struct mem_section *section = __pfn_to_section(pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	214	/*
				215	* The sanity checks the page allocator does upon freeing a
				216	* page can reach here before the page_ext arrays are
				217	* allocated when feeding a range of pages to the allocator
				218	* for the first time during bootup or memory hotplug.
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	219	*/
				220	if (!section->page_ext)
				221	return NULL;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	222	return get_entry(section->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	223	}
				224
				225	static void *__meminit alloc_page_ext(size_t size, int nid)
				226	{
				227	gfp_t flags = GFP_KERNEL \| __GFP_ZERO \| __GFP_NOWARN;
				228	void *addr = NULL;
				229
				230	addr = alloc_pages_exact_nid(nid, size, flags);
				231	if (addr) {
				232	kmemleak_alloc(addr, size, 1, flags);
				233	return addr;
				234	}
				235
Michal Hocko	b95046b	2017-09-06 16:20:41 -0700	[diff] [blame]	236	addr = vzalloc_node(size, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	237
				238	return addr;
				239	}
				240
				241	static int __meminit init_section_page_ext(unsigned long pfn, int nid)
				242	{
				243	struct mem_section *section;
				244	struct page_ext *base;
				245	unsigned long table_size;
				246
				247	section = __pfn_to_section(pfn);
				248
				249	if (section->page_ext)
				250	return 0;
				251
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	252	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	253	base = alloc_page_ext(table_size, nid);
				254
				255	/*
				256	* The value stored in section->page_ext is (base - pfn)
				257	* and it does not point to the memory block allocated above,
				258	* causing kmemleak false positives.
				259	*/
				260	kmemleak_not_leak(base);
				261
				262	if (!base) {
				263	pr_err("page ext allocation failure\n");
				264	return -ENOMEM;
				265	}
				266
				267	/*
				268	* The passed "pfn" may not be aligned to SECTION. For the calculation
				269	* we need to apply a mask.
				270	*/
				271	pfn &= PAGE_SECTION_MASK;
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	272	section->page_ext = (void )base - page_ext_size pfn;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	273	total_usage += table_size;
				274	return 0;
				275	}
Dave Hansen	76af6a0	2021-10-18 15:15:32 -0700	[diff] [blame]	276
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	277	static void free_page_ext(void *addr)
				278	{
				279	if (is_vmalloc_addr(addr)) {
				280	vfree(addr);
				281	} else {
				282	struct page *page = virt_to_page(addr);
				283	size_t table_size;
				284
Vlastimil Babka	5556cfe	2019-10-14 14:11:40 -0700	[diff] [blame]	285	table_size = page_ext_size * PAGES_PER_SECTION;
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	286
				287	BUG_ON(PageReserved(page));
Qian Cai	0c81585	2019-03-05 15:49:46 -0800	[diff] [blame]	288	kmemleak_free(addr);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	289	free_pages_exact(addr, table_size);
				290	}
				291	}
				292
				293	static void __free_page_ext(unsigned long pfn)
				294	{
				295	struct mem_section *ms;
				296	struct page_ext *base;
				297
				298	ms = __pfn_to_section(pfn);
				299	if (!ms \|\| !ms->page_ext)
				300	return;
Joonsoo Kim	980ac16	2016-10-07 16:58:27 -0700	[diff] [blame]	301	base = get_entry(ms->page_ext, pfn);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	302	free_page_ext(base);
				303	ms->page_ext = NULL;
				304	}
				305
				306	static int __meminit online_page_ext(unsigned long start_pfn,
				307	unsigned long nr_pages,
				308	int nid)
				309	{
				310	unsigned long start, end, pfn;
				311	int fail = 0;
				312
				313	start = SECTION_ALIGN_DOWN(start_pfn);
				314	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				315
Anshuman Khandual	98fa15f	2019-03-05 15:42:58 -0800	[diff] [blame]	316	if (nid == NUMA_NO_NODE) {
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	317	/*
				318	* In this case, "nid" already exists and contains valid memory.
				319	* "start_pfn" passed to us is a pfn which is an arg for
				320	* online__pages(), and start_pfn should exist.
				321	*/
				322	nid = pfn_to_nid(start_pfn);
				323	VM_BUG_ON(!node_state(nid, N_ONLINE));
				324	}
				325
David Hildenbrand	dccacf8	2020-04-06 20:06:47 -0700	[diff] [blame]	326	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	327	fail = init_section_page_ext(pfn, nid);
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	328	if (!fail)
				329	return 0;
				330
				331	/* rollback */
				332	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				333	__free_page_ext(pfn);
				334
				335	return -ENOMEM;
				336	}
				337
				338	static int __meminit offline_page_ext(unsigned long start_pfn,
				339	unsigned long nr_pages, int nid)
				340	{
				341	unsigned long start, end, pfn;
				342
				343	start = SECTION_ALIGN_DOWN(start_pfn);
				344	end = SECTION_ALIGN_UP(start_pfn + nr_pages);
				345
				346	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
				347	__free_page_ext(pfn);
				348	return 0;
				349
				350	}
				351
				352	static int __meminit page_ext_callback(struct notifier_block *self,
				353	unsigned long action, void *arg)
				354	{
				355	struct memory_notify *mn = arg;
				356	int ret = 0;
				357
				358	switch (action) {
				359	case MEM_GOING_ONLINE:
				360	ret = online_page_ext(mn->start_pfn,
				361	mn->nr_pages, mn->status_change_nid);
				362	break;
				363	case MEM_OFFLINE:
				364	offline_page_ext(mn->start_pfn,
				365	mn->nr_pages, mn->status_change_nid);
				366	break;
				367	case MEM_CANCEL_ONLINE:
				368	offline_page_ext(mn->start_pfn,
				369	mn->nr_pages, mn->status_change_nid);
				370	break;
				371	case MEM_GOING_OFFLINE:
				372	break;
				373	case MEM_ONLINE:
				374	case MEM_CANCEL_OFFLINE:
				375	break;
				376	}
				377
				378	return notifier_from_errno(ret);
				379	}
				380
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	381	void __init page_ext_init(void)
				382	{
				383	unsigned long pfn;
				384	int nid;
				385
				386	if (!invoke_need_callbacks())
				387	return;
				388
				389	for_each_node_state(nid, N_MEMORY) {
				390	unsigned long start_pfn, end_pfn;
				391
				392	start_pfn = node_start_pfn(nid);
				393	end_pfn = node_end_pfn(nid);
				394	/*
				395	* start_pfn and end_pfn may not be aligned to SECTION and the
				396	* page->flags of out of node pages are not initialized. So we
				397	* scan [start_pfn, the biggest section's pfn < end_pfn) here.
				398	*/
				399	for (pfn = start_pfn; pfn < end_pfn;
				400	pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {
				401
				402	if (!pfn_valid(pfn))
				403	continue;
				404	/*
				405	* Nodes's pfns can be overlapping.
				406	* We know some arch can have a nodes layout such as
				407	* -------------pfn-------------->
				408	* N0 \| N1 \| N2 \| N0 \| N1 \| N2\|....
				409	*/
Qian Cai	2f1ee09	2019-02-12 15:36:03 -0800	[diff] [blame]	410	if (pfn_to_nid(pfn) != nid)
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	411	continue;
				412	if (init_section_page_ext(pfn, nid))
				413	goto oom;
Vlastimil Babka	0fc542b	2017-09-06 16:20:48 -0700	[diff] [blame]	414	cond_resched();
Joonsoo Kim	eefa864b	2014-12-12 16:55:46 -0800	[diff] [blame]	415	}
				416	}
				417	hotplug_memory_notifier(page_ext_callback, 0);
				418	pr_info("allocated %ld bytes of page_ext\n", total_usage);
				419	invoke_init_callbacks();
				420	return;
				421
				422	oom:
				423	panic("Out of memory");
				424	}
				425
				426	void __meminit pgdat_page_ext_init(struct pglist_data *pgdat)
				427	{
				428	}
				429
				430	#endif