Blame - mm/mmu_gather.c - SHIFTPHONES/mainline/linux

blob: afb7185ffdc45484dd578e6a6763693aa0b68388 [file] [log] [blame]

Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	1	#include <linux/gfp.h>
				2	#include <linux/highmem.h>
				3	#include <linux/kernel.h>
				4	#include <linux/mmdebug.h>
				5	#include <linux/mm_types.h>
Arnd Bergmann	36090de	2022-01-14 14:06:10 -0800	[diff] [blame]	6	#include <linux/mm_inline.h>
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	7	#include <linux/pagemap.h>
				8	#include <linux/rcupdate.h>
				9	#include <linux/smp.h>
				10	#include <linux/swap.h>
				11
				12	#include <asm/pgalloc.h>
				13	#include <asm/tlb.h>
				14
Peter Zijlstra	580a586	2020-02-03 17:37:08 -0800	[diff] [blame]	15	#ifndef CONFIG_MMU_GATHER_NO_GATHER
Martin Schwidefsky	952a31c	2018-09-18 14:51:50 +0200	[diff] [blame]	16
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	17	static bool tlb_next_batch(struct mmu_gather *tlb)
				18	{
				19	struct mmu_gather_batch *batch;
				20
				21	batch = tlb->active;
				22	if (batch->next) {
				23	tlb->active = batch->next;
				24	return true;
				25	}
				26
				27	if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
				28	return false;
				29
				30	batch = (void *)__get_free_pages(GFP_NOWAIT \| __GFP_NOWARN, 0);
				31	if (!batch)
				32	return false;
				33
				34	tlb->batch_count++;
				35	batch->next = NULL;
				36	batch->nr = 0;
				37	batch->max = MAX_GATHER_BATCH;
				38
				39	tlb->active->next = batch;
				40	tlb->active = batch;
				41
				42	return true;
				43	}
				44
Martin Schwidefsky	952a31c	2018-09-18 14:51:50 +0200	[diff] [blame]	45	static void tlb_batch_pages_flush(struct mmu_gather *tlb)
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	46	{
				47	struct mmu_gather_batch *batch;
				48
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	49	for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
				50	free_pages_and_swap_cache(batch->pages, batch->nr);
				51	batch->nr = 0;
				52	}
				53	tlb->active = &tlb->local;
				54	}
				55
Martin Schwidefsky	952a31c	2018-09-18 14:51:50 +0200	[diff] [blame]	56	static void tlb_batch_list_free(struct mmu_gather *tlb)
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	57	{
				58	struct mmu_gather_batch batch, next;
				59
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	60	for (batch = tlb->local.next; batch; batch = next) {
				61	next = batch->next;
				62	free_pages((unsigned long)batch, 0);
				63	}
				64	tlb->local.next = NULL;
				65	}
				66
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	67	bool __tlb_remove_page_size(struct mmu_gather tlb, struct page page, int page_size)
				68	{
				69	struct mmu_gather_batch *batch;
				70
				71	VM_BUG_ON(!tlb->end);
Peter Zijlstra	ed6a793	2018-08-31 14:46:08 +0200	[diff] [blame]	72
Peter Zijlstra	3af4bd0	2020-02-03 17:37:05 -0800	[diff] [blame]	73	#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	74	VM_WARN_ON(tlb->page_size != page_size);
Peter Zijlstra	ed6a793	2018-08-31 14:46:08 +0200	[diff] [blame]	75	#endif
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	76
				77	batch = tlb->active;
				78	/*
				79	* Add the page and check if we are full. If so
				80	* force a flush.
				81	*/
				82	batch->pages[batch->nr++] = page;
				83	if (batch->nr == batch->max) {
				84	if (!tlb_next_batch(tlb))
				85	return true;
				86	batch = tlb->active;
				87	}
				88	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
				89
				90	return false;
				91	}
				92
Peter Zijlstra	580a586	2020-02-03 17:37:08 -0800	[diff] [blame]	93	#endif /* MMU_GATHER_NO_GATHER */
Martin Schwidefsky	952a31c	2018-09-18 14:51:50 +0200	[diff] [blame]	94
Peter Zijlstra	0d6e24d	2020-02-03 17:37:11 -0800	[diff] [blame]	95	#ifdef CONFIG_MMU_GATHER_TABLE_FREE
				96
				97	static void __tlb_remove_table_free(struct mmu_table_batch *batch)
				98	{
				99	int i;
				100
				101	for (i = 0; i < batch->nr; i++)
				102	__tlb_remove_table(batch->tables[i]);
				103
				104	free_page((unsigned long)batch);
				105	}
				106
Peter Zijlstra	ff2e6d72	2020-02-03 17:37:02 -0800	[diff] [blame]	107	#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	108
				109	/*
Peter Zijlstra	0d6e24d	2020-02-03 17:37:11 -0800	[diff] [blame]	110	* Semi RCU freeing of the page directories.
				111	*
				112	* This is needed by some architectures to implement software pagetable walkers.
				113	*
				114	* gup_fast() and other software pagetable walkers do a lockless page-table
				115	* walk and therefore need some synchronization with the freeing of the page
				116	* directories. The chosen means to accomplish that is by disabling IRQs over
				117	* the walk.
				118	*
				119	* Architectures that use IPIs to flush TLBs will then automagically DTRT,
				120	* since we unlink the page, flush TLBs, free the page. Since the disabling of
				121	* IRQs delays the completion of the TLB flush we can never observe an already
				122	* freed page.
				123	*
				124	* Architectures that do not have this (PPC) need to delay the freeing by some
				125	* other means, this is that means.
				126	*
				127	* What we do is batch the freed directory pages (tables) and RCU free them.
				128	* We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
				129	* holds off grace periods.
				130	*
				131	* However, in order to batch these pages we need to allocate storage, this
				132	* allocation is deep inside the MM code and can thus easily fail on memory
				133	* pressure. To guarantee progress we fall back to single table freeing, see
				134	* the implementation of tlb_remove_table_one().
				135	*
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	136	*/
				137
/*
 * Callback passed to smp_call_function() by tlb_remove_table_sync_one().
 * It intentionally does nothing; @arg is unused.
 */
static void tlb_remove_table_smp_sync(void *arg)
{
	/* Nothing to do: delivering the interrupt is the whole point. */
}
				142
				143	static void tlb_remove_table_sync_one(void)
				144	{
				145	/*
				146	* This isn't an RCU grace period and hence the page-tables cannot be
				147	* assumed to be actually RCU-freed.
				148	*
				149	* It is however sufficient for software page-table walkers that rely on
				150	* IRQ disabling.
				151	*/
				152	smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
				153	}
				154
				155	static void tlb_remove_table_rcu(struct rcu_head *head)
				156	{
				157	__tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu));
				158	}
				159
				160	static void tlb_remove_table_free(struct mmu_table_batch *batch)
				161	{
				162	call_rcu(&batch->rcu, tlb_remove_table_rcu);
				163	}
				164
				165	#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */
				166
				167	static void tlb_remove_table_sync_one(void) { }
				168
				169	static void tlb_remove_table_free(struct mmu_table_batch *batch)
				170	{
				171	__tlb_remove_table_free(batch);
				172	}
				173
				174	#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
				175
/*
 * Used when tlb_remove_table() is supposed to imply TLB invalidates: issue
 * a TLB-only flush if tlb_needs_table_invalidate() says so.
 */
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
	if (!tlb_needs_table_invalidate())
		return;

	/*
	 * Invalidate the page-table caches used by hardware walkers. The
	 * pages still have to be freed with an RCU-sched wait afterwards,
	 * because software walkers can still be in-flight.
	 */
	tlb_flush_mmu_tlbonly(tlb);
}
				190
/*
 * Free a single page-table @table without batching: synchronize via
 * tlb_remove_table_sync_one() first, then release the table.
 */
static void tlb_remove_table_one(void *table)
{
	/* The sync has to come before the table is released. */
	tlb_remove_table_sync_one();
	__tlb_remove_table(table);
}
				196
Peter Zijlstra	0a8caf2	2018-09-20 10:55:10 +0200	[diff] [blame]	197	static void tlb_table_flush(struct mmu_gather *tlb)
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	198	{
				199	struct mmu_table_batch **batch = &tlb->batch;
				200
				201	if (*batch) {
				202	tlb_table_invalidate(tlb);
Peter Zijlstra	0d6e24d	2020-02-03 17:37:11 -0800	[diff] [blame]	203	tlb_remove_table_free(*batch);
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	204	*batch = NULL;
				205	}
				206	}
				207
				208	void tlb_remove_table(struct mmu_gather tlb, void table)
				209	{
				210	struct mmu_table_batch **batch = &tlb->batch;
				211
				212	if (*batch == NULL) {
				213	batch = (struct mmu_table_batch )__get_free_page(GFP_NOWAIT \| __GFP_NOWARN);
				214	if (*batch == NULL) {
				215	tlb_table_invalidate(tlb);
				216	tlb_remove_table_one(table);
				217	return;
				218	}
				219	(*batch)->nr = 0;
				220	}
				221
				222	(batch)->tables[(batch)->nr++] = table;
				223	if ((*batch)->nr == MAX_TABLE_BATCH)
				224	tlb_table_flush(tlb);
				225	}
				226
Peter Zijlstra	0d6e24d	2020-02-03 17:37:11 -0800	[diff] [blame]	227	static inline void tlb_table_init(struct mmu_gather *tlb)
				228	{
				229	tlb->batch = NULL;
				230	}
				231
				232	#else /* !CONFIG_MMU_GATHER_TABLE_FREE */
				233
				234	static inline void tlb_table_flush(struct mmu_gather *tlb) { }
				235	static inline void tlb_table_init(struct mmu_gather *tlb) { }
				236
				237	#endif /* CONFIG_MMU_GATHER_TABLE_FREE */
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	238
/*
 * Release what @tlb has gathered: flush the pending table batch first and,
 * unless CONFIG_MMU_GATHER_NO_GATHER is set, the batched pages afterwards.
 */
static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
	tlb_table_flush(tlb);
#ifndef CONFIG_MMU_GATHER_NO_GATHER
	tlb_batch_pages_flush(tlb);
#endif
}
				246
				247	void tlb_flush_mmu(struct mmu_gather *tlb)
				248	{
				249	tlb_flush_mmu_tlbonly(tlb);
				250	tlb_flush_mmu_free(tlb);
				251	}
				252
Will Deacon	d8b4505	2021-01-27 23:53:44 +0000	[diff] [blame]	253	static void __tlb_gather_mmu(struct mmu_gather tlb, struct mm_struct mm,
Will Deacon	a72afd8	2021-01-27 23:53:45 +0000	[diff] [blame]	254	bool fullmm)
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	255	{
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	256	tlb->mm = mm;
Will Deacon	a72afd8	2021-01-27 23:53:45 +0000	[diff] [blame]	257	tlb->fullmm = fullmm;
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	258
Peter Zijlstra	580a586	2020-02-03 17:37:08 -0800	[diff] [blame]	259	#ifndef CONFIG_MMU_GATHER_NO_GATHER
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	260	tlb->need_flush_all = 0;
				261	tlb->local.next = NULL;
				262	tlb->local.nr = 0;
				263	tlb->local.max = ARRAY_SIZE(tlb->__pages);
				264	tlb->active = &tlb->local;
				265	tlb->batch_count = 0;
				266	#endif
				267
Peter Zijlstra	0d6e24d	2020-02-03 17:37:11 -0800	[diff] [blame]	268	tlb_table_init(tlb);
Peter Zijlstra	3af4bd0	2020-02-03 17:37:05 -0800	[diff] [blame]	269	#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	270	tlb->page_size = 0;
				271	#endif
				272
				273	__tlb_reset_range(tlb);
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	274	inc_tlb_flush_pending(tlb->mm);
				275	}
				276
/**
 * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, false);
}
				289
/**
 * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * In this case, @mm is without users and we're going to destroy the
 * full address space (exit/execve).
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
	__tlb_gather_mmu(tlb, mm, true);
}
				305
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	306	/**
				307	* tlb_finish_mmu - finish an mmu_gather structure
				308	* @tlb: the mmu_gather structure to finish
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	309	*
				310	* Called at the end of the shootdown operation to free up any resources that
				311	* were required.
				312	*/
Will Deacon	ae8eba8	2021-01-27 23:53:43 +0000	[diff] [blame]	313	void tlb_finish_mmu(struct mmu_gather *tlb)
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	314	{
				315	/*
				316	* If there are parallel threads are doing PTE changes on same range
Michel Lespinasse	c1e8d7c	2020-06-08 21:33:54 -0700	[diff] [blame]	317	* under non-exclusive lock (e.g., mmap_lock read-side) but defer TLB
Yang Shi	7a30df4	2019-06-13 15:56:05 -0700	[diff] [blame]	318	* flush by batching, one thread may end up seeing inconsistent PTEs
				319	* and result in having stale TLB entries. So flush TLB forcefully
				320	* if we detect parallel PTE batching threads.
				321	*
				322	* However, some syscalls, e.g. munmap(), may free page tables, this
				323	* needs force flush everything in the given range. Otherwise this
				324	* may result in having stale TLB entries for some architectures,
				325	* e.g. aarch64, that could specify flush what level TLB.
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	326	*/
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	327	if (mm_tlb_flush_nested(tlb->mm)) {
Yang Shi	7a30df4	2019-06-13 15:56:05 -0700	[diff] [blame]	328	/*
				329	* The aarch64 yields better performance with fullmm by
				330	* avoiding multiple CPUs spamming TLBI messages at the
				331	* same time.
				332	*
				333	* On x86 non-fullmm doesn't yield significant difference
				334	* against fullmm.
				335	*/
				336	tlb->fullmm = 1;
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	337	__tlb_reset_range(tlb);
Yang Shi	7a30df4	2019-06-13 15:56:05 -0700	[diff] [blame]	338	tlb->freed_tables = 1;
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	339	}
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	340
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	341	tlb_flush_mmu(tlb);
				342
Peter Zijlstra	580a586	2020-02-03 17:37:08 -0800	[diff] [blame]	343	#ifndef CONFIG_MMU_GATHER_NO_GATHER
Peter Zijlstra	1808d65	2018-09-20 10:50:11 +0200	[diff] [blame]	344	tlb_batch_list_free(tlb);
				345	#endif
Peter Zijlstra	196d9d8	2018-09-03 15:07:36 +0100	[diff] [blame]	346	dec_tlb_flush_pending(tlb->mm);
				347	}