Blame - arch/x86/mm/tlb.c - SHIFTPHONES/mainline/linux

blob: 4e5a5ddb9e4d0630d473446c5b9acdd731c70bf3 [file] [log] [blame]

Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	1	#include <linux/init.h>
				2
				3	#include <linux/mm.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	4	#include <linux/spinlock.h>
				5	#include <linux/smp.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	6	#include <linux/interrupt.h>
Paul Gortmaker	4b599fe	2016-07-13 20:18:55 -0400	[diff] [blame]	7	#include <linux/export.h>
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	8	#include <linux/cpu.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	9
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	10	#include <asm/tlbflush.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	11	#include <asm/mmu_context.h>
Jan Beulich	350f8f5	2009-11-13 11:54:40 +0000	[diff] [blame]	12	#include <asm/cache.h>
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	13	#include <asm/apic.h>
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	14	#include <asm/uv/uv.h>
Alex Shi	3df3212	2012-06-28 09:02:20 +0800	[diff] [blame]	15	#include <linux/debugfs.h>
Glauber Costa	5af5573	2008-03-25 13:28:56 -0300	[diff] [blame]	16
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	17	/*
Andy Lutomirski	ce4a4e56	2017-05-28 10:00:14 -0700	[diff] [blame]	18	* TLB flushing, formerly SMP-only
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	19	* c/o Linus Torvalds.
				20	*
				21	* These mean you can really definitely utterly forget about
				22	* writing to user space from interrupts. (It's not allowed anyway).
				23	*
				24	* Optimizations Manfred Spraul <manfred@colorfullife.com>
				25	*
				26	* More scalable flush, from Andi Kleen
				27	*
Alex Shi	52aec33	2012-06-28 09:02:23 +0800	[diff] [blame]	28	* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	29	*/
				30
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	31	atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
				32
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	33	void leave_mm(int cpu)
				34	{
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	35	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				36
				37	/*
				38	* It's plausible that we're in lazy TLB mode while our mm is init_mm.
				39	* If so, our callers still expect us to flush the TLB, but there
				40	* aren't any user TLB entries in init_mm to worry about.
				41	*
				42	* This needs to happen before any other sanity checks due to
				43	* intel_idle's shenanigans.
				44	*/
				45	if (loaded_mm == &init_mm)
				46	return;
				47
Alex Shi	c6ae41e	2012-05-11 15:35:27 +0800	[diff] [blame]	48	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	49	BUG();
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	50
				51	switch_mm(NULL, &init_mm, NULL);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	52	}
				53	EXPORT_SYMBOL_GPL(leave_mm);
				54
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	55	void switch_mm(struct mm_struct *prev, struct mm_struct *next,
				56	struct task_struct *tsk)
				57	{
Andy Lutomirski	078194f	2016-04-26 09:39:09 -0700	[diff] [blame]	58	unsigned long flags;
				59
				60	local_irq_save(flags);
				61	switch_mm_irqs_off(prev, next, tsk);
				62	local_irq_restore(flags);
				63	}
				64
				65	void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
				66	struct task_struct *tsk)
				67	{
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	68	unsigned cpu = smp_processor_id();
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	69	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	70
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	71	/*
				72	* NB: The scheduler will call us with prev == next when
				73	* switching from lazy TLB mode to normal mode if active_mm
				74	* isn't changing. When this happens, there is no guarantee
				75	* that CR3 (and hence cpu_tlbstate.loaded_mm) matches next.
				76	*
				77	* NB: leave_mm() calls us with prev == NULL and tsk == NULL.
				78	*/
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	79
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	80	this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	81
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	82	if (real_prev == next) {
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	83	/*
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	84	* There's nothing to do: we always keep the per-mm control
				85	* regs in sync with cpu_tlbstate.loaded_mm. Just
				86	* sanity-check mm_cpumask.
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	87	*/
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	88	if (WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(next))))
				89	cpumask_set_cpu(cpu, mm_cpumask(next));
				90	return;
				91	}
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	92
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	93	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
				94	/*
				95	* If our current stack is in vmalloc space and isn't
				96	* mapped in the new pgd, we'll double-fault. Forcibly
				97	* map it.
				98	*/
				99	unsigned int stack_pgd_index = pgd_index(current_stack_pointer());
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	100
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	101	pgd_t *pgd = next->pgd + stack_pgd_index;
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	102
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	103	if (unlikely(pgd_none(*pgd)))
				104	set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
				105	}
				106
				107	this_cpu_write(cpu_tlbstate.loaded_mm, next);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	108	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, next->context.ctx_id);
				109	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, atomic64_read(&next->context.tlb_gen));
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	110
				111	WARN_ON_ONCE(cpumask_test_cpu(cpu, mm_cpumask(next)));
				112	cpumask_set_cpu(cpu, mm_cpumask(next));
				113
				114	/*
				115	* Re-load page tables.
				116	*
				117	* This logic has an ordering constraint:
				118	*
				119	* CPU 0: Write to a PTE for 'next'
				120	* CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
				121	* CPU 1: set bit 1 in next's mm_cpumask
				122	* CPU 1: load from the PTE that CPU 0 writes (implicit)
				123	*
				124	* We need to prevent an outcome in which CPU 1 observes
				125	* the new PTE value and CPU 0 observes bit 1 clear in
				126	* mm_cpumask. (If that occurs, then the IPI will never
				127	* be sent, and CPU 0's TLB will contain a stale entry.)
				128	*
				129	* The bad outcome can occur if either CPU's load is
				130	* reordered before that CPU's store, so both CPUs must
				131	* execute full barriers to prevent this from happening.
				132	*
				133	* Thus, switch_mm needs a full barrier between the
				134	* store to mm_cpumask and any operation that could load
				135	* from next->pgd. TLB fills are special and can happen
				136	* due to instruction fetches or for no reason at all,
				137	* and neither LOCK nor MFENCE orders them.
				138	* Fortunately, load_cr3() is serializing and gives the
				139	* ordering guarantee we need.
				140	*/
				141	load_cr3(next->pgd);
				142
				143	/*
				144	* This gets called via leave_mm() in the idle path where RCU
				145	* functions differently. Tracing normally uses RCU, so we have to
				146	* call the tracepoint specially here.
				147	*/
				148	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
				149
				150	/* Stop flush ipis for the previous mm */
				151	WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
				152	real_prev != &init_mm);
				153	cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
				154
Andy Lutomirski	7353425	2017-06-20 22:22:08 -0700	[diff] [blame]	155	/* Load per-mm CR4 and LDTR state */
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	156	load_mm_cr4(next);
Andy Lutomirski	7353425	2017-06-20 22:22:08 -0700	[diff] [blame]	157	switch_ldt(real_prev, next);
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	158	}
				159
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	160	/*
				161	* flush_tlb_func_common()'s memory ordering requirement is that any
				162	* TLB fills that happen after we flush the TLB are ordered after we
				163	* read active_mm's tlb_gen. We don't need any explicit barriers
				164	* because all x86 flush operations are serializing and the
				165	* atomic64_read operation won't be reordered by the compiler.
				166	*/
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	167	static void flush_tlb_func_common(const struct flush_tlb_info *f,
				168	bool local, enum tlb_flush_reason reason)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	169	{
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	170	/*
				171	* We have three different tlb_gen values in here. They are:
				172	*
				173	* - mm_tlb_gen: the latest generation.
				174	* - local_tlb_gen: the generation that this CPU has already caught
				175	* up to.
				176	* - f->new_tlb_gen: the generation that the requester of the flush
				177	* wants us to catch up to.
				178	*/
				179	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				180	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
				181	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[0].tlb_gen);
				182
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	183	/* This code cannot presently handle being reentered. */
				184	VM_WARN_ON(!irqs_disabled());
				185
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	186	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[0].ctx_id) !=
				187	loaded_mm->context.ctx_id);
				188
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	189	if (this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK) {
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	190	/*
				191	* leave_mm() is adequate to handle any type of flush, and
				192	* we would prefer not to receive further IPIs. leave_mm()
				193	* clears this CPU's bit in mm_cpumask().
				194	*/
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	195	leave_mm(smp_processor_id());
				196	return;
				197	}
				198
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	199	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
				200	/*
				201	* There's nothing to do: we're already up to date. This can
				202	* happen if two concurrent flushes happen -- the first flush to
				203	* be handled can catch us all the way up, leaving no work for
				204	* the second flush.
				205	*/
				206	return;
				207	}
				208
				209	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
				210	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
				211
				212	/*
				213	* If we get to this point, we know that our TLB is out of date.
				214	* This does not strictly imply that we need to flush (it's
				215	* possible that f->new_tlb_gen <= local_tlb_gen), but we're
				216	* going to need to flush in the very near future, so we might
				217	* as well get it over with.
				218	*
				219	* The only question is whether to do a full or partial flush.
				220	*
				221	* We do a partial flush if requested and two extra conditions
				222	* are met:
				223	*
				224	* 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that
				225	* we've always done all needed flushes to catch up to
				226	* local_tlb_gen. If, for example, local_tlb_gen == 2 and
				227	* f->new_tlb_gen == 3, then we know that the flush needed to bring
				228	* us up to date for tlb_gen 3 is the partial flush we're
				229	* processing.
				230	*
				231	* As an example of why this check is needed, suppose that there
				232	* are two concurrent flushes. The first is a full flush that
				233	* changes context.tlb_gen from 1 to 2. The second is a partial
				234	* flush that changes context.tlb_gen from 2 to 3. If they get
				235	* processed on this CPU in reverse order, we'll see
				236	* local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
				237	* If we were to use __flush_tlb_single() and set local_tlb_gen to
				238	* 3, we'd break the invariant: we'd update local_tlb_gen above
				239	* 1 without the full flush that's needed for tlb_gen 2.
				240	*
				241	* 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimization.
				242	* Partial TLB flushes are not all that much cheaper than full TLB
				243	* flushes, so it seems unlikely that it would be a performance win
				244	* to do a partial flush if that won't bring our TLB fully up to
				245	* date. By doing a full flush instead, we can increase
				246	* local_tlb_gen all the way to mm_tlb_gen and we can probably
				247	* avoid another flush in the very near future.
				248	*/
				249	if (f->end != TLB_FLUSH_ALL &&
				250	f->new_tlb_gen == local_tlb_gen + 1 &&
				251	f->new_tlb_gen == mm_tlb_gen) {
				252	/* Partial flush */
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	253	unsigned long addr;
Andy Lutomirski	be4ffc0	2017-05-28 10:00:16 -0700	[diff] [blame]	254	unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	255
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	256	addr = f->start;
				257	while (addr < f->end) {
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	258	__flush_tlb_single(addr);
				259	addr += PAGE_SIZE;
				260	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	261	if (local)
				262	count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
				263	trace_tlb_flush(reason, nr_pages);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	264	} else {
				265	/* Full flush. */
				266	local_flush_tlb();
				267	if (local)
				268	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
				269	trace_tlb_flush(reason, TLB_FLUSH_ALL);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	270	}
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	271
				272	/* Both paths above update our state to mm_tlb_gen. */
				273	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, mm_tlb_gen);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	274	}
				275
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	276	static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
				277	{
				278	const struct flush_tlb_info *f = info;
				279
				280	flush_tlb_func_common(f, true, reason);
				281	}
				282
				283	static void flush_tlb_func_remote(void *info)
				284	{
				285	const struct flush_tlb_info *f = info;
				286
				287	inc_irq_stat(irq_tlb_count);
				288
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	289	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	290	return;
				291
				292	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
				293	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
				294	}
				295
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	296	void native_flush_tlb_others(const struct cpumask *cpumask,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	297	const struct flush_tlb_info *info)
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	298	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	299	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	300	if (info->end == TLB_FLUSH_ALL)
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	301	trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
				302	else
				303	trace_tlb_flush(TLB_REMOTE_SEND_IPI,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	304	(info->end - info->start) >> PAGE_SHIFT);
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	305
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	306	if (is_uv_system()) {
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	307	unsigned int cpu;
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	308
Xiao Guangrong	25542c6	2011-03-15 09:57:37 +0800	[diff] [blame]	309	cpu = smp_processor_id();
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	310	cpumask = uv_flush_tlb_others(cpumask, info);
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	311	if (cpumask)
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	312	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	313	(void *)info, 1);
Mike Travis	0e21990	2009-01-10 21:58:10 -0800	[diff] [blame]	314	return;
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	315	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	316	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	317	(void *)info, 1);
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	318	}
				319
Dave Hansen	a510247	2014-07-31 08:41:03 -0700	[diff] [blame]	320	/*
				321	* See Documentation/x86/tlb.txt for details. We choose 33
				322	* because it is large enough to cover the vast majority (at
				323	* least 95%) of allocations, and is small enough that we are
				324	* confident it will not cause too much overhead. Each single
				325	* flush is about 100 ns, so this caps the maximum overhead at
				326	* _about_ 3,000 ns.
				327	*
				328	* This is in units of pages.
				329	*/
Jeremiah Mahler	8642685	2014-08-09 00:38:33 -0700	[diff] [blame]	330	static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	331
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	332	void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				333	unsigned long end, unsigned long vmflag)
				334	{
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	335	int cpu;
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	336
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	337	struct flush_tlb_info info = {
				338	.mm = mm,
				339	};
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	340
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	341	cpu = get_cpu();
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	342
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	343	/* This is also a barrier that synchronizes with switch_mm(). */
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame^]	344	info.new_tlb_gen = inc_mm_tlb_gen(mm);
Andy Lutomirski	71b3c12	2016-01-06 12:21:01 -0800	[diff] [blame]	345
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	346	/* Should we flush just the requested range? */
				347	if ((end != TLB_FLUSH_ALL) &&
				348	!(vmflag & VM_HUGETLB) &&
				349	((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	350	info.start = start;
				351	info.end = end;
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	352	} else {
				353	info.start = 0UL;
				354	info.end = TLB_FLUSH_ALL;
Dave Hansen	4995ab9	2014-07-31 08:40:54 -0700	[diff] [blame]	355	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	356
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	357	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
				358	VM_WARN_ON(irqs_disabled());
				359	local_irq_disable();
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	360	flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	361	local_irq_enable();
				362	}
				363
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	364	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	365	flush_tlb_others(mm_cpumask(mm), &info);
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	366	put_cpu();
Alex Shi	e7b52ff	2012-06-28 09:02:17 +0800	[diff] [blame]	367	}
				368
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	369
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	370	static void do_flush_tlb_all(void *info)
				371	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	372	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	373	__flush_tlb_all();
Alex Shi	c6ae41e	2012-05-11 15:35:27 +0800	[diff] [blame]	374	if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
Borislav Petkov	3f8afb7	2010-07-21 14:47:05 +0200	[diff] [blame]	375	leave_mm(smp_processor_id());
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	376	}
				377
				378	void flush_tlb_all(void)
				379	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	380	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Jens Axboe	15c8b6c	2008-05-09 09:39:44 +0200	[diff] [blame]	381	on_each_cpu(do_flush_tlb_all, NULL, 1);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	382	}
Alex Shi	3df3212	2012-06-28 09:02:20 +0800	[diff] [blame]	383
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	384	static void do_kernel_range_flush(void *info)
				385	{
				386	struct flush_tlb_info *f = info;
				387	unsigned long addr;
				388
				389	/* flush range by one by one 'invlpg' */
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	390	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	391	__flush_tlb_single(addr);
				392	}
				393
				394	void flush_tlb_kernel_range(unsigned long start, unsigned long end)
				395	{
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	396
				397	/* Balance as user space task's flush, a bit conservative */
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	398	if (end == TLB_FLUSH_ALL ||
Andy Lutomirski	be4ffc0	2017-05-28 10:00:16 -0700	[diff] [blame]	399	(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	400	on_each_cpu(do_flush_tlb_all, NULL, 1);
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	401	} else {
				402	struct flush_tlb_info info;
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	403	info.start = start;
				404	info.end = end;
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	405	on_each_cpu(do_kernel_range_flush, &info, 1);
				406	}
				407	}
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	408
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	409	void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
				410	{
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	411	struct flush_tlb_info info = {
				412	.mm = NULL,
				413	.start = 0UL,
				414	.end = TLB_FLUSH_ALL,
				415	};
				416
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	417	int cpu = get_cpu();
				418
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	419	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
				420	VM_WARN_ON(irqs_disabled());
				421	local_irq_disable();
Andy Lutomirski	3f79e4c	2017-05-28 10:00:13 -0700	[diff] [blame]	422	flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	423	local_irq_enable();
				424	}
				425
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	426	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	427	flush_tlb_others(&batch->cpumask, &info);
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	428	cpumask_clear(&batch->cpumask);
				429
				430	put_cpu();
				431	}
				432
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	433	static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
				434	size_t count, loff_t *ppos)
				435	{
				436	char buf[32];
				437	unsigned int len;
				438
				439	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
				440	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
				441	}
				442
				443	static ssize_t tlbflush_write_file(struct file *file,
				444	const char __user *user_buf, size_t count, loff_t *ppos)
				445	{
				446	char buf[32];
				447	ssize_t len;
				448	int ceiling;
				449
				450	len = min(count, sizeof(buf) - 1);
				451	if (copy_from_user(buf, user_buf, len))
				452	return -EFAULT;
				453
				454	buf[len] = '\0';
				455	if (kstrtoint(buf, 0, &ceiling))
				456	return -EINVAL;
				457
				458	if (ceiling < 0)
				459	return -EINVAL;
				460
				461	tlb_single_page_flush_ceiling = ceiling;
				462	return count;
				463	}
				464
				465	static const struct file_operations fops_tlbflush = {
				466	.read = tlbflush_read_file,
				467	.write = tlbflush_write_file,
				468	.llseek = default_llseek,
				469	};
				470
				471	static int __init create_tlb_single_page_flush_ceiling(void)
				472	{
				473	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
				474	arch_debugfs_dir, NULL, &fops_tlbflush);
				475	return 0;
				476	}
				477	late_initcall(create_tlb_single_page_flush_ceiling);