Blame - arch/x86/mm/tlb.c - SHIFTPHONES/kernel/common

blob: 3118392cdf756bfc913d7a4137d5f7e0d46b046d [file] [log] [blame]

Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	1	#include <linux/init.h>
				2
				3	#include <linux/mm.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	4	#include <linux/spinlock.h>
				5	#include <linux/smp.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	6	#include <linux/interrupt.h>
Paul Gortmaker	4b599fe	2016-07-13 20:18:55 -0400	[diff] [blame]	7	#include <linux/export.h>
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	8	#include <linux/cpu.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	9
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	10	#include <asm/tlbflush.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	11	#include <asm/mmu_context.h>
Jan Beulich	350f8f5	2009-11-13 11:54:40 +0000	[diff] [blame]	12	#include <asm/cache.h>
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	13	#include <asm/apic.h>
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	14	#include <asm/uv/uv.h>
Alex Shi	3df3212	2012-06-28 09:02:20 +0800	[diff] [blame]	15	#include <linux/debugfs.h>
Glauber Costa	5af5573	2008-03-25 13:28:56 -0300	[diff] [blame]	16
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	17	/*
Andy Lutomirski	ce4a4e56	2017-05-28 10:00:14 -0700	[diff] [blame]	18	* TLB flushing, formerly SMP-only
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	19	* c/o Linus Torvalds.
				20	*
				21	* These mean you can really definitely utterly forget about
				22	* writing to user space from interrupts. (It's not allowed anyway).
				23	*
				24	* Optimizations Manfred Spraul <manfred@colorfullife.com>
				25	*
				26	* More scalable flush, from Andi Kleen
				27	*
Alex Shi	52aec33	2012-06-28 09:02:23 +0800	[diff] [blame]	28	* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	29	*/
				30
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	31	atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
				32
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	33
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	34	static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
				35	u16 *new_asid, bool *need_flush)
				36	{
				37	u16 asid;
				38
				39	if (!static_cpu_has(X86_FEATURE_PCID)) {
				40	*new_asid = 0;
				41	*need_flush = true;
				42	return;
				43	}
				44
				45	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
				46	if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
				47	next->context.ctx_id)
				48	continue;
				49
				50	*new_asid = asid;
				51	*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
				52	next_tlb_gen);
				53	return;
				54	}
				55
				56	/*
				57	* We don't currently own an ASID slot on this CPU.
				58	* Allocate a slot.
				59	*/
				60	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
				61	if (*new_asid >= TLB_NR_DYN_ASIDS) {
				62	*new_asid = 0;
				63	this_cpu_write(cpu_tlbstate.next_asid, 1);
				64	}
				65	*need_flush = true;
				66	}
				67
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	68	void leave_mm(int cpu)
				69	{
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	70	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				71
				72	/*
				73	* It's plausible that we're in lazy TLB mode while our mm is init_mm.
				74	* If so, our callers still expect us to flush the TLB, but there
				75	* aren't any user TLB entries in init_mm to worry about.
				76	*
				77	* This needs to happen before any other sanity checks due to
				78	* intel_idle's shenanigans.
				79	*/
				80	if (loaded_mm == &init_mm)
				81	return;
				82
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	83	/* Warn if we're not lazy. */
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	84	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	85
				86	switch_mm(NULL, &init_mm, NULL);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	87	}
Andy Lutomirski	6753573	2017-11-04 04:16:12 -0700	[diff] [blame^]	88	EXPORT_SYMBOL_GPL(leave_mm);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	89
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	90	void switch_mm(struct mm_struct *prev, struct mm_struct *next,
				91	struct task_struct *tsk)
				92	{
Andy Lutomirski	078194f	2016-04-26 09:39:09 -0700	[diff] [blame]	93	unsigned long flags;
				94
				95	local_irq_save(flags);
				96	switch_mm_irqs_off(prev, next, tsk);
				97	local_irq_restore(flags);
				98	}
				99
				100	void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
				101	struct task_struct *tsk)
				102	{
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	103	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	104	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	105	unsigned cpu = smp_processor_id();
				106	u64 next_tlb_gen;
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	107
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	108	/*
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	109	* NB: The scheduler will call us with prev == next when switching
				110	* from lazy TLB mode to normal mode if active_mm isn't changing.
				111	* When this happens, we don't assume that CR3 (and hence
				112	* cpu_tlbstate.loaded_mm) matches next.
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	113	*
				114	* NB: leave_mm() calls us with prev == NULL and tsk == NULL.
				115	*/
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	116
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	117	/* We don't want flush_tlb_func_* to run concurrently with us. */
				118	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
				119	WARN_ON_ONCE(!irqs_disabled());
				120
				121	/*
				122	* Verify that CR3 is what we think it is. This will catch
				123	* hypothetical buggy code that directly switches to swapper_pg_dir
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	124	* without going through leave_mm() / switch_mm_irqs_off() or that
				125	* does something like write_cr3(read_cr3_pa()).
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	126	*
				127	* Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
				128	* isn't free.
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	129	*/
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	130	#ifdef CONFIG_DEBUG_VM
Andy Lutomirski	47061a2	2017-09-17 09:03:48 -0700	[diff] [blame]	131	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev, prev_asid))) {
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	132	/*
				133	* If we were to BUG here, we'd be very likely to kill
				134	* the system so hard that we don't see the call trace.
				135	* Try to recover instead by ignoring the error and doing
				136	* a global flush to minimize the chance of corruption.
				137	*
				138	* (This is far from being a fully correct recovery.
				139	* Architecturally, the CPU could prefetch something
				140	* back into an incorrect ASID slot and leave it there
				141	* to cause trouble down the road. It's better than
				142	* nothing, though.)
				143	*/
				144	__flush_tlb_all();
				145	}
				146	#endif
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	147	this_cpu_write(cpu_tlbstate.is_lazy, false);
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	148
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	149	if (real_prev == next) {
Andy Lutomirski	e8b9b0c	2017-10-14 09:59:49 -0700	[diff] [blame]	150	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
				151	next->context.ctx_id);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	152
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	153	/*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	154	* We don't currently support having a real mm loaded without
				155	* our cpu set in mm_cpumask(). We have all the bookkeeping
				156	* in place to figure out whether we would need to flush
				157	* if our cpu were cleared in mm_cpumask(), but we don't
				158	* currently use it.
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	159	*/
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	160	if (WARN_ON_ONCE(real_prev != &init_mm &&
				161	!cpumask_test_cpu(cpu, mm_cpumask(next))))
				162	cpumask_set_cpu(cpu, mm_cpumask(next));
				163
				164	return;
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	165	} else {
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	166	u16 new_asid;
				167	bool need_flush;
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	168
				169	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
				170	/*
				171	* If our current stack is in vmalloc space and isn't
				172	* mapped in the new pgd, we'll double-fault. Forcibly
				173	* map it.
				174	*/
Andrey Ryabinin	196bd48	2017-09-29 17:15:36 +0300	[diff] [blame]	175	unsigned int index = pgd_index(current_stack_pointer);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	176	pgd_t *pgd = next->pgd + index;
				177
				178	if (unlikely(pgd_none(*pgd)))
				179	set_pgd(pgd, init_mm.pgd[index]);
				180	}
				181
				182	/* Stop remote flushes for the previous mm */
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	183	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
				184	real_prev != &init_mm);
				185	cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	186
				187	/*
				188	* Start remote flushes and then read tlb_gen.
				189	*/
				190	cpumask_set_cpu(cpu, mm_cpumask(next));
				191	next_tlb_gen = atomic64_read(&next->context.tlb_gen);
				192
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	193	choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	194
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	195	if (need_flush) {
				196	this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
				197	this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
Andy Lutomirski	47061a2	2017-09-17 09:03:48 -0700	[diff] [blame]	198	write_cr3(build_cr3(next, new_asid));
Andy Lutomirski	6753573	2017-11-04 04:16:12 -0700	[diff] [blame^]	199
				200	/*
				201	* NB: This gets called via leave_mm() in the idle path
				202	* where RCU functions differently. Tracing normally
				203	* uses RCU, so we need to use the _rcuidle variant.
				204	*
				205	* (There is no good reason for this. The idle code should
				206	* be rearranged to call this before rcu_idle_enter().)
				207	*/
				208	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	209	} else {
				210	/* The new ASID is already up to date. */
Andy Lutomirski	47061a2	2017-09-17 09:03:48 -0700	[diff] [blame]	211	write_cr3(build_cr3_noflush(next, new_asid));
Andy Lutomirski	6753573	2017-11-04 04:16:12 -0700	[diff] [blame^]	212
				213	/* See above wrt _rcuidle. */
				214	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	215	}
				216
				217	this_cpu_write(cpu_tlbstate.loaded_mm, next);
				218	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	219	}
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	220
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	221	load_mm_cr4(next);
Andy Lutomirski	7353425	2017-06-20 22:22:08 -0700	[diff] [blame]	222	switch_ldt(real_prev, next);
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	223	}
				224
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	225	/*
Andy Lutomirski	4e57b94	2017-10-14 09:59:50 -0700	[diff] [blame]	226	* Please ignore the name of this function. It should be called
				227	* switch_to_kernel_thread().
				228	*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	229	* enter_lazy_tlb() is a hint from the scheduler that we are entering a
				230	* kernel thread or other context without an mm. Acceptable implementations
				231	* include doing nothing whatsoever, switching to init_mm, or various clever
				232	* lazy tricks to try to minimize TLB flushes.
				233	*
				234	* The scheduler reserves the right to call enter_lazy_tlb() several times
				235	* in a row. It will notify us that we're going back to a real mm by
				236	* calling switch_mm_irqs_off().
				237	*/
				238	void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
				239	{
				240	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
				241	return;
				242
Andy Lutomirski	4e57b94	2017-10-14 09:59:50 -0700	[diff] [blame]	243	if (tlb_defer_switch_to_init_mm()) {
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	244	/*
				245	* There's a significant optimization that may be possible
				246	* here. We have accurate enough TLB flush tracking that we
				247	* don't need to maintain coherence of TLB per se when we're
				248	* lazy. We do, however, need to maintain coherence of
				249	* paging-structure caches. We could, in principle, leave our
				250	* old mm loaded and only switch to init_mm when
				251	* tlb_remove_page() happens.
				252	*/
				253	this_cpu_write(cpu_tlbstate.is_lazy, true);
				254	} else {
				255	switch_mm(NULL, &init_mm, NULL);
				256	}
				257	}
				258
				259	/*
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	260	* Call this when reinitializing a CPU. It fixes the following potential
				261	* problems:
				262	*
				263	* - The ASID changed from what cpu_tlbstate thinks it is (most likely
				264	* because the CPU was taken down and came back up with CR3's PCID
				265	* bits clear). CPU hotplug can do this.
				266	*
				267	* - The TLB contains junk in slots corresponding to inactive ASIDs.
				268	*
				269	* - The CPU went so far out to lunch that it may have missed a TLB
				270	* flush.
				271	*/
				272	void initialize_tlbstate_and_flush(void)
				273	{
				274	int i;
				275	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				276	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
				277	unsigned long cr3 = __read_cr3();
				278
				279	/* Assert that CR3 already references the right mm. */
				280	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
				281
				282	/*
				283	* Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
				284	* doesn't work like other CR4 bits because it can only be set from
				285	* long mode.)
				286	*/
Andy Lutomirski	7898f79	2017-09-10 08:52:58 -0700	[diff] [blame]	287	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	288	!(cr4_read_shadow() & X86_CR4_PCIDE));
				289
				290	/* Force ASID 0 and force a TLB flush. */
Andy Lutomirski	47061a2	2017-09-17 09:03:48 -0700	[diff] [blame]	291	write_cr3(build_cr3(mm, 0));
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	292
				293	/* Reinitialize tlbstate. */
				294	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
				295	this_cpu_write(cpu_tlbstate.next_asid, 1);
				296	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
				297	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
				298
				299	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
				300	this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
				301	}
				302
				303	/*
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	304	* flush_tlb_func_common()'s memory ordering requirement is that any
				305	* TLB fills that happen after we flush the TLB are ordered after we
				306	* read active_mm's tlb_gen. We don't need any explicit barriers
				307	* because all x86 flush operations are serializing and the
				308	* atomic64_read operation won't be reordered by the compiler.
				309	*/
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	310	static void flush_tlb_func_common(const struct flush_tlb_info *f,
				311	bool local, enum tlb_flush_reason reason)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	312	{
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	313	/*
				314	* We have three different tlb_gen values in here. They are:
				315	*
				316	* - mm_tlb_gen: the latest generation.
				317	* - local_tlb_gen: the generation that this CPU has already caught
				318	* up to.
				319	* - f->new_tlb_gen: the generation that the requester of the flush
				320	* wants us to catch up to.
				321	*/
				322	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	323	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	324	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	325	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	326
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	327	/* This code cannot presently handle being reentered. */
				328	VM_WARN_ON(!irqs_disabled());
				329
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	330	if (unlikely(loaded_mm == &init_mm))
				331	return;
				332
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	333	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	334	loaded_mm->context.ctx_id);
				335
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	336	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	337	/*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	338	* We're in lazy mode. We need to at least flush our
				339	* paging-structure cache to avoid speculatively reading
				340	* garbage into our TLB. Since switching to init_mm is barely
				341	* slower than a minimal flush, just switch to init_mm.
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	342	*/
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	343	switch_mm_irqs_off(NULL, &init_mm, NULL);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	344	return;
				345	}
				346
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	347	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
				348	/*
				349	* There's nothing to do: we're already up to date. This can
				350	* happen if two concurrent flushes happen -- the first flush to
				351	* be handled can catch us all the way up, leaving no work for
				352	* the second flush.
				353	*/
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	354	trace_tlb_flush(reason, 0);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	355	return;
				356	}
				357
				358	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
				359	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
				360
				361	/*
				362	* If we get to this point, we know that our TLB is out of date.
				363	* This does not strictly imply that we need to flush (it's
				364	* possible that f->new_tlb_gen <= local_tlb_gen), but we're
				365	* going to need to flush in the very near future, so we might
				366	* as well get it over with.
				367	*
				368	* The only question is whether to do a full or partial flush.
				369	*
				370	* We do a partial flush if requested and two extra conditions
				371	* are met:
				372	*
				373	* 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that
				374	* we've always done all needed flushes to catch up to
				375	* local_tlb_gen. If, for example, local_tlb_gen == 2 and
				376	* f->new_tlb_gen == 3, then we know that the flush needed to bring
				377	* us up to date for tlb_gen 3 is the partial flush we're
				378	* processing.
				379	*
				380	* As an example of why this check is needed, suppose that there
				381	* are two concurrent flushes. The first is a full flush that
				382	* changes context.tlb_gen from 1 to 2. The second is a partial
				383	* flush that changes context.tlb_gen from 2 to 3. If they get
				384	* processed on this CPU in reverse order, we'll see
				385	* local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
				386	* If we were to use __flush_tlb_single() and set local_tlb_gen to
				387	* 3, we'd break the invariant: we'd update local_tlb_gen above
				388	* 1 without the full flush that's needed for tlb_gen 2.
				389	*
				390	* 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimization.
				391	* Partial TLB flushes are not all that much cheaper than full TLB
				392	* flushes, so it seems unlikely that it would be a performance win
				393	* to do a partial flush if that won't bring our TLB fully up to
				394	* date. By doing a full flush instead, we can increase
				395	* local_tlb_gen all the way to mm_tlb_gen and we can probably
				396	* avoid another flush in the very near future.
				397	*/
				398	if (f->end != TLB_FLUSH_ALL &&
				399	f->new_tlb_gen == local_tlb_gen + 1 &&
				400	f->new_tlb_gen == mm_tlb_gen) {
				401	/* Partial flush */
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	402	unsigned long addr;
Andy Lutomirski	be4ffc0	2017-05-28 10:00:16 -0700	[diff] [blame]	403	unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	404
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	405	addr = f->start;
				406	while (addr < f->end) {
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	407	__flush_tlb_single(addr);
				408	addr += PAGE_SIZE;
				409	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	410	if (local)
				411	count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
				412	trace_tlb_flush(reason, nr_pages);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	413	} else {
				414	/* Full flush. */
				415	local_flush_tlb();
				416	if (local)
				417	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
				418	trace_tlb_flush(reason, TLB_FLUSH_ALL);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	419	}
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	420
				421	/* Both paths above update our state to mm_tlb_gen. */
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	422	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	423	}
				424
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	425	static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
				426	{
				427	const struct flush_tlb_info *f = info;
				428
				429	flush_tlb_func_common(f, true, reason);
				430	}
				431
				432	static void flush_tlb_func_remote(void *info)
				433	{
				434	const struct flush_tlb_info *f = info;
				435
				436	inc_irq_stat(irq_tlb_count);
				437
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	438	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	439	return;
				440
				441	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
				442	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
				443	}
				444
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	445	void native_flush_tlb_others(const struct cpumask *cpumask,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	446	const struct flush_tlb_info *info)
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	447	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	448	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	449	if (info->end == TLB_FLUSH_ALL)
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	450	trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
				451	else
				452	trace_tlb_flush(TLB_REMOTE_SEND_IPI,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	453	(info->end - info->start) >> PAGE_SHIFT);
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	454
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	455	if (is_uv_system()) {
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	456	/*
				457	* This whole special case is confused. UV has a "Broadcast
				458	* Assist Unit", which seems to be a fancy way to send IPIs.
				459	* Back when x86 used an explicit TLB flush IPI, UV was
				460	* optimized to use its own mechanism. These days, x86 uses
				461	* smp_call_function_many(), but UV still uses a manual IPI,
				462	* and that IPI's action is out of date -- it does a manual
				463	* flush instead of calling flush_tlb_func_remote(). This
				464	* means that the percpu tlb_gen variables won't be updated
				465	* and we'll do pointless flushes on future context switches.
				466	*
				467	* Rather than hooking native_flush_tlb_others() here, I think
				468	* that UV should be updated so that smp_call_function_many(),
				469	* etc, are optimal on UV.
				470	*/
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	471	unsigned int cpu;
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	472
Xiao Guangrong	25542c6	2011-03-15 09:57:37 +0800	[diff] [blame]	473	cpu = smp_processor_id();
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	474	cpumask = uv_flush_tlb_others(cpumask, info);
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	475	if (cpumask)
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	476	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	477	(void *)info, 1);
Mike Travis	0e21990	2009-01-10 21:58:10 -0800	[diff] [blame]	478	return;
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	479	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	480	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	481	(void *)info, 1);
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	482	}
				483
Dave Hansen	a510247	2014-07-31 08:41:03 -0700	[diff] [blame]	484	/*
				485	* See Documentation/x86/tlb.txt for details. We choose 33
				486	* because it is large enough to cover the vast majority (at
				487	* least 95%) of allocations, and is small enough that we are
				488	* confident it will not cause too much overhead. Each single
				489	* flush is about 100 ns, so this caps the maximum overhead at
				490	* _about_ 3,000 ns.
				491	*
				492	* This is in units of pages.
				493	*/
Jeremiah Mahler	8642685	2014-08-09 00:38:33 -0700	[diff] [blame]	494	static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	495
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	496	void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				497	unsigned long end, unsigned long vmflag)
				498	{
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	499	int cpu;
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	500
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	501	struct flush_tlb_info info = {
				502	.mm = mm,
				503	};
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	504
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	505	cpu = get_cpu();
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	506
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	507	/* This is also a barrier that synchronizes with switch_mm(). */
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	508	info.new_tlb_gen = inc_mm_tlb_gen(mm);
Andy Lutomirski	71b3c12	2016-01-06 12:21:01 -0800	[diff] [blame]	509
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	510	/* Should we flush just the requested range? */
				511	if ((end != TLB_FLUSH_ALL) &&
				512	!(vmflag & VM_HUGETLB) &&
				513	((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	514	info.start = start;
				515	info.end = end;
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	516	} else {
				517	info.start = 0UL;
				518	info.end = TLB_FLUSH_ALL;
Dave Hansen	4995ab9	2014-07-31 08:40:54 -0700	[diff] [blame]	519	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	520
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	521	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
				522	VM_WARN_ON(irqs_disabled());
				523	local_irq_disable();
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	524	flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	525	local_irq_enable();
				526	}
				527
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	528	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	529	flush_tlb_others(mm_cpumask(mm), &info);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	530
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	531	put_cpu();
Alex Shi	e7b52ff	2012-06-28 09:02:17 +0800	[diff] [blame]	532	}
				533
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	534
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	535	static void do_flush_tlb_all(void *info)
				536	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	537	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	538	__flush_tlb_all();
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	539	}
				540
				541	void flush_tlb_all(void)
				542	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	543	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Jens Axboe	15c8b6c	2008-05-09 09:39:44 +0200	[diff] [blame]	544	on_each_cpu(do_flush_tlb_all, NULL, 1);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	545	}
Alex Shi	3df3212	2012-06-28 09:02:20 +0800	[diff] [blame]	546
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	547	static void do_kernel_range_flush(void *info)
				548	{
				549	struct flush_tlb_info *f = info;
				550	unsigned long addr;
				551
				552	/* flush range by one by one 'invlpg' */
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	553	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	554	__flush_tlb_single(addr);
				555	}
				556
				557	void flush_tlb_kernel_range(unsigned long start, unsigned long end)
				558	{
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	559
				560	/* Balance as user space task's flush, a bit conservative */
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	561	if (end == TLB_FLUSH_ALL ||
Andy Lutomirski	be4ffc0	2017-05-28 10:00:16 -0700	[diff] [blame]	562	(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	563	on_each_cpu(do_flush_tlb_all, NULL, 1);
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	564	} else {
				565	struct flush_tlb_info info;
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	566	info.start = start;
				567	info.end = end;
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	568	on_each_cpu(do_kernel_range_flush, &info, 1);
				569	}
				570	}
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	571
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	572	void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
				573	{
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	574	struct flush_tlb_info info = {
				575	.mm = NULL,
				576	.start = 0UL,
				577	.end = TLB_FLUSH_ALL,
				578	};
				579
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	580	int cpu = get_cpu();
				581
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	582	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
				583	VM_WARN_ON(irqs_disabled());
				584	local_irq_disable();
Andy Lutomirski	3f79e4c	2017-05-28 10:00:13 -0700	[diff] [blame]	585	flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	586	local_irq_enable();
				587	}
				588
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	589	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	590	flush_tlb_others(&batch->cpumask, &info);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	591
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	592	cpumask_clear(&batch->cpumask);
				593
				594	put_cpu();
				595	}
				596
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	597	static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
				598	size_t count, loff_t *ppos)
				599	{
				600	char buf[32];
				601	unsigned int len;
				602
				603	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
				604	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
				605	}
				606
				607	static ssize_t tlbflush_write_file(struct file *file,
				608	const char __user *user_buf, size_t count, loff_t *ppos)
				609	{
				610	char buf[32];
				611	ssize_t len;
				612	int ceiling;
				613
				614	len = min(count, sizeof(buf) - 1);
				615	if (copy_from_user(buf, user_buf, len))
				616	return -EFAULT;
				617
				618	buf[len] = '\0';
				619	if (kstrtoint(buf, 0, &ceiling))
				620	return -EINVAL;
				621
				622	if (ceiling < 0)
				623	return -EINVAL;
				624
				625	tlb_single_page_flush_ceiling = ceiling;
				626	return count;
				627	}
				628
				629	static const struct file_operations fops_tlbflush = {
				630	.read = tlbflush_read_file,
				631	.write = tlbflush_write_file,
				632	.llseek = default_llseek,
				633	};
				634
				635	static int __init create_tlb_single_page_flush_ceiling(void)
				636	{
				637	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
				638	arch_debugfs_dir, NULL, &fops_tlbflush);
				639	return 0;
				640	}
				641	late_initcall(create_tlb_single_page_flush_ceiling);