Blame - arch/x86/mm/tlb.c - SHIFTPHONES/kernel/shift/mainline

blob: 9517d1b2a2810817907640c6c2dde698c62b3e79 [file] [log] [blame]

Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	1	#include <linux/init.h>
				2
				3	#include <linux/mm.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	4	#include <linux/spinlock.h>
				5	#include <linux/smp.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	6	#include <linux/interrupt.h>
Paul Gortmaker	4b599fe	2016-07-13 20:18:55 -0400	[diff] [blame]	7	#include <linux/export.h>
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	8	#include <linux/cpu.h>
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	9	#include <linux/debugfs.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	10
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	11	#include <asm/tlbflush.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	12	#include <asm/mmu_context.h>
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	13	#include <asm/nospec-branch.h>
Jan Beulich	350f8f5	2009-11-13 11:54:40 +0000	[diff] [blame]	14	#include <asm/cache.h>
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	15	#include <asm/apic.h>
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	16	#include <asm/uv/uv.h>
Glauber Costa	5af5573	2008-03-25 13:28:56 -0300	[diff] [blame]	17
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	18	/*
Andy Lutomirski	ce4a4e56	2017-05-28 10:00:14 -0700	[diff] [blame]	19	* TLB flushing, formerly SMP-only
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	20	* c/o Linus Torvalds.
				21	*
				22	* These mean you can really definitely utterly forget about
				23	* writing to user space from interrupts. (It's not allowed anyway).
				24	*
				25	* Optimizations Manfred Spraul <manfred@colorfullife.com>
				26	*
				27	* More scalable flush, from Andi Kleen
				28	*
Alex Shi	52aec33	2012-06-28 09:02:23 +0800	[diff] [blame]	29	* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	30	*/
				31
Dave Hansen	2ea907c	2017-12-04 15:07:57 +0100	[diff] [blame]	32	/*
				33	* We get here when we do something requiring a TLB invalidation
				34	* but could not go invalidate all of the contexts. We do the
				35	* necessary invalidation by clearing out the 'ctx_id' which
				36	* forces a TLB flush when the context is loaded.
				37	*/
zhong jiang	387048f	2018-07-21 15:55:32 +0800	[diff] [blame]	38	static void clear_asid_other(void)
Dave Hansen	2ea907c	2017-12-04 15:07:57 +0100	[diff] [blame]	39	{
				40	u16 asid;
				41
				42	/*
				43	* This is only expected to be set if we have disabled
				44	* kernel _PAGE_GLOBAL pages.
				45	*/
				46	if (!static_cpu_has(X86_FEATURE_PTI)) {
				47	WARN_ON_ONCE(1);
				48	return;
				49	}
				50
				51	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
				52	/* Do not need to flush the current asid */
				53	if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
				54	continue;
				55	/*
				56	* Make sure the next time we go to switch to
				57	* this asid, we do a flush:
				58	*/
				59	this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
				60	}
				61	this_cpu_write(cpu_tlbstate.invalidate_other, false);
				62	}
				63
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	64	atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
				65
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	66
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	67	static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
				68	u16 new_asid, bool need_flush)
				69	{
				70	u16 asid;
				71
				72	if (!static_cpu_has(X86_FEATURE_PCID)) {
				73	*new_asid = 0;
				74	*need_flush = true;
				75	return;
				76	}
				77
Dave Hansen	2ea907c	2017-12-04 15:07:57 +0100	[diff] [blame]	78	if (this_cpu_read(cpu_tlbstate.invalidate_other))
				79	clear_asid_other();
				80
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	81	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
				82	if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
				83	next->context.ctx_id)
				84	continue;
				85
				86	*new_asid = asid;
				87	*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
				88	next_tlb_gen);
				89	return;
				90	}
				91
				92	/*
				93	* We don't currently own an ASID slot on this CPU.
				94	* Allocate a slot.
				95	*/
				96	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
				97	if (*new_asid >= TLB_NR_DYN_ASIDS) {
				98	*new_asid = 0;
				99	this_cpu_write(cpu_tlbstate.next_asid, 1);
				100	}
				101	*need_flush = true;
				102	}
				103
Dave Hansen	48e1119	2017-12-04 15:07:58 +0100	[diff] [blame]	104	static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
				105	{
				106	unsigned long new_mm_cr3;
				107
				108	if (need_flush) {
Peter Zijlstra	6fd166a	2017-12-04 15:07:59 +0100	[diff] [blame]	109	invalidate_user_asid(new_asid);
Dave Hansen	48e1119	2017-12-04 15:07:58 +0100	[diff] [blame]	110	new_mm_cr3 = build_cr3(pgdir, new_asid);
				111	} else {
				112	new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
				113	}
				114
				115	/*
				116	* Caution: many callers of this function expect
				117	* that load_cr3() is serializing and orders TLB
				118	* fills with respect to the mm_cpumask writes.
				119	*/
				120	write_cr3(new_mm_cr3);
				121	}
				122
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	123	void leave_mm(int cpu)
				124	{
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	125	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				126
				127	/*
				128	* It's plausible that we're in lazy TLB mode while our mm is init_mm.
				129	* If so, our callers still expect us to flush the TLB, but there
				130	* aren't any user TLB entries in init_mm to worry about.
				131	*
				132	* This needs to happen before any other sanity checks due to
				133	* intel_idle's shenanigans.
				134	*/
				135	if (loaded_mm == &init_mm)
				136	return;
				137
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	138	/* Warn if we're not lazy. */
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	139	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	140
				141	switch_mm(NULL, &init_mm, NULL);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	142	}
Andy Lutomirski	6753573	2017-11-04 04:16:12 -0700	[diff] [blame]	143	EXPORT_SYMBOL_GPL(leave_mm);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	144
/*
 * Interrupt-safe wrapper around switch_mm_irqs_off(): saves and disables
 * local interrupts, performs the switch, then restores the saved state.
 *
 * @prev: previous mm, forwarded unchanged (callers in this file pass NULL).
 * @next: mm to switch to.
 * @tsk:  task being switched to, forwarded unchanged (may be NULL).
 */
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(flags);
}
				154
Andy Lutomirski	5beda7d	2018-01-25 13:12:14 -0800	[diff] [blame]	155	static void sync_current_stack_to_mm(struct mm_struct *mm)
				156	{
				157	unsigned long sp = current_stack_pointer;
				158	pgd_t *pgd = pgd_offset(mm, sp);
				159
Kirill A. Shutemov	ed7588d	2018-05-18 13:35:24 +0300	[diff] [blame]	160	if (pgtable_l5_enabled()) {
Andy Lutomirski	5beda7d	2018-01-25 13:12:14 -0800	[diff] [blame]	161	if (unlikely(pgd_none(*pgd))) {
				162	pgd_t *pgd_ref = pgd_offset_k(sp);
				163
				164	set_pgd(pgd, *pgd_ref);
				165	}
				166	} else {
				167	/*
				168	* "pgd" is faked. The top level entries are "p4d"s, so sync
				169	* the p4d. This compiles to approximately the same code as
				170	* the 5-level case.
				171	*/
				172	p4d_t *p4d = p4d_offset(pgd, sp);
				173
				174	if (unlikely(p4d_none(*p4d))) {
				175	pgd_t *pgd_ref = pgd_offset_k(sp);
				176	p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
				177
				178	set_p4d(p4d, *p4d_ref);
				179	}
				180	}
				181	}
				182
Andy Lutomirski	078194f	2016-04-26 09:39:09 -0700	[diff] [blame]	183	void switch_mm_irqs_off(struct mm_struct prev, struct mm_struct next,
				184	struct task_struct *tsk)
				185	{
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	186	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	187	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	188	unsigned cpu = smp_processor_id();
				189	u64 next_tlb_gen;
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	190
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	191	/*
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	192	* NB: The scheduler will call us with prev == next when switching
				193	* from lazy TLB mode to normal mode if active_mm isn't changing.
				194	* When this happens, we don't assume that CR3 (and hence
				195	* cpu_tlbstate.loaded_mm) matches next.
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	196	*
				197	* NB: leave_mm() calls us with prev == NULL and tsk == NULL.
				198	*/
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	199
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	200	/* We don't want flush_tlb_func_* to run concurrently with us. */
				201	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
				202	WARN_ON_ONCE(!irqs_disabled());
				203
				204	/*
				205	* Verify that CR3 is what we think it is. This will catch
				206	* hypothetical buggy code that directly switches to swapper_pg_dir
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	207	* without going through leave_mm() / switch_mm_irqs_off() or that
				208	* does something like write_cr3(read_cr3_pa()).
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	209	*
				210	* Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
				211	* isn't free.
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	212	*/
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	213	#ifdef CONFIG_DEBUG_VM
Dave Hansen	50fb83a6	2017-12-04 15:07:54 +0100	[diff] [blame]	214	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	215	/*
				216	* If we were to BUG here, we'd be very likely to kill
				217	* the system so hard that we don't see the call trace.
				218	* Try to recover instead by ignoring the error and doing
				219	* a global flush to minimize the chance of corruption.
				220	*
				221	* (This is far from being a fully correct recovery.
				222	* Architecturally, the CPU could prefetch something
				223	* back into an incorrect ASID slot and leave it there
				224	* to cause trouble down the road. It's better than
				225	* nothing, though.)
				226	*/
				227	__flush_tlb_all();
				228	}
				229	#endif
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	230	this_cpu_write(cpu_tlbstate.is_lazy, false);
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	231
Mathieu Desnoyers	306e060	2018-01-29 15:20:12 -0500	[diff] [blame]	232	/*
Mathieu Desnoyers	10bcc80	2018-01-29 15:20:18 -0500	[diff] [blame]	233	* The membarrier system call requires a full memory barrier and
				234	* core serialization before returning to user-space, after
				235	* storing to rq->curr. Writing to CR3 provides that full
				236	* memory barrier and core serializing instruction.
Mathieu Desnoyers	306e060	2018-01-29 15:20:12 -0500	[diff] [blame]	237	*/
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	238	if (real_prev == next) {
Andy Lutomirski	e8b9b0c	2017-10-14 09:59:49 -0700	[diff] [blame]	239	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
				240	next->context.ctx_id);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	241
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	242	/*
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	243	* We don't currently support having a real mm loaded without
				244	* our cpu set in mm_cpumask(). We have all the bookkeeping
				245	* in place to figure out whether we would need to flush
				246	* if our cpu were cleared in mm_cpumask(), but we don't
				247	* currently use it.
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	248	*/
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	249	if (WARN_ON_ONCE(real_prev != &init_mm &&
				250	!cpumask_test_cpu(cpu, mm_cpumask(next))))
				251	cpumask_set_cpu(cpu, mm_cpumask(next));
				252
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	253	return;
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	254	} else {
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	255	u16 new_asid;
				256	bool need_flush;
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	257	u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
				258
				259	/*
				260	* Avoid user/user BTB poisoning by flushing the branch
				261	* predictor when switching between processes. This stops
				262	* one process from doing Spectre-v2 attacks on another.
				263	*
				264	* As an optimization, flush indirect branches only when
				265	* switching into processes that disable dumping. This
				266	* protects high value processes like gpg, without having
				267	* too high performance overhead. IBPB is expensive!
				268	*
				269	* This will not flush branches when switching into kernel
				270	* threads. It will also not flush if we switch to idle
				271	* thread and back to the same process. It will flush if we
				272	* switch to a different non-dumpable process.
				273	*/
				274	if (tsk && tsk->mm &&
				275	tsk->mm->context.ctx_id != last_ctx_id &&
				276	get_dumpable(tsk->mm) != SUID_DUMP_USER)
				277	indirect_branch_prediction_barrier();
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	278
				279	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
				280	/*
				281	* If our current stack is in vmalloc space and isn't
				282	* mapped in the new pgd, we'll double-fault. Forcibly
				283	* map it.
				284	*/
Andy Lutomirski	5beda7d	2018-01-25 13:12:14 -0800	[diff] [blame]	285	sync_current_stack_to_mm(next);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	286	}
				287
Rik van Riel	e9d8c61	2018-07-16 15:03:37 -0400	[diff] [blame]	288	/*
				289	* Stop remote flushes for the previous mm.
				290	* Skip kernel threads; we never send init_mm TLB flushing IPIs,
				291	* but the bitmap manipulation can cause cache line contention.
				292	*/
				293	if (real_prev != &init_mm) {
				294	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu,
				295	mm_cpumask(real_prev)));
				296	cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
				297	}
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	298
				299	/*
				300	* Start remote flushes and then read tlb_gen.
				301	*/
Rik van Riel	e9d8c61	2018-07-16 15:03:37 -0400	[diff] [blame]	302	if (next != &init_mm)
				303	cpumask_set_cpu(cpu, mm_cpumask(next));
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	304	next_tlb_gen = atomic64_read(&next->context.tlb_gen);
				305
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	306	choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	307
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	308	if (need_flush) {
				309	this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
				310	this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
				311	load_new_mm_cr3(next->pgd, new_asid, true);
				312
				313	/*
				314	* NB: This gets called via leave_mm() in the idle path
				315	* where RCU functions differently. Tracing normally
				316	* uses RCU, so we need to use the _rcuidle variant.
				317	*
				318	* (There is no good reason for this. The idle code should
				319	* be rearranged to call this before rcu_idle_enter().)
				320	*/
				321	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
				322	} else {
				323	/* The new ASID is already up to date. */
				324	load_new_mm_cr3(next->pgd, new_asid, false);
				325
				326	/* See above wrt _rcuidle. */
				327	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
				328	}
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	329
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	330	/*
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	331	* Record last user mm's context id, so we can avoid
				332	* flushing branch buffer with IBPB if we switch back
				333	* to the same user.
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	334	*/
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	335	if (next != &init_mm)
				336	this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	337
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	338	this_cpu_write(cpu_tlbstate.loaded_mm, next);
				339	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	340	}
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	341
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	342	load_mm_cr4(next);
Andy Lutomirski	7353425	2017-06-20 22:22:08 -0700	[diff] [blame]	343	switch_ldt(real_prev, next);
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	344	}
				345
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	346	/*
Andy Lutomirski	4e57b94	2017-10-14 09:59:50 -0700	[diff] [blame]	347	* Please ignore the name of this function. It should be called
				348	* switch_to_kernel_thread().
				349	*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	350	* enter_lazy_tlb() is a hint from the scheduler that we are entering a
				351	* kernel thread or other context without an mm. Acceptable implementations
				352	* include doing nothing whatsoever, switching to init_mm, or various clever
				353	* lazy tricks to try to minimize TLB flushes.
				354	*
				355	* The scheduler reserves the right to call enter_lazy_tlb() several times
				356	* in a row. It will notify us that we're going back to a real mm by
				357	* calling switch_mm_irqs_off().
				358	*/
				359	void enter_lazy_tlb(struct mm_struct mm, struct task_struct tsk)
				360	{
				361	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
				362	return;
				363
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	364	if (tlb_defer_switch_to_init_mm()) {
				365	/*
				366	* There's a significant optimization that may be possible
				367	* here. We have accurate enough TLB flush tracking that we
				368	* don't need to maintain coherence of TLB per se when we're
				369	* lazy. We do, however, need to maintain coherence of
				370	* paging-structure caches. We could, in principle, leave our
				371	* old mm loaded and only switch to init_mm when
				372	* tlb_remove_page() happens.
				373	*/
				374	this_cpu_write(cpu_tlbstate.is_lazy, true);
				375	} else {
				376	switch_mm(NULL, &init_mm, NULL);
				377	}
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	378	}
				379
				380	/*
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	381	* Call this when reinitializing a CPU. It fixes the following potential
				382	* problems:
				383	*
				384	* - The ASID changed from what cpu_tlbstate thinks it is (most likely
				385	* because the CPU was taken down and came back up with CR3's PCID
				386	* bits clear. CPU hotplug can do this.
				387	*
				388	* - The TLB contains junk in slots corresponding to inactive ASIDs.
				389	*
				390	* - The CPU went so far out to lunch that it may have missed a TLB
				391	* flush.
				392	*/
				393	void initialize_tlbstate_and_flush(void)
				394	{
				395	int i;
				396	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				397	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
				398	unsigned long cr3 = __read_cr3();
				399
				400	/* Assert that CR3 already references the right mm. */
				401	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
				402
				403	/*
				404	* Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
				405	* doesn't work like other CR4 bits because it can only be set from
				406	* long mode.)
				407	*/
Andy Lutomirski	7898f79	2017-09-10 08:52:58 -0700	[diff] [blame]	408	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	409	!(cr4_read_shadow() & X86_CR4_PCIDE));
				410
				411	/* Force ASID 0 and force a TLB flush. */
Dave Hansen	50fb83a6	2017-12-04 15:07:54 +0100	[diff] [blame]	412	write_cr3(build_cr3(mm->pgd, 0));
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	413
				414	/* Reinitialize tlbstate. */
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	415	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	416	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
				417	this_cpu_write(cpu_tlbstate.next_asid, 1);
				418	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
				419	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
				420
				421	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
				422	this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
				423	}
				424
				425	/*
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	426	* flush_tlb_func_common()'s memory ordering requirement is that any
				427	* TLB fills that happen after we flush the TLB are ordered after we
				428	* read active_mm's tlb_gen. We don't need any explicit barriers
				429	* because all x86 flush operations are serializing and the
				430	* atomic64_read operation won't be reordered by the compiler.
				431	*/
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	432	static void flush_tlb_func_common(const struct flush_tlb_info *f,
				433	bool local, enum tlb_flush_reason reason)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	434	{
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	435	/*
				436	* We have three different tlb_gen values in here. They are:
				437	*
				438	* - mm_tlb_gen: the latest generation.
				439	* - local_tlb_gen: the generation that this CPU has already caught
				440	* up to.
				441	* - f->new_tlb_gen: the generation that the requester of the flush
				442	* wants us to catch up to.
				443	*/
				444	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	445	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	446	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	447	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	448
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	449	/* This code cannot presently handle being reentered. */
				450	VM_WARN_ON(!irqs_disabled());
				451
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	452	if (unlikely(loaded_mm == &init_mm))
				453	return;
				454
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	455	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	456	loaded_mm->context.ctx_id);
				457
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	458	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	459	/*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	460	* We're in lazy mode. We need to at least flush our
				461	* paging-structure cache to avoid speculatively reading
				462	* garbage into our TLB. Since switching to init_mm is barely
				463	* slower than a minimal flush, just switch to init_mm.
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	464	*/
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	465	switch_mm_irqs_off(NULL, &init_mm, NULL);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	466	return;
				467	}
				468
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	469	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
				470	/*
				471	* There's nothing to do: we're already up to date. This can
				472	* happen if two concurrent flushes happen -- the first flush to
				473	* be handled can catch us all the way up, leaving no work for
				474	* the second flush.
				475	*/
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	476	trace_tlb_flush(reason, 0);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	477	return;
				478	}
				479
				480	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
				481	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
				482
				483	/*
				484	* If we get to this point, we know that our TLB is out of date.
				485	* This does not strictly imply that we need to flush (it's
				486	* possible that f->new_tlb_gen <= local_tlb_gen), but we're
				487	* going to need to flush in the very near future, so we might
				488	* as well get it over with.
				489	*
				490	* The only question is whether to do a full or partial flush.
				491	*
				492	* We do a partial flush if requested and two extra conditions
				493	* are met:
				494	*
				495	* 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that
				496	* we've always done all needed flushes to catch up to
				497	* local_tlb_gen. If, for example, local_tlb_gen == 2 and
				498	* f->new_tlb_gen == 3, then we know that the flush needed to bring
				499	* us up to date for tlb_gen 3 is the partial flush we're
				500	* processing.
				501	*
				502	* As an example of why this check is needed, suppose that there
				503	* are two concurrent flushes. The first is a full flush that
				504	* changes context.tlb_gen from 1 to 2. The second is a partial
				505	* flush that changes context.tlb_gen from 2 to 3. If they get
				506	* processed on this CPU in reverse order, we'll see
				507	* local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
Andy Lutomirski	1299ef1	2018-01-31 08:03:10 -0800	[diff] [blame]	508	* If we were to use __flush_tlb_one_user() and set local_tlb_gen to
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	509	* 3, we'd be break the invariant: we'd update local_tlb_gen above
				510	* 1 without the full flush that's needed for tlb_gen 2.
				511	*
				512	* 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimiation.
				513	* Partial TLB flushes are not all that much cheaper than full TLB
				514	* flushes, so it seems unlikely that it would be a performance win
				515	* to do a partial flush if that won't bring our TLB fully up to
				516	* date. By doing a full flush instead, we can increase
				517	* local_tlb_gen all the way to mm_tlb_gen and we can probably
				518	* avoid another flush in the very near future.
				519	*/
				520	if (f->end != TLB_FLUSH_ALL &&
				521	f->new_tlb_gen == local_tlb_gen + 1 &&
				522	f->new_tlb_gen == mm_tlb_gen) {
				523	/* Partial flush */
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	524	unsigned long addr;
Andy Lutomirski	be4ffc0	2017-05-28 10:00:16 -0700	[diff] [blame]	525	unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	526
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	527	addr = f->start;
				528	while (addr < f->end) {
Andy Lutomirski	1299ef1	2018-01-31 08:03:10 -0800	[diff] [blame]	529	__flush_tlb_one_user(addr);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	530	addr += PAGE_SIZE;
				531	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	532	if (local)
				533	count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
				534	trace_tlb_flush(reason, nr_pages);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	535	} else {
				536	/* Full flush. */
				537	local_flush_tlb();
				538	if (local)
				539	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
				540	trace_tlb_flush(reason, TLB_FLUSH_ALL);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	541	}
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	542
				543	/* Both paths above update our state to mm_tlb_gen. */
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	544	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	545	}
				546
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	547	static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
				548	{
				549	const struct flush_tlb_info *f = info;
				550
				551	flush_tlb_func_common(f, true, reason);
				552	}
				553
				554	static void flush_tlb_func_remote(void *info)
				555	{
				556	const struct flush_tlb_info *f = info;
				557
				558	inc_irq_stat(irq_tlb_count);
				559
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	560	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	561	return;
				562
				563	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
				564	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
				565	}
				566
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	567	void native_flush_tlb_others(const struct cpumask *cpumask,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	568	const struct flush_tlb_info *info)
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	569	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	570	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	571	if (info->end == TLB_FLUSH_ALL)
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	572	trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
				573	else
				574	trace_tlb_flush(TLB_REMOTE_SEND_IPI,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	575	(info->end - info->start) >> PAGE_SHIFT);
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	576
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	577	if (is_uv_system()) {
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	578	/*
				579	* This whole special case is confused. UV has a "Broadcast
				580	* Assist Unit", which seems to be a fancy way to send IPIs.
				581	* Back when x86 used an explicit TLB flush IPI, UV was
				582	* optimized to use its own mechanism. These days, x86 uses
				583	* smp_call_function_many(), but UV still uses a manual IPI,
				584	* and that IPI's action is out of date -- it does a manual
				585	* flush instead of calling flush_tlb_func_remote(). This
				586	* means that the percpu tlb_gen variables won't be updated
				587	* and we'll do pointless flushes on future context switches.
				588	*
				589	* Rather than hooking native_flush_tlb_others() here, I think
				590	* that UV should be updated so that smp_call_function_many(),
				591	* etc, are optimal on UV.
				592	*/
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	593	unsigned int cpu;
				594
Xiao Guangrong	25542c6	2011-03-15 09:57:37 +0800	[diff] [blame]	595	cpu = smp_processor_id();
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	596	cpumask = uv_flush_tlb_others(cpumask, info);
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	597	if (cpumask)
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	598	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	599	(void *)info, 1);
Mike Travis	0e21990	2009-01-10 21:58:10 -0800	[diff] [blame]	600	return;
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	601	}
Peter Zijlstra	52a288c	2018-08-22 17:30:13 +0200	[diff] [blame]	602	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	603	(void *)info, 1);
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	604	}
				605
/*
 * Upper bound, in pages, on the size of a flush that is still done one page
 * at a time; anything larger is turned into a full TLB flush.
 *
 * See Documentation/x86/tlb.txt for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * The value can be read and changed at run time through the
 * "tlb_single_page_flush_ceiling" debugfs file created at the end of this
 * file.
 *
 * This is in units of pages.
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	617
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	618	void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				619	unsigned long end, unsigned long vmflag)
				620	{
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	621	int cpu;
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	622
Nadav Amit	515ab7c	2018-01-31 13:19:12 -0800	[diff] [blame]	623	struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	624	.mm = mm,
				625	};
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	626
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	627	cpu = get_cpu();
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	628
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	629	/* This is also a barrier that synchronizes with switch_mm(). */
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	630	info.new_tlb_gen = inc_mm_tlb_gen(mm);
Andy Lutomirski	71b3c12	2016-01-06 12:21:01 -0800	[diff] [blame]	631
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	632	/* Should we flush just the requested range? */
				633	if ((end != TLB_FLUSH_ALL) &&
				634	!(vmflag & VM_HUGETLB) &&
				635	((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	636	info.start = start;
				637	info.end = end;
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	638	} else {
				639	info.start = 0UL;
				640	info.end = TLB_FLUSH_ALL;
Dave Hansen	4995ab9	2014-07-31 08:40:54 -0700	[diff] [blame]	641	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	642
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	643	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
				644	VM_WARN_ON(irqs_disabled());
				645	local_irq_disable();
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	646	flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	647	local_irq_enable();
				648	}
				649
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	650	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	651	flush_tlb_others(mm_cpumask(mm), &info);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	652
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	653	put_cpu();
Alex Shi	e7b52ff	2012-06-28 09:02:17 +0800	[diff] [blame]	654	}
				655
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	656
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	657	static void do_flush_tlb_all(void *info)
				658	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	659	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	660	__flush_tlb_all();
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	661	}
				662
				663	void flush_tlb_all(void)
				664	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	665	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Jens Axboe	15c8b6c	2008-05-09 09:39:44 +0200	[diff] [blame]	666	on_each_cpu(do_flush_tlb_all, NULL, 1);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	667	}
Alex Shi	3df3212	2012-06-28 09:02:20 +0800	[diff] [blame]	668
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	669	static void do_kernel_range_flush(void *info)
				670	{
				671	struct flush_tlb_info *f = info;
				672	unsigned long addr;
				673
				674	/* flush range by one by one 'invlpg' */
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	675	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
Andy Lutomirski	1299ef1	2018-01-31 08:03:10 -0800	[diff] [blame]	676	__flush_tlb_one_kernel(addr);
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	677	}
				678
				679	void flush_tlb_kernel_range(unsigned long start, unsigned long end)
				680	{
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	681
				682	/* Balance as user space task's flush, a bit conservative */
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	683	if (end == TLB_FLUSH_ALL \|\|
Andy Lutomirski	be4ffc0	2017-05-28 10:00:16 -0700	[diff] [blame]	684	(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	685	on_each_cpu(do_flush_tlb_all, NULL, 1);
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	686	} else {
				687	struct flush_tlb_info info;
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	688	info.start = start;
				689	info.end = end;
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	690	on_each_cpu(do_kernel_range_flush, &info, 1);
				691	}
				692	}
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	693
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	694	void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
				695	{
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	696	struct flush_tlb_info info = {
				697	.mm = NULL,
				698	.start = 0UL,
				699	.end = TLB_FLUSH_ALL,
				700	};
				701
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	702	int cpu = get_cpu();
				703
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	704	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
				705	VM_WARN_ON(irqs_disabled());
				706	local_irq_disable();
Andy Lutomirski	3f79e4c	2017-05-28 10:00:13 -0700	[diff] [blame]	707	flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	708	local_irq_enable();
				709	}
				710
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	711	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	712	flush_tlb_others(&batch->cpumask, &info);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	713
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	714	cpumask_clear(&batch->cpumask);
				715
				716	put_cpu();
				717	}
				718
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	719	static ssize_t tlbflush_read_file(struct file file, char __user user_buf,
				720	size_t count, loff_t *ppos)
				721	{
				722	char buf[32];
				723	unsigned int len;
				724
				725	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
				726	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
				727	}
				728
				729	static ssize_t tlbflush_write_file(struct file *file,
				730	const char __user user_buf, size_t count, loff_t ppos)
				731	{
				732	char buf[32];
				733	ssize_t len;
				734	int ceiling;
				735
				736	len = min(count, sizeof(buf) - 1);
				737	if (copy_from_user(buf, user_buf, len))
				738	return -EFAULT;
				739
				740	buf[len] = '\0';
				741	if (kstrtoint(buf, 0, &ceiling))
				742	return -EINVAL;
				743
				744	if (ceiling < 0)
				745	return -EINVAL;
				746
				747	tlb_single_page_flush_ceiling = ceiling;
				748	return count;
				749	}
				750
/*
 * File operations for the "tlb_single_page_flush_ceiling" debugfs file:
 * reads report the current ceiling, writes replace it.
 */
static const struct file_operations fops_tlbflush = {
	.read = tlbflush_read_file,
	.write = tlbflush_write_file,
	.llseek = default_llseek,
};
				756
				757	static int __init create_tlb_single_page_flush_ceiling(void)
				758	{
				759	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR \| S_IWUSR,
				760	arch_debugfs_dir, NULL, &fops_tlbflush);
				761	return 0;
				762	}
				763	late_initcall(create_tlb_single_page_flush_ceiling);