Blame - arch/x86/mm/tlb.c - SHIFTPHONES/kernel/common

blob: 8dcc0607f80584748f92fe43aba9a32685fc6f9b [file] [log] [blame]

Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	1	#include <linux/init.h>
				2
				3	#include <linux/mm.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	4	#include <linux/spinlock.h>
				5	#include <linux/smp.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	6	#include <linux/interrupt.h>
Paul Gortmaker	4b599fe	2016-07-13 20:18:55 -0400	[diff] [blame]	7	#include <linux/export.h>
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	8	#include <linux/cpu.h>
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	9	#include <linux/debugfs.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	10
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	11	#include <asm/tlbflush.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	12	#include <asm/mmu_context.h>
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	13	#include <asm/nospec-branch.h>
Jan Beulich	350f8f5	2009-11-13 11:54:40 +0000	[diff] [blame]	14	#include <asm/cache.h>
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	15	#include <asm/apic.h>
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	16	#include <asm/uv/uv.h>
Glauber Costa	5af5573	2008-03-25 13:28:56 -0300	[diff] [blame]	17
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	18	/*
Andy Lutomirski	ce4a4e56	2017-05-28 10:00:14 -0700	[diff] [blame]	19	* TLB flushing, formerly SMP-only
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	20	* c/o Linus Torvalds.
				21	*
				22	* These mean you can really definitely utterly forget about
				23	* writing to user space from interrupts. (It's not allowed anyway).
				24	*
				25	* Optimizations Manfred Spraul <manfred@colorfullife.com>
				26	*
				27	* More scalable flush, from Andi Kleen
				28	*
Alex Shi	52aec33	2012-06-28 09:02:23 +0800	[diff] [blame]	29	* Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	30	*/
				31
Dave Hansen	2ea907c	2017-12-04 15:07:57 +0100	[diff] [blame]	32	/*
				33	* We get here when we do something requiring a TLB invalidation
				34	* but could not go invalidate all of the contexts. We do the
				35	* necessary invalidation by clearing out the 'ctx_id' which
				36	* forces a TLB flush when the context is loaded.
				37	*/
				38	void clear_asid_other(void)
				39	{
				40	u16 asid;
				41
				42	/*
				43	* This is only expected to be set if we have disabled
				44	* kernel _PAGE_GLOBAL pages.
				45	*/
				46	if (!static_cpu_has(X86_FEATURE_PTI)) {
				47	WARN_ON_ONCE(1);
				48	return;
				49	}
				50
				51	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
				52	/* Do not need to flush the current asid */
				53	if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
				54	continue;
				55	/*
				56	* Make sure the next time we go to switch to
				57	* this asid, we do a flush:
				58	*/
				59	this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
				60	}
				61	this_cpu_write(cpu_tlbstate.invalidate_other, false);
				62	}
				63
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	64	atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);
				65
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	66
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	67	static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
				68	u16 new_asid, bool need_flush)
				69	{
				70	u16 asid;
				71
				72	if (!static_cpu_has(X86_FEATURE_PCID)) {
				73	*new_asid = 0;
				74	*need_flush = true;
				75	return;
				76	}
				77
Dave Hansen	2ea907c	2017-12-04 15:07:57 +0100	[diff] [blame]	78	if (this_cpu_read(cpu_tlbstate.invalidate_other))
				79	clear_asid_other();
				80
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	81	for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
				82	if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
				83	next->context.ctx_id)
				84	continue;
				85
				86	*new_asid = asid;
				87	*need_flush = (this_cpu_read(cpu_tlbstate.ctxs[asid].tlb_gen) <
				88	next_tlb_gen);
				89	return;
				90	}
				91
				92	/*
				93	* We don't currently own an ASID slot on this CPU.
				94	* Allocate a slot.
				95	*/
				96	*new_asid = this_cpu_add_return(cpu_tlbstate.next_asid, 1) - 1;
				97	if (*new_asid >= TLB_NR_DYN_ASIDS) {
				98	*new_asid = 0;
				99	this_cpu_write(cpu_tlbstate.next_asid, 1);
				100	}
				101	*need_flush = true;
				102	}
				103
Dave Hansen	48e1119	2017-12-04 15:07:58 +0100	[diff] [blame]	104	static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
				105	{
				106	unsigned long new_mm_cr3;
				107
				108	if (need_flush) {
Peter Zijlstra	6fd166a	2017-12-04 15:07:59 +0100	[diff] [blame]	109	invalidate_user_asid(new_asid);
Dave Hansen	48e1119	2017-12-04 15:07:58 +0100	[diff] [blame]	110	new_mm_cr3 = build_cr3(pgdir, new_asid);
				111	} else {
				112	new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
				113	}
				114
				115	/*
				116	* Caution: many callers of this function expect
				117	* that load_cr3() is serializing and orders TLB
				118	* fills with respect to the mm_cpumask writes.
				119	*/
				120	write_cr3(new_mm_cr3);
				121	}
				122
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	123	void leave_mm(int cpu)
				124	{
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	125	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				126
				127	/*
				128	* It's plausible that we're in lazy TLB mode while our mm is init_mm.
				129	* If so, our callers still expect us to flush the TLB, but there
				130	* aren't any user TLB entries in init_mm to worry about.
				131	*
				132	* This needs to happen before any other sanity checks due to
				133	* intel_idle's shenanigans.
				134	*/
				135	if (loaded_mm == &init_mm)
				136	return;
				137
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	138	/* Warn if we're not lazy. */
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	139	WARN_ON(!this_cpu_read(cpu_tlbstate.is_lazy));
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	140
				141	switch_mm(NULL, &init_mm, NULL);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	142	}
Andy Lutomirski	6753573	2017-11-04 04:16:12 -0700	[diff] [blame]	143	EXPORT_SYMBOL_GPL(leave_mm);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	144
/*
 * Interrupt-safe wrapper around switch_mm_irqs_off(): local interrupts are
 * disabled for the duration of the switch and restored afterwards.
 *
 * @prev: mm being switched away from (may be NULL)
 * @next: mm to load
 * @tsk:  task being switched to (may be NULL)
 */
void switch_mm(struct mm_struct *prev, struct mm_struct *next,
	       struct task_struct *tsk)
{
	unsigned long flags;

	local_irq_save(flags);
	switch_mm_irqs_off(prev, next, tsk);
	local_irq_restore(flags);
}
				154
Andy Lutomirski	5beda7d	2018-01-25 13:12:14 -0800	[diff] [blame]	155	static void sync_current_stack_to_mm(struct mm_struct *mm)
				156	{
				157	unsigned long sp = current_stack_pointer;
				158	pgd_t *pgd = pgd_offset(mm, sp);
				159
				160	if (CONFIG_PGTABLE_LEVELS > 4) {
				161	if (unlikely(pgd_none(*pgd))) {
				162	pgd_t *pgd_ref = pgd_offset_k(sp);
				163
				164	set_pgd(pgd, *pgd_ref);
				165	}
				166	} else {
				167	/*
				168	* "pgd" is faked. The top level entries are "p4d"s, so sync
				169	* the p4d. This compiles to approximately the same code as
				170	* the 5-level case.
				171	*/
				172	p4d_t *p4d = p4d_offset(pgd, sp);
				173
				174	if (unlikely(p4d_none(*p4d))) {
				175	pgd_t *pgd_ref = pgd_offset_k(sp);
				176	p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
				177
				178	set_p4d(p4d, *p4d_ref);
				179	}
				180	}
				181	}
				182
Andy Lutomirski	078194f	2016-04-26 09:39:09 -0700	[diff] [blame]	183	void switch_mm_irqs_off(struct mm_struct prev, struct mm_struct next,
				184	struct task_struct *tsk)
				185	{
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	186	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	187	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	188	unsigned cpu = smp_processor_id();
				189	u64 next_tlb_gen;
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	190
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	191	/*
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	192	* NB: The scheduler will call us with prev == next when switching
				193	* from lazy TLB mode to normal mode if active_mm isn't changing.
				194	* When this happens, we don't assume that CR3 (and hence
				195	* cpu_tlbstate.loaded_mm) matches next.
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	196	*
				197	* NB: leave_mm() calls us with prev == NULL and tsk == NULL.
				198	*/
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	199
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	200	/* We don't want flush_tlb_func_* to run concurrently with us. */
				201	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
				202	WARN_ON_ONCE(!irqs_disabled());
				203
				204	/*
				205	* Verify that CR3 is what we think it is. This will catch
				206	* hypothetical buggy code that directly switches to swapper_pg_dir
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	207	* without going through leave_mm() / switch_mm_irqs_off() or that
				208	* does something like write_cr3(read_cr3_pa()).
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	209	*
				210	* Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
				211	* isn't free.
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	212	*/
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	213	#ifdef CONFIG_DEBUG_VM
Dave Hansen	50fb83a6	2017-12-04 15:07:54 +0100	[diff] [blame]	214	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
Andy Lutomirski	a376e7f	2017-09-07 22:06:57 -0700	[diff] [blame]	215	/*
				216	* If we were to BUG here, we'd be very likely to kill
				217	* the system so hard that we don't see the call trace.
				218	* Try to recover instead by ignoring the error and doing
				219	* a global flush to minimize the chance of corruption.
				220	*
				221	* (This is far from being a fully correct recovery.
				222	* Architecturally, the CPU could prefetch something
				223	* back into an incorrect ASID slot and leave it there
				224	* to cause trouble down the road. It's better than
				225	* nothing, though.)
				226	*/
				227	__flush_tlb_all();
				228	}
				229	#endif
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	230	this_cpu_write(cpu_tlbstate.is_lazy, false);
Andy Lutomirski	e37e43a	2016-08-11 02:35:23 -0700	[diff] [blame]	231
Mathieu Desnoyers	306e060	2018-01-29 15:20:12 -0500	[diff] [blame]	232	/*
Mathieu Desnoyers	10bcc80	2018-01-29 15:20:18 -0500	[diff] [blame]	233	* The membarrier system call requires a full memory barrier and
				234	* core serialization before returning to user-space, after
				235	* storing to rq->curr. Writing to CR3 provides that full
				236	* memory barrier and core serializing instruction.
Mathieu Desnoyers	306e060	2018-01-29 15:20:12 -0500	[diff] [blame]	237	*/
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	238	if (real_prev == next) {
Andy Lutomirski	e8b9b0c	2017-10-14 09:59:49 -0700	[diff] [blame]	239	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
				240	next->context.ctx_id);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	241
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	242	/*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	243	* We don't currently support having a real mm loaded without
				244	* our cpu set in mm_cpumask(). We have all the bookkeeping
				245	* in place to figure out whether we would need to flush
				246	* if our cpu were cleared in mm_cpumask(), but we don't
				247	* currently use it.
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	248	*/
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	249	if (WARN_ON_ONCE(real_prev != &init_mm &&
				250	!cpumask_test_cpu(cpu, mm_cpumask(next))))
				251	cpumask_set_cpu(cpu, mm_cpumask(next));
				252
				253	return;
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	254	} else {
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	255	u16 new_asid;
				256	bool need_flush;
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	257	u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
				258
				259	/*
				260	* Avoid user/user BTB poisoning by flushing the branch
				261	* predictor when switching between processes. This stops
				262	* one process from doing Spectre-v2 attacks on another.
				263	*
				264	* As an optimization, flush indirect branches only when
				265	* switching into processes that disable dumping. This
				266	* protects high value processes like gpg, without having
				267	* too high performance overhead. IBPB is expensive!
				268	*
				269	* This will not flush branches when switching into kernel
				270	* threads. It will also not flush if we switch to idle
				271	* thread and back to the same process. It will flush if we
				272	* switch to a different non-dumpable process.
				273	*/
				274	if (tsk && tsk->mm &&
				275	tsk->mm->context.ctx_id != last_ctx_id &&
				276	get_dumpable(tsk->mm) != SUID_DUMP_USER)
				277	indirect_branch_prediction_barrier();
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	278
				279	if (IS_ENABLED(CONFIG_VMAP_STACK)) {
				280	/*
				281	* If our current stack is in vmalloc space and isn't
				282	* mapped in the new pgd, we'll double-fault. Forcibly
				283	* map it.
				284	*/
Andy Lutomirski	5beda7d	2018-01-25 13:12:14 -0800	[diff] [blame]	285	sync_current_stack_to_mm(next);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	286	}
				287
				288	/* Stop remote flushes for the previous mm */
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	289	VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(real_prev)) &&
				290	real_prev != &init_mm);
				291	cpumask_clear_cpu(cpu, mm_cpumask(real_prev));
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	292
				293	/*
				294	* Start remote flushes and then read tlb_gen.
				295	*/
				296	cpumask_set_cpu(cpu, mm_cpumask(next));
				297	next_tlb_gen = atomic64_read(&next->context.tlb_gen);
				298
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	299	choose_new_asid(next, next_tlb_gen, &new_asid, &need_flush);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	300
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	301	if (need_flush) {
				302	this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
				303	this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
Dave Hansen	48e1119	2017-12-04 15:07:58 +0100	[diff] [blame]	304	load_new_mm_cr3(next->pgd, new_asid, true);
Andy Lutomirski	6753573	2017-11-04 04:16:12 -0700	[diff] [blame]	305
				306	/*
				307	* NB: This gets called via leave_mm() in the idle path
				308	* where RCU functions differently. Tracing normally
				309	* uses RCU, so we need to use the _rcuidle variant.
				310	*
				311	* (There is no good reason for this. The idle code should
				312	* be rearranged to call this before rcu_idle_enter().)
				313	*/
				314	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	315	} else {
				316	/* The new ASID is already up to date. */
Dave Hansen	48e1119	2017-12-04 15:07:58 +0100	[diff] [blame]	317	load_new_mm_cr3(next->pgd, new_asid, false);
Andy Lutomirski	6753573	2017-11-04 04:16:12 -0700	[diff] [blame]	318
				319	/* See above wrt _rcuidle. */
				320	trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	321	}
				322
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	323	/*
				324	* Record last user mm's context id, so we can avoid
				325	* flushing branch buffer with IBPB if we switch back
				326	* to the same user.
				327	*/
				328	if (next != &init_mm)
				329	this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
				330
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	331	this_cpu_write(cpu_tlbstate.loaded_mm, next);
				332	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	333	}
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	334
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	335	load_mm_cr4(next);
Andy Lutomirski	7353425	2017-06-20 22:22:08 -0700	[diff] [blame]	336	switch_ldt(real_prev, next);
Andy Lutomirski	69c0319	2016-04-26 09:39:08 -0700	[diff] [blame]	337	}
				338
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	339	/*
Andy Lutomirski	4e57b94	2017-10-14 09:59:50 -0700	[diff] [blame]	340	* Please ignore the name of this function. It should be called
				341	* switch_to_kernel_thread().
				342	*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	343	* enter_lazy_tlb() is a hint from the scheduler that we are entering a
				344	* kernel thread or other context without an mm. Acceptable implementations
				345	* include doing nothing whatsoever, switching to init_mm, or various clever
				346	* lazy tricks to try to minimize TLB flushes.
				347	*
				348	* The scheduler reserves the right to call enter_lazy_tlb() several times
				349	* in a row. It will notify us that we're going back to a real mm by
				350	* calling switch_mm_irqs_off().
				351	*/
				352	void enter_lazy_tlb(struct mm_struct mm, struct task_struct tsk)
				353	{
				354	if (this_cpu_read(cpu_tlbstate.loaded_mm) == &init_mm)
				355	return;
				356
Andy Lutomirski	4e57b94	2017-10-14 09:59:50 -0700	[diff] [blame]	357	if (tlb_defer_switch_to_init_mm()) {
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	358	/*
				359	* There's a significant optimization that may be possible
				360	* here. We have accurate enough TLB flush tracking that we
				361	* don't need to maintain coherence of TLB per se when we're
				362	* lazy. We do, however, need to maintain coherence of
				363	* paging-structure caches. We could, in principle, leave our
				364	* old mm loaded and only switch to init_mm when
				365	* tlb_remove_page() happens.
				366	*/
				367	this_cpu_write(cpu_tlbstate.is_lazy, true);
				368	} else {
				369	switch_mm(NULL, &init_mm, NULL);
				370	}
				371	}
				372
				373	/*
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	374	* Call this when reinitializing a CPU. It fixes the following potential
				375	* problems:
				376	*
				377	* - The ASID changed from what cpu_tlbstate thinks it is (most likely
				378	* because the CPU was taken down and came back up with CR3's PCID
				379	* bits clear. CPU hotplug can do this.
				380	*
				381	* - The TLB contains junk in slots corresponding to inactive ASIDs.
				382	*
				383	* - The CPU went so far out to lunch that it may have missed a TLB
				384	* flush.
				385	*/
				386	void initialize_tlbstate_and_flush(void)
				387	{
				388	int i;
				389	struct mm_struct *mm = this_cpu_read(cpu_tlbstate.loaded_mm);
				390	u64 tlb_gen = atomic64_read(&init_mm.context.tlb_gen);
				391	unsigned long cr3 = __read_cr3();
				392
				393	/* Assert that CR3 already references the right mm. */
				394	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
				395
				396	/*
				397	* Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
				398	* doesn't work like other CR4 bits because it can only be set from
				399	* long mode.)
				400	*/
Andy Lutomirski	7898f79	2017-09-10 08:52:58 -0700	[diff] [blame]	401	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	402	!(cr4_read_shadow() & X86_CR4_PCIDE));
				403
				404	/* Force ASID 0 and force a TLB flush. */
Dave Hansen	50fb83a6	2017-12-04 15:07:54 +0100	[diff] [blame]	405	write_cr3(build_cr3(mm->pgd, 0));
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	406
				407	/* Reinitialize tlbstate. */
Tim Chen	18bf3c3	2018-01-29 22:04:47 +0000	[diff] [blame]	408	this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
Andy Lutomirski	72c0098	2017-09-06 19:54:53 -0700	[diff] [blame]	409	this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
				410	this_cpu_write(cpu_tlbstate.next_asid, 1);
				411	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
				412	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
				413
				414	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
				415	this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
				416	}
				417
				418	/*
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	419	* flush_tlb_func_common()'s memory ordering requirement is that any
				420	* TLB fills that happen after we flush the TLB are ordered after we
				421	* read active_mm's tlb_gen. We don't need any explicit barriers
				422	* because all x86 flush operations are serializing and the
				423	* atomic64_read operation won't be reordered by the compiler.
				424	*/
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	425	static void flush_tlb_func_common(const struct flush_tlb_info *f,
				426	bool local, enum tlb_flush_reason reason)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	427	{
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	428	/*
				429	* We have three different tlb_gen values in here. They are:
				430	*
				431	* - mm_tlb_gen: the latest generation.
				432	* - local_tlb_gen: the generation that this CPU has already caught
				433	* up to.
				434	* - f->new_tlb_gen: the generation that the requester of the flush
				435	* wants us to catch up to.
				436	*/
				437	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	438	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	439	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	440	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	441
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	442	/* This code cannot presently handle being reentered. */
				443	VM_WARN_ON(!irqs_disabled());
				444
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	445	if (unlikely(loaded_mm == &init_mm))
				446	return;
				447
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	448	VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].ctx_id) !=
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	449	loaded_mm->context.ctx_id);
				450
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	451	if (this_cpu_read(cpu_tlbstate.is_lazy)) {
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	452	/*
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	453	* We're in lazy mode. We need to at least flush our
				454	* paging-structure cache to avoid speculatively reading
				455	* garbage into our TLB. Since switching to init_mm is barely
				456	* slower than a minimal flush, just switch to init_mm.
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	457	*/
Andy Lutomirski	b956575	2017-10-09 09:50:49 -0700	[diff] [blame]	458	switch_mm_irqs_off(NULL, &init_mm, NULL);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	459	return;
				460	}
				461
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	462	if (unlikely(local_tlb_gen == mm_tlb_gen)) {
				463	/*
				464	* There's nothing to do: we're already up to date. This can
				465	* happen if two concurrent flushes happen -- the first flush to
				466	* be handled can catch us all the way up, leaving no work for
				467	* the second flush.
				468	*/
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	469	trace_tlb_flush(reason, 0);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	470	return;
				471	}
				472
				473	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
				474	WARN_ON_ONCE(f->new_tlb_gen > mm_tlb_gen);
				475
				476	/*
				477	* If we get to this point, we know that our TLB is out of date.
				478	* This does not strictly imply that we need to flush (it's
				479	* possible that f->new_tlb_gen <= local_tlb_gen), but we're
				480	* going to need to flush in the very near future, so we might
				481	* as well get it over with.
				482	*
				483	* The only question is whether to do a full or partial flush.
				484	*
				485	* We do a partial flush if requested and two extra conditions
				486	* are met:
				487	*
				488	* 1. f->new_tlb_gen == local_tlb_gen + 1. We have an invariant that
				489	* we've always done all needed flushes to catch up to
				490	* local_tlb_gen. If, for example, local_tlb_gen == 2 and
				491	* f->new_tlb_gen == 3, then we know that the flush needed to bring
				492	* us up to date for tlb_gen 3 is the partial flush we're
				493	* processing.
				494	*
				495	* As an example of why this check is needed, suppose that there
				496	* are two concurrent flushes. The first is a full flush that
				497	* changes context.tlb_gen from 1 to 2. The second is a partial
				498	* flush that changes context.tlb_gen from 2 to 3. If they get
				499	* processed on this CPU in reverse order, we'll see
				500	* local_tlb_gen == 1, mm_tlb_gen == 3, and end != TLB_FLUSH_ALL.
				501	* If we were to use __flush_tlb_single() and set local_tlb_gen to
				502	* 3, we'd be break the invariant: we'd update local_tlb_gen above
				503	* 1 without the full flush that's needed for tlb_gen 2.
				504	*
				505	* 2. f->new_tlb_gen == mm_tlb_gen. This is purely an optimiation.
				506	* Partial TLB flushes are not all that much cheaper than full TLB
				507	* flushes, so it seems unlikely that it would be a performance win
				508	* to do a partial flush if that won't bring our TLB fully up to
				509	* date. By doing a full flush instead, we can increase
				510	* local_tlb_gen all the way to mm_tlb_gen and we can probably
				511	* avoid another flush in the very near future.
				512	*/
				513	if (f->end != TLB_FLUSH_ALL &&
				514	f->new_tlb_gen == local_tlb_gen + 1 &&
				515	f->new_tlb_gen == mm_tlb_gen) {
				516	/* Partial flush */
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	517	unsigned long addr;
Andy Lutomirski	be4ffc0	2017-05-28 10:00:16 -0700	[diff] [blame]	518	unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	519
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	520	addr = f->start;
				521	while (addr < f->end) {
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	522	__flush_tlb_single(addr);
				523	addr += PAGE_SIZE;
				524	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	525	if (local)
				526	count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
				527	trace_tlb_flush(reason, nr_pages);
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	528	} else {
				529	/* Full flush. */
				530	local_flush_tlb();
				531	if (local)
				532	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
				533	trace_tlb_flush(reason, TLB_FLUSH_ALL);
Andy Lutomirski	b3b90e5	2017-05-22 15:30:02 -0700	[diff] [blame]	534	}
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	535
				536	/* Both paths above update our state to mm_tlb_gen. */
Andy Lutomirski	10af623	2017-07-24 21:41:38 -0700	[diff] [blame]	537	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	538	}
				539
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	540	static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
				541	{
				542	const struct flush_tlb_info *f = info;
				543
				544	flush_tlb_func_common(f, true, reason);
				545	}
				546
				547	static void flush_tlb_func_remote(void *info)
				548	{
				549	const struct flush_tlb_info *f = info;
				550
				551	inc_irq_stat(irq_tlb_count);
				552
Andy Lutomirski	3d28ebc	2017-05-28 10:00:15 -0700	[diff] [blame]	553	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	554	return;
				555
				556	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
				557	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
				558	}
				559
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	560	void native_flush_tlb_others(const struct cpumask *cpumask,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	561	const struct flush_tlb_info *info)
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	562	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	563	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	564	if (info->end == TLB_FLUSH_ALL)
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	565	trace_tlb_flush(TLB_REMOTE_SEND_IPI, TLB_FLUSH_ALL);
				566	else
				567	trace_tlb_flush(TLB_REMOTE_SEND_IPI,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	568	(info->end - info->start) >> PAGE_SHIFT);
Nadav Amit	18c9824	2016-04-01 14:31:23 -0700	[diff] [blame]	569
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	570	if (is_uv_system()) {
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	571	/*
				572	* This whole special case is confused. UV has a "Broadcast
				573	* Assist Unit", which seems to be a fancy way to send IPIs.
				574	* Back when x86 used an explicit TLB flush IPI, UV was
				575	* optimized to use its own mechanism. These days, x86 uses
				576	* smp_call_function_many(), but UV still uses a manual IPI,
				577	* and that IPI's action is out of date -- it does a manual
				578	* flush instead of calling flush_tlb_func_remote(). This
				579	* means that the percpu tlb_gen variables won't be updated
				580	* and we'll do pointless flushes on future context switches.
				581	*
				582	* Rather than hooking native_flush_tlb_others() here, I think
				583	* that UV should be updated so that smp_call_function_many(),
				584	* etc, are optimal on UV.
				585	*/
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	586	unsigned int cpu;
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	587
Xiao Guangrong	25542c6	2011-03-15 09:57:37 +0800	[diff] [blame]	588	cpu = smp_processor_id();
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	589	cpumask = uv_flush_tlb_others(cpumask, info);
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	590	if (cpumask)
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	591	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	592	(void *)info, 1);
Mike Travis	0e21990	2009-01-10 21:58:10 -0800	[diff] [blame]	593	return;
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	594	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	595	smp_call_function_many(cpumask, flush_tlb_func_remote,
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	596	(void *)info, 1);
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	597	}
				598
/*
 * See Documentation/x86/tlb.txt for details.  We choose 33
 * because it is large enough to cover the vast majority (at
 * least 95%) of allocations, and is small enough that we are
 * confident it will not cause too much overhead.  Each single
 * flush is about 100 ns, so this caps the maximum overhead at
 * _about_ 3,000 ns.
 *
 * This is in units of pages.
 *
 * The value is compared against the size of the requested range in
 * flush_tlb_mm_range() and flush_tlb_kernel_range(), and can be read
 * and changed at run time through the "tlb_single_page_flush_ceiling"
 * debugfs file (tlbflush_read_file() / tlbflush_write_file()).
 */
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	610
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	611	void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				612	unsigned long end, unsigned long vmflag)
				613	{
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	614	int cpu;
Alex Shi	611ae8e	2012-06-28 09:02:22 +0800	[diff] [blame]	615
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	616	struct flush_tlb_info info = {
				617	.mm = mm,
				618	};
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	619
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	620	cpu = get_cpu();
Andy Lutomirski	ce27374	2017-04-22 00:01:21 -0700	[diff] [blame]	621
Andy Lutomirski	f39681e	2017-06-29 08:53:15 -0700	[diff] [blame]	622	/* This is also a barrier that synchronizes with switch_mm(). */
Andy Lutomirski	b0579ad	2017-06-29 08:53:16 -0700	[diff] [blame]	623	info.new_tlb_gen = inc_mm_tlb_gen(mm);
Andy Lutomirski	71b3c12	2016-01-06 12:21:01 -0800	[diff] [blame]	624
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	625	/* Should we flush just the requested range? */
				626	if ((end != TLB_FLUSH_ALL) &&
				627	!(vmflag & VM_HUGETLB) &&
				628	((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	629	info.start = start;
				630	info.end = end;
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	631	} else {
				632	info.start = 0UL;
				633	info.end = TLB_FLUSH_ALL;
Dave Hansen	4995ab9	2014-07-31 08:40:54 -0700	[diff] [blame]	634	}
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	635
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	636	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
				637	VM_WARN_ON(irqs_disabled());
				638	local_irq_disable();
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	639	flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	640	local_irq_enable();
				641	}
				642
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	643	if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	644	flush_tlb_others(mm_cpumask(mm), &info);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	645
Andy Lutomirski	454bbad	2017-05-28 10:00:12 -0700	[diff] [blame]	646	put_cpu();
Alex Shi	e7b52ff	2012-06-28 09:02:17 +0800	[diff] [blame]	647	}
				648
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	649
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	650	static void do_flush_tlb_all(void *info)
				651	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	652	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	653	__flush_tlb_all();
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	654	}
				655
				656	void flush_tlb_all(void)
				657	{
Mel Gorman	ec65993	2014-01-21 14:33:16 -0800	[diff] [blame]	658	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH);
Jens Axboe	15c8b6c	2008-05-09 09:39:44 +0200	[diff] [blame]	659	on_each_cpu(do_flush_tlb_all, NULL, 1);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	660	}
Alex Shi	3df3212	2012-06-28 09:02:20 +0800	[diff] [blame]	661
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	662	static void do_kernel_range_flush(void *info)
				663	{
				664	struct flush_tlb_info *f = info;
				665	unsigned long addr;
				666
				667	/* flush range by one by one 'invlpg' */
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	668	for (addr = f->start; addr < f->end; addr += PAGE_SIZE)
Peter Zijlstra	a501686	2017-12-05 13:34:49 +0100	[diff] [blame]	669	__flush_tlb_one(addr);
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	670	}
				671
				672	void flush_tlb_kernel_range(unsigned long start, unsigned long end)
				673	{
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	674
				675	/* Balance as user space task's flush, a bit conservative */
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	676	if (end == TLB_FLUSH_ALL \|\|
Andy Lutomirski	be4ffc0	2017-05-28 10:00:16 -0700	[diff] [blame]	677	(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	678	on_each_cpu(do_flush_tlb_all, NULL, 1);
Dave Hansen	e9f4e0a	2014-07-31 08:40:55 -0700	[diff] [blame]	679	} else {
				680	struct flush_tlb_info info;
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	681	info.start = start;
				682	info.end = end;
Alex Shi	effee4b	2012-06-28 09:02:24 +0800	[diff] [blame]	683	on_each_cpu(do_kernel_range_flush, &info, 1);
				684	}
				685	}
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	686
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	687	void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
				688	{
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	689	struct flush_tlb_info info = {
				690	.mm = NULL,
				691	.start = 0UL,
				692	.end = TLB_FLUSH_ALL,
				693	};
				694
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	695	int cpu = get_cpu();
				696
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	697	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
				698	VM_WARN_ON(irqs_disabled());
				699	local_irq_disable();
Andy Lutomirski	3f79e4c	2017-05-28 10:00:13 -0700	[diff] [blame]	700	flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
Andy Lutomirski	bc0d5a8	2017-06-29 08:53:13 -0700	[diff] [blame]	701	local_irq_enable();
				702	}
				703
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	704	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
Andy Lutomirski	a2055ab	2017-05-28 10:00:10 -0700	[diff] [blame]	705	flush_tlb_others(&batch->cpumask, &info);
Andy Lutomirski	94b1b03	2017-06-29 08:53:17 -0700	[diff] [blame]	706
Andy Lutomirski	e73ad5f	2017-05-22 15:30:03 -0700	[diff] [blame]	707	cpumask_clear(&batch->cpumask);
				708
				709	put_cpu();
				710	}
				711
Dave Hansen	2d040a1	2014-07-31 08:41:01 -0700	[diff] [blame]	712	static ssize_t tlbflush_read_file(struct file file, char __user user_buf,
				713	size_t count, loff_t *ppos)
				714	{
				715	char buf[32];
				716	unsigned int len;
				717
				718	len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
				719	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
				720	}
				721
				722	static ssize_t tlbflush_write_file(struct file *file,
				723	const char __user user_buf, size_t count, loff_t ppos)
				724	{
				725	char buf[32];
				726	ssize_t len;
				727	int ceiling;
				728
				729	len = min(count, sizeof(buf) - 1);
				730	if (copy_from_user(buf, user_buf, len))
				731	return -EFAULT;
				732
				733	buf[len] = '\0';
				734	if (kstrtoint(buf, 0, &ceiling))
				735	return -EINVAL;
				736
				737	if (ceiling < 0)
				738	return -EINVAL;
				739
				740	tlb_single_page_flush_ceiling = ceiling;
				741	return count;
				742	}
				743
				744	static const struct file_operations fops_tlbflush = {
				745	.read = tlbflush_read_file,
				746	.write = tlbflush_write_file,
				747	.llseek = default_llseek,
				748	};
				749
				750	static int __init create_tlb_single_page_flush_ceiling(void)
				751	{
				752	debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR \| S_IWUSR,
				753	arch_debugfs_dir, NULL, &fops_tlbflush);
				754	return 0;
				755	}
				756	late_initcall(create_tlb_single_page_flush_ceiling);