Blame - arch/x86/mm/tlb.c - SHIFTPHONES/kernel/shift/mainline

blob: 49358481c733235918cde7c576a3332fba50c364 [file] [log] [blame]

Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	1	#include <linux/init.h>
				2
				3	#include <linux/mm.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	4	#include <linux/spinlock.h>
				5	#include <linux/smp.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	6	#include <linux/interrupt.h>
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	7	#include <linux/module.h>
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	8	#include <linux/cpu.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	9
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	10	#include <asm/tlbflush.h>
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	11	#include <asm/mmu_context.h>
Jan Beulich	350f8f5	2009-11-13 11:54:40 +0000	[diff] [blame]	12	#include <asm/cache.h>
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	13	#include <asm/apic.h>
Tejun Heo	bdbcdd4	2009-01-21 17:26:06 +0900	[diff] [blame]	14	#include <asm/uv/uv.h>
Glauber Costa	5af5573	2008-03-25 13:28:56 -0300	[diff] [blame]	15
Brian Gerst	9eb912d	2009-01-19 00:38:57 +0900	[diff] [blame]	16	DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
				17	= { &init_mm, 0, };
				18
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	19	/*
				20	* Smarter SMP flushing macros.
				21	* c/o Linus Torvalds.
				22	*
				23	* These mean you can really definitely utterly forget about
				24	* writing to user space from interrupts. (It's not allowed anyway).
				25	*
				26	* Optimizations Manfred Spraul <manfred@colorfullife.com>
				27	*
				28	* More scalable flush, from Andi Kleen
				29	*
				30	* To avoid global state use 8 different call vectors.
				31	* Each CPU uses a specific vector to trigger flushes on other
				32	* CPUs. Depending on the received vector the target CPUs look into
Frederik Deweerdt	09b3ec7	2009-01-12 22:35:42 +0100	[diff] [blame]	33	* the right array slot for the flush data.
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	34	*
				35	* With more than 8 CPUs they are hashed to the 8 available
				36	* vectors. The limited global vector space forces us to this right now.
				37	* In future when interrupts are split into per CPU domains this could be
				38	* fixed, at the cost of triggering multiple IPIs in some cases.
				39	*/
				40
				41	union smp_flush_state {
				42	struct {
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	43	struct mm_struct *flush_mm;
				44	unsigned long flush_va;
Thomas Gleixner	39c662f	2009-07-25 19:15:48 +0200	[diff] [blame]	45	raw_spinlock_t tlbstate_lock;
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	46	DECLARE_BITMAP(flush_cpumask, NR_CPUS);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	47	};
Jan Beulich	350f8f5	2009-11-13 11:54:40 +0000	[diff] [blame]	48	char pad[INTERNODE_CACHE_BYTES];
Frederik Deweerdt	09b3ec7	2009-01-12 22:35:42 +0100	[diff] [blame]	49	} ____cacheline_internodealigned_in_smp;
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	50
				51	/* State is put into the per CPU data section, but padded
				52	to a full cache line because other CPUs can access it and we don't
				53	want false sharing in the per cpu data segment. */
Frederik Deweerdt	09b3ec7	2009-01-12 22:35:42 +0100	[diff] [blame]	54	static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	55
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	56	static DEFINE_PER_CPU_READ_MOSTLY(int, tlb_vector_offset);
				57
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	58	/*
				59	* We cannot call mmdrop() because we are in interrupt context,
				60	* instead update mm->cpu_vm_mask.
				61	*/
				62	void leave_mm(int cpu)
				63	{
Brian Gerst	9eb912d	2009-01-19 00:38:57 +0900	[diff] [blame]	64	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	65	BUG();
Rusty Russell	78f1c4d	2009-09-24 09:34:51 -0600	[diff] [blame]	66	cpumask_clear_cpu(cpu,
				67	mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	68	load_cr3(swapper_pg_dir);
				69	}
				70	EXPORT_SYMBOL_GPL(leave_mm);
				71
				72	/*
				73	*
				74	* The flush IPI assumes that a thread switch happens in this order:
				75	* [cpu0: the cpu that switches]
				76	* 1) switch_mm() either 1a) or 1b)
				77	* 1a) thread switch to a different mm
				78	* 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
				79	* Stop ipi delivery for the old mm. This is not synchronized with
				80	* the other cpus, but smp_invalidate_interrupt ignores flush ipis
				81	* for the wrong mm, and in the worst case we perform a superfluous
				82	* tlb flush.
				83	* 1a2) set cpu mmu_state to TLBSTATE_OK
				84	* Now the smp_invalidate_interrupt won't call leave_mm if cpu0
				85	* was in lazy tlb mode.
				86	* 1a3) update cpu active_mm
				87	* Now cpu0 accepts tlb flushes for the new mm.
				88	* 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
				89	* Now the other cpus will send tlb flush ipis.
				90	* 1a5) change cr3.
				91	* 1b) thread switch without mm change
				92	* cpu active_mm is correct, cpu0 already handles
				93	* flush ipis.
				94	* 1b1) set cpu mmu_state to TLBSTATE_OK
				95	* 1b2) test_and_set the cpu bit in cpu_vm_mask.
				96	* Atomically set the bit [other cpus will start sending flush ipis],
				97	* and test the bit.
				98	* 1b3) if the bit was 0: leave_mm was called, flush the tlb.
				99	* 2) switch %%esp, ie current
				100	*
				101	* The interrupt must handle 2 special cases:
				102	* - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
				103	* - the cpu performs speculative tlb reads, i.e. even if the cpu only
				104	* runs in kernel space, the cpu could load tlb entries for user space
				105	* pages.
				106	*
				107	* The good news is that cpu mmu_state is local to each cpu, no
				108	* write/read ordering problems.
				109	*/
				110
				111	/*
				112	* TLB flush IPI:
				113	*
				114	* 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
				115	* 2) Leave the mm if we are in the lazy tlb mode.
				116	*
				117	* Interrupts are disabled.
				118	*/
				119
Tejun Heo	02cf94c	2009-01-21 17:26:06 +0900	[diff] [blame]	120	/*
				121	* FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
				122	* but still used for documentation purpose but the usage is slightly
				123	* inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
				124	* entry calls in with the first parameter in %eax. Maybe define
				125	* intrlinkage?
				126	*/
				127	#ifdef CONFIG_X86_64
				128	asmlinkage
				129	#endif
				130	void smp_invalidate_interrupt(struct pt_regs *regs)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	131	{
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	132	unsigned int cpu;
				133	unsigned int sender;
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	134	union smp_flush_state *f;
				135
				136	cpu = smp_processor_id();
				137	/*
				138	* orig_rax contains the negated interrupt vector.
				139	* Use that to determine where the sender put the data.
				140	*/
				141	sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
Frederik Deweerdt	09b3ec7	2009-01-12 22:35:42 +0100	[diff] [blame]	142	f = &flush_state[sender];
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	143
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	144	if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	145	goto out;
				146	/*
				147	* This was a BUG() but until someone can quote me the
				148	* line from the intel manual that guarantees an IPI to
				149	* multiple CPUs is retried _only_ on the erroring CPUs
				150	* its staying as a return
				151	*
				152	* BUG();
				153	*/
				154
Brian Gerst	9eb912d	2009-01-19 00:38:57 +0900	[diff] [blame]	155	if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
				156	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	157	if (f->flush_va == TLB_FLUSH_ALL)
				158	local_flush_tlb();
				159	else
				160	__flush_tlb_one(f->flush_va);
				161	} else
				162	leave_mm(cpu);
				163	}
				164	out:
				165	ack_APIC_irq();
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	166	smp_mb__before_clear_bit();
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	167	cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	168	smp_mb__after_clear_bit();
Hiroshi Shimamoto	8ae9366	2008-12-12 15:52:26 -0800	[diff] [blame]	169	inc_irq_stat(irq_tlb_count);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	170	}
				171
Rusty Russell	4595f96	2009-01-10 21:58:09 -0800	[diff] [blame]	172	static void flush_tlb_others_ipi(const struct cpumask *cpumask,
				173	struct mm_struct *mm, unsigned long va)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	174	{
Tejun Heo	6dd01be	2009-01-21 17:26:06 +0900	[diff] [blame]	175	unsigned int sender;
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	176	union smp_flush_state *f;
Cliff Wickman	1812924	2008-06-02 08:56:14 -0500	[diff] [blame]	177
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	178	/* Caller has disabled preemption */
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	179	sender = this_cpu_read(tlb_vector_offset);
Frederik Deweerdt	09b3ec7	2009-01-12 22:35:42 +0100	[diff] [blame]	180	f = &flush_state[sender];
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	181
				182	/*
				183	* Could avoid this lock when
				184	* num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
				185	* probably not worth checking this for a cache-hot lock.
				186	*/
Thomas Gleixner	39c662f	2009-07-25 19:15:48 +0200	[diff] [blame]	187	raw_spin_lock(&f->tlbstate_lock);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	188
				189	f->flush_mm = mm;
				190	f->flush_va = va;
Linus Torvalds	b04e637	2009-08-21 09:48:10 -0700	[diff] [blame]	191	if (cpumask_andnot(to_cpumask(f->flush_cpumask), cpumask, cpumask_of(smp_processor_id()))) {
				192	/*
				193	* We have to send the IPI only to
				194	* CPUs affected.
				195	*/
				196	apic->send_IPI_mask(to_cpumask(f->flush_cpumask),
				197	INVALIDATE_TLB_VECTOR_START + sender);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	198
Linus Torvalds	b04e637	2009-08-21 09:48:10 -0700	[diff] [blame]	199	while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
				200	cpu_relax();
				201	}
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	202
				203	f->flush_mm = NULL;
				204	f->flush_va = 0;
Thomas Gleixner	39c662f	2009-07-25 19:15:48 +0200	[diff] [blame]	205	raw_spin_unlock(&f->tlbstate_lock);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	206	}
				207
/*
 * native_flush_tlb_others - flush TLB entries on other CPUs
 * @cpumask: CPUs to flush
 * @mm:      mm whose entries are to be flushed
 * @va:      address to flush, or TLB_FLUSH_ALL
 *
 * On UV systems the request is first handed to uv_flush_tlb_others(); the
 * IPI path is then used only for the mask it returns, if that is non-NULL.
 * Everywhere else the IPI path handles the whole mask.
 */
void native_flush_tlb_others(const struct cpumask *cpumask,
			     struct mm_struct *mm, unsigned long va)
{
	unsigned int cpu;

	if (!is_uv_system()) {
		flush_tlb_others_ipi(cpumask, mm, va);
		return;
	}

	cpu = get_cpu();
	cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
	if (cpumask)
		flush_tlb_others_ipi(cpumask, mm, va);
	put_cpu();
}
				223
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	224	static void __cpuinit calculate_tlb_offset(void)
				225	{
				226	int cpu, node, nr_node_vecs;
				227	/*
				228	* we are changing tlb_vector_offset for each CPU in runtime, but this
				229	* will not cause inconsistency, as the write is atomic under X86. we
				230	* might see more lock contentions in a short time, but after all CPU's
				231	* tlb_vector_offset are changed, everything should go normal
				232	*
				233	* Note: if NUM_INVALIDATE_TLB_VECTORS % nr_online_nodes !=0, we might
				234	* waste some vectors.
				235	**/
				236	if (nr_online_nodes > NUM_INVALIDATE_TLB_VECTORS)
				237	nr_node_vecs = 1;
				238	else
				239	nr_node_vecs = NUM_INVALIDATE_TLB_VECTORS/nr_online_nodes;
				240
				241	for_each_online_node(node) {
				242	int node_offset = (node % NUM_INVALIDATE_TLB_VECTORS) *
				243	nr_node_vecs;
				244	int cpu_offset = 0;
				245	for_each_cpu(cpu, cpumask_of_node(node)) {
				246	per_cpu(tlb_vector_offset, cpu) = node_offset +
				247	cpu_offset;
				248	cpu_offset++;
				249	cpu_offset = cpu_offset % nr_node_vecs;
				250	}
				251	}
				252	}
				253
				254	static int tlb_cpuhp_notify(struct notifier_block *n,
				255	unsigned long action, void *hcpu)
				256	{
				257	switch (action & 0xf) {
				258	case CPU_ONLINE:
				259	case CPU_DEAD:
				260	calculate_tlb_offset();
				261	}
				262	return NOTIFY_OK;
				263	}
				264
Ingo Molnar	a4928cf	2008-04-23 13:20:56 +0200	[diff] [blame]	265	static int __cpuinit init_smp_flush(void)
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	266	{
				267	int i;
				268
Frederik Deweerdt	09b3ec7	2009-01-12 22:35:42 +0100	[diff] [blame]	269	for (i = 0; i < ARRAY_SIZE(flush_state); i++)
Thomas Gleixner	39c662f	2009-07-25 19:15:48 +0200	[diff] [blame]	270	raw_spin_lock_init(&flush_state[i].tlbstate_lock);
Akinobu Mita	7c04e64	2008-04-19 23:55:17 +0900	[diff] [blame]	271
Shaohua Li	9329672	2010-10-20 11:07:03 +0800	[diff] [blame]	272	calculate_tlb_offset();
				273	hotcpu_notifier(tlb_cpuhp_notify, 0);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	274	return 0;
				275	}
				276	core_initcall(init_smp_flush);
				277
				278	void flush_tlb_current_task(void)
				279	{
				280	struct mm_struct *mm = current->mm;
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	281
				282	preempt_disable();
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	283
				284	local_flush_tlb();
Rusty Russell	78f1c4d	2009-09-24 09:34:51 -0600	[diff] [blame]	285	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
				286	flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	287	preempt_enable();
				288	}
				289
				290	void flush_tlb_mm(struct mm_struct *mm)
				291	{
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	292	preempt_disable();
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	293
				294	if (current->active_mm == mm) {
				295	if (current->mm)
				296	local_flush_tlb();
				297	else
				298	leave_mm(smp_processor_id());
				299	}
Rusty Russell	78f1c4d	2009-09-24 09:34:51 -0600	[diff] [blame]	300	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
				301	flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	302
				303	preempt_enable();
				304	}
				305
				306	void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
				307	{
				308	struct mm_struct *mm = vma->vm_mm;
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	309
				310	preempt_disable();
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	311
				312	if (current->active_mm == mm) {
				313	if (current->mm)
				314	__flush_tlb_one(va);
				315	else
				316	leave_mm(smp_processor_id());
				317	}
				318
Rusty Russell	78f1c4d	2009-09-24 09:34:51 -0600	[diff] [blame]	319	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
				320	flush_tlb_others(mm_cpumask(mm), mm, va);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	321
				322	preempt_enable();
				323	}
				324
				325	static void do_flush_tlb_all(void *info)
				326	{
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	327	__flush_tlb_all();
Brian Gerst	9eb912d	2009-01-19 00:38:57 +0900	[diff] [blame]	328	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
Borislav Petkov	3f8afb7	2010-07-21 14:47:05 +0200	[diff] [blame]	329	leave_mm(smp_processor_id());
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	330	}
				331
				332	void flush_tlb_all(void)
				333	{
Jens Axboe	15c8b6c	2008-05-09 09:39:44 +0200	[diff] [blame]	334	on_each_cpu(do_flush_tlb_all, NULL, 1);
Glauber Costa	c048fdf	2008-03-03 14:12:54 -0300	[diff] [blame]	335	}