MIPS: mm: Use the Hardware Page Table Walker if the core supports it

The Hardware Page Table Walker aims to speed up TLB refill exceptions
by handling them in the hardware level instead of having a software
TLB refill handler. However, a TLB refill exception can still be
thrown in certain cases such as, synchronus exceptions, or address
translation or memory errors during the HTW operation. As a result of
which, HTW must not be considered a complete replacement for the TLB
refill software handler, but rather a fast-path for it.
For HTW to work, the PWBase register must contain the task's page
global directory address so the HTW will kick in on TLB refill
exceptions.

Due to HTW being a separate engine embedded deep in the CPU pipeline,
we need to restart the HTW everytime a PTE changes to avoid HTW
fetching a old entry from the page tables. It's also necessary to
restart the HTW on context switches to prevent it from fetching a
page from the previous process. Finally, since HTW is using the
entryhi register to write the translations to the TLB, it's necessary
to stop the HTW whenever the entryhi changes (eg for tlb probe
perations) and enable it back afterwards.

== Performance ==

The following trivial test was used to measure the performance of the
HTW. Using the same root filesystem, the following command was used
to measure the number of tlb refill handler executions with and
without (using 'nohtw' kernel parameter) HTW support.  The kernel was
modified to use a scratch register as a counter for the TLB refill
exceptions.

find /usr -type f -exec ls -lh {} \;

HTW Enabled:
TLB refill exceptions: 12306

HTW Disabled:
TLB refill exceptions: 17805

Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-mips@linux-mips.org
Cc: Markos Chandras <markos.chandras@imgtec.com>
Patchwork: https://patchwork.linux-mips.org/patch/7336/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 92c9efdb..fa6ebd4 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -57,6 +57,7 @@
 	local_irq_save(flags);
 	/* Save old context and create impossible VPN2 value */
 	old_ctx = read_c0_entryhi();
+	htw_stop();
 	write_c0_entrylo0(0);
 	write_c0_entrylo1(0);
 
@@ -90,6 +91,7 @@
 	}
 	tlbw_use_hazard();
 	write_c0_entryhi(old_ctx);
+	htw_start();
 	flush_itlb();
 	local_irq_restore(flags);
 }
@@ -131,6 +133,7 @@
 			int oldpid = read_c0_entryhi();
 			int newpid = cpu_asid(cpu, mm);
 
+			htw_stop();
 			while (start < end) {
 				int idx;
 
@@ -151,6 +154,7 @@
 			}
 			tlbw_use_hazard();
 			write_c0_entryhi(oldpid);
+			htw_start();
 		} else {
 			drop_mmu_context(mm, cpu);
 		}
@@ -174,6 +178,7 @@
 		start &= (PAGE_MASK << 1);
 		end += ((PAGE_SIZE << 1) - 1);
 		end &= (PAGE_MASK << 1);
+		htw_stop();
 
 		while (start < end) {
 			int idx;
@@ -195,6 +200,7 @@
 		}
 		tlbw_use_hazard();
 		write_c0_entryhi(pid);
+		htw_start();
 	} else {
 		local_flush_tlb_all();
 	}
@@ -214,6 +220,7 @@
 		page &= (PAGE_MASK << 1);
 		local_irq_save(flags);
 		oldpid = read_c0_entryhi();
+		htw_stop();
 		write_c0_entryhi(page | newpid);
 		mtc0_tlbw_hazard();
 		tlb_probe();
@@ -231,6 +238,7 @@
 
 	finish:
 		write_c0_entryhi(oldpid);
+		htw_start();
 		flush_itlb_vm(vma);
 		local_irq_restore(flags);
 	}
@@ -247,6 +255,7 @@
 
 	local_irq_save(flags);
 	oldpid = read_c0_entryhi();
+	htw_stop();
 	page &= (PAGE_MASK << 1);
 	write_c0_entryhi(page);
 	mtc0_tlbw_hazard();
@@ -263,6 +272,7 @@
 		tlbw_use_hazard();
 	}
 	write_c0_entryhi(oldpid);
+	htw_start();
 	flush_itlb();
 	local_irq_restore(flags);
 }
@@ -351,6 +361,7 @@
 	local_irq_save(flags);
 	/* Save old context and create impossible VPN2 value */
 	old_ctx = read_c0_entryhi();
+	htw_stop();
 	old_pagemask = read_c0_pagemask();
 	wired = read_c0_wired();
 	write_c0_wired(wired + 1);
@@ -366,6 +377,7 @@
 
 	write_c0_entryhi(old_ctx);
 	tlbw_use_hazard();	/* What is the hazard here? */
+	htw_start();
 	write_c0_pagemask(old_pagemask);
 	local_flush_tlb_all();
 	local_irq_restore(flags);