sh: Revert TLB miss fast-path changes that broke PTEA parts.

The TLB miss fast-path changes ended up causing problems for older parts
(particularly ones using PTEA). Revert them for now; they can be added
back in once they have had some more testing.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S
index 1c52035..c19205b 100644
--- a/arch/sh/kernel/cpu/sh3/entry.S
+++ b/arch/sh/kernel/cpu/sh3/entry.S
@@ -13,10 +13,8 @@
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
-#include <asm/unistd.h>
 #include <asm/cpu/mmu_context.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
+#include <asm/unistd.h>
 
 ! NOTE:
 ! GNU as (as of 2.9.1) changes bf/s into bt/s and bra, when the address
@@ -138,14 +136,29 @@
 
 call_dpf:
 	mov.l	1f, r0
- 	mov.l	@r0, r6		! address
+	mov	r5, r8			/* keep r5 (2nd C argument) across the call below */
+	mov.l	@r0, r6			/* r6 = faulting address, read from MMU_TEA */
+	mov	r6, r9			/* keep the address across the call below */
+	mov.l	2f, r0			/* r0 = &__do_page_fault */
+	sts	pr, r10			/* jsr overwrites PR, so stash our return address */
+	jsr	@r0			/* r0 = __do_page_fault(regs, r5, address) */
+	 mov	r15, r4			/* delay slot: r4 = regs (stack pointer) */
+	!
+	tst	r0, r0			/* T = 1 when the call returned 0 */
+	bf/s	0f			/* non-zero return: fall back to do_page_fault */
+	 lds	r10, pr			/* delay slot: PR restored on both paths */
+	rts				/* returned 0: fault resolved, return to caller */
+	 nop
+0:	sti				/* NOTE(review): presumably re-enables interrupts; macro is defined outside this hunk */
 	mov.l	3f, r0
-
+	mov	r9, r6			/* restore the faulting address argument */
+	mov	r8, r5			/* restore the saved 2nd argument */
 	jmp	@r0
- 	 mov	r15, r4		! regs
+	 mov	r15, r4			/* delay slot: r4 = regs */
 
 	.align 2
 1:	.long	MMU_TEA
+2:	.long	__do_page_fault		/* tried first, before do_page_fault */
 3:	.long	do_page_fault
 
 	.align	2
@@ -332,171 +345,9 @@
 !
 !
 
-/* gas doesn't flag impossible values for mov #immediate as an error */
-#if (_PAGE_PRESENT >> 2) > 0x7f
-#error cannot load PAGE_PRESENT as an immediate
-#endif
-#if _PAGE_DIRTY > 0x7f
-#error cannot load PAGE_DIRTY as an immediate
-#endif
-#if (_PAGE_PRESENT << 2) != _PAGE_ACCESSED
-#error cannot derive PAGE_ACCESSED from PAGE_PRESENT
-#endif
-
-#if defined(CONFIG_CPU_SH4)
-#define ldmmupteh(r)	mov.l	8f, r
-#else
-#define ldmmupteh(r)	mov	#MMU_PTEH, r
-#endif
-
 	.balign 	1024,0,1024
 tlb_miss:
-#ifdef COUNT_EXCEPTIONS
-	! Increment the counts
-	mov.l	9f, k1
-	mov.l	@k1, k2
-	add	#1, k2
-	mov.l	k2, @k1
-#endif
-
-	! k0 scratch
-	! k1 pgd and pte pointers
-	! k2 faulting address
-	! k3 pgd and pte index masks
-	! k4 shift
-
-	! Load up the pgd entry (k1)
-
-	ldmmupteh(k0)			!  9 LS (latency=2)	MMU_PTEH
-
-	mov.w	4f, k3			!  8 LS (latency=2)	(PTRS_PER_PGD-1) << 2
-	mov	#-(PGDIR_SHIFT-2), k4	!  6 EX
-
-	mov.l	@(MMU_TEA-MMU_PTEH,k0), k2	! 18 LS (latency=2)
-
-	mov.l	@(MMU_TTB-MMU_PTEH,k0), k1	! 18 LS (latency=2)
-
-	mov	k2, k0			!   5 MT (latency=0)
-	shld	k4, k0			!  99 EX
-
-	and	k3, k0			!  78 EX
-
-	mov.l	@(k0, k1), k1		!  21 LS (latency=2)
-	mov	#-(PAGE_SHIFT-2), k4	!   6 EX
-
-	! Load up the pte entry (k2)
-
-	mov	k2, k0			!   5 MT (latency=0)
-	shld	k4, k0			!  99 EX
-
-	tst	k1, k1			!  86 MT
-
-	bt	20f			! 110 BR
-
-	mov.w	3f, k3			!  8 LS (latency=2)	(PTRS_PER_PTE-1) << 2
-	and	k3, k0			!  78 EX
-	mov.w	5f, k4			!   8 LS (latency=2)	_PAGE_PRESENT
-
-	mov.l	@(k0, k1), k2		!  21 LS (latency=2)
-	add	k0, k1			!  49 EX
-
-#ifdef CONFIG_CPU_HAS_PTEA
-	! Test the entry for present and _PAGE_ACCESSED
-
-	mov	#-28, k3		!   6 EX
-	mov	k2, k0			!   5 MT (latency=0)
-
-	tst	k4, k2			!  68 MT
-	shld	k3, k0			!  99 EX
-
-	bt	20f			! 110 BR
-
-	! Set PTEA register
-	! MMU_PTEA = ((pteval >> 28) & 0xe) | (pteval & 0x1)
-	!
-	! k0=pte>>28, k1=pte*, k2=pte, k3=<unused>, k4=_PAGE_PRESENT
-
-	and	#0xe, k0		!  79 EX
-
-	mov	k0, k3			!   5 MT (latency=0)
-	mov	k2, k0			!   5 MT (latency=0)
-
-	and	#1, k0			!  79 EX
-
-	or	k0, k3			!  82 EX
-
-	ldmmupteh(k0)			!   9 LS (latency=2)
-	shll2	k4			! 101 EX		_PAGE_ACCESSED
-
-	tst	k4, k2			!  68 MT
-
-	mov.l	k3, @(MMU_PTEA-MMU_PTEH,k0)	! 27 LS
-
-	mov.l	7f, k3			!   9 LS (latency=2)	_PAGE_FLAGS_HARDWARE_MASK
-
-	! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED
-#else
-
-	! Test the entry for present and _PAGE_ACCESSED
-
-	mov.l	7f, k3			!   9 LS (latency=2)	_PAGE_FLAGS_HARDWARE_MASK
-	tst	k4, k2			!  68 MT
-
-	shll2	k4			! 101 EX		_PAGE_ACCESSED
-	ldmmupteh(k0)			!   9 LS (latency=2)
-
-	bt	20f			! 110 BR
-	tst	k4, k2			!  68 MT
-
-	! k0=MMU_PTEH, k1=pte*, k2=pte, k3=_PAGE_FLAGS_HARDWARE, k4=_PAGE_ACCESSED
-
-#endif
-
-	! Set up the entry
-
-	and	k2, k3			!  78 EX
-	bt/s	10f			! 108 BR
-
-	 mov.l	k3, @(MMU_PTEL-MMU_PTEH,k0)	! 27 LS
-
-	ldtlb				! 128 CO
-
-	! At least one instruction between ldtlb and rte
-	nop				! 119 NOP
-
-	rte				! 126 CO
-
-	 nop				! 119 NOP
-
-
-10:	or	k4, k2			!  82 EX
-
-	ldtlb				! 128 CO
-
-	! At least one instruction between ldtlb and rte
-	mov.l	k2, @k1			!  27 LS
-
-	rte				! 126 CO
-
-	! Note we cannot execute mov here, because it is executed after
-	! restoring SSR, so would be executed in user space.
-	 nop				! 119 NOP
-
-
-	.align 5
-	! Once cache line if possible...
-1:	.long	swapper_pg_dir
-3:	.short	(PTRS_PER_PTE-1) << 2
-4:	.short	(PTRS_PER_PGD-1) << 2
-5:	.long	_PAGE_PRESENT
-7:	.long	_PAGE_FLAGS_HARDWARE_MASK
-8:	.long	MMU_PTEH
-#ifdef COUNT_EXCEPTIONS
-9:	.long	exception_count_miss
-#endif
-
-	! Either pgd or pte not present
-20:	mov.l	1f, k2
+	mov.l	1f, k2
 	mov.l	4f, k3
 	bra	handle_exception
 	 mov.l	@k2, k2
@@ -647,15 +498,6 @@
 	bf	interrupt_exception
 	shlr2	r8
 	shlr	r8
-
-#ifdef COUNT_EXCEPTIONS
-	mov.l	5f, r9
-	add	r8, r9
-	mov.l	@r9, r10
-	add	#1, r10
-	mov.l	r10, @r9
-#endif
-
 	mov.l	4f, r9
 	add	r8, r9
 	mov.l	@r9, r9
@@ -669,9 +511,6 @@
 2:	.long	0x000080f0	! FD=1, IMASK=15
 3:	.long	0xcfffffff	! RB=0, BL=0
 4:	.long	exception_handling_table
-#ifdef COUNT_EXCEPTIONS
-5:	.long	exception_count_table
-#endif
 
 interrupt_exception:
 	mov.l	1f, r9
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 716ebf5..fa5d7f0 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -17,6 +17,7 @@
 #include <linux/kprobes.h>
 #include <asm/system.h>
 #include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
 #include <asm/kgdb.h>
 
 extern void die(const char *,struct pt_regs *,long);
@@ -224,3 +225,89 @@
 	if (!user_mode(regs))
 		goto no_context;
 }
+
+#ifdef CONFIG_SH_STORE_QUEUES
+/*
+ * This is a special case for the SH-4 store queues, as pages for this
+ * space still need to be faulted in before it's possible to flush the
+ * store queue cache for writeout to the remapped region.
+ */
+#define P3_ADDR_MAX		(P4SEG_STORE_QUE + 0x04000000)
+#else
+#define P3_ADDR_MAX		P4SEG
+#endif
+
+/*
+ * Called with interrupts disabled.
+ */
+asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
+					 unsigned long writeaccess, /* non-zero => treated as a write */
+					 unsigned long address) /* address whose PTE is looked up */
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t entry;
+	struct mm_struct *mm = current->mm;
+	spinlock_t *ptl;
+	int ret = 1; /* 1 = not resolved here, 0 = PTE updated */
+
+#ifdef CONFIG_SH_KGDB
+	if (kgdb_nofault && kgdb_bus_err_hook)
+		kgdb_bus_err_hook();
+#endif
+
+	/*
+	 * We don't take page faults for P1, P2, and parts of P4, these
+	 * are always mapped, whether it be due to legacy behaviour in
+	 * 29-bit mode, or due to PMB configuration in 32-bit mode.
+	 */
+	if (address >= P3SEG && address < P3_ADDR_MAX) {
+		pgd = pgd_offset_k(address); /* walk the kernel page tables */
+		mm = NULL; /* NULL mm => the PTE lock is not taken below */
+	} else {
+		if (unlikely(address >= TASK_SIZE || !mm))
+			return 1; /* outside the user range, or no mm to walk */
+
+		pgd = pgd_offset(mm, address);
+	}
+
+	pud = pud_offset(pgd, address);
+	if (pud_none_or_clear_bad(pud))
+		return 1; /* no usable pud entry: report as unresolved */
+	pmd = pmd_offset(pud, address);
+	if (pmd_none_or_clear_bad(pmd))
+		return 1; /* no usable pmd entry: report as unresolved */
+
+	if (mm)
+		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+	else
+		pte = pte_offset_kernel(pmd, address);
+
+	entry = *pte;
+	if (unlikely(pte_none(entry) || pte_not_present(entry)))
+		goto unlock; /* nothing mapped here: ret stays 1 */
+	if (unlikely(writeaccess && !pte_write(entry)))
+		goto unlock; /* write to a non-writable PTE: ret stays 1 */
+
+	if (writeaccess)
+		entry = pte_mkdirty(entry);
+	entry = pte_mkyoung(entry); /* applied on every resolved fault */
+
+#ifdef CONFIG_CPU_SH4
+	/*
+	 * ITLB is not affected by "ldtlb" instruction.
+	 * So, we need to flush the entry by ourselves.
+	 */
+	local_flush_tlb_one(get_asid(), address & PAGE_MASK);
+#endif
+
+	set_pte(pte, entry); /* store the updated entry back into the page table */
+	update_mmu_cache(NULL, address, entry); /* NOTE(review): presumably loads the TLB entry - confirm */
+	ret = 0; /* resolved */
+unlock:
+	if (mm)
+		pte_unmap_unlock(pte, ptl); /* pairs with pte_offset_map_lock() above */
+	return ret;
+}