[SPARC64]: Access TSB with physical addresses when possible.
This way we don't need to lock the TSB into the TLB.
The trick is that every TSB load/store is registered in
a special instruction patch section. By default these
instructions use virtual addresses; on capable chips they are
patched at boot with the physical address load/store variants.
We can't do this on all chips because only cheetah+ and later
have the physical variant of the atomic quad load.
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc64/kernel/dtlb_miss.S b/arch/sparc64/kernel/dtlb_miss.S
index d0f1565..2ef6f6e 100644
--- a/arch/sparc64/kernel/dtlb_miss.S
+++ b/arch/sparc64/kernel/dtlb_miss.S
@@ -4,7 +4,7 @@
srlx %g6, 48, %g5 ! Get context
brz,pn %g5, kvmap_dtlb ! Context 0 processing
nop ! Delay slot (fill me)
- ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry
+ TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
nop ! Push branch to next I$ line
cmp %g4, %g6 ! Compare TAG
diff --git a/arch/sparc64/kernel/itlb_miss.S b/arch/sparc64/kernel/itlb_miss.S
index 6b6c8fe..97facce 100644
--- a/arch/sparc64/kernel/itlb_miss.S
+++ b/arch/sparc64/kernel/itlb_miss.S
@@ -4,7 +4,7 @@
srlx %g6, 48, %g5 ! Get context
brz,pn %g5, kvmap_itlb ! Context 0 processing
nop ! Delay slot (fill me)
- ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 ! Load TSB entry
+ TSB_LOAD_QUAD(%g1, %g4) ! Load TSB entry
cmp %g4, %g6 ! Compare TAG
sethi %hi(_PAGE_EXEC), %g4 ! Setup exec check
diff --git a/arch/sparc64/kernel/ktlb.S b/arch/sparc64/kernel/ktlb.S
index 2b5e71b..9b415ab 100644
--- a/arch/sparc64/kernel/ktlb.S
+++ b/arch/sparc64/kernel/ktlb.S
@@ -44,14 +44,14 @@
kvmap_itlb_vmalloc_addr:
KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_itlb_longpath)
- TSB_LOCK_TAG(%g1, %g2, %g4)
+ KTSB_LOCK_TAG(%g1, %g2, %g4)
/* Load and check PTE. */
ldxa [%g5] ASI_PHYS_USE_EC, %g5
brgez,a,pn %g5, kvmap_itlb_longpath
- stx %g0, [%g1]
+ KTSB_STORE(%g1, %g0)
- TSB_WRITE(%g1, %g5, %g6)
+ KTSB_WRITE(%g1, %g5, %g6)
/* fallthrough to TLB load */
@@ -69,9 +69,9 @@
kvmap_itlb_obp:
OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_itlb_longpath)
- TSB_LOCK_TAG(%g1, %g2, %g4)
+ KTSB_LOCK_TAG(%g1, %g2, %g4)
- TSB_WRITE(%g1, %g5, %g6)
+ KTSB_WRITE(%g1, %g5, %g6)
ba,pt %xcc, kvmap_itlb_load
nop
@@ -79,9 +79,9 @@
kvmap_dtlb_obp:
OBP_TRANS_LOOKUP(%g4, %g5, %g2, %g3, kvmap_dtlb_longpath)
- TSB_LOCK_TAG(%g1, %g2, %g4)
+ KTSB_LOCK_TAG(%g1, %g2, %g4)
- TSB_WRITE(%g1, %g5, %g6)
+ KTSB_WRITE(%g1, %g5, %g6)
ba,pt %xcc, kvmap_dtlb_load
nop
@@ -114,14 +114,14 @@
kvmap_dtlb_vmalloc_addr:
KERN_PGTABLE_WALK(%g4, %g5, %g2, kvmap_dtlb_longpath)
- TSB_LOCK_TAG(%g1, %g2, %g4)
+ KTSB_LOCK_TAG(%g1, %g2, %g4)
/* Load and check PTE. */
ldxa [%g5] ASI_PHYS_USE_EC, %g5
brgez,a,pn %g5, kvmap_dtlb_longpath
- stx %g0, [%g1]
+ KTSB_STORE(%g1, %g0)
- TSB_WRITE(%g1, %g5, %g6)
+ KTSB_WRITE(%g1, %g5, %g6)
/* fallthrough to TLB load */
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index e1dd37f..ff6a79b 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -53,7 +53,7 @@
/* Load and check PTE. */
ldxa [%g5] ASI_PHYS_USE_EC, %g5
brgez,a,pn %g5, tsb_do_fault
- stx %g0, [%g1]
+ TSB_STORE(%g1, %g0)
/* If it is larger than the base page size, don't
* bother putting it into the TSB.
@@ -64,7 +64,7 @@
and %g2, %g4, %g2
cmp %g2, %g7
bne,a,pn %xcc, tsb_tlb_reload
- stx %g0, [%g1]
+ TSB_STORE(%g1, %g0)
TSB_WRITE(%g1, %g5, %g6)
@@ -131,13 +131,13 @@
/* Insert an entry into the TSB.
*
- * %o0: TSB entry pointer
+ * %o0: TSB entry pointer (virt or phys address)
* %o1: tag
* %o2: pte
*/
.align 32
- .globl tsb_insert
-tsb_insert:
+ .globl __tsb_insert
+__tsb_insert:
rdpr %pstate, %o5
wrpr %o5, PSTATE_IE, %pstate
TSB_LOCK_TAG(%o0, %g2, %g3)
@@ -146,6 +146,31 @@
retl
nop
+ /* Flush the given TSB entry if it has the matching
+ * tag.
+ *
+ * %o0: TSB entry pointer (virt or phys address)
+ * %o1: tag
+ */
+ .align 32
+ .globl tsb_flush
+tsb_flush:
+ sethi %hi(TSB_TAG_LOCK_HIGH), %g2
+1: TSB_LOAD_TAG(%o0, %g1)
+ srlx %g1, 32, %o3
+ andcc %o3, %g2, %g0
+ bne,pn %icc, 1b
+ membar #LoadLoad
+ cmp %g1, %o1
+ bne,pt %xcc, 2f
+ clr %o3
+ TSB_CAS_TAG(%o0, %g1, %o3)
+ cmp %g1, %o3
+ bne,pn %xcc, 1b
+ nop
+2: retl
+ TSB_MEMBAR
+
/* Reload MMU related context switch state at
* schedule() time.
*
diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S
index 467d13a..71b943f 100644
--- a/arch/sparc64/kernel/vmlinux.lds.S
+++ b/arch/sparc64/kernel/vmlinux.lds.S
@@ -70,6 +70,10 @@
.con_initcall.init : { *(.con_initcall.init) }
__con_initcall_end = .;
SECURITY_INIT
+ . = ALIGN(4);
+ __tsb_phys_patch = .;
+ .tsb_phys_patch : { *(.tsb_phys_patch) }
+ __tsb_phys_patch_end = .;
. = ALIGN(8192);
__initramfs_start = .;
.init.ramfs : { *(.init.ramfs) }
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 2c21d85..4893f3e 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -39,6 +39,7 @@
#include <asm/tlb.h>
#include <asm/spitfire.h>
#include <asm/sections.h>
+#include <asm/tsb.h>
extern void device_scan(void);
@@ -244,6 +245,16 @@
: "g1", "g7");
}
+static inline void tsb_insert(struct tsb *ent, unsigned long tag, unsigned long pte)
+{
+ unsigned long tsb_addr = (unsigned long) ent;
+
+ if (tlb_type == cheetah_plus)
+ tsb_addr = __pa(tsb_addr);
+
+ __tsb_insert(tsb_addr, tag, pte);
+}
+
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
struct mm_struct *mm;
@@ -1040,6 +1051,24 @@
return ~0UL;
}
+static void __init tsb_phys_patch(void)
+{
+ struct tsb_phys_patch_entry *p;
+
+ p = &__tsb_phys_patch;
+ while (p < &__tsb_phys_patch_end) {
+ unsigned long addr = p->addr;
+
+ *(unsigned int *) addr = p->insn;
+ wmb();
+ __asm__ __volatile__("flush %0"
+ : /* no outputs */
+ : "r" (addr));
+
+ p++;
+ }
+}
+
/* paging_init() sets up the page tables */
extern void cheetah_ecache_flush_init(void);
@@ -1052,6 +1081,9 @@
unsigned long end_pfn, pages_avail, shift;
unsigned long real_end, i;
+ if (tlb_type == cheetah_plus)
+ tsb_phys_patch();
+
/* Find available physical memory... */
read_obp_memory("available", &pavail[0], &pavail_ents);
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index 1c4e5c2..787533f 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -20,12 +20,9 @@
return vaddr & (nentries - 1);
}
-static inline int tag_compare(struct tsb *entry, unsigned long vaddr, unsigned long context)
+static inline int tag_compare(unsigned long tag, unsigned long vaddr, unsigned long context)
{
- if (context == ~0UL)
- return 1;
-
- return (entry->tag == ((vaddr >> 22) | (context << 48)));
+ return (tag == ((vaddr >> 22) | (context << 48)));
}
/* TSB flushes need only occur on the processor initiating the address
@@ -41,7 +38,7 @@
unsigned long hash = tsb_hash(v, KERNEL_TSB_NENTRIES);
struct tsb *ent = &swapper_tsb[hash];
- if (tag_compare(ent, v, 0)) {
+ if (tag_compare(ent->tag, v, 0)) {
ent->tag = 0UL;
membar_storeload_storestore();
}
@@ -52,24 +49,31 @@
{
struct mm_struct *mm = mp->mm;
struct tsb *tsb = mm->context.tsb;
- unsigned long ctx = ~0UL;
unsigned long nentries = mm->context.tsb_nentries;
+ unsigned long ctx, base;
int i;
- if (CTX_VALID(mm->context))
- ctx = CTX_HWBITS(mm->context);
+ if (unlikely(!CTX_VALID(mm->context)))
+ return;
+ ctx = CTX_HWBITS(mm->context);
+
+ if (tlb_type == cheetah_plus)
+ base = __pa(tsb);
+ else
+ base = (unsigned long) tsb;
+
for (i = 0; i < mp->tlb_nr; i++) {
unsigned long v = mp->vaddrs[i];
- struct tsb *ent;
+ unsigned long tag, ent, hash;
v &= ~0x1UL;
- ent = &tsb[tsb_hash(v, nentries)];
- if (tag_compare(ent, v, ctx)) {
- ent->tag = 0UL;
- membar_storeload_storestore();
- }
+ hash = tsb_hash(v, nentries);
+ ent = base + (hash * sizeof(struct tsb));
+ tag = (v >> 22UL) | (ctx << 48UL);
+
+ tsb_flush(ent, tag);
}
}
@@ -84,6 +88,7 @@
tte = (_PAGE_VALID | _PAGE_L | _PAGE_CP |
_PAGE_CV | _PAGE_P | _PAGE_W);
tsb_paddr = __pa(mm->context.tsb);
+ BUG_ON(tsb_paddr & (tsb_bytes - 1UL));
/* Use the smallest page size that can map the whole TSB
* in one TLB entry.
@@ -144,13 +149,23 @@
BUG();
};
- tsb_reg |= base;
- tsb_reg |= (tsb_paddr & (page_sz - 1UL));
- tte |= (tsb_paddr & ~(page_sz - 1UL));
+ if (tlb_type == cheetah_plus) {
+ /* Physical mapping, no locked TLB entry for TSB. */
+ tsb_reg |= tsb_paddr;
- mm->context.tsb_reg_val = tsb_reg;
- mm->context.tsb_map_vaddr = base;
- mm->context.tsb_map_pte = tte;
+ mm->context.tsb_reg_val = tsb_reg;
+ mm->context.tsb_map_vaddr = 0;
+ mm->context.tsb_map_pte = 0;
+ } else {
+ tsb_reg |= base;
+ tsb_reg |= (tsb_paddr & (page_sz - 1UL));
+ tte |= (tsb_paddr & ~(page_sz - 1UL));
+
+ mm->context.tsb_reg_val = tsb_reg;
+ mm->context.tsb_map_vaddr = base;
+ mm->context.tsb_map_pte = tte;
+ }
+
}
/* The page tables are locked against modifications while this
@@ -168,13 +183,21 @@
for (i = 0; i < old_nentries; i++) {
register unsigned long tag asm("o4");
register unsigned long pte asm("o5");
- unsigned long v;
- unsigned int hash;
+ unsigned long v, hash;
- __asm__ __volatile__(
- "ldda [%2] %3, %0"
- : "=r" (tag), "=r" (pte)
- : "r" (&old_tsb[i]), "i" (ASI_NUCLEUS_QUAD_LDD));
+ if (tlb_type == cheetah_plus) {
+ __asm__ __volatile__(
+ "ldda [%2] %3, %0"
+ : "=r" (tag), "=r" (pte)
+ : "r" (__pa(&old_tsb[i])),
+ "i" (ASI_QUAD_LDD_PHYS));
+ } else {
+ __asm__ __volatile__(
+ "ldda [%2] %3, %0"
+ : "=r" (tag), "=r" (pte)
+ : "r" (&old_tsb[i]),
+ "i" (ASI_NUCLEUS_QUAD_LDD));
+ }
if (!tag || (tag & (1UL << TSB_TAG_LOCK_BIT)))
continue;
@@ -198,8 +221,20 @@
v |= (i & (512UL - 1UL)) << 13UL;
hash = tsb_hash(v, new_nentries);
- new_tsb[hash].tag = tag;
- new_tsb[hash].pte = pte;
+ if (tlb_type == cheetah_plus) {
+ __asm__ __volatile__(
+ "stxa %0, [%1] %2\n\t"
+ "stxa %3, [%4] %2"
+ : /* no outputs */
+ : "r" (tag),
+ "r" (__pa(&new_tsb[hash].tag)),
+ "i" (ASI_PHYS_USE_EC),
+ "r" (pte),
+ "r" (__pa(&new_tsb[hash].pte)));
+ } else {
+ new_tsb[hash].tag = tag;
+ new_tsb[hash].pte = pte;
+ }
}
}