/*
 * arch/sh/mm/cache-sh5.c
 *
 * Copyright (C) 2000, 2001 Paolo Alberelli
 * Copyright (C) 2002 Benedict Gaster
 * Copyright (C) 2003 Richard Curnow
 * Copyright (C) 2003 - 2008 Paul Mundt
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <asm/tlb.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>

extern void __weak sh4__flush_region_init(void);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all() do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr) do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end) do { } while (0)
#define sh64_dcache_purge_phy_page(paddr) do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr) do { } while (0)
#endif

/*
 * The following group of functions deals with mapping and unmapping a
 * temporary page into a DTLB slot that has been set aside for exclusive
 * use.
 */
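/*
 * Note that sh64_setup_dtlb_cache_slot() disables interrupts and they
 * stay disabled until the matching sh64_teardown_dtlb_cache_slot(), so
 * the wired slot cannot be reused by anything else while a purge through
 * the temporary mapping is in progress.
 */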
static inline void
sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid,
			   unsigned long paddr)
{
	local_irq_disable();
	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
	sh64_teardown_tlb_slot(dtlb_cache_slot);
	local_irq_enable();
}

#ifndef CONFIG_ICACHE_DISABLED
static inline void sh64_icache_inv_all(void)
{
	unsigned long long addr, flag, data;
	unsigned long flags;

	addr = ICCR0;
	flag = ICCR0_ICI;
	data = 0;

	/* Make this a critical section for safety (probably not strictly necessary.) */
	local_irq_save(flags);

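	/*
	 * Read-modify-write of the ICCR0 configuration register: OR in the
	 * ICI bit to invalidate the whole I-cache, then issue synci so the
	 * invalidation takes effect before execution continues.
	 */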
	/* Without %1 it gets inexplicably wrong */
	__asm__ __volatile__ (
		"getcfg	%3, 0, %0\n\t"
		"or	%0, %2, %0\n\t"
		"putcfg	%3, 0, %0\n\t"
		"synci"
		: "=&r" (data)
		: "0" (data), "r" (flag), "r" (addr));

	local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
	/* Invalidate range of addresses [start,end] from the I-cache, where
	 * the addresses lie in the kernel superpage. */

	unsigned long long ullend, addr, aligned_start;
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
	addr = L1_CACHE_ALIGN(aligned_start);
	ullend = (unsigned long long) (signed long long) (signed long) end;

	while (addr <= ullend) {
		__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
	   Also, eaddr is page-aligned. */
	unsigned int cpu = smp_processor_id();
	unsigned long long addr, end_addr;
	unsigned long flags = 0;
	unsigned long running_asid, vma_asid;
	addr = eaddr;
	end_addr = addr + PAGE_SIZE;

	/* Check whether we can use the current ASID for the I-cache
	   invalidation.  For example, if we're called via
	   access_process_vm->flush_cache_page->here, (e.g. when reading from
	   /proc), 'running_asid' will be that of the reader, not of the
	   victim.

	   Also, note the risk that we might get pre-empted between the ASID
	   compare and blocking IRQs, and before we regain control, the
	   pid->ASID mapping changes.  However, the whole cache will get
	   invalidated when the mapping is renewed, so the worst that can
	   happen is that the loop below ends up invalidating somebody else's
	   cache entries.
	*/

	running_asid = get_asid();
	vma_asid = cpu_asid(cpu, vma->vm_mm);
	if (running_asid != vma_asid) {
		local_irq_save(flags);
		switch_and_save_asid(vma_asid);
	}
	while (addr < end_addr) {
		/* Worth unrolling a little: four 32-byte lines per iteration */
		__asm__ __volatile__("icbi %0, 0" : : "r" (addr));
		__asm__ __volatile__("icbi %0, 32" : : "r" (addr));
		__asm__ __volatile__("icbi %0, 64" : : "r" (addr));
		__asm__ __volatile__("icbi %0, 96" : : "r" (addr));
		addr += 128;
	}
	if (running_asid != vma_asid) {
		switch_and_save_asid(running_asid);
		local_irq_restore(flags);
	}
}

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	/* Used for invalidating big chunks of I-cache, i.e. assume the range
	   is whole pages.  If 'start' or 'end' is not page aligned, the code
	   is conservative and invalidates to the ends of the enclosing pages.
	   This is functionally OK, just a performance loss. */

	/* See the comments below in sh64_dcache_purge_user_range() regarding
	   the choice of algorithm.  However, for the I-cache option (2) isn't
	   available because there are no physical tags so aliases can't be
	   resolved.  The icbi instruction has to be used through the user
	   mapping.  Because icbi is cheaper than ocbp on a cache hit, it
	   would be cheaper to use the selective code for a large range than is
	   possible with the D-cache.  Just assume 64 for now as a working
	   figure.
	   */
	int n_pages;

	if (!mm)
		return;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
		sh64_icache_inv_all();
	} else {
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long after_last_page_start;
		unsigned long mm_asid, current_asid;
		unsigned long flags = 0;

		mm_asid = cpu_asid(smp_processor_id(), mm);
		current_asid = get_asid();

		if (mm_asid != current_asid) {
			/* Switch ASID and run the invalidate loop under cli */
			local_irq_save(flags);
			switch_and_save_asid(mm_asid);
		}

		aligned_start = start & PAGE_MASK;
		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

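		/*
		 * Walk the VMAs covering the range; only executable
		 * mappings (VM_EXEC) need their I-cache lines invalidated.
		 */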
		while (aligned_start < after_last_page_start) {
			struct vm_area_struct *vma;
			unsigned long vma_end;
			vma = find_vma(mm, aligned_start);
			if (!vma || (aligned_start <= vma->vm_end)) {
				/* Avoid getting stuck in an error condition */
				aligned_start += PAGE_SIZE;
				continue;
			}
			vma_end = vma->vm_end;
			if (vma->vm_flags & VM_EXEC) {
				/* Executable */
				eaddr = aligned_start;
				while (eaddr < vma_end) {
					sh64_icache_inv_user_page(vma, eaddr);
					eaddr += PAGE_SIZE;
				}
			}
			aligned_start = vma->vm_end; /* Skip to start of next region */
		}

		if (mm_asid != current_asid) {
			switch_and_save_asid(current_asid);
			local_irq_restore(flags);
		}
	}
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
	   cache hit on the virtual tag the instruction ends there, without a
	   TLB lookup. */

	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just invalidate over the range using the natural addresses.  TLB
	   miss handling will be OK (TBC).  Since it's for the current process,
	   either we're already in the right ASID context, or the ASIDs have
	   been recycled since we were last active in which case we might just
	   invalidate another process's I-cache entries: no worries, just a
	   performance drop for him. */
	aligned_start = L1_CACHE_ALIGN(start);
	addr = aligned_start;
	while (addr < ull_end) {
		__asm__ __volatile__ ("icbi %0, 0" : : "r" (addr));
		__asm__ __volatile__ ("nop");
		__asm__ __volatile__ ("nop");
		addr += L1_CACHE_BYTES;
	}
}
#endif /* !CONFIG_ICACHE_DISABLED */

#ifndef CONFIG_DCACHE_DISABLED
/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
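/* The area is sized so that L1_CACHE_BYTES << 10 should cover every set
   and way of the D-cache, with 4kB of extra slack on top. */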
#define DUMMY_ALLOCO_AREA_SIZE ((L1_CACHE_BYTES << 10) + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

static inline void sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
	/* Purge all ways in a particular block of sets, specified by the base
	   set number and number of sets.  Can handle wrap-around, if that's
	   needed. */

	int dummy_buffer_base_set;
	unsigned long long eaddr, eaddr0, eaddr1;
	int j;
	int set_offset;

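	/*
	 * Work out which cache set the dummy buffer itself starts in, so
	 * that the allocos below can be offset to land in the requested
	 * sets.
	 */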
	dummy_buffer_base_set = ((int)&dummy_alloco_area &
				 cpu_data->dcache.entry_mask) >>
				 cpu_data->dcache.entry_shift;
	set_offset = sets_to_purge_base - dummy_buffer_base_set;

	for (j = 0; j < n_sets; j++, set_offset++) {
		set_offset &= (cpu_data->dcache.sets - 1);
		eaddr0 = (unsigned long long)dummy_alloco_area +
			(set_offset << cpu_data->dcache.entry_shift);

		/*
		 * Do one alloco which hits the required set per cache
		 * way.  For write-back mode, this will purge the #ways
		 * resident lines.  There's little point unrolling this
		 * loop because the allocos stall more if they're too
		 * close together.
		 */
		eaddr1 = eaddr0 + cpu_data->dcache.way_size *
				  cpu_data->dcache.ways;

		for (eaddr = eaddr0; eaddr < eaddr1;
		     eaddr += cpu_data->dcache.way_size) {
			__asm__ __volatile__ ("alloco %0, 0" : : "r" (eaddr));
			__asm__ __volatile__ ("synco"); /* TAKum03020 */
		}

		eaddr1 = eaddr0 + cpu_data->dcache.way_size *
			 cpu_data->dcache.ways;

		for (eaddr = eaddr0; eaddr < eaddr1;
		     eaddr += cpu_data->dcache.way_size) {
			/*
			 * Load from each address.  Required because
			 * alloco is a NOP if the cache is write-through.
			 */
			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				__raw_readb((unsigned long)eaddr);
		}
	}

	/*
	 * Don't use OCBI to invalidate the lines.  That costs cycles
	 * directly.  If the dummy block is just left resident, it will
	 * naturally get evicted as required.
	 */
}

/*
 * Purge the entire contents of the dcache.  The most efficient way to
 * achieve this is to use alloco instructions on a region of unused
 * memory equal in size to the cache, thereby causing the current
 * contents to be discarded by natural eviction.  The alternative, namely
 * reading every tag, setting up a mapping for the corresponding page and
 * doing an OCBP for the line, would be much more expensive.
 */
static void sh64_dcache_purge_all(void)
{
	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);
}

/* Assumes this address and the (2**n_synbits) pages up from it aren't used
   for anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

/* Purge the physical page 'paddr' from the cache.  It's known that any
 * cache lines requiring attention have the same page colour as the
 * address 'eaddr'.
 *
 * This relies on the fact that the D-cache matches on physical tags when
 * no virtual tag matches.  So we create an alias for the original page
 * and purge through that.  (Alternatively, we could have done this by
 * switching ASID to match the original mapping and purged through that,
 * but that involves ASID switching cost + probably a TLBMISS + refill
 * anyway.)
 */
static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr,
						unsigned long eaddr)
{
	unsigned long long magic_page_start;
	unsigned long long magic_eaddr, magic_eaddr_end;

	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */
	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

	magic_eaddr = magic_page_start;
	magic_eaddr_end = magic_eaddr + PAGE_SIZE;

	while (magic_eaddr < magic_eaddr_end) {
		/* Little point in unrolling this loop - the OCBPs are blocking
		   and won't go any quicker (i.e. the loop overhead is parallel
		   to part of the OCBP execution.) */
		__asm__ __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
		magic_eaddr += L1_CACHE_BYTES;
	}

	sh64_teardown_dtlb_cache_slot();
}

/*
 * Purge a page given its physical start address, by creating a temporary
 * 1 page mapping and purging across that.  Even if we know the virtual
 * address (& vma or mm) of the page, the method here is more elegant
 * because it avoids issues of coping with page faults on the purge
 * instructions (i.e. no special-case code required in the critical path
 * in the TLB miss handling).
 */
static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
	unsigned long long eaddr_start, eaddr, eaddr_end;
	int i;

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */
	eaddr_start = MAGIC_PAGE0_START;
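	/*
	 * The physical page may be resident in the cache under any of the
	 * possible synonym colours, so map and purge it once per colour.
	 */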
	for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

		eaddr = eaddr_start;
		eaddr_end = eaddr + PAGE_SIZE;
		while (eaddr < eaddr_end) {
			__asm__ __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
			eaddr += L1_CACHE_BYTES;
		}

		sh64_teardown_dtlb_cache_slot();
		eaddr_start += PAGE_SIZE;
	}
}

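/*
 * Walk the page tables for [addr, end), which the caller guarantees lies
 * within a single page-table page, and purge each present page through a
 * coloured alias of its physical address.
 */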
static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	spinlock_t *ptl;
	unsigned long paddr;

	if (!mm)
		return; /* No way to find physical address of page */

	pgd = pgd_offset(mm, addr);
	if (pgd_bad(*pgd))
		return;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud) || pud_bad(*pud))
		return;

	pmd = pmd_offset(pud, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		entry = *pte;
		if (pte_none(entry) || !pte_present(entry))
			continue;
		paddr = pte_val(entry) & PAGE_MASK;
		sh64_dcache_purge_coloured_phy_page(paddr, addr);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}

/*
 * There are at least 5 choices for the implementation of this, with
 * pros (+), cons(-), comments(*):
 *
 * 1. ocbp each line in the range through the original user's ASID
 *    + no lines spuriously evicted
 *    - tlbmiss handling (must either handle faults on demand => extra
 *	special-case code in tlbmiss critical path), or map the page in
 *	advance (=> flush_tlb_range in advance to avoid multiple hits)
 *    - ASID switching
 *    - expensive for large ranges
 *
 * 2. temporarily map each page in the range to a special effective
 *    address and ocbp through the temporary mapping; relies on the
 *    fact that SH-5 OCB* always do TLB lookup and match on ptags (they
 *    never look at the etags)
 *    + no spurious evictions
 *    - expensive for large ranges
 *    * surely cheaper than (1)
 *
 * 3. walk all the lines in the cache, check the tags, if a match
 *    occurs create a page mapping to ocbp the line through
 *    + no spurious evictions
 *    - tag inspection overhead
 *    - (especially for small ranges)
 *    - potential cost of setting up/tearing down page mapping for
 *	every line that matches the range
 *    * cost partly independent of range size
 *
 * 4. walk all the lines in the cache, check the tags, if a match
 *    occurs use 4 * alloco to purge the line (+3 other probably
 *    innocent victims) by natural eviction
 *    + no tlb mapping overheads
 *    - spurious evictions
 *    - tag inspection overhead
 *
 * 5. implement like flush_cache_all
 *    + no tag inspection overhead
 *    - spurious evictions
 *    - bad for small ranges
 *
 * (1) can be ruled out as more expensive than (2).  (2) appears best
 * for small ranges.  The choice between (3), (4) and (5) for large
 * ranges and the range size for the large/small boundary need
 * benchmarking to determine.
 *
 * For now use approach (2) for small ranges and (5) for large ones.
 */
static void sh64_dcache_purge_user_range(struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	int n_pages = ((end - start) >> PAGE_SHIFT);

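	/*
	 * Fall back to a full purge if the range is large, or if start and
	 * end - 1 differ in the bits covered by PMD_MASK, i.e. the range
	 * spans more than one page-table page and the selective walk in
	 * sh64_dcache_purge_user_pages() could not stay within one pmd.
	 */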
	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
		sh64_dcache_purge_all();
	} else {
		/* Small range, covered by a single page table page */
		start &= PAGE_MASK;	/* should already be so */
		end = PAGE_ALIGN(end);	/* should already be so */
		sh64_dcache_purge_user_pages(mm, start, end);
	}
}
#endif /* !CONFIG_DCACHE_DISABLED */

/*
 * Invalidate the entire contents of both caches, after writing back to
 * memory any dirty data from the D-cache.
 */
static void sh5_flush_cache_all(void)
{
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}

/*
 * Invalidate an entire user-address space from both caches, after
 * writing back dirty data (e.g. for shared mmap etc).
 *
 * This could be coded selectively by inspecting all the tags then
 * doing 4*alloco on any set containing a match (as for
 * flush_cache_range), but fork/exit/execve (where this is called from)
 * are expensive anyway.
 *
 * Have to do a purge here, despite the comments re I-cache below.
 * There could be odd-coloured dirty data associated with the mm still
 * in the cache - if this gets written out through natural eviction
 * after the kernel has reused the page there will be chaos.
 *
 * The mm being torn down won't ever be active again, so any Icache
 * lines tagged with its ASID won't be visible for the rest of the
 * lifetime of this ASID cycle.  Before the ASID gets reused, there
 * will be a flush_cache_all.  Hence we don't need to touch the
 * I-cache.  This is similar to the lack of action needed in
 * flush_tlb_mm - see fault.c.
 */
static void sh5_flush_cache_mm(struct mm_struct *mm)
{
	sh64_dcache_purge_all();
}

/*
 * Invalidate (from both caches) the range [start,end) of virtual
 * addresses from the user address space specified by mm, after writing
 * back any dirty data.
 *
 * Note, 'end' is 1 byte beyond the end of the range to flush.
 */
static void sh5_flush_cache_range(struct vm_area_struct *vma,
				  unsigned long start, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	sh64_dcache_purge_user_range(mm, start, end);
	sh64_icache_inv_user_page_range(mm, start, end);
}

/*
 * Invalidate any entries in either cache for the vma within the user
 * address space vma->vm_mm for the page starting at virtual address
 * 'eaddr'.  This seems to be used primarily in breaking COW.  Note,
 * the I-cache must be searched too in case the page in question is
 * both writable and being executed from (e.g. stack trampolines.)
 *
 * Note, this is called with pte lock held.
 */
static void sh5_flush_cache_page(struct vm_area_struct *vma,
				 unsigned long eaddr, unsigned long pfn)
{
	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

	if (vma->vm_flags & VM_EXEC)
		sh64_icache_inv_user_page(vma, eaddr);
}

static void sh5_flush_dcache_page(struct page *page)
{
	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}

/*
 * Flush the range [start,end] of kernel virtual address space from
 * the I-cache.  The corresponding range must be purged from the
 * D-cache also because the SH-5 doesn't have cache snooping between
 * the caches.  The addresses will be visible through the superpage
 * mapping, therefore it's guaranteed that there are no cache entries
 * for the range in cache sets of the wrong colour.
 */
static void sh5_flush_icache_range(unsigned long start, unsigned long end)
{
	__flush_purge_region((void *)start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}

/*
 * For the address range [start,end), write back the data from the
 * D-cache and invalidate the corresponding region of the I-cache for the
 * current process.  Used to flush signal trampolines on the stack to
 * make them executable.
 */
static void sh5_flush_cache_sigtramp(unsigned long vaddr)
{
	unsigned long end = vaddr + L1_CACHE_BYTES;

	__flush_wback_region((void *)vaddr, L1_CACHE_BYTES);
	wmb();
	sh64_icache_inv_current_user_range(vaddr, end);
}

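/*
 * Hook the SH-5 implementations into the cacheflush entry points above
 * and set up what they depend on: a wired DTLB slot for coloured purges
 * and the common flush-region helpers used by the icache/sigtramp paths.
 */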
void __init sh5_cache_init(void)
{
	flush_cache_all = sh5_flush_cache_all;
	flush_cache_mm = sh5_flush_cache_mm;
	flush_cache_dup_mm = sh5_flush_cache_mm;
	flush_cache_page = sh5_flush_cache_page;
	flush_cache_range = sh5_flush_cache_range;
	flush_dcache_page = sh5_flush_dcache_page;
	flush_icache_range = sh5_flush_icache_range;
	flush_cache_sigtramp = sh5_flush_cache_sigtramp;

	/* Reserve a slot for dcache colouring in the DTLB */
	dtlb_cache_slot = sh64_get_wired_dtlb_entry();

	sh4__flush_region_init();
}