Blame - arch/nds32/mm/fault.c - SHIFTPHONES/kernel/common

blob: 68d5f2a27f38b0e5a60335bd20d34f17317db1f0 [file] [log] [blame]

Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	// Copyright (C) 2005-2017 Andes Technology Corporation
				3
				4	#include <linux/extable.h>
				5	#include <linux/module.h>
				6	#include <linux/signal.h>
				7	#include <linux/ptrace.h>
				8	#include <linux/mm.h>
				9	#include <linux/init.h>
				10	#include <linux/hardirq.h>
				11	#include <linux/uaccess.h>
Nickhu	ebd0975	2018-10-25 10:24:15 +0800	[diff] [blame^]	12	#include <linux/perf_event.h>
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	13
				14	#include <asm/pgtable.h>
				15	#include <asm/tlbflush.h>
				16
				17	extern void die(const char str, struct pt_regs regs, long err);
				18
				19	/*
				20	* This is useful to dump out the page tables associated with
				21	* 'addr' in mm 'mm'.
				22	*/
				23	void show_pte(struct mm_struct *mm, unsigned long addr)
				24	{
				25	pgd_t *pgd;
				26	if (!mm)
				27	mm = &init_mm;
				28
				29	pr_alert("pgd = %p\n", mm->pgd);
				30	pgd = pgd_offset(mm, addr);
				31	pr_alert("[%08lx] pgd=%08lx", addr, pgd_val(pgd));
				32
				33	do {
				34	pmd_t *pmd;
				35
				36	if (pgd_none(*pgd))
				37	break;
				38
				39	if (pgd_bad(*pgd)) {
				40	pr_alert("(bad)");
				41	break;
				42	}
				43
				44	pmd = pmd_offset(pgd, addr);
				45	#if PTRS_PER_PMD != 1
				46	pr_alert(", pmd=%08lx", pmd_val(pmd));
				47	#endif
				48
				49	if (pmd_none(*pmd))
				50	break;
				51
				52	if (pmd_bad(*pmd)) {
				53	pr_alert("(bad)");
				54	break;
				55	}
				56
				57	if (IS_ENABLED(CONFIG_HIGHMEM))
				58	{
				59	pte_t *pte;
				60	/* We must not map this if we have highmem enabled */
				61	pte = pte_offset_map(pmd, addr);
				62	pr_alert(", pte=%08lx", pte_val(pte));
				63	pte_unmap(pte);
				64	}
				65	} while (0);
				66
				67	pr_alert("\n");
				68	}
				69
				70	void do_page_fault(unsigned long entry, unsigned long addr,
				71	unsigned int error_code, struct pt_regs *regs)
				72	{
				73	struct task_struct *tsk;
				74	struct mm_struct *mm;
				75	struct vm_area_struct *vma;
Eric W. Biederman	d808e91	2018-04-16 14:58:34 -0500	[diff] [blame]	76	int si_code;
Souptick Joarder	50a7ca3	2018-08-17 15:44:47 -0700	[diff] [blame]	77	vm_fault_t fault;
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	78	unsigned int mask = VM_READ \| VM_WRITE \| VM_EXEC;
				79	unsigned int flags = FAULT_FLAG_ALLOW_RETRY \| FAULT_FLAG_KILLABLE;
				80
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	81	error_code = error_code & (ITYPE_mskINST \| ITYPE_mskETYPE);
				82	tsk = current;
				83	mm = tsk->mm;
Eric W. Biederman	d808e91	2018-04-16 14:58:34 -0500	[diff] [blame]	84	si_code = SEGV_MAPERR;
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	85	/*
				86	* We fault-in kernel-space virtual memory on-demand. The
				87	* 'reference' page table is init_mm.pgd.
				88	*
				89	* NOTE! We MUST NOT take any locks for this case. We may
				90	* be in an interrupt or a critical region, and should
				91	* only copy the information from the master page table,
				92	* nothing more.
				93	*/
				94	if (addr >= TASK_SIZE) {
				95	if (user_mode(regs))
				96	goto bad_area_nosemaphore;
				97
				98	if (addr >= TASK_SIZE && addr < VMALLOC_END
				99	&& (entry == ENTRY_PTE_NOT_PRESENT))
				100	goto vmalloc_fault;
				101	else
				102	goto no_context;
				103	}
				104
				105	/* Send a signal to the task for handling the unalignment access. */
				106	if (entry == ENTRY_GENERAL_EXCPETION
				107	&& error_code == ETYPE_ALIGNMENT_CHECK) {
				108	if (user_mode(regs))
				109	goto bad_area_nosemaphore;
				110	else
				111	goto no_context;
				112	}
				113
				114	/*
				115	* If we're in an interrupt or have no user
				116	* context, we must not take the fault..
				117	*/
				118	if (unlikely(faulthandler_disabled() \|\| !mm))
				119	goto no_context;
				120
				121	/*
				122	* As per x86, we may deadlock here. However, since the kernel only
				123	* validly references user space from well defined areas of the code,
				124	* we can bug out early if this is from code which shouldn't.
				125	*/
				126	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
				127	if (!user_mode(regs) &&
				128	!search_exception_tables(instruction_pointer(regs)))
				129	goto no_context;
				130	retry:
				131	down_read(&mm->mmap_sem);
				132	} else {
				133	/*
				134	* The above down_read_trylock() might have succeeded in which
				135	* case, we'll have missed the might_sleep() from down_read().
				136	*/
				137	might_sleep();
				138	if (IS_ENABLED(CONFIG_DEBUG_VM)) {
				139	if (!user_mode(regs) &&
				140	!search_exception_tables(instruction_pointer(regs)))
				141	goto no_context;
				142	}
				143	}
				144
				145	vma = find_vma(mm, addr);
				146
				147	if (unlikely(!vma))
				148	goto bad_area;
				149
				150	if (vma->vm_start <= addr)
				151	goto good_area;
				152
				153	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
				154	goto bad_area;
				155
				156	if (unlikely(expand_stack(vma, addr)))
				157	goto bad_area;
				158
				159	/*
				160	* Ok, we have a good vm_area for this memory access, so
				161	* we can handle it..
				162	*/
				163
				164	good_area:
Eric W. Biederman	d808e91	2018-04-16 14:58:34 -0500	[diff] [blame]	165	si_code = SEGV_ACCERR;
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	166
				167	/* first do some preliminary protection checks */
				168	if (entry == ENTRY_PTE_NOT_PRESENT) {
				169	if (error_code & ITYPE_mskINST)
				170	mask = VM_EXEC;
				171	else {
				172	mask = VM_READ \| VM_WRITE;
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	173	}
				174	} else if (entry == ENTRY_TLB_MISC) {
				175	switch (error_code & ITYPE_mskETYPE) {
				176	case RD_PROT:
				177	mask = VM_READ;
				178	break;
				179	case WRT_PROT:
				180	mask = VM_WRITE;
				181	flags \|= FAULT_FLAG_WRITE;
				182	break;
				183	case NOEXEC:
				184	mask = VM_EXEC;
				185	break;
				186	case PAGE_MODIFY:
				187	mask = VM_WRITE;
				188	flags \|= FAULT_FLAG_WRITE;
				189	break;
				190	case ACC_BIT:
				191	BUG();
				192	default:
				193	break;
				194	}
				195
				196	}
				197	if (!(vma->vm_flags & mask))
				198	goto bad_area;
				199
				200	/*
				201	* If for any reason at all we couldn't handle the fault,
				202	* make sure we exit gracefully rather than endlessly redo
				203	* the fault.
				204	*/
				205
				206	fault = handle_mm_fault(vma, addr, flags);
				207
				208	/*
				209	* If we need to retry but a fatal signal is pending, handle the
				210	* signal first. We do not need to release the mmap_sem because it
				211	* would already be released in __lock_page_or_retry in mm/filemap.c.
				212	*/
				213	if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
				214	if (!user_mode(regs))
				215	goto no_context;
				216	return;
				217	}
				218
				219	if (unlikely(fault & VM_FAULT_ERROR)) {
				220	if (fault & VM_FAULT_OOM)
				221	goto out_of_memory;
				222	else if (fault & VM_FAULT_SIGBUS)
				223	goto do_sigbus;
				224	else
				225	goto bad_area;
				226	}
				227
				228	/*
				229	* Major/minor page fault accounting is only done on the initial
				230	* attempt. If we go through a retry, it is extremely likely that the
				231	* page will be found in page cache at that point.
				232	*/
Nickhu	ebd0975	2018-10-25 10:24:15 +0800	[diff] [blame^]	233	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	234	if (flags & FAULT_FLAG_ALLOW_RETRY) {
Nickhu	ebd0975	2018-10-25 10:24:15 +0800	[diff] [blame^]	235	if (fault & VM_FAULT_MAJOR) {
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	236	tsk->maj_flt++;
Nickhu	ebd0975	2018-10-25 10:24:15 +0800	[diff] [blame^]	237	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				238	1, regs, addr);
				239	} else {
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	240	tsk->min_flt++;
Nickhu	ebd0975	2018-10-25 10:24:15 +0800	[diff] [blame^]	241	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				242	1, regs, addr);
				243	}
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	244	if (fault & VM_FAULT_RETRY) {
				245	flags &= ~FAULT_FLAG_ALLOW_RETRY;
				246	flags \|= FAULT_FLAG_TRIED;
				247
				248	/* No need to up_read(&mm->mmap_sem) as we would
				249	* have already released it in __lock_page_or_retry
				250	* in mm/filemap.c.
				251	*/
				252	goto retry;
				253	}
				254	}
				255
				256	up_read(&mm->mmap_sem);
				257	return;
				258
				259	/*
				260	* Something tried to access memory that isn't in our memory map..
				261	* Fix it, but check if it's kernel or user first..
				262	*/
				263	bad_area:
				264	up_read(&mm->mmap_sem);
				265
				266	bad_area_nosemaphore:
				267
				268	/* User mode accesses just cause a SIGSEGV */
				269
				270	if (user_mode(regs)) {
				271	tsk->thread.address = addr;
				272	tsk->thread.error_code = error_code;
				273	tsk->thread.trap_no = entry;
Eric W. Biederman	d808e91	2018-04-16 14:58:34 -0500	[diff] [blame]	274	force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk);
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	275	return;
				276	}
				277
				278	no_context:
				279
				280	/* Are we prepared to handle this kernel fault?
				281	*
				282	* (The kernel has valid exception-points in the source
				283	* when it acesses user-memory. When it fails in one
				284	* of those points, we find it in a table and do a jump
				285	* to some fixup code that loads an appropriate error
				286	* code)
				287	*/
				288
				289	{
				290	const struct exception_table_entry *entry;
				291
				292	if ((entry =
				293	search_exception_tables(instruction_pointer(regs))) !=
				294	NULL) {
				295	/* Adjust the instruction pointer in the stackframe */
				296	instruction_pointer(regs) = entry->fixup;
				297	return;
				298	}
				299	}
				300
				301	/*
				302	* Oops. The kernel tried to access some bad page. We'll have to
				303	* terminate things with extreme prejudice.
				304	*/
				305
				306	bust_spinlocks(1);
				307	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
				308	(addr < PAGE_SIZE) ? "NULL pointer dereference" :
				309	"paging request", addr);
				310
				311	show_pte(mm, addr);
				312	die("Oops", regs, error_code);
				313	bust_spinlocks(0);
				314	do_exit(SIGKILL);
				315
				316	return;
				317
				318	/*
				319	* We ran out of memory, or some other thing happened to us that made
				320	* us unable to handle the page fault gracefully.
				321	*/
				322
				323	out_of_memory:
				324	up_read(&mm->mmap_sem);
				325	if (!user_mode(regs))
				326	goto no_context;
				327	pagefault_out_of_memory();
				328	return;
				329
				330	do_sigbus:
				331	up_read(&mm->mmap_sem);
				332
				333	/* Kernel mode? Handle exceptions or die */
				334	if (!user_mode(regs))
				335	goto no_context;
				336
				337	/*
				338	* Send a sigbus
				339	*/
				340	tsk->thread.address = addr;
				341	tsk->thread.error_code = error_code;
				342	tsk->thread.trap_no = entry;
Eric W. Biederman	d808e91	2018-04-16 14:58:34 -0500	[diff] [blame]	343	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk);
Greentime Hu	664eec4	2017-10-24 15:25:00 +0800	[diff] [blame]	344
				345	return;
				346
				347	vmalloc_fault:
				348	{
				349	/*
				350	* Synchronize this task's top level page-table
				351	* with the 'reference' page table.
				352	*
				353	* Use current_pgd instead of tsk->active_mm->pgd
				354	* since the latter might be unavailable if this
				355	* code is executed in a misfortunately run irq
				356	* (like inside schedule() between switch_mm and
				357	* switch_to...).
				358	*/
				359
				360	unsigned int index = pgd_index(addr);
				361	pgd_t pgd, pgd_k;
				362	pud_t pud, pud_k;
				363	pmd_t pmd, pmd_k;
				364	pte_t *pte_k;
				365
				366	pgd = (pgd_t *) __va(__nds32__mfsr(NDS32_SR_L1_PPTB)) + index;
				367	pgd_k = init_mm.pgd + index;
				368
				369	if (!pgd_present(*pgd_k))
				370	goto no_context;
				371
				372	pud = pud_offset(pgd, addr);
				373	pud_k = pud_offset(pgd_k, addr);
				374	if (!pud_present(*pud_k))
				375	goto no_context;
				376
				377	pmd = pmd_offset(pud, addr);
				378	pmd_k = pmd_offset(pud_k, addr);
				379	if (!pmd_present(*pmd_k))
				380	goto no_context;
				381
				382	if (!pmd_present(*pmd))
				383	set_pmd(pmd, *pmd_k);
				384	else
				385	BUG_ON(pmd_page(pmd) != pmd_page(pmd_k));
				386
				387	/*
				388	* Since the vmalloc area is global, we don't
				389	* need to copy individual PTE's, it is enough to
				390	* copy the pgd pointer into the pte page of the
				391	* root task. If that is there, we'll find our pte if
				392	* it exists.
				393	*/
				394
				395	/* Make sure the actual PTE exists as well to
				396	* catch kernel vmalloc-area accesses to non-mapped
				397	* addres. If we don't do this, this will just
				398	* silently loop forever.
				399	*/
				400
				401	pte_k = pte_offset_kernel(pmd_k, addr);
				402	if (!pte_present(*pte_k))
				403	goto no_context;
				404
				405	return;
				406	}
				407	}