Merge tag 'for-riscv' of https://git.kernel.org/pub/scm/virt/kvm/kvm.git into for-next
RISC-V hypervisor (H) extension CSR definitions, shared by the KVM and RISC-V trees.
* tag 'for-riscv' of ssh://gitolite.kernel.org/pub/scm/virt/kvm/kvm: (301 commits)
RISC-V: Add hypervisor extension related CSR defines
KVM: selftests: Ensure all migrations are performed when test is affined
KVM: x86: Swap order of CPUID entry "index" vs. "significant flag" checks
ptp: Fix ptp_kvm_getcrosststamp issue for x86 ptp_kvm
x86/kvmclock: Move this_cpu_pvti into kvmclock.h
KVM: s390: Function documentation fixes
selftests: KVM: Don't clobber XMM register when read
KVM: VMX: Fix a TSX_CTRL_CPUID_CLEAR field mask issue
selftests: KVM: Explicitly use movq to read xmm registers
selftests: KVM: Call ucall_init when setting up in rseq_test
KVM: Remove tlbs_dirty
KVM: X86: Synchronize the shadow pagetable before link it
KVM: X86: Fix missed remote tlb flush in rmap_write_protect()
KVM: x86: nSVM: don't copy virt_ext from vmcb12
KVM: x86: nSVM: test eax for 4K alignment for GP errata workaround
KVM: x86: selftests: test simultaneous uses of V_IRQ from L1 and L0
KVM: x86: nSVM: restore int_vector in svm_clear_vintr
kvm: x86: Add AMD PMU MSRs to msrs_to_save_all[]
KVM: x86: nVMX: re-evaluate emulation_required on nested VM exit
KVM: x86: nVMX: don't fail nested VM entry on invalid guest state if !from_vmentry
...
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 301a542..dcd7afc 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -62,6 +62,7 @@
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL if MMU && 64BIT
+ select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
select HANDLE_DOMAIN_IRQ
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 0eb4568..9247407 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -140,3 +140,13 @@
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
+
+PHONY += rv32_randconfig
+rv32_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/32-bit.config \
+ -f $(srctree)/Makefile randconfig
+
+PHONY += rv64_randconfig
+rv64_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/64-bit.config \
+ -f $(srctree)/Makefile randconfig
diff --git a/arch/riscv/configs/32-bit.config b/arch/riscv/configs/32-bit.config
new file mode 100644
index 0000000..43f4132
--- /dev/null
+++ b/arch/riscv/configs/32-bit.config
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV32I=y
+CONFIG_32BIT=y
diff --git a/arch/riscv/configs/64-bit.config b/arch/riscv/configs/64-bit.config
new file mode 100644
index 0000000..313edc5
--- /dev/null
+++ b/arch/riscv/configs/64-bit.config
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_64BIT=y
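The two Kconfig fragments above are the seeds for the new rv32_randconfig/rv64_randconfig wrappers in the Makefile: passing them via KCONFIG_ALLCONFIG tells randconfig to hold the listed symbols fixed while randomizing everything else, so the result is always a valid RV32I or RV64I configuration. A typical invocation, assuming a riscv cross-toolchain (the CROSS_COMPILE prefix below is an example, not something the patch mandates):

    $ make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- rv64_randconfig
    $ make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- -j$(nproc) all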
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 109c97e..b3e5ff0 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
#define page_to_bus(page) (page_to_phys(page))
#define phys_to_page(paddr) (pfn_to_page(phys_to_pfn(paddr)))
+#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
+
#ifdef CONFIG_FLATMEM
#define pfn_valid(pfn) \
(((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
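sym_to_pfn() exists to support the vDSO rework below: vdso_start and vdso_data live in the kernel image, not the linear map, so their page frame numbers must come from __pa_symbol() rather than virt_to_page(). A minimal sketch of the intended use, mirroring __vdso_init() further down:

    /* Sketch: resolve a kernel-image symbol to its struct page. */
    unsigned long pfn = sym_to_pfn(vdso_start);
    struct page *page = pfn_to_page(pfn);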
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index b933b15..34fbb3e 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -82,4 +82,5 @@ static inline int syscall_get_arch(struct task_struct *task)
#endif
}
+asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
#endif /* _ASM_RISCV_SYSCALL_H */
diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index 893e471..e310b22 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -16,18 +16,24 @@
-#ifdef CONFIG_MMU
-#include <linux/types.h>
-#include <generated/vdso-offsets.h>
-#ifndef CONFIG_GENERIC_TIME_VSYSCALL
-struct vdso_data {
-};
-#endif
+/*
+ * All systems with an MMU have a VDSO, but systems without an MMU don't
+ * support shared libraries and therefore don't have one.
+ */
+#ifdef CONFIG_MMU
+
+#define __VVAR_PAGES 2
+
+#ifndef __ASSEMBLY__
+#include <generated/vdso-offsets.h>
#define VDSO_SYMBOL(base, name) \
(void __user *)((unsigned long)(base) + __vdso_##name##_offset)
-#endif /* CONFIG_MMU */
-asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_VDSO_H */
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
index f839f16..77d9c2f 100644
--- a/arch/riscv/include/asm/vdso/gettimeofday.h
+++ b/arch/riscv/include/asm/vdso/gettimeofday.h
@@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
return _vdso_data;
}
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+ return _timens_data;
+}
+#endif
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
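__arch_get_timens_vdso_data() is the hook the generic vDSO uses to find the real clock data once it notices that the page at _vdso_data actually belongs to a time namespace. A simplified sketch of the call pattern on the generic side (the real logic lives in lib/vdso/gettimeofday.c):

    /* Sketch: if the data page is a namespace page, its clock_mode
     * field reads VDSO_CLOCKMODE_TIMENS and the caller switches to
     * the real data one page away. */
    const struct vdso_data *vd = __arch_get_vdso_data();
    if (IS_ENABLED(CONFIG_TIME_NS) && vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
            vd = __arch_get_timens_vdso_data(vd);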
diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
index ee5878d..9c842c4 100644
--- a/arch/riscv/kernel/reset.c
+++ b/arch/riscv/kernel/reset.c
@@ -12,7 +12,7 @@ static void default_power_off(void)
wait_for_interrupt();
}
-void (*pm_power_off)(void) = default_power_off;
+void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
void machine_restart(char *cmd)
@@ -23,10 +23,16 @@ void machine_restart(char *cmd)
void machine_halt(void)
{
- pm_power_off();
+ if (pm_power_off != NULL)
+ pm_power_off();
+ else
+ default_power_off();
}
void machine_power_off(void)
{
- pm_power_off();
+ if (pm_power_off != NULL)
+ pm_power_off();
+ else
+ default_power_off();
}
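Leaving pm_power_off NULL by default makes RISC-V follow the convention used elsewhere: a power-off driver claims the hook only if nobody else has, and default_power_off() remains the fallback of last resort. A hedged sketch of a hypothetical driver registering itself (foo_power_off and foo_pm_probe are illustrative names, not part of this patch):

    #include <linux/pm.h>
    #include <linux/platform_device.h>

    static void foo_power_off(void)
    {
            /* ask the PMIC/firmware to cut power here */
    }

    static int foo_pm_probe(struct platform_device *pdev)
    {
            /* claim the hook only if no other driver has */
            if (!pm_power_off)
                    pm_power_off = foo_power_off;
            return 0;
    }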
diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c
index a63c667..44b1420 100644
--- a/arch/riscv/kernel/syscall_table.c
+++ b/arch/riscv/kernel/syscall_table.c
@@ -7,7 +7,6 @@
#include <linux/linkage.h>
#include <linux/syscalls.h>
#include <asm-generic/syscalls.h>
-#include <asm/vdso.h>
#include <asm/syscall.h>
#undef __SYSCALL
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index 25a3b88..a9436a6 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -12,16 +12,25 @@
#include <linux/binfmts.h>
#include <linux/err.h>
#include <asm/page.h>
+#include <asm/vdso.h>
+#include <linux/time_namespace.h>
+
#ifdef CONFIG_GENERIC_TIME_VSYSCALL
#include <vdso/datapage.h>
#else
-#include <asm/vdso.h>
+struct vdso_data {
+};
#endif
extern char vdso_start[], vdso_end[];
-static unsigned int vdso_pages __ro_after_init;
-static struct page **vdso_pagelist __ro_after_init;
+enum vvar_pages {
+ VVAR_DATA_PAGE_OFFSET,
+ VVAR_TIMENS_PAGE_OFFSET,
+ VVAR_NR_PAGES,
+};
+
+#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT)
/*
* The vDSO data page.
@@ -32,80 +41,228 @@ static union {
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
-static int __init vdso_init(void)
+struct __vdso_info {
+ const char *name;
+ const char *vdso_code_start;
+ const char *vdso_code_end;
+ unsigned long vdso_pages;
+ /* Data Mapping */
+ struct vm_special_mapping *dm;
+ /* Code Mapping */
+ struct vm_special_mapping *cm;
+};
+
+static struct __vdso_info vdso_info __ro_after_init = {
+ .name = "vdso",
+ .vdso_code_start = vdso_start,
+ .vdso_code_end = vdso_end,
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+ struct vm_area_struct *new_vma)
{
- unsigned int i;
-
- vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
- vdso_pagelist =
- kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL);
- if (unlikely(vdso_pagelist == NULL)) {
- pr_err("vdso: pagelist allocation failed\n");
- return -ENOMEM;
- }
-
- for (i = 0; i < vdso_pages; i++) {
- struct page *pg;
-
- pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
- vdso_pagelist[i] = pg;
- }
- vdso_pagelist[i] = virt_to_page(vdso_data);
+ current->mm->context.vdso = (void *)new_vma->vm_start;
return 0;
}
-arch_initcall(vdso_init);
-int arch_setup_additional_pages(struct linux_binprm *bprm,
- int uses_interp)
+static int __init __vdso_init(void)
{
- struct mm_struct *mm = current->mm;
- unsigned long vdso_base, vdso_len;
- int ret;
+ unsigned int i;
+ struct page **vdso_pagelist;
+ unsigned long pfn;
- vdso_len = (vdso_pages + 1) << PAGE_SHIFT;
-
- mmap_write_lock(mm);
- vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
- if (IS_ERR_VALUE(vdso_base)) {
- ret = vdso_base;
- goto end;
+ if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
+ pr_err("vDSO is not a valid ELF object!\n");
+ return -EINVAL;
}
- /*
- * Put vDSO base into mm struct. We need to do this before calling
- * install_special_mapping or the perf counter mmap tracking code
- * will fail to recognise it as a vDSO (since arch_vma_name fails).
- */
- mm->context.vdso = (void *)vdso_base;
+ vdso_info.vdso_pages = (
+ vdso_info.vdso_code_end -
+ vdso_info.vdso_code_start) >>
+ PAGE_SHIFT;
- ret =
- install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
- (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
- vdso_pagelist);
+ vdso_pagelist = kcalloc(vdso_info.vdso_pages,
+ sizeof(struct page *),
+ GFP_KERNEL);
+ if (vdso_pagelist == NULL)
+ return -ENOMEM;
- if (unlikely(ret)) {
- mm->context.vdso = NULL;
- goto end;
- }
+ /* Grab the vDSO code pages. */
+ pfn = sym_to_pfn(vdso_info.vdso_code_start);
- vdso_base += (vdso_pages << PAGE_SHIFT);
- ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
- (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
+ for (i = 0; i < vdso_info.vdso_pages; i++)
+ vdso_pagelist[i] = pfn_to_page(pfn + i);
- if (unlikely(ret))
- mm->context.vdso = NULL;
-end:
- mmap_write_unlock(mm);
- return ret;
+ vdso_info.cm->pages = vdso_pagelist;
+
+ return 0;
}
-const char *arch_vma_name(struct vm_area_struct *vma)
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
- if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
- return "[vdso]";
- if (vma->vm_mm && (vma->vm_start ==
- (long)vma->vm_mm->context.vdso + PAGE_SIZE))
- return "[vdso_data]";
+ return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
+
+ mmap_read_lock(mm);
+
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long size = vma->vm_end - vma->vm_start;
+
+ if (vma_is_special_mapping(vma, vdso_info.dm))
+ zap_page_range(vma, vma->vm_start, size);
+ }
+
+ mmap_read_unlock(mm);
+ return 0;
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ if (likely(vma->vm_mm == current->mm))
+ return current->nsproxy->time_ns->vvar_page;
+
+ /*
+ * VM_PFNMAP | VM_IO protect .fault() handler from being called
+ * through interfaces like /proc/$pid/mem or
+ * process_vm_{readv,writev}() as long as there's no .access()
+ * in special_mapping_vmops.
+ * For more details check_vma_flags() and __access_remote_vm()
+ */
+ WARN(1, "vvar_page accessed remotely");
+
return NULL;
}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *timens_page = find_timens_vvar_page(vma);
+ unsigned long pfn;
+
+ switch (vmf->pgoff) {
+ case VVAR_DATA_PAGE_OFFSET:
+ if (timens_page)
+ pfn = page_to_pfn(timens_page);
+ else
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#ifdef CONFIG_TIME_NS
+ case VVAR_TIMENS_PAGE_OFFSET:
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+ * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+ pfn = sym_to_pfn(vdso_data);
+ break;
+#endif /* CONFIG_TIME_NS */
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+enum rv_vdso_map {
+ RV_VDSO_MAP_VVAR,
+ RV_VDSO_MAP_VDSO,
+};
+
+static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
+ [RV_VDSO_MAP_VVAR] = {
+ .name = "[vvar]",
+ .fault = vvar_fault,
+ },
+ [RV_VDSO_MAP_VDSO] = {
+ .name = "[vdso]",
+ .mremap = vdso_mremap,
+ },
+};
+
+static int __init vdso_init(void)
+{
+ vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
+ vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
+
+ return __vdso_init();
+}
+arch_initcall(vdso_init);
+
+static int __setup_additional_pages(struct mm_struct *mm,
+ struct linux_binprm *bprm,
+ int uses_interp)
+{
+ unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+ void *ret;
+
+ BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+
+ vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
+ /* Be sure to map the data page */
+ vdso_mapping_len = vdso_text_len + VVAR_SIZE;
+
+ vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ ret = ERR_PTR(vdso_base);
+ goto up_fail;
+ }
+
+ ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
+ (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ vdso_base += VVAR_SIZE;
+ mm->context.vdso = (void *)vdso_base;
+ ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
+ (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
+ vdso_info.cm);
+
+ if (IS_ERR(ret))
+ goto up_fail;
+
+ return 0;
+
+up_fail:
+ mm->context.vdso = NULL;
+ return PTR_ERR(ret);
+}
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ ret = __setup_additional_pages(mm, bprm, uses_interp);
+ mmap_write_unlock(mm);
+
+ return ret;
+}
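After __setup_additional_pages() the process layout is two vvar pages followed by the vDSO text, with mm->context.vdso pointing at the text (a sketch; addresses are per-process):

    vdso_base + 0 * PAGE_SIZE   [vvar]  vdso_data, or the namespace's page
                                        for a task inside a time namespace
    vdso_base + 1 * PAGE_SIZE   [vvar]  timens tasks only: the real vdso_data
    vdso_base + VVAR_SIZE       [vdso]  vDSO text (mm->context.vdso)

The swap in vvar_fault() is the whole trick: for a namespaced task, page offset 0 faults in the namespace page and offset 1 the real data, so namespace-unaware vDSO code keeps reading the address it always read.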
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index e6f558b..01d94aa 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -3,12 +3,16 @@
* Copyright (C) 2012 Regents of the University of California
*/
#include <asm/page.h>
+#include <asm/vdso.h>
OUTPUT_ARCH(riscv)
SECTIONS
{
- PROVIDE(_vdso_data = . + PAGE_SIZE);
+ PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+ PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
. = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
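The linker-side arithmetic has to agree with that runtime layout: the vDSO text is mapped VVAR_SIZE above vdso_base, so computing _vdso_data as the code start minus __VVAR_PAGES pages lands exactly on the first vvar page (a sketch of the math, not extra code):

    /* code start   == vdso_base + 2 * PAGE_SIZE
     * _vdso_data   == code start - 2 * PAGE_SIZE == vdso_base
     * _timens_data == _vdso_data + PAGE_SIZE     == vdso_base + PAGE_SIZE
     */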
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ee3459c..ea54cc0 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -233,8 +233,10 @@ static int __init asids_init(void)
local_flush_tlb_all();
/* Pre-compute ASID details */
- num_asids = 1 << asid_bits;
- asid_mask = num_asids - 1;
+ if (asid_bits) {
+ num_asids = 1 << asid_bits;
+ asid_mask = num_asids - 1;
+ }
/*
* Use ASID allocator only if number of HW ASIDs are
@@ -255,7 +257,7 @@ static int __init asids_init(void)
pr_info("ASID allocator using %lu bits (%lu entries)\n",
asid_bits, num_asids);
} else {
- pr_info("ASID allocator disabled\n");
+ pr_info("ASID allocator disabled (%lu bits)\n", asid_bits);
}
return 0;
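The guard matters because asids_init() reads the hardware ASID width before computing the derived values; a worked example of the arithmetic (illustrative numbers):

    /* asid_bits = 16  ->  num_asids = 1 << 16 = 65536, asid_mask = 0xffff
     * asid_bits = 0   ->  shift skipped; num_asids = asid_mask = 0, so the
     *                     "ASID allocator disabled (0 bits)" branch is taken
     */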