Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
"The main theme of this pull request is security covering variants 2
and 3 for arm64. I expect to send additional patches next week
covering an improved firmware interface (requires firmware changes)
for variant 2 and way for KPTI to be disabled on unaffected CPUs
(Cavium's ThunderX doesn't work properly with KPTI enabled because of
a hardware erratum).
Summary:
- Security mitigations:
- variant 2: invalidate the branch predictor with a call to
secure firmware
- variant 3: implement KPTI for arm64
- 52-bit physical address support for arm64 (ARMv8.2)
- arm64 support for RAS (firmware first only) and SDEI (software
delegated exception interface; allows firmware to inject a RAS
error into the OS)
- perf support for the ARM DynamIQ Shared Unit PMU
- CPUID and HWCAP bits updated for new floating point multiplication
instructions in ARMv8.4
- remove some virtual memory layout printks during boot
- fix initial page table creation to cope with larger than 32M kernel
images when 16K pages are enabled"
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (104 commits)
arm64: Fix TTBR + PAN + 52-bit PA logic in cpu_do_switch_mm
arm64: Turn on KPTI only on CPUs that need it
arm64: Branch predictor hardening for Cavium ThunderX2
arm64: Run enable method for errata work arounds on late CPUs
arm64: Move BP hardening to check_and_switch_context
arm64: mm: ignore memory above supported physical address size
arm64: kpti: Fix the interaction between ASID switching and software PAN
KVM: arm64: Emulate RAS error registers and set HCR_EL2's TERR & TEA
KVM: arm64: Handle RAS SErrors from EL2 on guest exit
KVM: arm64: Handle RAS SErrors from EL1 on guest exit
KVM: arm64: Save ESR_EL2 on guest SError
KVM: arm64: Save/Restore guest DISR_EL1
KVM: arm64: Set an impdef ESR for Virtual-SError using VSESR_EL2.
KVM: arm/arm64: mask/unmask daif around VHE guests
arm64: kernel: Prepare for a DISR user
arm64: Unconditionally enable IESB on exception entry/return for firmware-first
arm64: kernel: Survive corrected RAS errors notified by SError
arm64: cpufeature: Detect CPU RAS Extentions
arm64: sysreg: Move to use definitions for all the SCTLR bits
arm64: cpufeature: __this_cpu_has_cap() shouldn't stop early
...
diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt
index bd9b3fa..a70090b 100644
--- a/Documentation/arm64/cpu-feature-registers.txt
+++ b/Documentation/arm64/cpu-feature-registers.txt
@@ -110,7 +110,9 @@
x--------------------------------------------------x
| Name | bits | visible |
|--------------------------------------------------|
- | RES0 | [63-48] | n |
+ | RES0 | [63-52] | n |
+ |--------------------------------------------------|
+ | FHM | [51-48] | y |
|--------------------------------------------------|
| DP | [47-44] | y |
|--------------------------------------------------|
diff --git a/Documentation/arm64/elf_hwcaps.txt b/Documentation/arm64/elf_hwcaps.txt
index 89edba1..57324ee 100644
--- a/Documentation/arm64/elf_hwcaps.txt
+++ b/Documentation/arm64/elf_hwcaps.txt
@@ -158,3 +158,7 @@
HWCAP_SVE
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
+
+HWCAP_ASIMDFHM
+
+ Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index fc1c884..c1d520d 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -72,7 +72,7 @@
| Hisilicon | Hip0{6,7} | #161010701 | N/A |
| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 |
| | | | |
-| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
+| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
| Qualcomm Tech. | QDF2400 ITS | E0065 | QCOM_QDF2400_ERRATUM_0065 |
| Qualcomm Tech. | Falkor v{1,2} | E1041 | QCOM_FALKOR_ERRATUM_1041 |
diff --git a/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt b/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt
new file mode 100644
index 0000000..6efabba
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm-dsu-pmu.txt
@@ -0,0 +1,27 @@
+* ARM DynamIQ Shared Unit (DSU) Performance Monitor Unit (PMU)
+
+ARM DyanmIQ Shared Unit (DSU) integrates one or more CPU cores
+with a shared L3 memory system, control logic and external interfaces to
+form a multicore cluster. The PMU enables to gather various statistics on
+the operations of the DSU. The PMU provides independent 32bit counters that
+can count any of the supported events, along with a 64bit cycle counter.
+The PMU is accessed via CPU system registers and has no MMIO component.
+
+** DSU PMU required properties:
+
+- compatible : should be one of :
+
+ "arm,dsu-pmu"
+
+- interrupts : Exactly 1 SPI must be listed.
+
+- cpus : List of phandles for the CPUs connected to this DSU instance.
+
+
+** Example:
+
+dsu-pmu-0 {
+ compatible = "arm,dsu-pmu";
+ interrupts = <GIC_SPI 02 IRQ_TYPE_LEVEL_HIGH>;
+ cpus = <&cpu_0>, <&cpu_1>;
+};
diff --git a/Documentation/devicetree/bindings/arm/firmware/sdei.txt b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
new file mode 100644
index 0000000..ee3f0ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/sdei.txt
@@ -0,0 +1,42 @@
+* Software Delegated Exception Interface (SDEI)
+
+Firmware implementing the SDEI functions described in ARM document number
+ARM DEN 0054A ("Software Delegated Exception Interface") can be used by
+Linux to receive notification of events such as those generated by
+firmware-first error handling, or from an IRQ that has been promoted to
+a firmware-assisted NMI.
+
+The interface provides a number of API functions for registering callbacks
+and enabling/disabling events. Functions are invoked by trapping to the
+privilege level of the SDEI firmware (specified as part of the binding
+below) and passing arguments in a manner specified by the "SMC Calling
+Convention (ARM DEN 0028B):
+
+ r0 => 32-bit Function ID / return value
+ {r1 - r3} => Parameters
+
+Note that the immediate field of the trapping instruction must be set
+to #0.
+
+The SDEI_EVENT_REGISTER function registers a callback in the kernel
+text to handle the specified event number.
+
+The sdei node should be a child node of '/firmware' and have required
+properties:
+
+ - compatible : should contain:
+ * "arm,sdei-1.0" : For implementations complying to SDEI version 1.x.
+
+ - method : The method of calling the SDEI firmware. Permitted
+ values are:
+ * "smc" : SMC #0, with the register assignments specified in this
+ binding.
+ * "hvc" : HVC #0, with the register assignments specified in this
+ binding.
+Example:
+ firmware {
+ sdei {
+ compatible = "arm,sdei-1.0";
+ method = "smc";
+ };
+ };
diff --git a/Documentation/perf/arm_dsu_pmu.txt b/Documentation/perf/arm_dsu_pmu.txt
new file mode 100644
index 0000000..d611e15
--- /dev/null
+++ b/Documentation/perf/arm_dsu_pmu.txt
@@ -0,0 +1,28 @@
+ARM DynamIQ Shared Unit (DSU) PMU
+==================================
+
+ARM DynamIQ Shared Unit integrates one or more cores with an L3 memory system,
+control logic and external interfaces to form a multicore cluster. The PMU
+allows counting the various events related to the L3 cache, Snoop Control Unit
+etc, using 32bit independent counters. It also provides a 64bit cycle counter.
+
+The PMU can only be accessed via CPU system registers and are common to the
+cores connected to the same DSU. Like most of the other uncore PMUs, DSU
+PMU doesn't support process specific events and cannot be used in sampling mode.
+
+The DSU provides a bitmap for a subset of implemented events via hardware
+registers. There is no way for the driver to determine if the other events
+are available or not. Hence the driver exposes only those events advertised
+by the DSU, in "events" directory under :
+
+ /sys/bus/event_sources/devices/arm_dsu_<N>/
+
+The user should refer to the TRM of the product to figure out the supported events
+and use the raw event code for the unlisted events.
+
+The driver also exposes the CPUs connected to the DSU instance in "associated_cpus".
+
+
+e.g usage :
+
+ perf stat -a -e arm_dsu_0/cycles/
diff --git a/MAINTAINERS b/MAINTAINERS
index 217a875..98ee6fe 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12645,6 +12645,15 @@
S: Supported
F: drivers/media/pci/solo6x10/
+SOFTWARE DELEGATED EXCEPTION INTERFACE (SDEI)
+M: James Morse <james.morse@arm.com>
+L: linux-arm-kernel@lists.infradead.org
+S: Maintained
+F: Documentation/devicetree/bindings/arm/firmware/sdei.txt
+F: drivers/firmware/arm_sdei.c
+F: include/linux/sdei.h
+F: include/uapi/linux/sdei.h
+
SOFTWARE RAID (Multiple Disks) SUPPORT
M: Shaohua Li <shli@kernel.org>
L: linux-raid@vger.kernel.org
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index a9f7d3f..acbf9ec 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -238,6 +238,9 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *);
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
+static inline void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index) {}
+
static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
unsigned long hyp_stack_ptr,
unsigned long vector_ptr)
@@ -301,4 +304,6 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
/* All host FP/SIMD state is restored on guest exit, so nothing to save: */
static inline void kvm_fpsimd_flush_cpu_state(void) {}
+static inline void kvm_arm_vhe_guest_enter(void) {}
+static inline void kvm_arm_vhe_guest_exit(void) {}
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index fa6f217..a2d176a 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -211,6 +211,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void)
return false;
}
+static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
+{
+ return PTRS_PER_PGD;
+}
+
static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
pgd_t *hyp_pgd,
pgd_t *merged_hyp_pgd,
@@ -221,6 +226,18 @@ static inline unsigned int kvm_get_vmid_bits(void)
return 8;
}
+static inline void *kvm_get_hyp_vector(void)
+{
+ return kvm_ksym_ref(__kvm_hyp_vector);
+}
+
+static inline int kvm_map_vectors(void)
+{
+ return 0;
+}
+
+#define kvm_phys_to_vttbr(addr) (addr)
+
#endif /* !__ASSEMBLY__ */
#endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c9a7e9e..b488076 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -522,20 +522,13 @@
config QCOM_FALKOR_ERRATUM_1003
bool "Falkor E1003: Incorrect translation due to ASID change"
default y
- select ARM64_PAN if ARM64_SW_TTBR0_PAN
help
On Falkor v1, an incorrect ASID may be cached in the TLB when ASID
- and BADDR are changed together in TTBRx_EL1. The workaround for this
- issue is to use a reserved ASID in cpu_do_switch_mm() before
- switching to the new ASID. Saying Y here selects ARM64_PAN if
- ARM64_SW_TTBR0_PAN is selected. This is done because implementing and
- maintaining the E1003 workaround in the software PAN emulation code
- would be an unnecessary complication. The affected Falkor v1 CPU
- implements ARMv8.1 hardware PAN support and using hardware PAN
- support versus software PAN emulation is mutually exclusive at
- runtime.
-
- If unsure, say Y.
+ and BADDR are changed together in TTBRx_EL1. Since we keep the ASID
+ in TTBR1_EL1, this situation only occurs in the entry trampoline and
+ then only for entries in the walk cache, since the leaf translation
+ is unchanged. Work around the erratum by invalidating the walk cache
+ entries for the trampoline before entering the kernel proper.
config QCOM_FALKOR_ERRATUM_1009
bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
@@ -656,6 +649,35 @@
default 47 if ARM64_VA_BITS_47
default 48 if ARM64_VA_BITS_48
+choice
+ prompt "Physical address space size"
+ default ARM64_PA_BITS_48
+ help
+ Choose the maximum physical address range that the kernel will
+ support.
+
+config ARM64_PA_BITS_48
+ bool "48-bit"
+
+config ARM64_PA_BITS_52
+ bool "52-bit (ARMv8.2)"
+ depends on ARM64_64K_PAGES
+ depends on ARM64_PAN || !ARM64_SW_TTBR0_PAN
+ help
+ Enable support for a 52-bit physical address space, introduced as
+ part of the ARMv8.2-LPA extension.
+
+ With this enabled, the kernel will also continue to work on CPUs that
+ do not support ARMv8.2-LPA, but with some added memory overhead (and
+ minor performance overhead).
+
+endchoice
+
+config ARM64_PA_BITS
+ int
+ default 48 if ARM64_PA_BITS_48
+ default 52 if ARM64_PA_BITS_52
+
config CPU_BIG_ENDIAN
bool "Build big-endian kernel"
help
@@ -850,6 +872,35 @@
However for 4K, we choose a higher default value, 11 as opposed to 10, giving us
4M allocations matching the default size used by generic code.
+config UNMAP_KERNEL_AT_EL0
+ bool "Unmap kernel when running in userspace (aka \"KAISER\")" if EXPERT
+ default y
+ help
+ Speculation attacks against some high-performance processors can
+ be used to bypass MMU permission checks and leak kernel data to
+ userspace. This can be defended against by unmapping the kernel
+ when running in userspace, mapping it back in on exception entry
+ via a trampoline page in the vector table.
+
+ If unsure, say Y.
+
+config HARDEN_BRANCH_PREDICTOR
+ bool "Harden the branch predictor against aliasing attacks" if EXPERT
+ default y
+ help
+ Speculation attacks against some high-performance processors rely on
+ being able to manipulate the branch predictor for a victim context by
+ executing aliasing branches in the attacker context. Such attacks
+ can be partially mitigated against by clearing internal branch
+ predictor state and limiting the prediction logic in some situations.
+
+ This config option will take CPU-specific actions to harden the
+ branch predictor against aliasing attacks and may rely on specific
+ instruction sequences or control bits being set by the system
+ firmware.
+
+ If unsure, say Y.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
@@ -1021,6 +1072,22 @@
operations if DC CVAP is not supported (following the behaviour of
DC CVAP itself if the system does not define a point of persistence).
+config ARM64_RAS_EXTN
+ bool "Enable support for RAS CPU Extensions"
+ default y
+ help
+ CPUs that support the Reliability, Availability and Serviceability
+ (RAS) Extensions, part of ARMv8.2 are able to track faults and
+ errors, classify them and report them to software.
+
+ On CPUs with these extensions system software can use additional
+ barriers to determine if faults are pending and read the
+ classification from a new set of registers.
+
+ Selecting this feature will allow the kernel to use these barriers
+ and access the new registers if the system supports the extension.
+ Platform RAS features may additionally depend on firmware support.
+
endmenu
config ARM64_SVE
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 4a85c69..6690281 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -12,6 +12,8 @@
#include <linux/stddef.h>
#include <linux/stringify.h>
+extern int alternatives_applied;
+
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
s32 alt_offset; /* offset to replacement instruction */
diff --git a/arch/arm64/include/asm/arm_dsu_pmu.h b/arch/arm64/include/asm/arm_dsu_pmu.h
new file mode 100644
index 0000000..82e5cc3
--- /dev/null
+++ b/arch/arm64/include/asm/arm_dsu_pmu.h
@@ -0,0 +1,129 @@
+/*
+ * ARM DynamIQ Shared Unit (DSU) PMU Low level register access routines.
+ *
+ * Copyright (C) ARM Limited, 2017.
+ *
+ * Author: Suzuki K Poulose <suzuki.poulose@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2, as published by the Free Software Foundation.
+ */
+
+#include <linux/bitops.h>
+#include <linux/build_bug.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+#include <asm/sysreg.h>
+
+
+#define CLUSTERPMCR_EL1 sys_reg(3, 0, 15, 5, 0)
+#define CLUSTERPMCNTENSET_EL1 sys_reg(3, 0, 15, 5, 1)
+#define CLUSTERPMCNTENCLR_EL1 sys_reg(3, 0, 15, 5, 2)
+#define CLUSTERPMOVSSET_EL1 sys_reg(3, 0, 15, 5, 3)
+#define CLUSTERPMOVSCLR_EL1 sys_reg(3, 0, 15, 5, 4)
+#define CLUSTERPMSELR_EL1 sys_reg(3, 0, 15, 5, 5)
+#define CLUSTERPMINTENSET_EL1 sys_reg(3, 0, 15, 5, 6)
+#define CLUSTERPMINTENCLR_EL1 sys_reg(3, 0, 15, 5, 7)
+#define CLUSTERPMCCNTR_EL1 sys_reg(3, 0, 15, 6, 0)
+#define CLUSTERPMXEVTYPER_EL1 sys_reg(3, 0, 15, 6, 1)
+#define CLUSTERPMXEVCNTR_EL1 sys_reg(3, 0, 15, 6, 2)
+#define CLUSTERPMMDCR_EL1 sys_reg(3, 0, 15, 6, 3)
+#define CLUSTERPMCEID0_EL1 sys_reg(3, 0, 15, 6, 4)
+#define CLUSTERPMCEID1_EL1 sys_reg(3, 0, 15, 6, 5)
+
+static inline u32 __dsu_pmu_read_pmcr(void)
+{
+ return read_sysreg_s(CLUSTERPMCR_EL1);
+}
+
+static inline void __dsu_pmu_write_pmcr(u32 val)
+{
+ write_sysreg_s(val, CLUSTERPMCR_EL1);
+ isb();
+}
+
+static inline u32 __dsu_pmu_get_reset_overflow(void)
+{
+ u32 val = read_sysreg_s(CLUSTERPMOVSCLR_EL1);
+ /* Clear the bit */
+ write_sysreg_s(val, CLUSTERPMOVSCLR_EL1);
+ isb();
+ return val;
+}
+
+static inline void __dsu_pmu_select_counter(int counter)
+{
+ write_sysreg_s(counter, CLUSTERPMSELR_EL1);
+ isb();
+}
+
+static inline u64 __dsu_pmu_read_counter(int counter)
+{
+ __dsu_pmu_select_counter(counter);
+ return read_sysreg_s(CLUSTERPMXEVCNTR_EL1);
+}
+
+static inline void __dsu_pmu_write_counter(int counter, u64 val)
+{
+ __dsu_pmu_select_counter(counter);
+ write_sysreg_s(val, CLUSTERPMXEVCNTR_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_set_event(int counter, u32 event)
+{
+ __dsu_pmu_select_counter(counter);
+ write_sysreg_s(event, CLUSTERPMXEVTYPER_EL1);
+ isb();
+}
+
+static inline u64 __dsu_pmu_read_pmccntr(void)
+{
+ return read_sysreg_s(CLUSTERPMCCNTR_EL1);
+}
+
+static inline void __dsu_pmu_write_pmccntr(u64 val)
+{
+ write_sysreg_s(val, CLUSTERPMCCNTR_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_disable_counter(int counter)
+{
+ write_sysreg_s(BIT(counter), CLUSTERPMCNTENCLR_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_enable_counter(int counter)
+{
+ write_sysreg_s(BIT(counter), CLUSTERPMCNTENSET_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_counter_interrupt_enable(int counter)
+{
+ write_sysreg_s(BIT(counter), CLUSTERPMINTENSET_EL1);
+ isb();
+}
+
+static inline void __dsu_pmu_counter_interrupt_disable(int counter)
+{
+ write_sysreg_s(BIT(counter), CLUSTERPMINTENCLR_EL1);
+ isb();
+}
+
+
+static inline u32 __dsu_pmu_read_pmceid(int n)
+{
+ switch (n) {
+ case 0:
+ return read_sysreg_s(CLUSTERPMCEID0_EL1);
+ case 1:
+ return read_sysreg_s(CLUSTERPMCEID1_EL1);
+ default:
+ BUILD_BUG();
+ return 0;
+ }
+}
diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h
index b3da6c8..4128bec 100644
--- a/arch/arm64/include/asm/asm-uaccess.h
+++ b/arch/arm64/include/asm/asm-uaccess.h
@@ -4,6 +4,7 @@
#include <asm/alternative.h>
#include <asm/kernel-pgtable.h>
+#include <asm/mmu.h>
#include <asm/sysreg.h>
#include <asm/assembler.h>
@@ -12,52 +13,63 @@
*/
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
.macro __uaccess_ttbr0_disable, tmp1
- mrs \tmp1, ttbr1_el1 // swapper_pg_dir
- add \tmp1, \tmp1, #SWAPPER_DIR_SIZE // reserved_ttbr0 at the end of swapper_pg_dir
- msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
+ mrs \tmp1, ttbr1_el1 // swapper_pg_dir
+ bic \tmp1, \tmp1, #TTBR_ASID_MASK
+ sub \tmp1, \tmp1, #RESERVED_TTBR0_SIZE // reserved_ttbr0 just before swapper_pg_dir
+ msr ttbr0_el1, \tmp1 // set reserved TTBR0_EL1
+ isb
+ add \tmp1, \tmp1, #RESERVED_TTBR0_SIZE
+ msr ttbr1_el1, \tmp1 // set reserved ASID
isb
.endm
- .macro __uaccess_ttbr0_enable, tmp1
+ .macro __uaccess_ttbr0_enable, tmp1, tmp2
get_thread_info \tmp1
ldr \tmp1, [\tmp1, #TSK_TI_TTBR0] // load saved TTBR0_EL1
+ mrs \tmp2, ttbr1_el1
+ extr \tmp2, \tmp2, \tmp1, #48
+ ror \tmp2, \tmp2, #16
+ msr ttbr1_el1, \tmp2 // set the active ASID
+ isb
msr ttbr0_el1, \tmp1 // set the non-PAN TTBR0_EL1
isb
.endm
- .macro uaccess_ttbr0_disable, tmp1
-alternative_if_not ARM64_HAS_PAN
- __uaccess_ttbr0_disable \tmp1
-alternative_else_nop_endif
- .endm
-
- .macro uaccess_ttbr0_enable, tmp1, tmp2
+ .macro uaccess_ttbr0_disable, tmp1, tmp2
alternative_if_not ARM64_HAS_PAN
save_and_disable_irq \tmp2 // avoid preemption
- __uaccess_ttbr0_enable \tmp1
+ __uaccess_ttbr0_disable \tmp1
restore_irq \tmp2
alternative_else_nop_endif
.endm
+
+ .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
+alternative_if_not ARM64_HAS_PAN
+ save_and_disable_irq \tmp3 // avoid preemption
+ __uaccess_ttbr0_enable \tmp1, \tmp2
+ restore_irq \tmp3
+alternative_else_nop_endif
+ .endm
#else
- .macro uaccess_ttbr0_disable, tmp1
+ .macro uaccess_ttbr0_disable, tmp1, tmp2
.endm
- .macro uaccess_ttbr0_enable, tmp1, tmp2
+ .macro uaccess_ttbr0_enable, tmp1, tmp2, tmp3
.endm
#endif
/*
* These macros are no-ops when UAO is present.
*/
- .macro uaccess_disable_not_uao, tmp1
- uaccess_ttbr0_disable \tmp1
+ .macro uaccess_disable_not_uao, tmp1, tmp2
+ uaccess_ttbr0_disable \tmp1, \tmp2
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(1)
alternative_else_nop_endif
.endm
- .macro uaccess_enable_not_uao, tmp1, tmp2
- uaccess_ttbr0_enable \tmp1, \tmp2
+ .macro uaccess_enable_not_uao, tmp1, tmp2, tmp3
+ uaccess_ttbr0_enable \tmp1, \tmp2, \tmp3
alternative_if ARM64_ALT_PAN_NOT_UAO
SET_PSTATE_PAN(0)
alternative_else_nop_endif
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 8b16828..3873dd7 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -26,7 +26,6 @@
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
-#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
@@ -110,6 +109,13 @@
.endm
/*
+ * RAS Error Synchronization barrier
+ */
+ .macro esb
+ hint #16
+ .endm
+
+/*
* NOP sequence
*/
.macro nops, num
@@ -255,7 +261,11 @@ lr .req x30 // link register
#else
adr_l \dst, \sym
#endif
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
+alternative_else
+ mrs \tmp, tpidr_el2
+alternative_endif
add \dst, \dst, \tmp
.endm
@@ -266,7 +276,11 @@ lr .req x30 // link register
*/
.macro ldr_this_cpu dst, sym, tmp
adr_l \dst, \sym
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs \tmp, tpidr_el1
+alternative_else
+ mrs \tmp, tpidr_el2
+alternative_endif
ldr \dst, [\dst, \tmp]
.endm
@@ -344,10 +358,26 @@ alternative_endif
* tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
*/
.macro tcr_set_idmap_t0sz, valreg, tmpreg
-#ifndef CONFIG_ARM64_VA_BITS_48
ldr_l \tmpreg, idmap_t0sz
bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
-#endif
+ .endm
+
+/*
+ * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
+ * ID_AA64MMFR0_EL1.PARange value
+ *
+ * tcr: register with the TCR_ELx value to be updated
+ * pos: IPS or PS bitfield position
+ * tmp{0,1}: temporary registers
+ */
+ .macro tcr_compute_pa_size, tcr, pos, tmp0, tmp1
+ mrs \tmp0, ID_AA64MMFR0_EL1
+ // Narrow PARange to fit the PS field in TCR_ELx
+ ubfx \tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
+ mov \tmp1, #ID_AA64MMFR0_PARANGE_MAX
+ cmp \tmp0, \tmp1
+ csel \tmp0, \tmp1, \tmp0, hi
+ bfi \tcr, \tmp0, \pos, #3
.endm
/*
@@ -478,37 +508,18 @@ alternative_endif
.endm
/*
- * Errata workaround prior to TTBR0_EL1 update
+ * Arrange a physical address in a TTBR register, taking care of 52-bit
+ * addresses.
*
- * val: TTBR value with new BADDR, preserved
- * tmp0: temporary register, clobbered
- * tmp1: other temporary register, clobbered
+ * phys: physical address, preserved
+ * ttbr: returns the TTBR value
*/
- .macro pre_ttbr0_update_workaround, val, tmp0, tmp1
-#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
-alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
- mrs \tmp0, ttbr0_el1
- mov \tmp1, #FALKOR_RESERVED_ASID
- bfi \tmp0, \tmp1, #48, #16 // reserved ASID + old BADDR
- msr ttbr0_el1, \tmp0
- isb
- bfi \tmp0, \val, #0, #48 // reserved ASID + new BADDR
- msr ttbr0_el1, \tmp0
- isb
-alternative_else_nop_endif
-#endif
- .endm
-
-/*
- * Errata workaround post TTBR0_EL1 update.
- */
- .macro post_ttbr0_update_workaround
-#ifdef CONFIG_CAVIUM_ERRATUM_27456
-alternative_if ARM64_WORKAROUND_CAVIUM_27456
- ic iallu
- dsb nsh
- isb
-alternative_else_nop_endif
+ .macro phys_to_ttbr, phys, ttbr
+#ifdef CONFIG_ARM64_PA_BITS_52
+ orr \ttbr, \phys, \phys, lsr #46
+ and \ttbr, \ttbr, #TTBR_BADDR_MASK_52
+#else
+ mov \ttbr, \phys
#endif
.endm
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 2ff7c5e..bb26382 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -41,7 +41,11 @@
#define ARM64_WORKAROUND_CAVIUM_30115 20
#define ARM64_HAS_DCPOP 21
#define ARM64_SVE 22
+#define ARM64_UNMAP_KERNEL_AT_EL0 23
+#define ARM64_HARDEN_BRANCH_PREDICTOR 24
+#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25
+#define ARM64_HAS_RAS_EXTN 26
-#define ARM64_NCAPS 23
+#define ARM64_NCAPS 27
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index cbf08d7..be7bd19 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -79,28 +79,37 @@
#define ARM_CPU_PART_AEM_V8 0xD0F
#define ARM_CPU_PART_FOUNDATION 0xD00
#define ARM_CPU_PART_CORTEX_A57 0xD07
+#define ARM_CPU_PART_CORTEX_A72 0xD08
#define ARM_CPU_PART_CORTEX_A53 0xD03
#define ARM_CPU_PART_CORTEX_A73 0xD09
+#define ARM_CPU_PART_CORTEX_A75 0xD0A
#define APM_CPU_PART_POTENZA 0x000
#define CAVIUM_CPU_PART_THUNDERX 0x0A1
#define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2
#define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3
+#define CAVIUM_CPU_PART_THUNDERX2 0x0AF
#define BRCM_CPU_PART_VULCAN 0x516
#define QCOM_CPU_PART_FALKOR_V1 0x800
#define QCOM_CPU_PART_FALKOR 0xC00
+#define QCOM_CPU_PART_KRYO 0x200
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
+#define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72)
#define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73)
+#define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
+#define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2)
+#define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN)
#define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1)
#define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR)
+#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
index c4cd508..8389050 100644
--- a/arch/arm64/include/asm/efi.h
+++ b/arch/arm64/include/asm/efi.h
@@ -121,19 +121,21 @@ static inline void efi_set_pgd(struct mm_struct *mm)
if (mm != current->active_mm) {
/*
* Update the current thread's saved ttbr0 since it is
- * restored as part of a return from exception. Set
- * the hardware TTBR0_EL1 using cpu_switch_mm()
- * directly to enable potential errata workarounds.
+ * restored as part of a return from exception. Enable
+ * access to the valid TTBR0_EL1 and invoke the errata
+ * workaround directly since there is no return from
+ * exception when invoking the EFI run-time services.
*/
update_saved_ttbr0(current, mm);
- cpu_switch_mm(mm->pgd, mm);
+ uaccess_ttbr0_enable();
+ post_ttbr_update_workaround();
} else {
/*
* Defer the switch to the current thread's TTBR0_EL1
* until uaccess_enable(). Restore the current
* thread's saved ttbr0 corresponding to its active_mm
*/
- cpu_set_reserved_ttbr0();
+ uaccess_ttbr0_disable();
update_saved_ttbr0(current, current->active_mm);
}
}
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 014d7d8..803443d 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -86,6 +86,18 @@
#define ESR_ELx_WNR_SHIFT (6)
#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT)
+/* Asynchronous Error Type */
+#define ESR_ELx_IDS_SHIFT (24)
+#define ESR_ELx_IDS (UL(1) << ESR_ELx_IDS_SHIFT)
+#define ESR_ELx_AET_SHIFT (10)
+#define ESR_ELx_AET (UL(0x7) << ESR_ELx_AET_SHIFT)
+
+#define ESR_ELx_AET_UC (UL(0) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_UEU (UL(1) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_UEO (UL(2) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_UER (UL(3) << ESR_ELx_AET_SHIFT)
+#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT)
+
/* Shared ISS field definitions for Data/Instruction aborts */
#define ESR_ELx_SET_SHIFT (11)
#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT)
@@ -100,6 +112,7 @@
#define ESR_ELx_FSC (0x3F)
#define ESR_ELx_FSC_TYPE (0x3C)
#define ESR_ELx_FSC_EXTABT (0x10)
+#define ESR_ELx_FSC_SERROR (0x11)
#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
@@ -127,6 +140,13 @@
#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0)
#define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1)
+#define DISR_EL1_IDS (UL(1) << 24)
+/*
+ * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean
+ * different things in the future...
+ */
+#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC)
+
/* ESR value templates for specific events */
/* BRK instruction trap from AArch64 state */
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index 0c2eec4..bc30429 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -18,6 +18,8 @@
#ifndef __ASM_EXCEPTION_H
#define __ASM_EXCEPTION_H
+#include <asm/esr.h>
+
#include <linux/interrupt.h>
#define __exception __attribute__((section(".exception.text")))
@@ -27,4 +29,16 @@
#define __exception_irq_entry __exception
#endif
+static inline u32 disr_to_esr(u64 disr)
+{
+ unsigned int esr = ESR_ELx_EC_SERROR << ESR_ELx_EC_SHIFT;
+
+ if ((disr & DISR_EL1_IDS) == 0)
+ esr |= (disr & DISR_EL1_ESR_MASK);
+ else
+ esr |= (disr & ESR_ELx_ISS_MASK);
+
+ return esr;
+}
+
#endif /* __ASM_EXCEPTION_H */
diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h
index 4052ec3..ec1e6d6 100644
--- a/arch/arm64/include/asm/fixmap.h
+++ b/arch/arm64/include/asm/fixmap.h
@@ -58,6 +58,11 @@ enum fixed_addresses {
FIX_APEI_GHES_NMI,
#endif /* CONFIG_ACPI_APEI_GHES */
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ FIX_ENTRY_TRAMP_DATA,
+ FIX_ENTRY_TRAMP_TEXT,
+#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT))
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
__end_of_permanent_fixed_addresses,
/*
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 74f3439..8857a0f 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -71,7 +71,7 @@ extern void fpsimd_flush_thread(void);
extern void fpsimd_signal_preserve_current_state(void);
extern void fpsimd_preserve_current_state(void);
extern void fpsimd_restore_current_state(void);
-extern void fpsimd_update_current_state(struct fpsimd_state *state);
+extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
extern void fpsimd_flush_task_state(struct task_struct *target);
extern void sve_flush_cpu_state(void);
diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h
index 7803343..82386e8 100644
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -52,7 +52,52 @@
#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
#endif
-#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
+
+/*
+ * If KASLR is enabled, then an offset K is added to the kernel address
+ * space. The bottom 21 bits of this offset are zero to guarantee 2MB
+ * alignment for PA and VA.
+ *
+ * For each pagetable level of the swapper, we know that the shift will
+ * be larger than 21 (for the 4KB granule case we use section maps thus
+ * the smallest shift is actually 30) thus there is the possibility that
+ * KASLR can increase the number of pagetable entries by 1, so we make
+ * room for this extra entry.
+ *
+ * Note KASLR cannot increase the number of required entries for a level
+ * by more than one because it increments both the virtual start and end
+ * addresses equally (the extra entry comes from the case where the end
+ * address is just pushed over a boundary and the start address isn't).
+ */
+
+#ifdef CONFIG_RANDOMIZE_BASE
+#define EARLY_KASLR (1)
+#else
+#define EARLY_KASLR (0)
+#endif
+
+#define EARLY_ENTRIES(vstart, vend, shift) (((vend) >> (shift)) \
+ - ((vstart) >> (shift)) + 1 + EARLY_KASLR)
+
+#define EARLY_PGDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT))
+
+#if SWAPPER_PGTABLE_LEVELS > 3
+#define EARLY_PUDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, PUD_SHIFT))
+#else
+#define EARLY_PUDS(vstart, vend) (0)
+#endif
+
+#if SWAPPER_PGTABLE_LEVELS > 2
+#define EARLY_PMDS(vstart, vend) (EARLY_ENTRIES(vstart, vend, SWAPPER_TABLE_SHIFT))
+#else
+#define EARLY_PMDS(vstart, vend) (0)
+#endif
+
+#define EARLY_PAGES(vstart, vend) ( 1 /* PGDIR page */ \
+ + EARLY_PGDS((vstart), (vend)) /* each PGDIR needs a next level page table */ \
+ + EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \
+ + EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */
+#define SWAPPER_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR + TEXT_OFFSET, _end))
#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
@@ -78,8 +123,16 @@
/*
* Initial memory map attributes.
*/
-#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define _SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define _SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define SWAPPER_PTE_FLAGS (_SWAPPER_PTE_FLAGS | PTE_NG)
+#define SWAPPER_PMD_FLAGS (_SWAPPER_PMD_FLAGS | PMD_SECT_NG)
+#else
+#define SWAPPER_PTE_FLAGS _SWAPPER_PTE_FLAGS
+#define SWAPPER_PMD_FLAGS _SWAPPER_PMD_FLAGS
+#endif
#if ARM64_SWAPPER_USES_SECTION_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 715d395..b0c8417 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -23,6 +23,8 @@
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
+#define HCR_TEA (UL(1) << 37)
+#define HCR_TERR (UL(1) << 36)
#define HCR_E2H (UL(1) << 34)
#define HCR_ID (UL(1) << 33)
#define HCR_CD (UL(1) << 32)
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index ab4d0a9..24961b7 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -68,6 +68,8 @@ extern u32 __kvm_get_mdcr_el2(void);
extern u32 __init_stage2_translation(void);
+extern void __qcom_hyp_sanitize_btac_predictors(void);
+
#endif
#endif /* __ARM_KVM_ASM_H__ */
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5f28dfa..413dc82 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -50,6 +50,13 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
if (is_kernel_in_hyp_mode())
vcpu->arch.hcr_el2 |= HCR_E2H;
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) {
+ /* route synchronous external abort exceptions to EL2 */
+ vcpu->arch.hcr_el2 |= HCR_TEA;
+ /* trap error record accesses */
+ vcpu->arch.hcr_el2 |= HCR_TERR;
+ }
+
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
}
@@ -64,6 +71,11 @@ static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr)
vcpu->arch.hcr_el2 = hcr;
}
+static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
+{
+ vcpu->arch.vsesr_el2 = vsesr;
+}
+
static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
{
return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;
@@ -171,6 +183,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
}
+static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.fault.disr_el1;
+}
+
static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index ea6cb5b..4485ae8 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <asm/cpufeature.h>
+#include <asm/daifflags.h>
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
@@ -89,6 +90,7 @@ struct kvm_vcpu_fault_info {
u32 esr_el2; /* Hyp Syndrom Register */
u64 far_el2; /* Hyp Fault Address Register */
u64 hpfar_el2; /* Hyp IPA Fault Address Register */
+ u64 disr_el1; /* Deferred [SError] Status Register */
};
/*
@@ -120,6 +122,7 @@ enum vcpu_sysreg {
PAR_EL1, /* Physical Address Register */
MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
+ DISR_EL1, /* Deferred Interrupt Status Register */
/* Performance Monitors Registers */
PMCR_EL0, /* Control Register */
@@ -192,6 +195,8 @@ struct kvm_cpu_context {
u64 sys_regs[NR_SYS_REGS];
u32 copro[NR_COPRO_REGS];
};
+
+ struct kvm_vcpu *__hyp_running_vcpu;
};
typedef struct kvm_cpu_context kvm_cpu_context_t;
@@ -277,6 +282,9 @@ struct kvm_vcpu_arch {
/* Detect first run of a vcpu */
bool has_run_once;
+
+ /* Virtual SError ESR to restore when HCR_EL2.VSE is set */
+ u64 vsesr_el2;
};
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
@@ -340,6 +348,8 @@ void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
int exception_index);
+void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index);
int kvm_perf_init(void);
int kvm_perf_teardown(void);
@@ -396,4 +406,13 @@ static inline void kvm_fpsimd_flush_cpu_state(void)
sve_flush_cpu_state();
}
+static inline void kvm_arm_vhe_guest_enter(void)
+{
+ local_daif_mask();
+}
+
+static inline void kvm_arm_vhe_guest_exit(void)
+{
+ local_daif_restore(DAIF_PROCCTX_NOIRQ);
+}
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 672c868..72e279d 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -273,15 +273,26 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
static inline bool __kvm_cpu_uses_extended_idmap(void)
{
- return __cpu_uses_extended_idmap();
+ return __cpu_uses_extended_idmap_level();
}
+static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
+{
+ return idmap_ptrs_per_pgd;
+}
+
+/*
+ * Can't use pgd_populate here, because the extended idmap adds an extra level
+ * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended
+ * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4.
+ */
static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
pgd_t *hyp_pgd,
pgd_t *merged_hyp_pgd,
unsigned long hyp_idmap_start)
{
int idmap_idx;
+ u64 pgd_addr;
/*
* Use the first entry to access the HYP mappings. It is
@@ -289,7 +300,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
* extended idmap.
*/
VM_BUG_ON(pgd_val(merged_hyp_pgd[0]));
- merged_hyp_pgd[0] = __pgd(__pa(hyp_pgd) | PMD_TYPE_TABLE);
+ pgd_addr = __phys_to_pgd_val(__pa(hyp_pgd));
+ merged_hyp_pgd[0] = __pgd(pgd_addr | PMD_TYPE_TABLE);
/*
* Create another extended level entry that points to the boot HYP map,
@@ -299,7 +311,8 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
*/
idmap_idx = hyp_idmap_start >> VA_BITS;
VM_BUG_ON(pgd_val(merged_hyp_pgd[idmap_idx]));
- merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE);
+ pgd_addr = __phys_to_pgd_val(__pa(boot_hyp_pgd));
+ merged_hyp_pgd[idmap_idx] = __pgd(pgd_addr | PMD_TYPE_TABLE);
}
static inline unsigned int kvm_get_vmid_bits(void)
@@ -309,5 +322,45 @@ static inline unsigned int kvm_get_vmid_bits(void)
return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
}
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#include <asm/mmu.h>
+
+static inline void *kvm_get_hyp_vector(void)
+{
+ struct bp_hardening_data *data = arm64_get_bp_hardening_data();
+ void *vect = kvm_ksym_ref(__kvm_hyp_vector);
+
+ if (data->fn) {
+ vect = __bp_harden_hyp_vecs_start +
+ data->hyp_vectors_slot * SZ_2K;
+
+ if (!has_vhe())
+ vect = lm_alias(vect);
+ }
+
+ return vect;
+}
+
+static inline int kvm_map_vectors(void)
+{
+ return create_hyp_mappings(kvm_ksym_ref(__bp_harden_hyp_vecs_start),
+ kvm_ksym_ref(__bp_harden_hyp_vecs_end),
+ PAGE_HYP_EXEC);
+}
+
+#else
+static inline void *kvm_get_hyp_vector(void)
+{
+ return kvm_ksym_ref(__kvm_hyp_vector);
+}
+
+static inline int kvm_map_vectors(void)
+{
+ return 0;
+}
+#endif
+
+#define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr)
+
#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h
index 0d34bf0..a050d4f 100644
--- a/arch/arm64/include/asm/mmu.h
+++ b/arch/arm64/include/asm/mmu.h
@@ -17,6 +17,11 @@
#define __ASM_MMU_H
#define MMCF_AARCH32 0x1 /* mm context flag for AArch32 executables */
+#define USER_ASID_BIT 48
+#define USER_ASID_FLAG (UL(1) << USER_ASID_BIT)
+#define TTBR_ASID_MASK (UL(0xffff) << 48)
+
+#ifndef __ASSEMBLY__
typedef struct {
atomic64_t id;
@@ -31,6 +36,49 @@ typedef struct {
*/
#define ASID(mm) ((mm)->context.id.counter & 0xffff)
+static inline bool arm64_kernel_unmapped_at_el0(void)
+{
+ return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) &&
+ cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0);
+}
+
+typedef void (*bp_hardening_cb_t)(void);
+
+struct bp_hardening_data {
+ int hyp_vectors_slot;
+ bp_hardening_cb_t fn;
+};
+
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+extern char __bp_harden_hyp_vecs_start[], __bp_harden_hyp_vecs_end[];
+
+DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
+{
+ return this_cpu_ptr(&bp_hardening_data);
+}
+
+static inline void arm64_apply_bp_hardening(void)
+{
+ struct bp_hardening_data *d;
+
+ if (!cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR))
+ return;
+
+ d = arm64_get_bp_hardening_data();
+ if (d->fn)
+ d->fn();
+}
+#else
+static inline struct bp_hardening_data *arm64_get_bp_hardening_data(void)
+{
+ return NULL;
+}
+
+static inline void arm64_apply_bp_hardening(void) { }
+#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
+
extern void paging_init(void);
extern void bootmem_init(void);
extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
@@ -41,4 +89,5 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
extern void *fixmap_remap_fdt(phys_addr_t dt_phys);
extern void mark_linear_text_alias_ro(void);
+#endif /* !__ASSEMBLY__ */
#endif
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 9d155fa..8d33319 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -19,8 +19,6 @@
#ifndef __ASM_MMU_CONTEXT_H
#define __ASM_MMU_CONTEXT_H
-#define FALKOR_RESERVED_ASID 1
-
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
@@ -51,23 +49,39 @@ static inline void contextidr_thread_switch(struct task_struct *next)
*/
static inline void cpu_set_reserved_ttbr0(void)
{
- unsigned long ttbr = __pa_symbol(empty_zero_page);
+ unsigned long ttbr = phys_to_ttbr(__pa_symbol(empty_zero_page));
write_sysreg(ttbr, ttbr0_el1);
isb();
}
+static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+ BUG_ON(pgd == swapper_pg_dir);
+ cpu_set_reserved_ttbr0();
+ cpu_do_switch_mm(virt_to_phys(pgd),mm);
+}
+
/*
* TCR.T0SZ value to use when the ID map is active. Usually equals
* TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
* physical memory, in which case it will be smaller.
*/
extern u64 idmap_t0sz;
+extern u64 idmap_ptrs_per_pgd;
static inline bool __cpu_uses_extended_idmap(void)
{
- return (!IS_ENABLED(CONFIG_ARM64_VA_BITS_48) &&
- unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
+ return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
+}
+
+/*
+ * True if the extended ID map requires an extra level of translation table
+ * to be configured.
+ */
+static inline bool __cpu_uses_extended_idmap_level(void)
+{
+ return ARM64_HW_PGTABLE_LEVELS(64 - idmap_t0sz) > CONFIG_PGTABLE_LEVELS;
}
/*
@@ -170,7 +184,7 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
else
ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
- task_thread_info(tsk)->ttbr0 = ttbr;
+ WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
}
#else
static inline void update_saved_ttbr0(struct task_struct *tsk,
@@ -225,6 +239,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
#define activate_mm(prev,next) switch_mm(prev, next, current)
void verify_cpu_asid_bits(void);
+void post_ttbr_update_workaround(void);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 3bd498e..4339320 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -16,11 +16,15 @@
#ifndef __ASM_PERCPU_H
#define __ASM_PERCPU_H
+#include <asm/alternative.h>
#include <asm/stack_pointer.h>
static inline void set_my_cpu_offset(unsigned long off)
{
- asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory");
+ asm volatile(ALTERNATIVE("msr tpidr_el1, %0",
+ "msr tpidr_el2, %0",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ :: "r" (off) : "memory");
}
static inline unsigned long __my_cpu_offset(void)
@@ -31,7 +35,10 @@ static inline unsigned long __my_cpu_offset(void)
* We want to allow caching the value, so avoid using volatile and
* instead use a fake stack read to hazard against barrier().
*/
- asm("mrs %0, tpidr_el1" : "=r" (off) :
+ asm(ALTERNATIVE("mrs %0, tpidr_el1",
+ "mrs %0, tpidr_el2",
+ ARM64_HAS_VIRT_HOST_EXTN)
+ : "=r" (off) :
"Q" (*(const unsigned long *)current_stack_pointer));
return off;
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 5ca6a57..e9d9f1b 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -44,7 +44,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
{
- set_pud(pud, __pud(pmd | prot));
+ set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
}
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -73,7 +73,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
{
- set_pgd(pgdp, __pgd(pud | prot));
+ set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
}
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
@@ -129,7 +129,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
pmdval_t prot)
{
- set_pmd(pmdp, __pmd(pte | prot));
+ set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
}
/*
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index eb0c2bd..f42836d 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -16,6 +16,8 @@
#ifndef __ASM_PGTABLE_HWDEF_H
#define __ASM_PGTABLE_HWDEF_H
+#include <asm/memory.h>
+
/*
* Number of page-table levels required to address 'va_bits' wide
* address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT)
@@ -116,9 +118,9 @@
* Level 1 descriptor (PUD).
*/
#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0)
-#define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1)
-#define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0)
-#define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0)
+#define PUD_TABLE_BIT (_AT(pudval_t, 1) << 1)
+#define PUD_TYPE_MASK (_AT(pudval_t, 3) << 0)
+#define PUD_TYPE_SECT (_AT(pudval_t, 1) << 0)
/*
* Level 2 descriptor (PMD).
@@ -166,6 +168,14 @@
#define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */
#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */
+#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12)
+#define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH)
+#else
+#define PTE_ADDR_MASK PTE_ADDR_LOW
+#endif
+
/*
* AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
*/
@@ -196,7 +206,7 @@
/*
* Highest possible physical address supported.
*/
-#define PHYS_MASK_SHIFT (48)
+#define PHYS_MASK_SHIFT (CONFIG_ARM64_PA_BITS)
#define PHYS_MASK ((UL(1) << PHYS_MASK_SHIFT) - 1)
/*
@@ -272,9 +282,23 @@
#define TCR_TG1_4K (UL(2) << TCR_TG1_SHIFT)
#define TCR_TG1_64K (UL(3) << TCR_TG1_SHIFT)
+#define TCR_IPS_SHIFT 32
+#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
+#define TCR_A1 (UL(1) << 22)
#define TCR_ASID16 (UL(1) << 36)
#define TCR_TBI0 (UL(1) << 37)
#define TCR_HA (UL(1) << 39)
#define TCR_HD (UL(1) << 40)
+/*
+ * TTBR.
+ */
+#ifdef CONFIG_ARM64_PA_BITS_52
+/*
+ * This should be GENMASK_ULL(47, 2).
+ * TTBR_ELx[1] is RES0 in this configuration.
+ */
+#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2)
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 0a5635f..22a9268 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -34,8 +34,16 @@
#include <asm/pgtable-types.h>
-#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
-#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+#define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
+#define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define PROT_DEFAULT (_PROT_DEFAULT | PTE_NG)
+#define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_SECT_NG)
+#else
+#define PROT_DEFAULT _PROT_DEFAULT
+#define PROT_SECT_DEFAULT _PROT_SECT_DEFAULT
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE))
#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE))
@@ -48,6 +56,7 @@
#define PROT_SECT_NORMAL_EXEC (PROT_SECT_DEFAULT | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
#define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
+#define _HYP_PAGE_DEFAULT (_PAGE_DEFAULT & ~PTE_NG)
#define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
@@ -55,15 +64,15 @@
#define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
-#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
-#define PAGE_HYP_EXEC __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
-#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
+#define PAGE_HYP __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
+#define PAGE_HYP_EXEC __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
+#define PAGE_HYP_RO __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
#define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
#define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY)
#define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN)
-#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_PXN | PTE_UXN)
+#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bdcc7f1..89167c4 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -59,9 +59,22 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte))
-#define pte_pfn(pte) ((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT)
+/*
+ * Macros to convert between a physical address and its placement in a
+ * page table entry, taking care of 52-bit addresses.
+ */
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define __pte_to_phys(pte) \
+ ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36))
+#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK)
+#else
+#define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK)
+#define __phys_to_pte_val(phys) (phys)
+#endif
-#define pfn_pte(pfn,prot) (__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pte_pfn(pte) (__pte_to_phys(pte) >> PAGE_SHIFT)
+#define pfn_pte(pfn,prot) \
+ __pte(__phys_to_pte_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pte_none(pte) (!pte_val(pte))
#define pte_clear(mm,addr,ptep) set_pte(ptep, __pte(0))
@@ -292,6 +305,11 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
#define __HAVE_ARCH_PTE_SPECIAL
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+ return __pte(pgd_val(pgd));
+}
+
static inline pte_t pud_pte(pud_t pud)
{
return __pte(pud_val(pud));
@@ -357,15 +375,24 @@ static inline int pmd_protnone(pmd_t pmd)
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
-#define pmd_pfn(pmd) (((pmd_val(pmd) & PMD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
-#define pfn_pmd(pfn,prot) (__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define __pmd_to_phys(pmd) __pte_to_phys(pmd_pte(pmd))
+#define __phys_to_pmd_val(phys) __phys_to_pte_val(phys)
+#define pmd_pfn(pmd) ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT)
+#define pfn_pmd(pfn,prot) __pmd(__phys_to_pmd_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot)
#define pud_write(pud) pte_write(pud_pte(pud))
-#define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
+
+#define __pud_to_phys(pud) __pte_to_phys(pud_pte(pud))
+#define __phys_to_pud_val(phys) __phys_to_pte_val(phys)
+#define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT)
+#define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd))
+#define __pgd_to_phys(pgd) __pte_to_phys(pgd_pte(pgd))
+#define __phys_to_pgd_val(phys) __phys_to_pte_val(phys)
+
#define __pgprot_modify(prot,mask,bits) \
__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
@@ -416,7 +443,7 @@ static inline void pmd_clear(pmd_t *pmdp)
static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
{
- return pmd_val(pmd) & PHYS_MASK & (s32)PAGE_MASK;
+ return __pmd_to_phys(pmd);
}
/* Find an entry in the third-level page table. */
@@ -434,7 +461,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
#define pte_clear_fixmap() clear_fixmap(FIX_PTE)
-#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
+#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd)))
/* use ONLY for statically allocated translation tables */
#define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr))))
@@ -467,7 +494,7 @@ static inline void pud_clear(pud_t *pudp)
static inline phys_addr_t pud_page_paddr(pud_t pud)
{
- return pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK;
+ return __pud_to_phys(pud);
}
/* Find an entry in the second-level page table. */
@@ -480,7 +507,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr))
#define pmd_clear_fixmap() clear_fixmap(FIX_PMD)
-#define pud_page(pud) pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
+#define pud_page(pud) pfn_to_page(__phys_to_pfn(__pud_to_phys(pud)))
/* use ONLY for statically allocated translation tables */
#define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr))))
@@ -519,7 +546,7 @@ static inline void pgd_clear(pgd_t *pgdp)
static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
{
- return pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK;
+ return __pgd_to_phys(pgd);
}
/* Find an entry in the frst-level page table. */
@@ -532,7 +559,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
#define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr))
#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
-#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
+#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
/* use ONLY for statically allocated translation tables */
#define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr))))
@@ -682,7 +709,9 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
#endif
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+extern pgd_t swapper_pg_end[];
extern pgd_t idmap_pg_dir[PTRS_PER_PGD];
+extern pgd_t tramp_pg_dir[PTRS_PER_PGD];
/*
* Encode and decode a swap entry:
@@ -736,6 +765,12 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
#define kc_vaddr_to_offset(v) ((v) & ~VA_START)
#define kc_offset_to_vaddr(o) ((o) | VA_START)
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define phys_to_ttbr(addr) (((addr) | ((addr) >> 46)) & TTBR_BADDR_MASK_52)
+#else
+#define phys_to_ttbr(addr) (addr)
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 14ad6e4..16cef2e 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -35,12 +35,6 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
#include <asm/memory.h>
-#define cpu_switch_mm(pgd,mm) \
-do { \
- BUG_ON(pgd == swapper_pg_dir); \
- cpu_do_switch_mm(virt_to_phys(pgd),mm); \
-} while (0)
-
#endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* __ASM_PROCFNS_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 023cacb..cee4ae2 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -216,6 +216,7 @@ static inline void spin_lock_prefetch(const void *ptr)
int cpu_enable_pan(void *__unused);
int cpu_enable_cache_maint_trap(void *__unused);
+int cpu_clear_disr(void *__unused);
/* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */
#define SVE_SET_VL(arg) sve_set_current_vl(arg)
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
new file mode 100644
index 0000000..e073e68
--- /dev/null
+++ b/arch/arm64/include/asm/sdei.h
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2017 Arm Ltd.
+#ifndef __ASM_SDEI_H
+#define __ASM_SDEI_H
+
+/* Values for sdei_exit_mode */
+#define SDEI_EXIT_HVC 0
+#define SDEI_EXIT_SMC 1
+
+#define SDEI_STACK_SIZE IRQ_STACK_SIZE
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/preempt.h>
+#include <linux/types.h>
+
+#include <asm/virt.h>
+
+extern unsigned long sdei_exit_mode;
+
+/* Software Delegated Exception entry point from firmware*/
+asmlinkage void __sdei_asm_handler(unsigned long event_num, unsigned long arg,
+ unsigned long pc, unsigned long pstate);
+
+/* and its CONFIG_UNMAP_KERNEL_AT_EL0 trampoline */
+asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
+ unsigned long arg,
+ unsigned long pc,
+ unsigned long pstate);
+
+/*
+ * The above entry point does the minimum to call C code. This function does
+ * anything else, before calling the driver.
+ */
+struct sdei_registered_event;
+asmlinkage unsigned long __sdei_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg);
+
+unsigned long sdei_arch_get_entry_point(int conduit);
+#define sdei_arch_get_entry_point(x) sdei_arch_get_entry_point(x)
+
+bool _on_sdei_stack(unsigned long sp);
+static inline bool on_sdei_stack(unsigned long sp)
+{
+ if (!IS_ENABLED(CONFIG_VMAP_STACK))
+ return false;
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return false;
+ if (in_nmi())
+ return _on_sdei_stack(sp);
+
+ return false;
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_SDEI_H */
diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h
index 941267c..caab039 100644
--- a/arch/arm64/include/asm/sections.h
+++ b/arch/arm64/include/asm/sections.h
@@ -28,5 +28,6 @@ extern char __initdata_begin[], __initdata_end[];
extern char __inittext_begin[], __inittext_end[];
extern char __irqentry_text_start[], __irqentry_text_end[];
extern char __mmuoff_data_start[], __mmuoff_data_end[];
+extern char __entry_tramp_text_start[], __entry_tramp_text_end[];
#endif /* __ASM_SECTIONS_H */
diff --git a/arch/arm64/include/asm/sparsemem.h b/arch/arm64/include/asm/sparsemem.h
index 74a9d30..b299929 100644
--- a/arch/arm64/include/asm/sparsemem.h
+++ b/arch/arm64/include/asm/sparsemem.h
@@ -17,7 +17,7 @@
#define __ASM_SPARSEMEM_H
#ifdef CONFIG_SPARSEMEM
-#define MAX_PHYSMEM_BITS 48
+#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
#define SECTION_SIZE_BITS 30
#endif
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 6ad3077..472ef94 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -22,6 +22,7 @@
#include <asm/memory.h>
#include <asm/ptrace.h>
+#include <asm/sdei.h>
struct stackframe {
unsigned long fp;
@@ -85,6 +86,8 @@ static inline bool on_accessible_stack(struct task_struct *tsk, unsigned long sp
return true;
if (on_overflow_stack(sp))
return true;
+ if (on_sdei_stack(sp))
+ return true;
return false;
}
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 08cc885..0e1960c 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -20,6 +20,7 @@
#ifndef __ASM_SYSREG_H
#define __ASM_SYSREG_H
+#include <asm/compiler.h>
#include <linux/stringify.h>
/*
@@ -175,6 +176,16 @@
#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0)
#define SYS_AFSR1_EL1 sys_reg(3, 0, 5, 1, 1)
#define SYS_ESR_EL1 sys_reg(3, 0, 5, 2, 0)
+
+#define SYS_ERRIDR_EL1 sys_reg(3, 0, 5, 3, 0)
+#define SYS_ERRSELR_EL1 sys_reg(3, 0, 5, 3, 1)
+#define SYS_ERXFR_EL1 sys_reg(3, 0, 5, 4, 0)
+#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
+#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
+#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
+#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
+
#define SYS_FAR_EL1 sys_reg(3, 0, 6, 0, 0)
#define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0)
@@ -278,6 +289,7 @@
#define SYS_AMAIR_EL1 sys_reg(3, 0, 10, 3, 0)
#define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0)
+#define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1)
#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0)
#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1)
@@ -353,8 +365,10 @@
#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
+#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
+#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0)
#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1)
@@ -398,27 +412,85 @@
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_EE (1 << 25)
+#define SCTLR_ELx_IESB (1 << 21)
+#define SCTLR_ELx_WXN (1 << 19)
#define SCTLR_ELx_I (1 << 12)
#define SCTLR_ELx_SA (1 << 3)
#define SCTLR_ELx_C (1 << 2)
#define SCTLR_ELx_A (1 << 1)
#define SCTLR_ELx_M 1
+#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
+ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_IESB)
+
+/* SCTLR_EL2 specific flags. */
#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \
(1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \
(1 << 29))
+#define SCTLR_EL2_RES0 ((1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | \
+ (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \
+ (1 << 17) | (1 << 20) | (1 << 24) | (1 << 26) | \
+ (1 << 27) | (1 << 30) | (1 << 31))
-#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
- SCTLR_ELx_SA | SCTLR_ELx_I)
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define ENDIAN_SET_EL2 SCTLR_ELx_EE
+#define ENDIAN_CLEAR_EL2 0
+#else
+#define ENDIAN_SET_EL2 0
+#define ENDIAN_CLEAR_EL2 SCTLR_ELx_EE
+#endif
+
+/* SCTLR_EL2 value used for the hyp-stub */
+#define SCTLR_EL2_SET (SCTLR_ELx_IESB | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
+#define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
+ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \
+ ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0)
+
+/* Check all the bits are accounted for */
+#define SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != ~0)
+
/* SCTLR_EL1 specific flags. */
#define SCTLR_EL1_UCI (1 << 26)
+#define SCTLR_EL1_E0E (1 << 24)
#define SCTLR_EL1_SPAN (1 << 23)
+#define SCTLR_EL1_NTWE (1 << 18)
+#define SCTLR_EL1_NTWI (1 << 16)
#define SCTLR_EL1_UCT (1 << 15)
+#define SCTLR_EL1_DZE (1 << 14)
+#define SCTLR_EL1_UMA (1 << 9)
#define SCTLR_EL1_SED (1 << 8)
+#define SCTLR_EL1_ITD (1 << 7)
#define SCTLR_EL1_CP15BEN (1 << 5)
+#define SCTLR_EL1_SA0 (1 << 4)
+
+#define SCTLR_EL1_RES1 ((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \
+ (1 << 29))
+#define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \
+ (1 << 27) | (1 << 30) | (1 << 31))
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE)
+#define ENDIAN_CLEAR_EL1 0
+#else
+#define ENDIAN_SET_EL1 0
+#define ENDIAN_CLEAR_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE)
+#endif
+
+#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\
+ SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\
+ SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\
+ SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN |\
+ ENDIAN_SET_EL1 | SCTLR_EL1_UCI | SCTLR_EL1_RES1)
+#define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\
+ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\
+ SCTLR_EL1_RES0)
+
+/* Check all the bits are accounted for */
+#define SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS BUILD_BUG_ON((SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != ~0)
/* id_aa64isar0 */
+#define ID_AA64ISAR0_FHM_SHIFT 48
#define ID_AA64ISAR0_DP_SHIFT 44
#define ID_AA64ISAR0_SM4_SHIFT 40
#define ID_AA64ISAR0_SM3_SHIFT 36
@@ -437,7 +509,10 @@
#define ID_AA64ISAR1_DPB_SHIFT 0
/* id_aa64pfr0 */
+#define ID_AA64PFR0_CSV3_SHIFT 60
+#define ID_AA64PFR0_CSV2_SHIFT 56
#define ID_AA64PFR0_SVE_SHIFT 32
+#define ID_AA64PFR0_RAS_SHIFT 28
#define ID_AA64PFR0_GIC_SHIFT 24
#define ID_AA64PFR0_ASIMD_SHIFT 20
#define ID_AA64PFR0_FP_SHIFT 16
@@ -447,6 +522,7 @@
#define ID_AA64PFR0_EL0_SHIFT 0
#define ID_AA64PFR0_SVE 0x1
+#define ID_AA64PFR0_RAS_V1 0x1
#define ID_AA64PFR0_FP_NI 0xf
#define ID_AA64PFR0_FP_SUPPORTED 0x0
#define ID_AA64PFR0_ASIMD_NI 0xf
@@ -471,6 +547,14 @@
#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0
#define ID_AA64MMFR0_TGRAN16_NI 0x0
#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1
+#define ID_AA64MMFR0_PARANGE_48 0x5
+#define ID_AA64MMFR0_PARANGE_52 0x6
+
+#ifdef CONFIG_ARM64_PA_BITS_52
+#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_52
+#else
+#define ID_AA64MMFR0_PARANGE_MAX ID_AA64MMFR0_PARANGE_48
+#endif
/* id_aa64mmfr1 */
#define ID_AA64MMFR1_PAN_SHIFT 20
@@ -582,6 +666,7 @@
#else
+#include <linux/build_bug.h>
#include <linux/types.h>
asm(
@@ -638,6 +723,9 @@ static inline void config_sctlr_el1(u32 clear, u32 set)
{
u32 val;
+ SCTLR_EL2_BUILD_BUG_ON_MISSING_BITS;
+ SCTLR_EL1_BUILD_BUG_ON_MISSING_BITS;
+
val = read_sysreg(sctlr_el1);
val &= ~clear;
val |= set;
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index af1c769..9e82dd7 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -23,6 +23,7 @@
#include <linux/sched.h>
#include <asm/cputype.h>
+#include <asm/mmu.h>
/*
* Raw TLBI operations.
@@ -54,6 +55,11 @@
#define __tlbi(op, ...) __TLBI_N(op, ##__VA_ARGS__, 1, 0)
+#define __tlbi_user(op, arg) do { \
+ if (arm64_kernel_unmapped_at_el0()) \
+ __tlbi(op, (arg) | USER_ASID_FLAG); \
+} while (0)
+
/*
* TLB Management
* ==============
@@ -115,6 +121,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
dsb(ishst);
__tlbi(aside1is, asid);
+ __tlbi_user(aside1is, asid);
dsb(ish);
}
@@ -125,6 +132,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
dsb(ishst);
__tlbi(vale1is, addr);
+ __tlbi_user(vale1is, addr);
dsb(ish);
}
@@ -151,10 +159,13 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
dsb(ishst);
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
- if (last_level)
+ if (last_level) {
__tlbi(vale1is, addr);
- else
+ __tlbi_user(vale1is, addr);
+ } else {
__tlbi(vae1is, addr);
+ __tlbi_user(vae1is, addr);
+ }
}
dsb(ish);
}
@@ -194,6 +205,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm,
unsigned long addr = uaddr >> 12 | (ASID(mm) << 48);
__tlbi(vae1is, addr);
+ __tlbi_user(vae1is, addr);
dsb(ish);
}
diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h
index 1696f9d..178e338 100644
--- a/arch/arm64/include/asm/traps.h
+++ b/arch/arm64/include/asm/traps.h
@@ -19,6 +19,7 @@
#define __ASM_TRAP_H
#include <linux/list.h>
+#include <asm/esr.h>
#include <asm/sections.h>
struct pt_regs;
@@ -66,4 +67,57 @@ static inline int in_entry_text(unsigned long ptr)
return ptr >= (unsigned long)&__entry_text_start &&
ptr < (unsigned long)&__entry_text_end;
}
+
+/*
+ * CPUs with the RAS extensions have an Implementation-Defined-Syndrome bit
+ * to indicate whether this ESR has a RAS encoding. CPUs without this feature
+ * have a ISS-Valid bit in the same position.
+ * If this bit is set, we know its not a RAS SError.
+ * If its clear, we need to know if the CPU supports RAS. Uncategorized RAS
+ * errors share the same encoding as an all-zeros encoding from a CPU that
+ * doesn't support RAS.
+ */
+static inline bool arm64_is_ras_serror(u32 esr)
+{
+ WARN_ON(preemptible());
+
+ if (esr & ESR_ELx_IDS)
+ return false;
+
+ if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN))
+ return true;
+ else
+ return false;
+}
+
+/*
+ * Return the AET bits from a RAS SError's ESR.
+ *
+ * It is implementation defined whether Uncategorized errors are containable.
+ * We treat them as Uncontainable.
+ * Non-RAS SError's are reported as Uncontained/Uncategorized.
+ */
+static inline u32 arm64_ras_serror_get_severity(u32 esr)
+{
+ u32 aet = esr & ESR_ELx_AET;
+
+ if (!arm64_is_ras_serror(esr)) {
+ /* Not a RAS error, we can't interpret the ESR. */
+ return ESR_ELx_AET_UC;
+ }
+
+ /*
+ * AET is RES0 if 'the value returned in the DFSC field is not
+ * [ESR_ELx_FSC_SERROR]'
+ */
+ if ((esr & ESR_ELx_FSC) != ESR_ELx_FSC_SERROR) {
+ /* No severity information : Uncategorized */
+ return ESR_ELx_AET_UC;
+ }
+
+ return aet;
+}
+
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr);
+void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr);
#endif
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index fc0f9eb..59fda529 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -105,17 +105,23 @@ static inline void set_fs(mm_segment_t fs)
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
static inline void __uaccess_ttbr0_disable(void)
{
- unsigned long ttbr;
+ unsigned long flags, ttbr;
- /* reserved_ttbr0 placed at the end of swapper_pg_dir */
- ttbr = read_sysreg(ttbr1_el1) + SWAPPER_DIR_SIZE;
- write_sysreg(ttbr, ttbr0_el1);
+ local_irq_save(flags);
+ ttbr = read_sysreg(ttbr1_el1);
+ ttbr &= ~TTBR_ASID_MASK;
+ /* reserved_ttbr0 placed before swapper_pg_dir */
+ write_sysreg(ttbr - RESERVED_TTBR0_SIZE, ttbr0_el1);
isb();
+ /* Set reserved ASID */
+ write_sysreg(ttbr, ttbr1_el1);
+ isb();
+ local_irq_restore(flags);
}
static inline void __uaccess_ttbr0_enable(void)
{
- unsigned long flags;
+ unsigned long flags, ttbr0, ttbr1;
/*
* Disable interrupts to avoid preemption between reading the 'ttbr0'
@@ -123,7 +129,17 @@ static inline void __uaccess_ttbr0_enable(void)
* roll-over and an update of 'ttbr0'.
*/
local_irq_save(flags);
- write_sysreg(current_thread_info()->ttbr0, ttbr0_el1);
+ ttbr0 = READ_ONCE(current_thread_info()->ttbr0);
+
+ /* Restore active ASID */
+ ttbr1 = read_sysreg(ttbr1_el1);
+ ttbr1 &= ~TTBR_ASID_MASK; /* safety measure */
+ ttbr1 |= ttbr0 & TTBR_ASID_MASK;
+ write_sysreg(ttbr1, ttbr1_el1);
+ isb();
+
+ /* Restore user page table */
+ write_sysreg(ttbr0, ttbr0_el1);
isb();
local_irq_restore(flags);
}
@@ -155,6 +171,18 @@ static inline bool uaccess_ttbr0_enable(void)
}
#endif
+static inline void __uaccess_disable_hw_pan(void)
+{
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN,
+ CONFIG_ARM64_PAN));
+}
+
+static inline void __uaccess_enable_hw_pan(void)
+{
+ asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,
+ CONFIG_ARM64_PAN));
+}
+
#define __uaccess_disable(alt) \
do { \
if (!uaccess_ttbr0_disable()) \
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
new file mode 100644
index 0000000..0b5ec6e
--- /dev/null
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2017 Arm Ltd.
+#ifndef __ASM_VMAP_STACK_H
+#define __ASM_VMAP_STACK_H
+
+#include <linux/bug.h>
+#include <linux/gfp.h>
+#include <linux/kconfig.h>
+#include <linux/vmalloc.h>
+#include <asm/memory.h>
+#include <asm/pgtable.h>
+#include <asm/thread_info.h>
+
+/*
+ * To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd
+ * stacks need to have the same alignment.
+ */
+static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
+{
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
+
+ return __vmalloc_node_range(stack_size, THREAD_ALIGN,
+ VMALLOC_START, VMALLOC_END,
+ THREADINFO_GFP, PAGE_KERNEL, 0, node,
+ __builtin_return_address(0));
+}
+
+#endif /* __ASM_VMAP_STACK_H */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index cda76fa..f018c3d 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -43,5 +43,6 @@
#define HWCAP_ASIMDDP (1 << 20)
#define HWCAP_SHA512 (1 << 21)
#define HWCAP_SVE (1 << 22)
+#define HWCAP_ASIMDFHM (1 << 23)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 067baac..b875413 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -52,6 +52,11 @@
arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o
arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
+
+ifeq ($(CONFIG_KVM),y)
+arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o
+endif
obj-y += $(arm64-obj-y) vdso/ probes/
obj-m += $(arm64-obj-m)
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index b3162715..252396a 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -117,7 +117,7 @@ bool __init acpi_psci_present(void)
}
/* Whether HVC must be used instead of SMC as the PSCI conduit */
-bool __init acpi_psci_use_hvc(void)
+bool acpi_psci_use_hvc(void)
{
return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC;
}
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 6dd0a3a3..414288a 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -32,6 +32,8 @@
#define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset)
+int alternatives_applied;
+
struct alt_region {
struct alt_instr *begin;
struct alt_instr *end;
@@ -143,7 +145,6 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias)
*/
static int __apply_alternatives_multi_stop(void *unused)
{
- static int patched = 0;
struct alt_region region = {
.begin = (struct alt_instr *)__alt_instructions,
.end = (struct alt_instr *)__alt_instructions_end,
@@ -151,14 +152,14 @@ static int __apply_alternatives_multi_stop(void *unused)
/* We always have a CPU 0 at this point (__init) */
if (smp_processor_id()) {
- while (!READ_ONCE(patched))
+ while (!READ_ONCE(alternatives_applied))
cpu_relax();
isb();
} else {
- BUG_ON(patched);
+ BUG_ON(alternatives_applied);
__apply_alternatives(®ion, true);
/* Barriers provided by the cache flushing */
- WRITE_ONCE(patched, 1);
+ WRITE_ONCE(alternatives_applied, 1);
}
return 0;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 71bf088..1303e04 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -18,12 +18,14 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/arm_sdei.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
#include <linux/suspend.h>
#include <asm/cpufeature.h>
+#include <asm/fixmap.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
#include <asm/smp_plat.h>
@@ -130,6 +132,7 @@ int main(void)
BLANK();
#ifdef CONFIG_KVM_ARM_HOST
DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt));
+ DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1));
DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs));
DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs));
@@ -148,11 +151,18 @@ int main(void)
DEFINE(ARM_SMCCC_RES_X2_OFFS, offsetof(struct arm_smccc_res, a2));
DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id));
DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state));
-
BLANK();
DEFINE(HIBERN_PBE_ORIG, offsetof(struct pbe, orig_address));
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val));
+ BLANK();
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ DEFINE(TRAMP_VALIAS, TRAMP_VALIAS);
+#endif
+#ifdef CONFIG_ARM_SDE_INTERFACE
+ DEFINE(SDEI_EVENT_INTREGS, offsetof(struct sdei_registered_event, interrupted_regs));
+ DEFINE(SDEI_EVENT_PRIORITY, offsetof(struct sdei_registered_event, priority));
+#endif
return 0;
}
diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S
new file mode 100644
index 0000000..76225c2
--- /dev/null
+++ b/arch/arm64/kernel/bpi.S
@@ -0,0 +1,87 @@
+/*
+ * Contains CPU specific branch predictor invalidation sequences
+ *
+ * Copyright (C) 2018 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+
+.macro ventry target
+ .rept 31
+ nop
+ .endr
+ b \target
+.endm
+
+.macro vectors target
+ ventry \target + 0x000
+ ventry \target + 0x080
+ ventry \target + 0x100
+ ventry \target + 0x180
+
+ ventry \target + 0x200
+ ventry \target + 0x280
+ ventry \target + 0x300
+ ventry \target + 0x380
+
+ ventry \target + 0x400
+ ventry \target + 0x480
+ ventry \target + 0x500
+ ventry \target + 0x580
+
+ ventry \target + 0x600
+ ventry \target + 0x680
+ ventry \target + 0x700
+ ventry \target + 0x780
+.endm
+
+ .align 11
+ENTRY(__bp_harden_hyp_vecs_start)
+ .rept 4
+ vectors __kvm_hyp_vector
+ .endr
+ENTRY(__bp_harden_hyp_vecs_end)
+ENTRY(__psci_hyp_bp_inval_start)
+ sub sp, sp, #(8 * 18)
+ stp x16, x17, [sp, #(16 * 0)]
+ stp x14, x15, [sp, #(16 * 1)]
+ stp x12, x13, [sp, #(16 * 2)]
+ stp x10, x11, [sp, #(16 * 3)]
+ stp x8, x9, [sp, #(16 * 4)]
+ stp x6, x7, [sp, #(16 * 5)]
+ stp x4, x5, [sp, #(16 * 6)]
+ stp x2, x3, [sp, #(16 * 7)]
+ stp x0, x1, [sp, #(16 * 8)]
+ mov x0, #0x84000000
+ smc #0
+ ldp x16, x17, [sp, #(16 * 0)]
+ ldp x14, x15, [sp, #(16 * 1)]
+ ldp x12, x13, [sp, #(16 * 2)]
+ ldp x10, x11, [sp, #(16 * 3)]
+ ldp x8, x9, [sp, #(16 * 4)]
+ ldp x6, x7, [sp, #(16 * 5)]
+ ldp x4, x5, [sp, #(16 * 6)]
+ ldp x2, x3, [sp, #(16 * 7)]
+ ldp x0, x1, [sp, #(16 * 8)]
+ add sp, sp, #(8 * 18)
+ENTRY(__psci_hyp_bp_inval_end)
+
+ENTRY(__qcom_hyp_sanitize_link_stack_start)
+ stp x29, x30, [sp, #-16]!
+ .rept 16
+ bl . + 4
+ .endr
+ ldp x29, x30, [sp], #16
+ENTRY(__qcom_hyp_sanitize_link_stack_end)
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 0e27f86..ed68818 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -30,6 +30,20 @@ is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope)
entry->midr_range_max);
}
+static bool __maybe_unused
+is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ u32 model;
+
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ model = read_cpuid_id();
+ model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) |
+ MIDR_ARCHITECTURE_MASK;
+
+ return model == entry->midr_model;
+}
+
static bool
has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry,
int scope)
@@ -46,6 +60,127 @@ static int cpu_enable_trap_ctr_access(void *__unused)
return 0;
}
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+#include <asm/mmu_context.h>
+#include <asm/cacheflush.h>
+
+DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data);
+
+#ifdef CONFIG_KVM
+extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[];
+extern char __qcom_hyp_sanitize_link_stack_start[];
+extern char __qcom_hyp_sanitize_link_stack_end[];
+
+static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
+{
+ void *dst = lm_alias(__bp_harden_hyp_vecs_start + slot * SZ_2K);
+ int i;
+
+ for (i = 0; i < SZ_2K; i += 0x80)
+ memcpy(dst + i, hyp_vecs_start, hyp_vecs_end - hyp_vecs_start);
+
+ flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
+}
+
+static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
+ const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
+{
+ static int last_slot = -1;
+ static DEFINE_SPINLOCK(bp_lock);
+ int cpu, slot = -1;
+
+ spin_lock(&bp_lock);
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
+ slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
+ break;
+ }
+ }
+
+ if (slot == -1) {
+ last_slot++;
+ BUG_ON(((__bp_harden_hyp_vecs_end - __bp_harden_hyp_vecs_start)
+ / SZ_2K) <= last_slot);
+ slot = last_slot;
+ __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end);
+ }
+
+ __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
+ __this_cpu_write(bp_hardening_data.fn, fn);
+ spin_unlock(&bp_lock);
+}
+#else
+#define __psci_hyp_bp_inval_start NULL
+#define __psci_hyp_bp_inval_end NULL
+#define __qcom_hyp_sanitize_link_stack_start NULL
+#define __qcom_hyp_sanitize_link_stack_end NULL
+
+static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
+ const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
+{
+ __this_cpu_write(bp_hardening_data.fn, fn);
+}
+#endif /* CONFIG_KVM */
+
+static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry,
+ bp_hardening_cb_t fn,
+ const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
+{
+ u64 pfr0;
+
+ if (!entry->matches(entry, SCOPE_LOCAL_CPU))
+ return;
+
+ pfr0 = read_cpuid(ID_AA64PFR0_EL1);
+ if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
+ return;
+
+ __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end);
+}
+
+#include <linux/psci.h>
+
+static int enable_psci_bp_hardening(void *data)
+{
+ const struct arm64_cpu_capabilities *entry = data;
+
+ if (psci_ops.get_version)
+ install_bp_hardening_cb(entry,
+ (bp_hardening_cb_t)psci_ops.get_version,
+ __psci_hyp_bp_inval_start,
+ __psci_hyp_bp_inval_end);
+
+ return 0;
+}
+
+static void qcom_link_stack_sanitization(void)
+{
+ u64 tmp;
+
+ asm volatile("mov %0, x30 \n"
+ ".rept 16 \n"
+ "bl . + 4 \n"
+ ".endr \n"
+ "mov x30, %0 \n"
+ : "=&r" (tmp));
+}
+
+static int qcom_enable_link_stack_sanitization(void *data)
+{
+ const struct arm64_cpu_capabilities *entry = data;
+
+ install_bp_hardening_cb(entry, qcom_link_stack_sanitization,
+ __qcom_hyp_sanitize_link_stack_start,
+ __qcom_hyp_sanitize_link_stack_end);
+
+ return 0;
+}
+#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
+
#define MIDR_RANGE(model, min, max) \
.def_scope = SCOPE_LOCAL_CPU, \
.matches = is_affected_midr_range, \
@@ -169,6 +304,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_CPU_VAR_REV(0, 0),
MIDR_CPU_VAR_REV(0, 0)),
},
+ {
+ .desc = "Qualcomm Technologies Kryo erratum 1003",
+ .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
+ .def_scope = SCOPE_LOCAL_CPU,
+ .midr_model = MIDR_QCOM_KRYO,
+ .matches = is_kryo_midr,
+ },
#endif
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
{
@@ -187,6 +329,47 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
},
#endif
+#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
+ .enable = qcom_enable_link_stack_sanitization,
+ },
+ {
+ .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
+ MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
+ .enable = enable_psci_bp_hardening,
+ },
+ {
+ .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+ MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
+ .enable = enable_psci_bp_hardening,
+ },
+#endif
{
}
};
@@ -200,15 +383,18 @@ void verify_local_cpu_errata_workarounds(void)
{
const struct arm64_cpu_capabilities *caps = arm64_errata;
- for (; caps->matches; caps++)
- if (!cpus_have_cap(caps->capability) &&
- caps->matches(caps, SCOPE_LOCAL_CPU)) {
+ for (; caps->matches; caps++) {
+ if (cpus_have_cap(caps->capability)) {
+ if (caps->enable)
+ caps->enable((void *)caps);
+ } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) {
pr_crit("CPU%d: Requires work around for %s, not detected"
" at boot time\n",
smp_processor_id(),
caps->desc ? : "an erratum");
cpu_die_early();
}
+ }
}
void update_cpu_errata_workarounds(void)
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index a73a592..0fb6a31 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -123,6 +123,7 @@ cpufeature_pan_not_uao(const struct arm64_cpu_capabilities *entry, int __unused)
* sync with the documentation of the CPU feature register ABI.
*/
static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0),
@@ -145,8 +146,11 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = {
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE),
FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI),
S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
@@ -846,6 +850,67 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus
ID_AA64PFR0_FP_SHIFT) < 0;
}
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
+
+static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
+ int __unused)
+{
+ u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
+
+ /* Forced on command line? */
+ if (__kpti_forced) {
+ pr_info_once("kernel page table isolation forced %s by command line option\n",
+ __kpti_forced > 0 ? "ON" : "OFF");
+ return __kpti_forced > 0;
+ }
+
+ /* Useful for KASLR robustness */
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ return true;
+
+ /* Don't force KPTI for CPUs that are not vulnerable */
+ switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) {
+ case MIDR_CAVIUM_THUNDERX2:
+ case MIDR_BRCM_VULCAN:
+ return false;
+ }
+
+ /* Defer to CPU feature registers */
+ return !cpuid_feature_extract_unsigned_field(pfr0,
+ ID_AA64PFR0_CSV3_SHIFT);
+}
+
+static int __init parse_kpti(char *str)
+{
+ bool enabled;
+ int ret = strtobool(str, &enabled);
+
+ if (ret)
+ return ret;
+
+ __kpti_forced = enabled ? 1 : -1;
+ return 0;
+}
+__setup("kpti=", parse_kpti);
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+
+static int cpu_copy_el2regs(void *__unused)
+{
+ /*
+ * Copy register values that aren't redirected by hardware.
+ *
+ * Before code patching, we only set tpidr_el1, all CPUs need to copy
+ * this value to tpidr_el2 before we patch the code. Once we've done
+ * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to
+ * do anything here.
+ */
+ if (!alternatives_applied)
+ write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
+
+ return 0;
+}
+
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
@@ -915,6 +980,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_VIRT_HOST_EXTN,
.def_scope = SCOPE_SYSTEM,
.matches = runs_at_el2,
+ .enable = cpu_copy_el2regs,
},
{
.desc = "32-bit EL0 Support",
@@ -932,6 +998,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.def_scope = SCOPE_SYSTEM,
.matches = hyp_offset_low,
},
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ {
+ .desc = "Kernel page table isolation (KPTI)",
+ .capability = ARM64_UNMAP_KERNEL_AT_EL0,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = unmap_kernel_at_el0,
+ },
+#endif
{
/* FP/SIMD is not implemented */
.capability = ARM64_HAS_NO_FPSIMD,
@@ -963,6 +1037,19 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.enable = sve_kernel_enable,
},
#endif /* CONFIG_ARM64_SVE */
+#ifdef CONFIG_ARM64_RAS_EXTN
+ {
+ .desc = "RAS Extension Support",
+ .capability = ARM64_HAS_RAS_EXTN,
+ .def_scope = SCOPE_SYSTEM,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64PFR0_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64PFR0_RAS_SHIFT,
+ .min_field_value = ID_AA64PFR0_RAS_V1,
+ .enable = cpu_clear_disr,
+ },
+#endif /* CONFIG_ARM64_RAS_EXTN */
{},
};
@@ -992,6 +1079,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
@@ -1071,6 +1159,25 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
cap_set_elf_hwcap(hwcaps);
}
+/*
+ * Check if the current CPU has a given feature capability.
+ * Should be called from non-preemptible context.
+ */
+static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array,
+ unsigned int cap)
+{
+ const struct arm64_cpu_capabilities *caps;
+
+ if (WARN_ON(preemptible()))
+ return false;
+
+ for (caps = cap_array; caps->matches; caps++)
+ if (caps->capability == cap &&
+ caps->matches(caps, SCOPE_LOCAL_CPU))
+ return true;
+ return false;
+}
+
void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
const char *info)
{
@@ -1106,7 +1213,7 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps)
* uses an IPI, giving us a PSTATE that disappears when
* we return.
*/
- stop_machine(caps->enable, NULL, cpu_online_mask);
+ stop_machine(caps->enable, (void *)caps, cpu_online_mask);
}
}
}
@@ -1134,8 +1241,9 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps)
}
static void
-verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
+verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list)
{
+ const struct arm64_cpu_capabilities *caps = caps_list;
for (; caps->matches; caps++) {
if (!cpus_have_cap(caps->capability))
continue;
@@ -1143,13 +1251,13 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps)
* If the new CPU misses an advertised feature, we cannot proceed
* further, park the cpu.
*/
- if (!caps->matches(caps, SCOPE_LOCAL_CPU)) {
+ if (!__this_cpu_has_cap(caps_list, caps->capability)) {
pr_crit("CPU%d: missing feature: %s\n",
smp_processor_id(), caps->desc);
cpu_die_early();
}
if (caps->enable)
- caps->enable(NULL);
+ caps->enable((void *)caps);
}
}
@@ -1189,6 +1297,9 @@ static void verify_local_cpu_capabilities(void)
if (system_supports_sve())
verify_sve_features();
+
+ if (system_uses_ttbr0_pan())
+ pr_info("Emulating Privileged Access Never (PAN) using TTBR0_EL1 switching\n");
}
void check_local_cpu_capabilities(void)
@@ -1225,25 +1336,6 @@ static void __init mark_const_caps_ready(void)
static_branch_enable(&arm64_const_caps_ready);
}
-/*
- * Check if the current CPU has a given feature capability.
- * Should be called from non-preemptible context.
- */
-static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array,
- unsigned int cap)
-{
- const struct arm64_cpu_capabilities *caps;
-
- if (WARN_ON(preemptible()))
- return false;
-
- for (caps = cap_array; caps->desc; caps++)
- if (caps->capability == cap && caps->matches)
- return caps->matches(caps, SCOPE_LOCAL_CPU);
-
- return false;
-}
-
extern const struct arm64_cpu_capabilities arm64_errata[];
bool this_cpu_has_cap(unsigned int cap)
@@ -1387,3 +1479,11 @@ static int __init enable_mrs_emulation(void)
}
core_initcall(enable_mrs_emulation);
+
+int cpu_clear_disr(void *__unused)
+{
+ /* Firmware may have left a deferred SError in this register. */
+ write_sysreg_s(0, SYS_DISR_EL1);
+
+ return 0;
+}
diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c
index fd69108..f2d1381 100644
--- a/arch/arm64/kernel/cpuidle.c
+++ b/arch/arm64/kernel/cpuidle.c
@@ -47,6 +47,8 @@ int arm_cpuidle_suspend(int index)
#include <acpi/processor.h>
+#define ARM64_LPI_IS_RETENTION_STATE(arch_flags) (!(arch_flags))
+
int acpi_processor_ffh_lpi_probe(unsigned int cpu)
{
return arm_cpuidle_init(cpu);
@@ -54,6 +56,10 @@ int acpi_processor_ffh_lpi_probe(unsigned int cpu)
int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi)
{
- return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index);
+ if (ARM64_LPI_IS_RETENTION_STATE(lpi->arch_flags))
+ return CPU_PM_CPU_IDLE_ENTER_RETENTION(arm_cpuidle_suspend,
+ lpi->index);
+ else
+ return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, lpi->index);
}
#endif
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 1e25545..7f94623 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -76,6 +76,7 @@ static const char *const hwcap_str[] = {
"asimddp",
"sha512",
"sve",
+ "asimdfhm",
NULL
};
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 6d14b8f..b34e717 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -28,6 +28,8 @@
#include <asm/errno.h>
#include <asm/esr.h>
#include <asm/irq.h>
+#include <asm/memory.h>
+#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
@@ -69,8 +71,21 @@
#define BAD_FIQ 2
#define BAD_ERROR 3
- .macro kernel_ventry label
+ .macro kernel_ventry, el, label, regsize = 64
.align 7
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+alternative_if ARM64_UNMAP_KERNEL_AT_EL0
+ .if \el == 0
+ .if \regsize == 64
+ mrs x30, tpidrro_el0
+ msr tpidrro_el0, xzr
+ .else
+ mov x30, xzr
+ .endif
+ .endif
+alternative_else_nop_endif
+#endif
+
sub sp, sp, #S_FRAME_SIZE
#ifdef CONFIG_VMAP_STACK
/*
@@ -82,7 +97,7 @@
tbnz x0, #THREAD_SHIFT, 0f
sub x0, sp, x0 // x0'' = sp' - x0' = (sp + x0) - sp = x0
sub sp, sp, x0 // sp'' = sp' - x0 = (sp + x0) - x0 = sp
- b \label
+ b el\()\el\()_\label
0:
/*
@@ -114,7 +129,12 @@
sub sp, sp, x0
mrs x0, tpidrro_el0
#endif
- b \label
+ b el\()\el\()_\label
+ .endm
+
+ .macro tramp_alias, dst, sym
+ mov_q \dst, TRAMP_VALIAS
+ add \dst, \dst, #(\sym - .entry.tramp.text)
.endm
.macro kernel_entry, el, regsize = 64
@@ -185,7 +205,7 @@
.if \el != 0
mrs x21, ttbr0_el1
- tst x21, #0xffff << 48 // Check for the reserved ASID
+ tst x21, #TTBR_ASID_MASK // Check for the reserved ASID
orr x23, x23, #PSR_PAN_BIT // Set the emulated PAN in the saved SPSR
b.eq 1f // TTBR0 access already disabled
and x23, x23, #~PSR_PAN_BIT // Clear the emulated PAN in the saved SPSR
@@ -248,7 +268,7 @@
tbnz x22, #22, 1f // Skip re-enabling TTBR0 access if the PSR_PAN_BIT is set
.endif
- __uaccess_ttbr0_enable x0
+ __uaccess_ttbr0_enable x0, x1
.if \el == 0
/*
@@ -257,7 +277,7 @@
* Cavium erratum 27456 (broadcast TLBI instructions may cause I-cache
* corruption).
*/
- post_ttbr0_update_workaround
+ bl post_ttbr_update_workaround
.endif
1:
.if \el != 0
@@ -269,18 +289,20 @@
.if \el == 0
ldr x23, [sp, #S_SP] // load return stack pointer
msr sp_el0, x23
+ tst x22, #PSR_MODE32_BIT // native task?
+ b.eq 3f
+
#ifdef CONFIG_ARM64_ERRATUM_845719
alternative_if ARM64_WORKAROUND_845719
- tbz x22, #4, 1f
#ifdef CONFIG_PID_IN_CONTEXTIDR
mrs x29, contextidr_el1
msr contextidr_el1, x29
#else
msr contextidr_el1, xzr
#endif
-1:
alternative_else_nop_endif
#endif
+3:
.endif
msr elr_el1, x21 // set up the return data
@@ -302,7 +324,21 @@
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
- eret // return to kernel
+
+ .if \el == 0
+alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ bne 4f
+ msr far_el1, x30
+ tramp_alias x30, tramp_exit_native
+ br x30
+4:
+ tramp_alias x30, tramp_exit_compat
+ br x30
+#endif
+ .else
+ eret
+ .endif
.endm
.macro irq_stack_entry
@@ -367,31 +403,31 @@
.align 11
ENTRY(vectors)
- kernel_ventry el1_sync_invalid // Synchronous EL1t
- kernel_ventry el1_irq_invalid // IRQ EL1t
- kernel_ventry el1_fiq_invalid // FIQ EL1t
- kernel_ventry el1_error_invalid // Error EL1t
+ kernel_ventry 1, sync_invalid // Synchronous EL1t
+ kernel_ventry 1, irq_invalid // IRQ EL1t
+ kernel_ventry 1, fiq_invalid // FIQ EL1t
+ kernel_ventry 1, error_invalid // Error EL1t
- kernel_ventry el1_sync // Synchronous EL1h
- kernel_ventry el1_irq // IRQ EL1h
- kernel_ventry el1_fiq_invalid // FIQ EL1h
- kernel_ventry el1_error // Error EL1h
+ kernel_ventry 1, sync // Synchronous EL1h
+ kernel_ventry 1, irq // IRQ EL1h
+ kernel_ventry 1, fiq_invalid // FIQ EL1h
+ kernel_ventry 1, error // Error EL1h
- kernel_ventry el0_sync // Synchronous 64-bit EL0
- kernel_ventry el0_irq // IRQ 64-bit EL0
- kernel_ventry el0_fiq_invalid // FIQ 64-bit EL0
- kernel_ventry el0_error // Error 64-bit EL0
+ kernel_ventry 0, sync // Synchronous 64-bit EL0
+ kernel_ventry 0, irq // IRQ 64-bit EL0
+ kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
+ kernel_ventry 0, error // Error 64-bit EL0
#ifdef CONFIG_COMPAT
- kernel_ventry el0_sync_compat // Synchronous 32-bit EL0
- kernel_ventry el0_irq_compat // IRQ 32-bit EL0
- kernel_ventry el0_fiq_invalid_compat // FIQ 32-bit EL0
- kernel_ventry el0_error_compat // Error 32-bit EL0
+ kernel_ventry 0, sync_compat, 32 // Synchronous 32-bit EL0
+ kernel_ventry 0, irq_compat, 32 // IRQ 32-bit EL0
+ kernel_ventry 0, fiq_invalid_compat, 32 // FIQ 32-bit EL0
+ kernel_ventry 0, error_compat, 32 // Error 32-bit EL0
#else
- kernel_ventry el0_sync_invalid // Synchronous 32-bit EL0
- kernel_ventry el0_irq_invalid // IRQ 32-bit EL0
- kernel_ventry el0_fiq_invalid // FIQ 32-bit EL0
- kernel_ventry el0_error_invalid // Error 32-bit EL0
+ kernel_ventry 0, sync_invalid, 32 // Synchronous 32-bit EL0
+ kernel_ventry 0, irq_invalid, 32 // IRQ 32-bit EL0
+ kernel_ventry 0, fiq_invalid, 32 // FIQ 32-bit EL0
+ kernel_ventry 0, error_invalid, 32 // Error 32-bit EL0
#endif
END(vectors)
@@ -685,12 +721,15 @@
* Instruction abort handling
*/
mrs x26, far_el1
- enable_daif
+ enable_da_f
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_off
+#endif
ct_user_exit
mov x0, x26
mov x1, x25
mov x2, sp
- bl do_mem_abort
+ bl do_el0_ia_bp_hardening
b ret_to_user
el0_fpsimd_acc:
/*
@@ -943,6 +982,124 @@
.popsection // .entry.text
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+/*
+ * Exception vectors trampoline.
+ */
+ .pushsection ".entry.tramp.text", "ax"
+
+ .macro tramp_map_kernel, tmp
+ mrs \tmp, ttbr1_el1
+ add \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE)
+ bic \tmp, \tmp, #USER_ASID_FLAG
+ msr ttbr1_el1, \tmp
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
+alternative_if ARM64_WORKAROUND_QCOM_FALKOR_E1003
+ /* ASID already in \tmp[63:48] */
+ movk \tmp, #:abs_g2_nc:(TRAMP_VALIAS >> 12)
+ movk \tmp, #:abs_g1_nc:(TRAMP_VALIAS >> 12)
+ /* 2MB boundary containing the vectors, so we nobble the walk cache */
+ movk \tmp, #:abs_g0_nc:((TRAMP_VALIAS & ~(SZ_2M - 1)) >> 12)
+ isb
+ tlbi vae1, \tmp
+ dsb nsh
+alternative_else_nop_endif
+#endif /* CONFIG_QCOM_FALKOR_ERRATUM_1003 */
+ .endm
+
+ .macro tramp_unmap_kernel, tmp
+ mrs \tmp, ttbr1_el1
+ sub \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE)
+ orr \tmp, \tmp, #USER_ASID_FLAG
+ msr ttbr1_el1, \tmp
+ /*
+ * We avoid running the post_ttbr_update_workaround here because the
+ * user and kernel ASIDs don't have conflicting mappings, so any
+ * "blessing" as described in:
+ *
+ * http://lkml.kernel.org/r/56BB848A.6060603@caviumnetworks.com
+ *
+ * will not hurt correctness. Whilst this may partially defeat the
+ * point of using split ASIDs in the first place, it avoids
+ * the hit of invalidating the entire I-cache on every return to
+ * userspace.
+ */
+ .endm
+
+ .macro tramp_ventry, regsize = 64
+ .align 7
+1:
+ .if \regsize == 64
+ msr tpidrro_el0, x30 // Restored in kernel_ventry
+ .endif
+ /*
+ * Defend against branch aliasing attacks by pushing a dummy
+ * entry onto the return stack and using a RET instruction to
+ * enter the full-fat kernel vectors.
+ */
+ bl 2f
+ b .
+2:
+ tramp_map_kernel x30
+#ifdef CONFIG_RANDOMIZE_BASE
+ adr x30, tramp_vectors + PAGE_SIZE
+alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
+ ldr x30, [x30]
+#else
+ ldr x30, =vectors
+#endif
+ prfm plil1strm, [x30, #(1b - tramp_vectors)]
+ msr vbar_el1, x30
+ add x30, x30, #(1b - tramp_vectors)
+ isb
+ ret
+ .endm
+
+ .macro tramp_exit, regsize = 64
+ adr x30, tramp_vectors
+ msr vbar_el1, x30
+ tramp_unmap_kernel x30
+ .if \regsize == 64
+ mrs x30, far_el1
+ .endif
+ eret
+ .endm
+
+ .align 11
+ENTRY(tramp_vectors)
+ .space 0x400
+
+ tramp_ventry
+ tramp_ventry
+ tramp_ventry
+ tramp_ventry
+
+ tramp_ventry 32
+ tramp_ventry 32
+ tramp_ventry 32
+ tramp_ventry 32
+END(tramp_vectors)
+
+ENTRY(tramp_exit_native)
+ tramp_exit
+END(tramp_exit_native)
+
+ENTRY(tramp_exit_compat)
+ tramp_exit 32
+END(tramp_exit_compat)
+
+ .ltorg
+ .popsection // .entry.tramp.text
+#ifdef CONFIG_RANDOMIZE_BASE
+ .pushsection ".rodata", "a"
+ .align PAGE_SHIFT
+ .globl __entry_tramp_data_start
+__entry_tramp_data_start:
+ .quad vectors
+ .popsection // .rodata
+#endif /* CONFIG_RANDOMIZE_BASE */
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+
/*
* Special system call wrappers.
*/
@@ -996,3 +1153,180 @@
b ret_to_user
ENDPROC(ret_from_fork)
NOKPROBE(ret_from_fork)
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+
+#include <asm/sdei.h>
+#include <uapi/linux/arm_sdei.h>
+
+.macro sdei_handler_exit exit_mode
+ /* On success, this call never returns... */
+ cmp \exit_mode, #SDEI_EXIT_SMC
+ b.ne 99f
+ smc #0
+ b .
+99: hvc #0
+ b .
+.endm
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+/*
+ * The regular SDEI entry point may have been unmapped along with the rest of
+ * the kernel. This trampoline restores the kernel mapping to make the x1 memory
+ * argument accessible.
+ *
+ * This clobbers x4, __sdei_handler() will restore this from firmware's
+ * copy.
+ */
+.ltorg
+.pushsection ".entry.tramp.text", "ax"
+ENTRY(__sdei_asm_entry_trampoline)
+ mrs x4, ttbr1_el1
+ tbz x4, #USER_ASID_BIT, 1f
+
+ tramp_map_kernel tmp=x4
+ isb
+ mov x4, xzr
+
+ /*
+ * Use reg->interrupted_regs.addr_limit to remember whether to unmap
+ * the kernel on exit.
+ */
+1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
+
+#ifdef CONFIG_RANDOMIZE_BASE
+ adr x4, tramp_vectors + PAGE_SIZE
+ add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler
+ ldr x4, [x4]
+#else
+ ldr x4, =__sdei_asm_handler
+#endif
+ br x4
+ENDPROC(__sdei_asm_entry_trampoline)
+NOKPROBE(__sdei_asm_entry_trampoline)
+
+/*
+ * Make the exit call and restore the original ttbr1_el1
+ *
+ * x0 & x1: setup for the exit API call
+ * x2: exit_mode
+ * x4: struct sdei_registered_event argument from registration time.
+ */
+ENTRY(__sdei_asm_exit_trampoline)
+ ldr x4, [x4, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)]
+ cbnz x4, 1f
+
+ tramp_unmap_kernel tmp=x4
+
+1: sdei_handler_exit exit_mode=x2
+ENDPROC(__sdei_asm_exit_trampoline)
+NOKPROBE(__sdei_asm_exit_trampoline)
+ .ltorg
+.popsection // .entry.tramp.text
+#ifdef CONFIG_RANDOMIZE_BASE
+.pushsection ".rodata", "a"
+__sdei_asm_trampoline_next_handler:
+ .quad __sdei_asm_handler
+.popsection // .rodata
+#endif /* CONFIG_RANDOMIZE_BASE */
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+
+/*
+ * Software Delegated Exception entry point.
+ *
+ * x0: Event number
+ * x1: struct sdei_registered_event argument from registration time.
+ * x2: interrupted PC
+ * x3: interrupted PSTATE
+ * x4: maybe clobbered by the trampoline
+ *
+ * Firmware has preserved x0->x17 for us, we must save/restore the rest to
+ * follow SMC-CC. We save (or retrieve) all the registers as the handler may
+ * want them.
+ */
+ENTRY(__sdei_asm_handler)
+ stp x2, x3, [x1, #SDEI_EVENT_INTREGS + S_PC]
+ stp x4, x5, [x1, #SDEI_EVENT_INTREGS + 16 * 2]
+ stp x6, x7, [x1, #SDEI_EVENT_INTREGS + 16 * 3]
+ stp x8, x9, [x1, #SDEI_EVENT_INTREGS + 16 * 4]
+ stp x10, x11, [x1, #SDEI_EVENT_INTREGS + 16 * 5]
+ stp x12, x13, [x1, #SDEI_EVENT_INTREGS + 16 * 6]
+ stp x14, x15, [x1, #SDEI_EVENT_INTREGS + 16 * 7]
+ stp x16, x17, [x1, #SDEI_EVENT_INTREGS + 16 * 8]
+ stp x18, x19, [x1, #SDEI_EVENT_INTREGS + 16 * 9]
+ stp x20, x21, [x1, #SDEI_EVENT_INTREGS + 16 * 10]
+ stp x22, x23, [x1, #SDEI_EVENT_INTREGS + 16 * 11]
+ stp x24, x25, [x1, #SDEI_EVENT_INTREGS + 16 * 12]
+ stp x26, x27, [x1, #SDEI_EVENT_INTREGS + 16 * 13]
+ stp x28, x29, [x1, #SDEI_EVENT_INTREGS + 16 * 14]
+ mov x4, sp
+ stp lr, x4, [x1, #SDEI_EVENT_INTREGS + S_LR]
+
+ mov x19, x1
+
+#ifdef CONFIG_VMAP_STACK
+ /*
+ * entry.S may have been using sp as a scratch register, find whether
+ * this is a normal or critical event and switch to the appropriate
+ * stack for this CPU.
+ */
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+ cbnz w4, 1f
+ ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
+ b 2f
+1: ldr_this_cpu dst=x5, sym=sdei_stack_critical_ptr, tmp=x6
+2: mov x6, #SDEI_STACK_SIZE
+ add x5, x5, x6
+ mov sp, x5
+#endif
+
+ /*
+ * We may have interrupted userspace, or a guest, or exit-from or
+ * return-to either of these. We can't trust sp_el0, restore it.
+ */
+ mrs x28, sp_el0
+ ldr_this_cpu dst=x0, sym=__entry_task, tmp=x1
+ msr sp_el0, x0
+
+ /* If we interrupted the kernel point to the previous stack/frame. */
+ and x0, x3, #0xc
+ mrs x1, CurrentEL
+ cmp x0, x1
+ csel x29, x29, xzr, eq // fp, or zero
+ csel x4, x2, xzr, eq // elr, or zero
+
+ stp x29, x4, [sp, #-16]!
+ mov x29, sp
+
+ add x0, x19, #SDEI_EVENT_INTREGS
+ mov x1, x19
+ bl __sdei_handler
+
+ msr sp_el0, x28
+ /* restore regs >x17 that we clobbered */
+ mov x4, x19 // keep x4 for __sdei_asm_exit_trampoline
+ ldp x28, x29, [x4, #SDEI_EVENT_INTREGS + 16 * 14]
+ ldp x18, x19, [x4, #SDEI_EVENT_INTREGS + 16 * 9]
+ ldp lr, x1, [x4, #SDEI_EVENT_INTREGS + S_LR]
+ mov sp, x1
+
+ mov x1, x0 // address to complete_and_resume
+ /* x0 = (x0 <= 1) ? EVENT_COMPLETE:EVENT_COMPLETE_AND_RESUME */
+ cmp x0, #1
+ mov_q x2, SDEI_1_0_FN_SDEI_EVENT_COMPLETE
+ mov_q x3, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
+ csel x0, x2, x3, ls
+
+ ldr_l x2, sdei_exit_mode
+
+alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
+ sdei_handler_exit exit_mode=x2
+alternative_else_nop_endif
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline
+ br x5
+#endif
+ENDPROC(__sdei_asm_handler)
+NOKPROBE(__sdei_asm_handler)
+#endif /* CONFIG_ARM_SDE_INTERFACE */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index fae81f7..55fb544 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1036,14 +1036,14 @@ void fpsimd_restore_current_state(void)
* flag that indicates that the FPSIMD register contents are the most recent
* FPSIMD state of 'current'
*/
-void fpsimd_update_current_state(struct fpsimd_state *state)
+void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
if (!system_supports_fpsimd())
return;
local_bh_disable();
- current->thread.fpsimd_state.user_fpsimd = state->user_fpsimd;
+ current->thread.fpsimd_state.user_fpsimd = *state;
if (system_supports_sve() && test_thread_flag(TIF_SVE))
fpsimd_to_sve(current);
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index e3cb9fb..ba3ab047 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -148,6 +148,26 @@
ENDPROC(preserve_boot_args)
/*
+ * Macro to arrange a physical address in a page table entry, taking care of
+ * 52-bit addresses.
+ *
+ * Preserves: phys
+ * Returns: pte
+ */
+ .macro phys_to_pte, phys, pte
+#ifdef CONFIG_ARM64_PA_BITS_52
+ /*
+ * We assume \phys is 64K aligned and this is guaranteed by only
+ * supporting this configuration with 64K pages.
+ */
+ orr \pte, \phys, \phys, lsr #36
+ and \pte, \pte, #PTE_ADDR_MASK
+#else
+ mov \pte, \phys
+#endif
+ .endm
+
+/*
* Macro to create a table entry to the next page.
*
* tbl: page table address
@@ -156,54 +176,124 @@
* ptrs: #imm pointers per table page
*
* Preserves: virt
- * Corrupts: tmp1, tmp2
+ * Corrupts: ptrs, tmp1, tmp2
* Returns: tbl -> next level table page address
*/
.macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
- lsr \tmp1, \virt, #\shift
- and \tmp1, \tmp1, #\ptrs - 1 // table index
- add \tmp2, \tbl, #PAGE_SIZE
+ add \tmp1, \tbl, #PAGE_SIZE
+ phys_to_pte \tmp1, \tmp2
orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
+ lsr \tmp1, \virt, #\shift
+ sub \ptrs, \ptrs, #1
+ and \tmp1, \tmp1, \ptrs // table index
str \tmp2, [\tbl, \tmp1, lsl #3]
add \tbl, \tbl, #PAGE_SIZE // next level table page
.endm
/*
- * Macro to populate the PGD (and possibily PUD) for the corresponding
- * block entry in the next level (tbl) for the given virtual address.
+ * Macro to populate page table entries, these entries can be pointers to the next level
+ * or last level entries pointing to physical memory.
*
- * Preserves: tbl, next, virt
- * Corrupts: tmp1, tmp2
+ * tbl: page table address
+ * rtbl: pointer to page table or physical memory
+ * index: start index to write
+ * eindex: end index to write - [index, eindex] written to
+ * flags: flags for pagetable entry to or in
+ * inc: increment to rtbl between each entry
+ * tmp1: temporary variable
+ *
+ * Preserves: tbl, eindex, flags, inc
+ * Corrupts: index, tmp1
+ * Returns: rtbl
*/
- .macro create_pgd_entry, tbl, virt, tmp1, tmp2
- create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
-#if SWAPPER_PGTABLE_LEVELS > 3
- create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
-#endif
-#if SWAPPER_PGTABLE_LEVELS > 2
- create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
-#endif
+ .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1
+.Lpe\@: phys_to_pte \rtbl, \tmp1
+ orr \tmp1, \tmp1, \flags // tmp1 = table entry
+ str \tmp1, [\tbl, \index, lsl #3]
+ add \rtbl, \rtbl, \inc // rtbl = pa next level
+ add \index, \index, #1
+ cmp \index, \eindex
+ b.ls .Lpe\@
.endm
/*
- * Macro to populate block entries in the page table for the start..end
- * virtual range (inclusive).
+ * Compute indices of table entries from virtual address range. If multiple entries
+ * were needed in the previous page table level then the next page table level is assumed
+ * to be composed of multiple pages. (This effectively scales the end index).
*
- * Preserves: tbl, flags
- * Corrupts: phys, start, end, pstate
+ * vstart: virtual address of start of range
+ * vend: virtual address of end of range
+ * shift: shift used to transform virtual address into index
+ * ptrs: number of entries in page table
+ * istart: index in table corresponding to vstart
+ * iend: index in table corresponding to vend
+ * count: On entry: how many extra entries were required in previous level, scales
+ * our end index.
+ * On exit: returns how many extra entries required for next page table level
+ *
+ * Preserves: vstart, vend, shift, ptrs
+ * Returns: istart, iend, count
*/
- .macro create_block_map, tbl, flags, phys, start, end
- lsr \phys, \phys, #SWAPPER_BLOCK_SHIFT
- lsr \start, \start, #SWAPPER_BLOCK_SHIFT
- and \start, \start, #PTRS_PER_PTE - 1 // table index
- orr \phys, \flags, \phys, lsl #SWAPPER_BLOCK_SHIFT // table entry
- lsr \end, \end, #SWAPPER_BLOCK_SHIFT
- and \end, \end, #PTRS_PER_PTE - 1 // table end index
-9999: str \phys, [\tbl, \start, lsl #3] // store the entry
- add \start, \start, #1 // next entry
- add \phys, \phys, #SWAPPER_BLOCK_SIZE // next block
- cmp \start, \end
- b.ls 9999b
+ .macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
+ lsr \iend, \vend, \shift
+ mov \istart, \ptrs
+ sub \istart, \istart, #1
+ and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
+ mov \istart, \ptrs
+ mul \istart, \istart, \count
+ add \iend, \iend, \istart // iend += (count - 1) * ptrs
+ // our entries span multiple tables
+
+ lsr \istart, \vstart, \shift
+ mov \count, \ptrs
+ sub \count, \count, #1
+ and \istart, \istart, \count
+
+ sub \count, \iend, \istart
+ .endm
+
+/*
+ * Map memory for specified virtual address range. Each level of page table needed supports
+ * multiple entries. If a level requires n entries the next page table level is assumed to be
+ * formed from n pages.
+ *
+ * tbl: location of page table
+ * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE)
+ * vstart: start address to map
+ * vend: end address to map - we map [vstart, vend]
+ * flags: flags to use to map last level entries
+ * phys: physical address corresponding to vstart - physical memory is contiguous
+ * pgds: the number of pgd entries
+ *
+ * Temporaries: istart, iend, tmp, count, sv - these need to be different registers
+ * Preserves: vstart, vend, flags
+ * Corrupts: tbl, rtbl, istart, iend, tmp, count, sv
+ */
+ .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
+ add \rtbl, \tbl, #PAGE_SIZE
+ mov \sv, \rtbl
+ mov \count, #0
+ compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
+ populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
+ mov \tbl, \sv
+ mov \sv, \rtbl
+
+#if SWAPPER_PGTABLE_LEVELS > 3
+ compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
+ populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
+ mov \tbl, \sv
+ mov \sv, \rtbl
+#endif
+
+#if SWAPPER_PGTABLE_LEVELS > 2
+ compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
+ populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
+ mov \tbl, \sv
+#endif
+
+ compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
+ bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
+ populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
.endm
/*
@@ -221,14 +311,16 @@
* dirty cache lines being evicted.
*/
adrp x0, idmap_pg_dir
- ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+ adrp x1, swapper_pg_end
+ sub x1, x1, x0
bl __inval_dcache_area
/*
* Clear the idmap and swapper page tables.
*/
adrp x0, idmap_pg_dir
- ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+ adrp x1, swapper_pg_end
+ sub x1, x1, x0
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
@@ -244,16 +336,34 @@
adrp x0, idmap_pg_dir
adrp x3, __idmap_text_start // __pa(__idmap_text_start)
-#ifndef CONFIG_ARM64_VA_BITS_48
+ /*
+ * VA_BITS may be too small to allow for an ID mapping to be created
+ * that covers system RAM if that is located sufficiently high in the
+ * physical address space. So for the ID map, use an extended virtual
+ * range in that case, and configure an additional translation level
+ * if needed.
+ *
+ * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
+ * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
+ * this number conveniently equals the number of leading zeroes in
+ * the physical address of __idmap_text_end.
+ */
+ adrp x5, __idmap_text_end
+ clz x5, x5
+ cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough?
+ b.ge 1f // .. then skip VA range extension
+
+ adr_l x6, idmap_t0sz
+ str x5, [x6]
+ dmb sy
+ dc ivac, x6 // Invalidate potentially stale cache line
+
+#if (VA_BITS < 48)
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
-#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT))
+#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
/*
- * If VA_BITS < 48, it may be too small to allow for an ID mapping to be
- * created that covers system RAM if that is located sufficiently high
- * in the physical address space. So for the ID map, use an extended
- * virtual range in that case, by configuring an additional translation
- * level.
+ * If VA_BITS < 48, we have to configure an additional table level.
* First, we have to verify our assumption that the current value of
* VA_BITS was chosen such that all translation levels are fully
* utilised, and that lowering T0SZ will always result in an additional
@@ -263,30 +373,22 @@
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif
+ mov x4, EXTRA_PTRS
+ create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
+#else
/*
- * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
- * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
- * this number conveniently equals the number of leading zeroes in
- * the physical address of __idmap_text_end.
+ * If VA_BITS == 48, we don't have to configure an additional
+ * translation level, but the top-level table has more entries.
*/
- adrp x5, __idmap_text_end
- clz x5, x5
- cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough?
- b.ge 1f // .. then skip additional level
-
- adr_l x6, idmap_t0sz
- str x5, [x6]
- dmb sy
- dc ivac, x6 // Invalidate potentially stale cache line
-
- create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6
-1:
+ mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
+ str_l x4, idmap_ptrs_per_pgd, x5
#endif
-
- create_pgd_entry x0, x3, x5, x6
+1:
+ ldr_l x4, idmap_ptrs_per_pgd
mov x5, x3 // __pa(__idmap_text_start)
adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
- create_block_map x0, x7, x3, x5, x6
+
+ map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
/*
* Map the kernel image (starting with PHYS_OFFSET).
@@ -294,12 +396,13 @@
adrp x0, swapper_pg_dir
mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
- create_pgd_entry x0, x5, x3, x6
+ mov x4, PTRS_PER_PGD
adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
- create_block_map x0, x7, x3, x5, x6
+
+ map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
/*
* Since the page tables have been populated with non-cacheable
@@ -307,7 +410,8 @@
* tables again to remove any speculatively loaded cache lines.
*/
adrp x0, idmap_pg_dir
- ldr x1, =(IDMAP_DIR_SIZE + SWAPPER_DIR_SIZE + RESERVED_TTBR0_SIZE)
+ adrp x1, swapper_pg_end
+ sub x1, x1, x0
dmb sy
bl __inval_dcache_area
@@ -388,17 +492,13 @@
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.eq 1f
- mrs x0, sctlr_el1
-CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1
-CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1
+ mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
msr sctlr_el1, x0
mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1
isb
ret
-1: mrs x0, sctlr_el2
-CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2
-CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2
+1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
msr sctlr_el2, x0
#ifdef CONFIG_ARM64_VHE
@@ -514,10 +614,7 @@
* requires no configuration, and all non-hyp-specific EL2 setup
* will be done via the _EL1 system register aliases in __cpu_setup.
*/
- /* sctlr_el1 */
- mov x0, #0x0800 // Set/clear RES{1,0} bits
-CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems
-CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems
+ mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1)
msr sctlr_el1, x0
/* Coprocessor traps. */
@@ -679,8 +776,10 @@
update_early_cpu_boot_status 0, x1, x2
adrp x1, idmap_pg_dir
adrp x2, swapper_pg_dir
- msr ttbr0_el1, x1 // load TTBR0
- msr ttbr1_el1, x2 // load TTBR1
+ phys_to_ttbr x1, x3
+ phys_to_ttbr x2, x4
+ msr ttbr0_el1, x3 // load TTBR0
+ msr ttbr1_el1, x4 // load TTBR1
isb
msr sctlr_el1, x0
isb
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index e56d848..84f5d52 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -33,12 +33,14 @@
* Even switching to our copied tables will cause a changed output address at
* each stage of the walk.
*/
-.macro break_before_make_ttbr_switch zero_page, page_table
- msr ttbr1_el1, \zero_page
+.macro break_before_make_ttbr_switch zero_page, page_table, tmp
+ phys_to_ttbr \zero_page, \tmp
+ msr ttbr1_el1, \tmp
isb
tlbi vmalle1
dsb nsh
- msr ttbr1_el1, \page_table
+ phys_to_ttbr \page_table, \tmp
+ msr ttbr1_el1, \tmp
isb
.endm
@@ -78,7 +80,7 @@
* We execute from ttbr0, change ttbr1 to our copied linear map tables
* with a break-before-make via the zero page
*/
- break_before_make_ttbr_switch x5, x0
+ break_before_make_ttbr_switch x5, x0, x6
mov x21, x1
mov x30, x2
@@ -109,7 +111,7 @@
dsb ish /* wait for PoU cleaning to finish */
/* switch to the restored kernels page tables */
- break_before_make_ttbr_switch x25, x21
+ break_before_make_ttbr_switch x25, x21, x6
ic ialluis
dsb ish
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 3009b8b..f20cf7e 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -247,8 +247,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
}
pte = pte_offset_kernel(pmd, dst_addr);
- set_pte(pte, __pte(virt_to_phys((void *)dst) |
- pgprot_val(PAGE_KERNEL_EXEC)));
+ set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
/*
* Load our new page tables. A strict BBM approach requires that we
@@ -264,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
*/
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
- write_sysreg(virt_to_phys(pgd), ttbr0_el1);
+ write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
isb();
*phys_dst_addr = virt_to_phys((void *)dst);
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 713561e..60e5fc6 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -29,6 +29,7 @@
#include <linux/irqchip.h>
#include <linux/seq_file.h>
#include <linux/vmalloc.h>
+#include <asm/vmap_stack.h>
unsigned long irq_err_count;
@@ -58,17 +59,7 @@ static void init_irq_stacks(void)
unsigned long *p;
for_each_possible_cpu(cpu) {
- /*
- * To ensure that VMAP'd stack overflow detection works
- * correctly, the IRQ stacks need to have the same
- * alignment as other stacks.
- */
- p = __vmalloc_node_range(IRQ_STACK_SIZE, THREAD_ALIGN,
- VMALLOC_START, VMALLOC_END,
- THREADINFO_GFP, PAGE_KERNEL,
- 0, cpu_to_node(cpu),
- __builtin_return_address(0));
-
+ p = arch_alloc_vmap_stack(IRQ_STACK_SIZE, cpu_to_node(cpu));
per_cpu(irq_stack_ptr, cpu) = p;
}
}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 6b7dcf4..583fd81 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -370,16 +370,14 @@ void tls_preserve_current_state(void)
static void tls_thread_switch(struct task_struct *next)
{
- unsigned long tpidr, tpidrro;
-
tls_preserve_current_state();
- tpidr = *task_user_tls(next);
- tpidrro = is_compat_thread(task_thread_info(next)) ?
- next->thread.tp_value : 0;
+ if (is_compat_thread(task_thread_info(next)))
+ write_sysreg(next->thread.tp_value, tpidrro_el0);
+ else if (!arm64_kernel_unmapped_at_el0())
+ write_sysreg(0, tpidrro_el0);
- write_sysreg(tpidr, tpidr_el0);
- write_sysreg(tpidrro, tpidrro_el0);
+ write_sysreg(*task_user_tls(next), tpidr_el0);
}
/* Restore the UAO state depending on next's addr_limit */
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
new file mode 100644
index 0000000..6b8d90d
--- /dev/null
+++ b/arch/arm64/kernel/sdei.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2017 Arm Ltd.
+#define pr_fmt(fmt) "sdei: " fmt
+
+#include <linux/arm_sdei.h>
+#include <linux/hardirq.h>
+#include <linux/irqflags.h>
+#include <linux/sched/task_stack.h>
+#include <linux/uaccess.h>
+
+#include <asm/alternative.h>
+#include <asm/kprobes.h>
+#include <asm/mmu.h>
+#include <asm/ptrace.h>
+#include <asm/sections.h>
+#include <asm/sysreg.h>
+#include <asm/vmap_stack.h>
+
+unsigned long sdei_exit_mode;
+
+/*
+ * VMAP'd stacks checking for stack overflow on exception using sp as a scratch
+ * register, meaning SDEI has to switch to its own stack. We need two stacks as
+ * a critical event may interrupt a normal event that has just taken a
+ * synchronous exception, and is using sp as scratch register. For a critical
+ * event interrupting a normal event, we can't reliably tell if we were on the
+ * sdei stack.
+ * For now, we allocate stacks when the driver is probed.
+ */
+DECLARE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
+DECLARE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
+
+#ifdef CONFIG_VMAP_STACK
+DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr);
+DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr);
+#endif
+
+static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p;
+
+ p = per_cpu(*ptr, cpu);
+ if (p) {
+ per_cpu(*ptr, cpu) = NULL;
+ vfree(p);
+ }
+}
+
+static void free_sdei_stacks(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ _free_sdei_stack(&sdei_stack_normal_ptr, cpu);
+ _free_sdei_stack(&sdei_stack_critical_ptr, cpu);
+ }
+}
+
+static int _init_sdei_stack(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p;
+
+ p = arch_alloc_vmap_stack(SDEI_STACK_SIZE, cpu_to_node(cpu));
+ if (!p)
+ return -ENOMEM;
+ per_cpu(*ptr, cpu) = p;
+
+ return 0;
+}
+
+static int init_sdei_stacks(void)
+{
+ int cpu;
+ int err = 0;
+
+ for_each_possible_cpu(cpu) {
+ err = _init_sdei_stack(&sdei_stack_normal_ptr, cpu);
+ if (err)
+ break;
+ err = _init_sdei_stack(&sdei_stack_critical_ptr, cpu);
+ if (err)
+ break;
+ }
+
+ if (err)
+ free_sdei_stacks();
+
+ return err;
+}
+
+bool _on_sdei_stack(unsigned long sp)
+{
+ unsigned long low, high;
+
+ if (!IS_ENABLED(CONFIG_VMAP_STACK))
+ return false;
+
+ low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr);
+ high = low + SDEI_STACK_SIZE;
+
+ if (low <= sp && sp < high)
+ return true;
+
+ low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr);
+ high = low + SDEI_STACK_SIZE;
+
+ return (low <= sp && sp < high);
+}
+
+unsigned long sdei_arch_get_entry_point(int conduit)
+{
+ /*
+ * SDEI works between adjacent exception levels. If we booted at EL1 we
+ * assume a hypervisor is marshalling events. If we booted at EL2 and
+ * dropped to EL1 because we don't support VHE, then we can't support
+ * SDEI.
+ */
+ if (is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
+ pr_err("Not supported on this hardware/boot configuration\n");
+ return 0;
+ }
+
+ if (IS_ENABLED(CONFIG_VMAP_STACK)) {
+ if (init_sdei_stacks())
+ return 0;
+ }
+
+ sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ if (arm64_kernel_unmapped_at_el0()) {
+ unsigned long offset;
+
+ offset = (unsigned long)__sdei_asm_entry_trampoline -
+ (unsigned long)__entry_tramp_text_start;
+ return TRAMP_VALIAS + offset;
+ } else
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
+ return (unsigned long)__sdei_asm_handler;
+
+}
+
+/*
+ * __sdei_handler() returns one of:
+ * SDEI_EV_HANDLED - success, return to the interrupted context.
+ * SDEI_EV_FAILED - failure, return this error code to firmare.
+ * virtual-address - success, return to this address.
+ */
+static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg)
+{
+ u32 mode;
+ int i, err = 0;
+ int clobbered_registers = 4;
+ u64 elr = read_sysreg(elr_el1);
+ u32 kernel_mode = read_sysreg(CurrentEL) | 1; /* +SPSel */
+ unsigned long vbar = read_sysreg(vbar_el1);
+
+ if (arm64_kernel_unmapped_at_el0())
+ clobbered_registers++;
+
+ /* Retrieve the missing registers values */
+ for (i = 0; i < clobbered_registers; i++) {
+ /* from within the handler, this call always succeeds */
+ sdei_api_event_context(i, ®s->regs[i]);
+ }
+
+ /*
+ * We didn't take an exception to get here, set PAN. UAO will be cleared
+ * by sdei_event_handler()s set_fs(USER_DS) call.
+ */
+ __uaccess_enable_hw_pan();
+
+ err = sdei_event_handler(regs, arg);
+ if (err)
+ return SDEI_EV_FAILED;
+
+ if (elr != read_sysreg(elr_el1)) {
+ /*
+ * We took a synchronous exception from the SDEI handler.
+ * This could deadlock, and if you interrupt KVM it will
+ * hyp-panic instead.
+ */
+ pr_warn("unsafe: exception during handler\n");
+ }
+
+ mode = regs->pstate & (PSR_MODE32_BIT | PSR_MODE_MASK);
+
+ /*
+ * If we interrupted the kernel with interrupts masked, we always go
+ * back to wherever we came from.
+ */
+ if (mode == kernel_mode && !interrupts_enabled(regs))
+ return SDEI_EV_HANDLED;
+
+ /*
+ * Otherwise, we pretend this was an IRQ. This lets user space tasks
+ * receive signals before we return to them, and KVM to invoke it's
+ * world switch to do the same.
+ *
+ * See DDI0487B.a Table D1-7 'Vector offsets from vector table base
+ * address'.
+ */
+ if (mode == kernel_mode)
+ return vbar + 0x280;
+ else if (mode & PSR_MODE32_BIT)
+ return vbar + 0x680;
+
+ return vbar + 0x480;
+}
+
+
+asmlinkage __kprobes notrace unsigned long
+__sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg)
+{
+ unsigned long ret;
+ bool do_nmi_exit = false;
+
+ /*
+ * nmi_enter() deals with printk() re-entrance and use of RCU when
+ * RCU believed this CPU was idle. Because critical events can
+ * interrupt normal events, we may already be in_nmi().
+ */
+ if (!in_nmi()) {
+ nmi_enter();
+ do_nmi_exit = true;
+ }
+
+ ret = _sdei_handler(regs, arg);
+
+ if (do_nmi_exit)
+ nmi_exit();
+
+ return ret;
+}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index b120111..f60c052 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -178,7 +178,8 @@ static void __user *apply_user_offset(
static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
{
- struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state;
+ struct user_fpsimd_state const *fpsimd =
+ ¤t->thread.fpsimd_state.user_fpsimd;
int err;
/* copy the FP and status/control registers */
@@ -195,7 +196,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
{
- struct fpsimd_state fpsimd;
+ struct user_fpsimd_state fpsimd;
__u32 magic, size;
int err = 0;
@@ -266,7 +267,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
{
int err;
unsigned int vq;
- struct fpsimd_state fpsimd;
+ struct user_fpsimd_state fpsimd;
struct sve_context sve;
if (__copy_from_user(&sve, user->sve, sizeof(sve)))
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index 22711ee..a124140 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -228,7 +228,8 @@ union __fpsimd_vreg {
static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
{
- struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state;
+ struct user_fpsimd_state const *fpsimd =
+ ¤t->thread.fpsimd_state.user_fpsimd;
compat_ulong_t magic = VFP_MAGIC;
compat_ulong_t size = VFP_STORAGE_SIZE;
compat_ulong_t fpscr, fpexc;
@@ -277,7 +278,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
{
- struct fpsimd_state fpsimd;
+ struct user_fpsimd_state fpsimd;
compat_ulong_t magic = VFP_MAGIC;
compat_ulong_t size = VFP_STORAGE_SIZE;
compat_ulong_t fpscr;
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 551eb07..3b8ad7b 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -18,6 +18,7 @@
*/
#include <linux/acpi.h>
+#include <linux/arm_sdei.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
@@ -836,6 +837,7 @@ static void ipi_cpu_stop(unsigned int cpu)
set_cpu_online(cpu, false);
local_daif_mask();
+ sdei_mask_local_cpu();
while (1)
cpu_relax();
@@ -853,6 +855,7 @@ static void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs)
atomic_dec(&waiting_for_crash_ipi);
local_irq_disable();
+ sdei_mask_local_cpu();
#ifdef CONFIG_HOTPLUG_CPU
if (cpu_ops[cpu]->cpu_die)
@@ -972,6 +975,8 @@ void smp_send_stop(void)
if (num_online_cpus() > 1)
pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(cpu_online_mask));
+
+ sdei_mask_local_cpu();
}
#ifdef CONFIG_KEXEC_CORE
@@ -990,8 +995,10 @@ void crash_smp_send_stop(void)
cpus_stopped = 1;
- if (num_online_cpus() == 1)
+ if (num_online_cpus() == 1) {
+ sdei_mask_local_cpu();
return;
+ }
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
@@ -1009,6 +1016,8 @@ void crash_smp_send_stop(void)
if (atomic_read(&waiting_for_crash_ipi) > 0)
pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
cpumask_pr_args(&mask));
+
+ sdei_mask_local_cpu();
}
bool smp_crash_stop_failed(void)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 3fe5ad8..a307b9e 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -2,6 +2,7 @@
#include <linux/ftrace.h>
#include <linux/percpu.h>
#include <linux/slab.h>
+#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
@@ -51,8 +52,7 @@ void notrace __cpu_suspend_exit(void)
* PSTATE was not saved over suspend/resume, re-enable any detected
* features that might not have been set correctly.
*/
- asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN,
- CONFIG_ARM64_PAN));
+ __uaccess_enable_hw_pan();
uao_thread_switch(current);
/*
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 8d48b23..2186853 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -37,18 +37,14 @@ static int __init get_cpu_for_node(struct device_node *node)
if (!cpu_node)
return -1;
- for_each_possible_cpu(cpu) {
- if (of_get_cpu_node(cpu, NULL) == cpu_node) {
- topology_parse_cpu_capacity(cpu_node, cpu);
- of_node_put(cpu_node);
- return cpu;
- }
- }
-
- pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
+ cpu = of_cpu_node_to_id(cpu_node);
+ if (cpu >= 0)
+ topology_parse_cpu_capacity(cpu_node, cpu);
+ else
+ pr_crit("Unable to find CPU node for %pOF\n", cpu_node);
of_node_put(cpu_node);
- return -1;
+ return cpu;
}
static int __init parse_core(struct device_node *core, int cluster_id,
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 3d3588f..bbb0fde 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -662,17 +662,58 @@ asmlinkage void handle_bad_stack(struct pt_regs *regs)
}
#endif
-asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+void __noreturn arm64_serror_panic(struct pt_regs *regs, u32 esr)
{
- nmi_enter();
-
console_verbose();
pr_crit("SError Interrupt on CPU%d, code 0x%08x -- %s\n",
smp_processor_id(), esr, esr_get_class_string(esr));
- __show_regs(regs);
+ if (regs)
+ __show_regs(regs);
- panic("Asynchronous SError Interrupt");
+ nmi_panic(regs, "Asynchronous SError Interrupt");
+
+ cpu_park_loop();
+ unreachable();
+}
+
+bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
+{
+ u32 aet = arm64_ras_serror_get_severity(esr);
+
+ switch (aet) {
+ case ESR_ELx_AET_CE: /* corrected error */
+ case ESR_ELx_AET_UEO: /* restartable, not yet consumed */
+ /*
+ * The CPU can make progress. We may take UEO again as
+ * a more severe error.
+ */
+ return false;
+
+ case ESR_ELx_AET_UEU: /* Uncorrected Unrecoverable */
+ case ESR_ELx_AET_UER: /* Uncorrected Recoverable */
+ /*
+ * The CPU can't make progress. The exception may have
+ * been imprecise.
+ */
+ return true;
+
+ case ESR_ELx_AET_UC: /* Uncontainable or Uncategorized error */
+ default:
+ /* Error has been silently propagated */
+ arm64_serror_panic(regs, esr);
+ }
+}
+
+asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr)
+{
+ nmi_enter();
+
+ /* non-RAS errors are not containable */
+ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr))
+ arm64_serror_panic(regs, esr);
+
+ nmi_exit();
}
void __pte_error(const char *file, int line, unsigned long val)
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 7da3e5c..0221aca 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -57,6 +57,17 @@
#define HIBERNATE_TEXT
#endif
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define TRAMP_TEXT \
+ . = ALIGN(PAGE_SIZE); \
+ VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \
+ *(.entry.tramp.text) \
+ . = ALIGN(PAGE_SIZE); \
+ VMLINUX_SYMBOL(__entry_tramp_text_end) = .;
+#else
+#define TRAMP_TEXT
+#endif
+
/*
* The size of the PE/COFF section that covers the kernel image, which
* runs from stext to _edata, must be a round multiple of the PE/COFF
@@ -113,6 +124,7 @@
HYPERVISOR_TEXT
IDMAP_TEXT
HIBERNATE_TEXT
+ TRAMP_TEXT
*(.fixup)
*(.gnu.warning)
. = ALIGN(16);
@@ -206,13 +218,19 @@
. = ALIGN(PAGE_SIZE);
idmap_pg_dir = .;
. += IDMAP_DIR_SIZE;
- swapper_pg_dir = .;
- . += SWAPPER_DIR_SIZE;
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ tramp_pg_dir = .;
+ . += PAGE_SIZE;
+#endif
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
reserved_ttbr0 = .;
. += RESERVED_TTBR0_SIZE;
#endif
+ swapper_pg_dir = .;
+ . += SWAPPER_DIR_SIZE;
+ swapper_pg_end = .;
__pecoff_data_size = ABSOLUTE(. - __initdata_begin);
_end = .;
@@ -234,7 +252,10 @@
ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1))
<= SZ_4K, "Hibernate exit text too big or misaligned")
#endif
-
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE,
+ "Entry trampoline text too big")
+#endif
/*
* If padding is applied before .head.text, virt<->phys conversions will fail.
*/
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index e60494f..520b0da 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -23,18 +23,26 @@
#include <linux/kvm_host.h>
#include <asm/esr.h>
+#include <asm/exception.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_psci.h>
#include <asm/debug-monitors.h>
+#include <asm/traps.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
+static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr)
+{
+ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(NULL, esr))
+ kvm_inject_vabt(vcpu);
+}
+
static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
int ret;
@@ -242,7 +250,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
*vcpu_pc(vcpu) -= adj;
}
- kvm_inject_vabt(vcpu);
return 1;
}
@@ -252,7 +259,6 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
case ARM_EXCEPTION_IRQ:
return 1;
case ARM_EXCEPTION_EL1_SERROR:
- kvm_inject_vabt(vcpu);
/* We may still need to return for single-step */
if (!(*vcpu_cpsr(vcpu) & DBG_SPSR_SS)
&& kvm_arm_handle_step_debug(vcpu, run))
@@ -275,3 +281,25 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
return 0;
}
}
+
+/* For exit types that need handling before we can be preempted */
+void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
+ int exception_index)
+{
+ if (ARM_SERROR_PENDING(exception_index)) {
+ if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) {
+ u64 disr = kvm_vcpu_get_disr(vcpu);
+
+ kvm_handle_guest_serror(vcpu, disr_to_esr(disr));
+ } else {
+ kvm_inject_vabt(vcpu);
+ }
+
+ return;
+ }
+
+ exception_index = ARM_EXCEPTION_CODE(exception_index);
+
+ if (exception_index == ARM_EXCEPTION_EL1_SERROR)
+ kvm_handle_guest_serror(vcpu, kvm_vcpu_get_hsr(vcpu));
+}
diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S
index 870828c..e086c6ef 100644
--- a/arch/arm64/kvm/hyp-init.S
+++ b/arch/arm64/kvm/hyp-init.S
@@ -63,7 +63,8 @@
cmp x0, #HVC_STUB_HCALL_NR
b.lo __kvm_handle_stub_hvc
- msr ttbr0_el2, x0
+ phys_to_ttbr x0, x4
+ msr ttbr0_el2, x4
mrs x4, tcr_el1
ldr x5, =TCR_EL2_MASK
@@ -71,30 +72,27 @@
mov x5, #TCR_EL2_RES1
orr x4, x4, x5
-#ifndef CONFIG_ARM64_VA_BITS_48
/*
- * If we are running with VA_BITS < 48, we may be running with an extra
- * level of translation in the ID map. This is only the case if system
- * RAM is out of range for the currently configured page size and number
- * of translation levels, in which case we will also need the extra
- * level for the HYP ID map, or we won't be able to enable the EL2 MMU.
+ * The ID map may be configured to use an extended virtual address
+ * range. This is only the case if system RAM is out of range for the
+ * currently configured page size and VA_BITS, in which case we will
+ * also need the extended virtual range for the HYP ID map, or we won't
+ * be able to enable the EL2 MMU.
*
* However, at EL2, there is only one TTBR register, and we can't switch
* between translation tables *and* update TCR_EL2.T0SZ at the same
- * time. Bottom line: we need the extra level in *both* our translation
- * tables.
+ * time. Bottom line: we need to use the extended range with *both* our
+ * translation tables.
*
* So use the same T0SZ value we use for the ID map.
*/
ldr_l x5, idmap_t0sz
bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
-#endif
+
/*
- * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in
- * TCR_EL2.
+ * Set the PS bits in TCR_EL2.
*/
- mrs x5, ID_AA64MMFR0_EL1
- bfi x4, x5, #16, #3
+ tcr_compute_pa_size x4, #TCR_EL2_PS_SHIFT, x5, x6
msr tcr_el2, x4
@@ -122,6 +120,10 @@
kern_hyp_va x2
msr vbar_el2, x2
+ /* copy tpidr_el1 into tpidr_el2 for use by HYP */
+ mrs x1, tpidr_el1
+ msr tpidr_el2, x1
+
/* Hello, World! */
eret
ENDPROC(__kvm_hyp_init)
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index 12ee62d..fdd1068 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -62,8 +62,8 @@
// Store the host regs
save_callee_saved_regs x1
- // Store the host_ctxt for use at exit time
- str x1, [sp, #-16]!
+ // Store host_ctxt and vcpu for use at exit time
+ stp x1, x0, [sp, #-16]!
add x18, x0, #VCPU_CONTEXT
@@ -124,6 +124,17 @@
// Now restore the host regs
restore_callee_saved_regs x2
+alternative_if ARM64_HAS_RAS_EXTN
+ // If we have the RAS extensions we can consume a pending error
+ // without an unmask-SError and isb.
+ esb
+ mrs_s x2, SYS_DISR_EL1
+ str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)]
+ cbz x2, 1f
+ msr_s SYS_DISR_EL1, xzr
+ orr x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT)
+1: ret
+alternative_else
// If we have a pending asynchronous abort, now is the
// time to find out. From your VAXorcist book, page 666:
// "Threaten me not, oh Evil one! For I speak with
@@ -134,7 +145,9 @@
mov x5, x0
dsb sy // Synchronize against in-flight ld/st
+ nop
msr daifclr, #4 // Unmask aborts
+alternative_endif
// This is our single instruction exception window. A pending
// SError is guaranteed to occur at the earliest when we unmask
@@ -159,6 +172,10 @@
ENDPROC(__guest_exit)
ENTRY(__fpsimd_guest_restore)
+ // x0: esr
+ // x1: vcpu
+ // x2-x29,lr: vcpu regs
+ // vcpu x0-x1 on the stack
stp x2, x3, [sp, #-16]!
stp x4, lr, [sp, #-16]!
@@ -173,7 +190,7 @@
alternative_endif
isb
- mrs x3, tpidr_el2
+ mov x3, x1
ldr x0, [x3, #VCPU_HOST_CONTEXT]
kern_hyp_va x0
@@ -196,3 +213,15 @@
eret
ENDPROC(__fpsimd_guest_restore)
+
+ENTRY(__qcom_hyp_sanitize_btac_predictors)
+ /**
+ * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700)
+ * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls
+ * b15-b0: contains SiP functionID
+ */
+ movz x0, #0x1700
+ movk x0, #0xc200, lsl #16
+ smc #0
+ ret
+ENDPROC(__qcom_hyp_sanitize_btac_predictors)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 5170ce1..e4f37b9 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -104,6 +104,7 @@
/*
* x0: ESR_EC
*/
+ ldr x1, [sp, #16 + 8] // vcpu stored by __guest_enter
/*
* We trap the first access to the FP/SIMD to save the host context
@@ -116,19 +117,18 @@
b.eq __fpsimd_guest_restore
alternative_else_nop_endif
- mrs x1, tpidr_el2
mov x0, #ARM_EXCEPTION_TRAP
b __guest_exit
el1_irq:
stp x0, x1, [sp, #-16]!
- mrs x1, tpidr_el2
+ ldr x1, [sp, #16 + 8]
mov x0, #ARM_EXCEPTION_IRQ
b __guest_exit
el1_error:
stp x0, x1, [sp, #-16]!
- mrs x1, tpidr_el2
+ ldr x1, [sp, #16 + 8]
mov x0, #ARM_EXCEPTION_EL1_SERROR
b __guest_exit
@@ -163,6 +163,18 @@
eret
ENDPROC(__hyp_do_panic)
+ENTRY(__hyp_panic)
+ /*
+ * '=kvm_host_cpu_state' is a host VA from the constant pool, it may
+ * not be accessible by this address from EL2, hyp_panic() converts
+ * it with kern_hyp_va() before use.
+ */
+ ldr x0, =kvm_host_cpu_state
+ mrs x1, tpidr_el2
+ add x0, x0, x1
+ b hyp_panic
+ENDPROC(__hyp_panic)
+
.macro invalid_vector label, target = __hyp_panic
.align 2
\label:
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index a81f5e1..603e1ee 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -32,6 +32,8 @@ u32 __hyp_text __init_stage2_translation(void)
* PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
*/
parange = read_sysreg(id_aa64mmfr0_el1) & 7;
+ if (parange > ID_AA64MMFR0_PARANGE_MAX)
+ parange = ID_AA64MMFR0_PARANGE_MAX;
val |= parange << 16;
/* Compute the actual PARange... */
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index f7c651f..036e1f3 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
@@ -52,7 +53,7 @@ static void __hyp_text __activate_traps_vhe(void)
val &= ~(CPACR_EL1_FPEN | CPACR_EL1_ZEN);
write_sysreg(val, cpacr_el1);
- write_sysreg(__kvm_hyp_vector, vbar_el1);
+ write_sysreg(kvm_get_hyp_vector(), vbar_el1);
}
static void __hyp_text __activate_traps_nvhe(void)
@@ -93,6 +94,9 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
write_sysreg(val, hcr_el2);
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (val & HCR_VSE))
+ write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+
/* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */
write_sysreg(1 << 15, hstr_el2);
/*
@@ -235,11 +239,12 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
{
- u64 esr = read_sysreg_el2(esr);
- u8 ec = ESR_ELx_EC(esr);
+ u8 ec;
+ u64 esr;
u64 hpfar, far;
- vcpu->arch.fault.esr_el2 = esr;
+ esr = vcpu->arch.fault.esr_el2;
+ ec = ESR_ELx_EC(esr);
if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
return true;
@@ -305,9 +310,9 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
u64 exit_code;
vcpu = kern_hyp_va(vcpu);
- write_sysreg(vcpu, tpidr_el2);
host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
__sysreg_save_host_state(host_ctxt);
@@ -332,6 +337,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
exit_code = __guest_enter(vcpu, host_ctxt);
/* And we're baaack! */
+ if (ARM_EXCEPTION_CODE(exit_code) != ARM_EXCEPTION_IRQ)
+ vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
/*
* We're using the raw exception code in order to only process
* the trap if no SError is pending. We will come back to the
@@ -341,6 +348,18 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu))
goto again;
+ if (exit_code == ARM_EXCEPTION_TRAP &&
+ (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC64 ||
+ kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_HVC32) &&
+ vcpu_get_reg(vcpu, 0) == PSCI_0_2_FN_PSCI_VERSION) {
+ u64 val = PSCI_RET_NOT_SUPPORTED;
+ if (test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features))
+ val = 2;
+
+ vcpu_set_reg(vcpu, 0, val);
+ goto again;
+ }
+
if (static_branch_unlikely(&vgic_v2_cpuif_trap) &&
exit_code == ARM_EXCEPTION_TRAP) {
bool valid;
@@ -393,6 +412,14 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
/* 0 falls through to be handled out of EL2 */
}
+ if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) {
+ u32 midr = read_cpuid_id();
+
+ /* Apply BTAC predictors mitigation to all Falkor chips */
+ if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
+ __qcom_hyp_sanitize_btac_predictors();
+ }
+
fp_enabled = __fpsimd_enabled();
__sysreg_save_guest_state(guest_ctxt);
@@ -422,7 +449,8 @@ int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
-static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
+static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_vcpu *vcpu)
{
unsigned long str_va;
@@ -436,35 +464,35 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par)
__hyp_do_panic(str_va,
spsr, elr,
read_sysreg(esr_el2), read_sysreg_el2(far),
- read_sysreg(hpfar_el2), par,
- (void *)read_sysreg(tpidr_el2));
+ read_sysreg(hpfar_el2), par, vcpu);
}
-static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par)
+static void __hyp_text __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
+ struct kvm_vcpu *vcpu)
{
panic(__hyp_panic_string,
spsr, elr,
read_sysreg_el2(esr), read_sysreg_el2(far),
- read_sysreg(hpfar_el2), par,
- (void *)read_sysreg(tpidr_el2));
+ read_sysreg(hpfar_el2), par, vcpu);
}
static hyp_alternate_select(__hyp_call_panic,
__hyp_call_panic_nvhe, __hyp_call_panic_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
-void __hyp_text __noreturn __hyp_panic(void)
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *__host_ctxt)
{
+ struct kvm_vcpu *vcpu = NULL;
+
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 par = read_sysreg(par_el1);
if (read_sysreg(vttbr_el2)) {
- struct kvm_vcpu *vcpu;
struct kvm_cpu_context *host_ctxt;
- vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2);
- host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+ host_ctxt = kern_hyp_va(__host_ctxt);
+ vcpu = host_ctxt->__hyp_running_vcpu;
__timer_disable_traps(vcpu);
__deactivate_traps(vcpu);
__deactivate_vm(vcpu);
@@ -472,7 +500,7 @@ void __hyp_text __noreturn __hyp_panic(void)
}
/* Call panic for real */
- __hyp_call_panic()(spsr, elr, par);
+ __hyp_call_panic()(spsr, elr, par, vcpu);
unreachable();
}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 9341376..2c17afd 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -27,8 +27,8 @@ static void __hyp_text __sysreg_do_nothing(struct kvm_cpu_context *ctxt) { }
/*
* Non-VHE: Both host and guest must save everything.
*
- * VHE: Host must save tpidr*_el[01], actlr_el1, mdscr_el1, sp0, pc,
- * pstate, and guest must save everything.
+ * VHE: Host must save tpidr*_el0, actlr_el1, mdscr_el1, sp_el0,
+ * and guest must save everything.
*/
static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
@@ -36,11 +36,8 @@ static void __hyp_text __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0);
ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0);
- ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1);
ctxt->gp_regs.regs.sp = read_sysreg(sp_el0);
- ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
- ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
}
static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
@@ -62,10 +59,16 @@ static void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt)
ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
+ ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
+ ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
+ ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
+
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+ ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
}
static hyp_alternate_select(__sysreg_call_save_host_state,
@@ -89,11 +92,8 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
- write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1);
write_sysreg(ctxt->gp_regs.regs.sp, sp_el0);
- write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
- write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
}
static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
@@ -115,10 +115,16 @@ static void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt)
write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
+ write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
+ write_sysreg_el2(ctxt->gp_regs.regs.pstate, spsr);
+
+ if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+ write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);
}
static hyp_alternate_select(__sysreg_call_restore_host_state,
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index 8ecbcb4..60666a0 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -164,14 +164,25 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
inject_undef64(vcpu);
}
+static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
+{
+ vcpu_set_vsesr(vcpu, esr);
+ vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
+}
+
/**
* kvm_inject_vabt - inject an async abort / SError into the guest
* @vcpu: The VCPU to receive the exception
*
* It is assumed that this code is called from the VCPU thread and that the
* VCPU therefore is not currently executing guest code.
+ *
+ * Systems with the RAS Extensions specify an imp-def ESR (ISV/IDS = 1) with
+ * the remaining ISS all-zeros so that this error is not interpreted as an
+ * uncategorized RAS error. Without the RAS Extensions we can't specify an ESR
+ * value, so the CPU generates an imp-def value.
*/
void kvm_inject_vabt(struct kvm_vcpu *vcpu)
{
- vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE);
+ pend_guest_serror(vcpu, ESR_ELx_ISV);
}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1830ebc..50a43c7 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1159,6 +1159,16 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_AFSR0_EL1), access_vm_reg, reset_unknown, AFSR0_EL1 },
{ SYS_DESC(SYS_AFSR1_EL1), access_vm_reg, reset_unknown, AFSR1_EL1 },
{ SYS_DESC(SYS_ESR_EL1), access_vm_reg, reset_unknown, ESR_EL1 },
+
+ { SYS_DESC(SYS_ERRIDR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERRSELR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXFR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXCTLR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXSTATUS_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXADDR_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
+ { SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
+
{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
@@ -1169,6 +1179,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 },
{ SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 },
+ { SYS_DESC(SYS_DISR_EL1), NULL, reset_val, DISR_EL1, 0 },
{ SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only },
{ SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only },
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index e88fb99..3d69a8d 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -30,7 +30,7 @@
* Alignment fixed up by hardware.
*/
ENTRY(__clear_user)
- uaccess_enable_not_uao x2, x3
+ uaccess_enable_not_uao x2, x3, x4
mov x2, x1 // save the size for fixup return
subs x1, x1, #8
b.mi 2f
@@ -50,7 +50,7 @@
b.mi 5f
uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5: mov x0, #0
- uaccess_disable_not_uao x2
+ uaccess_disable_not_uao x2, x3
ret
ENDPROC(__clear_user)
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 4b5d826..20305d4 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -64,10 +64,10 @@
end .req x5
ENTRY(__arch_copy_from_user)
- uaccess_enable_not_uao x3, x4
+ uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3
+ uaccess_disable_not_uao x3, x4
mov x0, #0 // Nothing to copy
ret
ENDPROC(__arch_copy_from_user)
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index b24a830..fbb090f 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -65,10 +65,10 @@
end .req x5
ENTRY(raw_copy_in_user)
- uaccess_enable_not_uao x3, x4
+ uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3
+ uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(raw_copy_in_user)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index 351f076..fda6172 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -63,10 +63,10 @@
end .req x5
ENTRY(__arch_copy_to_user)
- uaccess_enable_not_uao x3, x4
+ uaccess_enable_not_uao x3, x4, x5
add end, x0, x2
#include "copy_template.S"
- uaccess_disable_not_uao x3
+ uaccess_disable_not_uao x3, x4
mov x0, #0
ret
ENDPROC(__arch_copy_to_user)
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S
index 0179a43..d3db9b2 100644
--- a/arch/arm64/lib/tishift.S
+++ b/arch/arm64/lib/tishift.S
@@ -38,19 +38,19 @@
ENDPROC(__ashlti3)
ENTRY(__ashrti3)
- cbz x2, 3f
+ cbz x2, 1f
mov x3, #64
sub x3, x3, x2
cmp x3, #0
- b.le 4f
+ b.le 2f
lsr x0, x0, x2
lsl x3, x1, x3
asr x2, x1, x2
orr x0, x0, x3
mov x1, x2
-3:
+1:
ret
-4:
+2:
neg w0, w3
asr x2, x1, #63
asr x0, x1, x0
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 7f1dbe9..91464e7 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -49,7 +49,7 @@
* - end - virtual end address of region
*/
ENTRY(__flush_cache_user_range)
- uaccess_ttbr0_enable x2, x3
+ uaccess_ttbr0_enable x2, x3, x4
dcache_line_size x2, x3
sub x3, x2, #1
bic x4, x0, x3
@@ -72,7 +72,7 @@
isb
mov x0, #0
1:
- uaccess_ttbr0_disable x1
+ uaccess_ttbr0_disable x1, x2
ret
9:
mov x0, #-EFAULT
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index 6f401704..301417a 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -39,7 +39,16 @@ static cpumask_t tlb_flush_pending;
#define ASID_MASK (~GENMASK(asid_bits - 1, 0))
#define ASID_FIRST_VERSION (1UL << asid_bits)
-#define NUM_USER_ASIDS ASID_FIRST_VERSION
+
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1)
+#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1)
+#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK)
+#else
+#define NUM_USER_ASIDS (ASID_FIRST_VERSION)
+#define asid2idx(asid) ((asid) & ~ASID_MASK)
+#define idx2asid(idx) asid2idx(idx)
+#endif
/* Get the ASIDBits supported by the current CPU */
static u32 get_cpu_asid_bits(void)
@@ -79,13 +88,6 @@ void verify_cpu_asid_bits(void)
}
}
-static void set_reserved_asid_bits(void)
-{
- if (IS_ENABLED(CONFIG_QCOM_FALKOR_ERRATUM_1003) &&
- cpus_have_const_cap(ARM64_WORKAROUND_QCOM_FALKOR_E1003))
- __set_bit(FALKOR_RESERVED_ASID, asid_map);
-}
-
static void flush_context(unsigned int cpu)
{
int i;
@@ -94,8 +96,6 @@ static void flush_context(unsigned int cpu)
/* Update the list of reserved ASIDs and the ASID bitmap. */
bitmap_clear(asid_map, 0, NUM_USER_ASIDS);
- set_reserved_asid_bits();
-
for_each_possible_cpu(i) {
asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
/*
@@ -107,7 +107,7 @@ static void flush_context(unsigned int cpu)
*/
if (asid == 0)
asid = per_cpu(reserved_asids, i);
- __set_bit(asid & ~ASID_MASK, asid_map);
+ __set_bit(asid2idx(asid), asid_map);
per_cpu(reserved_asids, i) = asid;
}
@@ -162,16 +162,16 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
* We had a valid ASID in a previous life, so try to re-use
* it if possible.
*/
- asid &= ~ASID_MASK;
- if (!__test_and_set_bit(asid, asid_map))
+ if (!__test_and_set_bit(asid2idx(asid), asid_map))
return newasid;
}
/*
* Allocate a free ASID. If we can't find one, take a note of the
- * currently active ASIDs and mark the TLBs as requiring flushes.
- * We always count from ASID #1, as we use ASID #0 when setting a
- * reserved TTBR0 for the init_mm.
+ * currently active ASIDs and mark the TLBs as requiring flushes. We
+ * always count from ASID #2 (index 1), as we use ASID #0 when setting
+ * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd
+ * pairs.
*/
asid = find_next_zero_bit(asid_map, NUM_USER_ASIDS, cur_idx);
if (asid != NUM_USER_ASIDS)
@@ -188,32 +188,35 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
set_asid:
__set_bit(asid, asid_map);
cur_idx = asid;
- return asid | generation;
+ return idx2asid(asid) | generation;
}
void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
{
unsigned long flags;
- u64 asid;
+ u64 asid, old_active_asid;
asid = atomic64_read(&mm->context.id);
/*
* The memory ordering here is subtle.
- * If our ASID matches the current generation, then we update
- * our active_asids entry with a relaxed xchg. Racing with a
- * concurrent rollover means that either:
+ * If our active_asids is non-zero and the ASID matches the current
+ * generation, then we update the active_asids entry with a relaxed
+ * cmpxchg. Racing with a concurrent rollover means that either:
*
- * - We get a zero back from the xchg and end up waiting on the
+ * - We get a zero back from the cmpxchg and end up waiting on the
* lock. Taking the lock synchronises with the rollover and so
* we are forced to see the updated generation.
*
- * - We get a valid ASID back from the xchg, which means the
+ * - We get a valid ASID back from the cmpxchg, which means the
* relaxed xchg in flush_context will treat us as reserved
* because atomic RmWs are totally ordered for a given location.
*/
- if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
- && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
+ old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
+ if (old_active_asid &&
+ !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) &&
+ atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu),
+ old_active_asid, asid))
goto switch_mm_fastpath;
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
@@ -231,6 +234,9 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
raw_spin_unlock_irqrestore(&cpu_asid_lock, flags);
switch_mm_fastpath:
+
+ arm64_apply_bp_hardening();
+
/*
* Defer TTBR0_EL1 setting for user threads to uaccess_enable() when
* emulating PAN.
@@ -239,6 +245,15 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
cpu_switch_mm(mm->pgd, mm);
}
+/* Errata workaround post TTBRx_EL1 update. */
+asmlinkage void post_ttbr_update_workaround(void)
+{
+ asm(ALTERNATIVE("nop; nop; nop",
+ "ic iallu; dsb nsh; isb",
+ ARM64_WORKAROUND_CAVIUM_27456,
+ CONFIG_CAVIUM_ERRATUM_27456));
+}
+
static int asids_init(void)
{
asid_bits = get_cpu_asid_bits();
@@ -254,8 +269,6 @@ static int asids_init(void)
panic("Failed to allocate bitmap for %lu ASIDs\n",
NUM_USER_ASIDS);
- set_reserved_asid_bits();
-
pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS);
return 0;
}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 9b7f89d..6c30cf9 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -707,6 +707,23 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr,
arm64_notify_die("", regs, &info, esr);
}
+asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr,
+ unsigned int esr,
+ struct pt_regs *regs)
+{
+ /*
+ * We've taken an instruction abort from userspace and not yet
+ * re-enabled IRQs. If the address is a kernel address, apply
+ * BP hardening prior to enabling IRQs and pre-emption.
+ */
+ if (addr > TASK_SIZE)
+ arm64_apply_bp_hardening();
+
+ local_irq_enable();
+ do_mem_abort(addr, esr, regs);
+}
+
+
asmlinkage void __exception do_sp_pc_abort(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 00e7b90..c903f7c 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -366,6 +366,9 @@ void __init arm64_memblock_init(void)
/* Handle linux,usable-memory-range property */
fdt_enforce_memory_region();
+ /* Remove memory above our supported physical address size */
+ memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX);
+
/*
* Ensure that the linear region takes up exactly half of the kernel
* virtual address space. This way, we can distinguish a linear address
@@ -600,49 +603,6 @@ void __init mem_init(void)
mem_init_print_info(NULL);
-#define MLK(b, t) b, t, ((t) - (b)) >> 10
-#define MLM(b, t) b, t, ((t) - (b)) >> 20
-#define MLG(b, t) b, t, ((t) - (b)) >> 30
-#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
-
- pr_notice("Virtual kernel memory layout:\n");
-#ifdef CONFIG_KASAN
- pr_notice(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n",
- MLG(KASAN_SHADOW_START, KASAN_SHADOW_END));
-#endif
- pr_notice(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n",
- MLM(MODULES_VADDR, MODULES_END));
- pr_notice(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n",
- MLG(VMALLOC_START, VMALLOC_END));
- pr_notice(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n",
- MLK_ROUNDUP(_text, _etext));
- pr_notice(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n",
- MLK_ROUNDUP(__start_rodata, __init_begin));
- pr_notice(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n",
- MLK_ROUNDUP(__init_begin, __init_end));
- pr_notice(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n",
- MLK_ROUNDUP(_sdata, _edata));
- pr_notice(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n",
- MLK_ROUNDUP(__bss_start, __bss_stop));
- pr_notice(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n",
- MLK(FIXADDR_START, FIXADDR_TOP));
- pr_notice(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n",
- MLM(PCI_IO_START, PCI_IO_END));
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- pr_notice(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n",
- MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE));
- pr_notice(" 0x%16lx - 0x%16lx (%6ld MB actual)\n",
- MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
- (unsigned long)virt_to_page(high_memory)));
-#endif
- pr_notice(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n",
- MLM(__phys_to_virt(memblock_start_of_DRAM()),
- (unsigned long)high_memory));
-
-#undef MLK
-#undef MLM
-#undef MLK_ROUNDUP
-
/*
* Check boundaries twice: Some fundamental inconsistencies can be
* detected at build time already.
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 267d2b7..b44992e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -50,6 +50,7 @@
#define NO_CONT_MAPPINGS BIT(1)
u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
@@ -525,6 +526,35 @@ static int __init parse_rodata(char *arg)
}
early_param("rodata", parse_rodata);
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+static int __init map_entry_trampoline(void)
+{
+ pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+ phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start);
+
+ /* The trampoline is always mapped and can therefore be global */
+ pgprot_val(prot) &= ~PTE_NG;
+
+ /* Map only the text into the trampoline page table */
+ memset(tramp_pg_dir, 0, PGD_SIZE);
+ __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
+ prot, pgd_pgtable_alloc, 0);
+
+ /* Map both the text and data into the kernel page table */
+ __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+ extern char __entry_tramp_data_start[];
+
+ __set_fixmap(FIX_ENTRY_TRAMP_DATA,
+ __pa_symbol(__entry_tramp_data_start),
+ PAGE_KERNEL_RO);
+ }
+
+ return 0;
+}
+core_initcall(map_entry_trampoline);
+#endif
+
/*
* Create fine-grained mappings for the kernel.
*/
@@ -570,8 +600,8 @@ static void __init map_kernel(pgd_t *pgd)
* entry instead.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
- set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START),
- __pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE));
+ pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+ lm_alias(bm_pmd));
pud_clear_fixmap();
} else {
BUG();
@@ -612,7 +642,8 @@ void __init paging_init(void)
* allocated with it.
*/
memblock_free(__pa_symbol(swapper_pg_dir) + PAGE_SIZE,
- SWAPPER_DIR_SIZE - PAGE_SIZE);
+ __pa_symbol(swapper_pg_end) - __pa_symbol(swapper_pg_dir)
+ - PAGE_SIZE);
}
/*
@@ -686,7 +717,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
if (!p)
return -ENOMEM;
- set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL));
+ pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
} else
vmemmap_verify((pte_t *)pmd, node, addr, next);
} while (addr = next, addr != end);
@@ -879,15 +910,19 @@ int __init arch_ioremap_pmd_supported(void)
int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
{
+ pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
+ pgprot_val(mk_sect_prot(prot)));
BUG_ON(phys & ~PUD_MASK);
- set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+ set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
return 1;
}
int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
{
+ pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
+ pgprot_val(mk_sect_prot(prot)));
BUG_ON(phys & ~PMD_MASK);
- set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))));
+ set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
return 1;
}
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 051e71e..289f911 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -49,6 +49,14 @@ void __init pgd_cache_init(void)
if (PGD_SIZE == PAGE_SIZE)
return;
+#ifdef CONFIG_ARM64_PA_BITS_52
+ /*
+ * With 52-bit physical addresses, the architecture requires the
+ * top-level table to be aligned to at least 64 bytes.
+ */
+ BUILD_BUG_ON(PGD_SIZE < 64);
+#endif
+
/*
* Naturally aligned pgds required by the architecture.
*/
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 95233df..9f177aa 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -70,7 +70,11 @@
mrs x8, mdscr_el1
mrs x9, oslsr_el1
mrs x10, sctlr_el1
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
mrs x11, tpidr_el1
+alternative_else
+ mrs x11, tpidr_el2
+alternative_endif
mrs x12, sp_el0
stp x2, x3, [x0]
stp x4, xzr, [x0, #16]
@@ -116,7 +120,11 @@
msr mdscr_el1, x10
msr sctlr_el1, x12
+alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
msr tpidr_el1, x13
+alternative_else
+ msr tpidr_el2, x13
+alternative_endif
msr sp_el0, x14
/*
* Restore oslsr_el1 by writing oslar_el1
@@ -124,6 +132,11 @@
ubfx x11, x11, #1, #1
msr oslar_el1, x11
reset_pmuserenr_el0 x0 // Disable PMU access from EL0
+
+alternative_if ARM64_HAS_RAS_EXTN
+ msr_s SYS_DISR_EL1, xzr
+alternative_else_nop_endif
+
isb
ret
ENDPROC(cpu_do_resume)
@@ -138,13 +151,18 @@
* - pgd_phys - physical address of new TTB
*/
ENTRY(cpu_do_switch_mm)
- pre_ttbr0_update_workaround x0, x2, x3
+ mrs x2, ttbr1_el1
mmid x1, x1 // get mm->context.id
- bfi x0, x1, #48, #16 // set the ASID
- msr ttbr0_el1, x0 // set TTBR0
+ phys_to_ttbr x0, x3
+#ifdef CONFIG_ARM64_SW_TTBR0_PAN
+ bfi x3, x1, #48, #16 // set the ASID field in TTBR0
+#endif
+ bfi x2, x1, #48, #16 // set the ASID
+ msr ttbr1_el1, x2 // in TTBR1 (since TCR.A1 is set)
isb
- post_ttbr0_update_workaround
- ret
+ msr ttbr0_el1, x3 // now update TTBR0
+ isb
+ b post_ttbr_update_workaround // Back to C code...
ENDPROC(cpu_do_switch_mm)
.pushsection ".idmap.text", "ax"
@@ -158,14 +176,16 @@
save_and_disable_daif flags=x2
adrp x1, empty_zero_page
- msr ttbr1_el1, x1
+ phys_to_ttbr x1, x3
+ msr ttbr1_el1, x3
isb
tlbi vmalle1
dsb nsh
isb
- msr ttbr1_el1, x0
+ phys_to_ttbr x0, x3
+ msr ttbr1_el1, x3
isb
restore_daif x2
@@ -214,25 +234,19 @@
/*
* Prepare SCTLR
*/
- adr x5, crval
- ldp w5, w6, [x5]
- mrs x0, sctlr_el1
- bic x0, x0, x5 // clear bits
- orr x0, x0, x6 // set bits
+ mov_q x0, SCTLR_EL1_SET
/*
* Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for
* both user and kernel.
*/
ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
- TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0
+ TCR_TG_FLAGS | TCR_ASID16 | TCR_TBI0 | TCR_A1
tcr_set_idmap_t0sz x10, x9
/*
- * Read the PARange bits from ID_AA64MMFR0_EL1 and set the IPS bits in
- * TCR_EL1.
+ * Set the IPS bits in TCR_EL1.
*/
- mrs x9, ID_AA64MMFR0_EL1
- bfi x10, x9, #32, #3
+ tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6
#ifdef CONFIG_ARM64_HW_AFDBM
/*
* Hardware update of the Access and Dirty bits.
@@ -249,21 +263,3 @@
msr tcr_el1, x10
ret // return to head.S
ENDPROC(__cpu_setup)
-
- /*
- * We set the desired value explicitly, including those of the
- * reserved bits. The values of bits EE & E0E were set early in
- * el2_setup, which are left untouched below.
- *
- * n n T
- * U E WT T UD US IHBS
- * CE0 XWHW CZ ME TEEA S
- * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM
- * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved
- * .... .1.. .... 01.1 11.1 ..01 0.01 1101 < software settings
- */
- .type crval, #object
-crval:
- .word 0xfcffffff // clear
- .word 0x34d5d91d // set
- .popsection
diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S
index 401ceb7..c5f05c4 100644
--- a/arch/arm64/xen/hypercall.S
+++ b/arch/arm64/xen/hypercall.S
@@ -101,12 +101,12 @@
* need the explicit uaccess_enable/disable if the TTBR0 PAN emulation
* is enabled (it implies that hardware UAO and PAN disabled).
*/
- uaccess_ttbr0_enable x6, x7
+ uaccess_ttbr0_enable x6, x7, x8
hvc XEN_IMM
/*
* Disable userspace access from kernel once the hyp call completed.
*/
- uaccess_ttbr0_disable x6
+ uaccess_ttbr0_disable x6, x7
ret
ENDPROC(privcmd_call);
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index fa87a055..e77f77c 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -48,6 +48,14 @@
This enables support for the SCPI power domains which can be
enabled or disabled via the SCP firmware
+config ARM_SDE_INTERFACE
+ bool "ARM Software Delegated Exception Interface (SDEI)"
+ depends on ARM64
+ help
+ The Software Delegated Exception Interface (SDEI) is an ARM
+ standard for registering callbacks from the platform firmware
+ into the OS. This is typically used to implement RAS notifications.
+
config EDD
tristate "BIOS Enhanced Disk Drive calls determine boot disk"
depends on X86
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index feaa890..b248238 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -6,6 +6,7 @@
obj-$(CONFIG_ARM_PSCI_CHECKER) += psci_checker.o
obj-$(CONFIG_ARM_SCPI_PROTOCOL) += arm_scpi.o
obj-$(CONFIG_ARM_SCPI_POWER_DOMAIN) += scpi_pm_domain.o
+obj-$(CONFIG_ARM_SDE_INTERFACE) += arm_sdei.o
obj-$(CONFIG_DMI) += dmi_scan.o
obj-$(CONFIG_DMI_SYSFS) += dmi-sysfs.o
obj-$(CONFIG_EDD) += edd.o
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
new file mode 100644
index 0000000..1ea7164
--- /dev/null
+++ b/drivers/firmware/arm_sdei.c
@@ -0,0 +1,1092 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2017 Arm Ltd.
+#define pr_fmt(fmt) "sdei: " fmt
+
+#include <linux/acpi.h>
+#include <linux/arm_sdei.h>
+#include <linux/arm-smccc.h>
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/cpuhotplug.h>
+#include <linux/cpu.h>
+#include <linux/cpu_pm.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/percpu.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/ptrace.h>
+#include <linux/preempt.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+
+/*
+ * The call to use to reach the firmware.
+ */
+static asmlinkage void (*sdei_firmware_call)(unsigned long function_id,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, struct arm_smccc_res *res);
+
+/* entry point from firmware to arch asm code */
+static unsigned long sdei_entry_point;
+
+struct sdei_event {
+ /* These three are protected by the sdei_list_lock */
+ struct list_head list;
+ bool reregister;
+ bool reenable;
+
+ u32 event_num;
+ u8 type;
+ u8 priority;
+
+ /* This pointer is handed to firmware as the event argument. */
+ union {
+ /* Shared events */
+ struct sdei_registered_event *registered;
+
+ /* CPU private events */
+ struct sdei_registered_event __percpu *private_registered;
+ };
+};
+
+/* Take the mutex for any API call or modification. Take the mutex first. */
+static DEFINE_MUTEX(sdei_events_lock);
+
+/* and then hold this when modifying the list */
+static DEFINE_SPINLOCK(sdei_list_lock);
+static LIST_HEAD(sdei_list);
+
+/* Private events are registered/enabled via IPI passing one of these */
+struct sdei_crosscall_args {
+ struct sdei_event *event;
+ atomic_t errors;
+ int first_error;
+};
+
+#define CROSSCALL_INIT(arg, event) (arg.event = event, \
+ arg.first_error = 0, \
+ atomic_set(&arg.errors, 0))
+
+static inline int sdei_do_cross_call(void *fn, struct sdei_event * event)
+{
+ struct sdei_crosscall_args arg;
+
+ CROSSCALL_INIT(arg, event);
+ on_each_cpu(fn, &arg, true);
+
+ return arg.first_error;
+}
+
+static inline void
+sdei_cross_call_return(struct sdei_crosscall_args *arg, int err)
+{
+ if (err && (atomic_inc_return(&arg->errors) == 1))
+ arg->first_error = err;
+}
+
+static int sdei_to_linux_errno(unsigned long sdei_err)
+{
+ switch (sdei_err) {
+ case SDEI_NOT_SUPPORTED:
+ return -EOPNOTSUPP;
+ case SDEI_INVALID_PARAMETERS:
+ return -EINVAL;
+ case SDEI_DENIED:
+ return -EPERM;
+ case SDEI_PENDING:
+ return -EINPROGRESS;
+ case SDEI_OUT_OF_RESOURCE:
+ return -ENOMEM;
+ }
+
+ /* Not an error value ... */
+ return sdei_err;
+}
+
+/*
+ * If x0 is any of these values, then the call failed, use sdei_to_linux_errno()
+ * to translate.
+ */
+static int sdei_is_err(struct arm_smccc_res *res)
+{
+ switch (res->a0) {
+ case SDEI_NOT_SUPPORTED:
+ case SDEI_INVALID_PARAMETERS:
+ case SDEI_DENIED:
+ case SDEI_PENDING:
+ case SDEI_OUT_OF_RESOURCE:
+ return true;
+ }
+
+ return false;
+}
+
+static int invoke_sdei_fn(unsigned long function_id, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ u64 *result)
+{
+ int err = 0;
+ struct arm_smccc_res res;
+
+ if (sdei_firmware_call) {
+ sdei_firmware_call(function_id, arg0, arg1, arg2, arg3, arg4,
+ &res);
+ if (sdei_is_err(&res))
+ err = sdei_to_linux_errno(res.a0);
+ } else {
+ /*
+ * !sdei_firmware_call means we failed to probe or called
+ * sdei_mark_interface_broken(). -EIO is not an error returned
+ * by sdei_to_linux_errno() and is used to suppress messages
+ * from this driver.
+ */
+ err = -EIO;
+ res.a0 = SDEI_NOT_SUPPORTED;
+ }
+
+ if (result)
+ *result = res.a0;
+
+ return err;
+}
+
+static struct sdei_event *sdei_event_find(u32 event_num)
+{
+ struct sdei_event *e, *found = NULL;
+
+ lockdep_assert_held(&sdei_events_lock);
+
+ spin_lock(&sdei_list_lock);
+ list_for_each_entry(e, &sdei_list, list) {
+ if (e->event_num == event_num) {
+ found = e;
+ break;
+ }
+ }
+ spin_unlock(&sdei_list_lock);
+
+ return found;
+}
+
+int sdei_api_event_context(u32 query, u64 *result)
+{
+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_CONTEXT, query, 0, 0, 0, 0,
+ result);
+}
+NOKPROBE_SYMBOL(sdei_api_event_context);
+
+static int sdei_api_event_get_info(u32 event, u32 info, u64 *result)
+{
+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_GET_INFO, event, info, 0,
+ 0, 0, result);
+}
+
+static struct sdei_event *sdei_event_create(u32 event_num,
+ sdei_event_callback *cb,
+ void *cb_arg)
+{
+ int err;
+ u64 result;
+ struct sdei_event *event;
+ struct sdei_registered_event *reg;
+
+ lockdep_assert_held(&sdei_events_lock);
+
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (!event)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&event->list);
+ event->event_num = event_num;
+
+ err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_PRIORITY,
+ &result);
+ if (err) {
+ kfree(event);
+ return ERR_PTR(err);
+ }
+ event->priority = result;
+
+ err = sdei_api_event_get_info(event_num, SDEI_EVENT_INFO_EV_TYPE,
+ &result);
+ if (err) {
+ kfree(event);
+ return ERR_PTR(err);
+ }
+ event->type = result;
+
+ if (event->type == SDEI_EVENT_TYPE_SHARED) {
+ reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+ if (!reg) {
+ kfree(event);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ reg->event_num = event_num;
+ reg->priority = event->priority;
+
+ reg->callback = cb;
+ reg->callback_arg = cb_arg;
+ event->registered = reg;
+ } else {
+ int cpu;
+ struct sdei_registered_event __percpu *regs;
+
+ regs = alloc_percpu(struct sdei_registered_event);
+ if (!regs) {
+ kfree(event);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for_each_possible_cpu(cpu) {
+ reg = per_cpu_ptr(regs, cpu);
+
+ reg->event_num = event->event_num;
+ reg->priority = event->priority;
+ reg->callback = cb;
+ reg->callback_arg = cb_arg;
+ }
+
+ event->private_registered = regs;
+ }
+
+ if (sdei_event_find(event_num)) {
+ kfree(event->registered);
+ kfree(event);
+ event = ERR_PTR(-EBUSY);
+ } else {
+ spin_lock(&sdei_list_lock);
+ list_add(&event->list, &sdei_list);
+ spin_unlock(&sdei_list_lock);
+ }
+
+ return event;
+}
+
+static void sdei_event_destroy(struct sdei_event *event)
+{
+ lockdep_assert_held(&sdei_events_lock);
+
+ spin_lock(&sdei_list_lock);
+ list_del(&event->list);
+ spin_unlock(&sdei_list_lock);
+
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ kfree(event->registered);
+ else
+ free_percpu(event->private_registered);
+
+ kfree(event);
+}
+
+static int sdei_api_get_version(u64 *version)
+{
+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_VERSION, 0, 0, 0, 0, 0, version);
+}
+
+int sdei_mask_local_cpu(void)
+{
+ int err;
+
+ WARN_ON_ONCE(preemptible());
+
+ err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_MASK, 0, 0, 0, 0, 0, NULL);
+ if (err && err != -EIO) {
+ pr_warn_once("failed to mask CPU[%u]: %d\n",
+ smp_processor_id(), err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void _ipi_mask_cpu(void *ignored)
+{
+ sdei_mask_local_cpu();
+}
+
+int sdei_unmask_local_cpu(void)
+{
+ int err;
+
+ WARN_ON_ONCE(preemptible());
+
+ err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PE_UNMASK, 0, 0, 0, 0, 0, NULL);
+ if (err && err != -EIO) {
+ pr_warn_once("failed to unmask CPU[%u]: %d\n",
+ smp_processor_id(), err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void _ipi_unmask_cpu(void *ignored)
+{
+ sdei_unmask_local_cpu();
+}
+
+static void _ipi_private_reset(void *ignored)
+{
+ int err;
+
+ err = invoke_sdei_fn(SDEI_1_0_FN_SDEI_PRIVATE_RESET, 0, 0, 0, 0, 0,
+ NULL);
+ if (err && err != -EIO)
+ pr_warn_once("failed to reset CPU[%u]: %d\n",
+ smp_processor_id(), err);
+}
+
+static int sdei_api_shared_reset(void)
+{
+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_SHARED_RESET, 0, 0, 0, 0, 0,
+ NULL);
+}
+
+static void sdei_mark_interface_broken(void)
+{
+ pr_err("disabling SDEI firmware interface\n");
+ on_each_cpu(&_ipi_mask_cpu, NULL, true);
+ sdei_firmware_call = NULL;
+}
+
+static int sdei_platform_reset(void)
+{
+ int err;
+
+ on_each_cpu(&_ipi_private_reset, NULL, true);
+ err = sdei_api_shared_reset();
+ if (err) {
+ pr_err("Failed to reset platform: %d\n", err);
+ sdei_mark_interface_broken();
+ }
+
+ return err;
+}
+
+static int sdei_api_event_enable(u32 event_num)
+{
+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_ENABLE, event_num, 0, 0, 0,
+ 0, NULL);
+}
+
+/* Called directly by the hotplug callbacks */
+static void _local_event_enable(void *data)
+{
+ int err;
+ struct sdei_crosscall_args *arg = data;
+
+ WARN_ON_ONCE(preemptible());
+
+ err = sdei_api_event_enable(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+}
+
+int sdei_event_enable(u32 event_num)
+{
+ int err = -EINVAL;
+ struct sdei_event *event;
+
+ mutex_lock(&sdei_events_lock);
+ event = sdei_event_find(event_num);
+ if (!event) {
+ mutex_unlock(&sdei_events_lock);
+ return -ENOENT;
+ }
+
+ spin_lock(&sdei_list_lock);
+ event->reenable = true;
+ spin_unlock(&sdei_list_lock);
+
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ err = sdei_api_event_enable(event->event_num);
+ else
+ err = sdei_do_cross_call(_local_event_enable, event);
+ mutex_unlock(&sdei_events_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(sdei_event_enable);
+
+static int sdei_api_event_disable(u32 event_num)
+{
+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_DISABLE, event_num, 0, 0,
+ 0, 0, NULL);
+}
+
+static void _ipi_event_disable(void *data)
+{
+ int err;
+ struct sdei_crosscall_args *arg = data;
+
+ err = sdei_api_event_disable(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+}
+
+int sdei_event_disable(u32 event_num)
+{
+ int err = -EINVAL;
+ struct sdei_event *event;
+
+ mutex_lock(&sdei_events_lock);
+ event = sdei_event_find(event_num);
+ if (!event) {
+ mutex_unlock(&sdei_events_lock);
+ return -ENOENT;
+ }
+
+ spin_lock(&sdei_list_lock);
+ event->reenable = false;
+ spin_unlock(&sdei_list_lock);
+
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ err = sdei_api_event_disable(event->event_num);
+ else
+ err = sdei_do_cross_call(_ipi_event_disable, event);
+ mutex_unlock(&sdei_events_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(sdei_event_disable);
+
+static int sdei_api_event_unregister(u32 event_num)
+{
+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_UNREGISTER, event_num, 0,
+ 0, 0, 0, NULL);
+}
+
+/* Called directly by the hotplug callbacks */
+static void _local_event_unregister(void *data)
+{
+ int err;
+ struct sdei_crosscall_args *arg = data;
+
+ WARN_ON_ONCE(preemptible());
+
+ err = sdei_api_event_unregister(arg->event->event_num);
+
+ sdei_cross_call_return(arg, err);
+}
+
+static int _sdei_event_unregister(struct sdei_event *event)
+{
+ lockdep_assert_held(&sdei_events_lock);
+
+ spin_lock(&sdei_list_lock);
+ event->reregister = false;
+ event->reenable = false;
+ spin_unlock(&sdei_list_lock);
+
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ return sdei_api_event_unregister(event->event_num);
+
+ return sdei_do_cross_call(_local_event_unregister, event);
+}
+
+int sdei_event_unregister(u32 event_num)
+{
+ int err;
+ struct sdei_event *event;
+
+ WARN_ON(in_nmi());
+
+ mutex_lock(&sdei_events_lock);
+ event = sdei_event_find(event_num);
+ do {
+ if (!event) {
+ pr_warn("Event %u not registered\n", event_num);
+ err = -ENOENT;
+ break;
+ }
+
+ err = _sdei_event_unregister(event);
+ if (err)
+ break;
+
+ sdei_event_destroy(event);
+ } while (0);
+ mutex_unlock(&sdei_events_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(sdei_event_unregister);
+
+/*
+ * unregister events, but don't destroy them as they are re-registered by
+ * sdei_reregister_shared().
+ */
+static int sdei_unregister_shared(void)
+{
+ int err = 0;
+ struct sdei_event *event;
+
+ mutex_lock(&sdei_events_lock);
+ spin_lock(&sdei_list_lock);
+ list_for_each_entry(event, &sdei_list, list) {
+ if (event->type != SDEI_EVENT_TYPE_SHARED)
+ continue;
+
+ err = _sdei_event_unregister(event);
+ if (err)
+ break;
+ }
+ spin_unlock(&sdei_list_lock);
+ mutex_unlock(&sdei_events_lock);
+
+ return err;
+}
+
+static int sdei_api_event_register(u32 event_num, unsigned long entry_point,
+ void *arg, u64 flags, u64 affinity)
+{
+ return invoke_sdei_fn(SDEI_1_0_FN_SDEI_EVENT_REGISTER, event_num,
+ (unsigned long)entry_point, (unsigned long)arg,
+ flags, affinity, NULL);
+}
+
+/* Called directly by the hotplug callbacks */
+static void _local_event_register(void *data)
+{
+ int err;
+ struct sdei_registered_event *reg;
+ struct sdei_crosscall_args *arg = data;
+
+ WARN_ON(preemptible());
+
+ reg = per_cpu_ptr(arg->event->private_registered, smp_processor_id());
+ err = sdei_api_event_register(arg->event->event_num, sdei_entry_point,
+ reg, 0, 0);
+
+ sdei_cross_call_return(arg, err);
+}
+
+static int _sdei_event_register(struct sdei_event *event)
+{
+ int err;
+
+ lockdep_assert_held(&sdei_events_lock);
+
+ spin_lock(&sdei_list_lock);
+ event->reregister = true;
+ spin_unlock(&sdei_list_lock);
+
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ return sdei_api_event_register(event->event_num,
+ sdei_entry_point,
+ event->registered,
+ SDEI_EVENT_REGISTER_RM_ANY, 0);
+
+
+ err = sdei_do_cross_call(_local_event_register, event);
+ if (err) {
+ spin_lock(&sdei_list_lock);
+ event->reregister = false;
+ event->reenable = false;
+ spin_unlock(&sdei_list_lock);
+
+ sdei_do_cross_call(_local_event_unregister, event);
+ }
+
+ return err;
+}
+
+int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg)
+{
+ int err;
+ struct sdei_event *event;
+
+ WARN_ON(in_nmi());
+
+ mutex_lock(&sdei_events_lock);
+ do {
+ if (sdei_event_find(event_num)) {
+ pr_warn("Event %u already registered\n", event_num);
+ err = -EBUSY;
+ break;
+ }
+
+ event = sdei_event_create(event_num, cb, arg);
+ if (IS_ERR(event)) {
+ err = PTR_ERR(event);
+ pr_warn("Failed to create event %u: %d\n", event_num,
+ err);
+ break;
+ }
+
+ err = _sdei_event_register(event);
+ if (err) {
+ sdei_event_destroy(event);
+ pr_warn("Failed to register event %u: %d\n", event_num,
+ err);
+ }
+ } while (0);
+ mutex_unlock(&sdei_events_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(sdei_event_register);
+
+static int sdei_reregister_event(struct sdei_event *event)
+{
+ int err;
+
+ lockdep_assert_held(&sdei_events_lock);
+
+ err = _sdei_event_register(event);
+ if (err) {
+ pr_err("Failed to re-register event %u\n", event->event_num);
+ sdei_event_destroy(event);
+ return err;
+ }
+
+ if (event->reenable) {
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ err = sdei_api_event_enable(event->event_num);
+ else
+ err = sdei_do_cross_call(_local_event_enable, event);
+ }
+
+ if (err)
+ pr_err("Failed to re-enable event %u\n", event->event_num);
+
+ return err;
+}
+
+static int sdei_reregister_shared(void)
+{
+ int err = 0;
+ struct sdei_event *event;
+
+ mutex_lock(&sdei_events_lock);
+ spin_lock(&sdei_list_lock);
+ list_for_each_entry(event, &sdei_list, list) {
+ if (event->type != SDEI_EVENT_TYPE_SHARED)
+ continue;
+
+ if (event->reregister) {
+ err = sdei_reregister_event(event);
+ if (err)
+ break;
+ }
+ }
+ spin_unlock(&sdei_list_lock);
+ mutex_unlock(&sdei_events_lock);
+
+ return err;
+}
+
+static int sdei_cpuhp_down(unsigned int cpu)
+{
+ struct sdei_event *event;
+ struct sdei_crosscall_args arg;
+
+ /* un-register private events */
+ spin_lock(&sdei_list_lock);
+ list_for_each_entry(event, &sdei_list, list) {
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ continue;
+
+ CROSSCALL_INIT(arg, event);
+ /* call the cross-call function locally... */
+ _local_event_unregister(&arg);
+ if (arg.first_error)
+ pr_err("Failed to unregister event %u: %d\n",
+ event->event_num, arg.first_error);
+ }
+ spin_unlock(&sdei_list_lock);
+
+ return sdei_mask_local_cpu();
+}
+
+static int sdei_cpuhp_up(unsigned int cpu)
+{
+ struct sdei_event *event;
+ struct sdei_crosscall_args arg;
+
+ /* re-register/enable private events */
+ spin_lock(&sdei_list_lock);
+ list_for_each_entry(event, &sdei_list, list) {
+ if (event->type == SDEI_EVENT_TYPE_SHARED)
+ continue;
+
+ if (event->reregister) {
+ CROSSCALL_INIT(arg, event);
+ /* call the cross-call function locally... */
+ _local_event_register(&arg);
+ if (arg.first_error)
+ pr_err("Failed to re-register event %u: %d\n",
+ event->event_num, arg.first_error);
+ }
+
+ if (event->reenable) {
+ CROSSCALL_INIT(arg, event);
+ _local_event_enable(&arg);
+ if (arg.first_error)
+ pr_err("Failed to re-enable event %u: %d\n",
+ event->event_num, arg.first_error);
+ }
+ }
+ spin_unlock(&sdei_list_lock);
+
+ return sdei_unmask_local_cpu();
+}
+
+/* When entering idle, mask/unmask events for this cpu */
+static int sdei_pm_notifier(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ int rv;
+
+ switch (action) {
+ case CPU_PM_ENTER:
+ rv = sdei_mask_local_cpu();
+ break;
+ case CPU_PM_EXIT:
+ case CPU_PM_ENTER_FAILED:
+ rv = sdei_unmask_local_cpu();
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ if (rv)
+ return notifier_from_errno(rv);
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block sdei_pm_nb = {
+ .notifier_call = sdei_pm_notifier,
+};
+
+static int sdei_device_suspend(struct device *dev)
+{
+ on_each_cpu(_ipi_mask_cpu, NULL, true);
+
+ return 0;
+}
+
+static int sdei_device_resume(struct device *dev)
+{
+ on_each_cpu(_ipi_unmask_cpu, NULL, true);
+
+ return 0;
+}
+
+/*
+ * We need all events to be reregistered when we resume from hibernate.
+ *
+ * The sequence is freeze->thaw. Reboot. freeze->restore. We unregister
+ * events during freeze, then re-register and re-enable them during thaw
+ * and restore.
+ */
+static int sdei_device_freeze(struct device *dev)
+{
+ int err;
+
+ /* unregister private events */
+ cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING);
+
+ err = sdei_unregister_shared();
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int sdei_device_thaw(struct device *dev)
+{
+ int err;
+
+ /* re-register shared events */
+ err = sdei_reregister_shared();
+ if (err) {
+ pr_warn("Failed to re-register shared events...\n");
+ sdei_mark_interface_broken();
+ return err;
+ }
+
+ err = cpuhp_setup_state(CPUHP_AP_ARM_SDEI_STARTING, "SDEI",
+ &sdei_cpuhp_up, &sdei_cpuhp_down);
+ if (err)
+ pr_warn("Failed to re-register CPU hotplug notifier...\n");
+
+ return err;
+}
+
+static int sdei_device_restore(struct device *dev)
+{
+ int err;
+
+ err = sdei_platform_reset();
+ if (err)
+ return err;
+
+ return sdei_device_thaw(dev);
+}
+
+static const struct dev_pm_ops sdei_pm_ops = {
+ .suspend = sdei_device_suspend,
+ .resume = sdei_device_resume,
+ .freeze = sdei_device_freeze,
+ .thaw = sdei_device_thaw,
+ .restore = sdei_device_restore,
+};
+
+/*
+ * Mask all CPUs and unregister all events on panic, reboot or kexec.
+ */
+static int sdei_reboot_notifier(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ /*
+ * We are going to reset the interface, after this there is no point
+ * doing work when we take CPUs offline.
+ */
+ cpuhp_remove_state(CPUHP_AP_ARM_SDEI_STARTING);
+
+ sdei_platform_reset();
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block sdei_reboot_nb = {
+ .notifier_call = sdei_reboot_notifier,
+};
+
+static void sdei_smccc_smc(unsigned long function_id,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, struct arm_smccc_res *res)
+{
+ arm_smccc_smc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res);
+}
+
+static void sdei_smccc_hvc(unsigned long function_id,
+ unsigned long arg0, unsigned long arg1,
+ unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, struct arm_smccc_res *res)
+{
+ arm_smccc_hvc(function_id, arg0, arg1, arg2, arg3, arg4, 0, 0, res);
+}
+
+static int sdei_get_conduit(struct platform_device *pdev)
+{
+ const char *method;
+ struct device_node *np = pdev->dev.of_node;
+
+ sdei_firmware_call = NULL;
+ if (np) {
+ if (of_property_read_string(np, "method", &method)) {
+ pr_warn("missing \"method\" property\n");
+ return CONDUIT_INVALID;
+ }
+
+ if (!strcmp("hvc", method)) {
+ sdei_firmware_call = &sdei_smccc_hvc;
+ return CONDUIT_HVC;
+ } else if (!strcmp("smc", method)) {
+ sdei_firmware_call = &sdei_smccc_smc;
+ return CONDUIT_SMC;
+ }
+
+ pr_warn("invalid \"method\" property: %s\n", method);
+ } else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) {
+ if (acpi_psci_use_hvc()) {
+ sdei_firmware_call = &sdei_smccc_hvc;
+ return CONDUIT_HVC;
+ } else {
+ sdei_firmware_call = &sdei_smccc_smc;
+ return CONDUIT_SMC;
+ }
+ }
+
+ return CONDUIT_INVALID;
+}
+
+static int sdei_probe(struct platform_device *pdev)
+{
+ int err;
+ u64 ver = 0;
+ int conduit;
+
+ conduit = sdei_get_conduit(pdev);
+ if (!sdei_firmware_call)
+ return 0;
+
+ err = sdei_api_get_version(&ver);
+ if (err == -EOPNOTSUPP)
+ pr_err("advertised but not implemented in platform firmware\n");
+ if (err) {
+ pr_err("Failed to get SDEI version: %d\n", err);
+ sdei_mark_interface_broken();
+ return err;
+ }
+
+ pr_info("SDEIv%d.%d (0x%x) detected in firmware.\n",
+ (int)SDEI_VERSION_MAJOR(ver), (int)SDEI_VERSION_MINOR(ver),
+ (int)SDEI_VERSION_VENDOR(ver));
+
+ if (SDEI_VERSION_MAJOR(ver) != 1) {
+ pr_warn("Conflicting SDEI version detected.\n");
+ sdei_mark_interface_broken();
+ return -EINVAL;
+ }
+
+ err = sdei_platform_reset();
+ if (err)
+ return err;
+
+ sdei_entry_point = sdei_arch_get_entry_point(conduit);
+ if (!sdei_entry_point) {
+ /* Not supported due to hardware or boot configuration */
+ sdei_mark_interface_broken();
+ return 0;
+ }
+
+ err = cpu_pm_register_notifier(&sdei_pm_nb);
+ if (err) {
+ pr_warn("Failed to register CPU PM notifier...\n");
+ goto error;
+ }
+
+ err = register_reboot_notifier(&sdei_reboot_nb);
+ if (err) {
+ pr_warn("Failed to register reboot notifier...\n");
+ goto remove_cpupm;
+ }
+
+ err = cpuhp_setup_state(CPUHP_AP_ARM_SDEI_STARTING, "SDEI",
+ &sdei_cpuhp_up, &sdei_cpuhp_down);
+ if (err) {
+ pr_warn("Failed to register CPU hotplug notifier...\n");
+ goto remove_reboot;
+ }
+
+ return 0;
+
+remove_reboot:
+ unregister_reboot_notifier(&sdei_reboot_nb);
+
+remove_cpupm:
+ cpu_pm_unregister_notifier(&sdei_pm_nb);
+
+error:
+ sdei_mark_interface_broken();
+ return err;
+}
+
+static const struct of_device_id sdei_of_match[] = {
+ { .compatible = "arm,sdei-1.0" },
+ {}
+};
+
+static struct platform_driver sdei_driver = {
+ .driver = {
+ .name = "sdei",
+ .pm = &sdei_pm_ops,
+ .of_match_table = sdei_of_match,
+ },
+ .probe = sdei_probe,
+};
+
+static bool __init sdei_present_dt(void)
+{
+ struct platform_device *pdev;
+ struct device_node *np, *fw_np;
+
+ fw_np = of_find_node_by_name(NULL, "firmware");
+ if (!fw_np)
+ return false;
+
+ np = of_find_matching_node(fw_np, sdei_of_match);
+ of_node_put(fw_np);
+ if (!np)
+ return false;
+
+ pdev = of_platform_device_create(np, sdei_driver.driver.name, NULL);
+ of_node_put(np);
+ if (!pdev)
+ return false;
+
+ return true;
+}
+
+static bool __init sdei_present_acpi(void)
+{
+ acpi_status status;
+ struct platform_device *pdev;
+ struct acpi_table_header *sdei_table_header;
+
+ if (acpi_disabled)
+ return false;
+
+ status = acpi_get_table(ACPI_SIG_SDEI, 0, &sdei_table_header);
+ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
+ const char *msg = acpi_format_exception(status);
+
+ pr_info("Failed to get ACPI:SDEI table, %s\n", msg);
+ }
+ if (ACPI_FAILURE(status))
+ return false;
+
+ pdev = platform_device_register_simple(sdei_driver.driver.name, 0, NULL,
+ 0);
+ if (IS_ERR(pdev))
+ return false;
+
+ return true;
+}
+
+static int __init sdei_init(void)
+{
+ if (sdei_present_dt() || sdei_present_acpi())
+ platform_driver_register(&sdei_driver);
+
+ return 0;
+}
+
+/*
+ * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register
+ * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised
+ * by device_initcall(). We want to be called in the middle.
+ */
+subsys_initcall_sync(sdei_init);
+
+int sdei_event_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg)
+{
+ int err;
+ mm_segment_t orig_addr_limit;
+ u32 event_num = arg->event_num;
+
+ orig_addr_limit = get_fs();
+ set_fs(USER_DS);
+
+ err = arg->callback(event_num, regs, arg->callback_arg);
+ if (err)
+ pr_err_ratelimited("event %u on CPU %u failed with error: %d\n",
+ event_num, smp_processor_id(), err);
+
+ set_fs(orig_addr_limit);
+
+ return err;
+}
+NOKPROBE_SYMBOL(sdei_event_handler);
diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index d687ca3d..8b25d31 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -496,6 +496,8 @@ static void __init psci_init_migrate(void)
static void __init psci_0_2_set_functions(void)
{
pr_info("Using standard PSCI v0.2 function IDs\n");
+ psci_ops.get_version = psci_get_version;
+
psci_function_id[PSCI_FN_CPU_SUSPEND] =
PSCI_FN_NATIVE(0_2, CPU_SUSPEND);
psci_ops.cpu_suspend = psci_cpu_suspend;
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index a187941..7c37544 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -104,26 +104,17 @@ static int of_coresight_alloc_memory(struct device *dev,
int of_coresight_get_cpu(const struct device_node *node)
{
int cpu;
- bool found;
- struct device_node *dn, *np;
+ struct device_node *dn;
dn = of_parse_phandle(node, "cpu", 0);
-
/* Affinity defaults to CPU0 */
if (!dn)
return 0;
-
- for_each_possible_cpu(cpu) {
- np = of_cpu_device_node_get(cpu);
- found = (dn == np);
- of_node_put(np);
- if (found)
- break;
- }
+ cpu = of_cpu_node_to_id(dn);
of_node_put(dn);
/* Affinity to CPU0 if no cpu nodes are found */
- return found ? cpu : 0;
+ return (cpu < 0) ? 0 : cpu;
}
EXPORT_SYMBOL_GPL(of_coresight_get_cpu);
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index a874777..a57c0fb 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -1070,31 +1070,6 @@ static int __init gic_validate_dist_version(void __iomem *dist_base)
return 0;
}
-static int get_cpu_number(struct device_node *dn)
-{
- const __be32 *cell;
- u64 hwid;
- int cpu;
-
- cell = of_get_property(dn, "reg", NULL);
- if (!cell)
- return -1;
-
- hwid = of_read_number(cell, of_n_addr_cells(dn));
-
- /*
- * Non affinity bits must be set to 0 in the DT
- */
- if (hwid & ~MPIDR_HWID_BITMASK)
- return -1;
-
- for_each_possible_cpu(cpu)
- if (cpu_logical_map(cpu) == hwid)
- return cpu;
-
- return -1;
-}
-
/* Create all possible partitions at boot time */
static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
{
@@ -1145,8 +1120,8 @@ static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
if (WARN_ON(!cpu_node))
continue;
- cpu = get_cpu_number(cpu_node);
- if (WARN_ON(cpu == -1))
+ cpu = of_cpu_node_to_id(cpu_node);
+ if (WARN_ON(cpu < 0))
continue;
pr_cont("%pOF[%d] ", cpu_node, cpu);
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 26618ba..a9d6fe8 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -316,6 +316,32 @@ struct device_node *of_get_cpu_node(int cpu, unsigned int *thread)
EXPORT_SYMBOL(of_get_cpu_node);
/**
+ * of_cpu_node_to_id: Get the logical CPU number for a given device_node
+ *
+ * @cpu_node: Pointer to the device_node for CPU.
+ *
+ * Returns the logical CPU number of the given CPU device_node.
+ * Returns -ENODEV if the CPU is not found.
+ */
+int of_cpu_node_to_id(struct device_node *cpu_node)
+{
+ int cpu;
+ bool found = false;
+ struct device_node *np;
+
+ for_each_possible_cpu(cpu) {
+ np = of_cpu_device_node_get(cpu);
+ found = (cpu_node == np);
+ of_node_put(np);
+ if (found)
+ return cpu;
+ }
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL(of_cpu_node_to_id);
+
+/**
* __of_device_is_compatible() - Check if the node matches given constraints
* @device: pointer to node
* @compat: required compatible string, NULL or "" for any match
diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index b8f44b0..da5724c 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -17,6 +17,15 @@
depends on ARM_PMU && ACPI
def_bool y
+config ARM_DSU_PMU
+ tristate "ARM DynamIQ Shared Unit (DSU) PMU"
+ depends on ARM64
+ help
+ Provides support for performance monitor unit in ARM DynamIQ Shared
+ Unit (DSU). The DSU integrates one or more cores with an L3 memory
+ system, control logic. The PMU allows counting various events related
+ to DSU.
+
config HISI_PMU
bool "HiSilicon SoC PMU"
depends on ARM64 && ACPI
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index 710a013..c2f2741 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ARM_DSU_PMU) += arm_dsu_pmu.o
obj-$(CONFIG_ARM_PMU) += arm_pmu.o arm_pmu_platform.o
obj-$(CONFIG_ARM_PMU_ACPI) += arm_pmu_acpi.o
obj-$(CONFIG_HISI_PMU) += hisilicon/
diff --git a/drivers/perf/arm_dsu_pmu.c b/drivers/perf/arm_dsu_pmu.c
new file mode 100644
index 0000000..93c50e3
--- /dev/null
+++ b/drivers/perf/arm_dsu_pmu.c
@@ -0,0 +1,843 @@
+/*
+ * ARM DynamIQ Shared Unit (DSU) PMU driver
+ *
+ * Copyright (C) ARM Limited, 2017.
+ *
+ * Based on ARM CCI-PMU, ARMv8 PMU-v3 drivers.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#define PMUNAME "arm_dsu"
+#define DRVNAME PMUNAME "_pmu"
+#define pr_fmt(fmt) DRVNAME ": " fmt
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/perf_event.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/sysfs.h>
+#include <linux/types.h>
+
+#include <asm/arm_dsu_pmu.h>
+#include <asm/local64.h>
+
+/* PMU event codes */
+#define DSU_PMU_EVT_CYCLES 0x11
+#define DSU_PMU_EVT_CHAIN 0x1e
+
+#define DSU_PMU_MAX_COMMON_EVENTS 0x40
+
+#define DSU_PMU_MAX_HW_CNTRS 32
+#define DSU_PMU_HW_COUNTER_MASK (DSU_PMU_MAX_HW_CNTRS - 1)
+
+#define CLUSTERPMCR_E BIT(0)
+#define CLUSTERPMCR_P BIT(1)
+#define CLUSTERPMCR_C BIT(2)
+#define CLUSTERPMCR_N_SHIFT 11
+#define CLUSTERPMCR_N_MASK 0x1f
+#define CLUSTERPMCR_IDCODE_SHIFT 16
+#define CLUSTERPMCR_IDCODE_MASK 0xff
+#define CLUSTERPMCR_IMP_SHIFT 24
+#define CLUSTERPMCR_IMP_MASK 0xff
+#define CLUSTERPMCR_RES_MASK 0x7e8
+#define CLUSTERPMCR_RES_VAL 0x40
+
+#define DSU_ACTIVE_CPU_MASK 0x0
+#define DSU_ASSOCIATED_CPU_MASK 0x1
+
+/*
+ * We use the index of the counters as they appear in the counter
+ * bit maps in the PMU registers (e.g CLUSTERPMSELR).
+ * i.e,
+ * counter 0 - Bit 0
+ * counter 1 - Bit 1
+ * ...
+ * Cycle counter - Bit 31
+ */
+#define DSU_PMU_IDX_CYCLE_COUNTER 31
+
+/* All event counters are 32bit, with a 64bit Cycle counter */
+#define DSU_PMU_COUNTER_WIDTH(idx) \
+ (((idx) == DSU_PMU_IDX_CYCLE_COUNTER) ? 64 : 32)
+
+#define DSU_PMU_COUNTER_MASK(idx) \
+ GENMASK_ULL((DSU_PMU_COUNTER_WIDTH((idx)) - 1), 0)
+
+#define DSU_EXT_ATTR(_name, _func, _config) \
+ (&((struct dev_ext_attribute[]) { \
+ { \
+ .attr = __ATTR(_name, 0444, _func, NULL), \
+ .var = (void *)_config \
+ } \
+ })[0].attr.attr)
+
+#define DSU_EVENT_ATTR(_name, _config) \
+ DSU_EXT_ATTR(_name, dsu_pmu_sysfs_event_show, (unsigned long)_config)
+
+#define DSU_FORMAT_ATTR(_name, _config) \
+ DSU_EXT_ATTR(_name, dsu_pmu_sysfs_format_show, (char *)_config)
+
+#define DSU_CPUMASK_ATTR(_name, _config) \
+ DSU_EXT_ATTR(_name, dsu_pmu_cpumask_show, (unsigned long)_config)
+
+struct dsu_hw_events {
+ DECLARE_BITMAP(used_mask, DSU_PMU_MAX_HW_CNTRS);
+ struct perf_event *events[DSU_PMU_MAX_HW_CNTRS];
+};
+
+/*
+ * struct dsu_pmu - DSU PMU descriptor
+ *
+ * @pmu_lock : Protects accesses to DSU PMU register from normal vs
+ * interrupt handler contexts.
+ * @hw_events : Holds the event counter state.
+ * @associated_cpus : CPUs attached to the DSU.
+ * @active_cpu : CPU to which the PMU is bound for accesses.
+ * @cpuhp_node : Node for CPU hotplug notifier link.
+ * @num_counters : Number of event counters implemented by the PMU,
+ * excluding the cycle counter.
+ * @irq : Interrupt line for counter overflow.
+ * @cpmceid_bitmap : Bitmap for the availability of architected common
+ * events (event_code < 0x40).
+ */
+struct dsu_pmu {
+ struct pmu pmu;
+ struct device *dev;
+ raw_spinlock_t pmu_lock;
+ struct dsu_hw_events hw_events;
+ cpumask_t associated_cpus;
+ cpumask_t active_cpu;
+ struct hlist_node cpuhp_node;
+ s8 num_counters;
+ int irq;
+ DECLARE_BITMAP(cpmceid_bitmap, DSU_PMU_MAX_COMMON_EVENTS);
+};
+
+static unsigned long dsu_pmu_cpuhp_state;
+
+static inline struct dsu_pmu *to_dsu_pmu(struct pmu *pmu)
+{
+ return container_of(pmu, struct dsu_pmu, pmu);
+}
+
+static ssize_t dsu_pmu_sysfs_event_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct dev_ext_attribute *eattr = container_of(attr,
+ struct dev_ext_attribute, attr);
+ return snprintf(buf, PAGE_SIZE, "event=0x%lx\n",
+ (unsigned long)eattr->var);
+}
+
+static ssize_t dsu_pmu_sysfs_format_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct dev_ext_attribute *eattr = container_of(attr,
+ struct dev_ext_attribute, attr);
+ return snprintf(buf, PAGE_SIZE, "%s\n", (char *)eattr->var);
+}
+
+static ssize_t dsu_pmu_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
+ struct dev_ext_attribute *eattr = container_of(attr,
+ struct dev_ext_attribute, attr);
+ unsigned long mask_id = (unsigned long)eattr->var;
+ const cpumask_t *cpumask;
+
+ switch (mask_id) {
+ case DSU_ACTIVE_CPU_MASK:
+ cpumask = &dsu_pmu->active_cpu;
+ break;
+ case DSU_ASSOCIATED_CPU_MASK:
+ cpumask = &dsu_pmu->associated_cpus;
+ break;
+ default:
+ return 0;
+ }
+ return cpumap_print_to_pagebuf(true, buf, cpumask);
+}
+
+static struct attribute *dsu_pmu_format_attrs[] = {
+ DSU_FORMAT_ATTR(event, "config:0-31"),
+ NULL,
+};
+
+static const struct attribute_group dsu_pmu_format_attr_group = {
+ .name = "format",
+ .attrs = dsu_pmu_format_attrs,
+};
+
+static struct attribute *dsu_pmu_event_attrs[] = {
+ DSU_EVENT_ATTR(cycles, 0x11),
+ DSU_EVENT_ATTR(bus_access, 0x19),
+ DSU_EVENT_ATTR(memory_error, 0x1a),
+ DSU_EVENT_ATTR(bus_cycles, 0x1d),
+ DSU_EVENT_ATTR(l3d_cache_allocate, 0x29),
+ DSU_EVENT_ATTR(l3d_cache_refill, 0x2a),
+ DSU_EVENT_ATTR(l3d_cache, 0x2b),
+ DSU_EVENT_ATTR(l3d_cache_wb, 0x2c),
+ NULL,
+};
+
+static umode_t
+dsu_pmu_event_attr_is_visible(struct kobject *kobj, struct attribute *attr,
+ int unused)
+{
+ struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
+ struct dev_ext_attribute *eattr = container_of(attr,
+ struct dev_ext_attribute, attr.attr);
+ unsigned long evt = (unsigned long)eattr->var;
+
+ return test_bit(evt, dsu_pmu->cpmceid_bitmap) ? attr->mode : 0;
+}
+
+static const struct attribute_group dsu_pmu_events_attr_group = {
+ .name = "events",
+ .attrs = dsu_pmu_event_attrs,
+ .is_visible = dsu_pmu_event_attr_is_visible,
+};
+
+static struct attribute *dsu_pmu_cpumask_attrs[] = {
+ DSU_CPUMASK_ATTR(cpumask, DSU_ACTIVE_CPU_MASK),
+ DSU_CPUMASK_ATTR(associated_cpus, DSU_ASSOCIATED_CPU_MASK),
+ NULL,
+};
+
+static const struct attribute_group dsu_pmu_cpumask_attr_group = {
+ .attrs = dsu_pmu_cpumask_attrs,
+};
+
+static const struct attribute_group *dsu_pmu_attr_groups[] = {
+ &dsu_pmu_cpumask_attr_group,
+ &dsu_pmu_events_attr_group,
+ &dsu_pmu_format_attr_group,
+ NULL,
+};
+
+static int dsu_pmu_get_online_cpu_any_but(struct dsu_pmu *dsu_pmu, int cpu)
+{
+ struct cpumask online_supported;
+
+ cpumask_and(&online_supported,
+ &dsu_pmu->associated_cpus, cpu_online_mask);
+ return cpumask_any_but(&online_supported, cpu);
+}
+
+static inline bool dsu_pmu_counter_valid(struct dsu_pmu *dsu_pmu, u32 idx)
+{
+ return (idx < dsu_pmu->num_counters) ||
+ (idx == DSU_PMU_IDX_CYCLE_COUNTER);
+}
+
+static inline u64 dsu_pmu_read_counter(struct perf_event *event)
+{
+ u64 val;
+ unsigned long flags;
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+ int idx = event->hw.idx;
+
+ if (WARN_ON(!cpumask_test_cpu(smp_processor_id(),
+ &dsu_pmu->associated_cpus)))
+ return 0;
+
+ if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
+ dev_err(event->pmu->dev,
+ "Trying reading invalid counter %d\n", idx);
+ return 0;
+ }
+
+ raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+ if (idx == DSU_PMU_IDX_CYCLE_COUNTER)
+ val = __dsu_pmu_read_pmccntr();
+ else
+ val = __dsu_pmu_read_counter(idx);
+ raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+
+ return val;
+}
+
+static void dsu_pmu_write_counter(struct perf_event *event, u64 val)
+{
+ unsigned long flags;
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+ int idx = event->hw.idx;
+
+ if (WARN_ON(!cpumask_test_cpu(smp_processor_id(),
+ &dsu_pmu->associated_cpus)))
+ return;
+
+ if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
+ dev_err(event->pmu->dev,
+ "writing to invalid counter %d\n", idx);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+ if (idx == DSU_PMU_IDX_CYCLE_COUNTER)
+ __dsu_pmu_write_pmccntr(val);
+ else
+ __dsu_pmu_write_counter(idx, val);
+ raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+}
+
+static int dsu_pmu_get_event_idx(struct dsu_hw_events *hw_events,
+ struct perf_event *event)
+{
+ int idx;
+ unsigned long evtype = event->attr.config;
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+ unsigned long *used_mask = hw_events->used_mask;
+
+ if (evtype == DSU_PMU_EVT_CYCLES) {
+ if (test_and_set_bit(DSU_PMU_IDX_CYCLE_COUNTER, used_mask))
+ return -EAGAIN;
+ return DSU_PMU_IDX_CYCLE_COUNTER;
+ }
+
+ idx = find_first_zero_bit(used_mask, dsu_pmu->num_counters);
+ if (idx >= dsu_pmu->num_counters)
+ return -EAGAIN;
+ set_bit(idx, hw_events->used_mask);
+ return idx;
+}
+
+static void dsu_pmu_enable_counter(struct dsu_pmu *dsu_pmu, int idx)
+{
+ __dsu_pmu_counter_interrupt_enable(idx);
+ __dsu_pmu_enable_counter(idx);
+}
+
+static void dsu_pmu_disable_counter(struct dsu_pmu *dsu_pmu, int idx)
+{
+ __dsu_pmu_disable_counter(idx);
+ __dsu_pmu_counter_interrupt_disable(idx);
+}
+
+static inline void dsu_pmu_set_event(struct dsu_pmu *dsu_pmu,
+ struct perf_event *event)
+{
+ int idx = event->hw.idx;
+ unsigned long flags;
+
+ if (!dsu_pmu_counter_valid(dsu_pmu, idx)) {
+ dev_err(event->pmu->dev,
+ "Trying to set invalid counter %d\n", idx);
+ return;
+ }
+
+ raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+ __dsu_pmu_set_event(idx, event->hw.config_base);
+ raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+}
+
+static void dsu_pmu_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 delta, prev_count, new_count;
+
+ do {
+ /* We may also be called from the irq handler */
+ prev_count = local64_read(&hwc->prev_count);
+ new_count = dsu_pmu_read_counter(event);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_count, new_count) !=
+ prev_count);
+ delta = (new_count - prev_count) & DSU_PMU_COUNTER_MASK(hwc->idx);
+ local64_add(delta, &event->count);
+}
+
+static void dsu_pmu_read(struct perf_event *event)
+{
+ dsu_pmu_event_update(event);
+}
+
+static inline u32 dsu_pmu_get_reset_overflow(void)
+{
+ return __dsu_pmu_get_reset_overflow();
+}
+
+/**
+ * dsu_pmu_set_event_period: Set the period for the counter.
+ *
+ * All DSU PMU event counters, except the cycle counter are 32bit
+ * counters. To handle cases of extreme interrupt latency, we program
+ * the counter with half of the max count for the counters.
+ */
+static void dsu_pmu_set_event_period(struct perf_event *event)
+{
+ int idx = event->hw.idx;
+ u64 val = DSU_PMU_COUNTER_MASK(idx) >> 1;
+
+ local64_set(&event->hw.prev_count, val);
+ dsu_pmu_write_counter(event, val);
+}
+
+static irqreturn_t dsu_pmu_handle_irq(int irq_num, void *dev)
+{
+ int i;
+ bool handled = false;
+ struct dsu_pmu *dsu_pmu = dev;
+ struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
+ unsigned long overflow;
+
+ overflow = dsu_pmu_get_reset_overflow();
+ if (!overflow)
+ return IRQ_NONE;
+
+ for_each_set_bit(i, &overflow, DSU_PMU_MAX_HW_CNTRS) {
+ struct perf_event *event = hw_events->events[i];
+
+ if (!event)
+ continue;
+ dsu_pmu_event_update(event);
+ dsu_pmu_set_event_period(event);
+ handled = true;
+ }
+
+ return IRQ_RETVAL(handled);
+}
+
+static void dsu_pmu_start(struct perf_event *event, int pmu_flags)
+{
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+
+ /* We always reprogram the counter */
+ if (pmu_flags & PERF_EF_RELOAD)
+ WARN_ON(!(event->hw.state & PERF_HES_UPTODATE));
+ dsu_pmu_set_event_period(event);
+ if (event->hw.idx != DSU_PMU_IDX_CYCLE_COUNTER)
+ dsu_pmu_set_event(dsu_pmu, event);
+ event->hw.state = 0;
+ dsu_pmu_enable_counter(dsu_pmu, event->hw.idx);
+}
+
+static void dsu_pmu_stop(struct perf_event *event, int pmu_flags)
+{
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+
+ if (event->hw.state & PERF_HES_STOPPED)
+ return;
+ dsu_pmu_disable_counter(dsu_pmu, event->hw.idx);
+ dsu_pmu_event_update(event);
+ event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+}
+
+static int dsu_pmu_add(struct perf_event *event, int flags)
+{
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+ struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+
+ if (WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(),
+ &dsu_pmu->associated_cpus)))
+ return -ENOENT;
+
+ idx = dsu_pmu_get_event_idx(hw_events, event);
+ if (idx < 0)
+ return idx;
+
+ hwc->idx = idx;
+ hw_events->events[idx] = event;
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+
+ if (flags & PERF_EF_START)
+ dsu_pmu_start(event, PERF_EF_RELOAD);
+
+ perf_event_update_userpage(event);
+ return 0;
+}
+
+static void dsu_pmu_del(struct perf_event *event, int flags)
+{
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+ struct dsu_hw_events *hw_events = &dsu_pmu->hw_events;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ dsu_pmu_stop(event, PERF_EF_UPDATE);
+ hw_events->events[idx] = NULL;
+ clear_bit(idx, hw_events->used_mask);
+ perf_event_update_userpage(event);
+}
+
+static void dsu_pmu_enable(struct pmu *pmu)
+{
+ u32 pmcr;
+ unsigned long flags;
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
+
+ /* If no counters are added, skip enabling the PMU */
+ if (bitmap_empty(dsu_pmu->hw_events.used_mask, DSU_PMU_MAX_HW_CNTRS))
+ return;
+
+ raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+ pmcr = __dsu_pmu_read_pmcr();
+ pmcr |= CLUSTERPMCR_E;
+ __dsu_pmu_write_pmcr(pmcr);
+ raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+}
+
+static void dsu_pmu_disable(struct pmu *pmu)
+{
+ u32 pmcr;
+ unsigned long flags;
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(pmu);
+
+ raw_spin_lock_irqsave(&dsu_pmu->pmu_lock, flags);
+ pmcr = __dsu_pmu_read_pmcr();
+ pmcr &= ~CLUSTERPMCR_E;
+ __dsu_pmu_write_pmcr(pmcr);
+ raw_spin_unlock_irqrestore(&dsu_pmu->pmu_lock, flags);
+}
+
+static bool dsu_pmu_validate_event(struct pmu *pmu,
+ struct dsu_hw_events *hw_events,
+ struct perf_event *event)
+{
+ if (is_software_event(event))
+ return true;
+ /* Reject groups spanning multiple HW PMUs. */
+ if (event->pmu != pmu)
+ return false;
+ return dsu_pmu_get_event_idx(hw_events, event) >= 0;
+}
+
+/*
+ * Make sure the group of events can be scheduled at once
+ * on the PMU.
+ */
+static bool dsu_pmu_validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct dsu_hw_events fake_hw;
+
+ if (event->group_leader == event)
+ return true;
+
+ memset(fake_hw.used_mask, 0, sizeof(fake_hw.used_mask));
+ if (!dsu_pmu_validate_event(event->pmu, &fake_hw, leader))
+ return false;
+ list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
+ if (!dsu_pmu_validate_event(event->pmu, &fake_hw, sibling))
+ return false;
+ }
+ return dsu_pmu_validate_event(event->pmu, &fake_hw, event);
+}
+
+static int dsu_pmu_event_init(struct perf_event *event)
+{
+ struct dsu_pmu *dsu_pmu = to_dsu_pmu(event->pmu);
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* We don't support sampling */
+ if (is_sampling_event(event)) {
+ dev_dbg(dsu_pmu->pmu.dev, "Can't support sampling events\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* We cannot support task bound events */
+ if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK) {
+ dev_dbg(dsu_pmu->pmu.dev, "Can't support per-task counters\n");
+ return -EINVAL;
+ }
+
+ if (has_branch_stack(event) ||
+ event->attr.exclude_user ||
+ event->attr.exclude_kernel ||
+ event->attr.exclude_hv ||
+ event->attr.exclude_idle ||
+ event->attr.exclude_host ||
+ event->attr.exclude_guest) {
+ dev_dbg(dsu_pmu->pmu.dev, "Can't support filtering\n");
+ return -EINVAL;
+ }
+
+ if (!cpumask_test_cpu(event->cpu, &dsu_pmu->associated_cpus)) {
+ dev_dbg(dsu_pmu->pmu.dev,
+ "Requested cpu is not associated with the DSU\n");
+ return -EINVAL;
+ }
+ /*
+ * Choose the current active CPU to read the events. We don't want
+ * to migrate the event contexts, irq handling etc to the requested
+ * CPU. As long as the requested CPU is within the same DSU, we
+ * are fine.
+ */
+ event->cpu = cpumask_first(&dsu_pmu->active_cpu);
+ if (event->cpu >= nr_cpu_ids)
+ return -EINVAL;
+ if (!dsu_pmu_validate_group(event))
+ return -EINVAL;
+
+ event->hw.config_base = event->attr.config;
+ return 0;
+}
+
+static struct dsu_pmu *dsu_pmu_alloc(struct platform_device *pdev)
+{
+ struct dsu_pmu *dsu_pmu;
+
+ dsu_pmu = devm_kzalloc(&pdev->dev, sizeof(*dsu_pmu), GFP_KERNEL);
+ if (!dsu_pmu)
+ return ERR_PTR(-ENOMEM);
+
+ raw_spin_lock_init(&dsu_pmu->pmu_lock);
+ /*
+ * Initialise the number of counters to -1, until we probe
+ * the real number on a connected CPU.
+ */
+ dsu_pmu->num_counters = -1;
+ return dsu_pmu;
+}
+
+/**
+ * dsu_pmu_dt_get_cpus: Get the list of CPUs in the cluster.
+ */
+static int dsu_pmu_dt_get_cpus(struct device_node *dev, cpumask_t *mask)
+{
+ int i = 0, n, cpu;
+ struct device_node *cpu_node;
+
+ n = of_count_phandle_with_args(dev, "cpus", NULL);
+ if (n <= 0)
+ return -ENODEV;
+ for (; i < n; i++) {
+ cpu_node = of_parse_phandle(dev, "cpus", i);
+ if (!cpu_node)
+ break;
+ cpu = of_cpu_node_to_id(cpu_node);
+ of_node_put(cpu_node);
+ /*
+ * We have to ignore the failures here and continue scanning
+ * the list to handle cases where the nr_cpus could be capped
+ * in the running kernel.
+ */
+ if (cpu < 0)
+ continue;
+ cpumask_set_cpu(cpu, mask);
+ }
+ return 0;
+}
+
+/*
+ * dsu_pmu_probe_pmu: Probe the PMU details on a CPU in the cluster.
+ */
+static void dsu_pmu_probe_pmu(struct dsu_pmu *dsu_pmu)
+{
+ u64 num_counters;
+ u32 cpmceid[2];
+
+ num_counters = (__dsu_pmu_read_pmcr() >> CLUSTERPMCR_N_SHIFT) &
+ CLUSTERPMCR_N_MASK;
+ /* We can only support up to 31 independent counters */
+ if (WARN_ON(num_counters > 31))
+ num_counters = 31;
+ dsu_pmu->num_counters = num_counters;
+ if (!dsu_pmu->num_counters)
+ return;
+ cpmceid[0] = __dsu_pmu_read_pmceid(0);
+ cpmceid[1] = __dsu_pmu_read_pmceid(1);
+ bitmap_from_u32array(dsu_pmu->cpmceid_bitmap,
+ DSU_PMU_MAX_COMMON_EVENTS,
+ cpmceid,
+ ARRAY_SIZE(cpmceid));
+}
+
+static void dsu_pmu_set_active_cpu(int cpu, struct dsu_pmu *dsu_pmu)
+{
+ cpumask_set_cpu(cpu, &dsu_pmu->active_cpu);
+ if (irq_set_affinity_hint(dsu_pmu->irq, &dsu_pmu->active_cpu))
+ pr_warn("Failed to set irq affinity to %d\n", cpu);
+}
+
+/*
+ * dsu_pmu_init_pmu: Initialise the DSU PMU configurations if
+ * we haven't done it already.
+ */
+static void dsu_pmu_init_pmu(struct dsu_pmu *dsu_pmu)
+{
+ if (dsu_pmu->num_counters == -1)
+ dsu_pmu_probe_pmu(dsu_pmu);
+ /* Reset the interrupt overflow mask */
+ dsu_pmu_get_reset_overflow();
+}
+
+static int dsu_pmu_device_probe(struct platform_device *pdev)
+{
+ int irq, rc;
+ struct dsu_pmu *dsu_pmu;
+ char *name;
+ static atomic_t pmu_idx = ATOMIC_INIT(-1);
+
+ dsu_pmu = dsu_pmu_alloc(pdev);
+ if (IS_ERR(dsu_pmu))
+ return PTR_ERR(dsu_pmu);
+
+ rc = dsu_pmu_dt_get_cpus(pdev->dev.of_node, &dsu_pmu->associated_cpus);
+ if (rc) {
+ dev_warn(&pdev->dev, "Failed to parse the CPUs\n");
+ return rc;
+ }
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_warn(&pdev->dev, "Failed to find IRQ\n");
+ return -EINVAL;
+ }
+
+ name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_%d",
+ PMUNAME, atomic_inc_return(&pmu_idx));
+ if (!name)
+ return -ENOMEM;
+ rc = devm_request_irq(&pdev->dev, irq, dsu_pmu_handle_irq,
+ IRQF_NOBALANCING, name, dsu_pmu);
+ if (rc) {
+ dev_warn(&pdev->dev, "Failed to request IRQ %d\n", irq);
+ return rc;
+ }
+
+ dsu_pmu->irq = irq;
+ platform_set_drvdata(pdev, dsu_pmu);
+ rc = cpuhp_state_add_instance(dsu_pmu_cpuhp_state,
+ &dsu_pmu->cpuhp_node);
+ if (rc)
+ return rc;
+
+ dsu_pmu->pmu = (struct pmu) {
+ .task_ctx_nr = perf_invalid_context,
+ .module = THIS_MODULE,
+ .pmu_enable = dsu_pmu_enable,
+ .pmu_disable = dsu_pmu_disable,
+ .event_init = dsu_pmu_event_init,
+ .add = dsu_pmu_add,
+ .del = dsu_pmu_del,
+ .start = dsu_pmu_start,
+ .stop = dsu_pmu_stop,
+ .read = dsu_pmu_read,
+
+ .attr_groups = dsu_pmu_attr_groups,
+ };
+
+ rc = perf_pmu_register(&dsu_pmu->pmu, name, -1);
+ if (rc) {
+ cpuhp_state_remove_instance(dsu_pmu_cpuhp_state,
+ &dsu_pmu->cpuhp_node);
+ irq_set_affinity_hint(dsu_pmu->irq, NULL);
+ }
+
+ return rc;
+}
+
+static int dsu_pmu_device_remove(struct platform_device *pdev)
+{
+ struct dsu_pmu *dsu_pmu = platform_get_drvdata(pdev);
+
+ perf_pmu_unregister(&dsu_pmu->pmu);
+ cpuhp_state_remove_instance(dsu_pmu_cpuhp_state, &dsu_pmu->cpuhp_node);
+ irq_set_affinity_hint(dsu_pmu->irq, NULL);
+
+ return 0;
+}
+
+static const struct of_device_id dsu_pmu_of_match[] = {
+ { .compatible = "arm,dsu-pmu", },
+ {},
+};
+
+static struct platform_driver dsu_pmu_driver = {
+ .driver = {
+ .name = DRVNAME,
+ .of_match_table = of_match_ptr(dsu_pmu_of_match),
+ },
+ .probe = dsu_pmu_device_probe,
+ .remove = dsu_pmu_device_remove,
+};
+
+static int dsu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
+ cpuhp_node);
+
+ if (!cpumask_test_cpu(cpu, &dsu_pmu->associated_cpus))
+ return 0;
+
+ /* If the PMU is already managed, there is nothing to do */
+ if (!cpumask_empty(&dsu_pmu->active_cpu))
+ return 0;
+
+ dsu_pmu_init_pmu(dsu_pmu);
+ dsu_pmu_set_active_cpu(cpu, dsu_pmu);
+
+ return 0;
+}
+
+static int dsu_pmu_cpu_teardown(unsigned int cpu, struct hlist_node *node)
+{
+ int dst;
+ struct dsu_pmu *dsu_pmu = hlist_entry_safe(node, struct dsu_pmu,
+ cpuhp_node);
+
+ if (!cpumask_test_and_clear_cpu(cpu, &dsu_pmu->active_cpu))
+ return 0;
+
+ dst = dsu_pmu_get_online_cpu_any_but(dsu_pmu, cpu);
+ /* If there are no active CPUs in the DSU, leave IRQ disabled */
+ if (dst >= nr_cpu_ids) {
+ irq_set_affinity_hint(dsu_pmu->irq, NULL);
+ return 0;
+ }
+
+ perf_pmu_migrate_context(&dsu_pmu->pmu, cpu, dst);
+ dsu_pmu_set_active_cpu(dst, dsu_pmu);
+
+ return 0;
+}
+
+static int __init dsu_pmu_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ DRVNAME,
+ dsu_pmu_cpu_online,
+ dsu_pmu_cpu_teardown);
+ if (ret < 0)
+ return ret;
+ dsu_pmu_cpuhp_state = ret;
+ return platform_driver_register(&dsu_pmu_driver);
+}
+
+static void __exit dsu_pmu_exit(void)
+{
+ platform_driver_unregister(&dsu_pmu_driver);
+ cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
+}
+
+module_init(dsu_pmu_init);
+module_exit(dsu_pmu_exit);
+
+MODULE_DEVICE_TABLE(of, dsu_pmu_of_match);
+MODULE_DESCRIPTION("Perf driver for ARM DynamIQ Shared Unit");
+MODULE_AUTHOR("Suzuki K Poulose <suzuki.poulose@arm.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/perf/arm_pmu_platform.c b/drivers/perf/arm_pmu_platform.c
index 91b224ec..46501cc 100644
--- a/drivers/perf/arm_pmu_platform.c
+++ b/drivers/perf/arm_pmu_platform.c
@@ -82,19 +82,10 @@ static int pmu_parse_irq_affinity(struct device_node *node, int i)
return -EINVAL;
}
- /* Now look up the logical CPU number */
- for_each_possible_cpu(cpu) {
- struct device_node *cpu_dn;
-
- cpu_dn = of_cpu_device_node_get(cpu);
- of_node_put(cpu_dn);
-
- if (dn == cpu_dn)
- break;
- }
-
- if (cpu >= nr_cpu_ids) {
+ cpu = of_cpu_node_to_id(dn);
+ if (cpu < 0) {
pr_warn("failed to find logical CPU for %s\n", dn->name);
+ cpu = nr_cpu_ids;
}
of_node_put(dn);
diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c
index 8ce262f..51b40ae 100644
--- a/drivers/perf/arm_spe_pmu.c
+++ b/drivers/perf/arm_spe_pmu.c
@@ -1164,6 +1164,15 @@ static int arm_spe_pmu_device_dt_probe(struct platform_device *pdev)
struct arm_spe_pmu *spe_pmu;
struct device *dev = &pdev->dev;
+ /*
+ * If kernelspace is unmapped when running at EL0, then the SPE
+ * buffer will fault and prematurely terminate the AUX session.
+ */
+ if (arm64_kernel_unmapped_at_el0()) {
+ dev_warn_once(dev, "profiling buffer inaccessible. Try passing \"kpti=off\" on the kernel command line\n");
+ return -EPERM;
+ }
+
spe_pmu = devm_kzalloc(dev, sizeof(*spe_pmu), GFP_KERNEL);
if (!spe_pmu) {
dev_err(dev, "failed to allocate spe_pmu\n");
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
new file mode 100644
index 0000000..942afbd
--- /dev/null
+++ b/include/linux/arm_sdei.h
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2017 Arm Ltd.
+#ifndef __LINUX_ARM_SDEI_H
+#define __LINUX_ARM_SDEI_H
+
+#include <uapi/linux/arm_sdei.h>
+
+enum sdei_conduit_types {
+ CONDUIT_INVALID = 0,
+ CONDUIT_SMC,
+ CONDUIT_HVC,
+};
+
+#include <asm/sdei.h>
+
+/* Arch code should override this to set the entry point from firmware... */
+#ifndef sdei_arch_get_entry_point
+#define sdei_arch_get_entry_point(conduit) (0)
+#endif
+
+/*
+ * When an event occurs sdei_event_handler() will call a user-provided callback
+ * like this in NMI context on the CPU that received the event.
+ */
+typedef int (sdei_event_callback)(u32 event, struct pt_regs *regs, void *arg);
+
+/*
+ * Register your callback to claim an event. The event must be described
+ * by firmware.
+ */
+int sdei_event_register(u32 event_num, sdei_event_callback *cb, void *arg);
+
+/*
+ * Calls to sdei_event_unregister() may return EINPROGRESS. Keep calling
+ * it until it succeeds.
+ */
+int sdei_event_unregister(u32 event_num);
+
+int sdei_event_enable(u32 event_num);
+int sdei_event_disable(u32 event_num);
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+/* For use by arch code when CPU hotplug notifiers are not appropriate. */
+int sdei_mask_local_cpu(void);
+int sdei_unmask_local_cpu(void);
+#else
+static inline int sdei_mask_local_cpu(void) { return 0; }
+static inline int sdei_unmask_local_cpu(void) { return 0; }
+#endif /* CONFIG_ARM_SDE_INTERFACE */
+
+
+/*
+ * This struct represents an event that has been registered. The driver
+ * maintains a list of all events, and which ones are registered. (Private
+ * events have one entry in the list, but are registered on each CPU).
+ * A pointer to this struct is passed to firmware, and back to the event
+ * handler. The event handler can then use this to invoke the registered
+ * callback, without having to walk the list.
+ *
+ * For CPU private events, this structure is per-cpu.
+ */
+struct sdei_registered_event {
+ /* For use by arch code: */
+ struct pt_regs interrupted_regs;
+
+ sdei_event_callback *callback;
+ void *callback_arg;
+ u32 event_num;
+ u8 priority;
+};
+
+/* The arch code entry point should then call this when an event arrives. */
+int notrace sdei_event_handler(struct pt_regs *regs,
+ struct sdei_registered_event *arg);
+
+/* arch code may use this to retrieve the extra registers. */
+int sdei_api_event_context(u32 query, u64 *result);
+
+#endif /* __LINUX_ARM_SDEI_H */
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 1a32e55..2c787c5 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -109,6 +109,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_XTENSA_STARTING,
CPUHP_AP_PERF_METAG_STARTING,
CPUHP_AP_MIPS_OP_LOONGSON3_STARTING,
+ CPUHP_AP_ARM_SDEI_STARTING,
CPUHP_AP_ARM_VFP_STARTING,
CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING,
CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING,
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 8f7788d..871f9e2 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -257,22 +257,30 @@ static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
{return 0;}
#endif
-#define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx) \
-({ \
- int __ret; \
- \
- if (!idx) { \
- cpu_do_idle(); \
- return idx; \
- } \
- \
- __ret = cpu_pm_enter(); \
- if (!__ret) { \
- __ret = low_level_idle_enter(idx); \
- cpu_pm_exit(); \
- } \
- \
- __ret ? -1 : idx; \
+#define __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, is_retention) \
+({ \
+ int __ret = 0; \
+ \
+ if (!idx) { \
+ cpu_do_idle(); \
+ return idx; \
+ } \
+ \
+ if (!is_retention) \
+ __ret = cpu_pm_enter(); \
+ if (!__ret) { \
+ __ret = low_level_idle_enter(idx); \
+ if (!is_retention) \
+ cpu_pm_exit(); \
+ } \
+ \
+ __ret ? -1 : idx; \
})
+#define CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx) \
+ __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 0)
+
+#define CPU_PM_CPU_IDLE_ENTER_RETENTION(low_level_idle_enter, idx) \
+ __CPU_PM_CPU_IDLE_ENTER(low_level_idle_enter, idx, 1)
+
#endif /* _LINUX_CPUIDLE_H */
diff --git a/include/linux/of.h b/include/linux/of.h
index d3dea1d..173102d 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -544,6 +544,8 @@ const char *of_prop_next_string(struct property *prop, const char *cur);
bool of_console_check(struct device_node *dn, char *name, int index);
+extern int of_cpu_node_to_id(struct device_node *np);
+
#else /* CONFIG_OF */
static inline void of_core_init(void)
@@ -916,6 +918,11 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag
{
}
+static inline int of_cpu_node_to_id(struct device_node *np)
+{
+ return -ENODEV;
+}
+
#define of_match_ptr(_ptr) NULL
#define of_match_node(_matches, _node) NULL
#endif /* CONFIG_OF */
diff --git a/include/linux/psci.h b/include/linux/psci.h
index bdea1cb..f724fd8 100644
--- a/include/linux/psci.h
+++ b/include/linux/psci.h
@@ -26,6 +26,7 @@ int psci_cpu_init_idle(unsigned int cpu);
int psci_cpu_suspend_enter(unsigned long index);
struct psci_operations {
+ u32 (*get_version)(void);
int (*cpu_suspend)(u32 state, unsigned long entry_point);
int (*cpu_off)(u32 state);
int (*cpu_on)(unsigned long cpuid, unsigned long entry_point);
@@ -46,10 +47,11 @@ static inline int psci_dt_init(void) { return 0; }
#if defined(CONFIG_ARM_PSCI_FW) && defined(CONFIG_ACPI)
int __init psci_acpi_init(void);
bool __init acpi_psci_present(void);
-bool __init acpi_psci_use_hvc(void);
+bool acpi_psci_use_hvc(void);
#else
static inline int psci_acpi_init(void) { return 0; }
static inline bool acpi_psci_present(void) { return false; }
+static inline bool acpi_psci_use_hvc(void) {return false; }
#endif
#endif /* __LINUX_PSCI_H */
diff --git a/include/uapi/linux/arm_sdei.h b/include/uapi/linux/arm_sdei.h
new file mode 100644
index 0000000..af0630b
--- /dev/null
+++ b/include/uapi/linux/arm_sdei.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (C) 2017 Arm Ltd. */
+#ifndef _UAPI_LINUX_ARM_SDEI_H
+#define _UAPI_LINUX_ARM_SDEI_H
+
+#define SDEI_1_0_FN_BASE 0xC4000020
+#define SDEI_1_0_MASK 0xFFFFFFE0
+#define SDEI_1_0_FN(n) (SDEI_1_0_FN_BASE + (n))
+
+#define SDEI_1_0_FN_SDEI_VERSION SDEI_1_0_FN(0x00)
+#define SDEI_1_0_FN_SDEI_EVENT_REGISTER SDEI_1_0_FN(0x01)
+#define SDEI_1_0_FN_SDEI_EVENT_ENABLE SDEI_1_0_FN(0x02)
+#define SDEI_1_0_FN_SDEI_EVENT_DISABLE SDEI_1_0_FN(0x03)
+#define SDEI_1_0_FN_SDEI_EVENT_CONTEXT SDEI_1_0_FN(0x04)
+#define SDEI_1_0_FN_SDEI_EVENT_COMPLETE SDEI_1_0_FN(0x05)
+#define SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME SDEI_1_0_FN(0x06)
+#define SDEI_1_0_FN_SDEI_EVENT_UNREGISTER SDEI_1_0_FN(0x07)
+#define SDEI_1_0_FN_SDEI_EVENT_STATUS SDEI_1_0_FN(0x08)
+#define SDEI_1_0_FN_SDEI_EVENT_GET_INFO SDEI_1_0_FN(0x09)
+#define SDEI_1_0_FN_SDEI_EVENT_ROUTING_SET SDEI_1_0_FN(0x0A)
+#define SDEI_1_0_FN_SDEI_PE_MASK SDEI_1_0_FN(0x0B)
+#define SDEI_1_0_FN_SDEI_PE_UNMASK SDEI_1_0_FN(0x0C)
+#define SDEI_1_0_FN_SDEI_INTERRUPT_BIND SDEI_1_0_FN(0x0D)
+#define SDEI_1_0_FN_SDEI_INTERRUPT_RELEASE SDEI_1_0_FN(0x0E)
+#define SDEI_1_0_FN_SDEI_PRIVATE_RESET SDEI_1_0_FN(0x11)
+#define SDEI_1_0_FN_SDEI_SHARED_RESET SDEI_1_0_FN(0x12)
+
+#define SDEI_VERSION_MAJOR_SHIFT 48
+#define SDEI_VERSION_MAJOR_MASK 0x7fff
+#define SDEI_VERSION_MINOR_SHIFT 32
+#define SDEI_VERSION_MINOR_MASK 0xffff
+#define SDEI_VERSION_VENDOR_SHIFT 0
+#define SDEI_VERSION_VENDOR_MASK 0xffffffff
+
+#define SDEI_VERSION_MAJOR(x) (x>>SDEI_VERSION_MAJOR_SHIFT & SDEI_VERSION_MAJOR_MASK)
+#define SDEI_VERSION_MINOR(x) (x>>SDEI_VERSION_MINOR_SHIFT & SDEI_VERSION_MINOR_MASK)
+#define SDEI_VERSION_VENDOR(x) (x>>SDEI_VERSION_VENDOR_SHIFT & SDEI_VERSION_VENDOR_MASK)
+
+/* SDEI return values */
+#define SDEI_SUCCESS 0
+#define SDEI_NOT_SUPPORTED -1
+#define SDEI_INVALID_PARAMETERS -2
+#define SDEI_DENIED -3
+#define SDEI_PENDING -5
+#define SDEI_OUT_OF_RESOURCE -10
+
+/* EVENT_REGISTER flags */
+#define SDEI_EVENT_REGISTER_RM_ANY 0
+#define SDEI_EVENT_REGISTER_RM_PE 1
+
+/* EVENT_STATUS return value bits */
+#define SDEI_EVENT_STATUS_RUNNING 2
+#define SDEI_EVENT_STATUS_ENABLED 1
+#define SDEI_EVENT_STATUS_REGISTERED 0
+
+/* EVENT_COMPLETE status values */
+#define SDEI_EV_HANDLED 0
+#define SDEI_EV_FAILED 1
+
+/* GET_INFO values */
+#define SDEI_EVENT_INFO_EV_TYPE 0
+#define SDEI_EVENT_INFO_EV_SIGNALED 1
+#define SDEI_EVENT_INFO_EV_PRIORITY 2
+#define SDEI_EVENT_INFO_EV_ROUTING_MODE 3
+#define SDEI_EVENT_INFO_EV_ROUTING_AFF 4
+
+/* and their results */
+#define SDEI_EVENT_TYPE_PRIVATE 0
+#define SDEI_EVENT_TYPE_SHARED 1
+#define SDEI_EVENT_PRIORITY_NORMAL 0
+#define SDEI_EVENT_PRIORITY_CRITICAL 1
+
+#endif /* _UAPI_LINUX_ARM_SDEI_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d99fe3f..02f7d6e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4913,6 +4913,7 @@ void perf_event_update_userpage(struct perf_event *event)
unlock:
rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(perf_event_update_userpage);
static int perf_mmap_fault(struct vm_fault *vmf)
{
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 2e43f9d..08464b2 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -53,8 +53,8 @@
__asm__(".arch_extension virt");
#endif
+DEFINE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-static kvm_cpu_context_t __percpu *kvm_host_cpu_state;
/* Per-CPU variable containing the currently running vcpu. */
static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu);
@@ -354,7 +354,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
vcpu->cpu = cpu;
- vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
+ vcpu->arch.host_cpu_context = this_cpu_ptr(&kvm_host_cpu_state);
kvm_arm_set_running_vcpu(vcpu);
kvm_vgic_load(vcpu);
@@ -509,7 +509,7 @@ static void update_vttbr(struct kvm *kvm)
pgd_phys = virt_to_phys(kvm->arch.pgd);
BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK);
vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits);
- kvm->arch.vttbr = pgd_phys | vmid;
+ kvm->arch.vttbr = kvm_phys_to_vttbr(pgd_phys) | vmid;
spin_unlock(&kvm_vmid_lock);
}
@@ -704,9 +704,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
+ if (has_vhe())
+ kvm_arm_vhe_guest_enter();
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
+ if (has_vhe())
+ kvm_arm_vhe_guest_exit();
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
/*
@@ -759,6 +763,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
guest_exit();
trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+ /* Exit types that need handling before we can be preempted */
+ handle_exit_early(vcpu, run, ret);
+
preempt_enable();
ret = handle_exit(vcpu, run, ret);
@@ -1158,7 +1165,7 @@ static void cpu_init_hyp_mode(void *dummy)
pgd_ptr = kvm_mmu_get_httbr();
stack_page = __this_cpu_read(kvm_arm_hyp_stack_page);
hyp_stack_ptr = stack_page + PAGE_SIZE;
- vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector);
+ vector_ptr = (unsigned long)kvm_get_hyp_vector();
__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
@@ -1272,19 +1279,8 @@ static inline void hyp_cpu_pm_exit(void)
}
#endif
-static void teardown_common_resources(void)
-{
- free_percpu(kvm_host_cpu_state);
-}
-
static int init_common_resources(void)
{
- kvm_host_cpu_state = alloc_percpu(kvm_cpu_context_t);
- if (!kvm_host_cpu_state) {
- kvm_err("Cannot allocate host CPU state\n");
- return -ENOMEM;
- }
-
/* set size of VMID supported by CPU */
kvm_vmid_bits = kvm_get_vmid_bits();
kvm_info("%d-bit VMID\n", kvm_vmid_bits);
@@ -1403,6 +1399,12 @@ static int init_hyp_mode(void)
goto out_err;
}
+ err = kvm_map_vectors();
+ if (err) {
+ kvm_err("Cannot map vectors\n");
+ goto out_err;
+ }
+
/*
* Map the Hyp stack pages
*/
@@ -1420,7 +1422,7 @@ static int init_hyp_mode(void)
for_each_possible_cpu(cpu) {
kvm_cpu_context_t *cpu_ctxt;
- cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu);
+ cpu_ctxt = per_cpu_ptr(&kvm_host_cpu_state, cpu);
err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP);
if (err) {
@@ -1544,7 +1546,6 @@ int kvm_arch_init(void *opaque)
if (!in_hyp_mode)
teardown_hyp_mode();
out_err:
- teardown_common_resources();
return err;
}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 9dea963..f8eaf86 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -621,7 +621,7 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
return 0;
}
-static int __create_hyp_mappings(pgd_t *pgdp,
+static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
unsigned long start, unsigned long end,
unsigned long pfn, pgprot_t prot)
{
@@ -634,7 +634,7 @@ static int __create_hyp_mappings(pgd_t *pgdp,
addr = start & PAGE_MASK;
end = PAGE_ALIGN(end);
do {
- pgd = pgdp + pgd_index(addr);
+ pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
if (pgd_none(*pgd)) {
pud = pud_alloc_one(NULL, addr);
@@ -697,8 +697,8 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot)
int err;
phys_addr = kvm_kaddr_to_phys(from + virt_addr - start);
- err = __create_hyp_mappings(hyp_pgd, virt_addr,
- virt_addr + PAGE_SIZE,
+ err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD,
+ virt_addr, virt_addr + PAGE_SIZE,
__phys_to_pfn(phys_addr),
prot);
if (err)
@@ -729,7 +729,7 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
return -EINVAL;
- return __create_hyp_mappings(hyp_pgd, start, end,
+ return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end,
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
@@ -1735,7 +1735,7 @@ static int kvm_map_idmap_text(pgd_t *pgd)
int err;
/* Create the idmap in the boot page tables */
- err = __create_hyp_mappings(pgd,
+ err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
hyp_idmap_start, hyp_idmap_end,
__phys_to_pfn(hyp_idmap_start),
PAGE_HYP_EXEC);