Merge branch 'next' of https://git.kernel.org/pub/scm/linux/kernel/git/scottwood/linux into next
Freescale updates from Scott:
"Highlights include elimination of legacy clock bindings use from dts
files, an 83xx watchdog handler, fixes to old dts interrupt errors, and
some minor cleanup."
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
index bdd344a..18c5fee 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -113,7 +113,15 @@
size (see Fig. 1). This area is *not* released: this region will
be kept permanently reserved, so that it can act as a receptacle
for a copy of the boot memory content in addition to CPU state
-and HPTE region, in the case a crash does occur.
+and HPTE region, in the case a crash does occur. Since this reserved
+memory area is used only after the system crash, there is no point in
+blocking this significant chunk of memory from production kernel.
+Hence, the implementation uses the Linux kernel's Contiguous Memory
+Allocator (CMA) for memory reservation if CMA is configured for kernel.
+With CMA reservation this memory will be available for applications to
+use it, while kernel is prevented from using it. With this fadump will
+still be able to capture all of the kernel memory and most of the user
+space memory except the user pages that were present in CMA region.
o Memory Reservation during first kernel
@@ -162,6 +170,9 @@
1. Set config option CONFIG_FA_DUMP=y and build kernel.
2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
+ By default, fadump reserved memory will be initialized as CMA area.
+ Alternatively, user can boot linux kernel with 'fadump=nocma' to
+ prevent fadump to use CMA.
3. Optionally, user can also set 'crashkernel=' kernel cmdline
to specify size of the memory to reserve for boot memory dump
preservation.
@@ -172,6 +183,10 @@
2. If firmware-assisted dump fails to reserve memory then it
will fallback to existing kdump mechanism if 'crashkernel='
option is set at kernel cmdline.
+ 3. if user wants to capture all of user space memory and ok with
+ reserved memory not available to production system, then
+ 'fadump=nocma' kernel parameter can be used to fallback to
+ old behaviour.
Sysfs/debugfs files:
------------
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6d6e1ff..a1e858e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1143,7 +1143,7 @@
config PIN_TLB_IMMR
bool "Pinned TLB for IMMR"
- depends on PIN_TLB
+ depends on PIN_TLB || PPC_EARLY_DEBUG_CPM
default y
config PIN_TLB_TEXT
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index fa28e84..4c4e0eb 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -417,6 +417,9 @@
archprepare: checkbin
+archheaders:
+ $(Q)$(MAKE) $(build)=arch/powerpc/kernel/syscalls all
+
ifdef CONFIG_STACKPROTECTOR
prepare: stack_protector_prepare
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index f045f84..b0491b8 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -93,7 +93,8 @@ static void *serial_get_stdout_devp(void)
if (devp == NULL)
goto err_out;
- if (getprop(devp, "linux,stdout-path", path, MAX_PATH_LEN) > 0) {
+ if (getprop(devp, "linux,stdout-path", path, MAX_PATH_LEN) > 0 ||
+ getprop(devp, "stdout-path", path, MAX_PATH_LEN) > 0) {
devp = finddevice(path);
if (devp == NULL)
goto err_out;
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 3196d22..77ff7fb 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -1,3 +1,7 @@
+generated-y += syscall_table_32.h
+generated-y += syscall_table_64.h
+generated-y += syscall_table_c32.h
+generated-y += syscall_table_spu.h
generic-y += div64.h
generic-y += export.h
generic-y += irq_regs.h
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 15bc16b..cf5ba52 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -1,11 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_4K_H
-/*
- * Entries per page directory level. The PTE level must use a 64b record
- * for each page table entry. The PMD and PGD level use a 32b record for
- * each entry by assuming that each entry is page aligned.
- */
+
#define H_PTE_INDEX_SIZE 9
#define H_PMD_INDEX_SIZE 7
#define H_PUD_INDEX_SIZE 9
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 1e7a335..188776b 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -48,6 +48,10 @@
#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
+/* Alignement per CMA requirement. */
+#define FADUMP_CMA_ALIGNMENT (PAGE_SIZE << \
+ max_t(unsigned long, MAX_ORDER - 1, pageblock_order))
+
/* Firmware provided dump sections */
#define FADUMP_CPU_STATE_DATA 0x0001
#define FADUMP_HPTE_REGION 0x0002
@@ -141,6 +145,7 @@ struct fw_dump {
unsigned long fadump_supported:1;
unsigned long dump_active:1;
unsigned long dump_registered:1;
+ unsigned long nocma:1;
};
/*
@@ -200,7 +205,7 @@ struct fad_crash_memory_ranges {
unsigned long long size;
};
-extern int is_fadump_boot_memory_area(u64 addr, ulong size);
+extern int is_fadump_memory_area(u64 addr, ulong size);
extern int early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data);
extern int fadump_reserve_mem(void);
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 35db0cb..e847ff6 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -215,11 +215,12 @@ struct iommu_table_group {
extern void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num);
-extern int iommu_add_device(struct device *dev);
+extern int iommu_add_device(struct iommu_table_group *table_group,
+ struct device *dev);
extern void iommu_del_device(struct device *dev);
-extern int __init tce_iommu_bus_notifier_init(void);
-extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
- unsigned long *hpa, enum dma_data_direction *direction);
+extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
@@ -227,7 +228,8 @@ static inline void iommu_register_group(struct iommu_table_group *table_group,
{
}
-static inline int iommu_add_device(struct device *dev)
+static inline int iommu_add_device(struct iommu_table_group *table_group,
+ struct device *dev)
{
return 0;
}
@@ -235,11 +237,6 @@ static inline int iommu_add_device(struct device *dev)
static inline void iommu_del_device(struct device *dev)
{
}
-
-static inline int __init tce_iommu_bus_notifier_init(void)
-{
- return 0;
-}
#endif /* !CONFIG_IOMMU_API */
int dma_iommu_mapping_error(struct device *dev, dma_addr_t dma_addr);
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index c05efd2..6ee8195 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -21,9 +21,12 @@ struct mm_iommu_table_group_mem_t;
extern int isolate_lru_page(struct page *page); /* from internal.h */
extern bool mm_iommu_preregistered(struct mm_struct *mm);
-extern long mm_iommu_get(struct mm_struct *mm,
+extern long mm_iommu_new(struct mm_struct *mm,
unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem);
extern long mm_iommu_put(struct mm_struct *mm,
struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_init(struct mm_struct *mm);
@@ -32,15 +35,23 @@ extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
unsigned long ua, unsigned long size);
extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
struct mm_struct *mm, unsigned long ua, unsigned long size);
-extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+extern struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
unsigned long ua, unsigned long entries);
extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa);
extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua);
+extern bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ unsigned int pageshift, unsigned long *size);
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
+#else
+static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ unsigned int pageshift, unsigned long *size)
+{
+ return false;
+}
#endif
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
extern void set_context(unsigned long id, pgd_t *pgd);
@@ -217,12 +228,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
#endif
}
-static inline int arch_dup_mmap(struct mm_struct *oldmm,
- struct mm_struct *mm)
-{
- return 0;
-}
-
#ifdef CONFIG_PPC_BOOK3E_64
static inline void arch_exit_mmap(struct mm_struct *mm)
{
@@ -247,6 +252,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
#ifdef CONFIG_PPC_MEM_KEYS
bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
bool execute, bool foreign);
+void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm);
#else /* CONFIG_PPC_MEM_KEYS */
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
@@ -259,6 +265,7 @@ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
#define thread_pkey_regs_save(thread)
#define thread_pkey_regs_restore(new_thread, old_thread)
#define thread_pkey_regs_init(thread)
+#define arch_dup_pkeys(oldmm, mm)
static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
{
@@ -267,5 +274,12 @@ static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
#endif /* CONFIG_PPC_MEM_KEYS */
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+ arch_dup_pkeys(oldmm, mm);
+ return 0;
+}
+
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index ff38664..a55b01c 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -347,6 +347,7 @@ extern int opal_async_comp_init(void);
extern int opal_sensor_init(void);
extern int opal_hmi_handler_init(void);
extern int opal_event_init(void);
+int opal_power_control_init(void);
extern int opal_machine_check(struct pt_regs *regs);
extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 94d4490..aee4fcc 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -129,6 +129,7 @@ struct pci_controller {
#endif /* CONFIG_PPC64 */
void *private_data;
+ struct npu *npu;
};
/* These are used for config access before all the PCI probing
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 2af9ded..0c72f18 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -129,5 +129,9 @@ extern void pcibios_scan_phb(struct pci_controller *hose);
extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev);
extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
+extern int pnv_npu2_init(struct pci_controller *hose);
+extern int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
+ unsigned long msr);
+extern int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev);
#endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index ab9f3f0..1a0e7a8 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -18,9 +18,8 @@
#include <linux/thread_info.h>
/* ftrace syscalls requires exporting the sys_call_table */
-#ifdef CONFIG_FTRACE_SYSCALLS
extern const unsigned long sys_call_table[];
-#endif /* CONFIG_FTRACE_SYSCALLS */
+extern const unsigned long compat_sys_call_table[];
static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
deleted file mode 100644
index 01b5171..0000000
--- a/arch/powerpc/include/asm/systbl.h
+++ /dev/null
@@ -1,396 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * List of powerpc syscalls. For the meaning of the _SPU suffix see
- * arch/powerpc/platforms/cell/spu_callbacks.c
- */
-
-SYSCALL(restart_syscall)
-SYSCALL(exit)
-PPC_SYS(fork)
-SYSCALL_SPU(read)
-SYSCALL_SPU(write)
-COMPAT_SYS_SPU(open)
-SYSCALL_SPU(close)
-SYSCALL_SPU(waitpid)
-SYSCALL_SPU(creat)
-SYSCALL_SPU(link)
-SYSCALL_SPU(unlink)
-COMPAT_SYS(execve)
-SYSCALL_SPU(chdir)
-COMPAT_SYS_SPU(time)
-SYSCALL_SPU(mknod)
-SYSCALL_SPU(chmod)
-SYSCALL_SPU(lchown)
-SYSCALL(ni_syscall)
-OLDSYS(stat)
-COMPAT_SYS_SPU(lseek)
-SYSCALL_SPU(getpid)
-COMPAT_SYS(mount)
-SYSX(sys_ni_syscall,sys_oldumount,sys_oldumount)
-SYSCALL_SPU(setuid)
-SYSCALL_SPU(getuid)
-COMPAT_SYS_SPU(stime)
-COMPAT_SYS(ptrace)
-SYSCALL_SPU(alarm)
-OLDSYS(fstat)
-SYSCALL(pause)
-COMPAT_SYS(utime)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(access)
-SYSCALL_SPU(nice)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(sync)
-SYSCALL_SPU(kill)
-SYSCALL_SPU(rename)
-SYSCALL_SPU(mkdir)
-SYSCALL_SPU(rmdir)
-SYSCALL_SPU(dup)
-SYSCALL_SPU(pipe)
-COMPAT_SYS_SPU(times)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(brk)
-SYSCALL_SPU(setgid)
-SYSCALL_SPU(getgid)
-SYSCALL(signal)
-SYSCALL_SPU(geteuid)
-SYSCALL_SPU(getegid)
-SYSCALL(acct)
-SYSCALL(umount)
-SYSCALL(ni_syscall)
-COMPAT_SYS_SPU(ioctl)
-COMPAT_SYS_SPU(fcntl)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(setpgid)
-SYSCALL(ni_syscall)
-SYSX(sys_ni_syscall,sys_olduname,sys_olduname)
-SYSCALL_SPU(umask)
-SYSCALL_SPU(chroot)
-COMPAT_SYS(ustat)
-SYSCALL_SPU(dup2)
-SYSCALL_SPU(getppid)
-SYSCALL_SPU(getpgrp)
-SYSCALL_SPU(setsid)
-SYS32ONLY(sigaction)
-SYSCALL_SPU(sgetmask)
-SYSCALL_SPU(ssetmask)
-SYSCALL_SPU(setreuid)
-SYSCALL_SPU(setregid)
-#define compat_sys_sigsuspend sys_sigsuspend
-SYS32ONLY(sigsuspend)
-SYSX(sys_ni_syscall,compat_sys_sigpending,sys_sigpending)
-SYSCALL_SPU(sethostname)
-COMPAT_SYS_SPU(setrlimit)
-SYSX(sys_ni_syscall,compat_sys_old_getrlimit,sys_old_getrlimit)
-COMPAT_SYS_SPU(getrusage)
-COMPAT_SYS_SPU(gettimeofday)
-COMPAT_SYS_SPU(settimeofday)
-SYSCALL_SPU(getgroups)
-SYSCALL_SPU(setgroups)
-SYSX(sys_ni_syscall,sys_ni_syscall,ppc_select)
-SYSCALL_SPU(symlink)
-OLDSYS(lstat)
-SYSCALL_SPU(readlink)
-SYSCALL(uselib)
-SYSCALL(swapon)
-SYSCALL(reboot)
-SYSX(sys_ni_syscall,compat_sys_old_readdir,sys_old_readdir)
-SYSCALL_SPU(mmap)
-SYSCALL_SPU(munmap)
-COMPAT_SYS_SPU(truncate)
-COMPAT_SYS_SPU(ftruncate)
-SYSCALL_SPU(fchmod)
-SYSCALL_SPU(fchown)
-SYSCALL_SPU(getpriority)
-SYSCALL_SPU(setpriority)
-SYSCALL(ni_syscall)
-COMPAT_SYS(statfs)
-COMPAT_SYS(fstatfs)
-SYSCALL(ni_syscall)
-COMPAT_SYS_SPU(socketcall)
-SYSCALL_SPU(syslog)
-COMPAT_SYS_SPU(setitimer)
-COMPAT_SYS_SPU(getitimer)
-COMPAT_SYS_SPU(newstat)
-COMPAT_SYS_SPU(newlstat)
-COMPAT_SYS_SPU(newfstat)
-SYSX(sys_ni_syscall,sys_uname,sys_uname)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(vhangup)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-COMPAT_SYS_SPU(wait4)
-SYSCALL(swapoff)
-COMPAT_SYS_SPU(sysinfo)
-COMPAT_SYS(ipc)
-SYSCALL_SPU(fsync)
-SYS32ONLY(sigreturn)
-PPC_SYS(clone)
-SYSCALL_SPU(setdomainname)
-SYSCALL_SPU(newuname)
-SYSCALL(ni_syscall)
-COMPAT_SYS_SPU(adjtimex)
-SYSCALL_SPU(mprotect)
-SYSX(sys_ni_syscall,compat_sys_sigprocmask,sys_sigprocmask)
-SYSCALL(ni_syscall)
-SYSCALL(init_module)
-SYSCALL(delete_module)
-SYSCALL(ni_syscall)
-SYSCALL(quotactl)
-SYSCALL_SPU(getpgid)
-SYSCALL_SPU(fchdir)
-SYSCALL_SPU(bdflush)
-SYSCALL_SPU(sysfs)
-SYSX_SPU(ppc64_personality,ppc64_personality,sys_personality)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(setfsuid)
-SYSCALL_SPU(setfsgid)
-SYSCALL_SPU(llseek)
-COMPAT_SYS_SPU(getdents)
-COMPAT_SPU_NEW(select)
-SYSCALL_SPU(flock)
-SYSCALL_SPU(msync)
-COMPAT_SYS_SPU(readv)
-COMPAT_SYS_SPU(writev)
-SYSCALL_SPU(getsid)
-SYSCALL_SPU(fdatasync)
-COMPAT_SYS(sysctl)
-SYSCALL_SPU(mlock)
-SYSCALL_SPU(munlock)
-SYSCALL_SPU(mlockall)
-SYSCALL_SPU(munlockall)
-SYSCALL_SPU(sched_setparam)
-SYSCALL_SPU(sched_getparam)
-SYSCALL_SPU(sched_setscheduler)
-SYSCALL_SPU(sched_getscheduler)
-SYSCALL_SPU(sched_yield)
-SYSCALL_SPU(sched_get_priority_max)
-SYSCALL_SPU(sched_get_priority_min)
-COMPAT_SYS_SPU(sched_rr_get_interval)
-COMPAT_SYS_SPU(nanosleep)
-SYSCALL_SPU(mremap)
-SYSCALL_SPU(setresuid)
-SYSCALL_SPU(getresuid)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(poll)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(setresgid)
-SYSCALL_SPU(getresgid)
-SYSCALL_SPU(prctl)
-COMPAT_SYS(rt_sigreturn)
-COMPAT_SYS(rt_sigaction)
-COMPAT_SYS(rt_sigprocmask)
-COMPAT_SYS(rt_sigpending)
-COMPAT_SYS(rt_sigtimedwait)
-COMPAT_SYS(rt_sigqueueinfo)
-COMPAT_SYS(rt_sigsuspend)
-COMPAT_SYS_SPU(pread64)
-COMPAT_SYS_SPU(pwrite64)
-SYSCALL_SPU(chown)
-SYSCALL_SPU(getcwd)
-SYSCALL_SPU(capget)
-SYSCALL_SPU(capset)
-COMPAT_SYS(sigaltstack)
-SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-PPC_SYS(vfork)
-COMPAT_SYS_SPU(getrlimit)
-COMPAT_SYS_SPU(readahead)
-SYS32ONLY(mmap2)
-SYS32ONLY(truncate64)
-SYS32ONLY(ftruncate64)
-SYSX(sys_ni_syscall,sys_stat64,sys_stat64)
-SYSX(sys_ni_syscall,sys_lstat64,sys_lstat64)
-SYSX(sys_ni_syscall,sys_fstat64,sys_fstat64)
-SYSCALL(pciconfig_read)
-SYSCALL(pciconfig_write)
-SYSCALL(pciconfig_iobase)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(getdents64)
-SYSCALL_SPU(pivot_root)
-SYSX(sys_ni_syscall,compat_sys_fcntl64,sys_fcntl64)
-SYSCALL_SPU(madvise)
-SYSCALL_SPU(mincore)
-SYSCALL_SPU(gettid)
-SYSCALL_SPU(tkill)
-SYSCALL_SPU(setxattr)
-SYSCALL_SPU(lsetxattr)
-SYSCALL_SPU(fsetxattr)
-SYSCALL_SPU(getxattr)
-SYSCALL_SPU(lgetxattr)
-SYSCALL_SPU(fgetxattr)
-SYSCALL_SPU(listxattr)
-SYSCALL_SPU(llistxattr)
-SYSCALL_SPU(flistxattr)
-SYSCALL_SPU(removexattr)
-SYSCALL_SPU(lremovexattr)
-SYSCALL_SPU(fremovexattr)
-COMPAT_SYS_SPU(futex)
-COMPAT_SYS_SPU(sched_setaffinity)
-COMPAT_SYS_SPU(sched_getaffinity)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYS32ONLY(sendfile64)
-COMPAT_SYS_SPU(io_setup)
-SYSCALL_SPU(io_destroy)
-COMPAT_SYS_SPU(io_getevents)
-COMPAT_SYS_SPU(io_submit)
-SYSCALL_SPU(io_cancel)
-SYSCALL(set_tid_address)
-SYSX_SPU(sys_fadvise64,ppc32_fadvise64,sys_fadvise64)
-SYSCALL(exit_group)
-COMPAT_SYS(lookup_dcookie)
-SYSCALL_SPU(epoll_create)
-SYSCALL_SPU(epoll_ctl)
-SYSCALL_SPU(epoll_wait)
-SYSCALL_SPU(remap_file_pages)
-COMPAT_SYS_SPU(timer_create)
-COMPAT_SYS_SPU(timer_settime)
-COMPAT_SYS_SPU(timer_gettime)
-SYSCALL_SPU(timer_getoverrun)
-SYSCALL_SPU(timer_delete)
-COMPAT_SYS_SPU(clock_settime)
-COMPAT_SYS_SPU(clock_gettime)
-COMPAT_SYS_SPU(clock_getres)
-COMPAT_SYS_SPU(clock_nanosleep)
-SYSX(ppc64_swapcontext,ppc32_swapcontext,ppc_swapcontext)
-SYSCALL_SPU(tgkill)
-COMPAT_SYS_SPU(utimes)
-COMPAT_SYS_SPU(statfs64)
-COMPAT_SYS_SPU(fstatfs64)
-SYSX(sys_ni_syscall,ppc_fadvise64_64,ppc_fadvise64_64)
-SYSCALL_SPU(rtas)
-OLDSYS(debug_setcontext)
-SYSCALL(ni_syscall)
-COMPAT_SYS(migrate_pages)
-COMPAT_SYS(mbind)
-COMPAT_SYS(get_mempolicy)
-COMPAT_SYS(set_mempolicy)
-COMPAT_SYS(mq_open)
-SYSCALL(mq_unlink)
-COMPAT_SYS(mq_timedsend)
-COMPAT_SYS(mq_timedreceive)
-COMPAT_SYS(mq_notify)
-COMPAT_SYS(mq_getsetattr)
-COMPAT_SYS(kexec_load)
-SYSCALL(add_key)
-SYSCALL(request_key)
-COMPAT_SYS(keyctl)
-COMPAT_SYS(waitid)
-SYSCALL(ioprio_set)
-SYSCALL(ioprio_get)
-SYSCALL(inotify_init)
-SYSCALL(inotify_add_watch)
-SYSCALL(inotify_rm_watch)
-SYSCALL(spu_run)
-SYSCALL(spu_create)
-COMPAT_SYS(pselect6)
-COMPAT_SYS(ppoll)
-SYSCALL_SPU(unshare)
-SYSCALL_SPU(splice)
-SYSCALL_SPU(tee)
-COMPAT_SYS_SPU(vmsplice)
-COMPAT_SYS_SPU(openat)
-SYSCALL_SPU(mkdirat)
-SYSCALL_SPU(mknodat)
-SYSCALL_SPU(fchownat)
-COMPAT_SYS_SPU(futimesat)
-SYSX_SPU(sys_newfstatat,sys_fstatat64,sys_fstatat64)
-SYSCALL_SPU(unlinkat)
-SYSCALL_SPU(renameat)
-SYSCALL_SPU(linkat)
-SYSCALL_SPU(symlinkat)
-SYSCALL_SPU(readlinkat)
-SYSCALL_SPU(fchmodat)
-SYSCALL_SPU(faccessat)
-COMPAT_SYS_SPU(get_robust_list)
-COMPAT_SYS_SPU(set_robust_list)
-COMPAT_SYS_SPU(move_pages)
-SYSCALL_SPU(getcpu)
-COMPAT_SYS(epoll_pwait)
-COMPAT_SYS_SPU(utimensat)
-COMPAT_SYS_SPU(signalfd)
-SYSCALL_SPU(timerfd_create)
-SYSCALL_SPU(eventfd)
-COMPAT_SYS_SPU(sync_file_range2)
-COMPAT_SYS(fallocate)
-SYSCALL(subpage_prot)
-COMPAT_SYS_SPU(timerfd_settime)
-COMPAT_SYS_SPU(timerfd_gettime)
-COMPAT_SYS_SPU(signalfd4)
-SYSCALL_SPU(eventfd2)
-SYSCALL_SPU(epoll_create1)
-SYSCALL_SPU(dup3)
-SYSCALL_SPU(pipe2)
-SYSCALL(inotify_init1)
-SYSCALL_SPU(perf_event_open)
-COMPAT_SYS_SPU(preadv)
-COMPAT_SYS_SPU(pwritev)
-COMPAT_SYS(rt_tgsigqueueinfo)
-SYSCALL(fanotify_init)
-COMPAT_SYS(fanotify_mark)
-SYSCALL_SPU(prlimit64)
-SYSCALL_SPU(socket)
-SYSCALL_SPU(bind)
-SYSCALL_SPU(connect)
-SYSCALL_SPU(listen)
-SYSCALL_SPU(accept)
-SYSCALL_SPU(getsockname)
-SYSCALL_SPU(getpeername)
-SYSCALL_SPU(socketpair)
-SYSCALL_SPU(send)
-SYSCALL_SPU(sendto)
-COMPAT_SYS_SPU(recv)
-COMPAT_SYS_SPU(recvfrom)
-SYSCALL_SPU(shutdown)
-COMPAT_SYS_SPU(setsockopt)
-COMPAT_SYS_SPU(getsockopt)
-COMPAT_SYS_SPU(sendmsg)
-COMPAT_SYS_SPU(recvmsg)
-COMPAT_SYS_SPU(recvmmsg)
-SYSCALL_SPU(accept4)
-SYSCALL_SPU(name_to_handle_at)
-COMPAT_SYS_SPU(open_by_handle_at)
-COMPAT_SYS_SPU(clock_adjtime)
-SYSCALL_SPU(syncfs)
-COMPAT_SYS_SPU(sendmmsg)
-SYSCALL_SPU(setns)
-COMPAT_SYS(process_vm_readv)
-COMPAT_SYS(process_vm_writev)
-SYSCALL(finit_module)
-SYSCALL(kcmp) /* sys_kcmp */
-SYSCALL_SPU(sched_setattr)
-SYSCALL_SPU(sched_getattr)
-SYSCALL_SPU(renameat2)
-SYSCALL_SPU(seccomp)
-SYSCALL_SPU(getrandom)
-SYSCALL_SPU(memfd_create)
-SYSCALL_SPU(bpf)
-COMPAT_SYS(execveat)
-PPC64ONLY(switch_endian)
-SYSCALL_SPU(userfaultfd)
-SYSCALL_SPU(membarrier)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL(mlock2)
-SYSCALL(copy_file_range)
-COMPAT_SYS_SPU(preadv2)
-COMPAT_SYS_SPU(pwritev2)
-SYSCALL(kexec_file_load)
-SYSCALL(statx)
-SYSCALL(pkey_alloc)
-SYSCALL(pkey_free)
-SYSCALL(pkey_mprotect)
-SYSCALL(rseq)
-COMPAT_SYS(io_pgetevents)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index b0de85b..a3c35e6 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -11,8 +11,7 @@
#include <uapi/asm/unistd.h>
-
-#define NR_syscalls 389
+#define NR_syscalls __NR_syscalls
#define __NR__exit __NR_exit
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 3712152..8ab8ba1 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,6 +1,8 @@
# UAPI Header export list
include include/uapi/asm-generic/Kbuild.asm
+generated-y += unistd_32.h
+generated-y += unistd_64.h
generic-y += param.h
generic-y += poll.h
generic-y += resource.h
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 985534d..5f84e3d 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -10,395 +10,10 @@
#ifndef _UAPI_ASM_POWERPC_UNISTD_H_
#define _UAPI_ASM_POWERPC_UNISTD_H_
-
-#define __NR_restart_syscall 0
-#define __NR_exit 1
-#define __NR_fork 2
-#define __NR_read 3
-#define __NR_write 4
-#define __NR_open 5
-#define __NR_close 6
-#define __NR_waitpid 7
-#define __NR_creat 8
-#define __NR_link 9
-#define __NR_unlink 10
-#define __NR_execve 11
-#define __NR_chdir 12
-#define __NR_time 13
-#define __NR_mknod 14
-#define __NR_chmod 15
-#define __NR_lchown 16
-#define __NR_break 17
-#define __NR_oldstat 18
-#define __NR_lseek 19
-#define __NR_getpid 20
-#define __NR_mount 21
-#define __NR_umount 22
-#define __NR_setuid 23
-#define __NR_getuid 24
-#define __NR_stime 25
-#define __NR_ptrace 26
-#define __NR_alarm 27
-#define __NR_oldfstat 28
-#define __NR_pause 29
-#define __NR_utime 30
-#define __NR_stty 31
-#define __NR_gtty 32
-#define __NR_access 33
-#define __NR_nice 34
-#define __NR_ftime 35
-#define __NR_sync 36
-#define __NR_kill 37
-#define __NR_rename 38
-#define __NR_mkdir 39
-#define __NR_rmdir 40
-#define __NR_dup 41
-#define __NR_pipe 42
-#define __NR_times 43
-#define __NR_prof 44
-#define __NR_brk 45
-#define __NR_setgid 46
-#define __NR_getgid 47
-#define __NR_signal 48
-#define __NR_geteuid 49
-#define __NR_getegid 50
-#define __NR_acct 51
-#define __NR_umount2 52
-#define __NR_lock 53
-#define __NR_ioctl 54
-#define __NR_fcntl 55
-#define __NR_mpx 56
-#define __NR_setpgid 57
-#define __NR_ulimit 58
-#define __NR_oldolduname 59
-#define __NR_umask 60
-#define __NR_chroot 61
-#define __NR_ustat 62
-#define __NR_dup2 63
-#define __NR_getppid 64
-#define __NR_getpgrp 65
-#define __NR_setsid 66
-#define __NR_sigaction 67
-#define __NR_sgetmask 68
-#define __NR_ssetmask 69
-#define __NR_setreuid 70
-#define __NR_setregid 71
-#define __NR_sigsuspend 72
-#define __NR_sigpending 73
-#define __NR_sethostname 74
-#define __NR_setrlimit 75
-#define __NR_getrlimit 76
-#define __NR_getrusage 77
-#define __NR_gettimeofday 78
-#define __NR_settimeofday 79
-#define __NR_getgroups 80
-#define __NR_setgroups 81
-#define __NR_select 82
-#define __NR_symlink 83
-#define __NR_oldlstat 84
-#define __NR_readlink 85
-#define __NR_uselib 86
-#define __NR_swapon 87
-#define __NR_reboot 88
-#define __NR_readdir 89
-#define __NR_mmap 90
-#define __NR_munmap 91
-#define __NR_truncate 92
-#define __NR_ftruncate 93
-#define __NR_fchmod 94
-#define __NR_fchown 95
-#define __NR_getpriority 96
-#define __NR_setpriority 97
-#define __NR_profil 98
-#define __NR_statfs 99
-#define __NR_fstatfs 100
-#define __NR_ioperm 101
-#define __NR_socketcall 102
-#define __NR_syslog 103
-#define __NR_setitimer 104
-#define __NR_getitimer 105
-#define __NR_stat 106
-#define __NR_lstat 107
-#define __NR_fstat 108
-#define __NR_olduname 109
-#define __NR_iopl 110
-#define __NR_vhangup 111
-#define __NR_idle 112
-#define __NR_vm86 113
-#define __NR_wait4 114
-#define __NR_swapoff 115
-#define __NR_sysinfo 116
-#define __NR_ipc 117
-#define __NR_fsync 118
-#define __NR_sigreturn 119
-#define __NR_clone 120
-#define __NR_setdomainname 121
-#define __NR_uname 122
-#define __NR_modify_ldt 123
-#define __NR_adjtimex 124
-#define __NR_mprotect 125
-#define __NR_sigprocmask 126
-#define __NR_create_module 127
-#define __NR_init_module 128
-#define __NR_delete_module 129
-#define __NR_get_kernel_syms 130
-#define __NR_quotactl 131
-#define __NR_getpgid 132
-#define __NR_fchdir 133
-#define __NR_bdflush 134
-#define __NR_sysfs 135
-#define __NR_personality 136
-#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
-#define __NR_setfsuid 138
-#define __NR_setfsgid 139
-#define __NR__llseek 140
-#define __NR_getdents 141
-#define __NR__newselect 142
-#define __NR_flock 143
-#define __NR_msync 144
-#define __NR_readv 145
-#define __NR_writev 146
-#define __NR_getsid 147
-#define __NR_fdatasync 148
-#define __NR__sysctl 149
-#define __NR_mlock 150
-#define __NR_munlock 151
-#define __NR_mlockall 152
-#define __NR_munlockall 153
-#define __NR_sched_setparam 154
-#define __NR_sched_getparam 155
-#define __NR_sched_setscheduler 156
-#define __NR_sched_getscheduler 157
-#define __NR_sched_yield 158
-#define __NR_sched_get_priority_max 159
-#define __NR_sched_get_priority_min 160
-#define __NR_sched_rr_get_interval 161
-#define __NR_nanosleep 162
-#define __NR_mremap 163
-#define __NR_setresuid 164
-#define __NR_getresuid 165
-#define __NR_query_module 166
-#define __NR_poll 167
-#define __NR_nfsservctl 168
-#define __NR_setresgid 169
-#define __NR_getresgid 170
-#define __NR_prctl 171
-#define __NR_rt_sigreturn 172
-#define __NR_rt_sigaction 173
-#define __NR_rt_sigprocmask 174
-#define __NR_rt_sigpending 175
-#define __NR_rt_sigtimedwait 176
-#define __NR_rt_sigqueueinfo 177
-#define __NR_rt_sigsuspend 178
-#define __NR_pread64 179
-#define __NR_pwrite64 180
-#define __NR_chown 181
-#define __NR_getcwd 182
-#define __NR_capget 183
-#define __NR_capset 184
-#define __NR_sigaltstack 185
-#define __NR_sendfile 186
-#define __NR_getpmsg 187 /* some people actually want streams */
-#define __NR_putpmsg 188 /* some people actually want streams */
-#define __NR_vfork 189
-#define __NR_ugetrlimit 190 /* SuS compliant getrlimit */
-#define __NR_readahead 191
-#ifndef __powerpc64__ /* these are 32-bit only */
-#define __NR_mmap2 192
-#define __NR_truncate64 193
-#define __NR_ftruncate64 194
-#define __NR_stat64 195
-#define __NR_lstat64 196
-#define __NR_fstat64 197
-#endif
-#define __NR_pciconfig_read 198
-#define __NR_pciconfig_write 199
-#define __NR_pciconfig_iobase 200
-#define __NR_multiplexer 201
-#define __NR_getdents64 202
-#define __NR_pivot_root 203
#ifndef __powerpc64__
-#define __NR_fcntl64 204
-#endif
-#define __NR_madvise 205
-#define __NR_mincore 206
-#define __NR_gettid 207
-#define __NR_tkill 208
-#define __NR_setxattr 209
-#define __NR_lsetxattr 210
-#define __NR_fsetxattr 211
-#define __NR_getxattr 212
-#define __NR_lgetxattr 213
-#define __NR_fgetxattr 214
-#define __NR_listxattr 215
-#define __NR_llistxattr 216
-#define __NR_flistxattr 217
-#define __NR_removexattr 218
-#define __NR_lremovexattr 219
-#define __NR_fremovexattr 220
-#define __NR_futex 221
-#define __NR_sched_setaffinity 222
-#define __NR_sched_getaffinity 223
-/* 224 currently unused */
-#define __NR_tuxcall 225
-#ifndef __powerpc64__
-#define __NR_sendfile64 226
-#endif
-#define __NR_io_setup 227
-#define __NR_io_destroy 228
-#define __NR_io_getevents 229
-#define __NR_io_submit 230
-#define __NR_io_cancel 231
-#define __NR_set_tid_address 232
-#define __NR_fadvise64 233
-#define __NR_exit_group 234
-#define __NR_lookup_dcookie 235
-#define __NR_epoll_create 236
-#define __NR_epoll_ctl 237
-#define __NR_epoll_wait 238
-#define __NR_remap_file_pages 239
-#define __NR_timer_create 240
-#define __NR_timer_settime 241
-#define __NR_timer_gettime 242
-#define __NR_timer_getoverrun 243
-#define __NR_timer_delete 244
-#define __NR_clock_settime 245
-#define __NR_clock_gettime 246
-#define __NR_clock_getres 247
-#define __NR_clock_nanosleep 248
-#define __NR_swapcontext 249
-#define __NR_tgkill 250
-#define __NR_utimes 251
-#define __NR_statfs64 252
-#define __NR_fstatfs64 253
-#ifndef __powerpc64__
-#define __NR_fadvise64_64 254
-#endif
-#define __NR_rtas 255
-#define __NR_sys_debug_setcontext 256
-/* Number 257 is reserved for vserver */
-#define __NR_migrate_pages 258
-#define __NR_mbind 259
-#define __NR_get_mempolicy 260
-#define __NR_set_mempolicy 261
-#define __NR_mq_open 262
-#define __NR_mq_unlink 263
-#define __NR_mq_timedsend 264
-#define __NR_mq_timedreceive 265
-#define __NR_mq_notify 266
-#define __NR_mq_getsetattr 267
-#define __NR_kexec_load 268
-#define __NR_add_key 269
-#define __NR_request_key 270
-#define __NR_keyctl 271
-#define __NR_waitid 272
-#define __NR_ioprio_set 273
-#define __NR_ioprio_get 274
-#define __NR_inotify_init 275
-#define __NR_inotify_add_watch 276
-#define __NR_inotify_rm_watch 277
-#define __NR_spu_run 278
-#define __NR_spu_create 279
-#define __NR_pselect6 280
-#define __NR_ppoll 281
-#define __NR_unshare 282
-#define __NR_splice 283
-#define __NR_tee 284
-#define __NR_vmsplice 285
-#define __NR_openat 286
-#define __NR_mkdirat 287
-#define __NR_mknodat 288
-#define __NR_fchownat 289
-#define __NR_futimesat 290
-#ifdef __powerpc64__
-#define __NR_newfstatat 291
+#include <asm/unistd_32.h>
#else
-#define __NR_fstatat64 291
+#include <asm/unistd_64.h>
#endif
-#define __NR_unlinkat 292
-#define __NR_renameat 293
-#define __NR_linkat 294
-#define __NR_symlinkat 295
-#define __NR_readlinkat 296
-#define __NR_fchmodat 297
-#define __NR_faccessat 298
-#define __NR_get_robust_list 299
-#define __NR_set_robust_list 300
-#define __NR_move_pages 301
-#define __NR_getcpu 302
-#define __NR_epoll_pwait 303
-#define __NR_utimensat 304
-#define __NR_signalfd 305
-#define __NR_timerfd_create 306
-#define __NR_eventfd 307
-#define __NR_sync_file_range2 308
-#define __NR_fallocate 309
-#define __NR_subpage_prot 310
-#define __NR_timerfd_settime 311
-#define __NR_timerfd_gettime 312
-#define __NR_signalfd4 313
-#define __NR_eventfd2 314
-#define __NR_epoll_create1 315
-#define __NR_dup3 316
-#define __NR_pipe2 317
-#define __NR_inotify_init1 318
-#define __NR_perf_event_open 319
-#define __NR_preadv 320
-#define __NR_pwritev 321
-#define __NR_rt_tgsigqueueinfo 322
-#define __NR_fanotify_init 323
-#define __NR_fanotify_mark 324
-#define __NR_prlimit64 325
-#define __NR_socket 326
-#define __NR_bind 327
-#define __NR_connect 328
-#define __NR_listen 329
-#define __NR_accept 330
-#define __NR_getsockname 331
-#define __NR_getpeername 332
-#define __NR_socketpair 333
-#define __NR_send 334
-#define __NR_sendto 335
-#define __NR_recv 336
-#define __NR_recvfrom 337
-#define __NR_shutdown 338
-#define __NR_setsockopt 339
-#define __NR_getsockopt 340
-#define __NR_sendmsg 341
-#define __NR_recvmsg 342
-#define __NR_recvmmsg 343
-#define __NR_accept4 344
-#define __NR_name_to_handle_at 345
-#define __NR_open_by_handle_at 346
-#define __NR_clock_adjtime 347
-#define __NR_syncfs 348
-#define __NR_sendmmsg 349
-#define __NR_setns 350
-#define __NR_process_vm_readv 351
-#define __NR_process_vm_writev 352
-#define __NR_finit_module 353
-#define __NR_kcmp 354
-#define __NR_sched_setattr 355
-#define __NR_sched_getattr 356
-#define __NR_renameat2 357
-#define __NR_seccomp 358
-#define __NR_getrandom 359
-#define __NR_memfd_create 360
-#define __NR_bpf 361
-#define __NR_execveat 362
-#define __NR_switch_endian 363
-#define __NR_userfaultfd 364
-#define __NR_membarrier 365
-#define __NR_mlock2 378
-#define __NR_copy_file_range 379
-#define __NR_preadv2 380
-#define __NR_pwritev2 381
-#define __NR_kexec_file_load 382
-#define __NR_statx 383
-#define __NR_pkey_alloc 384
-#define __NR_pkey_free 385
-#define __NR_pkey_mprotect 386
-#define __NR_rseq 387
-#define __NR_io_pgetevents 388
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index a5a6a24..cb7f0bb 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -160,16 +160,6 @@
extra-$(CONFIG_PPC64) += entry_64.o
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
-extra-y += systbl_chk.i
-$(obj)/systbl.o: systbl_chk
-
-quiet_cmd_systbl_chk = CALL $<
- cmd_systbl_chk = $(CONFIG_SHELL) $< $(obj)/systbl_chk.i
-
-PHONY += systbl_chk
-systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i
- $(call cmd,systbl_chk)
-
ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
$(obj)/built-in.a: prom_init_check
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 7c2032e..435927f 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -54,6 +54,9 @@
SYS_CALL_TABLE:
.tc sys_call_table[TC],sys_call_table
+COMPAT_SYS_CALL_TABLE:
+ .tc compat_sys_call_table[TC],compat_sys_call_table
+
/* This value is used to mark exception frames on the stack. */
exception_marker:
.tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
@@ -178,7 +181,7 @@
ld r11,SYS_CALL_TABLE@toc(2)
andis. r10,r10,_TIF_32BIT@h
beq 15f
- addi r11,r11,8 /* use 32-bit syscall entries */
+ ld r11,COMPAT_SYS_CALL_TABLE@toc(2)
clrldi r3,r3,32
clrldi r4,r4,32
clrldi r5,r5,32
@@ -186,7 +189,7 @@
clrldi r7,r7,32
clrldi r8,r8,32
15:
- slwi r0,r0,4
+ slwi r0,r0,3
barrier_nospec_asm
/*
@@ -291,6 +294,10 @@
HMT_MEDIUM_LOW
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ std r8, PACATMSCRATCH(r13)
+#endif
+
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
ld r2,GPR2(r1)
ld r1,GPR1(r1)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 89d32bb..ed3d6fa 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1031,7 +1031,7 @@
EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addi r3,r1,STACK_FRAME_OVERHEAD
- BRANCH_LINK_TO_FAR(hmi_exception_realmode) /* Function call ABI */
+ BRANCH_LINK_TO_FAR(DOTSYM(hmi_exception_realmode)) /* Function call ABI */
cmpdi cr0,r3,0
/* Windup the stack. */
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 7a0da83..45a8d0b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -35,6 +35,7 @@
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
+#include <linux/cma.h>
#include <asm/debugfs.h>
#include <asm/page.h>
@@ -46,6 +47,9 @@
static struct fw_dump fw_dump;
static struct fadump_mem_struct fdm;
static const struct fadump_mem_struct *fdm_active;
+#ifdef CONFIG_CMA
+static struct cma *fadump_cma;
+#endif
static DEFINE_MUTEX(fadump_mutex);
struct fad_crash_memory_ranges *crash_memory_ranges;
@@ -53,6 +57,67 @@ int crash_memory_ranges_size;
int crash_mem_ranges;
int max_crash_mem_ranges;
+#ifdef CONFIG_CMA
+/*
+ * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
+ *
+ * This function initializes CMA area from fadump reserved memory.
+ * The total size of fadump reserved memory covers for boot memory size
+ * + cpu data size + hpte size and metadata.
+ * Initialize only the area equivalent to boot memory size for CMA use.
+ * The reamining portion of fadump reserved memory will be not given
+ * to CMA and pages for thoes will stay reserved. boot memory size is
+ * aligned per CMA requirement to satisy cma_init_reserved_mem() call.
+ * But for some reason even if it fails we still have the memory reservation
+ * with us and we can still continue doing fadump.
+ */
+int __init fadump_cma_init(void)
+{
+ unsigned long long base, size;
+ int rc;
+
+ if (!fw_dump.fadump_enabled)
+ return 0;
+
+ /*
+ * Do not use CMA if user has provided fadump=nocma kernel parameter.
+ * Return 1 to continue with fadump old behaviour.
+ */
+ if (fw_dump.nocma)
+ return 1;
+
+ base = fw_dump.reserve_dump_area_start;
+ size = fw_dump.boot_memory_size;
+
+ if (!size)
+ return 0;
+
+ rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
+ if (rc) {
+ pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
+ /*
+ * Though the CMA init has failed we still have memory
+ * reservation with us. The reserved memory will be
+ * blocked from production system usage. Hence return 1,
+ * so that we can continue with fadump.
+ */
+ return 1;
+ }
+
+ /*
+ * So we now have successfully initialized cma area for fadump.
+ */
+ pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx "
+ "bytes of memory reserved for firmware-assisted dump\n",
+ cma_get_size(fadump_cma),
+ (unsigned long)cma_get_base(fadump_cma) >> 20,
+ fw_dump.reserve_dump_area_size);
+ return 1;
+}
+#else
+static int __init fadump_cma_init(void) { return 1; }
+#endif /* CONFIG_CMA */
+
/* Scan the Firmware Assisted dump configuration details. */
int __init early_init_dt_scan_fw_dump(unsigned long node,
const char *uname, int depth, void *data)
@@ -118,13 +183,19 @@ int __init early_init_dt_scan_fw_dump(unsigned long node,
/*
* If fadump is registered, check if the memory provided
- * falls within boot memory area.
+ * falls within boot memory area and reserved memory area.
*/
-int is_fadump_boot_memory_area(u64 addr, ulong size)
+int is_fadump_memory_area(u64 addr, ulong size)
{
+ u64 d_start = fw_dump.reserve_dump_area_start;
+ u64 d_end = d_start + fw_dump.reserve_dump_area_size;
+
if (!fw_dump.dump_registered)
return 0;
+ if (((addr + size) > d_start) && (addr <= d_end))
+ return 1;
+
return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
}
@@ -172,6 +243,35 @@ static int is_boot_memory_area_contiguous(void)
return ret;
}
+/*
+ * Returns true, if there are no holes in reserved memory area,
+ * false otherwise.
+ */
+static bool is_reserved_memory_area_contiguous(void)
+{
+ struct memblock_region *reg;
+ unsigned long start, end;
+ unsigned long d_start = fw_dump.reserve_dump_area_start;
+ unsigned long d_end = d_start + fw_dump.reserve_dump_area_size;
+
+ for_each_memblock(memory, reg) {
+ start = max(d_start, (unsigned long)reg->base);
+ end = min(d_end, (unsigned long)(reg->base + reg->size));
+ if (d_start < end) {
+ /* Memory hole from d_start to start */
+ if (start > d_start)
+ break;
+
+ if (end == d_end)
+ return true;
+
+ d_start = end + 1;
+ }
+ }
+
+ return false;
+}
+
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
@@ -378,8 +478,15 @@ int __init fadump_reserve_mem(void)
*/
if (fdm_active)
fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
- else
+ else {
fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+#ifdef CONFIG_CMA
+ if (!fw_dump.nocma)
+ fw_dump.boot_memory_size =
+ ALIGN(fw_dump.boot_memory_size,
+ FADUMP_CMA_ALIGNMENT);
+#endif
+ }
/*
* Calculate the memory boundary.
@@ -426,8 +533,9 @@ int __init fadump_reserve_mem(void)
fw_dump.fadumphdr_addr =
be64_to_cpu(fdm_active->rmr_region.destination_address) +
be64_to_cpu(fdm_active->rmr_region.source_len);
- pr_debug("fadumphdr_addr = %p\n",
- (void *) fw_dump.fadumphdr_addr);
+ pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
+ fw_dump.reserve_dump_area_start = base;
+ fw_dump.reserve_dump_area_size = size;
} else {
size = get_fadump_area_size();
@@ -455,10 +563,11 @@ int __init fadump_reserve_mem(void)
(unsigned long)(size >> 20),
(unsigned long)(base >> 20),
(unsigned long)(memblock_phys_mem_size() >> 20));
- }
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
+ fw_dump.reserve_dump_area_start = base;
+ fw_dump.reserve_dump_area_size = size;
+ return fadump_cma_init();
+ }
return 1;
}
@@ -477,6 +586,10 @@ static int __init early_fadump_param(char *p)
fw_dump.fadump_enabled = 1;
else if (strncmp(p, "off", 3) == 0)
fw_dump.fadump_enabled = 0;
+ else if (strncmp(p, "nocma", 5) == 0) {
+ fw_dump.fadump_enabled = 1;
+ fw_dump.nocma = 1;
+ }
return 0;
}
@@ -525,8 +638,10 @@ static int register_fw_dump(struct fadump_mem_struct *fdm)
break;
case -3:
if (!is_boot_memory_area_contiguous())
- pr_err("Can't have holes in boot memory area while "
- "registering fadump\n");
+ pr_err("Can't have holes in boot memory area while registering fadump\n");
+ else if (!is_reserved_memory_area_contiguous())
+ pr_err("Can't have holes in reserved memory area while"
+ " registering fadump\n");
printk(KERN_ERR "Failed to register firmware-assisted kernel"
" dump. Parameter Error(%d).\n", rc);
@@ -1229,7 +1344,7 @@ static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
return 0;
}
-static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
+static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm)
{
int rc = 0;
unsigned int wait_time;
@@ -1260,9 +1375,8 @@ void fadump_cleanup(void)
{
/* Invalidate the registration only if dump is active. */
if (fw_dump.dump_active) {
- init_fadump_mem_struct(&fdm,
- be64_to_cpu(fdm_active->cpu_state_data.destination_address));
- fadump_invalidate_dump(&fdm);
+ /* pass the same memory dump structure provided by platform */
+ fadump_invalidate_dump(fdm_active);
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
fadump_unregister_dump(&fdm);
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index f0dc680..9d5d109 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -47,6 +47,7 @@
#include <asm/fadump.h>
#include <asm/vio.h>
#include <asm/tce.h>
+#include <asm/mmu_context.h>
#define DBG(...)
@@ -993,15 +994,19 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
- unsigned long *hpa, enum dma_data_direction *direction)
+long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction)
{
long ret;
+ unsigned long size = 0;
ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
- (*direction == DMA_BIDIRECTIONAL)))
+ (*direction == DMA_BIDIRECTIONAL)) &&
+ !mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
+ &size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
/* if (unlikely(ret))
@@ -1073,11 +1078,8 @@ void iommu_release_ownership(struct iommu_table *tbl)
}
EXPORT_SYMBOL_GPL(iommu_release_ownership);
-int iommu_add_device(struct device *dev)
+int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
{
- struct iommu_table *tbl;
- struct iommu_table_group_link *tgl;
-
/*
* The sysfs entries should be populated before
* binding IOMMU group. If sysfs entries isn't
@@ -1093,32 +1095,10 @@ int iommu_add_device(struct device *dev)
return -EBUSY;
}
- tbl = get_iommu_table_base(dev);
- if (!tbl) {
- pr_debug("%s: Skipping device %s with no tbl\n",
- __func__, dev_name(dev));
- return 0;
- }
-
- tgl = list_first_entry_or_null(&tbl->it_group_list,
- struct iommu_table_group_link, next);
- if (!tgl) {
- pr_debug("%s: Skipping device %s with no group\n",
- __func__, dev_name(dev));
- return 0;
- }
pr_debug("%s: Adding %s to iommu group %d\n",
- __func__, dev_name(dev),
- iommu_group_id(tgl->table_group->group));
+ __func__, dev_name(dev), iommu_group_id(table_group->group));
- if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
- pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
- __func__, IOMMU_PAGE_SIZE(tbl),
- PAGE_SIZE, dev_name(dev));
- return -EINVAL;
- }
-
- return iommu_group_add_device(tgl->table_group->group, dev);
+ return iommu_group_add_device(table_group->group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
@@ -1138,31 +1118,4 @@ void iommu_del_device(struct device *dev)
iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_del_device);
-
-static int tce_iommu_bus_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
-
- switch (action) {
- case BUS_NOTIFY_ADD_DEVICE:
- return iommu_add_device(dev);
- case BUS_NOTIFY_DEL_DEVICE:
- if (dev->iommu_group)
- iommu_del_device(dev);
- return 0;
- default:
- return 0;
- }
-}
-
-static struct notifier_block tce_iommu_bus_nb = {
- .notifier_call = tce_iommu_bus_notifier,
-};
-
-int __init tce_iommu_bus_notifier_init(void)
-{
- bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
- return 0;
-}
#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index fde6323..7cea597 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -400,8 +400,7 @@ void __init find_legacy_serial_ports(void)
/* Next, fill our array with ISA ports */
for_each_node_by_type(np, "serial") {
struct device_node *isa = of_get_parent(np);
- if (isa && (!strcmp(isa->name, "isa") ||
- !strcmp(isa->name, "lpc"))) {
+ if (of_node_name_eq(isa, "isa") || of_node_name_eq(isa, "lpc")) {
if (of_device_is_available(np)) {
index = add_legacy_isa_port(np, isa);
if (index >= 0 && np == stdout)
@@ -415,11 +414,11 @@ void __init find_legacy_serial_ports(void)
/* Next, try to locate PCI ports */
for (np = NULL; (np = of_find_all_nodes(np));) {
struct device_node *pci, *parent = of_get_parent(np);
- if (parent && !strcmp(parent->name, "isa")) {
+ if (of_node_name_eq(parent, "isa")) {
of_node_put(parent);
continue;
}
- if (strcmp(np->name, "serial") &&
+ if (!of_node_name_eq(np, "serial") &&
!of_node_is_type(np, "serial")) {
of_node_put(parent);
continue;
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 9d39e0e..2d47cc7 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -848,7 +848,23 @@ static long restore_tm_user_regs(struct pt_regs *regs,
/* If TM bits are set to the reserved value, it's an invalid context */
if (MSR_TM_RESV(msr_hi))
return 1;
- /* Pull in the MSR TM bits from the user context */
+
+ /*
+ * Disabling preemption, since it is unsafe to be preempted
+ * with MSR[TS] set without recheckpointing.
+ */
+ preempt_disable();
+
+ /*
+ * CAUTION:
+ * After regs->MSR[TS] being updated, make sure that get_user(),
+ * put_user() or similar functions are *not* called. These
+ * functions can generate page faults which will cause the process
+ * to be de-scheduled with MSR[TS] set but without calling
+ * tm_recheckpoint(). This can cause a bug.
+ *
+ * Pull in the MSR TM bits from the user context
+ */
regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK);
/* Now, recheckpoint. This loads up all of the checkpointed (older)
* registers, including FP and V[S]Rs. After recheckpointing, the
@@ -873,6 +889,8 @@ static long restore_tm_user_regs(struct pt_regs *regs,
}
#endif
+ preempt_enable();
+
return 0;
}
#endif
@@ -1140,11 +1158,11 @@ SYSCALL_DEFINE0(rt_sigreturn)
{
struct rt_sigframe __user *rt_sf;
struct pt_regs *regs = current_pt_regs();
+ int tm_restore = 0;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
struct ucontext __user *uc_transact;
unsigned long msr_hi;
unsigned long tmp;
- int tm_restore = 0;
#endif
/* Always make any pending restarted system calls return -EINTR */
current->restart_block.fn = do_no_restart_syscall;
@@ -1192,11 +1210,19 @@ SYSCALL_DEFINE0(rt_sigreturn)
goto bad;
}
}
- if (!tm_restore)
- /* Fall through, for non-TM restore */
+ if (!tm_restore) {
+ /*
+ * Unset regs->msr because ucontext MSR TS is not
+ * set, and recheckpoint was not called. This avoid
+ * hitting a TM Bad thing at RFID
+ */
+ regs->msr &= ~MSR_TS_MASK;
+ }
+ /* Fall through, for non-TM restore */
#endif
- if (do_setcontext(&rt_sf->uc, regs, 1))
- goto bad;
+ if (!tm_restore)
+ if (do_setcontext(&rt_sf->uc, regs, 1))
+ goto bad;
/*
* It's not clear whether or why it is desirable to save the
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index e53ad11..0935fe6 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -467,20 +467,6 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
if (MSR_TM_RESV(msr))
return -EINVAL;
- /* pull in MSR TS bits from user context */
- regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
-
- /*
- * Ensure that TM is enabled in regs->msr before we leave the signal
- * handler. It could be the case that (a) user disabled the TM bit
- * through the manipulation of the MSR bits in uc_mcontext or (b) the
- * TM bit was disabled because a sufficient number of context switches
- * happened whilst in the signal handler and load_tm overflowed,
- * disabling the TM bit. In either case we can end up with an illegal
- * TM state leading to a TM Bad Thing when we return to userspace.
- */
- regs->msr |= MSR_TM;
-
/* pull in MSR LE from user context */
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
@@ -572,6 +558,34 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
tm_enable();
/* Make sure the transaction is marked as failed */
tsk->thread.tm_texasr |= TEXASR_FS;
+
+ /*
+ * Disabling preemption, since it is unsafe to be preempted
+ * with MSR[TS] set without recheckpointing.
+ */
+ preempt_disable();
+
+ /* pull in MSR TS bits from user context */
+ regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+
+ /*
+ * Ensure that TM is enabled in regs->msr before we leave the signal
+ * handler. It could be the case that (a) user disabled the TM bit
+ * through the manipulation of the MSR bits in uc_mcontext or (b) the
+ * TM bit was disabled because a sufficient number of context switches
+ * happened whilst in the signal handler and load_tm overflowed,
+ * disabling the TM bit. In either case we can end up with an illegal
+ * TM state leading to a TM Bad Thing when we return to userspace.
+ *
+ * CAUTION:
+ * After regs->MSR[TS] being updated, make sure that get_user(),
+ * put_user() or similar functions are *not* called. These
+ * functions can generate page faults which will cause the process
+ * to be de-scheduled with MSR[TS] set but without calling
+ * tm_recheckpoint(). This can cause a bug.
+ */
+ regs->msr |= MSR_TM;
+
/* This loads the checkpointed FP/VEC state, if used */
tm_recheckpoint(&tsk->thread);
@@ -585,6 +599,8 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
regs->msr |= MSR_VEC;
}
+ preempt_enable();
+
return err;
}
#endif
@@ -741,11 +757,23 @@ SYSCALL_DEFINE0(rt_sigreturn)
&uc_transact->uc_mcontext))
goto badframe;
}
- else
- /* Fall through, for non-TM restore */
#endif
- if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
- goto badframe;
+ /* Fall through, for non-TM restore */
+ if (!MSR_TM_ACTIVE(msr)) {
+ /*
+ * Unset MSR[TS] on the thread regs since MSR from user
+ * context does not have MSR active, and recheckpoint was
+ * not called since restore_tm_sigcontexts() was not called
+ * also.
+ *
+ * If not unsetting it, the code can RFID to userspace with
+ * MSR[TS] set, but without CPU in the proper state,
+ * causing a TM bad thing.
+ */
+ current->thread.regs->msr &= ~MSR_TS_MASK;
+ if (restore_sigcontext(current, NULL, 1, &uc->uc_mcontext))
+ goto badframe;
+ }
if (restore_altstack(&uc->uc_stack))
goto badframe;
diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile
new file mode 100644
index 0000000..27b4895
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/Makefile
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+kapi := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
+
+_dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)') \
+ $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+
+syscall := $(srctree)/$(src)/syscall.tbl
+syshdr := $(srctree)/$(src)/syscallhdr.sh
+systbl := $(srctree)/$(src)/syscalltbl.sh
+
+quiet_cmd_syshdr = SYSHDR $@
+ cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
+ '$(syshdr_abis_$(basetarget))' \
+ '$(syshdr_pfx_$(basetarget))' \
+ '$(syshdr_offset_$(basetarget))'
+
+quiet_cmd_systbl = SYSTBL $@
+ cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \
+ '$(systbl_abis_$(basetarget))' \
+ '$(systbl_abi_$(basetarget))' \
+ '$(systbl_offset_$(basetarget))'
+
+syshdr_abis_unistd_32 := common,nospu,32
+$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+ $(call if_changed,syshdr)
+
+syshdr_abis_unistd_64 := common,nospu,64
+$(uapi)/unistd_64.h: $(syscall) $(syshdr)
+ $(call if_changed,syshdr)
+
+systbl_abis_syscall_table_32 := common,nospu,32
+systbl_abi_syscall_table_32 := 32
+$(kapi)/syscall_table_32.h: $(syscall) $(systbl)
+ $(call if_changed,systbl)
+
+systbl_abis_syscall_table_64 := common,nospu,64
+systbl_abi_syscall_table_64 := 64
+$(kapi)/syscall_table_64.h: $(syscall) $(systbl)
+ $(call if_changed,systbl)
+
+systbl_abis_syscall_table_c32 := common,nospu,32
+systbl_abi_syscall_table_c32 := c32
+$(kapi)/syscall_table_c32.h: $(syscall) $(systbl)
+ $(call if_changed,systbl)
+
+systbl_abis_syscall_table_spu := common,spu
+systbl_abi_syscall_table_spu := spu
+$(kapi)/syscall_table_spu.h: $(syscall) $(systbl)
+ $(call if_changed,systbl)
+
+uapisyshdr-y += unistd_32.h unistd_64.h
+kapisyshdr-y += syscall_table_32.h \
+ syscall_table_64.h \
+ syscall_table_c32.h \
+ syscall_table_spu.h
+
+targets += $(uapisyshdr-y) $(kapisyshdr-y)
+
+PHONY += all
+all: $(addprefix $(uapi)/,$(uapisyshdr-y))
+all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+ @:
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
new file mode 100644
index 0000000..db3bbb8
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -0,0 +1,427 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for powerpc
+#
+# The format is:
+# <number> <abi> <name> <entry point> <compat entry point>
+#
+# The <abi> can be common, spu, nospu, 64, or 32 for this file.
+#
+0 nospu restart_syscall sys_restart_syscall
+1 nospu exit sys_exit
+2 nospu fork ppc_fork
+3 common read sys_read
+4 common write sys_write
+5 common open sys_open compat_sys_open
+6 common close sys_close
+7 common waitpid sys_waitpid
+8 common creat sys_creat
+9 common link sys_link
+10 common unlink sys_unlink
+11 nospu execve sys_execve compat_sys_execve
+12 common chdir sys_chdir
+13 common time sys_time compat_sys_time
+14 common mknod sys_mknod
+15 common chmod sys_chmod
+16 common lchown sys_lchown
+17 common break sys_ni_syscall
+18 32 oldstat sys_stat sys_ni_syscall
+18 64 oldstat sys_ni_syscall
+18 spu oldstat sys_ni_syscall
+19 common lseek sys_lseek compat_sys_lseek
+20 common getpid sys_getpid
+21 nospu mount sys_mount compat_sys_mount
+22 32 umount sys_oldumount
+22 64 umount sys_ni_syscall
+22 spu umount sys_ni_syscall
+23 common setuid sys_setuid
+24 common getuid sys_getuid
+25 common stime sys_stime compat_sys_stime
+26 nospu ptrace sys_ptrace compat_sys_ptrace
+27 common alarm sys_alarm
+28 32 oldfstat sys_fstat sys_ni_syscall
+28 64 oldfstat sys_ni_syscall
+28 spu oldfstat sys_ni_syscall
+29 nospu pause sys_pause
+30 nospu utime sys_utime compat_sys_utime
+31 common stty sys_ni_syscall
+32 common gtty sys_ni_syscall
+33 common access sys_access
+34 common nice sys_nice
+35 common ftime sys_ni_syscall
+36 common sync sys_sync
+37 common kill sys_kill
+38 common rename sys_rename
+39 common mkdir sys_mkdir
+40 common rmdir sys_rmdir
+41 common dup sys_dup
+42 common pipe sys_pipe
+43 common times sys_times compat_sys_times
+44 common prof sys_ni_syscall
+45 common brk sys_brk
+46 common setgid sys_setgid
+47 common getgid sys_getgid
+48 nospu signal sys_signal
+49 common geteuid sys_geteuid
+50 common getegid sys_getegid
+51 nospu acct sys_acct
+52 nospu umount2 sys_umount
+53 common lock sys_ni_syscall
+54 common ioctl sys_ioctl compat_sys_ioctl
+55 common fcntl sys_fcntl compat_sys_fcntl
+56 common mpx sys_ni_syscall
+57 common setpgid sys_setpgid
+58 common ulimit sys_ni_syscall
+59 32 oldolduname sys_olduname
+59 64 oldolduname sys_ni_syscall
+59 spu oldolduname sys_ni_syscall
+60 common umask sys_umask
+61 common chroot sys_chroot
+62 nospu ustat sys_ustat compat_sys_ustat
+63 common dup2 sys_dup2
+64 common getppid sys_getppid
+65 common getpgrp sys_getpgrp
+66 common setsid sys_setsid
+67 32 sigaction sys_sigaction compat_sys_sigaction
+67 64 sigaction sys_ni_syscall
+67 spu sigaction sys_ni_syscall
+68 common sgetmask sys_sgetmask
+69 common ssetmask sys_ssetmask
+70 common setreuid sys_setreuid
+71 common setregid sys_setregid
+72 32 sigsuspend sys_sigsuspend
+72 64 sigsuspend sys_ni_syscall
+72 spu sigsuspend sys_ni_syscall
+73 32 sigpending sys_sigpending compat_sys_sigpending
+73 64 sigpending sys_ni_syscall
+73 spu sigpending sys_ni_syscall
+74 common sethostname sys_sethostname
+75 common setrlimit sys_setrlimit compat_sys_setrlimit
+76 32 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
+76 64 getrlimit sys_ni_syscall
+76 spu getrlimit sys_ni_syscall
+77 common getrusage sys_getrusage compat_sys_getrusage
+78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 common settimeofday sys_settimeofday compat_sys_settimeofday
+80 common getgroups sys_getgroups
+81 common setgroups sys_setgroups
+82 32 select ppc_select sys_ni_syscall
+82 64 select sys_ni_syscall
+82 spu select sys_ni_syscall
+83 common symlink sys_symlink
+84 32 oldlstat sys_lstat sys_ni_syscall
+84 64 oldlstat sys_ni_syscall
+84 spu oldlstat sys_ni_syscall
+85 common readlink sys_readlink
+86 nospu uselib sys_uselib
+87 nospu swapon sys_swapon
+88 nospu reboot sys_reboot
+89 32 readdir sys_old_readdir compat_sys_old_readdir
+89 64 readdir sys_ni_syscall
+89 spu readdir sys_ni_syscall
+90 common mmap sys_mmap
+91 common munmap sys_munmap
+92 common truncate sys_truncate compat_sys_truncate
+93 common ftruncate sys_ftruncate compat_sys_ftruncate
+94 common fchmod sys_fchmod
+95 common fchown sys_fchown
+96 common getpriority sys_getpriority
+97 common setpriority sys_setpriority
+98 common profil sys_ni_syscall
+99 nospu statfs sys_statfs compat_sys_statfs
+100 nospu fstatfs sys_fstatfs compat_sys_fstatfs
+101 common ioperm sys_ni_syscall
+102 common socketcall sys_socketcall compat_sys_socketcall
+103 common syslog sys_syslog
+104 common setitimer sys_setitimer compat_sys_setitimer
+105 common getitimer sys_getitimer compat_sys_getitimer
+106 common stat sys_newstat compat_sys_newstat
+107 common lstat sys_newlstat compat_sys_newlstat
+108 common fstat sys_newfstat compat_sys_newfstat
+109 32 olduname sys_uname
+109 64 olduname sys_ni_syscall
+109 spu olduname sys_ni_syscall
+110 common iopl sys_ni_syscall
+111 common vhangup sys_vhangup
+112 common idle sys_ni_syscall
+113 common vm86 sys_ni_syscall
+114 common wait4 sys_wait4 compat_sys_wait4
+115 nospu swapoff sys_swapoff
+116 common sysinfo sys_sysinfo compat_sys_sysinfo
+117 nospu ipc sys_ipc compat_sys_ipc
+118 common fsync sys_fsync
+119 32 sigreturn sys_sigreturn compat_sys_sigreturn
+119 64 sigreturn sys_ni_syscall
+119 spu sigreturn sys_ni_syscall
+120 nospu clone ppc_clone
+121 common setdomainname sys_setdomainname
+122 common uname sys_newuname
+123 common modify_ldt sys_ni_syscall
+124 common adjtimex sys_adjtimex compat_sys_adjtimex
+125 common mprotect sys_mprotect
+126 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask
+126 64 sigprocmask sys_ni_syscall
+126 spu sigprocmask sys_ni_syscall
+127 common create_module sys_ni_syscall
+128 nospu init_module sys_init_module
+129 nospu delete_module sys_delete_module
+130 common get_kernel_syms sys_ni_syscall
+131 nospu quotactl sys_quotactl
+132 common getpgid sys_getpgid
+133 common fchdir sys_fchdir
+134 common bdflush sys_bdflush
+135 common sysfs sys_sysfs
+136 32 personality sys_personality ppc64_personality
+136 64 personality ppc64_personality
+136 spu personality ppc64_personality
+137 common afs_syscall sys_ni_syscall
+138 common setfsuid sys_setfsuid
+139 common setfsgid sys_setfsgid
+140 common _llseek sys_llseek
+141 common getdents sys_getdents compat_sys_getdents
+142 common _newselect sys_select compat_sys_select
+143 common flock sys_flock
+144 common msync sys_msync
+145 common readv sys_readv compat_sys_readv
+146 common writev sys_writev compat_sys_writev
+147 common getsid sys_getsid
+148 common fdatasync sys_fdatasync
+149 nospu _sysctl sys_sysctl compat_sys_sysctl
+150 common mlock sys_mlock
+151 common munlock sys_munlock
+152 common mlockall sys_mlockall
+153 common munlockall sys_munlockall
+154 common sched_setparam sys_sched_setparam
+155 common sched_getparam sys_sched_getparam
+156 common sched_setscheduler sys_sched_setscheduler
+157 common sched_getscheduler sys_sched_getscheduler
+158 common sched_yield sys_sched_yield
+159 common sched_get_priority_max sys_sched_get_priority_max
+160 common sched_get_priority_min sys_sched_get_priority_min
+161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
+162 common nanosleep sys_nanosleep compat_sys_nanosleep
+163 common mremap sys_mremap
+164 common setresuid sys_setresuid
+165 common getresuid sys_getresuid
+166 common query_module sys_ni_syscall
+167 common poll sys_poll
+168 common nfsservctl sys_ni_syscall
+169 common setresgid sys_setresgid
+170 common getresgid sys_getresgid
+171 common prctl sys_prctl
+172 nospu rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+173 nospu rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+174 nospu rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+175 nospu rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+176 nospu rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
+177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+179 common pread64 sys_pread64 compat_sys_pread64
+180 common pwrite64 sys_pwrite64 compat_sys_pwrite64
+181 common chown sys_chown
+182 common getcwd sys_getcwd
+183 common capget sys_capget
+184 common capset sys_capset
+185 nospu sigaltstack sys_sigaltstack compat_sys_sigaltstack
+186 32 sendfile sys_sendfile compat_sys_sendfile
+186 64 sendfile sys_sendfile64
+186 spu sendfile sys_sendfile64
+187 common getpmsg sys_ni_syscall
+188 common putpmsg sys_ni_syscall
+189 nospu vfork ppc_vfork
+190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
+191 common readahead sys_readahead compat_sys_readahead
+192 32 mmap2 sys_mmap2 compat_sys_mmap2
+193 32 truncate64 sys_truncate64 compat_sys_truncate64
+194 32 ftruncate64 sys_ftruncate64 compat_sys_ftruncate64
+195 32 stat64 sys_stat64
+196 32 lstat64 sys_lstat64
+197 32 fstat64 sys_fstat64
+198 nospu pciconfig_read sys_pciconfig_read
+199 nospu pciconfig_write sys_pciconfig_write
+200 nospu pciconfig_iobase sys_pciconfig_iobase
+201 common multiplexer sys_ni_syscall
+202 common getdents64 sys_getdents64
+203 common pivot_root sys_pivot_root
+204 32 fcntl64 sys_fcntl64 compat_sys_fcntl64
+205 common madvise sys_madvise
+206 common mincore sys_mincore
+207 common gettid sys_gettid
+208 common tkill sys_tkill
+209 common setxattr sys_setxattr
+210 common lsetxattr sys_lsetxattr
+211 common fsetxattr sys_fsetxattr
+212 common getxattr sys_getxattr
+213 common lgetxattr sys_lgetxattr
+214 common fgetxattr sys_fgetxattr
+215 common listxattr sys_listxattr
+216 common llistxattr sys_llistxattr
+217 common flistxattr sys_flistxattr
+218 common removexattr sys_removexattr
+219 common lremovexattr sys_lremovexattr
+220 common fremovexattr sys_fremovexattr
+221 common futex sys_futex compat_sys_futex
+222 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+223 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+# 224 unused
+225 common tuxcall sys_ni_syscall
+226 32 sendfile64 sys_sendfile64 compat_sys_sendfile64
+227 common io_setup sys_io_setup compat_sys_io_setup
+228 common io_destroy sys_io_destroy
+229 common io_getevents sys_io_getevents compat_sys_io_getevents
+230 common io_submit sys_io_submit compat_sys_io_submit
+231 common io_cancel sys_io_cancel
+232 nospu set_tid_address sys_set_tid_address
+233 common fadvise64 sys_fadvise64 ppc32_fadvise64
+234 nospu exit_group sys_exit_group
+235 nospu lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+236 common epoll_create sys_epoll_create
+237 common epoll_ctl sys_epoll_ctl
+238 common epoll_wait sys_epoll_wait
+239 common remap_file_pages sys_remap_file_pages
+240 common timer_create sys_timer_create compat_sys_timer_create
+241 common timer_settime sys_timer_settime compat_sys_timer_settime
+242 common timer_gettime sys_timer_gettime compat_sys_timer_gettime
+243 common timer_getoverrun sys_timer_getoverrun
+244 common timer_delete sys_timer_delete
+245 common clock_settime sys_clock_settime compat_sys_clock_settime
+246 common clock_gettime sys_clock_gettime compat_sys_clock_gettime
+247 common clock_getres sys_clock_getres compat_sys_clock_getres
+248 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
+249 32 swapcontext ppc_swapcontext ppc32_swapcontext
+249 64 swapcontext ppc64_swapcontext
+249 spu swapcontext sys_ni_syscall
+250 common tgkill sys_tgkill
+251 common utimes sys_utimes compat_sys_utimes
+252 common statfs64 sys_statfs64 compat_sys_statfs64
+253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+254 32 fadvise64_64 ppc_fadvise64_64
+254 spu fadvise64_64 sys_ni_syscall
+255 common rtas sys_rtas
+256 32 sys_debug_setcontext sys_debug_setcontext sys_ni_syscall
+256 64 sys_debug_setcontext sys_ni_syscall
+256 spu sys_debug_setcontext sys_ni_syscall
+# 257 reserved for vserver
+258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages
+259 nospu mbind sys_mbind compat_sys_mbind
+260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
+261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
+262 nospu mq_open sys_mq_open compat_sys_mq_open
+263 nospu mq_unlink sys_mq_unlink
+264 nospu mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
+265 nospu mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
+266 nospu mq_notify sys_mq_notify compat_sys_mq_notify
+267 nospu mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+268 nospu kexec_load sys_kexec_load compat_sys_kexec_load
+269 nospu add_key sys_add_key
+270 nospu request_key sys_request_key
+271 nospu keyctl sys_keyctl compat_sys_keyctl
+272 nospu waitid sys_waitid compat_sys_waitid
+273 nospu ioprio_set sys_ioprio_set
+274 nospu ioprio_get sys_ioprio_get
+275 nospu inotify_init sys_inotify_init
+276 nospu inotify_add_watch sys_inotify_add_watch
+277 nospu inotify_rm_watch sys_inotify_rm_watch
+278 nospu spu_run sys_spu_run
+279 nospu spu_create sys_spu_create
+280 nospu pselect6 sys_pselect6 compat_sys_pselect6
+281 nospu ppoll sys_ppoll compat_sys_ppoll
+282 common unshare sys_unshare
+283 common splice sys_splice
+284 common tee sys_tee
+285 common vmsplice sys_vmsplice compat_sys_vmsplice
+286 common openat sys_openat compat_sys_openat
+287 common mkdirat sys_mkdirat
+288 common mknodat sys_mknodat
+289 common fchownat sys_fchownat
+290 common futimesat sys_futimesat compat_sys_futimesat
+291 32 fstatat64 sys_fstatat64
+291 64 newfstatat sys_newfstatat
+291 spu newfstatat sys_newfstatat
+292 common unlinkat sys_unlinkat
+293 common renameat sys_renameat
+294 common linkat sys_linkat
+295 common symlinkat sys_symlinkat
+296 common readlinkat sys_readlinkat
+297 common fchmodat sys_fchmodat
+298 common faccessat sys_faccessat
+299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
+300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
+301 common move_pages sys_move_pages compat_sys_move_pages
+302 common getcpu sys_getcpu
+303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
+304 common utimensat sys_utimensat compat_sys_utimensat
+305 common signalfd sys_signalfd compat_sys_signalfd
+306 common timerfd_create sys_timerfd_create
+307 common eventfd sys_eventfd
+308 common sync_file_range2 sys_sync_file_range2 compat_sys_sync_file_range2
+309 nospu fallocate sys_fallocate compat_sys_fallocate
+310 nospu subpage_prot sys_subpage_prot
+311 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
+312 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
+313 common signalfd4 sys_signalfd4 compat_sys_signalfd4
+314 common eventfd2 sys_eventfd2
+315 common epoll_create1 sys_epoll_create1
+316 common dup3 sys_dup3
+317 common pipe2 sys_pipe2
+318 nospu inotify_init1 sys_inotify_init1
+319 common perf_event_open sys_perf_event_open
+320 common preadv sys_preadv compat_sys_preadv
+321 common pwritev sys_pwritev compat_sys_pwritev
+322 nospu rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+323 nospu fanotify_init sys_fanotify_init
+324 nospu fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+325 common prlimit64 sys_prlimit64
+326 common socket sys_socket
+327 common bind sys_bind
+328 common connect sys_connect
+329 common listen sys_listen
+330 common accept sys_accept
+331 common getsockname sys_getsockname
+332 common getpeername sys_getpeername
+333 common socketpair sys_socketpair
+334 common send sys_send
+335 common sendto sys_sendto
+336 common recv sys_recv compat_sys_recv
+337 common recvfrom sys_recvfrom compat_sys_recvfrom
+338 common shutdown sys_shutdown
+339 common setsockopt sys_setsockopt compat_sys_setsockopt
+340 common getsockopt sys_getsockopt compat_sys_getsockopt
+341 common sendmsg sys_sendmsg compat_sys_sendmsg
+342 common recvmsg sys_recvmsg compat_sys_recvmsg
+343 common recvmmsg sys_recvmmsg compat_sys_recvmmsg
+344 common accept4 sys_accept4
+345 common name_to_handle_at sys_name_to_handle_at
+346 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+347 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
+348 common syncfs sys_syncfs
+349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
+350 common setns sys_setns
+351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
+352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+353 nospu finit_module sys_finit_module
+354 nospu kcmp sys_kcmp
+355 common sched_setattr sys_sched_setattr
+356 common sched_getattr sys_sched_getattr
+357 common renameat2 sys_renameat2
+358 common seccomp sys_seccomp
+359 common getrandom sys_getrandom
+360 common memfd_create sys_memfd_create
+361 common bpf sys_bpf
+362 nospu execveat sys_execveat compat_sys_execveat
+363 32 switch_endian sys_ni_syscall
+363 64 switch_endian ppc_switch_endian
+363 spu switch_endian sys_ni_syscall
+364 common userfaultfd sys_userfaultfd
+365 common membarrier sys_membarrier
+378 nospu mlock2 sys_mlock2
+379 nospu copy_file_range sys_copy_file_range
+380 common preadv2 sys_preadv2 compat_sys_preadv2
+381 common pwritev2 sys_pwritev2 compat_sys_pwritev2
+382 nospu kexec_file_load sys_kexec_file_load
+383 nospu statx sys_statx
+384 nospu pkey_alloc sys_pkey_alloc
+385 nospu pkey_free sys_pkey_free
+386 nospu pkey_mprotect sys_pkey_mprotect
+387 nospu rseq sys_rseq
+388 nospu io_pgetevents sys_io_pgetevents compat_sys_io_pgetevents
diff --git a/arch/powerpc/kernel/syscalls/syscallhdr.sh b/arch/powerpc/kernel/syscalls/syscallhdr.sh
new file mode 100644
index 0000000..c0a9a32
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/syscallhdr.sh
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+in="$1"
+out="$2"
+my_abis=`echo "($3)" | tr ',' '|'`
+prefix="$4"
+offset="$5"
+
+fileguard=_UAPI_ASM_POWERPC_`basename "$out" | sed \
+ -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
+ -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
+grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
+ printf "#ifndef %s\n" "${fileguard}"
+ printf "#define %s\n" "${fileguard}"
+ printf "\n"
+
+ nxt=0
+ while read nr abi name entry compat ; do
+ if [ -z "$offset" ]; then
+ printf "#define __NR_%s%s\t%s\n" \
+ "${prefix}" "${name}" "${nr}"
+ else
+ printf "#define __NR_%s%s\t(%s + %s)\n" \
+ "${prefix}" "${name}" "${offset}" "${nr}"
+ fi
+ nxt=$((nr+1))
+ done
+
+ printf "\n"
+ printf "#ifdef __KERNEL__\n"
+ printf "#define __NR_syscalls\t%s\n" "${nxt}"
+ printf "#endif\n"
+ printf "\n"
+ printf "#endif /* %s */" "${fileguard}"
+ printf "\n"
+) > "$out"
diff --git a/arch/powerpc/kernel/syscalls/syscalltbl.sh b/arch/powerpc/kernel/syscalls/syscalltbl.sh
new file mode 100644
index 0000000..fd62049
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/syscalltbl.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+in="$1"
+out="$2"
+my_abis=`echo "($3)" | tr ',' '|'`
+my_abi="$4"
+offset="$5"
+
+emit() {
+ t_nxt="$1"
+ t_nr="$2"
+ t_entry="$3"
+
+ while [ $t_nxt -lt $t_nr ]; do
+ printf "__SYSCALL(%s,sys_ni_syscall, )\n" "${t_nxt}"
+ t_nxt=$((t_nxt+1))
+ done
+ printf "__SYSCALL(%s,%s, )\n" "${t_nxt}" "${t_entry}"
+}
+
+grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
+ nxt=0
+ if [ -z "$offset" ]; then
+ offset=0
+ fi
+
+ while read nr abi name entry compat ; do
+ if [ "$my_abi" = "c32" ] && [ ! -z "$compat" ]; then
+ emit $((nxt+offset)) $((nr+offset)) $compat
+ else
+ emit $((nxt+offset)) $((nr+offset)) $entry
+ fi
+ nxt=$((nr+1))
+ done
+) > "$out"
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 919a327..23265a2 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -16,28 +16,6 @@
#include <asm/ppc_asm.h>
-#ifdef CONFIG_PPC64
-#define SYSCALL(func) .8byte DOTSYM(sys_##func),DOTSYM(sys_##func)
-#define COMPAT_SYS(func) .8byte DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
-#define PPC_SYS(func) .8byte DOTSYM(ppc_##func),DOTSYM(ppc_##func)
-#define OLDSYS(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
-#define SYS32ONLY(func) .8byte DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
-#define PPC64ONLY(func) .8byte DOTSYM(ppc_##func),DOTSYM(sys_ni_syscall)
-#define SYSX(f, f3264, f32) .8byte DOTSYM(f),DOTSYM(f3264)
-#else
-#define SYSCALL(func) .long sys_##func
-#define COMPAT_SYS(func) .long sys_##func
-#define PPC_SYS(func) .long ppc_##func
-#define OLDSYS(func) .long sys_##func
-#define SYS32ONLY(func) .long sys_##func
-#define PPC64ONLY(func) .long sys_ni_syscall
-#define SYSX(f, f3264, f32) .long f32
-#endif
-#define SYSCALL_SPU(func) SYSCALL(func)
-#define COMPAT_SYS_SPU(func) COMPAT_SYS(func)
-#define COMPAT_SPU_NEW(func) COMPAT_SYS(func)
-#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32)
-
.section .rodata,"a"
#ifdef CONFIG_PPC64
@@ -46,5 +24,21 @@
.globl sys_call_table
sys_call_table:
+#ifdef CONFIG_PPC64
+#define __SYSCALL(nr, entry, nargs) .8byte DOTSYM(entry)
+#include <asm/syscall_table_64.h>
+#undef __SYSCALL
+#else
+#define __SYSCALL(nr, entry, nargs) .long entry
+#include <asm/syscall_table_32.h>
+#undef __SYSCALL
+#endif
-#include <asm/systbl.h>
+#ifdef CONFIG_COMPAT
+.globl compat_sys_call_table
+compat_sys_call_table:
+#define compat_sys_sigsuspend sys_sigsuspend
+#define __SYSCALL(nr, entry, nargs) .8byte DOTSYM(entry)
+#include <asm/syscall_table_c32.h>
+#undef __SYSCALL
+#endif
diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c
deleted file mode 100644
index 4653258..0000000
--- a/arch/powerpc/kernel/systbl_chk.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * This file, when run through CPP produces a list of syscall numbers
- * in the order of systbl.h. That way we can check for gaps and syscalls
- * that are out of order.
- *
- * Unfortunately, we cannot check for the correct ordering of entries
- * using SYSX().
- *
- * Copyright © IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/unistd.h>
-
-#define SYSCALL(func) __NR_##func
-#define COMPAT_SYS(func) __NR_##func
-#define PPC_SYS(func) __NR_##func
-#ifdef CONFIG_PPC64
-#define OLDSYS(func) -1
-#define SYS32ONLY(func) -1
-#define PPC64ONLY(func) __NR_##func
-#else
-#define OLDSYS(func) __NR_old##func
-#define SYS32ONLY(func) __NR_##func
-#define PPC64ONLY(func) -1
-#endif
-#define SYSX(f, f3264, f32) -1
-
-#define SYSCALL_SPU(func) SYSCALL(func)
-#define COMPAT_SYS_SPU(func) COMPAT_SYS(func)
-#define COMPAT_SPU_NEW(func) COMPAT_SYS(_new##func)
-#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32)
-
-/* Just insert a marker for ni_syscalls */
-#define __NR_ni_syscall -1
-
-/*
- * These are the known exceptions.
- * Hopefully, there will be no more.
- */
-#define __NR_llseek __NR__llseek
-#undef __NR_umount
-#define __NR_umount __NR_umount2
-#define __NR_old_getrlimit __NR_getrlimit
-#define __NR_newstat __NR_stat
-#define __NR_newlstat __NR_lstat
-#define __NR_newfstat __NR_fstat
-#define __NR_newuname __NR_uname
-#define __NR_sysctl __NR__sysctl
-#define __NR_olddebug_setcontext __NR_sys_debug_setcontext
-
-/* We call sys_ugetrlimit for syscall number __NR_getrlimit */
-#define getrlimit ugetrlimit
-
-START_TABLE
-#include <asm/systbl.h>
-END_TABLE NR_syscalls
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 9a86572..00af2c4 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1434,7 +1434,8 @@ void program_check_exception(struct pt_regs *regs)
goto bail;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
- "at %lx (msr 0x%lx)\n", regs->nip, regs->msr);
+ "at %lx (msr 0x%lx) tm_scratch=%llx\n",
+ regs->nip, regs->msr, get_paca()->tm_scratch);
die("Unrecoverable exception", regs, SIGABRT);
}
}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 65b3bdb..7725a97 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -671,15 +671,18 @@ static void __init vdso_setup_syscall_map(void)
{
unsigned int i;
extern unsigned long *sys_call_table;
+#ifdef CONFIG_PPC64
+ extern unsigned long *compat_sys_call_table;
+#endif
extern unsigned long sys_ni_syscall;
for (i = 0; i < NR_syscalls; i++) {
#ifdef CONFIG_PPC64
- if (sys_call_table[i*2] != sys_ni_syscall)
+ if (sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_64[i >> 5] |=
0x80000000UL >> (i & 0x1f);
- if (sys_call_table[i*2+1] != sys_ni_syscall)
+ if (compat_sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
#else /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 62a8d03..532ab797 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -397,12 +397,13 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
-static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
+static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
+ unsigned long entry)
{
unsigned long hpa = 0;
enum dma_data_direction dir = DMA_NONE;
- iommu_tce_xchg(tbl, entry, &hpa, &dir);
+ iommu_tce_xchg(mm, tbl, entry, &hpa, &dir);
}
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -433,7 +434,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
unsigned long hpa = 0;
long ret;
- if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
+ if (WARN_ON_ONCE(iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir)))
return H_TOO_HARD;
if (dir == DMA_NONE)
@@ -441,7 +442,7 @@ static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
if (ret != H_SUCCESS)
- iommu_tce_xchg(tbl, entry, &hpa, &dir);
+ iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
return ret;
}
@@ -487,7 +488,7 @@ long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
if (mm_iommu_mapped_inc(mem))
return H_TOO_HARD;
- ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
+ ret = iommu_tce_xchg(kvm->mm, tbl, entry, &hpa, &dir);
if (WARN_ON_ONCE(ret)) {
mm_iommu_mapped_dec(mem);
return H_TOO_HARD;
@@ -566,7 +567,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
entry, ua, dir);
if (ret != H_SUCCESS) {
- kvmppc_clear_tce(stit->tbl, entry);
+ kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
goto unlock_exit;
}
}
@@ -655,7 +656,8 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
iommu_tce_direction(tce));
if (ret != H_SUCCESS) {
- kvmppc_clear_tce(stit->tbl, entry);
+ kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
+ entry);
goto unlock_exit;
}
}
@@ -704,7 +706,7 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
return ret;
WARN_ON_ONCE(1);
- kvmppc_clear_tce(stit->tbl, entry);
+ kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
}
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index c866d9a..056b750 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -226,7 +226,9 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
unsigned long address)
{
- if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) {
+ /* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
+ if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
+ DSISR_PROTFAULT))) {
printk_ratelimited(KERN_CRIT "kernel tried to execute"
" exec-protected page (%lx) -"
"exploit attempt? (uid: %d)\n",
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 56c2234..a712a65 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -36,6 +36,8 @@ struct mm_iommu_table_group_mem_t {
u64 ua; /* userspace address */
u64 entries; /* number of entries in hpas[] */
u64 *hpas; /* vmalloc'ed */
+#define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1)
+ u64 dev_hpa; /* Device memory base address */
};
static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
@@ -126,7 +128,8 @@ static int mm_iommu_move_page_from_cma(struct page *page)
return 0;
}
-long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
struct mm_iommu_table_group_mem_t **pmem)
{
struct mm_iommu_table_group_mem_t *mem;
@@ -140,12 +143,6 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list,
next) {
- if ((mem->ua == ua) && (mem->entries == entries)) {
- ++mem->used;
- *pmem = mem;
- goto unlock_exit;
- }
-
/* Overlap? */
if ((mem->ua < (ua + (entries << PAGE_SHIFT))) &&
(ua < (mem->ua +
@@ -156,11 +153,13 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
}
- ret = mm_iommu_adjust_locked_vm(mm, entries, true);
- if (ret)
- goto unlock_exit;
+ if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+ ret = mm_iommu_adjust_locked_vm(mm, entries, true);
+ if (ret)
+ goto unlock_exit;
- locked_entries = entries;
+ locked_entries = entries;
+ }
mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem) {
@@ -168,6 +167,13 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
goto unlock_exit;
}
+ if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
+ mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
+ mem->dev_hpa = dev_hpa;
+ goto good_exit;
+ }
+ mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
+
/*
* For a starting point for a maximum page size calculation
* we use @ua and @entries natural alignment to allow IOMMU pages
@@ -236,6 +242,7 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
+good_exit:
atomic64_set(&mem->mapped, 1);
mem->used = 1;
mem->ua = ua;
@@ -252,13 +259,31 @@ long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries,
return ret;
}
-EXPORT_SYMBOL_GPL(mm_iommu_get);
+
+long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+ struct mm_iommu_table_group_mem_t **pmem)
+{
+ return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
+ pmem);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_new);
+
+long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem)
+{
+ return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
+}
+EXPORT_SYMBOL_GPL(mm_iommu_newdev);
static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
long i;
struct page *page = NULL;
+ if (!mem->hpas)
+ return;
+
for (i = 0; i < mem->entries; ++i) {
if (!mem->hpas[i])
continue;
@@ -300,6 +325,7 @@ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
long ret = 0;
+ unsigned long entries, dev_hpa;
mutex_lock(&mem_list_mutex);
@@ -321,9 +347,12 @@ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
}
/* @mapped became 0 so now mappings are disabled, release the region */
+ entries = mem->entries;
+ dev_hpa = mem->dev_hpa;
mm_iommu_release(mem);
- mm_iommu_adjust_locked_vm(mm, mem->entries, false);
+ if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ mm_iommu_adjust_locked_vm(mm, entries, false);
unlock_exit:
mutex_unlock(&mem_list_mutex);
@@ -368,27 +397,32 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
return ret;
}
-struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
+struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
unsigned long ua, unsigned long entries)
{
struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+ mutex_lock(&mem_list_mutex);
+
list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
if ((mem->ua == ua) && (mem->entries == entries)) {
ret = mem;
+ ++mem->used;
break;
}
}
+ mutex_unlock(&mem_list_mutex);
+
return ret;
}
-EXPORT_SYMBOL_GPL(mm_iommu_find);
+EXPORT_SYMBOL_GPL(mm_iommu_get);
long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
- u64 *va = &mem->hpas[entry];
+ u64 *va;
if (entry >= mem->entries)
return -EFAULT;
@@ -396,6 +430,12 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;
+ if (!mem->hpas) {
+ *hpa = mem->dev_hpa + (ua - mem->ua);
+ return 0;
+ }
+
+ va = &mem->hpas[entry];
*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
return 0;
@@ -406,7 +446,6 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
const long entry = (ua - mem->ua) >> PAGE_SHIFT;
- void *va = &mem->hpas[entry];
unsigned long *pa;
if (entry >= mem->entries)
@@ -415,7 +454,12 @@ long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
if (pageshift > mem->pageshift)
return -EFAULT;
- pa = (void *) vmalloc_to_phys(va);
+ if (!mem->hpas) {
+ *hpa = mem->dev_hpa + (ua - mem->ua);
+ return 0;
+ }
+
+ pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
if (!pa)
return -EFAULT;
@@ -435,6 +479,9 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
if (!mem)
return;
+ if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
+ return;
+
entry = (ua - mem->ua) >> PAGE_SHIFT;
va = &mem->hpas[entry];
@@ -445,6 +492,33 @@ extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}
+bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ unsigned int pageshift, unsigned long *size)
+{
+ struct mm_iommu_table_group_mem_t *mem;
+ unsigned long end;
+
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+ if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ continue;
+
+ end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
+ if ((mem->dev_hpa <= hpa) && (hpa < end)) {
+ /*
+ * Since the IOMMU page size might be bigger than
+ * PAGE_SIZE, the amount of preregistered memory
+ * starting from @hpa might be smaller than 1<<pageshift
+ * and the caller needs to distinguish this situation.
+ */
+ *size = min(1UL << pageshift, end - hpa);
+ return true;
+ }
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
+
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
if (atomic64_inc_not_zero(&mem->mapped))
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/pkeys.c
index 04b60a8..5878077 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/pkeys.c
@@ -415,3 +415,13 @@ bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
return pkey_access_permitted(vma_pkey(vma), write, execute);
}
+
+void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+ if (static_branch_likely(&pkey_disabled))
+ return;
+
+ /* Duplicate the oldmm pkey state in mm: */
+ mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm);
+ mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
+}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 383cc36..b072300 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -10,6 +10,7 @@
*/
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/clock.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
@@ -2166,7 +2167,7 @@ static bool pmc_overflow(unsigned long val)
/*
* Performance monitor interrupt stuff
*/
-static void perf_event_interrupt(struct pt_regs *regs)
+static void __perf_event_interrupt(struct pt_regs *regs)
{
int i, j;
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
@@ -2250,6 +2251,14 @@ static void perf_event_interrupt(struct pt_regs *regs)
irq_exit();
}
+static void perf_event_interrupt(struct pt_regs *regs)
+{
+ u64 start_clock = sched_clock();
+
+ __perf_event_interrupt(regs);
+ perf_sample_event_took(sched_clock() - start_clock);
+}
+
static int power_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 7e4f8ca..f467247 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -179,9 +179,9 @@ static int pika_setup_leds(void)
}
for_each_child_of_node(np, child)
- if (strcmp(child->name, "green") == 0)
+ if (of_node_name_eq(child, "green"))
green_led = of_get_gpio(child, 0);
- else if (strcmp(child->name, "red") == 0)
+ else if (of_node_name_eq(child, "red"))
red_led = of_get_gpio(child, 0);
of_node_put(np);
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index f5bbd45..f2610a0 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -223,8 +223,6 @@ static void __init ocm_init_node(int count, struct device_node *node)
INIT_LIST_HEAD(&ocm->c.list);
ocm->ready = 1;
-
- return;
}
static int ocm_debugfs_show(struct seq_file *m, void *v)
@@ -242,9 +240,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
seq_printf(m, "PhysAddr : 0x%llx\n", ocm->phys);
seq_printf(m, "MemTotal : %d Bytes\n", ocm->memtotal);
seq_printf(m, "MemTotal(NC) : %d Bytes\n", ocm->nc.memtotal);
- seq_printf(m, "MemTotal(C) : %d Bytes\n", ocm->c.memtotal);
-
- seq_printf(m, "\n");
+ seq_printf(m, "MemTotal(C) : %d Bytes\n\n", ocm->c.memtotal);
seq_printf(m, "NC.PhysAddr : 0x%llx\n", ocm->nc.phys);
seq_printf(m, "NC.VirtAddr : 0x%p\n", ocm->nc.virt);
@@ -256,9 +252,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
blk->size, blk->owner);
}
- seq_printf(m, "\n");
-
- seq_printf(m, "C.PhysAddr : 0x%llx\n", ocm->c.phys);
+ seq_printf(m, "\nC.PhysAddr : 0x%llx\n", ocm->c.phys);
seq_printf(m, "C.VirtAddr : 0x%p\n", ocm->c.virt);
seq_printf(m, "C.MemTotal : %d Bytes\n", ocm->c.memtotal);
seq_printf(m, "C.MemFree : %d Bytes\n", ocm->c.memfree);
@@ -268,7 +262,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
blk->size, blk->owner);
}
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
}
return 0;
@@ -338,7 +332,6 @@ void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL);
if (!ocm_blk) {
- printk(KERN_ERR "PPC4XX OCM: could not allocate ocm block");
rh_free(ocm_reg->rh, offset);
break;
}
@@ -392,10 +385,8 @@ static int __init ppc4xx_ocm_init(void)
return 0;
ocm_nodes = kzalloc((count * sizeof(struct ocm_info)), GFP_KERNEL);
- if (!ocm_nodes) {
- printk(KERN_ERR "PPC4XX OCM: failed to allocate OCM nodes!\n");
+ if (!ocm_nodes)
return -ENOMEM;
- }
ocm_count = count;
count = 0;
diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c
index d0825f5..e6e2adc 100644
--- a/arch/powerpc/platforms/4xx/pci.c
+++ b/arch/powerpc/platforms/4xx/pci.c
@@ -1399,7 +1399,6 @@ static void __init ppc_476fpe_pciex_check_link(struct ppc4xx_pciex_port *port)
printk(KERN_WARNING "PCIE%d: Link up failed\n", port->index);
iounmap(mbase);
- return;
}
static struct ppc4xx_pciex_hwops ppc_476fpe_pcie_hwops __initdata =
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 1ecbf17..6153886 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -82,11 +82,9 @@ static void __init efika_pcisetup(void)
return;
}
- for (pcictrl = NULL;;) {
- pcictrl = of_get_next_child(root, pcictrl);
- if ((pcictrl == NULL) || (strcmp(pcictrl->name, "pci") == 0))
+ for_each_child_of_node(root, pcictrl)
+ if (of_node_name_eq(pcictrl, "pci"))
break;
- }
of_node_put(root);
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 902d527..e2e1371 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -131,7 +131,7 @@ static int cell_setup_phb(struct pci_controller *phb)
np = phb->dn;
model = of_get_property(np, "model", NULL);
- if (model == NULL || strcmp(np->name, "pci"))
+ if (model == NULL || !of_node_name_eq(np, "pci"))
return 0;
/* Setup workarounds for spider */
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c
index 8ae8620..125f2a5 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -34,20 +34,9 @@
*/
static void *spu_syscall_table[] = {
-#define SYSCALL(func) sys_ni_syscall,
-#define COMPAT_SYS(func) sys_ni_syscall,
-#define PPC_SYS(func) sys_ni_syscall,
-#define OLDSYS(func) sys_ni_syscall,
-#define SYS32ONLY(func) sys_ni_syscall,
-#define PPC64ONLY(func) sys_ni_syscall,
-#define SYSX(f, f3264, f32) sys_ni_syscall,
-
-#define SYSCALL_SPU(func) sys_##func,
-#define COMPAT_SYS_SPU(func) sys_##func,
-#define COMPAT_SPU_NEW(func) sys_##func,
-#define SYSX_SPU(f, f3264, f32) f,
-
-#include <asm/systbl.h>
+#define __SYSCALL(nr, entry, nargs) entry,
+#include <asm/syscall_table_spu.h>
+#undef __SYSCALL
};
long spu_sys_callback(struct spu_syscall_block *s)
diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c
index f7e3637..bed935c 100644
--- a/arch/powerpc/platforms/cell/spu_manage.c
+++ b/arch/powerpc/platforms/cell/spu_manage.c
@@ -458,7 +458,6 @@ static void init_affinity_node(int cbe)
struct device_node *vic_dn, *last_spu_dn;
phandle avoid_ph;
const phandle *vic_handles;
- const char *name;
int lenp, i, added;
last_spu = list_first_entry(&cbe_spu_info[cbe].spus, struct spu,
@@ -480,12 +479,7 @@ static void init_affinity_node(int cbe)
if (!vic_dn)
continue;
- /* a neighbour might be spe, mic-tm, or bif0 */
- name = of_get_property(vic_dn, "name", NULL);
- if (!name)
- continue;
-
- if (strcmp(name, "spe") == 0) {
+ if (of_node_name_eq(vic_dn, "spe") ) {
spu = devnode_spu(cbe, vic_dn);
avoid_ph = last_spu_dn->phandle;
} else {
@@ -498,7 +492,7 @@ static void init_affinity_node(int cbe)
spu = neighbour_spu(cbe, vic_dn, last_spu_dn);
if (!spu)
continue;
- if (!strcmp(name, "mic-tm")) {
+ if (of_node_name_eq(vic_dn, "mic-tm")) {
last_spu->has_mem_affinity = 1;
spu->has_mem_affinity = 1;
}
diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c
index 3c21f04..e66644e 100644
--- a/arch/powerpc/platforms/chrp/setup.c
+++ b/arch/powerpc/platforms/chrp/setup.c
@@ -287,10 +287,7 @@ static __init void chrp_init(void)
* or /pci@80000000/isa@C/serial@i2F8
* The optional graphics card has also type 'serial' in VGA mode.
*/
- property = of_get_property(node, "name", NULL);
- if (!property)
- goto out_put;
- if (!strcmp(property, "failsafe") || !strcmp(property, "serial"))
+ if (of_node_name_eq(node, "failsafe") || of_node_name_eq(node, "serial"))
add_preferred_console("ttyS", 0, NULL);
out_put:
of_node_put(node);
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index bdb33163..c3e5ee8 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -173,9 +173,9 @@ static long ohare_htw_scc_enable(struct device_node *node, long param,
macio = macio_find(node, 0);
if (!macio)
return -ENODEV;
- if (!strcmp(node->name, "ch-a"))
+ if (of_node_name_eq(node, "ch-a"))
chan_mask = MACIO_FLAG_SCCA_ON;
- else if (!strcmp(node->name, "ch-b"))
+ else if (of_node_name_eq(node, "ch-b"))
chan_mask = MACIO_FLAG_SCCB_ON;
else
return -ENODEV;
@@ -610,9 +610,9 @@ static long core99_scc_enable(struct device_node *node, long param, long value)
macio = macio_find(node, 0);
if (!macio)
return -ENODEV;
- if (!strcmp(node->name, "ch-a"))
+ if (of_node_name_eq(node, "ch-a"))
chan_mask = MACIO_FLAG_SCCA_ON;
- else if (!strcmp(node->name, "ch-b"))
+ else if (of_node_name_eq(node, "ch-b"))
chan_mask = MACIO_FLAG_SCCB_ON;
else
return -ENODEV;
@@ -1392,8 +1392,7 @@ static long g5_mpic_enable(struct device_node *node, long param, long value)
if (parent == NULL)
return 0;
- is_u3 = strcmp(parent->name, "u3") == 0 ||
- strcmp(parent->name, "u4") == 0;
+ is_u3 = of_node_name_eq(parent, "u3") || of_node_name_eq(parent, "u4");
of_node_put(parent);
if (!is_u3)
return 0;
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index 84bace3..4de058a 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -617,7 +617,7 @@ static void __init kw_i2c_probe(void)
* but not for now
*/
child = of_get_next_child(np, NULL);
- multibus = !child || strcmp(child->name, "i2c-bus");
+ multibus = !of_node_name_eq(child, "i2c-bus");
of_node_put(child);
/* For a multibus setup, we get the bus count based on the
@@ -1205,7 +1205,7 @@ static void pmac_i2c_devscan(void (*callback)(struct device_node *dev,
if (bus != pmac_i2c_find_bus(np))
continue;
for (p = whitelist; p->name != NULL; p++) {
- if (strcmp(np->name, p->name))
+ if (!of_node_name_eq(np, p->name))
continue;
if (p->compatible &&
!of_device_is_compatible(np, p->compatible))
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 04527d1..3d74205 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -501,9 +501,7 @@ static void __init init_p2pbridge(void)
/* XXX it would be better here to identify the specific
PCI-PCI bridge chip we have. */
p2pbridge = of_find_node_by_name(NULL, "pci-bridge");
- if (p2pbridge == NULL
- || p2pbridge->parent == NULL
- || strcmp(p2pbridge->parent->name, "pci") != 0)
+ if (p2pbridge == NULL || !of_node_name_eq(p2pbridge->parent, "pci"))
goto done;
if (pci_device_from_OF_node(p2pbridge, &bus, &devfn) < 0) {
DBG("Can't find PCI infos for PCI<->PCI bridge\n");
@@ -828,14 +826,14 @@ static int __init pmac_add_bridge(struct device_node *dev)
if (of_device_is_compatible(dev, "uni-north")) {
primary = setup_uninorth(hose, &rsrc);
disp_name = "UniNorth";
- } else if (strcmp(dev->name, "pci") == 0) {
+ } else if (of_node_name_eq(dev, "pci")) {
/* XXX assume this is a mpc106 (grackle) */
setup_grackle(hose);
disp_name = "Grackle (MPC106)";
- } else if (strcmp(dev->name, "bandit") == 0) {
+ } else if (of_node_name_eq(dev, "bandit")) {
setup_bandit(hose, &rsrc);
disp_name = "Bandit";
- } else if (strcmp(dev->name, "chaos") == 0) {
+ } else if (of_node_name_eq(dev, "chaos")) {
setup_chaos(hose, &rsrc);
disp_name = "Chaos";
primary = 0;
@@ -914,16 +912,14 @@ void __init pmac_pci_init(void)
"of device tree\n");
return;
}
- for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) {
- if (np->name == NULL)
- continue;
- if (strcmp(np->name, "bandit") == 0
- || strcmp(np->name, "chaos") == 0
- || strcmp(np->name, "pci") == 0) {
+ for_each_child_of_node(root, np) {
+ if (of_node_name_eq(np, "bandit")
+ || of_node_name_eq(np, "chaos")
+ || of_node_name_eq(np, "pci")) {
if (pmac_add_bridge(np) == 0)
of_node_get(np);
}
- if (strcmp(np->name, "ht") == 0) {
+ if (of_node_name_eq(np, "ht")) {
of_node_get(np);
ht = np;
}
@@ -983,7 +979,7 @@ static bool pmac_pci_enable_device_hook(struct pci_dev *dev)
/* Firewire & GMAC were disabled after PCI probe, the driver is
* claiming them, we must re-enable them now.
*/
- if (uninorth_child && !strcmp(node->name, "firewire") &&
+ if (uninorth_child && of_node_name_eq(node, "firewire") &&
(of_device_is_compatible(node, "pci106b,18") ||
of_device_is_compatible(node, "pci106b,30") ||
of_device_is_compatible(node, "pci11c1,5811"))) {
@@ -991,7 +987,7 @@ static bool pmac_pci_enable_device_hook(struct pci_dev *dev)
pmac_call_feature(PMAC_FTR_1394_ENABLE, node, 0, 1);
updatecfg = 1;
}
- if (uninorth_child && !strcmp(node->name, "ethernet") &&
+ if (uninorth_child && of_node_name_eq(node, "ethernet") &&
of_device_is_compatible(node, "gmac")) {
pmac_call_feature(PMAC_FTR_GMAC_ENABLE, node, 0, 1);
updatecfg = 1;
@@ -1262,4 +1258,3 @@ struct pci_controller_ops pmac_pci_controller_ops = {
.enable_device_hook = pmac_pci_enable_device_hook,
#endif
};
-
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index fd2e210..62311e8 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -101,9 +101,8 @@ static void macio_gpio_init_one(struct macio_chip *macio)
* Find the "gpio" parent node
*/
- for (gparent = NULL;
- (gparent = of_get_next_child(macio->of_node, gparent)) != NULL;)
- if (strcmp(gparent->name, "gpio") == 0)
+ for_each_child_of_node(macio->of_node, gparent)
+ if (of_node_name_eq(gparent, "gpio"))
break;
if (gparent == NULL)
return;
@@ -313,7 +312,7 @@ static void uninorth_install_pfunc(void)
* Install handlers for the hwclock child if any
*/
for (np = NULL; (np = of_get_next_child(uninorth_node, np)) != NULL;)
- if (strcmp(np->name, "hw-clock") == 0) {
+ if (of_node_name_eq(np, "hw-clock")) {
unin_hwclock = np;
break;
}
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 5a71395..c292ffa 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -553,13 +553,13 @@ void __init pmac_pic_init(void)
for_each_node_with_property(np, "interrupt-controller") {
/* Skip /chosen/interrupt-controller */
- if (strcmp(np->name, "chosen") == 0)
+ if (of_node_name_eq(np, "chosen"))
continue;
/* It seems like at least one person wants
* to use BootX on a machine with an AppleKiwi
* controller which happens to pretend to be an
* interrupt controller too. */
- if (strcmp(np->name, "AppleKiwi") == 0)
+ if (of_node_name_eq(np, "AppleKiwi"))
continue;
/* I think we found one ! */
of_irq_dflt_pic = np;
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 2f00e3d..2e8221e 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -560,15 +560,9 @@ static int __init check_pmac_serial_console(void)
}
pr_debug("stdout is %pOF\n", prom_stdout);
- name = of_get_property(prom_stdout, "name", NULL);
- if (!name) {
- pr_debug(" stdout package has no name !\n");
- goto not_found;
- }
-
- if (strcmp(name, "ch-a") == 0)
+ if (of_node_name_eq(prom_stdout, "ch-a"))
offset = 0;
- else if (strcmp(name, "ch-b") == 0)
+ else if (of_node_name_eq(prom_stdout, "ch-b"))
offset = 1;
else
goto not_found;
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index 8901973..415b74d 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -87,7 +87,7 @@ void udbg_scc_init(int force_scc)
for (ch = NULL; (ch = of_get_next_child(escc, ch)) != NULL;) {
if (ch == stdout)
ch_def = of_node_get(ch);
- if (strcmp(ch->name, "ch-a") == 0)
+ if (of_node_name_eq(ch, "ch-a"))
ch_a = of_node_get(ch);
}
if (ch_def == NULL && !force_scc)
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index abc0be7..f380789 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -564,8 +564,8 @@ static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
{
struct pnv_phb *phb = pe->phb->private_data;
- u8 fstate;
- __be16 pcierr;
+ u8 fstate = 0;
+ __be16 pcierr = 0;
s64 rc;
int result = 0;
@@ -603,8 +603,8 @@ static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
{
struct pnv_phb *phb = pe->phb->private_data;
- u8 fstate;
- __be16 pcierr;
+ u8 fstate = 0;
+ __be16 pcierr = 0;
s64 rc;
int result;
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index 75b9352..d7f742e 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -9,32 +9,19 @@
* License as published by the Free Software Foundation.
*/
-#include <linux/slab.h>
#include <linux/mmu_notifier.h>
#include <linux/mmu_context.h>
#include <linux/of.h>
-#include <linux/export.h>
#include <linux/pci.h>
#include <linux/memblock.h>
-#include <linux/iommu.h>
#include <linux/sizes.h>
#include <asm/debugfs.h>
-#include <asm/tlb.h>
#include <asm/powernv.h>
-#include <asm/reg.h>
-#include <asm/opal.h>
-#include <asm/io.h>
-#include <asm/iommu.h>
-#include <asm/pnv-pci.h>
-#include <asm/msi_bitmap.h>
#include <asm/opal.h>
-#include "powernv.h"
#include "pci.h"
-#define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
-
/*
* spinlock to protect initialisation of an npu_context for a particular
* mm_struct.
@@ -133,15 +120,25 @@ static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
return pe;
}
-long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
+static long pnv_npu_unset_window(struct iommu_table_group *table_group,
+ int num);
+
+static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
struct iommu_table *tbl)
{
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
struct pnv_phb *phb = npe->phb;
int64_t rc;
const unsigned long size = tbl->it_indirect_levels ?
tbl->it_level_size : tbl->it_size;
const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
const __u64 win_size = tbl->it_size << tbl->it_page_shift;
+ int num2 = (num == 0) ? 1 : 0;
+
+ /* NPU has just one TVE so if there is another table, remove it first */
+ if (npe->table_group.tables[num2])
+ pnv_npu_unset_window(&npe->table_group, num2);
pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
start_addr, start_addr + win_size - 1,
@@ -167,11 +164,16 @@ long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
return 0;
}
-long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num)
+static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
{
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
struct pnv_phb *phb = npe->phb;
int64_t rc;
+ if (!npe->table_group.tables[num])
+ return 0;
+
pe_info(npe, "Removing DMA window\n");
rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
@@ -210,7 +212,8 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe)
if (!gpe)
return;
- rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]);
+ rc = pnv_npu_set_window(&npe->table_group, 0,
+ gpe->table_group.tables[0]);
/*
* NVLink devices use the same TCE table configuration as
@@ -235,7 +238,7 @@ static int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe)
if (phb->type != PNV_PHB_NPU_NVLINK || !npe->pdev)
return -EINVAL;
- rc = pnv_npu_unset_window(npe, 0);
+ rc = pnv_npu_unset_window(&npe->table_group, 0);
if (rc != OPAL_SUCCESS)
return rc;
@@ -288,11 +291,15 @@ void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass)
}
}
+#ifdef CONFIG_IOMMU_API
/* Switch ownership from platform code to external user (e.g. VFIO) */
-void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
+static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
{
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
struct pnv_phb *phb = npe->phb;
int64_t rc;
+ struct pci_dev *gpdev = NULL;
/*
* Note: NPU has just a single TVE in the hardware which means that
@@ -301,7 +308,7 @@ void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
* if it was enabled at the moment of ownership change.
*/
if (npe->table_group.tables[0]) {
- pnv_npu_unset_window(npe, 0);
+ pnv_npu_unset_window(&npe->table_group, 0);
return;
}
@@ -314,31 +321,316 @@ void pnv_npu_take_ownership(struct pnv_ioda_pe *npe)
return;
}
pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
+
+ get_gpu_pci_dev_and_pe(npe, &gpdev);
+ if (gpdev)
+ pnv_npu2_unmap_lpar_dev(gpdev);
}
-struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe)
+static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
{
- struct pnv_phb *phb = npe->phb;
- struct pci_bus *pbus = phb->hose->bus;
- struct pci_dev *npdev, *gpdev = NULL, *gptmp;
- struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
+ struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
+ table_group);
+ struct pci_dev *gpdev = NULL;
- if (!gpe || !gpdev)
- return NULL;
+ get_gpu_pci_dev_and_pe(npe, &gpdev);
+ if (gpdev)
+ pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
+}
- list_for_each_entry(npdev, &pbus->devices, bus_list) {
- gptmp = pnv_pci_get_gpu_dev(npdev);
+static struct iommu_table_group_ops pnv_pci_npu_ops = {
+ .set_window = pnv_npu_set_window,
+ .unset_window = pnv_npu_unset_window,
+ .take_ownership = pnv_npu_take_ownership,
+ .release_ownership = pnv_npu_release_ownership,
+};
+#endif /* !CONFIG_IOMMU_API */
- if (gptmp != gpdev)
+/*
+ * NPU2 ATS
+ */
+/* Maximum possible number of ATSD MMIO registers per NPU */
+#define NV_NMMU_ATSD_REGS 8
+#define NV_NPU_MAX_PE_NUM 16
+
+/*
+ * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
+ * up to 3 x (GPU + 2xNPUs) (POWER9).
+ */
+struct npu_comp {
+ struct iommu_table_group table_group;
+ int pe_num;
+ struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
+};
+
+/* An NPU descriptor, valid for POWER9 only */
+struct npu {
+ int index;
+ __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
+ unsigned int mmio_atsd_count;
+
+ /* Bitmask for MMIO register usage */
+ unsigned long mmio_atsd_usage;
+
+ /* Do we need to explicitly flush the nest mmu? */
+ bool nmmu_flush;
+
+ struct npu_comp npucomp;
+};
+
+#ifdef CONFIG_IOMMU_API
+static long pnv_npu_peers_create_table_userspace(
+ struct iommu_table_group *table_group,
+ int num, __u32 page_shift, __u64 window_size, __u32 levels,
+ struct iommu_table **ptbl)
+{
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ if (!npucomp->pe_num || !npucomp->pe[0] ||
+ !npucomp->pe[0]->table_group.ops ||
+ !npucomp->pe[0]->table_group.ops->create_table)
+ return -EFAULT;
+
+ return npucomp->pe[0]->table_group.ops->create_table(
+ &npucomp->pe[0]->table_group, num, page_shift,
+ window_size, levels, ptbl);
+}
+
+static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
+ int num, struct iommu_table *tbl)
+{
+ int i, j;
+ long ret = 0;
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ for (i = 0; i < npucomp->pe_num; ++i) {
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+ if (!pe->table_group.ops->set_window)
continue;
- pe_info(gpe, "Attached NPU %s\n", dev_name(&npdev->dev));
- iommu_group_add_device(gpe->table_group.group, &npdev->dev);
+ ret = pe->table_group.ops->set_window(&pe->table_group,
+ num, tbl);
+ if (ret)
+ break;
}
- return gpe;
+ if (ret) {
+ for (j = 0; j < i; ++j) {
+ struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+ if (!pe->table_group.ops->unset_window)
+ continue;
+
+ ret = pe->table_group.ops->unset_window(
+ &pe->table_group, num);
+ if (ret)
+ break;
+ }
+ } else {
+ table_group->tables[num] = iommu_tce_table_get(tbl);
+ }
+
+ return ret;
}
+static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
+ int num)
+{
+ int i, j;
+ long ret = 0;
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ for (i = 0; i < npucomp->pe_num; ++i) {
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+ WARN_ON(npucomp->table_group.tables[num] !=
+ table_group->tables[num]);
+ if (!npucomp->table_group.tables[num])
+ continue;
+
+ if (!pe->table_group.ops->unset_window)
+ continue;
+
+ ret = pe->table_group.ops->unset_window(&pe->table_group, num);
+ if (ret)
+ break;
+ }
+
+ if (ret) {
+ for (j = 0; j < i; ++j) {
+ struct pnv_ioda_pe *pe = npucomp->pe[j];
+
+ if (!npucomp->table_group.tables[num])
+ continue;
+
+ if (!pe->table_group.ops->set_window)
+ continue;
+
+ ret = pe->table_group.ops->set_window(&pe->table_group,
+ num, table_group->tables[num]);
+ if (ret)
+ break;
+ }
+ } else if (table_group->tables[num]) {
+ iommu_tce_table_put(table_group->tables[num]);
+ table_group->tables[num] = NULL;
+ }
+
+ return ret;
+}
+
+static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
+{
+ int i;
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ for (i = 0; i < npucomp->pe_num; ++i) {
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+ if (!pe->table_group.ops->take_ownership)
+ continue;
+ pe->table_group.ops->take_ownership(&pe->table_group);
+ }
+}
+
+static void pnv_npu_peers_release_ownership(
+ struct iommu_table_group *table_group)
+{
+ int i;
+ struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
+ table_group);
+
+ for (i = 0; i < npucomp->pe_num; ++i) {
+ struct pnv_ioda_pe *pe = npucomp->pe[i];
+
+ if (!pe->table_group.ops->release_ownership)
+ continue;
+ pe->table_group.ops->release_ownership(&pe->table_group);
+ }
+}
+
+static struct iommu_table_group_ops pnv_npu_peers_ops = {
+ .get_table_size = pnv_pci_ioda2_get_table_size,
+ .create_table = pnv_npu_peers_create_table_userspace,
+ .set_window = pnv_npu_peers_set_window,
+ .unset_window = pnv_npu_peers_unset_window,
+ .take_ownership = pnv_npu_peers_take_ownership,
+ .release_ownership = pnv_npu_peers_release_ownership,
+};
+
+static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
+ struct pnv_ioda_pe *pe)
+{
+ if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
+ return;
+
+ npucomp->pe[npucomp->pe_num] = pe;
+ ++npucomp->pe_num;
+}
+
+struct iommu_table_group *pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
+{
+ struct iommu_table_group *table_group;
+ struct npu_comp *npucomp;
+ struct pci_dev *gpdev = NULL;
+ struct pci_controller *hose;
+ struct pci_dev *npdev = NULL;
+
+ list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
+ npdev = pnv_pci_get_npu_dev(gpdev, 0);
+ if (npdev)
+ break;
+ }
+
+ if (!npdev)
+ /* It is not an NPU attached device, skip */
+ return NULL;
+
+ hose = pci_bus_to_host(npdev->bus);
+
+ if (hose->npu) {
+ table_group = &hose->npu->npucomp.table_group;
+
+ if (!table_group->group) {
+ table_group->ops = &pnv_npu_peers_ops;
+ iommu_register_group(table_group,
+ hose->global_number,
+ pe->pe_number);
+ }
+ } else {
+ /* Create a group for 1 GPU and attached NPUs for POWER8 */
+ pe->npucomp = kzalloc(sizeof(pe->npucomp), GFP_KERNEL);
+ table_group = &pe->npucomp->table_group;
+ table_group->ops = &pnv_npu_peers_ops;
+ iommu_register_group(table_group, hose->global_number,
+ pe->pe_number);
+ }
+
+ /* Steal capabilities from a GPU PE */
+ table_group->max_dynamic_windows_supported =
+ pe->table_group.max_dynamic_windows_supported;
+ table_group->tce32_start = pe->table_group.tce32_start;
+ table_group->tce32_size = pe->table_group.tce32_size;
+ table_group->max_levels = pe->table_group.max_levels;
+ if (!table_group->pgsizes)
+ table_group->pgsizes = pe->table_group.pgsizes;
+
+ npucomp = container_of(table_group, struct npu_comp, table_group);
+ pnv_comp_attach_table_group(npucomp, pe);
+
+ return table_group;
+}
+
+struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
+{
+ struct iommu_table_group *table_group;
+ struct npu_comp *npucomp;
+ struct pci_dev *gpdev = NULL;
+ struct pci_dev *npdev;
+ struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
+
+ WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
+ if (!gpe)
+ return NULL;
+
+ /*
+ * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
+ * but NPU bridges do not have this hook defined so we do it here.
+ * We do not setup other table group parameters as they won't be used
+ * anyway - NVLink bridges are subordinate PEs.
+ */
+ pe->table_group.ops = &pnv_pci_npu_ops;
+
+ table_group = iommu_group_get_iommudata(
+ iommu_group_get(&gpdev->dev));
+
+ /*
+ * On P9 NPU PHB and PCI PHB support different page sizes,
+ * keep only matching. We expect here that NVLink bridge PE pgsizes is
+ * initialized by the caller.
+ */
+ table_group->pgsizes &= pe->table_group.pgsizes;
+ npucomp = container_of(table_group, struct npu_comp, table_group);
+ pnv_comp_attach_table_group(npucomp, pe);
+
+ list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
+ struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
+
+ if (gpdevtmp != gpdev)
+ continue;
+
+ iommu_add_device(table_group, &npdev->dev);
+ }
+
+ return table_group;
+}
+#endif /* CONFIG_IOMMU_API */
+
/* Maximum number of nvlinks per npu */
#define NV_MAX_LINKS 6
@@ -490,7 +782,6 @@ static void acquire_atsd_reg(struct npu_context *npu_context,
int i, j;
struct npu *npu;
struct pci_dev *npdev;
- struct pnv_phb *nphb;
for (i = 0; i <= max_npu2_index; i++) {
mmio_atsd_reg[i].reg = -1;
@@ -505,8 +796,10 @@ static void acquire_atsd_reg(struct npu_context *npu_context,
if (!npdev)
continue;
- nphb = pci_bus_to_host(npdev->bus)->private_data;
- npu = &nphb->npu;
+ npu = pci_bus_to_host(npdev->bus)->npu;
+ if (!npu)
+ continue;
+
mmio_atsd_reg[i].npu = npu;
mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
while (mmio_atsd_reg[i].reg < 0) {
@@ -671,9 +964,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
u32 nvlink_index;
struct device_node *nvlink_dn;
struct mm_struct *mm = current->mm;
- struct pnv_phb *nphb;
struct npu *npu;
struct npu_context *npu_context;
+ struct pci_controller *hose;
/*
* At present we don't support GPUs connected to multiple NPUs and I'm
@@ -681,13 +974,14 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
*/
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
- if (!firmware_has_feature(FW_FEATURE_OPAL))
- return ERR_PTR(-ENODEV);
-
if (!npdev)
/* No nvlink associated with this GPU device */
return ERR_PTR(-ENODEV);
+ /* We only support DR/PR/HV in pnv_npu2_map_lpar_dev() */
+ if (flags & ~(MSR_DR | MSR_PR | MSR_HV))
+ return ERR_PTR(-EINVAL);
+
nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
&nvlink_index)))
@@ -701,20 +995,10 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
return ERR_PTR(-EINVAL);
}
- nphb = pci_bus_to_host(npdev->bus)->private_data;
- npu = &nphb->npu;
-
- /*
- * Setup the NPU context table for a particular GPU. These need to be
- * per-GPU as we need the tables to filter ATSDs when there are no
- * active contexts on a particular GPU. It is safe for these to be
- * called concurrently with destroy as the OPAL call takes appropriate
- * locks and refcounts on init/destroy.
- */
- rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
- PCI_DEVID(gpdev->bus->number, gpdev->devfn));
- if (rc < 0)
- return ERR_PTR(-ENOSPC);
+ hose = pci_bus_to_host(npdev->bus);
+ npu = hose->npu;
+ if (!npu)
+ return ERR_PTR(-ENODEV);
/*
* We store the npu pci device so we can more easily get at the
@@ -726,9 +1010,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
if (npu_context->release_cb != cb ||
npu_context->priv != priv) {
spin_unlock(&npu_context_lock);
- opal_npu_destroy_context(nphb->opal_id, mm->context.id,
- PCI_DEVID(gpdev->bus->number,
- gpdev->devfn));
return ERR_PTR(-EINVAL);
}
@@ -754,9 +1035,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
if (rc) {
kfree(npu_context);
- opal_npu_destroy_context(nphb->opal_id, mm->context.id,
- PCI_DEVID(gpdev->bus->number,
- gpdev->devfn));
return ERR_PTR(rc);
}
@@ -776,7 +1054,7 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
*/
WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
- if (!nphb->npu.nmmu_flush) {
+ if (!npu->nmmu_flush) {
/*
* If we're not explicitly flushing ourselves we need to mark
* the thread for global flushes
@@ -809,27 +1087,24 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
struct pci_dev *gpdev)
{
int removed;
- struct pnv_phb *nphb;
struct npu *npu;
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
struct device_node *nvlink_dn;
u32 nvlink_index;
+ struct pci_controller *hose;
if (WARN_ON(!npdev))
return;
- if (!firmware_has_feature(FW_FEATURE_OPAL))
+ hose = pci_bus_to_host(npdev->bus);
+ npu = hose->npu;
+ if (!npu)
return;
-
- nphb = pci_bus_to_host(npdev->bus)->private_data;
- npu = &nphb->npu;
nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
&nvlink_index)))
return;
WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
- opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
- PCI_DEVID(gpdev->bus->number, gpdev->devfn));
spin_lock(&npu_context_lock);
removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
spin_unlock(&npu_context_lock);
@@ -857,13 +1132,12 @@ int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
u64 rc = 0, result = 0;
int i, is_write;
struct page *page[1];
+ const char __user *u;
+ char c;
/* mmap_sem should be held so the struct_mm must be present */
struct mm_struct *mm = context->mm;
- if (!firmware_has_feature(FW_FEATURE_OPAL))
- return -ENODEV;
-
WARN_ON(!rwsem_is_locked(&mm->mmap_sem));
for (i = 0; i < count; i++) {
@@ -872,18 +1146,17 @@ int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
is_write ? FOLL_WRITE : 0,
page, NULL, NULL);
- /*
- * To support virtualised environments we will have to do an
- * access to the page to ensure it gets faulted into the
- * hypervisor. For the moment virtualisation is not supported in
- * other areas so leave the access out.
- */
if (rc != 1) {
status[i] = rc;
result = -EFAULT;
continue;
}
+ /* Make sure partition scoped tree gets a pte */
+ u = page_address(page[0]);
+ if (__get_user(c, u))
+ result = -EFAULT;
+
status[i] = 0;
put_page(page[0]);
}
@@ -892,42 +1165,127 @@ int pnv_npu2_handle_fault(struct npu_context *context, uintptr_t *ea,
}
EXPORT_SYMBOL(pnv_npu2_handle_fault);
-int pnv_npu2_init(struct pnv_phb *phb)
+int pnv_npu2_init(struct pci_controller *hose)
{
unsigned int i;
u64 mmio_atsd;
- struct device_node *dn;
- struct pci_dev *gpdev;
static int npu_index;
- uint64_t rc = 0;
+ struct npu *npu;
+ int ret;
- phb->npu.nmmu_flush =
- of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
- for_each_child_of_node(phb->hose->dn, dn) {
- gpdev = pnv_pci_get_gpu_dev(get_pci_dev(dn));
- if (gpdev) {
- rc = opal_npu_map_lpar(phb->opal_id,
- PCI_DEVID(gpdev->bus->number, gpdev->devfn),
- 0, 0);
- if (rc)
- dev_err(&gpdev->dev,
- "Error %lld mapping device to LPAR\n",
- rc);
- }
+ npu = kzalloc(sizeof(*npu), GFP_KERNEL);
+ if (!npu)
+ return -ENOMEM;
+
+ npu->nmmu_flush = of_property_read_bool(hose->dn, "ibm,nmmu-flush");
+
+ for (i = 0; i < ARRAY_SIZE(npu->mmio_atsd_regs) &&
+ !of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
+ i, &mmio_atsd); i++)
+ npu->mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
+
+ pr_info("NPU%d: Found %d MMIO ATSD registers", hose->global_number, i);
+ npu->mmio_atsd_count = i;
+ npu->mmio_atsd_usage = 0;
+ npu_index++;
+ if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
+ ret = -ENOSPC;
+ goto fail_exit;
+ }
+ max_npu2_index = npu_index;
+ npu->index = npu_index;
+ hose->npu = npu;
+
+ return 0;
+
+fail_exit:
+ for (i = 0; i < npu->mmio_atsd_count; ++i)
+ iounmap(npu->mmio_atsd_regs[i]);
+
+ kfree(npu);
+
+ return ret;
+}
+
+int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
+ unsigned long msr)
+{
+ int ret;
+ struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
+ struct pci_controller *hose;
+ struct pnv_phb *nphb;
+
+ if (!npdev)
+ return -ENODEV;
+
+ hose = pci_bus_to_host(npdev->bus);
+ nphb = hose->private_data;
+
+ dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
+ nphb->opal_id, lparid);
+ /*
+ * Currently we only support radix and non-zero LPCR only makes sense
+ * for hash tables so skiboot expects the LPCR parameter to be a zero.
+ */
+ ret = opal_npu_map_lpar(nphb->opal_id,
+ PCI_DEVID(gpdev->bus->number, gpdev->devfn), lparid,
+ 0 /* LPCR bits */);
+ if (ret) {
+ dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
+ return ret;
}
- for (i = 0; !of_property_read_u64_index(phb->hose->dn, "ibm,mmio-atsd",
- i, &mmio_atsd); i++)
- phb->npu.mmio_atsd_regs[i] = ioremap(mmio_atsd, 32);
-
- pr_info("NPU%lld: Found %d MMIO ATSD registers", phb->opal_id, i);
- phb->npu.mmio_atsd_count = i;
- phb->npu.mmio_atsd_usage = 0;
- npu_index++;
- if (WARN_ON(npu_index >= NV_MAX_NPUS))
- return -ENOSPC;
- max_npu2_index = npu_index;
- phb->npu.index = npu_index;
+ dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
+ nphb->opal_id, msr);
+ ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
+ PCI_DEVID(gpdev->bus->number, gpdev->devfn));
+ if (ret < 0)
+ dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
+ else
+ ret = 0;
return 0;
}
+EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
+
+void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
+{
+ struct pci_dev *gpdev;
+
+ list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
+ pnv_npu2_map_lpar_dev(gpdev, 0, msr);
+}
+
+int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
+{
+ int ret;
+ struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
+ struct pci_controller *hose;
+ struct pnv_phb *nphb;
+
+ if (!npdev)
+ return -ENODEV;
+
+ hose = pci_bus_to_host(npdev->bus);
+ nphb = hose->private_data;
+
+ dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
+ nphb->opal_id);
+ ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
+ PCI_DEVID(gpdev->bus->number, gpdev->devfn));
+ if (ret < 0) {
+ dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
+ return ret;
+ }
+
+ /* Set LPID to 0 anyway, just to be safe */
+ dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
+ ret = opal_npu_map_lpar(nphb->opal_id,
+ PCI_DEVID(gpdev->bus->number, gpdev->devfn), 0 /*LPID*/,
+ 0 /* LPCR bits */);
+ if (ret)
+ dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
diff --git a/arch/powerpc/platforms/powernv/opal-power.c b/arch/powerpc/platforms/powernv/opal-power.c
index 58dc330..89ab1da 100644
--- a/arch/powerpc/platforms/powernv/opal-power.c
+++ b/arch/powerpc/platforms/powernv/opal-power.c
@@ -138,7 +138,7 @@ static struct notifier_block opal_power_control_nb = {
.priority = 0,
};
-static int __init opal_power_control_init(void)
+int __init opal_power_control_init(void)
{
int ret, supported = 0;
struct device_node *np;
@@ -176,4 +176,3 @@ static int __init opal_power_control_init(void)
return 0;
}
-machine_subsys_initcall(powernv, opal_power_control_init);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index beed86f..79586f1 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -877,7 +877,7 @@ static int __init opal_init(void)
consoles = of_find_node_by_path("/ibm,opal/consoles");
if (consoles) {
for_each_child_of_node(consoles, np) {
- if (strcmp(np->name, "serial"))
+ if (!of_node_name_eq(np, "serial"))
continue;
of_platform_device_create(np, NULL, NULL);
}
@@ -960,6 +960,9 @@ static int __init opal_init(void)
/* Initialise OPAL sensor groups */
opal_sensor_groups_init();
+ /* Initialise OPAL Power control interface */
+ opal_power_control_init();
+
return 0;
}
machine_subsys_initcall(powernv, opal_init);
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index fe96910..697449a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -299,7 +299,7 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
if (alloc_userspace_copy) {
offset = 0;
uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
- levels, tce_table_size, &offset,
+ tmplevels, tce_table_size, &offset,
&total_allocated_uas);
if (!uas)
goto free_tces_exit;
@@ -368,6 +368,7 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
found = false;
for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
if (table_group->tables[i] == tbl) {
+ iommu_tce_table_put(tbl);
table_group->tables[i] = NULL;
found = true;
break;
@@ -393,7 +394,7 @@ long pnv_pci_link_table_and_group(int node, int num,
tgl->table_group = table_group;
list_add_rcu(&tgl->next, &tbl->it_group_list);
- table_group->tables[num] = tbl;
+ table_group->tables[num] = iommu_tce_table_get(tbl);
return 0;
}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index dd80744..1d6406a 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -190,7 +190,8 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
unsigned int pe_num = pe->pe_number;
WARN_ON(pe->pdev);
-
+ WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */
+ kfree(pe->npucomp);
memset(pe, 0, sizeof(struct pnv_ioda_pe));
clear_bit(pe_num, phb->ioda.pe_alloc);
}
@@ -517,8 +518,6 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
phb->init_m64 = pnv_ioda1_init_m64;
else
phb->init_m64 = pnv_ioda2_init_m64;
- phb->reserve_m64_pe = pnv_ioda_reserve_m64_pe;
- phb->pick_m64_pe = pnv_ioda_pick_m64_pe;
}
static void pnv_ioda_freeze_pe(struct pnv_phb *phb, int pe_no)
@@ -604,8 +603,8 @@ static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt)
static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
{
struct pnv_ioda_pe *slave, *pe;
- u8 fstate, state;
- __be16 pcierr;
+ u8 fstate = 0, state;
+ __be16 pcierr = 0;
s64 rc;
/* Sanity check on PE number */
@@ -663,10 +662,6 @@ static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no)
return state;
}
-/* Currently those 2 are only used when MSIs are enabled, this will change
- * but in the meantime, we need to protect them to avoid warnings
- */
-#ifdef CONFIG_PCI_MSI
struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -679,7 +674,6 @@ struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
return NULL;
return &phb->ioda.pe_array[pdn->pe_number];
}
-#endif /* CONFIG_PCI_MSI */
static int pnv_ioda_set_one_peltv(struct pnv_phb *phb,
struct pnv_ioda_pe *parent,
@@ -1160,8 +1154,8 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pe = &phb->ioda.pe_array[phb->ioda.root_pe_idx];
/* Check if PE is determined by M64 */
- if (!pe && phb->pick_m64_pe)
- pe = phb->pick_m64_pe(bus, all);
+ if (!pe)
+ pe = pnv_ioda_pick_m64_pe(bus, all);
/* The PE number isn't pinned by M64 */
if (!pe)
@@ -1273,19 +1267,20 @@ static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
static void pnv_pci_ioda_setup_PEs(void)
{
- struct pci_controller *hose, *tmp;
+ struct pci_controller *hose;
struct pnv_phb *phb;
struct pci_bus *bus;
struct pci_dev *pdev;
+ struct pnv_ioda_pe *pe;
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
if (phb->type == PNV_PHB_NPU_NVLINK) {
/* PE#0 is needed for error reporting */
pnv_ioda_reserve_pe(phb, 0);
pnv_ioda_setup_npu_PEs(hose->bus);
if (phb->model == PNV_PHB_MODEL_NPU2)
- pnv_npu2_init(phb);
+ WARN_ON_ONCE(pnv_npu2_init(hose));
}
if (phb->type == PNV_PHB_NPU_OCAPI) {
bus = hose->bus;
@@ -1293,6 +1288,14 @@ static void pnv_pci_ioda_setup_PEs(void)
pnv_ioda_setup_dev_PE(pdev);
}
}
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+ if (phb->type != PNV_PHB_IODA2)
+ continue;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list)
+ pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
+ }
}
#ifdef CONFIG_PCI_IOV
@@ -1531,6 +1534,11 @@ void pnv_pci_sriov_disable(struct pci_dev *pdev)
static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe);
+#ifdef CONFIG_IOMMU_API
+static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
+ struct iommu_table_group *table_group, struct pci_bus *bus);
+
+#endif
static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
{
struct pci_bus *bus;
@@ -1584,6 +1592,9 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
mutex_unlock(&phb->ioda.pe_list_mutex);
pnv_pci_ioda2_setup_dma_pe(phb, pe);
+#ifdef CONFIG_IOMMU_API
+ pnv_ioda_setup_bus_iommu_group(pe, &pe->table_group, NULL);
+#endif
}
}
@@ -1923,21 +1934,16 @@ static u64 pnv_pci_ioda_dma_get_required_mask(struct pci_dev *pdev)
return mask;
}
-static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe,
- struct pci_bus *bus,
- bool add_to_group)
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
{
struct pci_dev *dev;
list_for_each_entry(dev, &bus->devices, bus_list) {
set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
set_dma_offset(&dev->dev, pe->tce_bypass_base);
- if (add_to_group)
- iommu_add_device(&dev->dev);
if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
- pnv_ioda_setup_bus_dma(pe, dev->subordinate,
- add_to_group);
+ pnv_ioda_setup_bus_dma(pe, dev->subordinate);
}
}
@@ -2366,16 +2372,8 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
iommu_init_table(tbl, phb->hose->node);
- if (pe->flags & PNV_IODA_PE_DEV) {
- /*
- * Setting table base here only for carrying iommu_group
- * further down to let iommu_add_device() do the job.
- * pnv_pci_ioda_dma_dev_setup will override it later anyway.
- */
- set_iommu_table_base(&pe->pdev->dev, tbl);
- iommu_add_device(&pe->pdev->dev);
- } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+ if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
return;
fail:
@@ -2527,14 +2525,6 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
if (!pnv_iommu_bypass_disabled)
pnv_pci_ioda2_set_bypass(pe, true);
- /*
- * Setting table base here only for carrying iommu_group
- * further down to let iommu_add_device() do the job.
- * pnv_pci_ioda_dma_dev_setup will override it later anyway.
- */
- if (pe->flags & PNV_IODA_PE_DEV)
- set_iommu_table_base(&pe->pdev->dev, tbl);
-
return 0;
}
@@ -2565,7 +2555,7 @@ static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
#endif
#ifdef CONFIG_IOMMU_API
-static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
__u64 window_size, __u32 levels)
{
unsigned long bytes = 0;
@@ -2616,7 +2606,7 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
pnv_pci_ioda2_set_bypass(pe, false);
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (pe->pbus)
- pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
iommu_tce_table_put(tbl);
}
@@ -2627,7 +2617,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
pnv_pci_ioda2_setup_default_config(pe);
if (pe->pbus)
- pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
}
static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
@@ -2639,131 +2629,100 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
.release_ownership = pnv_ioda2_release_ownership,
};
-static int gpe_table_group_to_npe_cb(struct device *dev, void *opaque)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe **ptmppe = opaque;
- struct pci_dev *pdev = container_of(dev, struct pci_dev, dev);
- struct pci_dn *pdn = pci_get_pdn(pdev);
-
- if (!pdn || pdn->pe_number == IODA_INVALID_PE)
- return 0;
-
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
- if (phb->type != PNV_PHB_NPU_NVLINK)
- return 0;
-
- *ptmppe = &phb->ioda.pe_array[pdn->pe_number];
-
- return 1;
-}
-
-/*
- * This returns PE of associated NPU.
- * This assumes that NPU is in the same IOMMU group with GPU and there is
- * no other PEs.
- */
-static struct pnv_ioda_pe *gpe_table_group_to_npe(
- struct iommu_table_group *table_group)
-{
- struct pnv_ioda_pe *npe = NULL;
- int ret = iommu_group_for_each_dev(table_group->group, &npe,
- gpe_table_group_to_npe_cb);
-
- BUG_ON(!ret || !npe);
-
- return npe;
-}
-
-static long pnv_pci_ioda2_npu_set_window(struct iommu_table_group *table_group,
- int num, struct iommu_table *tbl)
-{
- struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
- int num2 = (num == 0) ? 1 : 0;
- long ret = pnv_pci_ioda2_set_window(table_group, num, tbl);
-
- if (ret)
- return ret;
-
- if (table_group->tables[num2])
- pnv_npu_unset_window(npe, num2);
-
- ret = pnv_npu_set_window(npe, num, tbl);
- if (ret) {
- pnv_pci_ioda2_unset_window(table_group, num);
- if (table_group->tables[num2])
- pnv_npu_set_window(npe, num2,
- table_group->tables[num2]);
- }
-
- return ret;
-}
-
-static long pnv_pci_ioda2_npu_unset_window(
+static void pnv_ioda_setup_bus_iommu_group_add_devices(struct pnv_ioda_pe *pe,
struct iommu_table_group *table_group,
- int num)
+ struct pci_bus *bus)
{
- struct pnv_ioda_pe *npe = gpe_table_group_to_npe(table_group);
- int num2 = (num == 0) ? 1 : 0;
- long ret = pnv_pci_ioda2_unset_window(table_group, num);
+ struct pci_dev *dev;
- if (ret)
- return ret;
+ list_for_each_entry(dev, &bus->devices, bus_list) {
+ iommu_add_device(table_group, &dev->dev);
- if (!npe->table_group.tables[num])
- return 0;
-
- ret = pnv_npu_unset_window(npe, num);
- if (ret)
- return ret;
-
- if (table_group->tables[num2])
- ret = pnv_npu_set_window(npe, num2, table_group->tables[num2]);
-
- return ret;
+ if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
+ pnv_ioda_setup_bus_iommu_group_add_devices(pe,
+ table_group, dev->subordinate);
+ }
}
-static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
+static void pnv_ioda_setup_bus_iommu_group(struct pnv_ioda_pe *pe,
+ struct iommu_table_group *table_group, struct pci_bus *bus)
{
- /*
- * Detach NPU first as pnv_ioda2_take_ownership() will destroy
- * the iommu_table if 32bit DMA is enabled.
- */
- pnv_npu_take_ownership(gpe_table_group_to_npe(table_group));
- pnv_ioda2_take_ownership(table_group);
+
+ if (pe->flags & PNV_IODA_PE_DEV)
+ iommu_add_device(table_group, &pe->pdev->dev);
+
+ if ((pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) || bus)
+ pnv_ioda_setup_bus_iommu_group_add_devices(pe, table_group,
+ bus);
}
-static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
- .get_table_size = pnv_pci_ioda2_get_table_size,
- .create_table = pnv_pci_ioda2_create_table_userspace,
- .set_window = pnv_pci_ioda2_npu_set_window,
- .unset_window = pnv_pci_ioda2_npu_unset_window,
- .take_ownership = pnv_ioda2_npu_take_ownership,
- .release_ownership = pnv_ioda2_release_ownership,
-};
+static unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
static void pnv_pci_ioda_setup_iommu_api(void)
{
- struct pci_controller *hose, *tmp;
+ struct pci_controller *hose;
struct pnv_phb *phb;
- struct pnv_ioda_pe *pe, *gpe;
+ struct pnv_ioda_pe *pe;
+
+ /*
+ * There are 4 types of PEs:
+ * - PNV_IODA_PE_BUS: a downstream port with an adapter,
+ * created from pnv_pci_setup_bridge();
+ * - PNV_IODA_PE_BUS_ALL: a PCI-PCIX bridge with devices behind it,
+ * created from pnv_pci_setup_bridge();
+ * - PNV_IODA_PE_VF: a SRIOV virtual function,
+ * created from pnv_pcibios_sriov_enable();
+ * - PNV_IODA_PE_DEV: an NPU or OCAPI device,
+ * created from pnv_pci_ioda_fixup().
+ *
+ * Normally a PE is represented by an IOMMU group, however for
+ * devices with side channels the groups need to be more strict.
+ */
+ list_for_each_entry(hose, &hose_list, list_node) {
+ phb = hose->private_data;
+
+ if (phb->type == PNV_PHB_NPU_NVLINK)
+ continue;
+
+ list_for_each_entry(pe, &phb->ioda.pe_list, list) {
+ struct iommu_table_group *table_group;
+
+ table_group = pnv_try_setup_npu_table_group(pe);
+ if (!table_group) {
+ if (!pnv_pci_ioda_pe_dma_weight(pe))
+ continue;
+
+ table_group = &pe->table_group;
+ iommu_register_group(&pe->table_group,
+ pe->phb->hose->global_number,
+ pe->pe_number);
+ }
+ pnv_ioda_setup_bus_iommu_group(pe, table_group,
+ pe->pbus);
+ }
+ }
/*
* Now we have all PHBs discovered, time to add NPU devices to
* the corresponding IOMMU groups.
*/
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry(hose, &hose_list, list_node) {
+ unsigned long pgsizes;
+
phb = hose->private_data;
if (phb->type != PNV_PHB_NPU_NVLINK)
continue;
+ pgsizes = pnv_ioda_parse_tce_sizes(phb);
list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- gpe = pnv_pci_npu_setup_iommu(pe);
- if (gpe)
- gpe->table_group.ops = &pnv_pci_ioda2_npu_ops;
+ /*
+ * IODA2 bridges get this set up from
+ * pci_controller_ops::setup_bridge but NPU bridges
+ * do not have this hook defined so we do it here.
+ */
+ pe->table_group.pgsizes = pgsizes;
+ pnv_npu_compound_attach(pe);
}
}
}
@@ -2810,9 +2769,6 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
/* TVE #1 is selected by PCI address bit 59 */
pe->tce_bypass_base = 1ull << 59;
- iommu_register_group(&pe->table_group, phb->hose->global_number,
- pe->pe_number);
-
/* The PE will reserve all possible 32-bits space */
pe_info(pe, "Setting up 32-bit TCE table at 0..%08x\n",
phb->ioda.m32_pci_base);
@@ -2833,10 +2789,9 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
return;
if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL))
- pnv_ioda_setup_bus_dma(pe, pe->pbus, true);
+ pnv_ioda_setup_bus_dma(pe, pe->pbus);
}
-#ifdef CONFIG_PCI_MSI
int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
{
struct pnv_phb *phb = container_of(chip, struct pnv_phb,
@@ -2982,9 +2937,6 @@ static void pnv_pci_init_ioda_msis(struct pnv_phb *phb)
pr_info(" Allocated bitmap for %d MSIs (base IRQ 0x%x)\n",
count, phb->msi_base);
}
-#else
-static void pnv_pci_init_ioda_msis(struct pnv_phb *phb) { }
-#endif /* CONFIG_PCI_MSI */
#ifdef CONFIG_PCI_IOV
static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
@@ -3402,8 +3354,7 @@ static void pnv_pci_setup_bridge(struct pci_bus *bus, unsigned long type)
return;
/* Reserve PEs according to used M64 resources */
- if (phb->reserve_m64_pe)
- phb->reserve_m64_pe(bus, NULL, all);
+ pnv_ioda_reserve_m64_pe(bus, NULL, all);
/*
* Assign PE. We might run here because of partial hotplug.
@@ -3687,6 +3638,15 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
pnv_ioda_release_pe(pe);
}
+static void pnv_npu_disable_device(struct pci_dev *pdev)
+{
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ struct eeh_pe *eehpe = edev ? edev->pe : NULL;
+
+ if (eehpe && eeh_ops && eeh_ops->reset)
+ eeh_ops->reset(eehpe, EEH_RESET_HOT);
+}
+
static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
@@ -3698,10 +3658,8 @@ static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
.dma_bus_setup = pnv_pci_dma_bus_setup,
-#ifdef CONFIG_PCI_MSI
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
-#endif
.enable_device_hook = pnv_pci_enable_device_hook,
.release_device = pnv_pci_release_device,
.window_alignment = pnv_pci_window_alignment,
@@ -3722,15 +3680,14 @@ static int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
.dma_dev_setup = pnv_pci_dma_dev_setup,
-#ifdef CONFIG_PCI_MSI
.setup_msi_irqs = pnv_setup_msi_irqs,
.teardown_msi_irqs = pnv_teardown_msi_irqs,
-#endif
.enable_device_hook = pnv_pci_enable_device_hook,
.window_alignment = pnv_pci_window_alignment,
.reset_secondary_bus = pnv_pci_reset_secondary_bus,
.dma_set_mask = pnv_npu_dma_set_mask,
.shutdown = pnv_pci_ioda_shutdown,
+ .disable_device = pnv_npu_disable_device,
};
static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 13aef23..45fb70b 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -160,7 +160,6 @@ int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg)
}
EXPORT_SYMBOL_GPL(pnv_pci_set_power_state);
-#ifdef CONFIG_PCI_MSI
int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
struct pci_controller *hose = pci_bus_to_host(pdev->bus);
@@ -229,7 +228,6 @@ void pnv_teardown_msi_irqs(struct pci_dev *pdev)
msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1);
}
}
-#endif /* CONFIG_PCI_MSI */
/* Nicely print the contents of the PE State Tables (PEST). */
static void pnv_pci_dump_pest(__be64 pestA[], __be64 pestB[], int pest_size)
@@ -602,8 +600,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
{
struct pnv_phb *phb = pdn->phb->private_data;
- u8 fstate;
- __be16 pcierr;
+ u8 fstate = 0;
+ __be16 pcierr = 0;
unsigned int pe_no;
s64 rc;
@@ -1127,4 +1125,45 @@ void __init pnv_pci_init(void)
set_pci_dma_ops(&dma_iommu_ops);
}
-machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);
+static int pnv_tce_iommu_bus_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+ struct pci_dev *pdev;
+ struct pci_dn *pdn;
+ struct pnv_ioda_pe *pe;
+ struct pci_controller *hose;
+ struct pnv_phb *phb;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ pdev = to_pci_dev(dev);
+ pdn = pci_get_pdn(pdev);
+ hose = pci_bus_to_host(pdev->bus);
+ phb = hose->private_data;
+
+ WARN_ON_ONCE(!phb);
+ if (!pdn || pdn->pe_number == IODA_INVALID_PE || !phb)
+ return 0;
+
+ pe = &phb->ioda.pe_array[pdn->pe_number];
+ iommu_add_device(&pe->table_group, dev);
+ return 0;
+ case BUS_NOTIFY_DEL_DEVICE:
+ iommu_del_device(dev);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static struct notifier_block pnv_tce_iommu_bus_nb = {
+ .notifier_call = pnv_tce_iommu_bus_notifier,
+};
+
+static int __init pnv_tce_iommu_bus_notifier_init(void)
+{
+ bus_register_notifier(&pci_bus_type, &pnv_tce_iommu_bus_nb);
+ return 0;
+}
+machine_subsys_initcall_sync(powernv, pnv_tce_iommu_bus_notifier_init);
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8b37b28..8e36da3 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -8,9 +8,6 @@
struct pci_dn;
-/* Maximum possible number of ATSD MMIO registers per NPU */
-#define NV_NMMU_ATSD_REGS 8
-
enum pnv_phb_type {
PNV_PHB_IODA1 = 0,
PNV_PHB_IODA2 = 1,
@@ -65,6 +62,7 @@ struct pnv_ioda_pe {
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
struct iommu_table_group table_group;
+ struct npu_comp *npucomp;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
@@ -106,20 +104,14 @@ struct pnv_phb {
struct dentry *dbgfs;
#endif
-#ifdef CONFIG_PCI_MSI
unsigned int msi_base;
unsigned int msi32_support;
struct msi_bitmap msi_bmp;
-#endif
int (*msi_setup)(struct pnv_phb *phb, struct pci_dev *dev,
unsigned int hwirq, unsigned int virq,
unsigned int is_64, struct msi_msg *msg);
void (*dma_dev_setup)(struct pnv_phb *phb, struct pci_dev *pdev);
- void (*fixup_phb)(struct pci_controller *hose);
int (*init_m64)(struct pnv_phb *phb);
- void (*reserve_m64_pe)(struct pci_bus *bus,
- unsigned long *pe_bitmap, bool all);
- struct pnv_ioda_pe *(*pick_m64_pe)(struct pci_bus *bus, bool all);
int (*get_pe_state)(struct pnv_phb *phb, int pe_no);
void (*freeze_pe)(struct pnv_phb *phb, int pe_no);
int (*unfreeze_pe)(struct pnv_phb *phb, int pe_no, int opt);
@@ -180,19 +172,6 @@ struct pnv_phb {
unsigned int diag_data_size;
u8 *diag_data;
- /* Nvlink2 data */
- struct npu {
- int index;
- __be64 *mmio_atsd_regs[NV_NMMU_ATSD_REGS];
- unsigned int mmio_atsd_count;
-
- /* Bitmask for MMIO register usage */
- unsigned long mmio_atsd_usage;
-
- /* Do we need to explicitly flush the nest mmu? */
- bool nmmu_flush;
- } npu;
-
int p2p_target_count;
};
@@ -210,6 +189,7 @@ extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
extern void pnv_pci_init_npu_phb(struct device_node *np);
extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
+extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
@@ -220,6 +200,8 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev);
extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq);
extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
+extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
+ __u64 window_size, __u32 levels);
extern int pnv_eeh_post_init(void);
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
@@ -235,12 +217,10 @@ extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
extern struct pnv_ioda_pe *pnv_pci_npu_setup_iommu(struct pnv_ioda_pe *npe);
-extern long pnv_npu_set_window(struct pnv_ioda_pe *npe, int num,
- struct iommu_table *tbl);
-extern long pnv_npu_unset_window(struct pnv_ioda_pe *npe, int num);
-extern void pnv_npu_take_ownership(struct pnv_ioda_pe *npe);
-extern void pnv_npu_release_ownership(struct pnv_ioda_pe *npe);
-extern int pnv_npu2_init(struct pnv_phb *phb);
+extern struct iommu_table_group *pnv_try_setup_npu_table_group(
+ struct pnv_ioda_pe *pe);
+extern struct iommu_table_group *pnv_npu_compound_attach(
+ struct pnv_ioda_pe *pe);
/* pci-ioda-tce.c */
#define POWERNV_IOMMU_DEFAULT_LEVELS 1
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 3b881ac..d291b61 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -197,6 +197,7 @@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
found = find_aa_index(dr_node, ala_prop, lmb_assoc, &aa_index);
+ of_node_put(dr_node);
dlpar_free_cc_nodes(lmb_node);
if (!found) {
@@ -353,8 +354,11 @@ static bool lmb_is_removable(struct drmem_lmb *lmb)
phys_addr = lmb->base_addr;
#ifdef CONFIG_FA_DUMP
- /* Don't hot-remove memory that falls in fadump boot memory area */
- if (is_fadump_boot_memory_area(phys_addr, block_sz))
+ /*
+ * Don't hot-remove memory that falls in fadump boot memory area
+ * and memory that is reserved for capturing old kernel memory.
+ */
+ if (is_fadump_memory_area(phys_addr, block_sz))
return false;
#endif
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 06f0296..8fc8fe0 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -57,7 +57,6 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
{
struct iommu_table_group *table_group;
struct iommu_table *tbl;
- struct iommu_table_group_link *tgl;
table_group = kzalloc_node(sizeof(struct iommu_table_group), GFP_KERNEL,
node);
@@ -68,22 +67,13 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
if (!tbl)
goto free_group;
- tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
- node);
- if (!tgl)
- goto free_table;
-
INIT_LIST_HEAD_RCU(&tbl->it_group_list);
kref_init(&tbl->it_kref);
- tgl->table_group = table_group;
- list_add_rcu(&tgl->next, &tbl->it_group_list);
table_group->tables[0] = tbl;
return table_group;
-free_table:
- kfree(tbl);
free_group:
kfree(table_group);
return NULL;
@@ -93,23 +83,12 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group,
const char *node_name)
{
struct iommu_table *tbl;
-#ifdef CONFIG_IOMMU_API
- struct iommu_table_group_link *tgl;
-#endif
if (!table_group)
return;
tbl = table_group->tables[0];
#ifdef CONFIG_IOMMU_API
- tgl = list_first_entry_or_null(&tbl->it_group_list,
- struct iommu_table_group_link, next);
-
- WARN_ON_ONCE(!tgl);
- if (tgl) {
- list_del_rcu(&tgl->next);
- kfree(tgl);
- }
if (table_group->group) {
iommu_group_put(table_group->group);
BUG_ON(table_group->group);
@@ -645,7 +624,6 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
iommu_table_setparms(pci->phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
iommu_init_table(tbl, pci->phb->node);
- iommu_register_group(pci->table_group, pci_domain_nr(bus), 0);
/* Divide the rest (1.75GB) among the children */
pci->phb->dma_window_size = 0x80000000ul;
@@ -756,10 +734,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
iommu_table_setparms(phb, dn, tbl);
tbl->it_ops = &iommu_table_pseries_ops;
iommu_init_table(tbl, phb->node);
- iommu_register_group(PCI_DN(dn)->table_group,
- pci_domain_nr(phb->bus), 0);
set_iommu_table_base(&dev->dev, tbl);
- iommu_add_device(&dev->dev);
return;
}
@@ -770,11 +745,10 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
while (dn && PCI_DN(dn) && PCI_DN(dn)->table_group == NULL)
dn = dn->parent;
- if (dn && PCI_DN(dn)) {
+ if (dn && PCI_DN(dn))
set_iommu_table_base(&dev->dev,
PCI_DN(dn)->table_group->tables[0]);
- iommu_add_device(&dev->dev);
- } else
+ else
printk(KERN_WARNING "iommu: Device %s has no iommu table\n",
pci_name(dev));
}
@@ -964,6 +938,37 @@ struct failed_ddw_pdn {
static LIST_HEAD(failed_ddw_pdn_list);
+static phys_addr_t ddw_memory_hotplug_max(void)
+{
+ phys_addr_t max_addr = memory_hotplug_max();
+ struct device_node *memory;
+
+ for_each_node_by_type(memory, "memory") {
+ unsigned long start, size;
+ int ranges, n_mem_addr_cells, n_mem_size_cells, len;
+ const __be32 *memcell_buf;
+
+ memcell_buf = of_get_property(memory, "reg", &len);
+ if (!memcell_buf || len <= 0)
+ continue;
+
+ n_mem_addr_cells = of_n_addr_cells(memory);
+ n_mem_size_cells = of_n_size_cells(memory);
+
+ /* ranges in cell */
+ ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
+
+ start = of_read_number(memcell_buf, n_mem_addr_cells);
+ memcell_buf += n_mem_addr_cells;
+ size = of_read_number(memcell_buf, n_mem_size_cells);
+ memcell_buf += n_mem_size_cells;
+
+ max_addr = max_t(phys_addr_t, max_addr, start + size);
+ }
+
+ return max_addr;
+}
+
/*
* If the PE supports dynamic dma windows, and there is space for a table
* that can map all pages in a linear offset, then setup such a table,
@@ -1053,7 +1058,7 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
}
/* verify the window * number of ptes will map the partition */
/* check largest block * page size > max memory hotplug addr */
- max_addr = memory_hotplug_max();
+ max_addr = ddw_memory_hotplug_max();
if (query.largest_available_block < (max_addr >> page_shift)) {
dev_dbg(&dev->dev, "can't map partition max 0x%llx with %u "
"%llu-sized pages\n", max_addr, query.largest_available_block,
@@ -1190,7 +1195,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
}
set_iommu_table_base(&dev->dev, pci->table_group->tables[0]);
- iommu_add_device(&dev->dev);
+ iommu_add_device(pci->table_group, &dev->dev);
}
static int dma_set_mask_pSeriesLP(struct device *dev, u64 dma_mask)
@@ -1395,4 +1400,27 @@ static int __init disable_multitce(char *str)
__setup("multitce=", disable_multitce);
+static int tce_iommu_bus_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ switch (action) {
+ case BUS_NOTIFY_DEL_DEVICE:
+ iommu_del_device(dev);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static struct notifier_block tce_iommu_bus_nb = {
+ .notifier_call = tce_iommu_bus_notifier,
+};
+
+static int __init tce_iommu_bus_notifier_init(void)
+{
+ bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
+ return 0;
+}
machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 41d8a4d..7725825 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -29,6 +29,7 @@
#include <asm/pci-bridge.h>
#include <asm/prom.h>
#include <asm/ppc-pci.h>
+#include <asm/pci.h>
#include "pseries.h"
#if 0
@@ -237,6 +238,8 @@ static void __init pSeries_request_regions(void)
void __init pSeries_final_fixup(void)
{
+ struct pci_controller *hose;
+
pSeries_request_regions();
eeh_probe_devices();
@@ -246,6 +249,25 @@ void __init pSeries_final_fixup(void)
ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
#endif
+ list_for_each_entry(hose, &hose_list, list_node) {
+ struct device_node *dn = hose->dn, *nvdn;
+
+ while (1) {
+ dn = of_find_all_nodes(dn);
+ if (!dn)
+ break;
+ nvdn = of_parse_phandle(dn, "ibm,nvlink", 0);
+ if (!nvdn)
+ continue;
+ if (!of_device_is_compatible(nvdn, "ibm,npu-link"))
+ continue;
+ if (!of_device_is_compatible(nvdn->parent,
+ "ibm,power9-npu"))
+ continue;
+ WARN_ON_ONCE(pnv_npu2_init(hose));
+ break;
+ }
+ }
}
/*
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index a27f40e..27f0a91 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -52,8 +52,8 @@ static ssize_t pmem_drc_add_node(u32 drc_index)
/* NB: The of reconfig notifier creates platform device from the node */
rc = dlpar_attach_node(dn, pmem_node);
if (rc) {
- pr_err("Failed to attach node %s, rc: %d, drc index: %x\n",
- dn->name, rc, drc_index);
+ pr_err("Failed to attach node %pOF, rc: %d, drc index: %x\n",
+ dn, rc, drc_index);
if (dlpar_release_drc(drc_index))
dlpar_free_cc_nodes(dn);
@@ -93,8 +93,8 @@ static ssize_t pmem_drc_remove_node(u32 drc_index)
rc = dlpar_release_drc(drc_index);
if (rc) {
- pr_err("Failed to release drc (%x) for CPU %s, rc: %d\n",
- drc_index, dn->name, rc);
+ pr_err("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
+ drc_index, dn, rc);
dlpar_attach_node(dn, pmem_node);
return rc;
}
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 4078a05..41f62ca2 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -190,7 +190,7 @@ static void __init pseries_setup_i8259_cascade(void)
of_node_put(old);
if (np == NULL)
break;
- if (strcmp(np->name, "pci") != 0)
+ if (!of_node_name_eq(np, "pci"))
continue;
addrp = of_get_property(np, "8259-interrupt-acknowledge", NULL);
if (addrp == NULL)
diff --git a/drivers/ide/pmac.c b/drivers/ide/pmac.c
index c5b902b..ec46aa8 100644
--- a/drivers/ide/pmac.c
+++ b/drivers/ide/pmac.c
@@ -1045,7 +1045,7 @@ static int pmac_ide_setup_device(pmac_ide_hwif_t *pmif, struct ide_hw *hw)
d.port_ops = &pmac_ide_ata4_port_ops;
d.udma_mask = ATA_UDMA5;
} else if (of_device_is_compatible(np, "keylargo-ata")) {
- if (strcmp(np->name, "ata-4") == 0) {
+ if (of_node_name_eq(np, "ata-4")) {
pmif->kind = controller_kl_ata4;
d.port_ops = &pmac_ide_ata4_port_ops;
d.udma_mask = ATA_UDMA4;
diff --git a/drivers/macintosh/ans-lcd.c b/drivers/macintosh/ans-lcd.c
index c8e078b..ef0c236 100644
--- a/drivers/macintosh/ans-lcd.c
+++ b/drivers/macintosh/ans-lcd.c
@@ -160,7 +160,7 @@ anslcd_init(void)
struct device_node* node;
node = of_find_node_by_name(NULL, "lcd");
- if (!node || !node->parent || strcmp(node->parent->name, "gc")) {
+ if (!node || !of_node_name_eq(node->parent, "gc")) {
of_node_put(node);
return -ENODEV;
}
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index bc84188..3543a82 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -190,11 +190,11 @@ static int macio_resource_quirks(struct device_node *np, struct resource *res,
return 0;
/* Grand Central has too large resource 0 on some machines */
- if (index == 0 && !strcmp(np->name, "gc"))
+ if (index == 0 && of_node_name_eq(np, "gc"))
res->end = res->start + 0x1ffff;
/* Airport has bogus resource 2 */
- if (index >= 2 && !strcmp(np->name, "radio"))
+ if (index >= 2 && of_node_name_eq(np, "radio"))
return 1;
#ifndef CONFIG_PPC64
@@ -207,20 +207,20 @@ static int macio_resource_quirks(struct device_node *np, struct resource *res,
* level of hierarchy, but I don't really feel the need
* for it
*/
- if (!strcmp(np->name, "escc"))
+ if (of_node_name_eq(np, "escc"))
return 1;
/* ESCC has bogus resources >= 3 */
- if (index >= 3 && !(strcmp(np->name, "ch-a") &&
- strcmp(np->name, "ch-b")))
+ if (index >= 3 && (of_node_name_eq(np, "ch-a") ||
+ of_node_name_eq(np, "ch-b")))
return 1;
/* Media bay has too many resources, keep only first one */
- if (index > 0 && !strcmp(np->name, "media-bay"))
+ if (index > 0 && of_node_name_eq(np, "media-bay"))
return 1;
/* Some older IDE resources have bogus sizes */
- if (!strcmp(np->name, "IDE") || !strcmp(np->name, "ATA") ||
+ if (of_node_name_eq(np, "IDE") || of_node_name_eq(np, "ATA") ||
of_node_is_type(np, "ide") || of_node_is_type(np, "ata")) {
if (index == 0 && (res->end - res->start) > 0xfff)
res->end = res->start + 0xfff;
@@ -260,7 +260,7 @@ static void macio_add_missing_resources(struct macio_dev *dev)
irq_base = 64;
/* Fix SCC */
- if (strcmp(np->name, "ch-a") == 0) {
+ if (of_node_name_eq(np, "ch-a")) {
macio_create_fixup_irq(dev, 0, 15 + irq_base);
macio_create_fixup_irq(dev, 1, 4 + irq_base);
macio_create_fixup_irq(dev, 2, 5 + irq_base);
@@ -268,18 +268,18 @@ static void macio_add_missing_resources(struct macio_dev *dev)
}
/* Fix media-bay */
- if (strcmp(np->name, "media-bay") == 0) {
+ if (of_node_name_eq(np, "media-bay")) {
macio_create_fixup_irq(dev, 0, 29 + irq_base);
printk(KERN_INFO "macio: fixed media-bay irq on gatwick\n");
}
/* Fix left media bay childs */
- if (dev->media_bay != NULL && strcmp(np->name, "floppy") == 0) {
+ if (dev->media_bay != NULL && of_node_name_eq(np, "floppy")) {
macio_create_fixup_irq(dev, 0, 19 + irq_base);
macio_create_fixup_irq(dev, 1, 1 + irq_base);
printk(KERN_INFO "macio: fixed left floppy irqs\n");
}
- if (dev->media_bay != NULL && strcasecmp(np->name, "ata4") == 0) {
+ if (dev->media_bay != NULL && of_node_name_eq(np, "ata4")) {
macio_create_fixup_irq(dev, 0, 14 + irq_base);
macio_create_fixup_irq(dev, 0, 3 + irq_base);
printk(KERN_INFO "macio: fixed left ide irqs\n");
@@ -438,11 +438,8 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
static int macio_skip_device(struct device_node *np)
{
- if (strncmp(np->name, "battery", 7) == 0)
- return 1;
- if (strncmp(np->name, "escc-legacy", 11) == 0)
- return 1;
- return 0;
+ return of_node_name_prefix(np, "battery") ||
+ of_node_name_prefix(np, "escc-legacy");
}
/**
@@ -489,9 +486,9 @@ static void macio_pci_add_devices(struct macio_chip *chip)
root_res);
if (mdev == NULL)
of_node_put(np);
- else if (strncmp(np->name, "media-bay", 9) == 0)
+ else if (of_node_name_prefix(np, "media-bay"))
mbdev = mdev;
- else if (strncmp(np->name, "escc", 4) == 0)
+ else if (of_node_name_prefix(np, "escc"))
sdev = mdev;
}
diff --git a/drivers/macintosh/rack-meter.c b/drivers/macintosh/rack-meter.c
index 1f29d24..3940e2a 100644
--- a/drivers/macintosh/rack-meter.c
+++ b/drivers/macintosh/rack-meter.c
@@ -376,18 +376,19 @@ static int rackmeter_probe(struct macio_dev* mdev,
pr_debug("rackmeter_probe()\n");
/* Get i2s-a node */
- while ((i2s = of_get_next_child(mdev->ofdev.dev.of_node, i2s)) != NULL)
- if (strcmp(i2s->name, "i2s-a") == 0)
- break;
+ for_each_child_of_node(mdev->ofdev.dev.of_node, i2s)
+ if (of_node_name_eq(i2s, "i2s-a"))
+ break;
+
if (i2s == NULL) {
pr_debug(" i2s-a child not found\n");
goto bail;
}
/* Get lightshow or virtual sound */
- while ((np = of_get_next_child(i2s, np)) != NULL) {
- if (strcmp(np->name, "lightshow") == 0)
+ for_each_child_of_node(i2s, np) {
+ if (of_node_name_eq(np, "lightshow"))
break;
- if ((strcmp(np->name, "sound") == 0) &&
+ if (of_node_name_eq(np, "sound") &&
of_get_property(np, "virtual", NULL) != NULL)
break;
}
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 60f57e2..ac0cf37 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -318,8 +318,8 @@ int __init find_via_pmu(void)
PMU_INT_ADB |
PMU_INT_TICK;
- if (vias->parent->name && ((strcmp(vias->parent->name, "ohare") == 0)
- || of_device_is_compatible(vias->parent, "ohare")))
+ if (of_node_name_eq(vias->parent, "ohare") ||
+ of_device_is_compatible(vias->parent, "ohare"))
pmu_kind = PMU_OHARE_BASED;
else if (of_device_is_compatible(vias->parent, "paddington"))
pmu_kind = PMU_PADDINGTON_BASED;
diff --git a/drivers/macintosh/windfarm_lm87_sensor.c b/drivers/macintosh/windfarm_lm87_sensor.c
index 35aa571..09724ac 100644
--- a/drivers/macintosh/windfarm_lm87_sensor.c
+++ b/drivers/macintosh/windfarm_lm87_sensor.c
@@ -110,8 +110,8 @@ static int wf_lm87_probe(struct i2c_client *client,
* the Xserve G5 has several lm87's. However, for now we only
* care about the internal temperature sensor
*/
- while ((np = of_get_next_child(client->dev.of_node, np)) != NULL) {
- if (strcmp(np->name, "int-temp"))
+ for_each_child_of_node(client->dev.of_node, np) {
+ if (!of_node_name_eq(np, "int-temp"))
continue;
loc = of_get_property(np, "location", NULL);
if (!loc)
diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c
index 86d6546..2cb9652 100644
--- a/drivers/macintosh/windfarm_smu_controls.c
+++ b/drivers/macintosh/windfarm_smu_controls.c
@@ -267,7 +267,7 @@ static int __init smu_controls_init(void)
/* Look for RPM fans */
for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;)
- if (!strcmp(fans->name, "rpm-fans") ||
+ if (of_node_name_eq(fans, "rpm-fans") ||
of_device_is_compatible(fans, "smu-rpm-fans"))
break;
for (fan = NULL;
@@ -287,7 +287,7 @@ static int __init smu_controls_init(void)
/* Look for PWM fans */
for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;)
- if (!strcmp(fans->name, "pwm-fans"))
+ if (of_node_name_eq(fans, "pwm-fans"))
break;
for (fan = NULL;
fans && (fan = of_get_next_child(fans, fan)) != NULL;) {
diff --git a/drivers/macintosh/windfarm_smu_sensors.c b/drivers/macintosh/windfarm_smu_sensors.c
index 1ba86de..a58f673 100644
--- a/drivers/macintosh/windfarm_smu_sensors.c
+++ b/drivers/macintosh/windfarm_smu_sensors.c
@@ -424,7 +424,7 @@ static int __init smu_sensors_init(void)
/* Look for sensors subdir */
for (sensors = NULL;
(sensors = of_get_next_child(smu, sensors)) != NULL;)
- if (!strcmp(sensors->name, "sensors"))
+ if (of_node_name_eq(sensors, "sensors"))
break;
of_node_put(smu);
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index 57a6bb1..8f2c5d8 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -318,7 +318,7 @@ static int read_afu_name(struct pci_dev *dev, struct ocxl_fn_config *fn,
if (rc)
return rc;
ptr = (u32 *) &afu->name[i];
- *ptr = val;
+ *ptr = le32_to_cpu((__force __le32) val);
}
afu->name[OCXL_AFU_NAME_SZ - 1] = '\0'; /* play safe */
return 0;
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 42dc1d3..d0f8e4f 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -38,3 +38,9 @@
and LPC bridge config space.
To enable Intel IGD assignment through vfio-pci, say Y.
+
+config VFIO_PCI_NVLINK2
+ def_bool y
+ depends on VFIO_PCI && PPC_POWERNV
+ help
+ VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 76d8ec0..9662c06 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,5 +1,6 @@
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
+vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/trace.h b/drivers/vfio/pci/trace.h
new file mode 100644
index 0000000..228ccdb
--- /dev/null
+++ b/drivers/vfio/pci/trace.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * VFIO PCI mmap/mmap_fault tracepoints
+ *
+ * Copyright (C) 2018 IBM Corp. All rights reserved.
+ * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vfio_pci
+
+#if !defined(_TRACE_VFIO_PCI_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_VFIO_PCI_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(vfio_pci_nvgpu_mmap_fault,
+ TP_PROTO(struct pci_dev *pdev, unsigned long hpa, unsigned long ua,
+ vm_fault_t ret),
+ TP_ARGS(pdev, hpa, ua, ret),
+
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(unsigned long, hpa)
+ __field(unsigned long, ua)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->name = dev_name(&pdev->dev),
+ __entry->hpa = hpa;
+ __entry->ua = ua;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("%s: %lx -> %lx ret=%d", __entry->name, __entry->hpa,
+ __entry->ua, __entry->ret)
+);
+
+TRACE_EVENT(vfio_pci_nvgpu_mmap,
+ TP_PROTO(struct pci_dev *pdev, unsigned long hpa, unsigned long ua,
+ unsigned long size, int ret),
+ TP_ARGS(pdev, hpa, ua, size, ret),
+
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(unsigned long, hpa)
+ __field(unsigned long, ua)
+ __field(unsigned long, size)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->name = dev_name(&pdev->dev),
+ __entry->hpa = hpa;
+ __entry->ua = ua;
+ __entry->size = size;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("%s: %lx -> %lx size=%lx ret=%d", __entry->name, __entry->hpa,
+ __entry->ua, __entry->size, __entry->ret)
+);
+
+TRACE_EVENT(vfio_pci_npu2_mmap,
+ TP_PROTO(struct pci_dev *pdev, unsigned long hpa, unsigned long ua,
+ unsigned long size, int ret),
+ TP_ARGS(pdev, hpa, ua, size, ret),
+
+ TP_STRUCT__entry(
+ __field(const char *, name)
+ __field(unsigned long, hpa)
+ __field(unsigned long, ua)
+ __field(unsigned long, size)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->name = dev_name(&pdev->dev),
+ __entry->hpa = hpa;
+ __entry->ua = ua;
+ __entry->size = size;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("%s: %lx -> %lx size=%lx ret=%d", __entry->name, __entry->hpa,
+ __entry->ua, __entry->size, __entry->ret)
+);
+
+#endif /* _TRACE_VFIO_PCI_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 50cdedf..a89fa5d 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -289,14 +289,37 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
if (ret) {
dev_warn(&vdev->pdev->dev,
"Failed to setup Intel IGD regions\n");
- vfio_pci_disable(vdev);
- return ret;
+ goto disable_exit;
+ }
+ }
+
+ if (pdev->vendor == PCI_VENDOR_ID_NVIDIA &&
+ IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
+ ret = vfio_pci_nvdia_v100_nvlink2_init(vdev);
+ if (ret && ret != -ENODEV) {
+ dev_warn(&vdev->pdev->dev,
+ "Failed to setup NVIDIA NV2 RAM region\n");
+ goto disable_exit;
+ }
+ }
+
+ if (pdev->vendor == PCI_VENDOR_ID_IBM &&
+ IS_ENABLED(CONFIG_VFIO_PCI_NVLINK2)) {
+ ret = vfio_pci_ibm_npu2_init(vdev);
+ if (ret && ret != -ENODEV) {
+ dev_warn(&vdev->pdev->dev,
+ "Failed to setup NVIDIA NV2 ATSD region\n");
+ goto disable_exit;
}
}
vfio_pci_probe_mmaps(vdev);
return 0;
+
+disable_exit:
+ vfio_pci_disable(vdev);
+ return ret;
}
static void vfio_pci_disable(struct vfio_pci_device *vdev)
@@ -750,6 +773,12 @@ static long vfio_pci_ioctl(void *device_data,
if (ret)
return ret;
+ if (vdev->region[i].ops->add_capability) {
+ ret = vdev->region[i].ops->add_capability(vdev,
+ &vdev->region[i], &caps);
+ if (ret)
+ return ret;
+ }
}
}
@@ -1117,6 +1146,15 @@ static int vfio_pci_mmap(void *device_data, struct vm_area_struct *vma)
return -EINVAL;
if ((vma->vm_flags & VM_SHARED) == 0)
return -EINVAL;
+ if (index >= VFIO_PCI_NUM_REGIONS) {
+ int regnum = index - VFIO_PCI_NUM_REGIONS;
+ struct vfio_pci_region *region = vdev->region + regnum;
+
+ if (region && region->ops && region->ops->mmap &&
+ (region->flags & VFIO_REGION_INFO_FLAG_MMAP))
+ return region->ops->mmap(vdev, region, vma);
+ return -EINVAL;
+ }
if (index >= VFIO_PCI_ROM_REGION_INDEX)
return -EINVAL;
if (!vdev->bar_mmap_supported[index])
diff --git a/drivers/vfio/pci/vfio_pci_nvlink2.c b/drivers/vfio/pci/vfio_pci_nvlink2.c
new file mode 100644
index 0000000..054a2cf
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_nvlink2.c
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * VFIO PCI NVIDIA Whitherspoon GPU support a.k.a. NVLink2.
+ *
+ * Copyright (C) 2018 IBM Corp. All rights reserved.
+ * Author: Alexey Kardashevskiy <aik@ozlabs.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Register an on-GPU RAM region for cacheable access.
+ *
+ * Derived from original vfio_pci_igd.c:
+ * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
+ * Author: Alex Williamson <alex.williamson@redhat.com>
+ */
+
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+#include <asm/kvm_ppc.h>
+#include "vfio_pci_private.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap_fault);
+EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_nvgpu_mmap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(vfio_pci_npu2_mmap);
+
+struct vfio_pci_nvgpu_data {
+ unsigned long gpu_hpa; /* GPU RAM physical address */
+ unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
+ unsigned long useraddr; /* GPU RAM userspace address */
+ unsigned long size; /* Size of the GPU RAM window (usually 128GB) */
+ struct mm_struct *mm;
+ struct mm_iommu_table_group_mem_t *mem; /* Pre-registered RAM descr. */
+ struct pci_dev *gpdev;
+ struct notifier_block group_notifier;
+};
+
+static size_t vfio_pci_nvgpu_rw(struct vfio_pci_device *vdev,
+ char __user *buf, size_t count, loff_t *ppos, bool iswrite)
+{
+ unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+ struct vfio_pci_nvgpu_data *data = vdev->region[i].data;
+ loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+ loff_t posaligned = pos & PAGE_MASK, posoff = pos & ~PAGE_MASK;
+ size_t sizealigned;
+ void __iomem *ptr;
+
+ if (pos >= vdev->region[i].size)
+ return -EINVAL;
+
+ count = min(count, (size_t)(vdev->region[i].size - pos));
+
+ /*
+ * We map only a bit of GPU RAM for a short time instead of mapping it
+ * for the guest lifetime as:
+ *
+ * 1) we do not know GPU RAM size, only aperture which is 4-8 times
+ * bigger than actual RAM size (16/32GB RAM vs. 128GB aperture);
+ * 2) mapping GPU RAM allows CPU to prefetch and if this happens
+ * before NVLink bridge is reset (which fences GPU RAM),
+ * hardware management interrupts (HMI) might happen, this
+ * will freeze NVLink bridge.
+ *
+ * This is not fast path anyway.
+ */
+ sizealigned = _ALIGN_UP(posoff + count, PAGE_SIZE);
+ ptr = ioremap_cache(data->gpu_hpa + posaligned, sizealigned);
+ if (!ptr)
+ return -EFAULT;
+
+ if (iswrite) {
+ if (copy_from_user(ptr + posoff, buf, count))
+ count = -EFAULT;
+ else
+ *ppos += count;
+ } else {
+ if (copy_to_user(buf, ptr + posoff, count))
+ count = -EFAULT;
+ else
+ *ppos += count;
+ }
+
+ iounmap(ptr);
+
+ return count;
+}
+
+static void vfio_pci_nvgpu_release(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region)
+{
+ struct vfio_pci_nvgpu_data *data = region->data;
+ long ret;
+
+ /* If there were any mappings at all... */
+ if (data->mm) {
+ ret = mm_iommu_put(data->mm, data->mem);
+ WARN_ON(ret);
+
+ mmdrop(data->mm);
+ }
+
+ vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
+ &data->group_notifier);
+
+ pnv_npu2_unmap_lpar_dev(data->gpdev);
+
+ kfree(data);
+}
+
+static vm_fault_t vfio_pci_nvgpu_mmap_fault(struct vm_fault *vmf)
+{
+ vm_fault_t ret;
+ struct vm_area_struct *vma = vmf->vma;
+ struct vfio_pci_region *region = vma->vm_private_data;
+ struct vfio_pci_nvgpu_data *data = region->data;
+ unsigned long vmf_off = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long nv2pg = data->gpu_hpa >> PAGE_SHIFT;
+ unsigned long vm_pgoff = vma->vm_pgoff &
+ ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+ unsigned long pfn = nv2pg + vm_pgoff + vmf_off;
+
+ ret = vmf_insert_pfn(vma, vmf->address, pfn);
+ trace_vfio_pci_nvgpu_mmap_fault(data->gpdev, pfn << PAGE_SHIFT,
+ vmf->address, ret);
+
+ return ret;
+}
+
+static const struct vm_operations_struct vfio_pci_nvgpu_mmap_vmops = {
+ .fault = vfio_pci_nvgpu_mmap_fault,
+};
+
+static int vfio_pci_nvgpu_mmap(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region, struct vm_area_struct *vma)
+{
+ int ret;
+ struct vfio_pci_nvgpu_data *data = region->data;
+
+ if (data->useraddr)
+ return -EPERM;
+
+ if (vma->vm_end - vma->vm_start > data->size)
+ return -EINVAL;
+
+ vma->vm_private_data = region;
+ vma->vm_flags |= VM_PFNMAP;
+ vma->vm_ops = &vfio_pci_nvgpu_mmap_vmops;
+
+ /*
+ * Calling mm_iommu_newdev() here once as the region is not
+ * registered yet and therefore right initialization will happen now.
+ * Other places will use mm_iommu_find() which returns
+ * registered @mem and does not go gup().
+ */
+ data->useraddr = vma->vm_start;
+ data->mm = current->mm;
+
+ atomic_inc(&data->mm->mm_count);
+ ret = (int) mm_iommu_newdev(data->mm, data->useraddr,
+ (vma->vm_end - vma->vm_start) >> PAGE_SHIFT,
+ data->gpu_hpa, &data->mem);
+
+ trace_vfio_pci_nvgpu_mmap(vdev->pdev, data->gpu_hpa, data->useraddr,
+ vma->vm_end - vma->vm_start, ret);
+
+ return ret;
+}
+
+static int vfio_pci_nvgpu_add_capability(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region, struct vfio_info_cap *caps)
+{
+ struct vfio_pci_nvgpu_data *data = region->data;
+ struct vfio_region_info_cap_nvlink2_ssatgt cap = { 0 };
+
+ cap.header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT;
+ cap.header.version = 1;
+ cap.tgt = data->gpu_tgt;
+
+ return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
+static const struct vfio_pci_regops vfio_pci_nvgpu_regops = {
+ .rw = vfio_pci_nvgpu_rw,
+ .release = vfio_pci_nvgpu_release,
+ .mmap = vfio_pci_nvgpu_mmap,
+ .add_capability = vfio_pci_nvgpu_add_capability,
+};
+
+static int vfio_pci_nvgpu_group_notifier(struct notifier_block *nb,
+ unsigned long action, void *opaque)
+{
+ struct kvm *kvm = opaque;
+ struct vfio_pci_nvgpu_data *data = container_of(nb,
+ struct vfio_pci_nvgpu_data,
+ group_notifier);
+
+ if (action == VFIO_GROUP_NOTIFY_SET_KVM && kvm &&
+ pnv_npu2_map_lpar_dev(data->gpdev,
+ kvm->arch.lpid, MSR_DR | MSR_PR))
+ return NOTIFY_BAD;
+
+ return NOTIFY_OK;
+}
+
+int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
+{
+ int ret;
+ u64 reg[2];
+ u64 tgt = 0;
+ struct device_node *npu_node, *mem_node;
+ struct pci_dev *npu_dev;
+ struct vfio_pci_nvgpu_data *data;
+ uint32_t mem_phandle = 0;
+ unsigned long events = VFIO_GROUP_NOTIFY_SET_KVM;
+
+ /*
+ * PCI config space does not tell us about NVLink presense but
+ * platform does, use this.
+ */
+ npu_dev = pnv_pci_get_npu_dev(vdev->pdev, 0);
+ if (!npu_dev)
+ return -ENODEV;
+
+ npu_node = pci_device_to_OF_node(npu_dev);
+ if (!npu_node)
+ return -EINVAL;
+
+ if (of_property_read_u32(npu_node, "memory-region", &mem_phandle))
+ return -EINVAL;
+
+ mem_node = of_find_node_by_phandle(mem_phandle);
+ if (!mem_node)
+ return -EINVAL;
+
+ if (of_property_read_variable_u64_array(mem_node, "reg", reg,
+ ARRAY_SIZE(reg), ARRAY_SIZE(reg)) !=
+ ARRAY_SIZE(reg))
+ return -EINVAL;
+
+ if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
+ dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
+ return -EFAULT;
+ }
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->gpu_hpa = reg[0];
+ data->gpu_tgt = tgt;
+ data->size = reg[1];
+
+ dev_dbg(&vdev->pdev->dev, "%lx..%lx\n", data->gpu_hpa,
+ data->gpu_hpa + data->size - 1);
+
+ data->gpdev = vdev->pdev;
+ data->group_notifier.notifier_call = vfio_pci_nvgpu_group_notifier;
+
+ ret = vfio_register_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
+ &events, &data->group_notifier);
+ if (ret)
+ goto free_exit;
+
+ /*
+ * We have just set KVM, we do not need the listener anymore.
+ * Also, keeping it registered means that if more than one GPU is
+ * assigned, we will get several similar notifiers notifying about
+ * the same device again which does not help with anything.
+ */
+ vfio_unregister_notifier(&data->gpdev->dev, VFIO_GROUP_NOTIFY,
+ &data->group_notifier);
+
+ ret = vfio_pci_register_dev_region(vdev,
+ PCI_VENDOR_ID_NVIDIA | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+ VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM,
+ &vfio_pci_nvgpu_regops,
+ data->size,
+ VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_MMAP,
+ data);
+ if (ret)
+ goto free_exit;
+
+ return 0;
+free_exit:
+ kfree(data);
+
+ return ret;
+}
+
+/*
+ * IBM NPU2 bridge
+ */
+struct vfio_pci_npu2_data {
+ void *base; /* ATSD register virtual address, for emulated access */
+ unsigned long mmio_atsd; /* ATSD physical address */
+ unsigned long gpu_tgt; /* TGT address of corresponding GPU RAM */
+ unsigned int link_speed; /* The link speed from DT's ibm,nvlink-speed */
+};
+
+static size_t vfio_pci_npu2_rw(struct vfio_pci_device *vdev,
+ char __user *buf, size_t count, loff_t *ppos, bool iswrite)
+{
+ unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+ struct vfio_pci_npu2_data *data = vdev->region[i].data;
+ loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+
+ if (pos >= vdev->region[i].size)
+ return -EINVAL;
+
+ count = min(count, (size_t)(vdev->region[i].size - pos));
+
+ if (iswrite) {
+ if (copy_from_user(data->base + pos, buf, count))
+ return -EFAULT;
+ } else {
+ if (copy_to_user(buf, data->base + pos, count))
+ return -EFAULT;
+ }
+ *ppos += count;
+
+ return count;
+}
+
+static int vfio_pci_npu2_mmap(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region, struct vm_area_struct *vma)
+{
+ int ret;
+ struct vfio_pci_npu2_data *data = region->data;
+ unsigned long req_len = vma->vm_end - vma->vm_start;
+
+ if (req_len != PAGE_SIZE)
+ return -EINVAL;
+
+ vma->vm_flags |= VM_PFNMAP;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+ ret = remap_pfn_range(vma, vma->vm_start, data->mmio_atsd >> PAGE_SHIFT,
+ req_len, vma->vm_page_prot);
+ trace_vfio_pci_npu2_mmap(vdev->pdev, data->mmio_atsd, vma->vm_start,
+ vma->vm_end - vma->vm_start, ret);
+
+ return ret;
+}
+
+static void vfio_pci_npu2_release(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region)
+{
+ struct vfio_pci_npu2_data *data = region->data;
+
+ memunmap(data->base);
+ kfree(data);
+}
+
+static int vfio_pci_npu2_add_capability(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region, struct vfio_info_cap *caps)
+{
+ struct vfio_pci_npu2_data *data = region->data;
+ struct vfio_region_info_cap_nvlink2_ssatgt captgt = { 0 };
+ struct vfio_region_info_cap_nvlink2_lnkspd capspd = { 0 };
+ int ret;
+
+ captgt.header.id = VFIO_REGION_INFO_CAP_NVLINK2_SSATGT;
+ captgt.header.version = 1;
+ captgt.tgt = data->gpu_tgt;
+
+ capspd.header.id = VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD;
+ capspd.header.version = 1;
+ capspd.link_speed = data->link_speed;
+
+ ret = vfio_info_add_capability(caps, &captgt.header, sizeof(captgt));
+ if (ret)
+ return ret;
+
+ return vfio_info_add_capability(caps, &capspd.header, sizeof(capspd));
+}
+
+static const struct vfio_pci_regops vfio_pci_npu2_regops = {
+ .rw = vfio_pci_npu2_rw,
+ .mmap = vfio_pci_npu2_mmap,
+ .release = vfio_pci_npu2_release,
+ .add_capability = vfio_pci_npu2_add_capability,
+};
+
+int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
+{
+ int ret;
+ struct vfio_pci_npu2_data *data;
+ struct device_node *nvlink_dn;
+ u32 nvlink_index = 0;
+ struct pci_dev *npdev = vdev->pdev;
+ struct device_node *npu_node = pci_device_to_OF_node(npdev);
+ struct pci_controller *hose = pci_bus_to_host(npdev->bus);
+ u64 mmio_atsd = 0;
+ u64 tgt = 0;
+ u32 link_speed = 0xff;
+
+ /*
+ * PCI config space does not tell us about NVLink presense but
+ * platform does, use this.
+ */
+ if (!pnv_pci_get_gpu_dev(vdev->pdev))
+ return -ENODEV;
+
+ /*
+ * NPU2 normally has 8 ATSD registers (for concurrency) and 6 links
+ * so we can allocate one register per link, using nvlink index as
+ * a key.
+ * There is always at least one ATSD register so as long as at least
+ * NVLink bridge #0 is passed to the guest, ATSD will be available.
+ */
+ nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
+ if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
+ &nvlink_index)))
+ return -ENODEV;
+
+ if (of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", nvlink_index,
+ &mmio_atsd)) {
+ dev_warn(&vdev->pdev->dev, "No available ATSD found\n");
+ mmio_atsd = 0;
+ }
+
+ if (of_property_read_u64(npu_node, "ibm,device-tgt-addr", &tgt)) {
+ dev_warn(&vdev->pdev->dev, "No ibm,device-tgt-addr found\n");
+ return -EFAULT;
+ }
+
+ if (of_property_read_u32(npu_node, "ibm,nvlink-speed", &link_speed)) {
+ dev_warn(&vdev->pdev->dev, "No ibm,nvlink-speed found\n");
+ return -EFAULT;
+ }
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->mmio_atsd = mmio_atsd;
+ data->gpu_tgt = tgt;
+ data->link_speed = link_speed;
+ if (data->mmio_atsd) {
+ data->base = memremap(data->mmio_atsd, SZ_64K, MEMREMAP_WT);
+ if (!data->base) {
+ ret = -ENOMEM;
+ goto free_exit;
+ }
+ }
+
+ /*
+ * We want to expose the capability even if this specific NVLink
+ * did not get its own ATSD register because capabilities
+ * belong to VFIO regions and normally there will be ATSD register
+ * assigned to the NVLink bridge.
+ */
+ ret = vfio_pci_register_dev_region(vdev,
+ PCI_VENDOR_ID_IBM |
+ VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+ VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD,
+ &vfio_pci_npu2_regops,
+ data->mmio_atsd ? PAGE_SIZE : 0,
+ VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_MMAP,
+ data);
+ if (ret)
+ goto free_exit;
+
+ return 0;
+
+free_exit:
+ kfree(data);
+
+ return ret;
+}
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index cde3b5d..127071b 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -59,6 +59,12 @@ struct vfio_pci_regops {
size_t count, loff_t *ppos, bool iswrite);
void (*release)(struct vfio_pci_device *vdev,
struct vfio_pci_region *region);
+ int (*mmap)(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region,
+ struct vm_area_struct *vma);
+ int (*add_capability)(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region,
+ struct vfio_info_cap *caps);
};
struct vfio_pci_region {
@@ -157,4 +163,18 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
return -ENODEV;
}
#endif
+#ifdef CONFIG_VFIO_PCI_NVLINK2
+extern int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev);
+extern int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev);
+#else
+static inline int vfio_pci_nvdia_v100_nvlink2_init(struct vfio_pci_device *vdev)
+{
+ return -ENODEV;
+}
+
+static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
+{
+ return -ENODEV;
+}
+#endif
#endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index b30926e..c424913 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -152,11 +152,12 @@ static long tce_iommu_unregister_pages(struct tce_container *container,
struct mm_iommu_table_group_mem_t *mem;
struct tce_iommu_prereg *tcemem;
bool found = false;
+ long ret;
if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
return -EINVAL;
- mem = mm_iommu_find(container->mm, vaddr, size >> PAGE_SHIFT);
+ mem = mm_iommu_get(container->mm, vaddr, size >> PAGE_SHIFT);
if (!mem)
return -ENOENT;
@@ -168,9 +169,13 @@ static long tce_iommu_unregister_pages(struct tce_container *container,
}
if (!found)
- return -ENOENT;
+ ret = -ENOENT;
+ else
+ ret = tce_iommu_prereg_free(container, tcemem);
- return tce_iommu_prereg_free(container, tcemem);
+ mm_iommu_put(container->mm, mem);
+
+ return ret;
}
static long tce_iommu_register_pages(struct tce_container *container,
@@ -185,22 +190,24 @@ static long tce_iommu_register_pages(struct tce_container *container,
((vaddr + size) < vaddr))
return -EINVAL;
- mem = mm_iommu_find(container->mm, vaddr, entries);
+ mem = mm_iommu_get(container->mm, vaddr, entries);
if (mem) {
list_for_each_entry(tcemem, &container->prereg_list, next) {
- if (tcemem->mem == mem)
- return -EBUSY;
+ if (tcemem->mem == mem) {
+ ret = -EBUSY;
+ goto put_exit;
+ }
}
+ } else {
+ ret = mm_iommu_new(container->mm, vaddr, entries, &mem);
+ if (ret)
+ return ret;
}
- ret = mm_iommu_get(container->mm, vaddr, entries, &mem);
- if (ret)
- return ret;
-
tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL);
if (!tcemem) {
- mm_iommu_put(container->mm, mem);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto put_exit;
}
tcemem->mem = mem;
@@ -209,10 +216,22 @@ static long tce_iommu_register_pages(struct tce_container *container,
container->enabled = true;
return 0;
+
+put_exit:
+ mm_iommu_put(container->mm, mem);
+ return ret;
}
-static bool tce_page_is_contained(struct page *page, unsigned page_shift)
+static bool tce_page_is_contained(struct mm_struct *mm, unsigned long hpa,
+ unsigned int page_shift)
{
+ struct page *page;
+ unsigned long size = 0;
+
+ if (mm_iommu_is_devmem(mm, hpa, page_shift, &size))
+ return size == (1UL << page_shift);
+
+ page = pfn_to_page(hpa >> PAGE_SHIFT);
/*
* Check that the TCE table granularity is not bigger than the size of
* a page we just found. Otherwise the hardware can get access to
@@ -371,6 +390,7 @@ static void tce_iommu_release(void *iommu_data)
{
struct tce_container *container = iommu_data;
struct tce_iommu_group *tcegrp;
+ struct tce_iommu_prereg *tcemem, *tmtmp;
long i;
while (tce_groups_attached(container)) {
@@ -393,13 +413,8 @@ static void tce_iommu_release(void *iommu_data)
tce_iommu_free_table(container, tbl);
}
- while (!list_empty(&container->prereg_list)) {
- struct tce_iommu_prereg *tcemem;
-
- tcemem = list_first_entry(&container->prereg_list,
- struct tce_iommu_prereg, next);
- WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem));
- }
+ list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next)
+ WARN_ON(tce_iommu_prereg_free(container, tcemem));
tce_iommu_disable(container);
if (container->mm)
@@ -492,7 +507,8 @@ static int tce_iommu_clear(struct tce_container *container,
direction = DMA_NONE;
oldhpa = 0;
- ret = iommu_tce_xchg(tbl, entry, &oldhpa, &direction);
+ ret = iommu_tce_xchg(container->mm, tbl, entry, &oldhpa,
+ &direction);
if (ret)
continue;
@@ -530,7 +546,6 @@ static long tce_iommu_build(struct tce_container *container,
enum dma_data_direction direction)
{
long i, ret = 0;
- struct page *page;
unsigned long hpa;
enum dma_data_direction dirtmp;
@@ -541,15 +556,16 @@ static long tce_iommu_build(struct tce_container *container,
if (ret)
break;
- page = pfn_to_page(hpa >> PAGE_SHIFT);
- if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+ if (!tce_page_is_contained(container->mm, hpa,
+ tbl->it_page_shift)) {
ret = -EPERM;
break;
}
hpa |= offset;
dirtmp = direction;
- ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+ ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
+ &dirtmp);
if (ret) {
tce_iommu_unuse_page(container, hpa);
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
@@ -576,7 +592,6 @@ static long tce_iommu_build_v2(struct tce_container *container,
enum dma_data_direction direction)
{
long i, ret = 0;
- struct page *page;
unsigned long hpa;
enum dma_data_direction dirtmp;
@@ -589,8 +604,8 @@ static long tce_iommu_build_v2(struct tce_container *container,
if (ret)
break;
- page = pfn_to_page(hpa >> PAGE_SHIFT);
- if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+ if (!tce_page_is_contained(container->mm, hpa,
+ tbl->it_page_shift)) {
ret = -EPERM;
break;
}
@@ -603,7 +618,8 @@ static long tce_iommu_build_v2(struct tce_container *container,
if (mm_iommu_mapped_inc(mem))
break;
- ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+ ret = iommu_tce_xchg(container->mm, tbl, entry + i, &hpa,
+ &dirtmp);
if (ret) {
/* dirtmp cannot be DMA_NONE here */
tce_iommu_unuse_page_v2(container, tbl, entry + i);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 8131028..02bb7ad 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -354,6 +354,21 @@ struct vfio_region_gfx_edid {
};
/*
+ * 10de vendor sub-type
+ *
+ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
+ */
+#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
+
+/*
+ * 1014 vendor sub-type
+ *
+ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
+ * to do TLB invalidation on a GPU.
+ */
+#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
+
+/*
* The MSIX mappable capability informs that MSIX data of a BAR can be mmapped
* which allows direct access to non-MSIX registers which happened to be within
* the same system page.
@@ -363,6 +378,33 @@ struct vfio_region_gfx_edid {
*/
#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3
+/*
+ * Capability with compressed real address (aka SSA - small system address)
+ * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing
+ * and by the userspace to associate a NVLink bridge with a GPU.
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4
+
+struct vfio_region_info_cap_nvlink2_ssatgt {
+ struct vfio_info_cap_header header;
+ __u64 tgt;
+};
+
+/*
+ * Capability with an NVLink link speed. The value is read by
+ * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
+ * property in the device tree. The value is fixed in the hardware
+ * and failing to provide the correct value results in the link
+ * not working with no indication from the driver why.
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5
+
+struct vfio_region_info_cap_nvlink2_lnkspd {
+ struct vfio_info_cap_header header;
+ __u32 link_speed;
+ __u32 __pad;
+};
+
/**
* VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
* struct vfio_irq_info)
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index c3ee839..208452a 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -11,6 +11,7 @@
tm-signal-context-chk-gpr
tm-signal-context-chk-vmx
tm-signal-context-chk-vsx
+tm-signal-sigreturn-nt
tm-vmx-unavail
tm-unavailable
tm-trap
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 9fc2cf6..75a6853 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -4,7 +4,7 @@
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
- $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
+ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt
top_srcdir = ../../../../..
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
new file mode 100644
index 0000000..56fbf9f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018, Breno Leitao, Gustavo Romero, IBM Corp.
+ *
+ * A test case that creates a signal and starts a suspended transaction
+ * inside the signal handler.
+ *
+ * It returns from the signal handler with the CPU at suspended state, but
+ * without setting usercontext MSR Transaction State (TS) fields.
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <signal.h>
+
+#include "utils.h"
+
+void trap_signal_handler(int signo, siginfo_t *si, void *uc)
+{
+ ucontext_t *ucp = (ucontext_t *) uc;
+
+ asm("tbegin.; tsuspend.;");
+
+ /* Skip 'trap' instruction if it succeed */
+ ucp->uc_mcontext.regs->nip += 4;
+}
+
+int tm_signal_sigreturn_nt(void)
+{
+ struct sigaction trap_sa;
+
+ trap_sa.sa_flags = SA_SIGINFO;
+ trap_sa.sa_sigaction = trap_signal_handler;
+
+ sigaction(SIGTRAP, &trap_sa, NULL);
+
+ raise(SIGTRAP);
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+ test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt");
+}
+