ARC: add support for DSP-enabled userspace applications

To be able to run DSP-enabled userspace applications we need to
save and restore following DSP-related registers:
At IRQ/exception entry/exit:
 * DSP_CTRL (save it and reset to value suitable for kernel)
 * ACC0_LO, ACC0_HI (we already save them as r58, r59 pair)
At context switch:
 * ACC0_GLO, ACC0_GHI
 * DSP_BFLY0, DSP_FFT_CTRL

Reviewed-by: Vineet Gupta <vgupta@synopsys.com>
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 55432a8..eb3bcb2 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -411,6 +411,9 @@
 config ARC_DSP_HANDLED
 	def_bool n
 
+config ARC_DSP_SAVE_RESTORE_REGS
+	def_bool n
+
 choice
 	prompt "DSP support"
 	default ARC_DSP_NONE
@@ -433,6 +436,15 @@
 	  DSP extension presence in HW, no support for DSP-enabled userspace
 	  applications. We don't save / restore DSP registers and only do
 	  some minimal preparations so userspace won't be able to break kernel
+
+config ARC_DSP_USERSPACE
+	bool "Support DSP for userspace apps"
+	select ARC_HAS_ACCL_REGS
+	select ARC_DSP_HANDLED
+	select ARC_DSP_SAVE_RESTORE_REGS
+	help
+	  DSP extension presence in HW, support save / restore DSP registers to
+	  run DSP-enabled userspace applications
 endchoice
 
 config ARC_IRQ_NO_AUTOSAVE
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index 135f6ec..aee1ee2 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -120,6 +120,8 @@
 
 /*
  * DSP-related registers
+ * Registers names must correspond to dsp_callee_regs structure fields names
+ * for automatic offset calculation in DSP_AUX_SAVE_RESTORE macros.
  */
 #define ARC_AUX_DSP_BUILD	0x7A
 #define ARC_AUX_ACC0_LO		0x580
diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h
index 6066203..8380f7b 100644
--- a/arch/arc/include/asm/dsp-impl.h
+++ b/arch/arc/include/asm/dsp-impl.h
@@ -7,6 +7,8 @@
 #ifndef __ASM_ARC_DSP_IMPL_H
 #define __ASM_ARC_DSP_IMPL_H
 
+#include <asm/dsp.h>
+
 #define DSP_CTRL_DISABLED_ALL		0
 
 #ifdef __ASSEMBLY__
@@ -30,12 +32,82 @@
 	 */
 	mov	r10, DSP_CTRL_DISABLED_ALL
 	sr	r10, [ARC_AUX_DSP_CTRL]
-#endif /* ARC_DSP_KERNEL */
+
+#elif defined(CONFIG_ARC_DSP_SAVE_RESTORE_REGS)
+	/*
+	 * Save DSP_CTRL register and reset it to value suitable for kernel
+	 * (DSP_CTRL_DISABLED_ALL)
+	 */
+	mov	r10, DSP_CTRL_DISABLED_ALL
+	aex	r10, [ARC_AUX_DSP_CTRL]
+	st	r10, [sp, PT_DSP_CTRL]
+
+#endif
+.endm
+
+/* clobbers r10, r11 registers pair */
+.macro DSP_RESTORE_REGFILE_IRQ
+#if defined(CONFIG_ARC_DSP_SAVE_RESTORE_REGS)
+	ld	r10, [sp, PT_DSP_CTRL]
+	sr	r10, [ARC_AUX_DSP_CTRL]
+
+#endif
 .endm
 
 #else /* __ASEMBLY__ */
 
+#include <linux/sched.h>
 #include <asm/asserts.h>
+#include <asm/switch_to.h>
+
+#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
+
+/*
+ * As we save new and restore old AUX register value in the same place we
+ * can optimize a bit and use AEX instruction (swap contents of an auxiliary
+ * register with a core register) instead of LR + SR pair.
+ */
+#define AUX_SAVE_RESTORE(_saveto, _readfrom, _offt, _aux)		\
+do {									\
+	long unsigned int _scratch;					\
+									\
+	__asm__ __volatile__(						\
+		"ld	%0, [%2, %4]			\n"		\
+		"aex	%0, [%3]			\n"		\
+		"st	%0, [%1, %4]			\n"		\
+		:							\
+		  "=&r" (_scratch)	/* must be early clobber */	\
+		:							\
+		   "r" (_saveto),					\
+		   "r" (_readfrom),					\
+		   "Ir" (_aux),						\
+		   "Ir" (_offt)						\
+		:							\
+		  "memory"						\
+	);								\
+} while (0)
+
+#define DSP_AUX_SAVE_RESTORE(_saveto, _readfrom, _aux)			\
+	AUX_SAVE_RESTORE(_saveto, _readfrom,				\
+		offsetof(struct dsp_callee_regs, _aux),			\
+		ARC_AUX_##_aux)
+
+static inline void dsp_save_restore(struct task_struct *prev,
+					struct task_struct *next)
+{
+	long unsigned int *saveto = &prev->thread.dsp.ACC0_GLO;
+	long unsigned int *readfrom = &next->thread.dsp.ACC0_GLO;
+
+	DSP_AUX_SAVE_RESTORE(saveto, readfrom, ACC0_GLO);
+	DSP_AUX_SAVE_RESTORE(saveto, readfrom, ACC0_GHI);
+
+	DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_BFLY0);
+	DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_FFT_CTRL);
+}
+
+#else /* !CONFIG_ARC_DSP_SAVE_RESTORE_REGS */
+#define dsp_save_restore(p, n)
+#endif /* CONFIG_ARC_DSP_SAVE_RESTORE_REGS */
 
 static inline bool dsp_exist(void)
 {
diff --git a/arch/arc/include/asm/dsp.h b/arch/arc/include/asm/dsp.h
new file mode 100644
index 0000000..b016f4d
--- /dev/null
+++ b/arch/arc/include/asm/dsp.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com)
+ *
+ * Author: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+ */
+#ifndef __ASM_ARC_DSP_H
+#define __ASM_ARC_DSP_H
+
+#ifndef __ASSEMBLY__
+
+/*
+ * DSP-related saved registers - need to be saved only when you are
+ * scheduled out.
+ * structure fields name must correspond to aux register defenitions for
+ * automatic offset calculation in DSP_AUX_SAVE_RESTORE macros
+ */
+struct dsp_callee_regs {
+	unsigned long ACC0_GLO, ACC0_GHI, DSP_BFLY0, DSP_FFT_CTRL;
+};
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_ARC_DSP_H */
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index dd6aa18..ae0aa53 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -192,6 +192,9 @@
 	ld	r25, [sp, PT_user_r25]
 #endif
 
+	/* clobbers r10, r11 registers pair */
+	DSP_RESTORE_REGFILE_IRQ
+
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
 	LD2	r58, r59, PT_r58
 #endif
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index ec532d1..0fcea5b 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -14,6 +14,7 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/ptrace.h>
+#include <asm/dsp.h>
 #include <asm/fpu.h>
 
 #ifdef CONFIG_ARC_PLAT_EZNPS
@@ -31,6 +32,9 @@ struct thread_struct {
 	unsigned long ksp;	/* kernel mode stack pointer */
 	unsigned long callee_reg;	/* pointer to callee regs */
 	unsigned long fault_address;	/* dbls as brkpt holder as well */
+#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
+	struct dsp_callee_regs dsp;
+#endif
 #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
 	struct arc_fpu fpu;
 #endif
diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
index ba9854e..2fdb87a 100644
--- a/arch/arc/include/asm/ptrace.h
+++ b/arch/arc/include/asm/ptrace.h
@@ -91,6 +91,9 @@ struct pt_regs {
 #ifdef CONFIG_ARC_HAS_ACCL_REGS
 	unsigned long r58, r59;	/* ACCL/ACCH used by FPU / DSP MPY */
 #endif
+#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
+	unsigned long DSP_CTRL;
+#endif
 
 	/*------- Below list auto saved by h/w -----------*/
 	unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
diff --git a/arch/arc/include/asm/switch_to.h b/arch/arc/include/asm/switch_to.h
index aadf65b..4a3d679 100644
--- a/arch/arc/include/asm/switch_to.h
+++ b/arch/arc/include/asm/switch_to.h
@@ -9,6 +9,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/sched.h>
+#include <asm/dsp-impl.h>
 #include <asm/fpu.h>
 
 #ifdef CONFIG_ARC_PLAT_EZNPS
@@ -24,6 +25,7 @@ struct task_struct *__switch_to(struct task_struct *p, struct task_struct *n);
 #define switch_to(prev, next, last)	\
 do {					\
 	ARC_EZNPS_DP_PREV(prev, next);	\
+	dsp_save_restore(prev, next);	\
 	fpu_save_restore(prev, next);	\
 	last = __switch_to(prev, next);\
 	mb();				\
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index c783bcd..0e88403 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -12,6 +12,7 @@
 #include <asm/hardirq.h>
 #include <asm/page.h>
 
+
 int main(void)
 {
 	DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
@@ -75,6 +76,9 @@ int main(void)
 	OFFSET(PT_r58, pt_regs, r58);
 	OFFSET(PT_r59, pt_regs, r59);
 #endif
+#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
+	OFFSET(PT_DSP_CTRL, pt_regs, DSP_CTRL);
+#endif
 
 	return 0;
 }