s390/fpu: always enable the vector facility if it is available

If the kernel detects that the s390 hardware supports the vector
facility, it is enabled by default at an early stage.  To force
it off, use the novx kernel parameter.  Note that there is a small
time window in which the vector facility is enabled before it is
forced off.

With the vector facility enabled by default, the FPU save and
restore functions can be improved.  They no longer need to perform
expensive control register updates to enable or disable the vector
enablement control for particular processes.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/include/asm/ctl_reg.h b/arch/s390/include/asm/ctl_reg.h
index 17a3735..d7697ab 100644
--- a/arch/s390/include/asm/ctl_reg.h
+++ b/arch/s390/include/asm/ctl_reg.h
@@ -46,8 +46,6 @@
 	__ctl_load(reg, cr, cr);
 }
 
-void __ctl_set_vx(void);
-
 void smp_ctl_set_bit(int cr, int bit);
 void smp_ctl_clear_bit(int cr, int bit);
 
diff --git a/arch/s390/include/asm/fpu-internal.h b/arch/s390/include/asm/fpu-internal.h
index 55dc2c0..cd79a33 100644
--- a/arch/s390/include/asm/fpu-internal.h
+++ b/arch/s390/include/asm/fpu-internal.h
@@ -8,10 +8,6 @@
 #ifndef _ASM_S390_FPU_INTERNAL_H
 #define _ASM_S390_FPU_INTERNAL_H
 
-#define FPU_USE_VX		1	/* Vector extension is active */
-
-#ifndef __ASSEMBLY__
-
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <asm/linkage.h>
@@ -20,7 +16,6 @@
 
 struct fpu {
 	__u32 fpc;			/* Floating-point control */
-	__u32 flags;
 	union {
 		void *regs;
 		freg_t *fprs;		/* Floating-point register save area */
@@ -30,9 +25,6 @@
 
 void save_fpu_regs(void);
 
-#define is_vx_fpu(fpu) (!!((fpu)->flags & FPU_USE_VX))
-#define is_vx_task(tsk) (!!((tsk)->thread.fpu.flags & FPU_USE_VX))
-
 /* VX array structure for address operand constraints in inline assemblies */
 struct vx_array { __vector128 _[__NUM_VXRS]; };
 
@@ -89,7 +81,7 @@
 static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
 	fpregs->pad = 0;
-	if (is_vx_fpu(fpu))
+	if (MACHINE_HAS_VX)
 		convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
 	else
 		memcpy((freg_t *)&fpregs->fprs, fpu->fprs,
@@ -98,13 +90,11 @@
 
 static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
 {
-	if (is_vx_fpu(fpu))
+	if (MACHINE_HAS_VX)
 		convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
 	else
 		memcpy(fpu->fprs, (freg_t *)&fpregs->fprs,
 		       sizeof(fpregs->fprs));
 }
 
-#endif
-
 #endif /* _ASM_S390_FPU_INTERNAL_H */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 3aeeb1b..f77834a 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -29,7 +29,6 @@
 	BLANK();
 	DEFINE(__THREAD_ksp, offsetof(struct thread_struct, ksp));
 	DEFINE(__THREAD_FPU_fpc, offsetof(struct thread_struct, fpu.fpc));
-	DEFINE(__THREAD_FPU_flags, offsetof(struct thread_struct, fpu.flags));
 	DEFINE(__THREAD_FPU_regs, offsetof(struct thread_struct, fpu.regs));
 	DEFINE(__THREAD_per_cause, offsetof(struct thread_struct, per_event.cause));
 	DEFINE(__THREAD_per_address, offsetof(struct thread_struct, per_event.address));
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index e0f9d27..66c9441 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -249,7 +249,7 @@
 		return -EFAULT;
 
 	/* Save vector registers to signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
@@ -277,7 +277,7 @@
 		*(__u32 *)&regs->gprs[i] = gprs_high[i];
 
 	/* Restore vector registers from signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
@@ -470,8 +470,7 @@
 	 */
 	uc_flags = UC_GPRS_HIGH;
 	if (MACHINE_HAS_VX) {
-		if (is_vx_task(current))
-			uc_flags |= UC_VXRS;
+		uc_flags |= UC_VXRS;
 	} else
 		frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
 			      sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 311a2d6..3c31609 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -329,10 +329,20 @@
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
 	if (test_facility(51))
 		S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
-	if (test_facility(129))
+	if (test_facility(129)) {
 		S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
+		__ctl_set_bit(0, 17);
+	}
 }
 
+static int __init disable_vector_extension(char *str)
+{
+	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
+	__ctl_clear_bit(0, 17);
+	return 1;
+}
+early_param("novx", disable_vector_extension);
+
 static int __init cad_setup(char *str)
 {
 	int val;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 582fe44..b78babf 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -20,7 +20,6 @@
 #include <asm/page.h>
 #include <asm/sigp.h>
 #include <asm/irq.h>
-#include <asm/fpu-internal.h>
 #include <asm/vx-insn.h>
 
 __PT_R0      =	__PT_GPRS
@@ -748,15 +747,12 @@
 	br	%r14
 .Lpsw_idle_end:
 
-/* Store floating-point controls and floating-point or vector extension
- * registers instead.  A critical section cleanup assures that the registers
- * are stored even if interrupted for some other work.	The register %r2
- * designates a struct fpu to store register contents.	If the specified
- * structure does not contain a register save area, the register store is
- * omitted (see also comments in arch_dup_task_struct()).
- *
- * The CIF_FPU flag is set in any case.  The CIF_FPU triggers a lazy restore
- * of the register contents at system call or io return.
+/*
+ * Store floating-point controls and floating-point or vector registers,
+ * depending on whether the vector facility is available.  A critical
+ * section cleanup assures that the registers are stored even if
+ * interrupted for some other work.  The CIF_FPU flag is set to trigger a
+ * lazy restore of the register contents at return from io or a system call.
  */
 ENTRY(save_fpu_regs)
 	lg	%r2,__LC_CURRENT
@@ -768,7 +764,7 @@
 	lg	%r3,__THREAD_FPU_regs(%r2)
 	ltgr	%r3,%r3
 	jz	.Lsave_fpu_regs_done	  # no save area -> set CIF_FPU
-	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	tm	__LC_MACHINE_FLAGS+5,4	  # MACHINE_HAS_VX
 	jz	.Lsave_fpu_regs_fp	  # no -> store FP regs
 .Lsave_fpu_regs_vx_low:
 	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
@@ -797,15 +793,15 @@
 	br	%r14
 .Lsave_fpu_regs_end:
 
-/* Load floating-point controls and floating-point or vector extension
- * registers.  A critical section cleanup assures that the register contents
- * are loaded even if interrupted for some other work.	Depending on the saved
- * FP/VX state, the vector-enablement control, CR0.46, is either set or cleared.
+/*
+ * Load floating-point controls and floating-point or vector registers.
+ * A critical section cleanup assures that the register contents are
+ * loaded even if interrupted for some other work.
  *
  * There are special calling conventions to fit into sysc and io return work:
  *	%r15:	<kernel stack>
  * The function requires:
- *	%r4 and __SF_EMPTY+32(%r15)
+ *	%r4
  */
 load_fpu_regs:
 	lg	%r4,__LC_CURRENT
@@ -813,25 +809,14 @@
 	tm	__LC_CPU_FLAGS+7,_CIF_FPU
 	bnor	%r14
 	lfpc	__THREAD_FPU_fpc(%r4)
-	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	tm	__LC_MACHINE_FLAGS+5,4		# MACHINE_HAS_VX
 	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
-	jz	.Lload_fpu_regs_fp_ctl		# -> no VX, load FP regs
-.Lload_fpu_regs_vx_ctl:
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jo	.Lload_fpu_regs_vx
-	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
+	jz	.Lload_fpu_regs_fp		# -> no VX, load FP regs
 .Lload_fpu_regs_vx:
 	VLM	%v0,%v15,0,%r4
 .Lload_fpu_regs_vx_high:
 	VLM	%v16,%v31,256,%r4
 	j	.Lload_fpu_regs_done
-.Lload_fpu_regs_fp_ctl:
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jz	.Lload_fpu_regs_fp
-	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
 .Lload_fpu_regs_fp:
 	ld	0,0(%r4)
 	ld	1,8(%r4)
@@ -854,16 +839,6 @@
 	br	%r14
 .Lload_fpu_regs_end:
 
-/* Test and set the vector enablement control in CR0.46 */
-ENTRY(__ctl_set_vx)
-	stctg	%c0,%c0,__SF_EMPTY(%r15)
-	tm	__SF_EMPTY+5(%r15),2
-	bor	%r14
-	oi	__SF_EMPTY+5(%r15),2
-	lctlg	%c0,%c0,__SF_EMPTY(%r15)
-	br	%r14
-.L__ctl_set_vx_end:
-
 .L__critical_end:
 
 /*
@@ -1019,10 +994,6 @@
 	jl	0f
 	clg	%r9,BASED(.Lcleanup_table+104)	# .Lload_fpu_regs_end
 	jl	.Lcleanup_load_fpu_regs
-	clg	%r9,BASED(.Lcleanup_table+112)	# __ctl_set_vx
-	jl	0f
-	clg	%r9,BASED(.Lcleanup_table+120)	# .L__ctl_set_vx_end
-	jl	.Lcleanup___ctl_set_vx
 0:	br	%r14
 
 	.align	8
@@ -1041,8 +1012,6 @@
 	.quad	.Lsave_fpu_regs_end
 	.quad	load_fpu_regs
 	.quad	.Lload_fpu_regs_end
-	.quad	__ctl_set_vx
-	.quad	.L__ctl_set_vx_end
 
 #if IS_ENABLED(CONFIG_KVM)
 .Lcleanup_table_sie:
@@ -1226,7 +1195,7 @@
 	lg	%r3,__THREAD_FPU_regs(%r2)
 	ltgr	%r3,%r3
 	jz	5f			  # no save area -> set CIF_FPU
-	tm	__THREAD_FPU_flags+3(%r2),FPU_USE_VX
+	tm	__LC_MACHINE_FLAGS+5,4	  # MACHINE_HAS_VX
 	jz	4f			  # no VX -> store FP regs
 2:	# Store vector registers (V0-V15)
 	VSTM	%v0,%v15,0,%r3		  # vstm 0,15,0(3)
@@ -1272,37 +1241,21 @@
 	jhe	1f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp)
 	jhe	2f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_fp_ctl)
-	jhe	3f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_high)
-	jhe	4f
+	jhe	3f
 	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx)
-	jhe	5f
-	clg	%r9,BASED(.Lcleanup_load_fpu_regs_vx_ctl)
-	jhe	6f
+	jhe	4f
 	lg	%r4,__LC_CURRENT
 	aghi	%r4,__TASK_thread
 	lfpc	__THREAD_FPU_fpc(%r4)
-	tm	__THREAD_FPU_flags+3(%r4),FPU_USE_VX	# VX-enabled task ?
+	tm	__LC_MACHINE_FLAGS+5,4		# MACHINE_HAS_VX
 	lg	%r4,__THREAD_FPU_regs(%r4)	# %r4 <- reg save area
-	jz	3f				# -> no VX, load FP regs
-6:	# Set VX-enablement control
-	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jo	5f
-	oi	__SF_EMPTY+32+5(%r15),2		# set VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
-5:	# Load V0 ..V15 registers
+	jz	2f				# -> no VX, load FP regs
+4:	# Load V0 ..V15 registers
 	VLM	%v0,%v15,0,%r4
-4:	# Load V16..V31 registers
+3:	# Load V16..V31 registers
 	VLM	%v16,%v31,256,%r4
 	j	1f
-3:	# Clear VX-enablement control for FP
-	stctg	%c0,%c0,__SF_EMPTY+32(%r15)	# store CR0
-	tm	__SF_EMPTY+32+5(%r15),2		# test VX control
-	jz	2f
-	ni	__SF_EMPTY+32+5(%r15),253	# clear VX control
-	lctlg	%c0,%c0,__SF_EMPTY+32(%r15)
 2:	# Load floating-point registers
 	ld	0,0(%r4)
 	ld	1,8(%r4)
@@ -1324,28 +1277,15 @@
 	ni	__LC_CPU_FLAGS+7,255-_CIF_FPU
 	lg	%r9,48(%r11)		# return from load_fpu_regs
 	br	%r14
-.Lcleanup_load_fpu_regs_vx_ctl:
-	.quad	.Lload_fpu_regs_vx_ctl
 .Lcleanup_load_fpu_regs_vx:
 	.quad	.Lload_fpu_regs_vx
 .Lcleanup_load_fpu_regs_vx_high:
 	.quad	.Lload_fpu_regs_vx_high
-.Lcleanup_load_fpu_regs_fp_ctl:
-	.quad	.Lload_fpu_regs_fp_ctl
 .Lcleanup_load_fpu_regs_fp:
 	.quad	.Lload_fpu_regs_fp
 .Lcleanup_load_fpu_regs_done:
 	.quad	.Lload_fpu_regs_done
 
-.Lcleanup___ctl_set_vx:
-	stctg	%c0,%c0,__SF_EMPTY(%r15)
-	tm	__SF_EMPTY+5(%r15),2
-	bor	%r14
-	oi	__SF_EMPTY+5(%r15),2
-	lctlg	%c0,%c0,__SF_EMPTY(%r15)
-	lg	%r9,48(%r11)		# return from __ctl_set_vx
-	br	%r14
-
 /*
  * Integer constants
  */
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 834df04..3eecd0f 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -21,8 +21,6 @@
 asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
 asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
 
-int alloc_vector_registers(struct task_struct *tsk);
-
 void do_protection_exception(struct pt_regs *regs);
 void do_dat_exception(struct pt_regs *regs);
 
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index f2dac9f..b637871 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -87,31 +87,29 @@
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
+	size_t fpu_regs_size;
+
 	*dst = *src;
 
-	/* Set up a new floating-point register save area */
-	dst->thread.fpu.fpc = 0;
-	dst->thread.fpu.flags = 0;	/* Always start with VX disabled */
-	dst->thread.fpu.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS,
-				       GFP_KERNEL|__GFP_REPEAT);
-	if (!dst->thread.fpu.fprs)
+	/*
+	 * If the vector extension is available, it is enabled for all tasks,
+	 * and, thus, the FPU register save area must be allocated accordingly.
+	 */
+	fpu_regs_size = MACHINE_HAS_VX ? sizeof(__vector128) * __NUM_VXRS
+				       : sizeof(freg_t) * __NUM_FPRS;
+	dst->thread.fpu.regs = kzalloc(fpu_regs_size, GFP_KERNEL|__GFP_REPEAT);
+	if (!dst->thread.fpu.regs)
 		return -ENOMEM;
 
 	/*
 	 * Save the floating-point or vector register state of the current
-	 * task.  The state is not saved for early kernel threads, for example,
-	 * the init_task, which do not have an allocated save area.
-	 * The CIF_FPU flag is set in any case to lazy clear or restore a saved
-	 * state when switching to a different task or returning to user space.
+	 * task and set the CIF_FPU flag to lazy restore the FPU register
+	 * state when returning to user space.
 	 */
 	save_fpu_regs();
 	dst->thread.fpu.fpc = current->thread.fpu.fpc;
-	if (is_vx_task(current))
-		convert_vx_to_fp(dst->thread.fpu.fprs,
-				 current->thread.fpu.vxrs);
-	else
-		memcpy(dst->thread.fpu.fprs, current->thread.fpu.fprs,
-		       sizeof(freg_t) * __NUM_FPRS);
+	memcpy(dst->thread.fpu.regs, current->thread.fpu.regs, fpu_regs_size);
+
 	return 0;
 }
 
@@ -199,7 +197,7 @@
 	save_fpu_regs();
 	fpregs->fpc = current->thread.fpu.fpc;
 	fpregs->pad = 0;
-	if (is_vx_task(current))
+	if (MACHINE_HAS_VX)
 		convert_vx_to_fp((freg_t *)&fpregs->fprs,
 				 current->thread.fpu.vxrs);
 	else
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 8b1c8e3..3ccd900 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -239,7 +239,7 @@
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			tmp = *(addr_t *)
 			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
@@ -383,7 +383,7 @@
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			*(addr_t *)((addr_t)
 				child->thread.fpu.vxrs + 2*offset) = data;
 		else
@@ -617,7 +617,7 @@
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			tmp = *(__u32 *)
 			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
@@ -742,7 +742,7 @@
 		 * or the child->thread.fpu.vxrs array
 		 */
 		offset = addr - (addr_t) &dummy32->regs.fp_regs.fprs;
-		if (is_vx_task(child))
+		if (MACHINE_HAS_VX)
 			*(__u32 *)((addr_t)
 				child->thread.fpu.vxrs + 2*offset) = tmp;
 		else
@@ -981,7 +981,7 @@
 	if (rc)
 		return rc;
 
-	if (is_vx_task(target))
+	if (MACHINE_HAS_VX)
 		convert_fp_to_vx(target->thread.fpu.vxrs, fprs);
 	else
 		memcpy(target->thread.fpu.fprs, &fprs, sizeof(fprs));
@@ -1047,13 +1047,10 @@
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (is_vx_task(target)) {
-		if (target == current)
-			save_fpu_regs();
-		for (i = 0; i < __NUM_VXRS_LOW; i++)
-			vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
-	} else
-		memset(vxrs, 0, sizeof(vxrs));
+	if (target == current)
+		save_fpu_regs();
+	for (i = 0; i < __NUM_VXRS_LOW; i++)
+		vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1);
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
 }
 
@@ -1067,11 +1064,7 @@
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!is_vx_task(target)) {
-		rc = alloc_vector_registers(target);
-		if (rc)
-			return rc;
-	} else if (target == current)
+	if (target == current)
 		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
@@ -1091,13 +1084,10 @@
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (is_vx_task(target)) {
-		if (target == current)
-			save_fpu_regs();
-		memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW,
-		       sizeof(vxrs));
-	} else
-		memset(vxrs, 0, sizeof(vxrs));
+	if (target == current)
+		save_fpu_regs();
+	memcpy(vxrs, target->thread.fpu.vxrs + __NUM_VXRS_LOW, sizeof(vxrs));
+
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
 }
 
@@ -1110,11 +1100,7 @@
 
 	if (!MACHINE_HAS_VX)
 		return -ENODEV;
-	if (!is_vx_task(target)) {
-		rc = alloc_vector_registers(target);
-		if (rc)
-			return rc;
-	} else if (target == current)
+	if (target == current)
 		save_fpu_regs();
 
 	rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 5090d3d..d9325b7 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -10,7 +10,6 @@
 EXPORT_SYMBOL(sie64a);
 EXPORT_SYMBOL(sie_exit);
 EXPORT_SYMBOL(save_fpu_regs);
-EXPORT_SYMBOL(__ctl_set_vx);
 #endif
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 9549af1..028cc46 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -179,7 +179,7 @@
 	int i;
 
 	/* Save vector registers to signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		for (i = 0; i < __NUM_VXRS_LOW; i++)
 			vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1);
 		if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
@@ -199,7 +199,7 @@
 	int i;
 
 	/* Restore vector registers from signal stack */
-	if (is_vx_task(current)) {
+	if (MACHINE_HAS_VX) {
 		if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
 				     sizeof(sregs_ext->vxrs_low)) ||
 		    __copy_from_user(current->thread.fpu.vxrs + __NUM_VXRS_LOW,
@@ -381,8 +381,7 @@
 	uc_flags = 0;
 	if (MACHINE_HAS_VX) {
 		frame_size += sizeof(_sigregs_ext);
-		if (is_vx_task(current))
-			uc_flags |= UC_VXRS;
+		uc_flags |= UC_VXRS;
 	}
 	frame = get_sigframe(&ksig->ka, regs, frame_size);
 	if (frame == (void __user *) -1UL)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 9861613..d9c4531 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -224,29 +224,6 @@
 DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
 	      "specification exception");
 
-int alloc_vector_registers(struct task_struct *tsk)
-{
-	__vector128 *vxrs;
-	freg_t *fprs;
-
-	/* Allocate vector register save area. */
-	vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
-		       GFP_KERNEL|__GFP_REPEAT);
-	if (!vxrs)
-		return -ENOMEM;
-	preempt_disable();
-	if (tsk == current)
-		save_fpu_regs();
-	/* Copy the 16 floating point registers */
-	convert_fp_to_vx(vxrs, tsk->thread.fpu.fprs);
-	fprs = tsk->thread.fpu.fprs;
-	tsk->thread.fpu.vxrs = vxrs;
-	tsk->thread.fpu.flags |= FPU_USE_VX;
-	kfree(fprs);
-	preempt_enable();
-	return 0;
-}
-
 void vector_exception(struct pt_regs *regs)
 {
 	int si_code, vic;
@@ -281,13 +258,6 @@
 	do_trap(regs, SIGFPE, si_code, "vector exception");
 }
 
-static int __init disable_vector_extension(char *str)
-{
-	S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
-	return 1;
-}
-__setup("novx", disable_vector_extension);
-
 void data_exception(struct pt_regs *regs)
 {
 	__u16 __user *location;
@@ -296,15 +266,6 @@
 	location = get_trap_ip(regs);
 
 	save_fpu_regs();
-	/* Check for vector register enablement */
-	if (MACHINE_HAS_VX && !is_vx_task(current) &&
-	    (current->thread.fpu.fpc & FPC_DXC_MASK) == 0xfe00) {
-		alloc_vector_registers(current);
-		/* Vector data exception is suppressing, rewind psw. */
-		regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
-		clear_pt_regs_flag(regs, PIF_PER_TRAP);
-		return;
-	}
 	if (current->thread.fpu.fpc & FPC_DXC_MASK)
 		signal = SIGFPE;
 	else
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0a67c40..c6b4063 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1292,7 +1292,6 @@
 static inline void save_fpu_to(struct fpu *dst)
 {
 	dst->fpc = current->thread.fpu.fpc;
-	dst->flags = current->thread.fpu.flags;
 	dst->regs = current->thread.fpu.regs;
 }
 
@@ -1303,7 +1302,6 @@
 static inline void load_fpu_from(struct fpu *from)
 {
 	current->thread.fpu.fpc = from->fpc;
-	current->thread.fpu.flags = from->flags;
 	current->thread.fpu.regs = from->regs;
 }
 
@@ -1315,15 +1313,12 @@
 
 	if (test_kvm_facility(vcpu->kvm, 129)) {
 		current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
-		current->thread.fpu.flags = FPU_USE_VX;
 		/*
 		 * Use the register save area in the SIE-control block
 		 * for register restore and save in kvm_arch_vcpu_put()
 		 */
 		current->thread.fpu.vxrs =
 			(__vector128 *)&vcpu->run->s.regs.vrs;
-		/* Always enable the vector extension for KVM */
-		__ctl_set_vx();
 	} else
 		load_fpu_from(&vcpu->arch.guest_fpregs);
 
@@ -2326,7 +2321,6 @@
 		 * registers and the FPC value and store them in the
 		 * guest_fpregs structure.
 		 */
-		WARN_ON(!is_vx_task(current));	  /* XXX remove later */
 		vcpu->arch.guest_fpregs.fpc = current->thread.fpu.fpc;
 		convert_vx_to_fp(vcpu->arch.guest_fpregs.fprs,
 				 current->thread.fpu.vxrs);