sh: Move over to dynamically allocated FPU context.

This follows the x86 xstate changes and implements a task_xstate slab
cache that is dynamically sized to match one of hard FP/soft FP/FPU-less.

This also tidies up and consolidates some of the SH-2A/SH-4 FPU
fragmentation. Now fpu state restorers are commonly defined, with the
init_fpu()/fpu_init() mess reworked to follow the x86 convention.
The fpu_init() register initialization has been replaced by xstate setup
followed by writing out to hardware via the standard restore path.

As init_fpu() now performs a slab allocation a secondary lighterweight
restorer is also introduced for the context switch.

In the future the DSP state will be rolled in here, too.

More work remains for math emulation and the SH-5 FPU, which presently
uses its own special (UP-only) interfaces.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 9be35f3..be9b5dc 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -163,10 +163,10 @@
 
 	if ((boot_cpu_data.flags & CPU_HAS_FPU))
 		return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-					   &target->thread.fpu.hard, 0, -1);
+					   &target->thread.xstate->hardfpu, 0, -1);
 
 	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
-				   &target->thread.fpu.soft, 0, -1);
+				   &target->thread.xstate->softfpu, 0, -1);
 }
 
 static int fpregs_set(struct task_struct *target,
@@ -184,10 +184,10 @@
 
 	if ((boot_cpu_data.flags & CPU_HAS_FPU))
 		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-					  &target->thread.fpu.hard, 0, -1);
+					  &target->thread.xstate->hardfpu, 0, -1);
 
 	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
-				  &target->thread.fpu.soft, 0, -1);
+				  &target->thread.xstate->softfpu, 0, -1);
 }
 
 static int fpregs_active(struct task_struct *target,
@@ -333,7 +333,7 @@
 				else
 					tmp = 0;
 			} else
-				tmp = ((long *)&child->thread.fpu)
+				tmp = ((long *)child->thread.xstate)
 					[(addr - (long)&dummy->fpu) >> 2];
 		} else if (addr == (long) &dummy->u_fpvalid)
 			tmp = !!tsk_used_math(child);
@@ -362,7 +362,7 @@
 		else if (addr >= (long) &dummy->fpu &&
 			 addr < (long) &dummy->u_fpvalid) {
 			set_stopped_child_used_math(child);
-			((long *)&child->thread.fpu)
+			((long *)child->thread.xstate)
 				[(addr - (long)&dummy->fpu) >> 2] = data;
 			ret = 0;
 		} else if (addr == (long) &dummy->u_fpvalid) {