[S390] switch sched_clock to store-clock-extended.

Replace get_clock_extended with get_clock_xt, which returns an 8-byte
clock value read with store clock extended (STCKE), and use get_clock_xt
for sched_clock. STCKE should be faster than STCK on newer machines.

Signed-off-by: Jan Glauber <jan.glauber@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
diff --git a/include/asm-s390/timex.h b/include/asm-s390/timex.h
index 98229db..6dd7eec 100644
--- a/include/asm-s390/timex.h
+++ b/include/asm-s390/timex.h
@@ -62,16 +62,18 @@
 	return clk;
 }
 
-static inline void get_clock_extended(void *dest)
+static inline unsigned long long get_clock_xt(void)
 {
-	typedef struct { unsigned long long clk[2]; } __clock_t;
+	unsigned char clk[16];
 
 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2)
-	asm volatile("stcke %0" : "=Q" (*((__clock_t *)dest)) : : "cc");
+	asm volatile("stcke %0" : "=Q" (clk) : : "cc");
 #else /* __GNUC__ */
-	asm volatile("stcke 0(%1)" : "=m" (*((__clock_t *)dest))
-				   : "a" ((__clock_t *)dest) : "cc");
+	asm volatile("stcke 0(%1)" : "=m" (clk)
+				   : "a" (clk) : "cc");
 #endif /* __GNUC__ */
+
+	return *((unsigned long long *)&clk[1]);
 }
 
 static inline cycles_t get_cycles(void)