Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm

Pull ARM updates from Russell King:
 "Included in these updates are:
   - Performance optimisation to avoid writing the control register at
     every exception.
   - Use static inline instead of extern inline in ftrace code.
   - Crypto ARM assembly updates for big endian
   - Alignment of initrd/.init memory to page sizes when freeing to
     ensure that we fully free the regions
   - Add gcov support
   - A couple of preparatory patches for VDSO support: use
     _install_special_mapping, and randomize the sigpage placement above
     stack.
   - Add L2 ePAPR DT cache properties so that DT can specify the cache
     geometry.
   - Preparatory patch for FIQ (NMI) kernel C code for things like
     spinlock lockup debug.  Following on from this are a couple of my
     patches cleaning up show_regs() and removing an unused (probably
     since 1.x days) do_unexp_fiq() function.
   - Use pr_warn() rather than pr_warning().
   - A number of cleanups (smp, footbridge, return_address)"

* 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (21 commits)
  ARM: 8167/1: extend the reserved memory for initrd to be page aligned
  ARM: 8168/1: extend __init_end to a page align address
  ARM: 8169/1: l2c: parse cache properties from ePAPR definitions
  ARM: 8160/1: drop warning about return_address not using unwind tables
  ARM: 8161/1: footbridge: select machine dir based on ARCH_FOOTBRIDGE
  ARM: 8158/1: LLVMLinux: use static inline in ARM ftrace.h
  ARM: 8155/1: place sigpage at a random offset above stack
  ARM: 8154/1: use _install_special_mapping for sigpage
  ARM: 8153/1: Enable gcov support on the ARM architecture
  ARM: Avoid writing to control register on every exception
  ARM: 8152/1: Convert pr_warning to pr_warn
  ARM: remove unused do_unexp_fiq() function
  ARM: remove extraneous newline in show_regs()
  ARM: 8150/3: fiq: Replace default FIQ handler
  ARM: 8140/1: ep93xx: Enable DEBUG_LL_UART_PL01X
  ARM: 8139/1: versatile: Enable DEBUG_LL_UART_PL01X
  ARM: 8138/1: drop ISAR0 workaround for B15
  ARM: 8136/1: sa1100: add Micro ASIC platform device
  ARM: 8131/1: arm/smp: Absorb boot_secondary()
  ARM: 8126/1: crypto: enable NEON SHA-384/SHA-512 for big endian
  ...
diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt
index af527ee..292ef7c 100644
--- a/Documentation/devicetree/bindings/arm/l2cc.txt
+++ b/Documentation/devicetree/bindings/arm/l2cc.txt
@@ -2,6 +2,10 @@
 
 ARM cores often have a separate level 2 cache controller. There are various
 implementations of the L2 cache controller with compatible programming models.
+Some of the properties that are just prefixed "cache-*" are taken from section
+3.7.3 of the ePAPR v1.1 specification which can be found at:
+https://www.power.org/wp-content/uploads/2012/06/Power_ePAPR_APPROVED_v1.1.pdf
+
 The ARM L2 cache representation in the device tree should be done as follows:
 
 Required properties:
@@ -44,6 +48,12 @@
   I/O coherent mode. Valid only when the arm,pl310-cache compatible
   string is used.
 - interrupts : 1 combined interrupt.
+- cache-size : specifies the size in bytes of the cache
+- cache-sets : specifies the number of associativity sets of the cache
+- cache-block-size : specifies the size in bytes of a cache block
+- cache-line-size : specifies the size in bytes of a line in the cache;
+  if this is not specified, the line size is assumed to be equal to the
+  cache block size
 - cache-id-part: cache id part number to be used if it is not present
   on hardware
 - wt-override: If present then L2 is forced to Write through mode
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 0ce9d0f..12bfc1f 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -157,6 +157,7 @@
 machine-$(CONFIG_ARCH_EFM32)		+= efm32
 machine-$(CONFIG_ARCH_EP93XX)		+= ep93xx
 machine-$(CONFIG_ARCH_EXYNOS)		+= exynos
+machine-$(CONFIG_ARCH_FOOTBRIDGE)	+= footbridge
 machine-$(CONFIG_ARCH_GEMINI)		+= gemini
 machine-$(CONFIG_ARCH_HIGHBANK)		+= highbank
 machine-$(CONFIG_ARCH_HISI)		+= hisi
@@ -205,7 +206,6 @@
 machine-$(CONFIG_ARCH_VT8500)		+= vt8500
 machine-$(CONFIG_ARCH_W90X900)		+= w90x900
 machine-$(CONFIG_ARCH_ZYNQ)		+= zynq
-machine-$(CONFIG_FOOTBRIDGE)		+= footbridge
 machine-$(CONFIG_PLAT_SPEAR)		+= spear
 
 # Platform directory name.  This list is sorted alphanumerically
diff --git a/arch/arm/boot/bootp/Makefile b/arch/arm/boot/bootp/Makefile
index c394e30..5761f00 100644
--- a/arch/arm/boot/bootp/Makefile
+++ b/arch/arm/boot/bootp/Makefile
@@ -5,6 +5,8 @@
 # architecture-specific flags and dependencies.
 #
 
+GCOV_PROFILE	:= n
+
 LDFLAGS_bootp	:=-p --no-undefined -X \
 		 --defsym initrd_phys=$(INITRD_PHYS) \
 		 --defsym params_phys=$(PARAMS_PHYS) -T
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 76a50ec..3ea230a 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -37,6 +37,8 @@
 OBJS		+= hyp-stub.o
 endif
 
+GCOV_PROFILE		:= n
+
 #
 # Architecture dependencies
 #
diff --git a/arch/arm/configs/ep93xx_defconfig b/arch/arm/configs/ep93xx_defconfig
index 1b650c8..72233b9 100644
--- a/arch/arm/configs/ep93xx_defconfig
+++ b/arch/arm/configs/ep93xx_defconfig
@@ -107,5 +107,6 @@
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_PL01X=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_LIBCRC32C=y
diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig
index d52b4ff..ea49d37 100644
--- a/arch/arm/configs/versatile_defconfig
+++ b/arch/arm/configs/versatile_defconfig
@@ -82,5 +82,6 @@
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_USER=y
 CONFIG_DEBUG_LL=y
+CONFIG_DEBUG_LL_UART_PL01X=y
 CONFIG_FONTS=y
 CONFIG_FONT_ACORN_8x8=y
diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S
index 50013c0..dcd01f3 100644
--- a/arch/arm/crypto/sha1-armv7-neon.S
+++ b/arch/arm/crypto/sha1-armv7-neon.S
@@ -9,7 +9,7 @@
  */
 
 #include <linux/linkage.h>
-
+#include <asm/assembler.h>
 
 .syntax unified
 .code   32
@@ -61,13 +61,13 @@
 #define RT3 r12
 
 #define W0 q0
-#define W1 q1
+#define W1 q7
 #define W2 q2
 #define W3 q3
 #define W4 q4
-#define W5 q5
-#define W6 q6
-#define W7 q7
+#define W5 q6
+#define W6 q5
+#define W7 q1
 
 #define tmp0 q8
 #define tmp1 q9
@@ -79,6 +79,11 @@
 #define qK3 q14
 #define qK4 q15
 
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define ARM_LE(code...)
+#else
+#define ARM_LE(code...)		code
+#endif
 
 /* Round function macros. */
 
@@ -150,45 +155,45 @@
 #define W_PRECALC_00_15() \
 	add       RWK, sp, #(WK_offs(0));			\
 	\
-	vld1.32   {tmp0, tmp1}, [RDATA]!;			\
-	vrev32.8  W0, tmp0;		/* big => little */	\
-	vld1.32   {tmp2, tmp3}, [RDATA]!;			\
+	vld1.32   {W0, W7}, [RDATA]!;				\
+ ARM_LE(vrev32.8  W0, W0;	)	/* big => little */	\
+	vld1.32   {W6, W5}, [RDATA]!;				\
 	vadd.u32  tmp0, W0, curK;				\
-	vrev32.8  W7, tmp1;		/* big => little */	\
-	vrev32.8  W6, tmp2;		/* big => little */	\
+ ARM_LE(vrev32.8  W7, W7;	)	/* big => little */	\
+ ARM_LE(vrev32.8  W6, W6;	)	/* big => little */	\
 	vadd.u32  tmp1, W7, curK;				\
-	vrev32.8  W5, tmp3;		/* big => little */	\
+ ARM_LE(vrev32.8  W5, W5;	)	/* big => little */	\
 	vadd.u32  tmp2, W6, curK;				\
 	vst1.32   {tmp0, tmp1}, [RWK]!;				\
 	vadd.u32  tmp3, W5, curK;				\
 	vst1.32   {tmp2, tmp3}, [RWK];				\
 
 #define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vld1.32   {tmp0, tmp1}, [RDATA]!;			\
+	vld1.32   {W0, W7}, [RDATA]!;				\
 
 #define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 	add       RWK, sp, #(WK_offs(0));			\
 
 #define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vrev32.8  W0, tmp0;		/* big => little */	\
+ ARM_LE(vrev32.8  W0, W0;	)	/* big => little */	\
 
 #define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vld1.32   {tmp2, tmp3}, [RDATA]!;			\
+	vld1.32   {W6, W5}, [RDATA]!;				\
 
 #define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 	vadd.u32  tmp0, W0, curK;				\
 
 #define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vrev32.8  W7, tmp1;		/* big => little */	\
+ ARM_LE(vrev32.8  W7, W7;	)	/* big => little */	\
 
 #define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vrev32.8  W6, tmp2;		/* big => little */	\
+ ARM_LE(vrev32.8  W6, W6;	)	/* big => little */	\
 
 #define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 	vadd.u32  tmp1, W7, curK;				\
 
 #define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
-	vrev32.8  W5, tmp3;		/* big => little */	\
+ ARM_LE(vrev32.8  W5, W5;	)	/* big => little */	\
 
 #define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
 	vadd.u32  tmp2, W6, curK;				\
diff --git a/arch/arm/include/asm/ftrace.h b/arch/arm/include/asm/ftrace.h
index 39eb16b..bfe2a2f 100644
--- a/arch/arm/include/asm/ftrace.h
+++ b/arch/arm/include/asm/ftrace.h
@@ -45,7 +45,7 @@
 
 #else
 
-extern inline void *return_address(unsigned int level)
+static inline void *return_address(unsigned int level)
 {
 	return NULL;
 }
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 2ec765c..18f5a55 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -49,12 +49,6 @@
 extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
 
 /*
- * Boot a secondary CPU, and assign it the specified idle task.
- * This also gives us the initial stack to use for this CPU.
- */
-extern int boot_secondary(unsigned int cpu, struct task_struct *);
-
-/*
  * Called from platform specific assembly code, this is the
  * secondary CPU entry point.
  */
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 4651f69..e86c985 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -63,8 +63,8 @@
 	if (i + n > SYSCALL_MAX_ARGS) {
 		unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
 		unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
-		pr_warning("%s called with max args %d, handling only %d\n",
-			   __func__, i + n, SYSCALL_MAX_ARGS);
+		pr_warn("%s called with max args %d, handling only %d\n",
+			__func__, i + n, SYSCALL_MAX_ARGS);
 		memset(args_bad, 0, n_bad * sizeof(args[0]));
 		n = SYSCALL_MAX_ARGS - i;
 	}
@@ -88,8 +88,8 @@
 		return;
 
 	if (i + n > SYSCALL_MAX_ARGS) {
-		pr_warning("%s called with max args %d, handling only %d\n",
-			   __func__, i + n, SYSCALL_MAX_ARGS);
+		pr_warn("%s called with max args %d, handling only %d\n",
+			__func__, i + n, SYSCALL_MAX_ARGS);
 		n = SYSCALL_MAX_ARGS - i;
 	}
 
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 7807ef5..528f8af 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -130,7 +130,7 @@
 	strlcat(default_command_line, tag->u.cmdline.cmdline,
 		COMMAND_LINE_SIZE);
 #elif defined(CONFIG_CMDLINE_FORCE)
-	pr_warning("Ignoring tag cmdline (using the default kernel command line)\n");
+	pr_warn("Ignoring tag cmdline (using the default kernel command line)\n");
 #else
 	strlcpy(default_command_line, tag->u.cmdline.cmdline,
 		COMMAND_LINE_SIZE);
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 36276cd..2f5555d 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -146,7 +146,7 @@
 #define SPFIX(code...)
 #endif
 
-	.macro	svc_entry, stack_hole=0
+	.macro	svc_entry, stack_hole=0, trace=1
  UNWIND(.fnstart		)
  UNWIND(.save {r0 - pc}		)
 	sub	sp, sp, #(S_FRAME_SIZE + \stack_hole - 4)
@@ -182,9 +182,11 @@
 	@
 	stmia	r7, {r2 - r6}
 
+	.if \trace
 #ifdef CONFIG_TRACE_IRQFLAGS
 	bl	trace_hardirqs_off
 #endif
+	.endif
 	.endm
 
 	.align	5
@@ -295,6 +297,15 @@
 ENDPROC(__pabt_svc)
 
 	.align	5
+__fiq_svc:
+	svc_entry trace=0
+	mov	r0, sp				@ struct pt_regs *regs
+	bl	handle_fiq_as_nmi
+	svc_exit_via_fiq
+ UNWIND(.fnend		)
+ENDPROC(__fiq_svc)
+
+	.align	5
 .LCcralign:
 	.word	cr_alignment
 #ifdef MULTI_DABORT
@@ -305,6 +316,46 @@
 	.word	fp_enter
 
 /*
+ * Abort mode handlers
+ */
+
+@
+@ Taking a FIQ in abort mode is similar to taking a FIQ in SVC mode
+@ and reuses the same macros. However in abort mode we must also
+@ save/restore lr_abt and spsr_abt to make nested aborts safe.
+@
+	.align 5
+__fiq_abt:
+	svc_entry trace=0
+
+ ARM(	msr	cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov	r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr	cpsr_c, r0 )
+	mov	r1, lr		@ Save lr_abt
+	mrs	r2, spsr	@ Save spsr_abt, abort is now safe
+ ARM(	msr	cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov	r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr	cpsr_c, r0 )
+	stmfd	sp!, {r1 - r2}
+
+	add	r0, sp, #8			@ struct pt_regs *regs
+	bl	handle_fiq_as_nmi
+
+	ldmfd	sp!, {r1 - r2}
+ ARM(	msr	cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov	r0, #ABT_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr	cpsr_c, r0 )
+	mov	lr, r1		@ Restore lr_abt, abort is unsafe
+	msr	spsr_cxsf, r2	@ Restore spsr_abt
+ ARM(	msr	cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( mov	r0, #SVC_MODE | PSR_I_BIT | PSR_F_BIT )
+ THUMB( msr	cpsr_c, r0 )
+
+	svc_exit_via_fiq
+ UNWIND(.fnend		)
+ENDPROC(__fiq_abt)
+
+/*
  * User mode handlers
  *
  * EABI note: sp_svc is always 64-bit aligned here, so should S_FRAME_SIZE
@@ -314,13 +365,16 @@
 #error "sizeof(struct pt_regs) must be a multiple of 8"
 #endif
 
-	.macro	usr_entry
+	.macro	usr_entry, trace=1
  UNWIND(.fnstart	)
  UNWIND(.cantunwind	)	@ don't unwind the user space
 	sub	sp, sp, #S_FRAME_SIZE
  ARM(	stmib	sp, {r1 - r12}	)
  THUMB(	stmia	sp, {r0 - r12}	)
 
+ ATRAP(	mrc	p15, 0, r7, c1, c0, 0)
+ ATRAP(	ldr	r8, .LCcralign)
+
 	ldmia	r0, {r3 - r5}
 	add	r0, sp, #S_PC		@ here for interlock avoidance
 	mov	r6, #-1			@  ""  ""     ""        ""
@@ -328,6 +382,8 @@
 	str	r3, [sp]		@ save the "real" r0 copied
 					@ from the exception stack
 
+ ATRAP(	ldr	r8, [r8, #0])
+
 	@
 	@ We are now ready to fill in the remaining blanks on the stack:
 	@
@@ -341,20 +397,21 @@
  ARM(	stmdb	r0, {sp, lr}^			)
  THUMB(	store_user_sp_lr r0, r1, S_SP - S_PC	)
 
-	@
 	@ Enable the alignment trap while in kernel mode
-	@
-	alignment_trap r0, .LCcralign
+ ATRAP(	teq	r8, r7)
+ ATRAP( mcrne	p15, 0, r8, c1, c0, 0)
 
 	@
 	@ Clear FP to mark the first stack frame
 	@
 	zero_fp
 
+	.if	\trace
 #ifdef CONFIG_IRQSOFF_TRACER
 	bl	trace_hardirqs_off
 #endif
 	ct_user_exit save = 0
+	.endif
 	.endm
 
 	.macro	kuser_cmpxchg_check
@@ -683,6 +740,17 @@
 ENDPROC(__pabt_usr)
 ENDPROC(ret_from_exception)
 
+	.align	5
+__fiq_usr:
+	usr_entry trace=0
+	kuser_cmpxchg_check
+	mov	r0, sp				@ struct pt_regs *regs
+	bl	handle_fiq_as_nmi
+	get_thread_info tsk
+	restore_user_regs fast = 0, offset = 0
+ UNWIND(.fnend		)
+ENDPROC(__fiq_usr)
+
 /*
  * Register switch for ARMv3 and ARMv4 processors
  * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info
@@ -1118,17 +1186,29 @@
 	b	vector_addrexcptn
 
 /*=============================================================================
- * Undefined FIQs
+ * FIQ "NMI" handler
  *-----------------------------------------------------------------------------
- * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC
- * MUST PRESERVE SVC SPSR, but need to switch to SVC mode to show our msg.
- * Basically to switch modes, we *HAVE* to clobber one register...  brain
- * damage alert!  I don't think that we can execute any code in here in any
- * other mode than FIQ...  Ok you can switch to another mode, but you can't
- * get out of that mode without clobbering one register.
+ * Handle a FIQ using the SVC stack, allowing FIQ to act like NMI on x86
+ * systems.
  */
-vector_fiq:
-	subs	pc, lr, #4
+	vector_stub	fiq, FIQ_MODE, 4
+
+	.long	__fiq_usr			@  0  (USR_26 / USR_32)
+	.long	__fiq_svc			@  1  (FIQ_26 / FIQ_32)
+	.long	__fiq_svc			@  2  (IRQ_26 / IRQ_32)
+	.long	__fiq_svc			@  3  (SVC_26 / SVC_32)
+	.long	__fiq_svc			@  4
+	.long	__fiq_svc			@  5
+	.long	__fiq_svc			@  6
+	.long	__fiq_abt			@  7
+	.long	__fiq_svc			@  8
+	.long	__fiq_svc			@  9
+	.long	__fiq_svc			@  a
+	.long	__fiq_svc			@  b
+	.long	__fiq_svc			@  c
+	.long	__fiq_svc			@  d
+	.long	__fiq_svc			@  e
+	.long	__fiq_svc			@  f
 
 	.globl	vector_fiq_offset
 	.equ	vector_fiq_offset, vector_fiq
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index e52fe5a..6bb09d4 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -366,7 +366,7 @@
 	str	r0, [sp, #S_OLD_R0]		@ Save OLD_R0
 #endif
 	zero_fp
-	alignment_trap ip, __cr_alignment
+	alignment_trap r10, ip, __cr_alignment
 	enable_irq
 	ct_user_exit
 	get_thread_info tsk
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 2fdf867..4176df7 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -37,11 +37,19 @@
 #endif
 	.endm
 
-	.macro	alignment_trap, rtemp, label
 #ifdef CONFIG_ALIGNMENT_TRAP
-	ldr	\rtemp, \label
-	ldr	\rtemp, [\rtemp]
-	mcr	p15, 0, \rtemp, c1, c0
+#define ATRAP(x...) x
+#else
+#define ATRAP(x...)
+#endif
+
+	.macro	alignment_trap, rtmp1, rtmp2, label
+#ifdef CONFIG_ALIGNMENT_TRAP
+	mrc	p15, 0, \rtmp2, c1, c0, 0
+	ldr	\rtmp1, \label
+	ldr	\rtmp1, [\rtmp1]
+	teq	\rtmp1, \rtmp2
+	mcrne	p15, 0, \rtmp1, c1, c0, 0
 #endif
 	.endm
 
@@ -216,6 +224,34 @@
 	ldmia	sp, {r0 - pc}^			@ load r0 - pc, cpsr
 	.endm
 
+	@
+	@ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit
+	@
+	@ This macro acts in a similar manner to svc_exit but switches to FIQ
+	@ mode to restore the final part of the register state.
+	@
+	@ We cannot use the normal svc_exit procedure because that would
+	@ clobber spsr_svc (FIQ could be delivered during the first few
+	@ instructions of vector_swi meaning its contents have not been
+	@ saved anywhere).
+	@
+	@ Note that, unlike svc_exit, this macro also does not allow a caller
+	@ supplied rpsr. This is because the FIQ exceptions are not re-entrant
+	@ and the handlers cannot call into the scheduler (meaning the value
+	@ on the stack remains correct).
+	@
+	.macro  svc_exit_via_fiq
+	mov	r0, sp
+	ldmib	r0, {r1 - r14}	@ abort is deadly from here onward (it will
+				@ clobber state restored below)
+	msr	cpsr_c, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
+	add	r8, r0, #S_PC
+	ldr	r9, [r0, #S_PSR]
+	msr	spsr_cxsf, r9
+	ldr	r0, [r0, #S_R0]
+	ldmia	r8, {pc}^
+	.endm
+
 	.macro	restore_user_regs, fast = 0, offset = 0
 	ldr	r1, [sp, #\offset + S_PSR]	@ get calling cpsr
 	ldr	lr, [sp, #\offset + S_PC]!	@ get pc
@@ -267,6 +303,25 @@
 	rfeia	sp!
 	.endm
 
+	@
+	@ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit
+	@
+	@ For full details see non-Thumb implementation above.
+	@
+	.macro  svc_exit_via_fiq
+	add	r0, sp, #S_R2
+	ldr	lr, [sp, #S_LR]
+	ldr	sp, [sp, #S_SP] @ abort is deadly from here onward (it will
+			        @ clobber state restored below)
+	ldmia	r0, {r2 - r12}
+	mov	r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
+	msr	cpsr_c, r1
+	sub	r0, #S_R2
+	add	r8, r0, #S_PC
+	ldmia	r0, {r0 - r1}
+	rfeia	r8
+	.endm
+
 #ifdef CONFIG_CPU_V7M
 	/*
 	 * Note we don't need to do clrex here as clearing the local monitor is
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 918875d..b37752a 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -52,7 +52,8 @@
 		(unsigned)&vector_fiq_offset;		\
 	})
 
-static unsigned long no_fiq_insn;
+static unsigned long dfl_fiq_insn;
+static struct pt_regs dfl_fiq_regs;
 
 /* Default reacquire function
  * - we always relinquish FIQ control
@@ -60,8 +61,15 @@
  */
 static int fiq_def_op(void *ref, int relinquish)
 {
-	if (!relinquish)
-		set_fiq_handler(&no_fiq_insn, sizeof(no_fiq_insn));
+	if (!relinquish) {
+		/* Restore default handler and registers */
+		local_fiq_disable();
+		set_fiq_regs(&dfl_fiq_regs);
+		set_fiq_handler(&dfl_fiq_insn, sizeof(dfl_fiq_insn));
+		local_fiq_enable();
+
+		/* FIXME: notify irq controller to standard enable FIQs */
+	}
 
 	return 0;
 }
@@ -150,6 +158,7 @@
 void __init init_FIQ(int start)
 {
 	unsigned offset = FIQ_OFFSET;
-	no_fiq_insn = *(unsigned long *)(0xffff0000 + offset);
+	dfl_fiq_insn = *(unsigned long *)(0xffff0000 + offset);
+	get_fiq_regs(&dfl_fiq_regs);
 	fiq_start = start;
 }
diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 4d963fb..b5b452f 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -113,8 +113,8 @@
 	GEN_READ_WB_REG_CASES(ARM_OP2_WVR, val);
 	GEN_READ_WB_REG_CASES(ARM_OP2_WCR, val);
 	default:
-		pr_warning("attempt to read from unknown breakpoint "
-				"register %d\n", n);
+		pr_warn("attempt to read from unknown breakpoint register %d\n",
+			n);
 	}
 
 	return val;
@@ -128,8 +128,8 @@
 	GEN_WRITE_WB_REG_CASES(ARM_OP2_WVR, val);
 	GEN_WRITE_WB_REG_CASES(ARM_OP2_WCR, val);
 	default:
-		pr_warning("attempt to write to unknown breakpoint "
-				"register %d\n", n);
+		pr_warn("attempt to write to unknown breakpoint register %d\n",
+			n);
 	}
 	isb();
 }
@@ -292,7 +292,7 @@
 	case TYPE_DATA:
 		return get_num_wrps();
 	default:
-		pr_warning("unknown slot type: %d\n", type);
+		pr_warn("unknown slot type: %d\n", type);
 		return 0;
 	}
 }
@@ -365,7 +365,7 @@
 	}
 
 	if (i == max_slots) {
-		pr_warning("Can't find any breakpoint slot\n");
+		pr_warn("Can't find any breakpoint slot\n");
 		return -EBUSY;
 	}
 
@@ -417,7 +417,7 @@
 	}
 
 	if (i == max_slots) {
-		pr_warning("Can't find any breakpoint slot\n");
+		pr_warn("Can't find any breakpoint slot\n");
 		return;
 	}
 
@@ -894,8 +894,8 @@
 {
 	int cpu = smp_processor_id();
 
-	pr_warning("Debug register access (0x%x) caused undefined instruction on CPU %d\n",
-		   instr, cpu);
+	pr_warn("Debug register access (0x%x) caused undefined instruction on CPU %d\n",
+		instr, cpu);
 
 	/* Set the error flag for this CPU and skip the faulting instruction. */
 	cpumask_set_cpu(cpu, &debug_err_mask);
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 5c4d38e..88de943 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -205,8 +205,8 @@
 		raw_spin_unlock(&desc->lock);
 
 		if (affinity_broken && printk_ratelimit())
-			pr_warning("IRQ%u no longer affine to CPU%u\n", i,
-				smp_processor_id());
+			pr_warn("IRQ%u no longer affine to CPU%u\n",
+				i, smp_processor_id());
 	}
 
 	local_irq_restore(flags);
diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c
index 4bf4cce..eb2c4d5 100644
--- a/arch/arm/kernel/perf_event_cpu.c
+++ b/arch/arm/kernel/perf_event_cpu.c
@@ -146,8 +146,8 @@
 			 * continue. Otherwise, continue without this interrupt.
 			 */
 			if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) {
-				pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n",
-					    irq, i);
+				pr_warn("unable to set irq affinity (irq=%d, cpu=%u)\n",
+					irq, i);
 				continue;
 			}
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index a35f6eb..a0a691d 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -306,7 +306,6 @@
 
 void show_regs(struct pt_regs * regs)
 {
-	printk("\n");
 	__show_regs(regs);
 	dump_stack();
 }
@@ -474,19 +473,57 @@
 
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
-	return is_gate_vma(vma) ? "[vectors]" :
-		(vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ?
-		 "[sigpage]" : NULL;
+	return is_gate_vma(vma) ? "[vectors]" : NULL;
+}
+
+/* If possible, provide a placement hint at a random offset from the
+ * stack for the signal page.
+ */
+static unsigned long sigpage_addr(const struct mm_struct *mm,
+				  unsigned int npages)
+{
+	unsigned long offset;
+	unsigned long first;
+	unsigned long last;
+	unsigned long addr;
+	unsigned int slots;
+
+	first = PAGE_ALIGN(mm->start_stack);
+
+	last = TASK_SIZE - (npages << PAGE_SHIFT);
+
+	/* No room after stack? */
+	if (first > last)
+		return 0;
+
+	/* Just enough room? */
+	if (first == last)
+		return first;
+
+	slots = ((last - first) >> PAGE_SHIFT) + 1;
+
+	offset = get_random_int() % slots;
+
+	addr = first + (offset << PAGE_SHIFT);
+
+	return addr;
 }
 
 static struct page *signal_page;
 extern struct page *get_signal_page(void);
 
+static const struct vm_special_mapping sigpage_mapping = {
+	.name = "[sigpage]",
+	.pages = &signal_page,
+};
+
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
 	unsigned long addr;
-	int ret;
+	unsigned long hint;
+	int ret = 0;
 
 	if (!signal_page)
 		signal_page = get_signal_page();
@@ -494,18 +531,23 @@
 		return -ENOMEM;
 
 	down_write(&mm->mmap_sem);
-	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+	hint = sigpage_addr(mm, 1);
+	addr = get_unmapped_area(NULL, hint, PAGE_SIZE, 0, 0);
 	if (IS_ERR_VALUE(addr)) {
 		ret = addr;
 		goto up_fail;
 	}
 
-	ret = install_special_mapping(mm, addr, PAGE_SIZE,
+	vma = _install_special_mapping(mm, addr, PAGE_SIZE,
 		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-		&signal_page);
+		&sigpage_mapping);
 
-	if (ret == 0)
-		mm->context.sigpage = addr;
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto up_fail;
+	}
+
+	mm->context.sigpage = addr;
 
  up_fail:
 	up_write(&mm->mmap_sem);
diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c
index fafedd8..98ea4b7 100644
--- a/arch/arm/kernel/return_address.c
+++ b/arch/arm/kernel/return_address.c
@@ -59,15 +59,6 @@
 
 #else /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) */
 
-#if defined(CONFIG_ARM_UNWIND)
-#warning "TODO: return_address should use unwind tables"
-#endif
-
-void *return_address(unsigned int level)
-{
-	return NULL;
-}
-
 #endif /* if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) / else */
 
 EXPORT_SYMBOL_GPL(return_address);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 84db893d..c031063 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -133,6 +133,7 @@
 	u32 irq[3];
 	u32 abt[3];
 	u32 und[3];
+	u32 fiq[3];
 } ____cacheline_aligned;
 
 #ifndef CONFIG_CPU_V7M
@@ -470,7 +471,10 @@
 	"msr	cpsr_c, %5\n\t"
 	"add	r14, %0, %6\n\t"
 	"mov	sp, r14\n\t"
-	"msr	cpsr_c, %7"
+	"msr	cpsr_c, %7\n\t"
+	"add	r14, %0, %8\n\t"
+	"mov	sp, r14\n\t"
+	"msr	cpsr_c, %9"
 	    :
 	    : "r" (stk),
 	      PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE),
@@ -479,6 +483,8 @@
 	      "I" (offsetof(struct stack, abt[0])),
 	      PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE),
 	      "I" (offsetof(struct stack, und[0])),
+	      PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
+	      "I" (offsetof(struct stack, fiq[0])),
 	      PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE)
 	    : "r14");
 #endif
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 9388a3d..39c74a2 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -95,6 +95,9 @@
 {
 	int ret;
 
+	if (!smp_ops.smp_boot_secondary)
+		return -ENOSYS;
+
 	/*
 	 * We need to tell the secondary core where to find
 	 * its stack and the page tables.
@@ -113,7 +116,7 @@
 	/*
 	 * Now bring the CPU into our world.
 	 */
-	ret = boot_secondary(cpu, idle);
+	ret = smp_ops.smp_boot_secondary(cpu, idle);
 	if (ret == 0) {
 		/*
 		 * CPU was successfully started, wait for it
@@ -142,13 +145,6 @@
 		smp_ops.smp_init_cpus();
 }
 
-int boot_secondary(unsigned int cpu, struct task_struct *idle)
-{
-	if (smp_ops.smp_boot_secondary)
-		return smp_ops.smp_boot_secondary(cpu, idle);
-	return -ENOSYS;
-}
-
 int platform_can_cpu_hotplug(void)
 {
 #ifdef CONFIG_HOTPLUG_CPU
@@ -650,7 +646,7 @@
 		udelay(1);
 
 	if (num_online_cpus() > 1)
-		pr_warning("SMP: failed to stop secondary CPUs\n");
+		pr_warn("SMP: failed to stop secondary CPUs\n");
 }
 
 /*
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index a964c9f..0c8b108 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -25,6 +25,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/sched.h>
+#include <linux/irq.h>
 
 #include <linux/atomic.h>
 #include <asm/cacheflush.h>
@@ -460,10 +461,29 @@
 	arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
 }
 
-asmlinkage void do_unexp_fiq (struct pt_regs *regs)
+/*
+ * Handle FIQ similarly to NMI on x86 systems.
+ *
+ * The runtime environment for NMIs is extremely restrictive
+ * (NMIs can pre-empt critical sections meaning almost all locking is
+ * forbidden) meaning this default FIQ handling must only be used in
+ * circumstances where non-maskability improves robustness, such as
+ * watchdog or debug logic.
+ *
+ * This handler is not appropriate for general purpose use in drivers or
+ * platform code and can be overridden using set_fiq_handler.
+ */
+asmlinkage void __exception_irq_entry handle_fiq_as_nmi(struct pt_regs *regs)
 {
-	printk("Hmm.  Unexpected FIQ received, but trying to continue\n");
-	printk("You may have a hardware problem...\n");
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	nmi_enter();
+
+	/* nop. FIQ handlers for special arch/arm features can be added here. */
+
+	nmi_exit();
+
+	set_irq_regs(old_regs);
 }
 
 /*
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index a61a1df..cbb85c5 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -157,7 +157,7 @@
 	if (likely(start->addr_offset <= addr_prel31))
 		return start;
 	else {
-		pr_warning("unwind: Unknown symbol address %08lx\n", addr);
+		pr_warn("unwind: Unknown symbol address %08lx\n", addr);
 		return NULL;
 	}
 }
@@ -225,7 +225,7 @@
 	unsigned long ret;
 
 	if (ctrl->entries <= 0) {
-		pr_warning("unwind: Corrupt unwind table\n");
+		pr_warn("unwind: Corrupt unwind table\n");
 		return 0;
 	}
 
@@ -333,8 +333,8 @@
 		insn = (insn << 8) | unwind_get_byte(ctrl);
 		mask = insn & 0x0fff;
 		if (mask == 0) {
-			pr_warning("unwind: 'Refuse to unwind' instruction %04lx\n",
-				   insn);
+			pr_warn("unwind: 'Refuse to unwind' instruction %04lx\n",
+				insn);
 			return -URC_FAILURE;
 		}
 
@@ -357,8 +357,8 @@
 		unsigned long mask = unwind_get_byte(ctrl);
 
 		if (mask == 0 || mask & 0xf0) {
-			pr_warning("unwind: Spare encoding %04lx\n",
-			       (insn << 8) | mask);
+			pr_warn("unwind: Spare encoding %04lx\n",
+				(insn << 8) | mask);
 			return -URC_FAILURE;
 		}
 
@@ -370,7 +370,7 @@
 
 		ctrl->vrs[SP] += 0x204 + (uleb128 << 2);
 	} else {
-		pr_warning("unwind: Unhandled instruction %02lx\n", insn);
+		pr_warn("unwind: Unhandled instruction %02lx\n", insn);
 		return -URC_FAILURE;
 	}
 
@@ -403,7 +403,7 @@
 
 	idx = unwind_find_idx(frame->pc);
 	if (!idx) {
-		pr_warning("unwind: Index not found %08lx\n", frame->pc);
+		pr_warn("unwind: Index not found %08lx\n", frame->pc);
 		return -URC_FAILURE;
 	}
 
@@ -422,8 +422,8 @@
 		/* only personality routine 0 supported in the index */
 		ctrl.insn = &idx->insn;
 	else {
-		pr_warning("unwind: Unsupported personality routine %08lx in the index at %p\n",
-			   idx->insn, idx);
+		pr_warn("unwind: Unsupported personality routine %08lx in the index at %p\n",
+			idx->insn, idx);
 		return -URC_FAILURE;
 	}
 
@@ -435,8 +435,8 @@
 		ctrl.byte = 1;
 		ctrl.entries = 1 + ((*ctrl.insn & 0x00ff0000) >> 16);
 	} else {
-		pr_warning("unwind: Unsupported personality routine %08lx at %p\n",
-			   *ctrl.insn, ctrl.insn);
+		pr_warn("unwind: Unsupported personality routine %08lx at %p\n",
+			*ctrl.insn, ctrl.insn);
 		return -URC_FAILURE;
 	}
 
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 6f57cb9..8e95aa4 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -219,8 +219,8 @@
 	__data_loc = ALIGN(4);		/* location in binary */
 	. = PAGE_OFFSET + TEXT_OFFSET;
 #else
-	__init_end = .;
 	. = ALIGN(THREAD_SIZE);
+	__init_end = .;
 	__data_loc = .;
 #endif
 
diff --git a/arch/arm/mach-sa1100/Kconfig b/arch/arm/mach-sa1100/Kconfig
index 04f9784..c6f6ed1 100644
--- a/arch/arm/mach-sa1100/Kconfig
+++ b/arch/arm/mach-sa1100/Kconfig
@@ -58,6 +58,7 @@
 	bool "Compaq iPAQ H3100"
 	select ARM_SA1110_CPUFREQ
 	select HTC_EGPIO
+	select MFD_IPAQ_MICRO
 	help
 	  Say Y here if you intend to run this kernel on the Compaq iPAQ
 	  H3100 handheld computer.  Information about this machine and the
@@ -69,6 +70,7 @@
 	bool "Compaq iPAQ H3600/H3700"
 	select ARM_SA1110_CPUFREQ
 	select HTC_EGPIO
+	select MFD_IPAQ_MICRO
 	help
 	  Say Y here if you intend to run this kernel on the Compaq iPAQ
 	  H3600 handheld computer.  Information about this machine and the
diff --git a/arch/arm/mach-sa1100/h3xxx.c b/arch/arm/mach-sa1100/h3xxx.c
index c79bf46..b1d4faa 100644
--- a/arch/arm/mach-sa1100/h3xxx.c
+++ b/arch/arm/mach-sa1100/h3xxx.c
@@ -25,6 +25,7 @@
 #include <asm/mach/map.h>
 
 #include <mach/h3xxx.h>
+#include <mach/irqs.h>
 
 #include "generic.h"
 
@@ -244,9 +245,23 @@
 	},
 };
 
+static struct resource h3xxx_micro_resources[] = {
+	DEFINE_RES_MEM(0x80010000, SZ_4K),
+	DEFINE_RES_MEM(0x80020000, SZ_4K),
+	DEFINE_RES_IRQ(IRQ_Ser1UART),
+};
+
+struct platform_device h3xxx_micro_asic = {
+	.name = "ipaq-h3xxx-micro",
+	.id = -1,
+	.resource = h3xxx_micro_resources,
+	.num_resources = ARRAY_SIZE(h3xxx_micro_resources),
+};
+
 static struct platform_device *h3xxx_devices[] = {
 	&h3xxx_egpio,
 	&h3xxx_keys,
+	&h3xxx_micro_asic,
 };
 
 void __init h3xxx_mach_init(void)
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index 5f2c988..55f9d6e 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -21,6 +21,7 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/spinlock.h>
+#include <linux/log2.h>
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -945,6 +946,98 @@
  * pass it though the device tree */
 static u32 cache_id_part_number_from_dt;
 
+/**
+ * l2x0_cache_size_of_parse() - read cache size parameters from DT
+ * @np: the device tree node for the l2 cache
+ * @aux_val: pointer to machine-supplied auxiliary register value, to
+ * be augmented by the call (bits to be set to 1)
+ * @aux_mask: pointer to machine-supplied auxiliary register mask, to
+ * be augmented by the call (bits to be set to 0)
+ * @associativity: variable to return the calculated associativity in;
+ * left untouched when "cache-size" or "cache-sets" is missing or zero
+ * @max_way_size: the maximum size in bytes for the cache ways
+ */
+static void __init l2x0_cache_size_of_parse(const struct device_node *np,
+					    u32 *aux_val, u32 *aux_mask,
+					    u32 *associativity,
+					    u32 max_way_size)
+{
+	u32 mask = 0, val = 0;
+	u32 cache_size = 0, sets = 0;
+	u32 way_size_bits = 1;
+	u32 way_size = 0;
+	u32 block_size = 0, line_size = 0;
+
+	of_property_read_u32(np, "cache-size", &cache_size);
+	of_property_read_u32(np, "cache-sets", &sets);
+	of_property_read_u32(np, "cache-block-size", &block_size);
+	of_property_read_u32(np, "cache-line-size", &line_size);
+
+	if (!cache_size || !sets)
+		return;
+
+	/* All these l2 caches have the same line = block size actually */
+	if (!line_size) {
+		if (block_size) {
+			/* If line size is not given, it is equal to block size */
+			line_size = block_size;
+		} else {
+			/* Fall back to known size */
+			pr_warn("L2C OF: no cache block/line size given: "
+				"falling back to default size %d bytes\n",
+				CACHE_LINE_SIZE);
+			line_size = CACHE_LINE_SIZE;
+		}
+	}
+
+	if (line_size != CACHE_LINE_SIZE)
+		pr_warn("L2C OF: DT supplied line size %d bytes does "
+			"not match hardware line size of %d bytes\n",
+			line_size,
+			CACHE_LINE_SIZE);
+
+	/*
+	 * Since:
+	 * set size = cache size / sets
+	 * ways = cache size / (sets * line size)
+	 * way size = cache size / (cache size / (sets * line size))
+	 * way size = sets * line size
+	 * associativity = ways = cache size / way size
+	 */
+	way_size = sets * line_size;
+	*associativity = cache_size / way_size;
+
+	if (way_size > max_way_size) {
+		pr_err("L2C OF: way size %dKB is too large\n", way_size >> 10);
+		return;
+	}
+
+	pr_info("L2C OF: override cache size: %d bytes (%dKB)\n",
+		cache_size, cache_size >> 10);
+	pr_info("L2C OF: override line size: %d bytes\n", line_size);
+	pr_info("L2C OF: override way size: %d bytes (%dKB)\n",
+		way_size, way_size >> 10);
+	pr_info("L2C OF: override associativity: %d\n", *associativity);
+
+	/*
+	 * Calculates the bits 17:19 to set for way size:
+	 * 512KB -> 6, 256KB -> 5, ... 16KB -> 1
+	 */
+	way_size_bits = ilog2(way_size >> 10) - 3;
+	if (way_size_bits < 1 || way_size_bits > 6) {
+		pr_err("L2C OF: cache way size illegal: %dKB is not mapped\n",
+		       way_size >> 10);
+		return;
+	}
+
+	mask |= L2C_AUX_CTRL_WAY_SIZE_MASK;
+	val |= (way_size_bits << L2C_AUX_CTRL_WAY_SIZE_SHIFT);
+
+	*aux_val &= ~mask;
+	*aux_val |= val;
+	*aux_mask &= ~mask;
+}
+
 static void __init l2x0_of_parse(const struct device_node *np,
 				 u32 *aux_val, u32 *aux_mask)
 {
@@ -952,6 +1045,7 @@
 	u32 tag = 0;
 	u32 dirty = 0;
 	u32 val = 0, mask = 0;
+	u32 assoc;
 
 	of_property_read_u32(np, "arm,tag-latency", &tag);
 	if (tag) {
@@ -974,6 +1068,15 @@
 		val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT;
 	}
 
+	l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_256K);
+	if (assoc > 8) {
+		pr_err("l2x0 of: cache setting yield too high associativity\n");
+		pr_err("l2x0 of: %d calculated, max 8\n", assoc);
+	} else {
+		mask |= L2X0_AUX_CTRL_ASSOC_MASK;
+		val |= (assoc << L2X0_AUX_CTRL_ASSOC_SHIFT);
+	}
+
 	*aux_val &= ~mask;
 	*aux_val |= val;
 	*aux_mask &= ~mask;
@@ -1021,6 +1124,7 @@
 	u32 data[3] = { 0, 0, 0 };
 	u32 tag[3] = { 0, 0, 0 };
 	u32 filter[2] = { 0, 0 };
+	u32 assoc;
 
 	of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag));
 	if (tag[0] && tag[1] && tag[2])
@@ -1047,6 +1151,23 @@
 		writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN,
 			       l2x0_base + L310_ADDR_FILTER_START);
 	}
+
+	l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
+	switch (assoc) {
+	case 16:
+		*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+		*aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
+		*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+		break;
+	case 8:
+		*aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+		*aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+		break;
+	default:
+		pr_err("PL310 OF: cache setting yield illegal associativity\n");
+		pr_err("PL310 OF: %d calculated, only 8 and 16 legal\n", assoc);
+		break;
+	}
 }
 
 static const struct l2c_init_data of_l2c310_data __initconst = {
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index c447ec7..e7a81ceb 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -27,7 +27,7 @@
 	if (pud_none_or_clear_bad(pud) || (pud_val(*pud) & L_PGD_SWAPPER)) {
 		pmd = pmd_alloc_one(&init_mm, addr);
 		if (!pmd) {
-			pr_warning("Failed to allocate identity pmd.\n");
+			pr_warn("Failed to allocate identity pmd.\n");
 			return;
 		}
 		/*
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 659c75d..9221645 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -636,6 +636,11 @@
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
 	if (!keep_initrd) {
+		if (start == initrd_start)
+			start = round_down(start, PAGE_SIZE);
+		if (end == initrd_end)
+			end = round_up(end, PAGE_SIZE);
+
 		poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
 		free_reserved_area((void *)start, (void *)end, -1, "initrd");
 	}
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 8348ed6..9f98cec 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -223,13 +223,13 @@
 
 static int __init early_cachepolicy(char *p)
 {
-	pr_warning("cachepolicy kernel parameter not supported without cp15\n");
+	pr_warn("cachepolicy kernel parameter not supported without cp15\n");
 }
 early_param("cachepolicy", early_cachepolicy);
 
 static int __init noalign_setup(char *__unused)
 {
-	pr_warning("noalign kernel parameter not supported without cp15\n");
+	pr_warn("noalign kernel parameter not supported without cp15\n");
 }
 __setup("noalign", noalign_setup);
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index b5d67db..b3a9478 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -570,7 +570,7 @@
 __v7_b15mp_proc_info:
 	.long	0x420f00f0
 	.long	0xff0ffff0
-	__v7_proc __v7_b15mp_setup, hwcaps = HWCAP_IDIV
+	__v7_proc __v7_b15mp_setup
 	.size	__v7_b15mp_proc_info, . - __v7_b15mp_proc_info
 
 	/*
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 97f0c04..edf8715 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -97,9 +97,9 @@
 
 	PERCPU_SECTION(64)
 
+	. = ALIGN(PAGE_SIZE);
 	__init_end = .;
 
-	. = ALIGN(PAGE_SIZE);
 	_data = .;
 	_sdata = .;
 	RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index a83061f..adf5e07 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -333,8 +333,14 @@
 
 void free_initrd_mem(unsigned long start, unsigned long end)
 {
-	if (!keep_initrd)
+	if (!keep_initrd) {
+		if (start == initrd_start)
+			start = round_down(start, PAGE_SIZE);
+		if (end == initrd_end)
+			end = round_up(end, PAGE_SIZE);
+
 		free_reserved_area((void *)start, (void *)end, 0, "initrd");
+	}
 }
 
 static int __init keepinitrd_setup(char *__unused)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 00b5906..77daef0 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -543,7 +543,7 @@
 
 config CRYPTO_SHA1_ARM_NEON
 	tristate "SHA1 digest algorithm (ARM NEON)"
-	depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+	depends on ARM && KERNEL_MODE_NEON
 	select CRYPTO_SHA1_ARM
 	select CRYPTO_SHA1
 	select CRYPTO_HASH
@@ -603,7 +603,7 @@
 
 config CRYPTO_SHA512_ARM_NEON
 	tristate "SHA384 and SHA512 digest algorithm (ARM NEON)"
-	depends on ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN
+	depends on ARM && KERNEL_MODE_NEON
 	select CRYPTO_SHA512
 	select CRYPTO_HASH
 	help
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5ba0360..aa70cbd 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -40,6 +40,8 @@
  * }
  *
  * [__init_begin, __init_end] is the init section that may be freed after init
+ * 	// __init_begin and __init_end should be page aligned, so that we can
+ *	// free the whole .init memory
  * [_stext, _etext] is the text section
  * [_sdata, _edata] is the data section
  *
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index d04ce8a..cf66c5c 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -35,7 +35,7 @@
 config GCOV_PROFILE_ALL
 	bool "Profile entire Kernel"
 	depends on GCOV_KERNEL
-	depends on SUPERH || S390 || X86 || PPC || MICROBLAZE
+	depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM
 	default n
 	---help---
 	This options activates profiling for the entire kernel.