Merge branches 'amba', 'fixes', 'misc' and 'tauros2' into for-next
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
index 18dc52c..e8cf9cf 100644
--- a/Documentation/DMA-attributes.txt
+++ b/Documentation/DMA-attributes.txt
@@ -100,3 +100,29 @@
 be mapped as contiguous chunk into device dma address space. By
 specifying this attribute the allocated buffer is forced to be contiguous
 also in physical memory.
+
+DMA_ATTR_ALLOC_SINGLE_PAGES
+---------------------------
+
+This is a hint to the DMA-mapping subsystem that it's probably not worth
+the time to try to allocate memory in a way that gives better TLB
+efficiency (AKA it's not worth trying to build the mapping out of larger
+pages).  You might want to specify this if:
+- You know that the accesses to this memory won't thrash the TLB.
+  You might know that the accesses are likely to be sequential, or
+  that even if they aren't sequential it's unlikely you'll ping-pong
+  between many addresses that fall in different physical pages.
+- You know that the penalty of TLB misses while accessing the
+  memory will be small enough to be inconsequential.  If you are
+  doing a heavy operation like decryption or decompression this
+  might be the case.
+- You know that the DMA mapping is fairly transitory.  If you expect
+  the mapping to have a short lifetime then it may be worth optimizing
+  allocation (avoiding the work of building large pages) instead of
+  getting the slight performance win of larger pages.
+Setting this hint doesn't guarantee that you won't get huge pages, but it
+means that we won't try quite as hard to get them.
+
+NOTE: At the moment DMA_ATTR_ALLOC_SINGLE_PAGES is only implemented on ARM,
+though ARM64 patches will likely be posted soon.
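
For illustration, here is a minimal sketch of how a driver could pass this
new hint through the struct dma_attrs interface that this series builds on.
The device pointer, buffer size and surrounding error handling are
placeholders for the sketch and are not part of the patch:

	#include <linux/dma-mapping.h>

	static int example_alloc(struct device *dev, size_t size)
	{
		dma_addr_t dma_handle;
		void *buf;
		DEFINE_DMA_ATTRS(attrs);

		/* Hint: favour allocation speed over large-page TLB efficiency. */
		dma_set_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, &attrs);

		buf = dma_alloc_attrs(dev, size, &dma_handle, GFP_KERNEL, &attrs);
		if (!buf)
			return -ENOMEM;

		/* ... use the buffer, then release it with the same attrs ... */

		dma_free_attrs(dev, size, buf, dma_handle, &attrs);
		return 0;
	}
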
diff --git a/Documentation/driver-model/porting.txt b/Documentation/driver-model/porting.txt
index 92d86f7..453053f 100644
--- a/Documentation/driver-model/porting.txt
+++ b/Documentation/driver-model/porting.txt
@@ -340,8 +340,10 @@
 
   int (*match)(struct device * dev, struct device_driver * drv);
 
-match should return '1' if the driver supports the device, and '0'
-otherwise. 
+match should return a positive value if the driver supports the device,
+and zero otherwise. It may also return an error code (for example
+-EPROBE_DEFER) if determining whether the given driver supports the
+device is not possible.
 
 When a device is registered, the bus's list of drivers is iterated
 over. bus->match() is called for each one until a match is found. 
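
As a hypothetical illustration of the new convention (the mybus_* names
below are made up for this sketch and do not come from this patch), a bus
match callback can now defer the decision when it depends on information
that is not yet available:

	#include <linux/device.h>
	#include <linux/errno.h>

	static bool mybus_id_table_ready(void);			/* placeholder */
	static bool mybus_ids_match(struct device_driver *drv,
				    struct device *dev);	/* placeholder */

	static int mybus_match(struct device *dev, struct device_driver *drv)
	{
		/*
		 * If the bus's ID information (e.g. loaded from firmware) is
		 * not available yet, we cannot tell whether this driver
		 * supports the device, so ask the driver core to retry later.
		 */
		if (!mybus_id_table_ready())
			return -EPROBE_DEFER;

		/* Positive value: the driver supports this device. */
		return mybus_ids_match(drv, dev) ? 1 : 0;
	}
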
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 4f799e5..d67f1aa 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -572,7 +572,6 @@
 	select NEED_MACH_IO_H
 	select NEED_MACH_MEMORY_H
 	select NO_IOPORT_MAP
-	select VIRT_TO_BUS
 	help
 	  On the Acorn Risc-PC, Linux can support the internal IDE disk and
 	  CD-ROM interface, serial and parallel port, and the floppy drive.
@@ -1337,7 +1336,6 @@
 config BL_SWITCHER
 	bool "big.LITTLE switcher support"
 	depends on BIG_LITTLE && MCPM && HOTPLUG_CPU && ARM_GIC
-	select ARM_CPU_SUSPEND
 	select CPU_PM
 	help
 	  The big.LITTLE "switcher" provides the core functionality to
@@ -2111,7 +2109,8 @@
 	def_bool y
 
 config ARM_CPU_SUSPEND
-	def_bool PM_SLEEP
+	def_bool PM_SLEEP || BL_SWITCHER || ARM_PSCI_FW
+	depends on ARCH_SUSPEND_POSSIBLE
 
 config ARCH_HIBERNATION_POSSIBLE
 	bool
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index fe25410..46fb1ca 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -352,7 +352,6 @@
 
 # My testing targets (bypasses dependencies)
 bp:;	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/bootpImage
-i zi:;	$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
 
 
 define archhelp
diff --git a/arch/arm/boot/Makefile b/arch/arm/boot/Makefile
index 9eca7ae..48fab15 100644
--- a/arch/arm/boot/Makefile
+++ b/arch/arm/boot/Makefile
@@ -88,7 +88,7 @@
 	$(call if_changed,objcopy)
 	@$(kecho) '  Kernel: $@ is ready'
 
-PHONY += initrd FORCE
+PHONY += initrd
 initrd:
 	@test "$(INITRD_PHYS)" != "" || \
 	(echo This machine does not support INITRD; exit -1)
@@ -107,12 +107,4 @@
 	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
 	$(obj)/uImage System.map "$(INSTALL_PATH)"
 
-zi:
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
-	$(obj)/zImage System.map "$(INSTALL_PATH)"
-
-i:
-	$(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
-	$(obj)/Image System.map "$(INSTALL_PATH)"
-
 subdir-	    := bootp compressed dts
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index 0714e03..86b2f5d 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -3,11 +3,7 @@
 font.c
 lib1funcs.S
 hyp-stub.S
-piggy.gzip
-piggy.lzo
-piggy.lzma
-piggy.xzkern
-piggy.lz4
+piggy_data
 vmlinux
 vmlinux.lds
 
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 7a6a58e..d50430c 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -66,11 +66,11 @@
 
 CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)"
 
-suffix_$(CONFIG_KERNEL_GZIP) = gzip
-suffix_$(CONFIG_KERNEL_LZO)  = lzo
-suffix_$(CONFIG_KERNEL_LZMA) = lzma
-suffix_$(CONFIG_KERNEL_XZ)   = xzkern
-suffix_$(CONFIG_KERNEL_LZ4)  = lz4
+compress-$(CONFIG_KERNEL_GZIP) = gzip
+compress-$(CONFIG_KERNEL_LZO)  = lzo
+compress-$(CONFIG_KERNEL_LZMA) = lzma
+compress-$(CONFIG_KERNEL_XZ)   = xzkern
+compress-$(CONFIG_KERNEL_LZ4)  = lz4
 
 # Borrowed libfdt files for the ATAG compatibility mode
 
@@ -89,15 +89,12 @@
 OBJS	+= $(libfdt_objs) atags_to_fdt.o
 endif
 
-targets       := vmlinux vmlinux.lds \
-		 piggy.$(suffix_y) piggy.$(suffix_y).o \
-		 lib1funcs.o lib1funcs.S ashldi3.o ashldi3.S bswapsdi2.o \
-		 bswapsdi2.S font.o font.c head.o misc.o $(OBJS)
+targets       := vmlinux vmlinux.lds piggy_data piggy.o \
+		 lib1funcs.o ashldi3.o bswapsdi2.o \
+		 head.o $(OBJS)
 
-# Make sure files are removed during clean
-extra-y       += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
-		 lib1funcs.S ashldi3.S bswapsdi2.S $(libfdt) $(libfdt_hdrs) \
-		 hyp-stub.S
+clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S \
+		$(libfdt) $(libfdt_hdrs) hyp-stub.S
 
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 
@@ -178,22 +175,24 @@
 
 efi-obj-$(CONFIG_EFI_STUB) := $(objtree)/drivers/firmware/efi/libstub/lib.a
 
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.o \
 		$(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) \
 		$(bswapsdi2) $(efi-obj-y) FORCE
 	@$(check_for_multiple_zreladdr)
 	$(call if_changed,ld)
 	@$(check_for_bad_syms)
 
-$(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE
-	$(call if_changed,$(suffix_y))
+$(obj)/piggy_data: $(obj)/../Image FORCE
+	$(call if_changed,$(compress-y))
 
-$(obj)/piggy.$(suffix_y).o:  $(obj)/piggy.$(suffix_y) FORCE
+$(obj)/piggy.o: $(obj)/piggy_data
 
 CFLAGS_font.o := -Dstatic=
 
 $(obj)/font.c: $(FONTC)
 	$(call cmd,shipped)
 
+AFLAGS_hyp-stub.o := -Wa,-march=armv7-a
+
 $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
 	$(call cmd,shipped)
diff --git a/arch/arm/boot/compressed/piggy.lzo.S b/arch/arm/boot/compressed/piggy.S
similarity index 67%
rename from arch/arm/boot/compressed/piggy.lzo.S
rename to arch/arm/boot/compressed/piggy.S
index a425ad9..f720884 100644
--- a/arch/arm/boot/compressed/piggy.lzo.S
+++ b/arch/arm/boot/compressed/piggy.S
@@ -1,6 +1,6 @@
 	.section .piggydata,#alloc
 	.globl	input_data
 input_data:
-	.incbin	"arch/arm/boot/compressed/piggy.lzo"
+	.incbin	"arch/arm/boot/compressed/piggy_data"
 	.globl	input_data_end
 input_data_end:
diff --git a/arch/arm/boot/compressed/piggy.gzip.S b/arch/arm/boot/compressed/piggy.gzip.S
deleted file mode 100644
index a68adf9..0000000
--- a/arch/arm/boot/compressed/piggy.gzip.S
+++ /dev/null
@@ -1,6 +0,0 @@
-	.section .piggydata,#alloc
-	.globl	input_data
-input_data:
-	.incbin	"arch/arm/boot/compressed/piggy.gzip"
-	.globl	input_data_end
-input_data_end:
diff --git a/arch/arm/boot/compressed/piggy.lz4.S b/arch/arm/boot/compressed/piggy.lz4.S
deleted file mode 100644
index 3d9a575..0000000
--- a/arch/arm/boot/compressed/piggy.lz4.S
+++ /dev/null
@@ -1,6 +0,0 @@
-	.section .piggydata,#alloc
-	.globl	input_data
-input_data:
-	.incbin	"arch/arm/boot/compressed/piggy.lz4"
-	.globl	input_data_end
-input_data_end:
diff --git a/arch/arm/boot/compressed/piggy.lzma.S b/arch/arm/boot/compressed/piggy.lzma.S
deleted file mode 100644
index d7e69cf..0000000
--- a/arch/arm/boot/compressed/piggy.lzma.S
+++ /dev/null
@@ -1,6 +0,0 @@
-	.section .piggydata,#alloc
-	.globl	input_data
-input_data:
-	.incbin	"arch/arm/boot/compressed/piggy.lzma"
-	.globl	input_data_end
-input_data_end:
diff --git a/arch/arm/boot/compressed/piggy.xzkern.S b/arch/arm/boot/compressed/piggy.xzkern.S
deleted file mode 100644
index 5703f30..0000000
--- a/arch/arm/boot/compressed/piggy.xzkern.S
+++ /dev/null
@@ -1,6 +0,0 @@
-	.section .piggydata,#alloc
-	.globl	input_data
-input_data:
-	.incbin	"arch/arm/boot/compressed/piggy.xzkern"
-	.globl	input_data_end
-input_data_end:
diff --git a/arch/arm/common/sa1111.c b/arch/arm/common/sa1111.c
index 3d22494..fb0a0a4 100644
--- a/arch/arm/common/sa1111.c
+++ b/arch/arm/common/sa1111.c
@@ -1290,7 +1290,7 @@
 	struct sa1111_dev *dev = SA1111_DEV(_dev);
 	struct sa1111_driver *drv = SA1111_DRV(_drv);
 
-	return dev->devid & drv->devid;
+	return !!(dev->devid & drv->devid);
 }
 
 static int sa1111_bus_suspend(struct device *dev, pm_message_t state)
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 16da638..3f6616b 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -23,7 +23,6 @@
 generic-y += resource.h
 generic-y += rwsem.h
 generic-y += seccomp.h
-generic-y += sections.h
 generic-y += segment.h
 generic-y += sembuf.h
 generic-y += serial.h
diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h
index e1f0776..7d919a9 100644
--- a/arch/arm/include/asm/div64.h
+++ b/arch/arm/include/asm/div64.h
@@ -74,7 +74,7 @@
 static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
 {
 	unsigned long long res;
-	unsigned int tmp = 0;
+	register unsigned int tmp asm("ip") = 0;
 
 	if (!bias) {
 		asm (	"umull	%Q0, %R0, %Q1, %Q2\n\t"
@@ -90,12 +90,12 @@
 			: "r" (m), "r" (n)
 			: "cc");
 	} else {
-		asm (	"umull	%Q0, %R0, %Q1, %Q2\n\t"
-			"cmn	%Q0, %Q1\n\t"
-			"adcs	%R0, %R0, %R1\n\t"
-			"adc	%Q0, %3, #0"
-			: "=&r" (res)
-			: "r" (m), "r" (n), "r" (tmp)
+		asm (	"umull	%Q0, %R0, %Q2, %Q3\n\t"
+			"cmn	%Q0, %Q2\n\t"
+			"adcs	%R0, %R0, %R2\n\t"
+			"adc	%Q0, %1, #0"
+			: "=&r" (res), "+&r" (tmp)
+			: "r" (m), "r" (n)
 			: "cc");
 	}
 
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index c79b57b..9427fd6 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -134,6 +134,21 @@
  */
 #define PLAT_PHYS_OFFSET	UL(CONFIG_PHYS_OFFSET)
 
+#ifdef CONFIG_XIP_KERNEL
+/*
+ * When referencing data in RAM from the XIP region in a relative manner
+ * with the MMU off, we need the relative offset between the two physical
+ * addresses.  The macro below achieves this, which is:
+ *    __pa(v_data) - __xip_pa(v_text)
+ */
+#define PHYS_RELATIVE(v_data, v_text) \
+	(((v_data) - PAGE_OFFSET + PLAT_PHYS_OFFSET) - \
+	 ((v_text) - XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) + \
+          CONFIG_XIP_PHYS_ADDR))
+#else
+#define PHYS_RELATIVE(v_data, v_text) ((v_data) - (v_text))
+#endif
+
 #ifndef __ASSEMBLY__
 
 /*
@@ -273,14 +288,14 @@
 #define __va(x)			((void *)__phys_to_virt((phys_addr_t)(x)))
 #define pfn_to_kaddr(pfn)	__va((phys_addr_t)(pfn) << PAGE_SHIFT)
 
-extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x);
+extern unsigned long (*arch_virt_to_idmap)(unsigned long x);
 
 /*
  * These are for systems that have a hardware interconnect supported alias of
  * physical memory for idmap purposes.  Most cases should leave these
- * untouched.
+ * untouched.  Note: this can only return addresses less than 4GiB.
  */
-static inline phys_addr_t __virt_to_idmap(unsigned long x)
+static inline unsigned long __virt_to_idmap(unsigned long x)
 {
 	if (IS_ENABLED(CONFIG_MMU) && arch_virt_to_idmap)
 		return arch_virt_to_idmap(x);
@@ -303,20 +318,6 @@
 #define __bus_to_pfn(x)	__phys_to_pfn(x)
 #endif
 
-#ifdef CONFIG_VIRT_TO_BUS
-#define virt_to_bus virt_to_bus
-static inline __deprecated unsigned long virt_to_bus(void *x)
-{
-	return __virt_to_bus((unsigned long)x);
-}
-
-#define bus_to_virt bus_to_virt
-static inline __deprecated void *bus_to_virt(unsigned long x)
-{
-	return (void *)__bus_to_virt(x);
-}
-#endif
-
 /*
  * Conversion between a struct page and a physical address.
  *
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index 432ce81..fa5b42d 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -26,7 +26,12 @@
 #ifdef CONFIG_CPU_HAS_ASID
 
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk);
-#define init_new_context(tsk,mm)	({ atomic64_set(&(mm)->context.id, 0); 0; })
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	atomic64_set(&mm->context.id, 0);
+	return 0;
+}
 
 #ifdef CONFIG_ARM_ERRATA_798181
 void a15_erratum_get_cpumask(int this_cpu, struct mm_struct *mm,
@@ -85,7 +90,12 @@
 
 #endif	/* CONFIG_MMU */
 
-#define init_new_context(tsk,mm)	0
+static inline int
+init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+{
+	return 0;
+}
+
 
 #endif	/* CONFIG_CPU_HAS_ASID */
 
diff --git a/arch/arm/include/asm/sections.h b/arch/arm/include/asm/sections.h
new file mode 100644
index 0000000..803bbf2
--- /dev/null
+++ b/arch/arm/include/asm/sections.h
@@ -0,0 +1,8 @@
+#ifndef _ASM_ARM_SECTIONS_H
+#define _ASM_ARM_SECTIONS_H
+
+#include <asm-generic/sections.h>
+
+extern char _exiprom[];
+
+#endif	/* _ASM_ARM_SECTIONS_H */
diff --git a/arch/arm/include/asm/sparsemem.h b/arch/arm/include/asm/sparsemem.h
index 0009861..73e5e85 100644
--- a/arch/arm/include/asm/sparsemem.h
+++ b/arch/arm/include/asm/sparsemem.h
@@ -15,10 +15,11 @@
  * Eg, if you have 2 banks of up to 64MB at 0x80000000, 0x84000000,
  * then MAX_PHYSMEM_BITS is 32, SECTION_SIZE_BITS is 26.
  *
- * Define these in your mach/memory.h.
+ * These can be overridden in your mach/memory.h.
  */
-#if !defined(SECTION_SIZE_BITS) || !defined(MAX_PHYSMEM_BITS)
-#error Sparsemem is not supported on this platform
+#if !defined(MAX_PHYSMEM_BITS) || !defined(SECTION_SIZE_BITS)
+#define MAX_PHYSMEM_BITS	36
+#define SECTION_SIZE_BITS	28
 #endif
 
 #endif
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 2c5f160..ad325a8 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -88,6 +88,7 @@
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 obj-$(CONFIG_ARM_VIRT_EXT)	+= hyp-stub.o
+AFLAGS_hyp-stub.o		:=-Wa,-march=armv7-a
 ifeq ($(CONFIG_ARM_PSCI),y)
 obj-$(CONFIG_SMP)		+= psci_smp.o
 endif
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 3ce377f..e255050 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -1064,7 +1064,6 @@
 	.endm
 
 	.section .stubs, "ax", %progbits
-__stubs_start:
 	@ This must be the first word
 	.word	vector_swi
 
@@ -1202,14 +1201,13 @@
 	.long	__fiq_svc			@  e
 	.long	__fiq_svc			@  f
 
-	.globl	vector_fiq_offset
-	.equ	vector_fiq_offset, vector_fiq
+	.globl	vector_fiq
 
 	.section .vectors, "ax", %progbits
-__vectors_start:
+.L__vectors_start:
 	W(b)	vector_rst
 	W(b)	vector_und
-	W(ldr)	pc, __vectors_start + 0x1000
+	W(ldr)	pc, .L__vectors_start + 0x1000
 	W(b)	vector_pabt
 	W(b)	vector_dabt
 	W(b)	vector_addrexcptn
diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c
index a71501f..b09561a 100644
--- a/arch/arm/kernel/hibernate.c
+++ b/arch/arm/kernel/hibernate.c
@@ -62,7 +62,7 @@
 
 	ret = swsusp_save();
 	if (ret == 0)
-		_soft_restart(virt_to_phys(cpu_resume), false);
+		_soft_restart(virt_to_idmap(cpu_resume), false);
 	return ret;
 }
 
@@ -87,7 +87,7 @@
 	for (pbe = restore_pblist; pbe; pbe = pbe->next)
 		copy_page(pbe->orig_address, pbe->address);
 
-	_soft_restart(virt_to_phys(cpu_resume), false);
+	_soft_restart(virt_to_idmap(cpu_resume), false);
 }
 
 static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata;
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 2a55373..0b1e4a9 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -17,6 +17,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/irqchip/arm-gic-v3.h>
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 #include <asm/virt.h>
@@ -161,6 +162,29 @@
 1:
 #endif
 
+#ifdef CONFIG_ARM_GIC_V3
+	@ Check whether GICv3 system registers are available
+	mrc	p15, 0, r7, c0, c1, 1	@ ID_PFR1
+	ubfx	r7, r7, #28, #4
+	cmp	r7, #1
+	bne	2f
+
+	@ Enable system register accesses
+	mrc	p15, 4, r7, c12, c9, 5	@ ICC_HSRE
+	orr	r7, r7, #(ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE)
+	mcr	p15, 4, r7, c12, c9, 5	@ ICC_HSRE
+	isb
+
+	@ SRE bit could be forced to 0 by firmware.
+	@ Check whether it sticks before accessing any other sysreg
+	mrc	p15, 4, r7, c12, c9, 5	@ ICC_HSRE
+	tst	r7, #ICC_SRE_EL2_SRE
+	beq	2f
+	mov	r7, #0
+	mcr	p15, 4, r7, c12, c11, 0	@ ICH_HCR
+2:
+#endif
+
 	bx	lr			@ The boot CPU mode is left in r4.
 ENDPROC(__hyp_stub_install_secondary)
 
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 1d45320..ece04a4 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -95,7 +95,7 @@
 			outer_cache.write_sec = machine_desc->l2c_write_sec;
 		ret = l2x0_of_init(machine_desc->l2c_aux_val,
 				   machine_desc->l2c_aux_mask);
-		if (ret)
+		if (ret && ret != -ENODEV)
 			pr_err("L2C: failed to init: %d\n", ret);
 	}
 
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 8bf3b7c..59fd0e2 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -143,10 +143,8 @@
 
 void machine_kexec(struct kimage *image)
 {
-	unsigned long page_list;
-	unsigned long reboot_code_buffer_phys;
-	unsigned long reboot_entry = (unsigned long)relocate_new_kernel;
-	unsigned long reboot_entry_phys;
+	unsigned long page_list, reboot_entry_phys;
+	void (*reboot_entry)(void);
 	void *reboot_code_buffer;
 
 	/*
@@ -159,9 +157,6 @@
 
 	page_list = image->head & PAGE_MASK;
 
-	/* we need both effective and real address here */
-	reboot_code_buffer_phys =
-	    page_to_pfn(image->control_code_page) << PAGE_SHIFT;
 	reboot_code_buffer = page_address(image->control_code_page);
 
 	/* Prepare parameters for reboot_code_buffer*/
@@ -174,10 +169,11 @@
 
 	/* copy our kernel relocation code to the control code page */
 	reboot_entry = fncpy(reboot_code_buffer,
-			     reboot_entry,
+			     &relocate_new_kernel,
 			     relocate_new_kernel_size);
-	reboot_entry_phys = (unsigned long)reboot_entry +
-		(reboot_code_buffer_phys - (unsigned long)reboot_code_buffer);
+
+	/* get the identity mapping physical address for the reboot code */
+	reboot_entry_phys = virt_to_idmap(reboot_entry);
 
 	pr_info("Bye!\n");
 
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index efdddcb..4f14b5c 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -34,7 +34,7 @@
  * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off.
  */
 #undef MODULES_VADDR
-#define MODULES_VADDR	(((unsigned long)_etext + ~PMD_MASK) & PMD_MASK)
+#define MODULES_VADDR	(((unsigned long)_exiprom + ~PMD_MASK) & PMD_MASK)
 #endif
 
 #ifdef CONFIG_MMU
diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c
index 3826935..71a2ff9 100644
--- a/arch/arm/kernel/reboot.c
+++ b/arch/arm/kernel/reboot.c
@@ -50,7 +50,7 @@
 	flush_cache_all();
 
 	/* Switch to the identity mapping. */
-	phys_reset = (phys_reset_t)(unsigned long)virt_to_idmap(cpu_reset);
+	phys_reset = (phys_reset_t)virt_to_idmap(cpu_reset);
 	phys_reset((unsigned long)addr);
 
 	/* Should never get here. */
diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c
index 08b7847..ec279d1 100644
--- a/arch/arm/kernel/topology.c
+++ b/arch/arm/kernel/topology.c
@@ -40,7 +40,7 @@
  * to run the rebalance_domains for all idle cores and the cpu_capacity can be
  * updated during this sequence.
  */
-static DEFINE_PER_CPU(unsigned long, cpu_scale);
+static DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
 
 unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
 {
@@ -306,8 +306,6 @@
 		cpu_topo->socket_id = -1;
 		cpumask_clear(&cpu_topo->core_sibling);
 		cpumask_clear(&cpu_topo->thread_sibling);
-
-		set_capacity_scale(cpu, SCHED_CAPACITY_SCALE);
 	}
 	smp_wmb();
 
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
new file mode 100644
index 0000000..cba1ec8
--- /dev/null
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -0,0 +1,316 @@
+/* ld script to make ARM Linux kernel
+ * taken from the i386 version by Russell King
+ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ */
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+
+#define PROC_INFO							\
+	. = ALIGN(4);							\
+	VMLINUX_SYMBOL(__proc_info_begin) = .;				\
+	*(.proc.info.init)						\
+	VMLINUX_SYMBOL(__proc_info_end) = .;
+
+#define IDMAP_TEXT							\
+	ALIGN_FUNCTION();						\
+	VMLINUX_SYMBOL(__idmap_text_start) = .;				\
+	*(.idmap.text)							\
+	VMLINUX_SYMBOL(__idmap_text_end) = .;				\
+	. = ALIGN(PAGE_SIZE);						\
+	VMLINUX_SYMBOL(__hyp_idmap_text_start) = .;			\
+	*(.hyp.idmap.text)						\
+	VMLINUX_SYMBOL(__hyp_idmap_text_end) = .;
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define ARM_CPU_DISCARD(x)
+#define ARM_CPU_KEEP(x)		x
+#else
+#define ARM_CPU_DISCARD(x)	x
+#define ARM_CPU_KEEP(x)
+#endif
+
+#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \
+	defined(CONFIG_GENERIC_BUG)
+#define ARM_EXIT_KEEP(x)	x
+#define ARM_EXIT_DISCARD(x)
+#else
+#define ARM_EXIT_KEEP(x)
+#define ARM_EXIT_DISCARD(x)	x
+#endif
+
+OUTPUT_ARCH(arm)
+ENTRY(stext)
+
+#ifndef __ARMEB__
+jiffies = jiffies_64;
+#else
+jiffies = jiffies_64 + 4;
+#endif
+
+SECTIONS
+{
+	/*
+	 * XXX: The linker does not define how output sections are
+	 * assigned to input sections when there are multiple statements
+	 * matching the same input section name.  There is no documented
+	 * order of matching.
+	 *
+	 * unwind exit sections must be discarded before the rest of the
+	 * unwind sections get included.
+	 */
+	/DISCARD/ : {
+		*(.ARM.exidx.exit.text)
+		*(.ARM.extab.exit.text)
+		ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text))
+		ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text))
+		ARM_EXIT_DISCARD(EXIT_TEXT)
+		ARM_EXIT_DISCARD(EXIT_DATA)
+		EXIT_CALL
+#ifndef CONFIG_MMU
+		*(.text.fixup)
+		*(__ex_table)
+#endif
+#ifndef CONFIG_SMP_ON_UP
+		*(.alt.smp.init)
+#endif
+		*(.discard)
+		*(.discard.*)
+	}
+
+	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
+	_xiprom = .;			/* XIP ROM area to be mapped */
+
+	.head.text : {
+		_text = .;
+		HEAD_TEXT
+	}
+
+	.text : {			/* Real text segment		*/
+		_stext = .;		/* Text and read-only data	*/
+			IDMAP_TEXT
+			__exception_text_start = .;
+			*(.exception.text)
+			__exception_text_end = .;
+			IRQENTRY_TEXT
+			TEXT_TEXT
+			SCHED_TEXT
+			LOCK_TEXT
+			KPROBES_TEXT
+			*(.gnu.warning)
+			*(.glue_7)
+			*(.glue_7t)
+		. = ALIGN(4);
+		*(.got)			/* Global offset table		*/
+			ARM_CPU_KEEP(PROC_INFO)
+	}
+
+	RO_DATA(PAGE_SIZE)
+
+	. = ALIGN(4);
+	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+		__start___ex_table = .;
+#ifdef CONFIG_MMU
+		*(__ex_table)
+#endif
+		__stop___ex_table = .;
+	}
+
+#ifdef CONFIG_ARM_UNWIND
+	/*
+	 * Stack unwinding tables
+	 */
+	. = ALIGN(8);
+	.ARM.unwind_idx : {
+		__start_unwind_idx = .;
+		*(.ARM.exidx*)
+		__stop_unwind_idx = .;
+	}
+	.ARM.unwind_tab : {
+		__start_unwind_tab = .;
+		*(.ARM.extab*)
+		__stop_unwind_tab = .;
+	}
+#endif
+
+	NOTES
+
+	_etext = .;			/* End of text and rodata section */
+
+	/*
+	 * The vectors and stubs are relocatable code, and the
+	 * only thing that matters is their relative offsets
+	 */
+	__vectors_start = .;
+	.vectors 0xffff0000 : AT(__vectors_start) {
+		*(.vectors)
+	}
+	. = __vectors_start + SIZEOF(.vectors);
+	__vectors_end = .;
+
+	__stubs_start = .;
+	.stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) {
+		*(.stubs)
+	}
+	. = __stubs_start + SIZEOF(.stubs);
+	__stubs_end = .;
+
+	PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
+
+	INIT_TEXT_SECTION(8)
+	.exit.text : {
+		ARM_EXIT_KEEP(EXIT_TEXT)
+	}
+	.init.proc.info : {
+		ARM_CPU_DISCARD(PROC_INFO)
+	}
+	.init.arch.info : {
+		__arch_info_begin = .;
+		*(.arch.info.init)
+		__arch_info_end = .;
+	}
+	.init.tagtable : {
+		__tagtable_begin = .;
+		*(.taglist.init)
+		__tagtable_end = .;
+	}
+#ifdef CONFIG_SMP_ON_UP
+	.init.smpalt : {
+		__smpalt_begin = .;
+		*(.alt.smp.init)
+		__smpalt_end = .;
+	}
+#endif
+	.init.pv_table : {
+		__pv_table_begin = .;
+		*(.pv_table)
+		__pv_table_end = .;
+	}
+	.init.data : {
+		INIT_SETUP(16)
+		INIT_CALLS
+		CON_INITCALL
+		SECURITY_INITCALL
+		INIT_RAM_FS
+	}
+
+#ifdef CONFIG_SMP
+	PERCPU_SECTION(L1_CACHE_BYTES)
+#endif
+
+	_exiprom = .;			/* End of XIP ROM area */
+	__data_loc = ALIGN(4);		/* location in binary */
+	. = PAGE_OFFSET + TEXT_OFFSET;
+
+	.data : AT(__data_loc) {
+		_data = .;		/* address in memory */
+		_sdata = .;
+
+		/*
+		 * first, the init task union, aligned
+		 * to an 8192 byte boundary.
+		 */
+		INIT_TASK_DATA(THREAD_SIZE)
+
+		. = ALIGN(PAGE_SIZE);
+		__init_begin = .;
+		INIT_DATA
+		ARM_EXIT_KEEP(EXIT_DATA)
+		. = ALIGN(PAGE_SIZE);
+		__init_end = .;
+
+		NOSAVE_DATA
+		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
+		READ_MOSTLY_DATA(L1_CACHE_BYTES)
+
+		/*
+		 * and the usual data section
+		 */
+		DATA_DATA
+		CONSTRUCTORS
+
+		_edata = .;
+	}
+	_edata_loc = __data_loc + SIZEOF(.data);
+
+#ifdef CONFIG_HAVE_TCM
+        /*
+	 * We align everything to a page boundary so we can
+	 * free it after init has commenced and TCM contents have
+	 * been copied to its destination.
+	 */
+	.tcm_start : {
+		. = ALIGN(PAGE_SIZE);
+		__tcm_start = .;
+		__itcm_start = .;
+	}
+
+	/*
+	 * Link these to the ITCM RAM
+	 * Put VMA to the TCM address and LMA to the common RAM
+	 * and we'll upload the contents from RAM to TCM and free
+	 * the used RAM after that.
+	 */
+	.text_itcm ITCM_OFFSET : AT(__itcm_start)
+	{
+		__sitcm_text = .;
+		*(.tcm.text)
+		*(.tcm.rodata)
+		. = ALIGN(4);
+		__eitcm_text = .;
+	}
+
+	/*
+	 * Reset the dot pointer, this is needed to create the
+	 * relative __dtcm_start below (to be used as extern in code).
+	 */
+	. = ADDR(.tcm_start) + SIZEOF(.tcm_start) + SIZEOF(.text_itcm);
+
+	.dtcm_start : {
+		__dtcm_start = .;
+	}
+
+	/* TODO: add remainder of ITCM as well, that can be used for data! */
+	.data_dtcm DTCM_OFFSET : AT(__dtcm_start)
+	{
+		. = ALIGN(4);
+		__sdtcm_data = .;
+		*(.tcm.data)
+		. = ALIGN(4);
+		__edtcm_data = .;
+	}
+
+	/* Reset the dot pointer or the linker gets confused */
+	. = ADDR(.dtcm_start) + SIZEOF(.data_dtcm);
+
+	/* End marker for freeing TCM copy in linked object */
+	.tcm_end : AT(ADDR(.dtcm_start) + SIZEOF(.data_dtcm)){
+		. = ALIGN(PAGE_SIZE);
+		__tcm_end = .;
+	}
+#endif
+
+	BSS_SECTION(0, 0, 0)
+	_end = .;
+
+	STABS_DEBUG
+}
+
+/*
+ * These must never be empty
+ * If you have to comment these two assert statements out, your
+ * binutils is too old (for other reasons as well)
+ */
+ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support")
+ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined")
+
+/*
+ * The HYP init code can't be more than a page long,
+ * and should not cross a page boundary.
+ * The above comment applies as well.
+ */
+ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
+	"HYP init code too big or misaligned")
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 8b60fde..96de892 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -3,14 +3,16 @@
  * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
  */
 
+#ifdef CONFIG_XIP_KERNEL
+#include "vmlinux-xip.lds.S"
+#else
+
 #include <asm-generic/vmlinux.lds.h>
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
-#ifdef CONFIG_ARM_KERNMEM_PERMS
 #include <asm/pgtable.h>
-#endif
 
 #define PROC_INFO							\
 	. = ALIGN(4);							\
@@ -84,17 +86,13 @@
 		*(.discard.*)
 	}
 
-#ifdef CONFIG_XIP_KERNEL
-	. = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR);
-#else
 	. = PAGE_OFFSET + TEXT_OFFSET;
-#endif
 	.head.text : {
 		_text = .;
 		HEAD_TEXT
 	}
 
-#ifdef CONFIG_ARM_KERNMEM_PERMS
+#ifdef CONFIG_DEBUG_RODATA
 	. = ALIGN(1<<SECTION_SHIFT);
 #endif
 
@@ -117,7 +115,7 @@
 			ARM_CPU_KEEP(PROC_INFO)
 	}
 
-#ifdef CONFIG_DEBUG_RODATA
+#ifdef CONFIG_DEBUG_ALIGN_RODATA
 	. = ALIGN(1<<SECTION_SHIFT);
 #endif
 	RO_DATA(PAGE_SIZE)
@@ -152,32 +150,33 @@
 
 	_etext = .;			/* End of text and rodata section */
 
-#ifndef CONFIG_XIP_KERNEL
-# ifdef CONFIG_ARM_KERNMEM_PERMS
+#ifdef CONFIG_DEBUG_RODATA
 	. = ALIGN(1<<SECTION_SHIFT);
-# else
+#else
 	. = ALIGN(PAGE_SIZE);
-# endif
-	__init_begin = .;
 #endif
+	__init_begin = .;
+
 	/*
 	 * The vectors and stubs are relocatable code, and the
 	 * only thing that matters is their relative offsets
 	 */
 	__vectors_start = .;
-	.vectors 0 : AT(__vectors_start) {
+	.vectors 0xffff0000 : AT(__vectors_start) {
 		*(.vectors)
 	}
 	. = __vectors_start + SIZEOF(.vectors);
 	__vectors_end = .;
 
 	__stubs_start = .;
-	.stubs 0x1000 : AT(__stubs_start) {
+	.stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) {
 		*(.stubs)
 	}
 	. = __stubs_start + SIZEOF(.stubs);
 	__stubs_end = .;
 
+	PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors));
+
 	INIT_TEXT_SECTION(8)
 	.exit.text : {
 		ARM_EXIT_KEEP(EXIT_TEXT)
@@ -208,37 +207,28 @@
 		__pv_table_end = .;
 	}
 	.init.data : {
-#ifndef CONFIG_XIP_KERNEL
 		INIT_DATA
-#endif
 		INIT_SETUP(16)
 		INIT_CALLS
 		CON_INITCALL
 		SECURITY_INITCALL
 		INIT_RAM_FS
 	}
-#ifndef CONFIG_XIP_KERNEL
 	.exit.data : {
 		ARM_EXIT_KEEP(EXIT_DATA)
 	}
-#endif
 
 #ifdef CONFIG_SMP
 	PERCPU_SECTION(L1_CACHE_BYTES)
 #endif
 
-#ifdef CONFIG_XIP_KERNEL
-	__data_loc = ALIGN(4);		/* location in binary */
-	. = PAGE_OFFSET + TEXT_OFFSET;
-#else
-#ifdef CONFIG_ARM_KERNMEM_PERMS
+#ifdef CONFIG_DEBUG_RODATA
 	. = ALIGN(1<<SECTION_SHIFT);
 #else
 	. = ALIGN(THREAD_SIZE);
 #endif
 	__init_end = .;
 	__data_loc = .;
-#endif
 
 	.data : AT(__data_loc) {
 		_data = .;		/* address in memory */
@@ -250,15 +240,6 @@
 		 */
 		INIT_TASK_DATA(THREAD_SIZE)
 
-#ifdef CONFIG_XIP_KERNEL
-		. = ALIGN(PAGE_SIZE);
-		__init_begin = .;
-		INIT_DATA
-		ARM_EXIT_KEEP(EXIT_DATA)
-		. = ALIGN(PAGE_SIZE);
-		__init_end = .;
-#endif
-
 		NOSAVE_DATA
 		CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)
 		READ_MOSTLY_DATA(L1_CACHE_BYTES)
@@ -336,6 +317,15 @@
 	STABS_DEBUG
 }
 
+#ifdef CONFIG_DEBUG_RODATA
+/*
+ * Without CONFIG_DEBUG_ALIGN_RODATA, __start_rodata_section_aligned will
+ * be the first section-aligned location after __start_rodata. Otherwise,
+ * it will be equal to __start_rodata.
+ */
+__start_rodata_section_aligned = ALIGN(__start_rodata, 1 << SECTION_SHIFT);
+#endif
+
 /*
  * These must never be empty
  * If you have to comment these two assert statements out, your
@@ -351,3 +341,5 @@
  */
 ASSERT(__hyp_idmap_text_end - (__hyp_idmap_text_start & PAGE_MASK) <= PAGE_SIZE,
 	"HYP init code too big or misaligned")
+
+#endif /* CONFIG_XIP_KERNEL */
diff --git a/arch/arm/mach-davinci/include/mach/uncompress.h b/arch/arm/mach-davinci/include/mach/uncompress.h
index 8fb97b9..53b456a5 100644
--- a/arch/arm/mach-davinci/include/mach/uncompress.h
+++ b/arch/arm/mach-davinci/include/mach/uncompress.h
@@ -30,7 +30,7 @@
 u32 *uart;
 
 /* PORT_16C550A, in polled non-fifo mode */
-static void putc(char c)
+static inline void putc(char c)
 {
 	if (!uart)
 		return;
diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig
index 07152d0..cbbdd84 100644
--- a/arch/arm/mach-footbridge/Kconfig
+++ b/arch/arm/mach-footbridge/Kconfig
@@ -68,7 +68,6 @@
 	select ISA
 	select ISA_DMA
 	select PCI
-	select VIRT_TO_BUS
 	help
 	  Say Y here if you intend to run this kernel on the Rebel.COM
 	  NetWinder.  Information about this machine can be found at:
diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index c279293..d80879c 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c
@@ -63,7 +63,7 @@
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
 }
 
-static phys_addr_t keystone_virt_to_idmap(unsigned long x)
+static unsigned long keystone_virt_to_idmap(unsigned long x)
 {
 	return (phys_addr_t)(x) - CONFIG_PAGE_OFFSET + KEYSTONE_LOW_PHYS_START;
 }
diff --git a/arch/arm/mach-ks8695/include/mach/uncompress.h b/arch/arm/mach-ks8695/include/mach/uncompress.h
index c089a1a..a001c7c 100644
--- a/arch/arm/mach-ks8695/include/mach/uncompress.h
+++ b/arch/arm/mach-ks8695/include/mach/uncompress.h
@@ -17,7 +17,7 @@
 #include <linux/io.h>
 #include <mach/regs-uart.h>
 
-static void putc(char c)
+static inline void putc(char c)
 {
 	while (!(__raw_readl((void __iomem*)KS8695_UART_PA + KS8695_URLS) & URLS_URTHRE))
 		barrier();
diff --git a/arch/arm/mach-netx/include/mach/uncompress.h b/arch/arm/mach-netx/include/mach/uncompress.h
index 5cb1051b..033875d 100644
--- a/arch/arm/mach-netx/include/mach/uncompress.h
+++ b/arch/arm/mach-netx/include/mach/uncompress.h
@@ -40,7 +40,7 @@
 #define FR_BUSY (1<<3)
 #define FR_TXFF (1<<5)
 
-static void putc(char c)
+static inline void putc(char c)
 {
 	unsigned long base;
 
diff --git a/arch/arm/mach-omap1/include/mach/uncompress.h b/arch/arm/mach-omap1/include/mach/uncompress.h
index 4869633..9cca6a5 100644
--- a/arch/arm/mach-omap1/include/mach/uncompress.h
+++ b/arch/arm/mach-omap1/include/mach/uncompress.h
@@ -45,7 +45,7 @@
 	*uart_info = port;
 }
 
-static void putc(int c)
+static inline void putc(int c)
 {
 	if (!uart_base)
 		return;
diff --git a/arch/arm/mach-rpc/include/mach/uncompress.h b/arch/arm/mach-rpc/include/mach/uncompress.h
index 0fd4b0b..654a6f3 100644
--- a/arch/arm/mach-rpc/include/mach/uncompress.h
+++ b/arch/arm/mach-rpc/include/mach/uncompress.h
@@ -76,7 +76,7 @@
 /*
  * This does not append a newline
  */
-static void putc(int c)
+static inline void putc(int c)
 {
 	extern void ll_write_char(char *, char c, char white);
 	int x,y;
diff --git a/arch/arm/mach-sa1100/include/mach/uncompress.h b/arch/arm/mach-sa1100/include/mach/uncompress.h
index 73093dc..a1a041b 100644
--- a/arch/arm/mach-sa1100/include/mach/uncompress.h
+++ b/arch/arm/mach-sa1100/include/mach/uncompress.h
@@ -19,7 +19,7 @@
 
 #define UART(x)		(*(volatile unsigned long *)(serial_port + (x)))
 
-static void putc(int c)
+static inline void putc(int c)
 {
 	unsigned long serial_port;
 
diff --git a/arch/arm/mach-w90x900/include/mach/uncompress.h b/arch/arm/mach-w90x900/include/mach/uncompress.h
index 4b7c324..3855ece 100644
--- a/arch/arm/mach-w90x900/include/mach/uncompress.h
+++ b/arch/arm/mach-w90x900/include/mach/uncompress.h
@@ -27,7 +27,7 @@
 #define TX_DONE	(UART_LSR_TEMT | UART_LSR_THRE)
 static volatile u32 * const uart_base = (u32 *)UART0_PA;
 
-static void putc(int ch)
+static inline void putc(int ch)
 {
 	/* Check THRE and TEMT bits before we transmit the character.
 	 */
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 549f6d3..5534766 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -1037,24 +1037,26 @@
 	  This option specifies the architecture can support big endian
 	  operation.
 
-config ARM_KERNMEM_PERMS
-	bool "Restrict kernel memory permissions"
-	depends on MMU
-	help
-	  If this is set, kernel memory other than kernel text (and rodata)
-	  will be made non-executable. The tradeoff is that each region is
-	  padded to section-size (1MiB) boundaries (because their permissions
-	  are different and splitting the 1M pages into 4K ones causes TLB
-	  performance problems), wasting memory.
-
 config DEBUG_RODATA
 	bool "Make kernel text and rodata read-only"
-	depends on ARM_KERNMEM_PERMS
+	depends on MMU && !XIP_KERNEL
+	default y if CPU_V7
+	help
+	  If this is set, kernel text and rodata memory will be made
+	  read-only, and non-text kernel memory will be made non-executable.
+	  The tradeoff is that each region is padded to section-size (1MiB)
+	  boundaries (because their permissions are different and splitting
+	  the 1M pages into 4K ones causes TLB performance problems), which
+	  can waste memory.
+
+config DEBUG_ALIGN_RODATA
+	bool "Make rodata strictly non-executable"
+	depends on DEBUG_RODATA
 	default y
 	help
-	  If this is set, kernel text and rodata will be made read-only. This
-	  is to help catch accidental or malicious attempts to change the
-	  kernel's executable code. Additionally splits rodata from kernel
-	  text so it can be made explicitly non-executable. This creates
-	  another section-size padded region, so it can waste more memory
-	  space while gaining the read-only protections.
+	  If this is set, rodata will be made explicitly non-executable. This
+	  provides protection on the rare chance that attackers might find and
+	  use ROP gadgets that exist in the rodata section. This adds an
+	  additional section-aligned split of rodata from kernel text so it
+	  can be made explicitly non-executable. This padding may waste memory
+	  space to gain the additional protection.
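
For reference, a configuration fragment with both of the reworked options
enabled (a sketch only; the values a particular defconfig ends up with
depend on the rest of the configuration) would look like:

	CONFIG_MMU=y
	# CONFIG_XIP_KERNEL is not set
	CONFIG_DEBUG_RODATA=y
	CONFIG_DEBUG_ALIGN_RODATA=y
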
diff --git a/arch/arm/mm/cache-tauros2.c b/arch/arm/mm/cache-tauros2.c
index 1e373d2..88255be 100644
--- a/arch/arm/mm/cache-tauros2.c
+++ b/arch/arm/mm/cache-tauros2.c
@@ -22,6 +22,11 @@
 #include <asm/cputype.h>
 #include <asm/hardware/cache-tauros2.h>
 
+/* CP15 PJ4 Control configuration register */
+#define CCR_L2C_PREFETCH_DISABLE	BIT(24)
+#define CCR_L2C_ECC_ENABLE		BIT(23)
+#define CCR_L2C_WAY7_4_DISABLE		BIT(21)
+#define CCR_L2C_BURST8_ENABLE		BIT(20)
 
 /*
  * When Tauros2 is used on a CPU that supports the v7 hierarchical
@@ -182,18 +187,18 @@
 	u = read_extra_features();
 
 	if (features & CACHE_TAUROS2_PREFETCH_ON)
-		u &= ~0x01000000;
+		u &= ~CCR_L2C_PREFETCH_DISABLE;
 	else
-		u |= 0x01000000;
+		u |= CCR_L2C_PREFETCH_DISABLE;
 	pr_info("Tauros2: %s L2 prefetch.\n",
 			(features & CACHE_TAUROS2_PREFETCH_ON)
 			? "Enabling" : "Disabling");
 
 	if (features & CACHE_TAUROS2_LINEFILL_BURST8)
-		u |= 0x00100000;
+		u |= CCR_L2C_BURST8_ENABLE;
 	else
-		u &= ~0x00100000;
-	pr_info("Tauros2: %s line fill burt8.\n",
+		u &= ~CCR_L2C_BURST8_ENABLE;
+	pr_info("Tauros2: %s burst8 line fill.\n",
 			(features & CACHE_TAUROS2_LINEFILL_BURST8)
 			? "Enabling" : "Disabling");
 
@@ -287,16 +292,15 @@
 	node = of_find_matching_node(NULL, tauros2_ids);
 	if (!node) {
 		pr_info("Not found marvell,tauros2-cache, disable it\n");
-		return;
+	} else {
+		ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f);
+		if (ret) {
+			pr_info("Not found marvell,tauros-cache-features property, "
+				"disable extra features\n");
+			features = 0;
+		} else
+			features = f;
 	}
-
-	ret = of_property_read_u32(node, "marvell,tauros2-cache-features", &f);
-	if (ret) {
-		pr_info("Not found marvell,tauros-cache-features property, "
-			"disable extra features\n");
-		features = 0;
-	} else
-		features = f;
 #endif
 	tauros2_internal_init(features);
 }
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 0eca381..deac58d 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -42,6 +42,55 @@
 #include "dma.h"
 #include "mm.h"
 
+struct arm_dma_alloc_args {
+	struct device *dev;
+	size_t size;
+	gfp_t gfp;
+	pgprot_t prot;
+	const void *caller;
+	bool want_vaddr;
+};
+
+struct arm_dma_free_args {
+	struct device *dev;
+	size_t size;
+	void *cpu_addr;
+	struct page *page;
+	bool want_vaddr;
+};
+
+struct arm_dma_allocator {
+	void *(*alloc)(struct arm_dma_alloc_args *args,
+		       struct page **ret_page);
+	void (*free)(struct arm_dma_free_args *args);
+};
+
+struct arm_dma_buffer {
+	struct list_head list;
+	void *virt;
+	struct arm_dma_allocator *allocator;
+};
+
+static LIST_HEAD(arm_dma_bufs);
+static DEFINE_SPINLOCK(arm_dma_bufs_lock);
+
+static struct arm_dma_buffer *arm_dma_buffer_find(void *virt)
+{
+	struct arm_dma_buffer *buf, *found = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&arm_dma_bufs_lock, flags);
+	list_for_each_entry(buf, &arm_dma_bufs, list) {
+		if (buf->virt == virt) {
+			list_del(&buf->list);
+			found = buf;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
+	return found;
+}
+
 /*
  * The DMA API is built upon the notion of "buffer ownership".  A buffer
  * is either exclusively owned by the CPU (and therefore may be accessed
@@ -592,7 +641,7 @@
 #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv)	NULL
 #define __alloc_from_pool(size, ret_page)			NULL
 #define __alloc_from_contiguous(dev, size, prot, ret, c, wv)	NULL
-#define __free_from_pool(cpu_addr, size)			0
+#define __free_from_pool(cpu_addr, size)			do { } while (0)
 #define __free_from_contiguous(dev, page, cpu_addr, size, wv)	do { } while (0)
 #define __dma_free_remap(cpu_addr, size)			do { } while (0)
 
@@ -610,7 +659,78 @@
 	return page_address(page);
 }
 
+static void *simple_allocator_alloc(struct arm_dma_alloc_args *args,
+				    struct page **ret_page)
+{
+	return __alloc_simple_buffer(args->dev, args->size, args->gfp,
+				     ret_page);
+}
 
+static void simple_allocator_free(struct arm_dma_free_args *args)
+{
+	__dma_free_buffer(args->page, args->size);
+}
+
+static struct arm_dma_allocator simple_allocator = {
+	.alloc = simple_allocator_alloc,
+	.free = simple_allocator_free,
+};
+
+static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
+				 struct page **ret_page)
+{
+	return __alloc_from_contiguous(args->dev, args->size, args->prot,
+				       ret_page, args->caller,
+				       args->want_vaddr);
+}
+
+static void cma_allocator_free(struct arm_dma_free_args *args)
+{
+	__free_from_contiguous(args->dev, args->page, args->cpu_addr,
+			       args->size, args->want_vaddr);
+}
+
+static struct arm_dma_allocator cma_allocator = {
+	.alloc = cma_allocator_alloc,
+	.free = cma_allocator_free,
+};
+
+static void *pool_allocator_alloc(struct arm_dma_alloc_args *args,
+				  struct page **ret_page)
+{
+	return __alloc_from_pool(args->size, ret_page);
+}
+
+static void pool_allocator_free(struct arm_dma_free_args *args)
+{
+	__free_from_pool(args->cpu_addr, args->size);
+}
+
+static struct arm_dma_allocator pool_allocator = {
+	.alloc = pool_allocator_alloc,
+	.free = pool_allocator_free,
+};
+
+static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
+				   struct page **ret_page)
+{
+	return __alloc_remap_buffer(args->dev, args->size, args->gfp,
+				    args->prot, ret_page, args->caller,
+				    args->want_vaddr);
+}
+
+static void remap_allocator_free(struct arm_dma_free_args *args)
+{
+	if (args->want_vaddr)
+		__dma_free_remap(args->cpu_addr, args->size);
+
+	__dma_free_buffer(args->page, args->size);
+}
+
+static struct arm_dma_allocator remap_allocator = {
+	.alloc = remap_allocator_alloc,
+	.free = remap_allocator_free,
+};
 
 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 			 gfp_t gfp, pgprot_t prot, bool is_coherent,
@@ -619,7 +739,16 @@
 	u64 mask = get_coherent_dma_mask(dev);
 	struct page *page = NULL;
 	void *addr;
-	bool want_vaddr;
+	bool allowblock, cma;
+	struct arm_dma_buffer *buf;
+	struct arm_dma_alloc_args args = {
+		.dev = dev,
+		.size = PAGE_ALIGN(size),
+		.gfp = gfp,
+		.prot = prot,
+		.caller = caller,
+		.want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
+	};
 
 #ifdef CONFIG_DMA_API_DEBUG
 	u64 limit = (mask + 1) & ~mask;
@@ -633,6 +762,10 @@
 	if (!mask)
 		return NULL;
 
+	buf = kzalloc(sizeof(*buf), gfp);
+	if (!buf)
+		return NULL;
+
 	if (mask < 0xffffffffULL)
 		gfp |= GFP_DMA;
 
@@ -644,28 +777,37 @@
 	 * platform; see CONFIG_HUGETLBFS.
 	 */
 	gfp &= ~(__GFP_COMP);
+	args.gfp = gfp;
 
 	*handle = DMA_ERROR_CODE;
-	size = PAGE_ALIGN(size);
-	want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
+	allowblock = gfpflags_allow_blocking(gfp);
+	cma = allowblock ? dev_get_cma_area(dev) : false;
 
-	if (nommu())
-		addr = __alloc_simple_buffer(dev, size, gfp, &page);
-	else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM))
-		addr = __alloc_from_contiguous(dev, size, prot, &page,
-					       caller, want_vaddr);
-	else if (is_coherent)
-		addr = __alloc_simple_buffer(dev, size, gfp, &page);
-	else if (!gfpflags_allow_blocking(gfp))
-		addr = __alloc_from_pool(size, &page);
+	if (cma)
+		buf->allocator = &cma_allocator;
+	else if (nommu() || is_coherent)
+		buf->allocator = &simple_allocator;
+	else if (allowblock)
+		buf->allocator = &remap_allocator;
 	else
-		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page,
-					    caller, want_vaddr);
+		buf->allocator = &pool_allocator;
 
-	if (page)
+	addr = buf->allocator->alloc(&args, &page);
+
+	if (page) {
+		unsigned long flags;
+
 		*handle = pfn_to_dma(dev, page_to_pfn(page));
+		buf->virt = args.want_vaddr ? addr : page;
 
-	return want_vaddr ? addr : page;
+		spin_lock_irqsave(&arm_dma_bufs_lock, flags);
+		list_add(&buf->list, &arm_dma_bufs);
+		spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
+	} else {
+		kfree(buf);
+	}
+
+	return args.want_vaddr ? addr : page;
 }
 
 /*
@@ -741,25 +883,21 @@
 			   bool is_coherent)
 {
 	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
-	bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
+	struct arm_dma_buffer *buf;
+	struct arm_dma_free_args args = {
+		.dev = dev,
+		.size = PAGE_ALIGN(size),
+		.cpu_addr = cpu_addr,
+		.page = page,
+		.want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
+	};
 
-	size = PAGE_ALIGN(size);
-
-	if (nommu()) {
-		__dma_free_buffer(page, size);
-	} else if (!is_coherent && __free_from_pool(cpu_addr, size)) {
+	buf = arm_dma_buffer_find(cpu_addr);
+	if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr))
 		return;
-	} else if (!dev_get_cma_area(dev)) {
-		if (want_vaddr && !is_coherent)
-			__dma_free_remap(cpu_addr, size);
-		__dma_free_buffer(page, size);
-	} else {
-		/*
-		 * Non-atomic allocations cannot be freed with IRQs disabled
-		 */
-		WARN_ON(irqs_disabled());
-		__free_from_contiguous(dev, page, cpu_addr, size, want_vaddr);
-	}
+
+	buf->allocator->free(&args);
+	kfree(buf);
 }
 
 void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
@@ -1122,6 +1260,9 @@
 	spin_unlock_irqrestore(&mapping->lock, flags);
 }
 
+/* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */
+static const int iommu_order_array[] = { 9, 8, 4, 0 };
+
 static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 					  gfp_t gfp, struct dma_attrs *attrs)
 {
@@ -1129,6 +1270,7 @@
 	int count = size >> PAGE_SHIFT;
 	int array_size = count * sizeof(struct page *);
 	int i = 0;
+	int order_idx = 0;
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, GFP_KERNEL);
@@ -1154,6 +1296,10 @@
 		return pages;
 	}
 
+	/* Go straight to 4K chunks if caller says it's OK. */
+	if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs))
+		order_idx = ARRAY_SIZE(iommu_order_array) - 1;
+
 	/*
 	 * IOMMU can map any pages, so himem can also be used here
 	 */
@@ -1162,22 +1308,24 @@
 	while (count) {
 		int j, order;
 
-		for (order = __fls(count); order > 0; --order) {
-			/*
-			 * We do not want OOM killer to be invoked as long
-			 * as we can fall back to single pages, so we force
-			 * __GFP_NORETRY for orders higher than zero.
-			 */
-			pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
-			if (pages[i])
-				break;
+		order = iommu_order_array[order_idx];
+
+		/* Drop down when we get small */
+		if (__fls(count) < order) {
+			order_idx++;
+			continue;
 		}
 
-		if (!pages[i]) {
-			/*
-			 * Fall back to single page allocation.
-			 * Might invoke OOM killer as last resort.
-			 */
+		if (order) {
+			/* See if it's easy to allocate a high-order chunk */
+			pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
+
+			/* Go down a notch at first sign of pressure */
+			if (!pages[i]) {
+				order_idx++;
+				continue;
+			}
+		} else {
 			pages[i] = alloc_pages(gfp, 0);
 			if (!pages[i])
 				goto error;
diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c
index d659096..bd274a0 100644
--- a/arch/arm/mm/idmap.c
+++ b/arch/arm/mm/idmap.c
@@ -15,7 +15,7 @@
  * page tables.
  */
 pgd_t *idmap_pgd;
-phys_addr_t (*arch_virt_to_idmap) (unsigned long x);
+unsigned long (*arch_virt_to_idmap)(unsigned long x);
 
 #ifdef CONFIG_ARM_LPAE
 static void idmap_add_pmd(pud_t *pud, unsigned long addr, unsigned long end,
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 49bd081..370581a 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -572,8 +572,9 @@
 	}
 }
 
-#ifdef CONFIG_ARM_KERNMEM_PERMS
+#ifdef CONFIG_DEBUG_RODATA
 struct section_perm {
+	const char *name;
 	unsigned long start;
 	unsigned long end;
 	pmdval_t mask;
@@ -581,9 +582,13 @@
 	pmdval_t clear;
 };
 
+/* First section-aligned location at or after __start_rodata. */
+extern char __start_rodata_section_aligned[];
+
 static struct section_perm nx_perms[] = {
 	/* Make pages tables, etc before _stext RW (set NX). */
 	{
+		.name	= "pre-text NX",
 		.start	= PAGE_OFFSET,
 		.end	= (unsigned long)_stext,
 		.mask	= ~PMD_SECT_XN,
@@ -591,26 +596,26 @@
 	},
 	/* Make init RW (set NX). */
 	{
+		.name	= "init NX",
 		.start	= (unsigned long)__init_begin,
 		.end	= (unsigned long)_sdata,
 		.mask	= ~PMD_SECT_XN,
 		.prot	= PMD_SECT_XN,
 	},
-#ifdef CONFIG_DEBUG_RODATA
 	/* Make rodata NX (set RO in ro_perms below). */
 	{
-		.start  = (unsigned long)__start_rodata,
+		.name	= "rodata NX",
+		.start  = (unsigned long)__start_rodata_section_aligned,
 		.end    = (unsigned long)__init_begin,
 		.mask   = ~PMD_SECT_XN,
 		.prot   = PMD_SECT_XN,
 	},
-#endif
 };
 
-#ifdef CONFIG_DEBUG_RODATA
 static struct section_perm ro_perms[] = {
 	/* Make kernel code and rodata RX (set RO). */
 	{
+		.name	= "text/rodata RO",
 		.start  = (unsigned long)_stext,
 		.end    = (unsigned long)__init_begin,
 #ifdef CONFIG_ARM_LPAE
@@ -623,7 +628,6 @@
 #endif
 	},
 };
-#endif
 
 /*
  * Updates section permissions only for the current mm (sections are
@@ -670,8 +674,8 @@
 	for (i = 0; i < n; i++) {
 		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||
 		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {
-			pr_err("BUG: section %lx-%lx not aligned to %lx\n",
-				perms[i].start, perms[i].end,
+			pr_err("BUG: %s section %lx-%lx not aligned to %lx\n",
+				perms[i].name, perms[i].start, perms[i].end,
 				SECTION_SIZE);
 			continue;
 		}
@@ -712,7 +716,6 @@
 	stop_machine(__fix_kernmem_perms, NULL, NULL);
 }
 
-#ifdef CONFIG_DEBUG_RODATA
 int __mark_rodata_ro(void *unused)
 {
 	update_sections_early(ro_perms, ARRAY_SIZE(ro_perms));
@@ -735,11 +738,10 @@
 	set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true,
 				current->active_mm);
 }
-#endif /* CONFIG_DEBUG_RODATA */
 
 #else
 static inline void fix_kernmem_perms(void) { }
-#endif /* CONFIG_ARM_KERNMEM_PERMS */
+#endif /* CONFIG_DEBUG_RODATA */
 
 void free_tcmmem(void)
 {
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 434d76f..e4b681a 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1253,7 +1253,7 @@
 
 #ifdef CONFIG_XIP_KERNEL
 	/* The XIP kernel is mapped in the module area -- skip over it */
-	addr = ((unsigned long)_etext + PMD_SIZE - 1) & PMD_MASK;
+	addr = ((unsigned long)_exiprom + PMD_SIZE - 1) & PMD_MASK;
 #endif
 	for ( ; addr < PAGE_OFFSET; addr += PMD_SIZE)
 		pmd_clear(pmd_off_k(addr));
@@ -1335,7 +1335,7 @@
 #ifdef CONFIG_XIP_KERNEL
 	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
 	map.virtual = MODULES_VADDR;
-	map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
+	map.length = ((unsigned long)_exiprom - map.virtual + ~SECTION_MASK) & SECTION_MASK;
 	map.type = MT_ROM;
 	create_mapping(&map);
 #endif
@@ -1426,7 +1426,11 @@
 static void __init map_lowmem(void)
 {
 	struct memblock_region *reg;
+#ifdef CONFIG_XIP_KERNEL
+	phys_addr_t kernel_x_start = round_down(__pa(_sdata), SECTION_SIZE);
+#else
 	phys_addr_t kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
+#endif
 	phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
 
 	/* Map all the lowmem memory banks. */
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
index cf30daf..d19b1ad 100644
--- a/arch/arm/mm/pageattr.c
+++ b/arch/arm/mm/pageattr.c
@@ -49,6 +49,9 @@
 		WARN_ON_ONCE(1);
 	}
 
+	if (!numpages)
+		return 0;
+
 	if (start < MODULES_VADDR || start >= MODULES_END)
 		return -EINVAL;
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 0f92d57..0f8963a 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -487,7 +487,7 @@
 
 	.align	2
 __v7_setup_stack_ptr:
-	.word	__v7_setup_stack - .
+	.word	PHYS_RELATIVE(__v7_setup_stack, .)
 ENDPROC(__v7_setup)
 
 	.bss
diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c
index 8085a8a..ffb93db 100644
--- a/arch/arm/plat-orion/time.c
+++ b/arch/arm/plat-orion/time.c
@@ -18,6 +18,7 @@
 #include <linux/irq.h>
 #include <linux/sched_clock.h>
 #include <plat/time.h>
+#include <asm/delay.h>
 
 /*
  * MBus bridge block registers.
@@ -188,6 +189,15 @@
 	timer_base = _timer_base;
 }
 
+static unsigned long orion_delay_timer_read(void)
+{
+	return ~readl(timer_base + TIMER0_VAL_OFF);
+}
+
+static struct delay_timer orion_delay_timer = {
+	.read_current_timer = orion_delay_timer_read,
+};
+
 void __init
 orion_time_init(void __iomem *_bridge_base, u32 _bridge_timer1_clr_mask,
 		unsigned int irq, unsigned int tclk)
@@ -202,6 +212,9 @@
 
 	ticks_per_jiffy = (tclk + HZ/2) / HZ;
 
+	orion_delay_timer.freq = tclk;
+	register_current_timer_delay(&orion_delay_timer);
+
 	/*
 	 * Set scale and timer for sched_clock.
 	 */
diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c
index f67f35b..42816be 100644
--- a/arch/arm64/kernel/psci.c
+++ b/arch/arm64/kernel/psci.c
@@ -20,7 +20,6 @@
 #include <linux/smp.h>
 #include <linux/delay.h>
 #include <linux/psci.h>
-#include <linux/slab.h>
 
 #include <uapi/linux/psci.h>
 
@@ -28,73 +27,6 @@
 #include <asm/cpu_ops.h>
 #include <asm/errno.h>
 #include <asm/smp_plat.h>
-#include <asm/suspend.h>
-
-static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state);
-
-static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu)
-{
-	int i, ret, count = 0;
-	u32 *psci_states;
-	struct device_node *state_node, *cpu_node;
-
-	cpu_node = of_get_cpu_node(cpu, NULL);
-	if (!cpu_node)
-		return -ENODEV;
-
-	/*
-	 * If the PSCI cpu_suspend function hook has not been initialized
-	 * idle states must not be enabled, so bail out
-	 */
-	if (!psci_ops.cpu_suspend)
-		return -EOPNOTSUPP;
-
-	/* Count idle states */
-	while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
-					      count))) {
-		count++;
-		of_node_put(state_node);
-	}
-
-	if (!count)
-		return -ENODEV;
-
-	psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
-	if (!psci_states)
-		return -ENOMEM;
-
-	for (i = 0; i < count; i++) {
-		u32 state;
-
-		state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
-
-		ret = of_property_read_u32(state_node,
-					   "arm,psci-suspend-param",
-					   &state);
-		if (ret) {
-			pr_warn(" * %s missing arm,psci-suspend-param property\n",
-				state_node->full_name);
-			of_node_put(state_node);
-			goto free_mem;
-		}
-
-		of_node_put(state_node);
-		pr_debug("psci-power-state %#x index %d\n", state, i);
-		if (!psci_power_state_is_valid(state)) {
-			pr_warn("Invalid PSCI power state %#x\n", state);
-			ret = -EINVAL;
-			goto free_mem;
-		}
-		psci_states[i] = state;
-	}
-	/* Idle states parsed correctly, initialize per-cpu pointer */
-	per_cpu(psci_power_state, cpu) = psci_states;
-	return 0;
-
-free_mem:
-	kfree(psci_states);
-	return ret;
-}
 
 static int __init cpu_psci_cpu_init(unsigned int cpu)
 {
@@ -178,38 +110,11 @@
 }
 #endif
 
-static int psci_suspend_finisher(unsigned long index)
-{
-	u32 *state = __this_cpu_read(psci_power_state);
-
-	return psci_ops.cpu_suspend(state[index - 1],
-				    virt_to_phys(cpu_resume));
-}
-
-static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index)
-{
-	int ret;
-	u32 *state = __this_cpu_read(psci_power_state);
-	/*
-	 * idle state index 0 corresponds to wfi, should never be called
-	 * from the cpu_suspend operations
-	 */
-	if (WARN_ON_ONCE(!index))
-		return -EINVAL;
-
-	if (!psci_power_state_loses_context(state[index - 1]))
-		ret = psci_ops.cpu_suspend(state[index - 1], 0);
-	else
-		ret = cpu_suspend(index, psci_suspend_finisher);
-
-	return ret;
-}
-
 const struct cpu_operations cpu_psci_ops = {
 	.name		= "psci",
 #ifdef CONFIG_CPU_IDLE
-	.cpu_init_idle	= cpu_psci_cpu_init_idle,
-	.cpu_suspend	= cpu_psci_cpu_suspend,
+	.cpu_init_idle	= psci_cpu_init_idle,
+	.cpu_suspend	= psci_cpu_suspend_enter,
 #endif
 	.cpu_init	= cpu_psci_cpu_init,
 	.cpu_prepare	= cpu_psci_cpu_prepare,
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index c4da2df..16688f5 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -560,6 +560,7 @@
 	struct device_attach_data *data = _data;
 	struct device *dev = data->dev;
 	bool async_allowed;
+	int ret;
 
 	/*
 	 * Check if device has already been claimed. This may
@@ -570,8 +571,17 @@
 	if (dev->driver)
 		return -EBUSY;
 
-	if (!driver_match_device(drv, dev))
+	ret = driver_match_device(drv, dev);
+	if (ret == 0) {
+		/* no match */
 		return 0;
+	} else if (ret == -EPROBE_DEFER) {
+		dev_dbg(dev, "Device match requests probe deferral\n");
+		driver_deferred_probe_add(dev);
+	} else if (ret < 0) {
+		dev_dbg(dev, "Bus failed to match device: %d\n", ret);
+		return ret;
+	} /* ret > 0 means positive match */
 
 	async_allowed = driver_allows_async_probing(drv);
 
@@ -691,6 +701,7 @@
 static int __driver_attach(struct device *dev, void *data)
 {
 	struct device_driver *drv = data;
+	int ret;
 
 	/*
 	 * Lock device and try to bind to it. We drop the error
@@ -702,8 +713,17 @@
 	 * is an error.
 	 */
 
-	if (!driver_match_device(drv, dev))
+	ret = driver_match_device(drv, dev);
+	if (ret == 0) {
+		/* no match */
 		return 0;
+	} else if (ret == -EPROBE_DEFER) {
+		dev_dbg(dev, "Device match requests probe deferral\n");
+		driver_deferred_probe_add(dev);
+	} else if (ret < 0) {
+		dev_dbg(dev, "Bus failed to match device: %d\n", ret);
+		return ret;
+	} /* ret > 0 means positive match */
 
 	if (dev->parent)	/* Needed for USB */
 		device_lock(dev->parent);
diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c
index 779b6ff..eb20b94 100644
--- a/drivers/clk/clkdev.c
+++ b/drivers/clk/clkdev.c
@@ -353,11 +353,25 @@
 }
 EXPORT_SYMBOL(clkdev_drop);
 
+static struct clk_lookup *__clk_register_clkdev(struct clk_hw *hw,
+						const char *con_id,
+						const char *dev_id, ...)
+{
+	struct clk_lookup *cl;
+	va_list ap;
+
+	va_start(ap, dev_id);
+	cl = vclkdev_create(hw, con_id, dev_id, ap);
+	va_end(ap);
+
+	return cl;
+}
+
 /**
  * clk_register_clkdev - register one clock lookup for a struct clk
  * @clk: struct clk to associate with all clk_lookups
  * @con_id: connection ID string on device
- * @dev_id: format string describing device name
+ * @dev_id: string describing device name
  *
  * con_id or dev_id may be NULL as a wildcard, just as in the rest of
  * clkdev.
@@ -368,17 +382,22 @@
  * after clk_register().
  */
 int clk_register_clkdev(struct clk *clk, const char *con_id,
-	const char *dev_fmt, ...)
+	const char *dev_id)
 {
 	struct clk_lookup *cl;
-	va_list ap;
 
 	if (IS_ERR(clk))
 		return PTR_ERR(clk);
 
-	va_start(ap, dev_fmt);
-	cl = vclkdev_create(__clk_get_hw(clk), con_id, dev_fmt, ap);
-	va_end(ap);
+	/*
+	 * Since dev_id can be NULL, and NULL is handled specially, we must
+	 * pass it as either a NULL format string, or with "%s".
+	 */
+	if (dev_id)
+		cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, "%s",
+					   dev_id);
+	else
+		cl = __clk_register_clkdev(__clk_get_hw(clk), con_id, NULL);
 
 	return cl ? 0 : -ENOMEM;
 }
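
With the format-string variant gone, clk_register_clkdev() takes the device
name as a plain string, and callers that used to build the name with
printf-style arguments must format it themselves first. A minimal sketch of a
caller under the new signature (the clock and device names below are made up
for illustration):

	struct clk *clk;
	int ret;

	clk = clk_register_fixed_rate(NULL, "ref24m", NULL, 0, 24000000);
	if (IS_ERR(clk))
		return PTR_ERR(clk);

	/* dev_id is a literal device name now, not a format string */
	ret = clk_register_clkdev(clk, NULL, "ff180000.serial");
	if (ret)
		return ret;
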
diff --git a/drivers/firmware/psci.c b/drivers/firmware/psci.c
index f25cd79..11bfee8 100644
--- a/drivers/firmware/psci.c
+++ b/drivers/firmware/psci.c
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) "psci: " fmt
 
 #include <linux/arm-smccc.h>
+#include <linux/cpuidle.h>
 #include <linux/errno.h>
 #include <linux/linkage.h>
 #include <linux/of.h>
@@ -21,10 +22,12 @@
 #include <linux/printk.h>
 #include <linux/psci.h>
 #include <linux/reboot.h>
+#include <linux/slab.h>
 #include <linux/suspend.h>
 
 #include <uapi/linux/psci.h>
 
+#include <asm/cpuidle.h>
 #include <asm/cputype.h>
 #include <asm/system_misc.h>
 #include <asm/smp_plat.h>
@@ -244,6 +247,123 @@
 			      psci_func_id, 0, 0);
 }
 
+#ifdef CONFIG_CPU_IDLE
+static DEFINE_PER_CPU_READ_MOSTLY(u32 *, psci_power_state);
+
+static int psci_dt_cpu_init_idle(struct device_node *cpu_node, int cpu)
+{
+	int i, ret, count = 0;
+	u32 *psci_states;
+	struct device_node *state_node;
+
+	/*
+	 * If the PSCI cpu_suspend function hook has not been initialized,
+	 * idle states must not be enabled, so bail out.
+	 */
+	if (!psci_ops.cpu_suspend)
+		return -EOPNOTSUPP;
+
+	/* Count idle states */
+	while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
+					      count))) {
+		count++;
+		of_node_put(state_node);
+	}
+
+	if (!count)
+		return -ENODEV;
+
+	psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
+	if (!psci_states)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		u32 state;
+
+		state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
+
+		ret = of_property_read_u32(state_node,
+					   "arm,psci-suspend-param",
+					   &state);
+		if (ret) {
+			pr_warn(" * %s missing arm,psci-suspend-param property\n",
+				state_node->full_name);
+			of_node_put(state_node);
+			goto free_mem;
+		}
+
+		of_node_put(state_node);
+		pr_debug("psci-power-state %#x index %d\n", state, i);
+		if (!psci_power_state_is_valid(state)) {
+			pr_warn("Invalid PSCI power state %#x\n", state);
+			ret = -EINVAL;
+			goto free_mem;
+		}
+		psci_states[i] = state;
+	}
+	/* Idle states parsed correctly, initialize per-cpu pointer */
+	per_cpu(psci_power_state, cpu) = psci_states;
+	return 0;
+
+free_mem:
+	kfree(psci_states);
+	return ret;
+}
+
+int psci_cpu_init_idle(unsigned int cpu)
+{
+	struct device_node *cpu_node;
+	int ret;
+
+	cpu_node = of_get_cpu_node(cpu, NULL);
+	if (!cpu_node)
+		return -ENODEV;
+
+	ret = psci_dt_cpu_init_idle(cpu_node, cpu);
+
+	of_node_put(cpu_node);
+
+	return ret;
+}
+
+static int psci_suspend_finisher(unsigned long index)
+{
+	u32 *state = __this_cpu_read(psci_power_state);
+
+	return psci_ops.cpu_suspend(state[index - 1],
+				    virt_to_phys(cpu_resume));
+}
+
+int psci_cpu_suspend_enter(unsigned long index)
+{
+	int ret;
+	u32 *state = __this_cpu_read(psci_power_state);
+	/*
+	 * Idle state index 0 corresponds to wfi; it should never be passed
+	 * to the cpu_suspend operations.
+	 */
+	if (WARN_ON_ONCE(!index))
+		return -EINVAL;
+
+	if (!psci_power_state_loses_context(state[index - 1]))
+		ret = psci_ops.cpu_suspend(state[index - 1], 0);
+	else
+		ret = cpu_suspend(index, psci_suspend_finisher);
+
+	return ret;
+}
+
+/* ARM specific CPU idle operations */
+#ifdef CONFIG_ARM
+static struct cpuidle_ops psci_cpuidle_ops __initdata = {
+	.suspend = psci_cpu_suspend_enter,
+	.init = psci_dt_cpu_init_idle,
+};
+
+CPUIDLE_METHOD_OF_DECLARE(psci, "arm,psci", &psci_cpuidle_ops);
+#endif
+#endif
+
 static int psci_system_suspend(unsigned long unused)
 {
 	return invoke_psci_fn(PSCI_FN_NATIVE(1_0, SYSTEM_SUSPEND),
diff --git a/drivers/media/v4l2-core/videobuf2-dma-contig.c b/drivers/media/v4l2-core/videobuf2-dma-contig.c
index c331272..5361197 100644
--- a/drivers/media/v4l2-core/videobuf2-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf2-dma-contig.c
@@ -23,13 +23,16 @@
 
 struct vb2_dc_conf {
 	struct device		*dev;
+	struct dma_attrs	attrs;
 };
 
 struct vb2_dc_buf {
 	struct device			*dev;
 	void				*vaddr;
 	unsigned long			size;
+	void				*cookie;
 	dma_addr_t			dma_addr;
+	struct dma_attrs		attrs;
 	enum dma_data_direction		dma_dir;
 	struct sg_table			*dma_sgt;
 	struct frame_vector		*vec;
@@ -131,7 +134,8 @@
 		sg_free_table(buf->sgt_base);
 		kfree(buf->sgt_base);
 	}
-	dma_free_coherent(buf->dev, buf->size, buf->vaddr, buf->dma_addr);
+	dma_free_attrs(buf->dev, buf->size, buf->cookie, buf->dma_addr,
+			&buf->attrs);
 	put_device(buf->dev);
 	kfree(buf);
 }
@@ -147,14 +151,18 @@
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
 
-	buf->vaddr = dma_alloc_coherent(dev, size, &buf->dma_addr,
-						GFP_KERNEL | gfp_flags);
-	if (!buf->vaddr) {
+	buf->attrs = conf->attrs;
+	buf->cookie = dma_alloc_attrs(dev, size, &buf->dma_addr,
+					GFP_KERNEL | gfp_flags, &buf->attrs);
+	if (!buf->cookie) {
 		dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size);
 		kfree(buf);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (!dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, &buf->attrs))
+		buf->vaddr = buf->cookie;
+
 	/* Prevent the device from being released while the buffer is used */
 	buf->dev = get_device(dev);
 	buf->size = size;
@@ -185,8 +193,8 @@
 	 */
 	vma->vm_pgoff = 0;
 
-	ret = dma_mmap_coherent(buf->dev, vma, buf->vaddr,
-		buf->dma_addr, buf->size);
+	ret = dma_mmap_attrs(buf->dev, vma, buf->cookie,
+		buf->dma_addr, buf->size, &buf->attrs);
 
 	if (ret) {
 		pr_err("Remapping memory failed, error: %d\n", ret);
@@ -329,7 +337,7 @@
 {
 	struct vb2_dc_buf *buf = dbuf->priv;
 
-	return buf->vaddr + pgnum * PAGE_SIZE;
+	return buf->vaddr ? buf->vaddr + pgnum * PAGE_SIZE : NULL;
 }
 
 static void *vb2_dc_dmabuf_ops_vmap(struct dma_buf *dbuf)
@@ -368,8 +376,8 @@
 		return NULL;
 	}
 
-	ret = dma_get_sgtable(buf->dev, sgt, buf->vaddr, buf->dma_addr,
-		buf->size);
+	ret = dma_get_sgtable_attrs(buf->dev, sgt, buf->cookie, buf->dma_addr,
+		buf->size, &buf->attrs);
 	if (ret < 0) {
 		dev_err(buf->dev, "failed to get scatterlist from DMA API\n");
 		kfree(sgt);
@@ -721,7 +729,8 @@
 };
 EXPORT_SYMBOL_GPL(vb2_dma_contig_memops);
 
-void *vb2_dma_contig_init_ctx(struct device *dev)
+void *vb2_dma_contig_init_ctx_attrs(struct device *dev,
+				    struct dma_attrs *attrs)
 {
 	struct vb2_dc_conf *conf;
 
@@ -730,10 +739,12 @@
 		return ERR_PTR(-ENOMEM);
 
 	conf->dev = dev;
+	if (attrs)
+		conf->attrs = *attrs;
 
 	return conf;
 }
-EXPORT_SYMBOL_GPL(vb2_dma_contig_init_ctx);
+EXPORT_SYMBOL_GPL(vb2_dma_contig_init_ctx_attrs);
 
 void vb2_dma_contig_cleanup_ctx(void *alloc_ctx)
 {
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 7e2c43f..2b2181c 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -62,7 +62,7 @@
 {
 	struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
 
-	return test_bit(to_nd_device_type(dev), &nd_drv->type);
+	return !!test_bit(to_nd_device_type(dev), &nd_drv->type);
 }
 
 static struct module *to_bus_provider(struct device *dev)
diff --git a/include/linux/clkdev.h b/include/linux/clkdev.h
index 08bffcc..c2c04f7 100644
--- a/include/linux/clkdev.h
+++ b/include/linux/clkdev.h
@@ -44,8 +44,7 @@
 void clkdev_add_table(struct clk_lookup *, size_t);
 int clk_add_alias(const char *, const char *, const char *, struct device *);
 
-int clk_register_clkdev(struct clk *, const char *, const char *, ...)
-	__printf(3, 4);
+int clk_register_clkdev(struct clk *, const char *, const char *);
 int clk_register_clkdevs(struct clk *, struct clk_lookup *, size_t);
 
 #ifdef CONFIG_COMMON_CLK
diff --git a/include/linux/device.h b/include/linux/device.h
index 6d6f1fe..8f2ec81 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -70,8 +70,11 @@
  * @dev_groups:	Default attributes of the devices on the bus.
  * @drv_groups: Default attributes of the device drivers on the bus.
  * @match:	Called, perhaps multiple times, whenever a new device or driver
- *		is added for this bus. It should return a nonzero value if the
- *		given device can be handled by the given driver.
+ *		is added for this bus. It should return a positive value if the
+ *		given device can be handled by the given driver and zero
+ *		otherwise. It may also return an error code if it cannot
+ *		determine whether the given driver supports the device; if
+ *		-EPROBE_DEFER is returned, the device is queued for deferred probing.
  * @uevent:	Called when a device is added, removed, or a few other things
  *		that generate uevents to add the environment variables.
  * @probe:	Called when a new device or driver add to this bus, and callback
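
To illustrate the extended contract, here is a hedged sketch of a bus
->match() implementation; the foo_* types and helpers are hypothetical and
only show the three kinds of return value the driver core now understands:

	static int foo_bus_match(struct device *dev, struct device_driver *drv)
	{
		struct foo_device *fdev = to_foo_device(dev);	/* hypothetical */
		struct foo_driver *fdrv = to_foo_driver(drv);	/* hypothetical */

		/* Matching data lives in a table we may not have parsed yet. */
		if (!foo_bus_table_ready())			/* hypothetical */
			return -EPROBE_DEFER;

		/* Positive value: match; zero: no match. */
		return strcmp(fdev->type, fdrv->type) == 0;
	}
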
diff --git a/include/linux/dma-attrs.h b/include/linux/dma-attrs.h
index 99c0be0..5246239 100644
--- a/include/linux/dma-attrs.h
+++ b/include/linux/dma-attrs.h
@@ -18,6 +18,7 @@
 	DMA_ATTR_NO_KERNEL_MAPPING,
 	DMA_ATTR_SKIP_CPU_SYNC,
 	DMA_ATTR_FORCE_CONTIGUOUS,
+	DMA_ATTR_ALLOC_SINGLE_PAGES,
 	DMA_ATTR_MAX,
 };
 
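
At this point in the tree DMA attributes are still carried in a struct
dma_attrs, so a driver opts in to the new hint roughly as follows (a sketch;
dev and size stand in for the caller's real device and buffer size):

	DEFINE_DMA_ATTRS(attrs);
	dma_addr_t dma_handle;
	void *vaddr;

	dma_set_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, &attrs);
	vaddr = dma_alloc_attrs(dev, size, &dma_handle, GFP_KERNEL, &attrs);
	if (!vaddr)
		return -ENOMEM;
	/* ... use the buffer ... */
	dma_free_attrs(dev, size, vaddr, dma_handle, &attrs);
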
diff --git a/include/linux/psci.h b/include/linux/psci.h
index 12c4865..393efe2 100644
--- a/include/linux/psci.h
+++ b/include/linux/psci.h
@@ -24,6 +24,9 @@
 bool psci_power_state_loses_context(u32 state);
 bool psci_power_state_is_valid(u32 state);
 
+int psci_cpu_init_idle(unsigned int cpu);
+int psci_cpu_suspend_enter(unsigned long index);
+
 struct psci_operations {
 	int (*cpu_suspend)(u32 state, unsigned long entry_point);
 	int (*cpu_off)(u32 state);
diff --git a/include/media/videobuf2-dma-contig.h b/include/media/videobuf2-dma-contig.h
index c33dfa6..2087c9a 100644
--- a/include/media/videobuf2-dma-contig.h
+++ b/include/media/videobuf2-dma-contig.h
@@ -16,6 +16,8 @@
 #include <media/videobuf2-v4l2.h>
 #include <linux/dma-mapping.h>
 
+struct dma_attrs;
+
 static inline dma_addr_t
 vb2_dma_contig_plane_dma_addr(struct vb2_buffer *vb, unsigned int plane_no)
 {
@@ -24,7 +26,14 @@
 	return *addr;
 }
 
-void *vb2_dma_contig_init_ctx(struct device *dev);
+void *vb2_dma_contig_init_ctx_attrs(struct device *dev,
+				    struct dma_attrs *attrs);
+
+static inline void *vb2_dma_contig_init_ctx(struct device *dev)
+{
+	return vb2_dma_contig_init_ctx_attrs(dev, NULL);
+}
+
 void vb2_dma_contig_cleanup_ctx(void *alloc_ctx);
 
 extern const struct vb2_mem_ops vb2_dma_contig_memops;
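
A capture driver that can live without a kernel mapping and does not need
TLB-friendly allocations could now build its allocation context like this
(a sketch; priv and pdev are placeholders for the driver's own state and
platform device):

	DEFINE_DMA_ATTRS(attrs);

	dma_set_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, &attrs);
	dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);

	priv->alloc_ctx = vb2_dma_contig_init_ctx_attrs(&pdev->dev, &attrs);
	if (IS_ERR(priv->alloc_ctx))
		return PTR_ERR(priv->alloc_ctx);

With DMA_ATTR_NO_KERNEL_MAPPING set, buf->vaddr stays NULL, which is why the
kmap and vmap paths above now check for it.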