Merge tag 'sh-for-4.17' of git://git.libc.org/linux-sh

Pull arch/sh updates from Rich Felker:
 "Fixes for bugs in futex, device tree, and userspace breakpoint traps,
  and for PCI issues on SH7786"

* tag 'sh-for-4.17' of git://git.libc.org/linux-sh:
  arch/sh: pcie-sh7786: handle non-zero DMA offset
  arch/sh: pcie-sh7786: adjust the memory mapping
  arch/sh: pcie-sh7786: adjust PCI MEM and IO regions
  arch/sh: pcie-sh7786: exclude unusable PCI MEM areas
  arch/sh: pcie-sh7786: mark unavailable PCI resource as disabled
  arch/sh: pci: don't use disabled resources
  arch/sh: make the DMA mapping operations observe dev->dma_pfn_offset
  arch/sh: add sh7786_mm_sel() function
  sh: fix debug trap failure to process signals before return to user
  sh: fix memory corruption of unflattened device tree
  sh: fix futex FUTEX_OP_SET op on userspace addresses
diff --git a/arch/sh/boards/of-generic.c b/arch/sh/boards/of-generic.c
index 4feb7c8..46b2481 100644
--- a/arch/sh/boards/of-generic.c
+++ b/arch/sh/boards/of-generic.c
@@ -126,12 +126,6 @@ static void __init sh_of_setup(char **cmdline_p)
 {
 	struct device_node *root;
 
-#ifdef CONFIG_USE_BUILTIN_DTB
-	unflatten_and_copy_device_tree();
-#else
-	unflatten_device_tree();
-#endif
-
 	board_time_init = sh_of_time_init;
 
 	sh_mv.mv_name = "Unknown SH model";
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 5976a2c..e5b7437 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -49,6 +49,8 @@ static void pcibios_scanbus(struct pci_channel *hose)
 	for (i = 0; i < hose->nr_resources; i++) {
 		res = hose->resources + i;
 		offset = 0;
+		if (res->flags & IORESOURCE_DISABLED)
+			continue;
 		if (res->flags & IORESOURCE_IO)
 			offset = hose->io_offset;
 		else if (res->flags & IORESOURCE_MEM)
@@ -102,6 +104,9 @@ int register_pci_controller(struct pci_channel *hose)
 	for (i = 0; i < hose->nr_resources; i++) {
 		struct resource *res = hose->resources + i;
 
+		if (res->flags & IORESOURCE_DISABLED)
+			continue;
+
 		if (res->flags & IORESOURCE_IO) {
 			if (request_resource(&ioport_resource, res) < 0)
 				goto out;
diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c
index 0167a73..382e7ec 100644
--- a/arch/sh/drivers/pci/pcie-sh7786.c
+++ b/arch/sh/drivers/pci/pcie-sh7786.c
@@ -19,6 +19,7 @@
 #include <linux/clk.h>
 #include <linux/sh_clk.h>
 #include <linux/sh_intc.h>
+#include <cpu/sh7786.h>
 #include "pcie-sh7786.h"
 #include <asm/sizes.h>
 
@@ -32,6 +33,7 @@ struct sh7786_pcie_port {
 
 static struct sh7786_pcie_port *sh7786_pcie_ports;
 static unsigned int nr_ports;
+static unsigned long dma_pfn_offset;
 
 static struct sh7786_pcie_hwops {
 	int (*core_init)(void);
@@ -40,73 +42,73 @@ static struct sh7786_pcie_hwops {
 
 static struct resource sh7786_pci0_resources[] = {
 	{
-		.name	= "PCIe0 IO",
+		.name	= "PCIe0 MEM 0",
 		.start	= 0xfd000000,
 		.end	= 0xfd000000 + SZ_8M - 1,
-		.flags	= IORESOURCE_IO,
+		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "PCIe0 MEM 0",
+		.name	= "PCIe0 MEM 1",
 		.start	= 0xc0000000,
 		.end	= 0xc0000000 + SZ_512M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe0 MEM 1",
+		.name	= "PCIe0 MEM 2",
 		.start	= 0x10000000,
 		.end	= 0x10000000 + SZ_64M - 1,
 		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "PCIe0 MEM 2",
+		.name	= "PCIe0 IO",
 		.start	= 0xfe100000,
 		.end	= 0xfe100000 + SZ_1M - 1,
-		.flags	= IORESOURCE_MEM,
+		.flags	= IORESOURCE_IO,
 	},
 };
 
 static struct resource sh7786_pci1_resources[] = {
 	{
-		.name	= "PCIe1 IO",
+		.name	= "PCIe1 MEM 0",
 		.start	= 0xfd800000,
 		.end	= 0xfd800000 + SZ_8M - 1,
-		.flags	= IORESOURCE_IO,
+		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "PCIe1 MEM 0",
+		.name	= "PCIe1 MEM 1",
 		.start	= 0xa0000000,
 		.end	= 0xa0000000 + SZ_512M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe1 MEM 1",
+		.name	= "PCIe1 MEM 2",
 		.start	= 0x30000000,
 		.end	= 0x30000000 + SZ_256M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe1 MEM 2",
+		.name	= "PCIe1 IO",
 		.start	= 0xfe300000,
 		.end	= 0xfe300000 + SZ_1M - 1,
-		.flags	= IORESOURCE_MEM,
+		.flags	= IORESOURCE_IO,
 	},
 };
 
 static struct resource sh7786_pci2_resources[] = {
 	{
-		.name	= "PCIe2 IO",
+		.name	= "PCIe2 MEM 0",
 		.start	= 0xfc800000,
 		.end	= 0xfc800000 + SZ_4M - 1,
-		.flags	= IORESOURCE_IO,
+		.flags	= IORESOURCE_MEM,
 	}, {
-		.name	= "PCIe2 MEM 0",
+		.name	= "PCIe2 MEM 1",
 		.start	= 0x80000000,
 		.end	= 0x80000000 + SZ_512M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe2 MEM 1",
+		.name	= "PCIe2 MEM 2",
 		.start	= 0x20000000,
 		.end	= 0x20000000 + SZ_256M - 1,
 		.flags	= IORESOURCE_MEM | IORESOURCE_MEM_32BIT,
 	}, {
-		.name	= "PCIe2 MEM 2",
+		.name	= "PCIe2 IO",
 		.start	= 0xfcd00000,
 		.end	= 0xfcd00000 + SZ_1M - 1,
-		.flags	= IORESOURCE_MEM,
+		.flags	= IORESOURCE_IO,
 	},
 };
 
@@ -301,7 +303,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
 {
 	struct pci_channel *chan = port->hose;
 	unsigned int data;
-	phys_addr_t memphys;
+	phys_addr_t memstart, memend;
 	size_t memsize;
 	int ret, i, win;
 
@@ -357,15 +359,26 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
 	data |= (0xff << 16);
 	pci_write_reg(chan, data, SH4A_PCIEMACCTLR);
 
-	memphys = __pa(memory_start);
-	memsize = roundup_pow_of_two(memory_end - memory_start);
+	memstart = __pa(memory_start);
+	memend   = __pa(memory_end);
+	memsize = roundup_pow_of_two(memend - memstart);
+
+	/*
+	 * The start address must be aligned on its size. So we round
+	 * it down, and then recalculate the size so that it covers
+	 * the entire memory.
+	 */
+	memstart = ALIGN_DOWN(memstart, memsize);
+	memsize = roundup_pow_of_two(memend - memstart);
+
+	dma_pfn_offset = memstart >> PAGE_SHIFT;
 
 	/*
 	 * If there's more than 512MB of memory, we need to roll over to
 	 * LAR1/LAMR1.
 	 */
 	if (memsize > SZ_512M) {
-		pci_write_reg(chan, memphys + SZ_512M, SH4A_PCIELAR1);
+		pci_write_reg(chan, memstart + SZ_512M, SH4A_PCIELAR1);
 		pci_write_reg(chan, ((memsize - SZ_512M) - SZ_256) | 1,
 			      SH4A_PCIELAMR1);
 		memsize = SZ_512M;
@@ -381,7 +394,7 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
 	 * LAR0/LAMR0 covers up to the first 512MB, which is enough to
 	 * cover all of lowmem on most platforms.
 	 */
-	pci_write_reg(chan, memphys, SH4A_PCIELAR0);
+	pci_write_reg(chan, memstart, SH4A_PCIELAR0);
 	pci_write_reg(chan, (memsize - SZ_256) | 1, SH4A_PCIELAMR0);
 
 	/* Finish initialization */
@@ -438,6 +451,9 @@ static int __init pcie_init(struct sh7786_pcie_port *port)
 		 * mode, so just skip them entirely.
 		 */
 		if ((res->flags & IORESOURCE_MEM_32BIT) && __in_29bit_mode())
+			res->flags |= IORESOURCE_DISABLED;
+
+		if (res->flags & IORESOURCE_DISABLED)
 			continue;
 
 		pci_write_reg(chan, 0x00000000, SH4A_PCIEPTCTLR(win));
@@ -472,6 +488,11 @@ int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
         return evt2irq(0xae0);
 }
 
+void pcibios_bus_add_device(struct pci_dev *pdev)
+{
+	pdev->dev.dma_pfn_offset = dma_pfn_offset;
+}
+
 static int __init sh7786_pcie_core_init(void)
 {
 	/* Return the number of ports */
@@ -527,6 +548,7 @@ static struct sh7786_pcie_hwops sh7786_65nm_pcie_hwops __initdata = {
 static int __init sh7786_pcie_init(void)
 {
 	struct clk *platclk;
+	u32 mm_sel;
 	int i;
 
 	printk(KERN_NOTICE "PCI: Starting initialization.\n");
@@ -560,6 +582,16 @@ static int __init sh7786_pcie_init(void)
 
 	clk_enable(platclk);
 
+	mm_sel = sh7786_mm_sel();
+
+	/*
+	 * Depending on the MMSELR register value, the PCIe0 MEM 1
+	 * area may not be available. See Table 13.11 of the SH7786
+	 * datasheet.
+	 */
+	if (mm_sel != 1 && mm_sel != 2 && mm_sel != 5 && mm_sel != 6)
+		sh7786_pci0_resources[2].flags |= IORESOURCE_DISABLED;
+
 	printk(KERN_NOTICE "PCI: probing %d ports.\n", nr_ports);
 
 	for (i = 0; i < nr_ports; i++) {
diff --git a/arch/sh/include/asm/futex.h b/arch/sh/include/asm/futex.h
index 15bf07b..6d192f4 100644
--- a/arch/sh/include/asm/futex.h
+++ b/arch/sh/include/asm/futex.h
@@ -37,10 +37,7 @@ static inline int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval,
 	pagefault_disable();
 
 	do {
-		if (op == FUTEX_OP_SET)
-			ret = oldval = 0;
-		else
-			ret = get_user(oldval, uaddr);
+		ret = get_user(oldval, uaddr);
 
 		if (ret) break;
 
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7786.h b/arch/sh/include/cpu-sh4/cpu/sh7786.h
index 0df09e6..96b8cb1 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7786.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7786.h
@@ -14,6 +14,8 @@
 #ifndef __CPU_SH7786_H__
 #define __CPU_SH7786_H__
 
+#include <linux/io.h>
+
 enum {
 	/* PA */
 	GPIO_PA7, GPIO_PA6, GPIO_PA5, GPIO_PA4,
@@ -131,4 +133,9 @@ enum {
 	GPIO_FN_IRL7, GPIO_FN_IRL6, GPIO_FN_IRL5, GPIO_FN_IRL4,
 };
 
+static inline u32 sh7786_mm_sel(void)
+{
+	return __raw_readl(0xFC400020) & 0x7;
+}
+
 #endif /* __CPU_SH7786_H__ */
diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c
index 62b4851..178457d 100644
--- a/arch/sh/kernel/dma-nommu.c
+++ b/arch/sh/kernel/dma-nommu.c
@@ -16,7 +16,8 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
 				 enum dma_data_direction dir,
 				 unsigned long attrs)
 {
-	dma_addr_t addr = page_to_phys(page) + offset;
+	dma_addr_t addr = page_to_phys(page) + offset
+		- PFN_PHYS(dev->dma_pfn_offset);
 
 	WARN_ON(size == 0);
 
@@ -36,12 +37,14 @@ static int nommu_map_sg(struct device *dev, struct scatterlist *sg,
 	WARN_ON(nents == 0 || sg[0].length == 0);
 
 	for_each_sg(sg, s, nents, i) {
+		dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset);
+
 		BUG_ON(!sg_page(s));
 
 		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
 			sh_sync_dma_for_device(sg_virt(s), s->length, dir);
 
-		s->dma_address = sg_phys(s);
+		s->dma_address = sg_phys(s) - offset;
 		s->dma_length = s->length;
 	}
 
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index c001f78..28cc612 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -255,7 +255,7 @@
 	mov.l	@r8, r8
 	jsr	@r8
 	 nop
-	bra	__restore_all
+	bra	ret_from_exception
 	 nop
 	CFI_ENDPROC
 
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index b95c411..d34e998 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -330,6 +330,14 @@ void __init setup_arch(char **cmdline_p)
 	/* Let earlyprintk output early console messages */
 	early_platform_driver_probe("earlyprintk", 1, 1);
 
+#ifdef CONFIG_OF_FLATTREE
+#ifdef CONFIG_USE_BUILTIN_DTB
+	unflatten_and_copy_device_tree();
+#else
+	unflatten_device_tree();
+#endif
+#endif
+
 	paging_init();
 
 #ifdef CONFIG_DUMMY_CONSOLE
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 6ea3aab..8ce9869 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -59,7 +59,7 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size,
 
 	split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
 
-	*dma_handle = virt_to_phys(ret);
+	*dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset);
 
 	return ret_nocache;
 }
@@ -69,7 +69,7 @@ void dma_generic_free_coherent(struct device *dev, size_t size,
 			       unsigned long attrs)
 {
 	int order = get_order(size);
-	unsigned long pfn = dma_handle >> PAGE_SHIFT;
+	unsigned long pfn = (dma_handle >> PAGE_SHIFT) + dev->dma_pfn_offset;
 	int k;
 
 	for (k = 0; k < (1 << order); k++)