Merge branch 'for-2.6.24' of master.kernel.org:/pub/scm/linux/kernel/git/jwboyer/powerpc-4xx into merge
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 2233e3d..ac1be25 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -52,6 +52,7 @@
       i) Freescale QUICC Engine module (QE)
       j) CFI or JEDEC memory-mapped NOR flash
       k) Global Utilities Block
+      l) Xilinx IP cores
 
   VII - Specifying interrupt information for devices
     1) interrupts property
@@ -2252,6 +2253,266 @@
 			   available.
 			   For Axon: 0x0000012a
 
+   l) Xilinx IP cores
+
+   The Xilinx EDK toolchain ships with a set of IP cores (devices) for use
+   in Xilinx Spartan and Virtex FPGAs.  The devices cover the whole range
+   of standard device types (network, serial, etc.) and miscellanious
+   devices (gpio, LCD, spi, etc).  Also, since these devices are
+   implemented within the fpga fabric every instance of the device can be
+   synthesised with different options that change the behaviour.
+
+   Each IP-core has a set of parameters which the FPGA designer can use to
+   control how the core is synthesized.  Historically, the EDK tool would
+   extract the device parameters relevant to device drivers and copy them
+   into an 'xparameters.h' in the form of #define symbols.  This tells the
+   device drivers how the IP cores are configured, but it requres the kernel
+   to be recompiled every time the FPGA bitstream is resynthesized.
+
+   The new approach is to export the parameters into the device tree and
+   generate a new device tree each time the FPGA bitstream changes.  The
+   parameters which used to be exported as #defines will now become
+   properties of the device node.  In general, device nodes for IP-cores
+   will take the following form:
+
+	(name)@(base-address) {
+		compatible = "xlnx,(ip-core-name)-(HW_VER)"
+			     [, (list of compatible devices), ...];
+		reg = <(baseaddr) (size)>;
+		interrupt-parent = <&interrupt-controller-phandle>;
+		interrupts = < ... >;
+		xlnx,(parameter1) = "(string-value)";
+		xlnx,(parameter2) = <(int-value)>;
+	};
+
+	(ip-core-name):	the name of the ip block (given after the BEGIN
+			directive in system.mhs).  Should be in lowercase
+			and all underscores '_' converted to dashes '-'.
+	(name):		is derived from the "PARAMETER INSTANCE" value.
+	(parameter#):	C_* parameters from system.mhs.  The C_ prefix is
+			dropped from the parameter name, the name is converted
+			to lowercase and all underscore '_' characters are
+			converted to dashes '-'.
+	(baseaddr):	the C_BASEADDR parameter.
+	(HW_VER):	from the HW_VER parameter.
+	(size):		equals C_HIGHADDR - C_BASEADDR + 1
+
+   Typically, the compatible list will include the exact IP core version
+   followed by an older IP core version which implements the same
+   interface or any other device with the same interface.
+
+   'reg', 'interrupt-parent' and 'interrupts' are all optional properties.
+
+   For example, the following block from system.mhs:
+
+	BEGIN opb_uartlite
+		PARAMETER INSTANCE = opb_uartlite_0
+		PARAMETER HW_VER = 1.00.b
+		PARAMETER C_BAUDRATE = 115200
+		PARAMETER C_DATA_BITS = 8
+		PARAMETER C_ODD_PARITY = 0
+		PARAMETER C_USE_PARITY = 0
+		PARAMETER C_CLK_FREQ = 50000000
+		PARAMETER C_BASEADDR = 0xEC100000
+		PARAMETER C_HIGHADDR = 0xEC10FFFF
+		BUS_INTERFACE SOPB = opb_7
+		PORT OPB_Clk = CLK_50MHz
+		PORT Interrupt = opb_uartlite_0_Interrupt
+		PORT RX = opb_uartlite_0_RX
+		PORT TX = opb_uartlite_0_TX
+		PORT OPB_Rst = sys_bus_reset_0
+	END
+
+   becomes the following device tree node:
+
+	opb-uartlite-0@ec100000 {
+		device_type = "serial";
+		compatible = "xlnx,opb-uartlite-1.00.b";
+		reg = <ec100000 10000>;
+		interrupt-parent = <&opb-intc>;
+		interrupts = <1 0>; // got this from the opb_intc parameters
+		current-speed = <d#115200>;	// standard serial device prop
+		clock-frequency = <d#50000000>;	// standard serial device prop
+		xlnx,data-bits = <8>;
+		xlnx,odd-parity = <0>;
+		xlnx,use-parity = <0>;
+	};
+
+   Some IP cores actually implement 2 or more logical devices.  In this case,
+   the device should still describe the whole IP core with a single node
+   and add a child node for each logical device.  The ranges property can
+   be used to translate from parent IP-core to the registers of each device.
+   (Note: this makes the assumption that both logical devices have the same
+   bus binding.  If this is not true, then separate nodes should be used for
+   each logical device).  The 'cell-index' property can be used to enumerate
+   logical devices within an IP core.  For example, the following is the
+   system.mhs entry for the dual ps2 controller found on the ml403 reference
+   design.
+
+	BEGIN opb_ps2_dual_ref
+		PARAMETER INSTANCE = opb_ps2_dual_ref_0
+		PARAMETER HW_VER = 1.00.a
+		PARAMETER C_BASEADDR = 0xA9000000
+		PARAMETER C_HIGHADDR = 0xA9001FFF
+		BUS_INTERFACE SOPB = opb_v20_0
+		PORT Sys_Intr1 = ps2_1_intr
+		PORT Sys_Intr2 = ps2_2_intr
+		PORT Clkin1 = ps2_clk_rx_1
+		PORT Clkin2 = ps2_clk_rx_2
+		PORT Clkpd1 = ps2_clk_tx_1
+		PORT Clkpd2 = ps2_clk_tx_2
+		PORT Rx1 = ps2_d_rx_1
+		PORT Rx2 = ps2_d_rx_2
+		PORT Txpd1 = ps2_d_tx_1
+		PORT Txpd2 = ps2_d_tx_2
+	END
+
+   It would result in the following device tree nodes:
+
+	opb_ps2_dual_ref_0@a9000000 {
+		ranges = <0 a9000000 2000>;
+		// If this device had extra parameters, then they would
+		// go here.
+		ps2@0 {
+			compatible = "xlnx,opb-ps2-dual-ref-1.00.a";
+			reg = <0 40>;
+			interrupt-parent = <&opb-intc>;
+			interrupts = <3 0>;
+			cell-index = <0>;
+		};
+		ps2@1000 {
+			compatible = "xlnx,opb-ps2-dual-ref-1.00.a";
+			reg = <1000 40>;
+			interrupt-parent = <&opb-intc>;
+			interrupts = <3 0>;
+			cell-index = <0>;
+		};
+	};
+
+   Also, the system.mhs file defines bus attachments from the processor
+   to the devices.  The device tree structure should reflect the bus
+   attachments.  Again an example; this system.mhs fragment:
+
+	BEGIN ppc405_virtex4
+		PARAMETER INSTANCE = ppc405_0
+		PARAMETER HW_VER = 1.01.a
+		BUS_INTERFACE DPLB = plb_v34_0
+		BUS_INTERFACE IPLB = plb_v34_0
+	END
+
+	BEGIN opb_intc
+		PARAMETER INSTANCE = opb_intc_0
+		PARAMETER HW_VER = 1.00.c
+		PARAMETER C_BASEADDR = 0xD1000FC0
+		PARAMETER C_HIGHADDR = 0xD1000FDF
+		BUS_INTERFACE SOPB = opb_v20_0
+	END
+
+	BEGIN opb_uart16550
+		PARAMETER INSTANCE = opb_uart16550_0
+		PARAMETER HW_VER = 1.00.d
+		PARAMETER C_BASEADDR = 0xa0000000
+		PARAMETER C_HIGHADDR = 0xa0001FFF
+		BUS_INTERFACE SOPB = opb_v20_0
+	END
+
+	BEGIN plb_v34
+		PARAMETER INSTANCE = plb_v34_0
+		PARAMETER HW_VER = 1.02.a
+	END
+
+	BEGIN plb_bram_if_cntlr
+		PARAMETER INSTANCE = plb_bram_if_cntlr_0
+		PARAMETER HW_VER = 1.00.b
+		PARAMETER C_BASEADDR = 0xFFFF0000
+		PARAMETER C_HIGHADDR = 0xFFFFFFFF
+		BUS_INTERFACE SPLB = plb_v34_0
+	END
+
+	BEGIN plb2opb_bridge
+		PARAMETER INSTANCE = plb2opb_bridge_0
+		PARAMETER HW_VER = 1.01.a
+		PARAMETER C_RNG0_BASEADDR = 0x20000000
+		PARAMETER C_RNG0_HIGHADDR = 0x3FFFFFFF
+		PARAMETER C_RNG1_BASEADDR = 0x60000000
+		PARAMETER C_RNG1_HIGHADDR = 0x7FFFFFFF
+		PARAMETER C_RNG2_BASEADDR = 0x80000000
+		PARAMETER C_RNG2_HIGHADDR = 0xBFFFFFFF
+		PARAMETER C_RNG3_BASEADDR = 0xC0000000
+		PARAMETER C_RNG3_HIGHADDR = 0xDFFFFFFF
+		BUS_INTERFACE SPLB = plb_v34_0
+		BUS_INTERFACE MOPB = opb_v20_0
+	END
+
+   Gives this device tree (some properties removed for clarity):
+
+	plb-v34-0 {
+		#address-cells = <1>;
+		#size-cells = <1>;
+		device_type = "ibm,plb";
+		ranges; // 1:1 translation
+
+		plb-bram-if-cntrl-0@ffff0000 {
+			reg = <ffff0000 10000>;
+		}
+
+		opb-v20-0 {
+			#address-cells = <1>;
+			#size-cells = <1>;
+			ranges = <20000000 20000000 20000000
+				  60000000 60000000 20000000
+				  80000000 80000000 40000000
+				  c0000000 c0000000 20000000>;
+
+			opb-uart16550-0@a0000000 {
+				reg = <a00000000 2000>;
+			};
+
+			opb-intc-0@d1000fc0 {
+				reg = <d1000fc0 20>;
+			};
+		};
+	};
+
+   That covers the general approach to binding xilinx IP cores into the
+   device tree.  The following are bindings for specific devices:
+
+      i) Xilinx ML300 Framebuffer
+
+      Simple framebuffer device from the ML300 reference design (also on the
+      ML403 reference design as well as others).
+
+      Optional properties:
+       - resolution = <xres yres> : pixel resolution of framebuffer.  Some
+                                    implementations use a different resolution.
+                                    Default is <d#640 d#480>
+       - virt-resolution = <xvirt yvirt> : Size of framebuffer in memory.
+                                           Default is <d#1024 d#480>.
+       - rotate-display (empty) : rotate display 180 degrees.
+
+      ii) Xilinx SystemACE
+
+      The Xilinx SystemACE device is used to program FPGAs from an FPGA
+      bitstream stored on a CF card.  It can also be used as a generic CF
+      interface device.
+
+      Optional properties:
+       - 8-bit (empty) : Set this property for SystemACE in 8 bit mode
+
+      iii) Xilinx EMAC and Xilinx TEMAC
+
+      Xilinx Ethernet devices.  In addition to general xilinx properties
+      listed above, nodes for these devices should include a phy-handle
+      property, and may include other common network device properties
+      like local-mac-address.
+      
+      iv) Xilinx Uartlite
+
+      Xilinx uartlite devices are simple fixed speed serial ports.
+
+      Requred properties:
+       - current-speed : Baud rate of uartlite
+
    More devices will be defined as this spec matures.
 
 VII - Specifying interrupt information for devices
diff --git a/arch/powerpc/boot/dts/walnut.dts b/arch/powerpc/boot/dts/walnut.dts
index fa681f5..754fa39 100644
--- a/arch/powerpc/boot/dts/walnut.dts
+++ b/arch/powerpc/boot/dts/walnut.dts
@@ -122,7 +122,9 @@
 				device_type = "network";
 				compatible = "ibm,emac-405gp", "ibm,emac";
 				interrupt-parent = <&UIC0>;
-				interrupts = <9 4 f 4>;
+				interrupts = <
+					f 4 /* Ethernet */
+					9 4 /* Ethernet Wake Up */>;
 				local-mac-address = [000000000000]; /* Filled in by zImage */
 				reg = <ef600800 70>;
 				mal-device = <&MAL>;
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index ece6f49..31147a0 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -21,6 +21,14 @@
 #		(default ./arch/powerpc/boot)
 # -W dir	specify working directory for temporary files (default .)
 
+# Stop execution if any command fails
+set -e
+
+# Allow for verbose output
+if [ "$V" = 1 ]; then
+    set -x
+fi
+
 # defaults
 kernel=
 ofile=zImage
@@ -111,7 +119,7 @@
     if [ -z "$dtb" ]; then
 	dtb="$platform.dtb"
     fi
-    dtc -O dtb -o "$dtb" -b 0 -V 16 "$dts" || exit 1
+    dtc -O dtb -o "$dtb" -b 0 -V 16 "$dts"
 fi
 
 if [ -z "$kernel" ]; then
@@ -283,23 +291,13 @@
 
     ${CROSS}objcopy -O binary "$ofile" "$ofile.bin"
 
-    msg=$(dd if="$ofile.bin" of="$ofile.bin" conv=notrunc \
-        skip=$overlay_dest seek=$system_reset_kernel      \
-        count=$overlay_size bs=1 2>&1)
+    dd if="$ofile.bin" of="$ofile.bin" conv=notrunc   \
+        skip=$overlay_dest seek=$system_reset_kernel  \
+        count=$overlay_size bs=1
 
-    if [ $? -ne "0" ]; then
-       echo $msg
-       exit 1
-    fi
-
-    msg=$(dd if="$ofile.bin" of="$ofile.bin" conv=notrunc \
-        skip=$system_reset_overlay seek=$overlay_dest     \
-        count=$overlay_size bs=1 2>&1)
-
-    if [ $? -ne "0" ]; then
-       echo $msg
-       exit 2
-    fi
+    dd if="$ofile.bin" of="$ofile.bin" conv=notrunc   \
+        skip=$system_reset_overlay seek=$overlay_dest \
+        count=$overlay_size bs=1
 
     gzip --force -9 --stdout "$ofile.bin" > "$object/otheros.bld"
     ;;
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index d3fb7d0..9ed351f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1104,6 +1104,16 @@
 	{
 		.pvr_mask		= 0xf0000fff,
 		.pvr_value		= 0x40000850,
+		.cpu_name		= "440GR Rev. A",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.platform		= "ppc440",
+	},
+	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x40000858,
 		.cpu_name		= "440EP Rev. A",
 		.cpu_features		= CPU_FTRS_44X,
 		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
@@ -1115,6 +1125,16 @@
 	{
 		.pvr_mask		= 0xf0000fff,
 		.pvr_value		= 0x400008d3,
+		.cpu_name		= "440GR Rev. B",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.platform		= "ppc440",
+	},
+	{ /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000fff,
+		.pvr_value		= 0x400008db,
 		.cpu_name		= "440EP Rev. B",
 		.cpu_features		= CPU_FTRS_44X,
 		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
@@ -1123,20 +1143,9 @@
 		.cpu_setup		= __setup_cpu_440ep,
 		.platform		= "ppc440",
 	},
-	{ /* 440EPX */
-		.pvr_mask		= 0xf0000ffb,
-		.pvr_value		= 0x200008D0,
-		.cpu_name		= "440EPX",
-		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
-		.icache_bsize		= 32,
-		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_440epx,
-		.platform		= "ppc440",
-	},
 	{ /* 440GRX */
 		.pvr_mask		= 0xf0000ffb,
-		.pvr_value		= 0x200008D8,
+		.pvr_value		= 0x200008D0,
 		.cpu_name		= "440GRX",
 		.cpu_features		= CPU_FTRS_44X,
 		.cpu_user_features	= COMMON_USER_BOOKE,
@@ -1145,6 +1154,17 @@
 		.cpu_setup		= __setup_cpu_440grx,
 		.platform		= "ppc440",
 	},
+	{ /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
+		.pvr_mask		= 0xf0000ffb,
+		.pvr_value		= 0x200008D8,
+		.cpu_name		= "440EPX",
+		.cpu_features		= CPU_FTRS_44X,
+		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.icache_bsize		= 32,
+		.dcache_bsize		= 32,
+		.cpu_setup		= __setup_cpu_440epx,
+		.platform		= "ppc440",
+	},
 	{	/* 440GP Rev. B */
 		.pvr_mask		= 0xf0000fff,
 		.pvr_value		= 0x40000440,
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 21d889e..a7572cf 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -244,6 +244,13 @@
 	andis.	r10,r0,DBCR0_IC@h
 	bnel-	load_dbcr0
 #endif
+#ifdef CONFIG_44x
+	lis	r4,icache_44x_need_flush@ha
+	lwz	r5,icache_44x_need_flush@l(r4)
+	cmplwi	cr0,r5,0
+	bne-	2f
+1:
+#endif /* CONFIG_44x */
 	stwcx.	r0,0,r1			/* to clear the reservation */
 	lwz	r4,_LINK(r1)
 	lwz	r5,_CCR(r1)
@@ -258,6 +265,12 @@
 	mtspr	SPRN_SRR1,r8
 	SYNC
 	RFI
+#ifdef CONFIG_44x
+2:	li	r7,0
+	iccci	r0,r0
+	stw	r7,icache_44x_need_flush@l(r4)
+	b	1b
+#endif  /* CONFIG_44x */
 
 66:	li	r3,-ENOSYS
 	b	ret_from_syscall
@@ -683,6 +696,16 @@
 
 	/* interrupts are hard-disabled at this point */
 restore:
+#ifdef CONFIG_44x
+	lis	r4,icache_44x_need_flush@ha
+	lwz	r5,icache_44x_need_flush@l(r4)
+	cmplwi	cr0,r5,0
+	beq+	1f
+	li	r6,0
+	iccci	r0,r0
+	stw	r6,icache_44x_need_flush@l(r4)
+1:
+#endif  /* CONFIG_44x */
 	lwz	r0,GPR0(r1)
 	lwz	r2,GPR2(r1)
 	REST_4GPRS(3, r1)
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 8533de5..8b642ab 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -288,7 +288,16 @@
  */
 _GLOBAL(_tlbie)
 #if defined(CONFIG_40x)
+	/* We run the search with interrupts disabled because we have to change
+	 * the PID and I don't want to preempt when that happens.
+	 */
+	mfmsr	r5
+	mfspr	r6,SPRN_PID
+	wrteei	0
+	mtspr	SPRN_PID,r4
 	tlbsx.	r3, 0, r3
+	mtspr	SPRN_PID,r6
+	wrtee	r5
 	bne	10f
 	sync
 	/* There are only 64 TLB entries, so r3 < 64, which means bit 25 is clear.
@@ -297,23 +306,23 @@
 	tlbwe	r3, r3, TLB_TAG
 	isync
 10:
+
 #elif defined(CONFIG_44x)
-	mfspr	r4,SPRN_MMUCR
-	mfspr	r5,SPRN_PID			/* Get PID */
-	rlwimi	r4,r5,0,24,31			/* Set TID */
+	mfspr	r5,SPRN_MMUCR
+	rlwimi	r5,r4,0,24,31			/* Set TID */
 
 	/* We have to run the search with interrupts disabled, even critical
 	 * and debug interrupts (in fact the only critical exceptions we have
 	 * are debug and machine check).  Otherwise  an interrupt which causes
 	 * a TLB miss can clobber the MMUCR between the mtspr and the tlbsx. */
-	mfmsr	r5
+	mfmsr	r4
 	lis	r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@ha
 	addi	r6,r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
-	andc	r6,r5,r6
+	andc	r6,r4,r6
 	mtmsr	r6
-	mtspr	SPRN_MMUCR,r4
+	mtspr	SPRN_MMUCR,r5
 	tlbsx.	r3, 0, r3
-	mtmsr	r5
+	mtmsr	r4
 	bne	10f
 	sync
 	/* There are only 64 TLB entries, so r3 < 64,
@@ -534,12 +543,21 @@
 	addi	r3,r3,L1_CACHE_BYTES
 	bdnz	0b
 	sync
+#ifndef CONFIG_44x
+	/* We don't flush the icache on 44x. Those have a virtual icache
+	 * and we don't have access to the virtual address here (it's
+	 * not the page vaddr but where it's mapped in user space). The
+	 * flushing of the icache on these is handled elsewhere, when
+	 * a change in the address space occurs, before returning to
+	 * user space
+	 */
 	mtctr	r4
 1:	icbi	0,r6
 	addi	r6,r6,L1_CACHE_BYTES
 	bdnz	1b
 	sync
 	isync
+#endif /* CONFIG_44x */
 	blr
 
 /*
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9f329a8..acc0d24 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -697,6 +697,18 @@
 		prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
 		if (prop && (*prop & 0xff000000) == 0x0f000000)
 			identify_cpu(0, *prop);
+#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
+		/*
+		 * Since 440GR(x)/440EP(x) processors have the same pvr,
+		 * we check the node path and set bit 28 in the cur_cpu_spec
+		 * pvr for EP(x) processor version. This bit is always 0 in
+		 * the "real" pvr. Then we call identify_cpu again with
+		 * the new logical pvr to enable FPU support.
+		 */
+		if (strstr(uname, "440EP")) {
+			identify_cpu(0, cur_cpu_spec->pvr_value | 0x8);
+		}
+#endif
 	}
 
 	check_cpu_feature_properties(node);
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index e067df8..3899ea9 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -98,13 +98,12 @@
 
 	v = KERNELBASE;
 	p = PPC_MEMSTART;
-	s = 0;
+	s = total_lowmem;
 
-	if (__map_without_ltlbs) {
-		return s;
-	}
+	if (__map_without_ltlbs)
+		return 0;
 
-	while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) {
+	while (s >= LARGE_PAGE_SIZE_16M) {
 		pmd_t *pmdp;
 		unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
 
@@ -116,10 +115,10 @@
 
 		v += LARGE_PAGE_SIZE_16M;
 		p += LARGE_PAGE_SIZE_16M;
-		s += LARGE_PAGE_SIZE_16M;
+		s -= LARGE_PAGE_SIZE_16M;
 	}
 
-	while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) {
+	while (s >= LARGE_PAGE_SIZE_4M) {
 		pmd_t *pmdp;
 		unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
 
@@ -128,8 +127,8 @@
 
 		v += LARGE_PAGE_SIZE_4M;
 		p += LARGE_PAGE_SIZE_4M;
-		s += LARGE_PAGE_SIZE_4M;
+		s -= LARGE_PAGE_SIZE_4M;
 	}
 
-	return s;
+	return total_lowmem - s;
 }
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index c3df504..04dc087 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -35,6 +35,7 @@
  */
 unsigned int tlb_44x_index; /* = 0 */
 unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS;
+int icache_44x_need_flush;
 
 /*
  * "Pins" a 256MB TLB entry in AS0 for kernel lowmem
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a18fda3..8135da0 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -309,7 +309,7 @@
 					set_bit(PG_arch_1, &page->flags);
 				}
 				pte_update(ptep, 0, _PAGE_HWEXEC);
-				_tlbie(address);
+				_tlbie(address, mm->context.id);
 				pte_unmap_unlock(ptep, ptl);
 				up_read(&mm->mmap_sem);
 				return 0;
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index c94a64f..eb3a732 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -61,12 +61,12 @@
 #define mmu_mapin_ram()		(0UL)
 
 #elif defined(CONFIG_4xx)
-#define flush_HPTE(X, va, pg)	_tlbie(va)
+#define flush_HPTE(pid, va, pg)	_tlbie(va, pid)
 extern void MMU_init_hw(void);
 extern unsigned long mmu_mapin_ram(void);
 
 #elif defined(CONFIG_FSL_BOOKE)
-#define flush_HPTE(X, va, pg)	_tlbie(va)
+#define flush_HPTE(pid, va, pg)	_tlbie(va, pid)
 extern void MMU_init_hw(void);
 extern unsigned long mmu_mapin_ram(void);
 extern void adjust_total_lowmem(void);
diff --git a/arch/ppc/kernel/entry.S b/arch/ppc/kernel/entry.S
index fba7ca1..b19bfef 100644
--- a/arch/ppc/kernel/entry.S
+++ b/arch/ppc/kernel/entry.S
@@ -244,6 +244,13 @@
 	andis.	r10,r0,DBCR0_IC@h
 	bnel-	load_dbcr0
 #endif
+#ifdef CONFIG_44x
+	lis	r4,icache_44x_need_flush@ha
+	lwz	r5,icache_44x_need_flush@l(r4)
+	cmplwi	cr0,r5,0
+	bne-	2f
+1:
+#endif /* CONFIG_44x */
 	stwcx.	r0,0,r1			/* to clear the reservation */
 	lwz	r4,_LINK(r1)
 	lwz	r5,_CCR(r1)
@@ -258,6 +265,12 @@
 	mtspr	SPRN_SRR1,r8
 	SYNC
 	RFI
+#ifdef CONFIG_44x
+2:	li	r7,0
+	iccci	r0,r0
+	stw	r7,icache_44x_need_flush@l(r4)
+	b	1b
+#endif  /* CONFIG_44x */
 
 66:	li	r3,-ENOSYS
 	b	ret_from_syscall
@@ -679,6 +692,16 @@
 
 	/* interrupts are hard-disabled at this point */
 restore:
+#ifdef CONFIG_44x
+	lis	r4,icache_44x_need_flush@ha
+	lwz	r5,icache_44x_need_flush@l(r4)
+	cmplwi	cr0,r5,0
+	beq+	1f
+	li	r6,0
+	iccci	r0,r0
+	stw	r6,icache_44x_need_flush@l(r4)
+1:
+#endif  /* CONFIG_44x */
 	lwz	r0,GPR0(r1)
 	lwz	r2,GPR2(r1)
 	REST_4GPRS(3, r1)
diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S
index a22e1f4..e0c850d 100644
--- a/arch/ppc/kernel/misc.S
+++ b/arch/ppc/kernel/misc.S
@@ -224,7 +224,16 @@
  */
 _GLOBAL(_tlbie)
 #if defined(CONFIG_40x)
+	/* We run the search with interrupts disabled because we have to change
+	 * the PID and I don't want to preempt when that happens.
+	 */
+	mfmsr	r5
+	mfspr	r6,SPRN_PID
+	wrteei	0
+	mtspr	SPRN_PID,r4
 	tlbsx.	r3, 0, r3
+	mtspr	SPRN_PID,r6
+	wrtee	r5
 	bne	10f
 	sync
 	/* There are only 64 TLB entries, so r3 < 64, which means bit 25 is clear.
@@ -234,22 +243,21 @@
 	isync
 10:
 #elif defined(CONFIG_44x)
-	mfspr	r4,SPRN_MMUCR
-	mfspr	r5,SPRN_PID			/* Get PID */
-	rlwimi	r4,r5,0,24,31			/* Set TID */
+	mfspr	r5,SPRN_MMUCR
+	rlwimi	r5,r4,0,24,31			/* Set TID */
 
 	/* We have to run the search with interrupts disabled, even critical
 	 * and debug interrupts (in fact the only critical exceptions we have
 	 * are debug and machine check).  Otherwise  an interrupt which causes
 	 * a TLB miss can clobber the MMUCR between the mtspr and the tlbsx. */
-	mfmsr	r5
+	mfmsr	r4
 	lis	r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@ha
 	addi	r6,r6,(MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
-	andc	r6,r5,r6
+	andc	r6,r4,r6
 	mtmsr	r6
-	mtspr	SPRN_MMUCR,r4
+	mtspr	SPRN_MMUCR,r5
 	tlbsx.	r3, 0, r3
-	mtmsr	r5
+	mtmsr	r4
 	bne	10f
 	sync
 	/* There are only 64 TLB entries, so r3 < 64,
@@ -491,12 +499,21 @@
 	addi	r3,r3,L1_CACHE_BYTES
 	bdnz	0b
 	sync
+#ifndef CONFIG_44x
+	/* We don't flush the icache on 44x. Those have a virtual icache
+	 * and we don't have access to the virtual address here (it's
+	 * not the page vaddr but where it's mapped in user space). The
+	 * flushing of the icache on these is handled elsewhere, when
+	 * a change in the address space occurs, before returning to
+	 * user space
+	 */
 	mtctr	r4
 1:	icbi	0,r6
 	addi	r6,r6,L1_CACHE_BYTES
 	bdnz	1b
 	sync
 	isync
+#endif /* CONFIG_44x */
 	blr
 
 /*
diff --git a/arch/ppc/mm/44x_mmu.c b/arch/ppc/mm/44x_mmu.c
index 0a0a0487b..6536a25 100644
--- a/arch/ppc/mm/44x_mmu.c
+++ b/arch/ppc/mm/44x_mmu.c
@@ -61,6 +61,7 @@
  */
 unsigned int tlb_44x_index = 0;
 unsigned int tlb_44x_hwater = 62;
+int icache_44x_need_flush;
 
 /*
  * "Pins" a 256MB TLB entry in AS0 for kernel lowmem
diff --git a/arch/ppc/mm/4xx_mmu.c b/arch/ppc/mm/4xx_mmu.c
index 838e09d..ea785db 100644
--- a/arch/ppc/mm/4xx_mmu.c
+++ b/arch/ppc/mm/4xx_mmu.c
@@ -99,13 +99,12 @@
 
 	v = KERNELBASE;
 	p = PPC_MEMSTART;
-	s = 0;
+	s = total_lowmem;
 
-	if (__map_without_ltlbs) {
-		return s;
-	}
+	if (__map_without_ltlbs)
+		return 0;
 
-	while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) {
+	while (s >= LARGE_PAGE_SIZE_16M) {
 		pmd_t *pmdp;
 		unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
 
@@ -117,10 +116,10 @@
 
 		v += LARGE_PAGE_SIZE_16M;
 		p += LARGE_PAGE_SIZE_16M;
-		s += LARGE_PAGE_SIZE_16M;
+		s -= LARGE_PAGE_SIZE_16M;
 	}
 
-	while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) {
+	while (s >= LARGE_PAGE_SIZE_4M) {
 		pmd_t *pmdp;
 		unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
 
@@ -129,8 +128,8 @@
 
 		v += LARGE_PAGE_SIZE_4M;
 		p += LARGE_PAGE_SIZE_4M;
-		s += LARGE_PAGE_SIZE_4M;
+		s -= LARGE_PAGE_SIZE_4M;
 	}
 
-	return s;
+	return total_lowmem - s;
 }
diff --git a/arch/ppc/mm/fault.c b/arch/ppc/mm/fault.c
index 254c23b..36c0e75 100644
--- a/arch/ppc/mm/fault.c
+++ b/arch/ppc/mm/fault.c
@@ -227,7 +227,7 @@
 					set_bit(PG_arch_1, &page->flags);
 				}
 				pte_update(ptep, 0, _PAGE_HWEXEC);
-				_tlbie(address);
+				_tlbie(address, mm->context.id);
 				pte_unmap_unlock(ptep, ptl);
 				up_read(&mm->mmap_sem);
 				return 0;
diff --git a/arch/ppc/mm/mmu_decl.h b/arch/ppc/mm/mmu_decl.h
index 540f329..f1d4f21 100644
--- a/arch/ppc/mm/mmu_decl.h
+++ b/arch/ppc/mm/mmu_decl.h
@@ -54,12 +54,12 @@
 #define mmu_mapin_ram()		(0UL)
 
 #elif defined(CONFIG_4xx)
-#define flush_HPTE(X, va, pg)	_tlbie(va)
+#define flush_HPTE(pid, va, pg)	_tlbie(va, pid)
 extern void MMU_init_hw(void);
 extern unsigned long mmu_mapin_ram(void);
 
 #elif defined(CONFIG_FSL_BOOKE)
-#define flush_HPTE(X, va, pg)	_tlbie(va)
+#define flush_HPTE(pid, va, pg)	_tlbie(va, pid)
 extern void MMU_init_hw(void);
 extern unsigned long mmu_mapin_ram(void);
 extern void adjust_total_lowmem(void);
diff --git a/arch/ppc/platforms/4xx/ebony.c b/arch/ppc/platforms/4xx/ebony.c
index 05d7184..453643a 100644
--- a/arch/ppc/platforms/4xx/ebony.c
+++ b/arch/ppc/platforms/4xx/ebony.c
@@ -236,7 +236,7 @@
 	gen550_init(0, &port);
 
 	/* Purge TLB entry added in head_44x.S for early serial access */
-	_tlbie(UART0_IO_BASE);
+	_tlbie(UART0_IO_BASE, 0);
 #endif
 
 	port.membase = ioremap64(PPC440GP_UART1_ADDR, 8);
diff --git a/arch/ppc/platforms/4xx/luan.c b/arch/ppc/platforms/4xx/luan.c
index 4b16961..b79ebb8 100644
--- a/arch/ppc/platforms/4xx/luan.c
+++ b/arch/ppc/platforms/4xx/luan.c
@@ -230,10 +230,15 @@
 
 	/* Allocate hoses for PCIX1 and PCIX2 */
 	hose1 = pcibios_alloc_controller();
-	hose2 = pcibios_alloc_controller();
-	if (!hose1 || !hose2)
+	if (!hose1)
 		return;
 
+	hose2 = pcibios_alloc_controller();
+	if (!hose2) {
+		pcibios_free_controller(hose1);
+		return;
+	}
+
 	/* Setup PCIX1 */
 	hose1->first_busno = 0;
 	hose1->last_busno = 0xff;
diff --git a/arch/ppc/platforms/4xx/ocotea.c b/arch/ppc/platforms/4xx/ocotea.c
index fd0f971..28a712c 100644
--- a/arch/ppc/platforms/4xx/ocotea.c
+++ b/arch/ppc/platforms/4xx/ocotea.c
@@ -259,7 +259,7 @@
 	gen550_init(0, &port);
 
 	/* Purge TLB entry added in head_44x.S for early serial access */
-	_tlbie(UART0_IO_BASE);
+	_tlbie(UART0_IO_BASE, 0);
 #endif
 
 	port.membase = ioremap64(PPC440GX_UART1_ADDR, 8);
diff --git a/arch/ppc/platforms/4xx/taishan.c b/arch/ppc/platforms/4xx/taishan.c
index 888c492..f6a0c66 100644
--- a/arch/ppc/platforms/4xx/taishan.c
+++ b/arch/ppc/platforms/4xx/taishan.c
@@ -316,7 +316,7 @@
 	gen550_init(0, &port);
 
 	/* Purge TLB entry added in head_44x.S for early serial access */
-	_tlbie(UART0_IO_BASE);
+	_tlbie(UART0_IO_BASE, 0);
 #endif
 
 	port.membase = ioremap64(PPC440GX_UART1_ADDR, 8);
diff --git a/drivers/serial/uartlite.c b/drivers/serial/uartlite.c
index dfef83f..a85f2d3 100644
--- a/drivers/serial/uartlite.c
+++ b/drivers/serial/uartlite.c
@@ -329,12 +329,14 @@
 static void ulite_console_wait_tx(struct uart_port *port)
 {
 	int i;
+	u8 val;
 
-	/* wait up to 10ms for the character(s) to be sent */
-	for (i = 0; i < 10000; i++) {
-		if (readb(port->membase + ULITE_STATUS) & ULITE_STATUS_TXEMPTY)
+	/* Spin waiting for TX fifo to have space available */
+	for (i = 0; i < 100000; i++) {
+		val = readb(port->membase + ULITE_STATUS);
+		if ((val & ULITE_STATUS_TXFULL) == 0)
 			break;
-		udelay(1);
+		cpu_relax();
 	}
 }
 
diff --git a/include/asm-powerpc/pgtable-ppc32.h b/include/asm-powerpc/pgtable-ppc32.h
index 86a54a4a..fea2d8f 100644
--- a/include/asm-powerpc/pgtable-ppc32.h
+++ b/include/asm-powerpc/pgtable-ppc32.h
@@ -11,6 +11,11 @@
 extern unsigned long va_to_phys(unsigned long address);
 extern pte_t *va_to_pte(unsigned long address);
 extern unsigned long ioremap_bot, ioremap_base;
+
+#ifdef CONFIG_44x
+extern int icache_44x_need_flush;
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 /*
@@ -562,6 +567,10 @@
 	: "=&r" (old), "=&r" (tmp), "=m" (*p)
 	: "r" (p), "r" (clr), "r" (set), "m" (*p)
 	: "cc" );
+#ifdef CONFIG_44x
+	if ((old & _PAGE_USER) && (old & _PAGE_HWEXEC))
+		icache_44x_need_flush = 1;
+#endif
 	return old;
 }
 #else
@@ -582,6 +591,10 @@
 	: "=&r" (old), "=&r" (tmp), "=m" (*p)
 	: "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p)
 	: "cc" );
+#ifdef CONFIG_44x
+	if ((old & _PAGE_USER) && (old & _PAGE_HWEXEC))
+		icache_44x_need_flush = 1;
+#endif
 	return old;
 }
 #endif
diff --git a/include/asm-powerpc/tlbflush.h b/include/asm-powerpc/tlbflush.h
index b6b036c..e7b4c0d 100644
--- a/include/asm-powerpc/tlbflush.h
+++ b/include/asm-powerpc/tlbflush.h
@@ -1,5 +1,6 @@
 #ifndef _ASM_POWERPC_TLBFLUSH_H
 #define _ASM_POWERPC_TLBFLUSH_H
+
 /*
  * TLB flushing:
  *
@@ -16,9 +17,6 @@
  */
 #ifdef __KERNEL__
 
-struct mm_struct;
-struct vm_area_struct;
-
 #if defined(CONFIG_4xx) || defined(CONFIG_8xx) || defined(CONFIG_FSL_BOOKE)
 /*
  * TLB flushing for software loaded TLB chips
@@ -28,7 +26,9 @@
  * specific tlbie's
  */
 
-extern void _tlbie(unsigned long address);
+#include <linux/mm.h>
+
+extern void _tlbie(unsigned long address, unsigned int pid);
 
 #if defined(CONFIG_40x) || defined(CONFIG_8xx)
 #define _tlbia()	asm volatile ("tlbia; sync" : : : "memory")
@@ -44,13 +44,13 @@
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 				  unsigned long vmaddr)
 {
-	_tlbie(vmaddr);
+	_tlbie(vmaddr, vma->vm_mm->context.id);
 }
 
 static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
 					 unsigned long vmaddr)
 {
-	_tlbie(vmaddr);
+	_tlbie(vmaddr, vma->vm_mm->context.id);
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,