Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client

Pull some further ceph acl cleanups from Sage Weil:
 "I do have a couple patches on top of what's in your tree, though, that
  clean up a couple duplicated lines in your fix and apply Christoph's
  cleanup"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client:
  ceph: simplify ceph_{get,init}_acl
  ceph: remove duplicate declaration of ceph_setattr
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 8df5e8e..2101e71 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -447,14 +447,13 @@
  * main unit of I/O for the block layer and lower layers (ie drivers)
  */
 struct bio {
-       sector_t            bi_sector;
        struct bio          *bi_next;    /* request queue link */
        struct block_device *bi_bdev;	/* target device */
        unsigned long       bi_flags;    /* status, command, etc */
        unsigned long       bi_rw;       /* low bits: r/w, high: priority */
 
        unsigned int	bi_vcnt;     /* how many bio_vec's */
-       unsigned int	bi_idx;		/* current index into bio_vec array */
+       struct bvec_iter	bi_iter;	/* current index into bio_vec array */
 
        unsigned int	bi_size;     /* total size in bytes */
        unsigned short 	bi_phys_segments; /* segments after physaddr coalesce*/
@@ -480,7 +479,7 @@
 - Code that traverses the req list can find all the segments of a bio
   by using rq_for_each_segment.  This handles the fact that a request
   has multiple bios, each of which can have multiple segments.
-- Drivers which can't process a large bio in one shot can use the bi_idx
+- Drivers which can't process a large bio in one shot can use the bi_iter
   field to keep track of the next bio_vec entry to process.
   (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
   [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
@@ -589,7 +588,7 @@
 nr_sectors and current_nr_sectors fields (based on the corresponding
 hard_xxx values and the number of bytes transferred) and updates it on
 every transfer that invokes end_that_request_first. It does the same for the
-buffer, bio, bio->bi_idx fields too.
+buffer, bio, bio->bi_iter fields too.
 
 The buffer field is just a virtual address mapping of the current segment
 of the i/o buffer in cases where the buffer resides in low-memory. For high
diff --git a/Documentation/block/biovecs.txt b/Documentation/block/biovecs.txt
new file mode 100644
index 0000000..74a32ad
--- /dev/null
+++ b/Documentation/block/biovecs.txt
@@ -0,0 +1,111 @@
+
+Immutable biovecs and biovec iterators:
+=======================================
+
+Kent Overstreet <kmo@daterainc.com>
+
+As of 3.13, biovecs should never be modified after a bio has been submitted.
+Instead, we have a new struct bvec_iter which represents a range of a biovec -
+the iterator will be modified as the bio is completed, not the biovec.
+
+More specifically, old code that needed to partially complete a bio would
+update bi_sector and bi_size, and advance bi_idx to the next biovec. If it
+ended up partway through a biovec, it would increment bv_offset and decrement
+bv_len by the number of bytes completed in that biovec.
+
+In the new scheme of things, everything that must be mutated in order to
+partially complete a bio is segregated into struct bvec_iter: bi_sector,
+bi_size and bi_idx have been moved there; and instead of modifying bv_offset
+and bv_len, struct bvec_iter has bi_bvec_done, which represents the number of
+bytes completed in the current bvec.
+
+There are a bunch of new helper macros for hiding the gory details - in
+particular, presenting the illusion of partially completed biovecs so that
+normal code doesn't have to deal with bi_bvec_done.
+
+ * Driver code should no longer refer to biovecs directly; we now have
+   bio_iovec() and bio_iter_iovec() macros that return literal struct biovecs,
+   constructed from the raw biovecs but taking into account bi_bvec_done and
+   bi_size.
+
+   bio_for_each_segment() has been updated to take a bvec_iter argument
+   instead of an integer (that corresponded to bi_idx); for a lot of code the
+   conversion just required changing the types of the arguments to
+   bio_for_each_segment().
+
+ * Advancing a bvec_iter is done with bio_advance_iter(); bio_advance() is a
+   wrapper around bio_advance_iter() that operates on bio->bi_iter, and also
+   advances the bio integrity's iter if present.
+
+   There is a lower level advance function - bvec_iter_advance() - which takes
+   a pointer to a biovec, not a bio; this is used by the bio integrity code.
+
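+As a minimal sketch of the iteration style described above (not taken from
+any driver - the handler my_handle_segment() is hypothetical, and the page
+is assumed to be lowmem), a per-segment loop now looks like:
+
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+
+	bio_for_each_segment(bvec, bio, iter) {
+		/* bvec is a literal struct bio_vec, constructed from the
+		 * raw biovec with bi_bvec_done and bi_size applied */
+		my_handle_segment(page_address(bvec.bv_page) + bvec.bv_offset,
+				  bvec.bv_len);
+	}
+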
+What does all this get us?
+==========================
+
+Having a real iterator, and making biovecs immutable, has a number of
+advantages:
+
+ * Before, iterating over bios was very awkward when you weren't processing
+   exactly one bvec at a time - for example, bio_copy_data() in fs/bio.c,
+   which copies the contents of one bio into another. Because the biovecs
+   wouldn't necessarily be the same size, the old code was tricky and
+   convoluted - it had to walk two different bios at the same time, keeping
+   both bi_idx and an offset into the current biovec for each.
+
+   The new code is much more straightforward - have a look. This sort of
+   pattern comes up in a lot of places; a lot of drivers were essentially open
+   coding bvec iterators before, and having common implementation considerably
+   simplifies a lot of code.
+
+ * Before, any code that might need to use the biovec after the bio had been
+   completed (perhaps to copy the data somewhere else, or perhaps to resubmit
+   it somewhere else if there was an error) had to save the entire bvec array
+   - again, this was being done in a fair number of places.
+
+ * Biovecs can be shared between multiple bios - a bvec iter can represent an
+   arbitrary range of an existing biovec, both starting and ending midway
+   through biovecs. This is what enables efficient splitting of arbitrary
+   bios. Note that this means we _only_ use bi_size to determine when we've
+   reached the end of a bio, not bi_vcnt - and the bio_iovec() macro takes
+   bi_size into account when constructing biovecs.
+
+ * Splitting bios is now much simpler. The old bio_split() didn't even work on
+   bios with more than a single bvec! Now, we can efficiently split arbitrary
+   size bios - because the new bio can share the old bio's biovec.
+
+   Care must be taken to ensure the biovec isn't freed while the split bio is
+   still using it, in case the original bio completes first, though. Using
+   bio_chain() when splitting bios helps with this.
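+
+   A rough sketch of that split-and-chain pattern (a minimal illustration;
+   the sector count 'sectors' and the bio_set 'bs' are placeholders):
+
+	struct bio *split;
+
+	split = bio_split(bio, sectors, GFP_NOIO, bs);
+	bio_chain(split, bio);	/* bio completes only after split does */
+	generic_make_request(split);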
+
+ * Submitting partially completed bios is now perfectly fine - this comes up
+   occasionally in stacking block drivers and various code (e.g. md and
+   bcache) had some ugly workarounds for this.
+
+   It used to be the case that submitting a partially completed bio would work
+   fine with _most_ devices, but since accessing the raw bvec array was the
+   norm, not all drivers would respect bi_idx and those would break. Now,
+   since all drivers _must_ go through the bvec iterator - and have been
+   audited to make sure they are - submitting partially completed bios is
+   perfectly fine.
+
+Other implications:
+===================
+
+ * Almost all usage of bi_idx is now incorrect and has been removed; instead,
+   where previously you would have used bi_idx you'd now use a bvec_iter,
+   probably passing it to one of the helper macros.
+
+   I.e. instead of using bio_iovec_idx() (or bio->bi_iovec[bio->bi_idx]), you
+   now use bio_iter_iovec(), which takes a bvec_iter and returns a
+   literal struct bio_vec - constructed on the fly from the raw biovec but
+   taking into account bi_bvec_done (and bi_size).
+
+ * bi_vcnt can't be trusted or relied upon by driver code - i.e. anything that
+   doesn't actually own the bio. The reason is twofold: firstly, it's not
+   actually needed for iterating over the bio anymore - we only use bi_size.
+   Secondly, when cloning a bio and reusing (a portion of) the original bio's
+   biovec, in order to calculate bi_vcnt for the new bio we'd have to iterate
+   over all the biovecs in the new bio - which is silly as it's not needed.
+
+   So, don't use bi_vcnt anymore.
diff --git a/drivers/staging/zram/zram.txt b/Documentation/blockdev/zram.txt
similarity index 90%
rename from drivers/staging/zram/zram.txt
rename to Documentation/blockdev/zram.txt
index 765d790..2eccddf 100644
--- a/drivers/staging/zram/zram.txt
+++ b/Documentation/blockdev/zram.txt
@@ -1,8 +1,6 @@
 zram: Compressed RAM based block devices
 ----------------------------------------
 
-Project home: http://compcache.googlecode.com/
-
 * Introduction
 
 The zram module creates RAM based block devices named /dev/zram<id>
@@ -69,9 +67,5 @@
 	resets the disksize to zero. You must set the disksize again
 	before reusing the device.
 
-Please report any problems at:
- - Mailing list: linux-mm-cc at laptop dot org
- - Issue tracker: http://code.google.com/p/compcache/issues/list
-
 Nitin Gupta
 ngupta@vflare.org
diff --git a/Documentation/devicetree/bindings/arm/bcm/kona-timer.txt b/Documentation/devicetree/bindings/arm/bcm/kona-timer.txt
index 17d88b2..39adf54 100644
--- a/Documentation/devicetree/bindings/arm/bcm/kona-timer.txt
+++ b/Documentation/devicetree/bindings/arm/bcm/kona-timer.txt
@@ -8,13 +8,18 @@
 - DEPRECATED: compatible : "bcm,kona-timer"
 - reg : Register range for the timer
 - interrupts : interrupt for the timer
+- clocks: phandle + clock specifier pair of the external clock
 - clock-frequency: frequency that the clock operates
 
+Only one of clocks or clock-frequency should be specified.
+
+Refer to clocks/clock-bindings.txt for generic clock consumer properties.
+
 Example:
 	timer@35006000 {
 		compatible = "brcm,kona-timer";
 		reg = <0x35006000 0x1000>;
 		interrupts = <0x0 7 0x4>;
-		clock-frequency = <32768>;
+		clocks = <&hub_timer_clk>;
 	};
 
diff --git a/Documentation/devicetree/bindings/clock/bcm-kona-clock.txt b/Documentation/devicetree/bindings/clock/bcm-kona-clock.txt
new file mode 100644
index 0000000..56d1f49
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/bcm-kona-clock.txt
@@ -0,0 +1,93 @@
+Broadcom Kona Family Clocks
+
+This binding is associated with Broadcom SoCs having "Kona" style
+clock control units (CCUs).  A CCU is a clock provider that manages
+a set of clock signals.  Each CCU is represented by a node in the
+device tree.
+
+This binding uses the common clock binding:
+    Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible
+	Shall have one of the following values:
+	- "brcm,bcm11351-root-ccu"
+	- "brcm,bcm11351-aon-ccu"
+	- "brcm,bcm11351-hub-ccu"
+	- "brcm,bcm11351-master-ccu"
+	- "brcm,bcm11351-slave-ccu"
+- reg
+	Shall define the base and range of the address space
+	containing clock control registers
+- #clock-cells
+	Shall have value <1>.  The permitted clock-specifier values
+	are defined below.
+- clock-output-names
+	Shall be an ordered list of strings defining the names of
+	the clocks provided by the CCU.
+
+
+BCM281XX family SoCs use Kona CCUs.  The following table defines
+the set of CCUs and clock specifiers for BCM281XX clocks.  When
+a clock consumer references a clock, its symbolic specifier
+(rather than its numeric index value) should be used.  These
+specifiers are defined in "include/dt-bindings/clock/bcm281xx.h".
+
+    CCU     Clock           Type    Index   Specifier
+    ---     -----           ----    -----   ---------
+    root    frac_1m         peri      0     BCM281XX_ROOT_CCU_FRAC_1M
+
+    aon     hub_timer       peri      0     BCM281XX_AON_CCU_HUB_TIMER
+    aon     pmu_bsc         peri      1     BCM281XX_AON_CCU_PMU_BSC
+    aon     pmu_bsc_var     peri      2     BCM281XX_AON_CCU_PMU_BSC_VAR
+
+    hub     tmon_1m         peri      0     BCM281XX_HUB_CCU_TMON_1M
+
+    master  sdio1           peri      0     BCM281XX_MASTER_CCU_SDIO1
+    master  sdio2           peri      1     BCM281XX_MASTER_CCU_SDIO2
+    master  sdio3           peri      2     BCM281XX_MASTER_CCU_SDIO3
+    master  sdio4           peri      3     BCM281XX_MASTER_CCU_SDIO4
+    master  dmac            peri      4     BCM281XX_MASTER_CCU_DMAC
+    master  usb_ic          peri      5     BCM281XX_MASTER_CCU_USB_IC
+    master  hsic2_48m       peri      6     BCM281XX_MASTER_CCU_HSIC_48M
+    master  hsic2_12m       peri      7     BCM281XX_MASTER_CCU_HSIC_12M
+
+    slave   uartb           peri      0     BCM281XX_SLAVE_CCU_UARTB
+    slave   uartb2          peri      1     BCM281XX_SLAVE_CCU_UARTB2
+    slave   uartb3          peri      2     BCM281XX_SLAVE_CCU_UARTB3
+    slave   uartb4          peri      3     BCM281XX_SLAVE_CCU_UARTB4
+    slave   ssp0            peri      4     BCM281XX_SLAVE_CCU_SSP0
+    slave   ssp2            peri      5     BCM281XX_SLAVE_CCU_SSP2
+    slave   bsc1            peri      6     BCM281XX_SLAVE_CCU_BSC1
+    slave   bsc2            peri      7     BCM281XX_SLAVE_CCU_BSC2
+    slave   bsc3            peri      8     BCM281XX_SLAVE_CCU_BSC3
+    slave   pwm             peri      9     BCM281XX_SLAVE_CCU_PWM
+
+
+Device tree example:
+
+	slave_ccu: slave_ccu {
+		compatible = "brcm,bcm11351-slave-ccu";
+		reg = <0x3e011000 0x0f00>;
+		#clock-cells = <1>;
+		clock-output-names = "uartb",
+				     "uartb2",
+				     "uartb3",
+				     "uartb4";
+	};
+
+	ref_crystal_clk: ref_crystal {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <26000000>;
+	};
+
+	uart@3e002000 {
+		compatible = "brcm,bcm11351-dw-apb-uart", "snps,dw-apb-uart";
+		status = "disabled";
+		reg = <0x3e002000 0x1000>;
+		clocks = <&slave_ccu BCM281XX_SLAVE_CCU_UARTB3>;
+		interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
+		reg-shift = <2>;
+		reg-io-width = <4>;
+	};
diff --git a/Documentation/devicetree/bindings/clock/corenet-clock.txt b/Documentation/devicetree/bindings/clock/corenet-clock.txt
new file mode 100644
index 0000000..24711af
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/corenet-clock.txt
@@ -0,0 +1,134 @@
+* Clock Block on Freescale CoreNet Platforms
+
+Freescale CoreNet chips take primary clocking input from the external
+SYSCLK signal. The SYSCLK input (frequency) is multiplied using
+multiple phase locked loops (PLL) to create a variety of frequencies
+which can then be passed to a variety of internal logic, including
+cores and peripheral IP blocks.
+Please refer to the Reference Manual for details.
+
+1. Clock Block Binding
+
+Required properties:
+- compatible: Should contain a specific clock block compatible string
+	and a single chassis clock compatible string.
+	Clock block strings include, but are not limited to, one of:
+	* "fsl,p2041-clockgen"
+	* "fsl,p3041-clockgen"
+	* "fsl,p4080-clockgen"
+	* "fsl,p5020-clockgen"
+	* "fsl,p5040-clockgen"
+	* "fsl,t4240-clockgen"
+	* "fsl,b4420-clockgen"
+	* "fsl,b4860-clockgen"
+	Chassis clock strings include:
+	* "fsl,qoriq-clockgen-1.0": for chassis 1.0 clocks
+	* "fsl,qoriq-clockgen-2.0": for chassis 2.0 clocks
+- reg: Describes the address of the device's resources within the
+	address space defined by its parent bus, and resource zero
+	represents the clock register set
+- clock-frequency: Input system clock frequency
+
+Recommended properties:
+- ranges: Allows valid translation between child's address space and
+	parent's. Must be present if the device has sub-nodes.
+- #address-cells: Specifies the number of cells used to represent
+	physical base addresses.  Must be present if the device has
+	sub-nodes and set to 1 if present
+- #size-cells: Specifies the number of cells used to represent
+	the size of an address. Must be present if the device has
+	sub-nodes and set to 1 if present
+
+2. Clock Provider/Consumer Binding
+
+Most of the bindings are from the common clock binding[1].
+ [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : Should include one of the following:
+	* "fsl,qoriq-core-pll-1.0" for core PLL clocks (v1.0)
+	* "fsl,qoriq-core-pll-2.0" for core PLL clocks (v2.0)
+	* "fsl,qoriq-core-mux-1.0" for core mux clocks (v1.0)
+	* "fsl,qoriq-core-mux-2.0" for core mux clocks (v2.0)
+	* "fsl,qoriq-sysclk-1.0": for input system clock (v1.0).
+		It takes parent's clock-frequency as its clock.
+	* "fsl,qoriq-sysclk-2.0": for input system clock (v2.0).
+		It takes parent's clock-frequency as its clock.
+- #clock-cells: From common clock binding. The number of cells in a
+	clock-specifier. Should be <0> for "fsl,qoriq-sysclk-[1,2].0"
+	clocks, or <1> for "fsl,qoriq-core-pll-[1,2].0" clocks.
+	For "fsl,qoriq-core-pll-[1,2].0" clocks, the single
+	clock-specifier cell may take the following values:
+	* 0 - equal to the PLL frequency
+	* 1 - equal to the PLL frequency divided by 2
+	* 2 - equal to the PLL frequency divided by 4
+
+Recommended properties:
+- clocks: Should be the phandle of input parent clock
+- clock-names: From common clock binding, indicates the clock name
+- clock-output-names: From common clock binding, indicates the names of
+	output clocks
+- reg: Should be the offset and length of clock block base address.
+	The length should be 4.
+
+Example for clock block and clock provider:
+/ {
+	clockgen: global-utilities@e1000 {
+		compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0";
+		ranges = <0x0 0xe1000 0x1000>;
+		clock-frequency = <133333333>;
+		reg = <0xe1000 0x1000>;
+		#address-cells = <1>;
+		#size-cells = <1>;
+
+		sysclk: sysclk {
+			#clock-cells = <0>;
+			compatible = "fsl,qoriq-sysclk-1.0";
+			clock-output-names = "sysclk";
+		};
+
+		pll0: pll0@800 {
+			#clock-cells = <1>;
+			reg = <0x800 0x4>;
+			compatible = "fsl,qoriq-core-pll-1.0";
+			clocks = <&sysclk>;
+			clock-output-names = "pll0", "pll0-div2";
+		};
+
+		pll1: pll1@820 {
+			#clock-cells = <1>;
+			reg = <0x820 0x4>;
+			compatible = "fsl,qoriq-core-pll-1.0";
+			clocks = <&sysclk>;
+			clock-output-names = "pll1", "pll1-div2";
+		};
+
+		mux0: mux0@0 {
+			#clock-cells = <0>;
+			reg = <0x0 0x4>;
+			compatible = "fsl,qoriq-core-mux-1.0";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
+			clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
+			clock-output-names = "cmux0";
+		};
+
+		mux1: mux1@20 {
+			#clock-cells = <0>;
+			reg = <0x20 0x4>;
+			compatible = "fsl,qoriq-core-mux-1.0";
+			clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
+			clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
+			clock-output-names = "cmux1";
+		};
+	};
+};
+
+Example for clock consumer:
+
+/ {
+	cpu0: PowerPC,e5500@0 {
+		...
+		clocks = <&mux0>;
+		...
+	};
+};
diff --git a/Documentation/devicetree/bindings/mmc/kona-sdhci.txt b/Documentation/devicetree/bindings/mmc/kona-sdhci.txt
index 789fb07..aaba248 100644
--- a/Documentation/devicetree/bindings/mmc/kona-sdhci.txt
+++ b/Documentation/devicetree/bindings/mmc/kona-sdhci.txt
@@ -6,12 +6,16 @@
 Required properties:
 - compatible : Should be "brcm,kona-sdhci"
 - DEPRECATED: compatible : Should be "bcm,kona-sdhci"
+- clocks: phandle + clock specifier pair of the external clock
+
+Refer to clocks/clock-bindings.txt for generic clock consumer properties.
 
 Example:
 
 sdio2: sdio@0x3f1a0000 {
 	compatible = "brcm,kona-sdhci";
 	reg = <0x3f1a0000 0x10000>;
+	clocks = <&sdio3_clk>;
 	interrupts = <0x0 74 0x4>;
 };
 
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index 01c2db7..b930ad0 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -5,11 +5,11 @@
 by reading this file will contain either "+4.1" or "-4.1"
 correspondingly.
 
-Currently, server support for minorversion 1 is disabled by default.
-It can be enabled at run time by writing the string "+4.1" to
+Currently, server support for minorversion 1 is enabled by default.
+It can be disabled at run time by writing the string "-4.1" to
 the /proc/fs/nfsd/versions control file.  Note that to write this
-control file, the nfsd service must be taken down.  Use your user-mode
-nfs-utils to set this up; see rpc.nfsd(8)
+control file, the nfsd service must be taken down.  You can use rpc.nfsd
+for this; see rpc.nfsd(8).
 
 (Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
 "-4", respectively.  Therefore, code meant to work on both new and old
@@ -29,29 +29,6 @@
 See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
 for more information.
 
-The current implementation is intended for developers only: while it
-does support ordinary file operations on clients we have tested against
-(including the linux client), it is incomplete in ways which may limit
-features unexpectedly, cause known bugs in rare cases, or cause
-interoperability problems with future clients.  Known issues:
-
-	- gss support is questionable: currently mounts with kerberos
-	  from a linux client are possible, but we aren't really
-	  conformant with the spec (for example, we don't use kerberos
-	  on the backchannel correctly).
-	- We do not support SSV, which provides security for shared
-	  client-server state (thus preventing unauthorized tampering
-	  with locks and opens, for example).  It is mandatory for
-	  servers to support this, though no clients use it yet.
-
-In addition, some limitations are inherited from the current NFSv4
-implementation:
-
-	- Incomplete delegation enforcement: if a file is renamed or
-	  unlinked by a local process, a client holding a delegation may
-	  continue to indefinitely allow opens of the file under the old
-	  name.
-
 The table below, taken from the NFSv4.1 document, lists
 the operations that are mandatory to implement (REQ), optional
 (OPT), and NFSv4.0 operations that are required not to implement (MNI)
@@ -169,6 +146,16 @@
 
 Implementation notes:
 
+SSV:
+* The spec claims this is mandatory, but we don't actually know of any
+  implementations, so we're ignoring it for now.  The server returns
+  NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof.
+
+GSS on the backchannel:
+* Again, theoretically required but not widely implemented (in
+  particular, the current Linux client doesn't request it).  We return
+  NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION.
+
 DELEGPURGE:
 * mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
   CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
@@ -176,7 +163,6 @@
   now.
 
 EXCHANGE_ID:
-* only SP4_NONE state protection supported
 * implementation ids are ignored
 
 CREATE_SESSION:
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 31f7617..f00bee14 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1386,8 +1386,8 @@
 For example, if a task is using all allowed memory, its badness score will be
 1000.  If it is using half of its allowed memory, its score will be 500.
 
-There is an additional factor included in the badness score: root
-processes are given 3% extra memory over other tasks.
+There is an additional factor included in the badness score: the current memory
+and swap usage is discounted by 3% for root processes.
 
 The amount of "allowed" memory depends on the context in which the oom killer
 was called.  If it is due to the memory assigned to the allocating task's cpuset
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index deb48b5..c53784c 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -782,7 +782,7 @@
 ----------------------
 
 This describes how the VFS can manipulate an open file. As of kernel
-3.5, the following members are defined:
+3.12, the following members are defined:
 
 struct file_operations {
 	struct module *owner;
@@ -803,9 +803,6 @@
 	int (*aio_fsync) (struct kiocb *, int datasync);
 	int (*fasync) (int, struct file *, int);
 	int (*lock) (struct file *, int, struct file_lock *);
-	ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
-	ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
-	ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
 	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
 	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 	int (*check_flags)(int);
@@ -814,6 +811,7 @@
 	ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
 	int (*setlease)(struct file *, long arg, struct file_lock **);
 	long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
+	int (*show_fdinfo)(struct seq_file *m, struct file *f);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -864,12 +862,6 @@
   lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
   	commands
 
-  readv: called by the readv(2) system call
-
-  writev: called by the writev(2) system call
-
-  sendfile: called by the sendfile(2) system call
-
   get_unmapped_area: called by the mmap(2) system call
 
   check_flags: called by the fcntl(2) system call for F_SETFL command
diff --git a/MAINTAINERS b/MAINTAINERS
index a31a6e3..b5795f0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9740,11 +9740,27 @@
 S:	Odd Fixes
 F:	drivers/media/pci/zoran/
 
+ZRAM COMPRESSED RAM BLOCK DEVICE DRIVER
+M:	Minchan Kim <minchan@kernel.org>
+M:	Nitin Gupta <ngupta@vflare.org>
+L:	linux-kernel@vger.kernel.org
+S:	Maintained
+F:	drivers/block/zram/
+F:	Documentation/blockdev/zram.txt
+
 ZS DECSTATION Z85C30 SERIAL DRIVER
 M:	"Maciej W. Rozycki" <macro@linux-mips.org>
 S:	Maintained
 F:	drivers/tty/serial/zs.*
 
+ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR
+M:	Minchan Kim <minchan@kernel.org>
+M:	Nitin Gupta <ngupta@vflare.org>
+L:	linux-mm@kvack.org
+S:	Maintained
+F:	mm/zsmalloc.c
+F:	include/linux/zsmalloc.h
+
 ZSWAP COMPRESSED SWAP CACHING
 M:	Seth Jennings <sjenning@linux.vnet.ibm.com>
 L:	linux-mm@kvack.org
diff --git a/Makefile b/Makefile
index 455fd48..4231023 100644
--- a/Makefile
+++ b/Makefile
@@ -311,9 +311,15 @@
 # If the user is running make -s (silent mode), suppress echoing of
 # commands
 
+ifneq ($(filter 4.%,$(MAKE_VERSION)),)	# make-4
+ifneq ($(filter %s ,$(firstword x$(MAKEFLAGS))),)
+  quiet=silent_
+endif
+else					# make-3.8x
 ifneq ($(filter s% -s%,$(MAKEFLAGS)),)
   quiet=silent_
 endif
+endif
 
 export quiet Q KBUILD_VERBOSE
 
@@ -633,7 +639,7 @@
 
 ifdef CONFIG_DEBUG_INFO
 KBUILD_CFLAGS	+= -g
-KBUILD_AFLAGS	+= -gdwarf-2
+KBUILD_AFLAGS	+= -Wa,--gdwarf-2
 endif
 
 ifdef CONFIG_DEBUG_INFO_REDUCED
@@ -682,6 +688,9 @@
 # require functions to have arguments in prototypes, not empty 'int foo()'
 KBUILD_CFLAGS   += $(call cc-option,-Werror=strict-prototypes)
 
+# Prohibit date/time macros, which would make the build non-deterministic
+KBUILD_CFLAGS   += $(call cc-option,-Werror=date-time)
+
 # use the deterministic mode of AR if available
 KBUILD_ARFLAGS := $(call ar-option,D)
 
diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h
index 0283e9e..66ee552 100644
--- a/arch/arc/include/asm/linkage.h
+++ b/arch/arc/include/asm/linkage.h
@@ -11,6 +11,8 @@
 
 #ifdef __ASSEMBLY__
 
+#define ASM_NL		 `	/* use '`' to mark new line in macro */
+
 /* Can't use the ENTRY macro in linux/linkage.h
  * gas considers ';' as comment vs. newline
  */
diff --git a/arch/arm/boot/dts/bcm11351-brt.dts b/arch/arm/boot/dts/bcm11351-brt.dts
index 23cd16d..396b704 100644
--- a/arch/arm/boot/dts/bcm11351-brt.dts
+++ b/arch/arm/boot/dts/bcm11351-brt.dts
@@ -44,5 +44,11 @@
 		status = "okay";
 	};
 
+	usbotg: usb@3f120000 {
+		status = "okay";
+	};
 
+	usbphy: usb-phy@3f130000 {
+		status = "okay";
+	};
 };
diff --git a/arch/arm/boot/dts/bcm11351.dtsi b/arch/arm/boot/dts/bcm11351.dtsi
index dd8e878..e491b82 100644
--- a/arch/arm/boot/dts/bcm11351.dtsi
+++ b/arch/arm/boot/dts/bcm11351.dtsi
@@ -43,7 +43,7 @@
 		compatible = "brcm,bcm11351-dw-apb-uart", "snps,dw-apb-uart";
 		status = "disabled";
 		reg = <0x3e000000 0x1000>;
-		clock-frequency = <13000000>;
+		clocks = <&uartb_clk>;
 		interrupts = <GIC_SPI 67 IRQ_TYPE_LEVEL_HIGH>;
 		reg-shift = <2>;
 		reg-io-width = <4>;
@@ -53,7 +53,7 @@
 		compatible = "brcm,bcm11351-dw-apb-uart", "snps,dw-apb-uart";
 		status = "disabled";
 		reg = <0x3e001000 0x1000>;
-		clock-frequency = <13000000>;
+		clocks = <&uartb2_clk>;
 		interrupts = <GIC_SPI 66 IRQ_TYPE_LEVEL_HIGH>;
 		reg-shift = <2>;
 		reg-io-width = <4>;
@@ -63,7 +63,7 @@
 		compatible = "brcm,bcm11351-dw-apb-uart", "snps,dw-apb-uart";
 		status = "disabled";
 		reg = <0x3e002000 0x1000>;
-		clock-frequency = <13000000>;
+		clocks = <&uartb3_clk>;
 		interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
 		reg-shift = <2>;
 		reg-io-width = <4>;
@@ -73,7 +73,7 @@
 		compatible = "brcm,bcm11351-dw-apb-uart", "snps,dw-apb-uart";
 		status = "disabled";
 		reg = <0x3e003000 0x1000>;
-		clock-frequency = <13000000>;
+		clocks = <&uartb4_clk>;
 		interrupts = <GIC_SPI 64 IRQ_TYPE_LEVEL_HIGH>;
 		reg-shift = <2>;
 		reg-io-width = <4>;
@@ -95,7 +95,7 @@
 		compatible = "brcm,kona-timer";
 		reg = <0x35006000 0x1000>;
 		interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
-		clock-frequency = <32768>;
+		clocks = <&hub_timer_clk>;
 	};
 
 	gpio: gpio@35003000 {
@@ -118,6 +118,7 @@
 		compatible = "brcm,kona-sdhci";
 		reg = <0x3f180000 0x10000>;
 		interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&sdio1_clk>;
 		status = "disabled";
 	};
 
@@ -125,6 +126,7 @@
 		compatible = "brcm,kona-sdhci";
 		reg = <0x3f190000 0x10000>;
 		interrupts = <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&sdio2_clk>;
 		status = "disabled";
 	};
 
@@ -132,6 +134,7 @@
 		compatible = "brcm,kona-sdhci";
 		reg = <0x3f1a0000 0x10000>;
 		interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&sdio3_clk>;
 		status = "disabled";
 	};
 
@@ -139,6 +142,7 @@
 		compatible = "brcm,kona-sdhci";
 		reg = <0x3f1b0000 0x10000>;
 		interrupts = <GIC_SPI 73 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&sdio4_clk>;
 		status = "disabled";
 	};
 
@@ -146,4 +150,160 @@
 		compatible = "brcm,capri-pinctrl";
 		reg = <0x35004800 0x430>;
 	};
+
+	i2c@3e016000 {
+		compatible = "brcm,bcm11351-i2c", "brcm,kona-i2c";
+		reg = <0x3e016000 0x80>;
+		interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&bsc1_clk>;
+		status = "disabled";
+	};
+
+	i2c@3e017000 {
+		compatible = "brcm,bcm11351-i2c", "brcm,kona-i2c";
+		reg = <0x3e017000 0x80>;
+		interrupts = <GIC_SPI 102 IRQ_TYPE_LEVEL_HIGH>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&bsc2_clk>;
+		status = "disabled";
+	};
+
+	i2c@3e018000 {
+		compatible = "brcm,bcm11351-i2c", "brcm,kona-i2c";
+		reg = <0x3e018000 0x80>;
+		interrupts = <GIC_SPI 169 IRQ_TYPE_LEVEL_HIGH>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&bsc3_clk>;
+		status = "disabled";
+	};
+
+	i2c@3500d000 {
+		compatible = "brcm,bcm11351-i2c", "brcm,kona-i2c";
+		reg = <0x3500d000 0x80>;
+		interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+		clocks = <&pmu_bsc_clk>;
+		status = "disabled";
+	};
+
+	clocks {
+		bsc1_clk: bsc1 {
+			compatible = "fixed-clock";
+			clock-frequency = <13000000>;
+			#clock-cells = <0>;
+		};
+
+		bsc2_clk: bsc2 {
+			compatible = "fixed-clock";
+			clock-frequency = <13000000>;
+			#clock-cells = <0>;
+		};
+
+		bsc3_clk: bsc3 {
+			compatible = "fixed-clock";
+			clock-frequency = <13000000>;
+			#clock-cells = <0>;
+		};
+
+		pmu_bsc_clk: pmu_bsc {
+			compatible = "fixed-clock";
+			clock-frequency = <13000000>;
+			#clock-cells = <0>;
+		};
+
+		hub_timer_clk: hub_timer {
+			compatible = "fixed-clock";
+			clock-frequency = <32768>;
+			#clock-cells = <0>;
+		};
+
+		pwm_clk: pwm {
+			compatible = "fixed-clock";
+			clock-frequency = <26000000>;
+			#clock-cells = <0>;
+		};
+
+		sdio1_clk: sdio1 {
+			compatible = "fixed-clock";
+			clock-frequency = <48000000>;
+			#clock-cells = <0>;
+		};
+
+		sdio2_clk: sdio2 {
+			compatible = "fixed-clock";
+			clock-frequency = <48000000>;
+			#clock-cells = <0>;
+		};
+
+		sdio3_clk: sdio3 {
+			compatible = "fixed-clock";
+			clock-frequency = <48000000>;
+			#clock-cells = <0>;
+		};
+
+		sdio4_clk: sdio4 {
+			compatible = "fixed-clock";
+			clock-frequency = <48000000>;
+			#clock-cells = <0>;
+		};
+
+		tmon_1m_clk: tmon_1m {
+			compatible = "fixed-clock";
+			clock-frequency = <1000000>;
+			#clock-cells = <0>;
+		};
+
+		uartb_clk: uartb {
+			compatible = "fixed-clock";
+			clock-frequency = <13000000>;
+			#clock-cells = <0>;
+		};
+
+		uartb2_clk: uartb2 {
+			compatible = "fixed-clock";
+			clock-frequency = <13000000>;
+			#clock-cells = <0>;
+		};
+
+		uartb3_clk: uartb3 {
+			compatible = "fixed-clock";
+			clock-frequency = <13000000>;
+			#clock-cells = <0>;
+		};
+
+		uartb4_clk: uartb4 {
+			compatible = "fixed-clock";
+			clock-frequency = <13000000>;
+			#clock-cells = <0>;
+		};
+
+		usb_otg_ahb_clk: usb_otg_ahb {
+			compatible = "fixed-clock";
+			clock-frequency = <52000000>;
+			#clock-cells = <0>;
+		};
+	};
+
+	usbotg: usb@3f120000 {
+		compatible = "snps,dwc2";
+		reg = <0x3f120000 0x10000>;
+		interrupts = <GIC_SPI 47 IRQ_TYPE_LEVEL_HIGH>;
+		clocks = <&usb_otg_ahb_clk>;
+		clock-names = "otg";
+		phys = <&usbphy>;
+		phy-names = "usb2-phy";
+		status = "disabled";
+	};
+
+	usbphy: usb-phy@3f130000 {
+		compatible = "brcm,kona-usb2-phy";
+		reg = <0x3f130000 0x28>;
+		#phy-cells = <0>;
+		status = "disabled";
+	};
 };
diff --git a/arch/arm/boot/dts/bcm28155-ap.dts b/arch/arm/boot/dts/bcm28155-ap.dts
index 08e47c2..c7fa9fb 100644
--- a/arch/arm/boot/dts/bcm28155-ap.dts
+++ b/arch/arm/boot/dts/bcm28155-ap.dts
@@ -27,6 +27,26 @@
 		status = "okay";
 	};
 
+	i2c@3e016000 {
+		status = "okay";
+		clock-frequency = <400000>;
+	};
+
+	i2c@3e017000 {
+		status = "okay";
+		clock-frequency = <400000>;
+	};
+
+	i2c@3e018000 {
+		status = "okay";
+		clock-frequency = <400000>;
+	};
+
+	i2c@3500d000 {
+		status = "okay";
+		clock-frequency = <400000>;
+	};
+
 	sdio1: sdio@3f180000 {
 		max-frequency = <48000000>;
 		status = "okay";
@@ -43,4 +63,12 @@
 		cd-gpios = <&gpio 14 0>;
 		status = "okay";
 	};
+
+	usbotg: usb@3f120000 {
+		status = "okay";
+	};
+
+	usbphy: usb-phy@3f130000 {
+		status = "okay";
+	};
 };
diff --git a/arch/arm/boot/dts/moxart-uc7112lx.dts b/arch/arm/boot/dts/moxart-uc7112lx.dts
index 90749d5..10d088d 100644
--- a/arch/arm/boot/dts/moxart-uc7112lx.dts
+++ b/arch/arm/boot/dts/moxart-uc7112lx.dts
@@ -17,6 +17,14 @@
 		reg = <0x0 0x2000000>;
 	};
 
+	clocks {
+		ref12: ref12M {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <12000000>;
+		};
+	};
+
 	flash@80000000,0 {
 		compatible = "numonyx,js28f128", "cfi-flash";
 		reg = <0x80000000 0x1000000>;
diff --git a/arch/arm/boot/dts/moxart.dtsi b/arch/arm/boot/dts/moxart.dtsi
index da1d8ef..1fd27ed 100644
--- a/arch/arm/boot/dts/moxart.dtsi
+++ b/arch/arm/boot/dts/moxart.dtsi
@@ -26,12 +26,6 @@
 	clocks {
 		#address-cells = <1>;
 		#size-cells = <0>;
-
-		ref12: ref12M {
-			compatible = "fixed-clock";
-			#clock-cells = <0>;
-			clock-frequency = <12000000>;
-		};
 	};
 
 	soc {
diff --git a/arch/frv/Makefile b/arch/frv/Makefile
index 4d1b1e9..2a8fb73 100644
--- a/arch/frv/Makefile
+++ b/arch/frv/Makefile
@@ -74,13 +74,6 @@
 KBUILD_CFLAGS	+= -ffixed-fcc3 -ffixed-cc3 -ffixed-gr15 -ffixed-icc2
 KBUILD_AFLAGS	+= -mno-fdpic
 
-# make sure the .S files get compiled with debug info
-# and disable optimisations that are unhelpful whilst debugging
-ifdef CONFIG_DEBUG_INFO
-#KBUILD_CFLAGS	+= -O1
-KBUILD_AFLAGS	+= -Wa,--gdwarf2
-endif
-
 head-y		:= arch/frv/kernel/head.o
 
 core-y		+= arch/frv/kernel/ arch/frv/mm/
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 0721858..2d75ae2 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -62,17 +62,18 @@
 static void nfhd_make_request(struct request_queue *queue, struct bio *bio)
 {
 	struct nfhd_device *dev = queue->queuedata;
-	struct bio_vec *bvec;
-	int i, dir, len, shift;
-	sector_t sec = bio->bi_sector;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+	int dir, len, shift;
+	sector_t sec = bio->bi_iter.bi_sector;
 
 	dir = bio_data_dir(bio);
 	shift = dev->bshift;
-	bio_for_each_segment(bvec, bio, i) {
-		len = bvec->bv_len;
+	bio_for_each_segment(bvec, bio, iter) {
+		len = bvec.bv_len;
 		len >>= 9;
 		nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift,
-				bvec_to_phys(bvec));
+				bvec_to_phys(&bvec));
 		sec += len;
 	}
 	bio_endio(bio, 0);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index c02f1c0..dcae3a7 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -116,7 +116,6 @@
 	select CEVT_R4K
 	select CSRC_R4K
 	select DMA_NONCOHERENT
-	select FW_CFE
 	select HW_HAS_PCI
 	select IRQ_CPU
 	select SYS_HAS_CPU_MIPS32_R1
@@ -124,6 +123,7 @@
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_HAS_EARLY_PRINTK
+	select EARLY_PRINTK_8250 if EARLY_PRINTK
 	help
 	 Support for BCM47XX based boards
 
@@ -134,14 +134,13 @@
 	select CSRC_R4K
 	select DMA_NONCOHERENT
 	select IRQ_CPU
-	select SYS_HAS_CPU_MIPS32_R1
-	select SYS_HAS_CPU_BMIPS4350 if !BCM63XX_CPU_6338 && !BCM63XX_CPU_6345 && !BCM63XX_CPU_6348
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_HAS_EARLY_PRINTK
 	select SWAP_IO_SPACE
 	select ARCH_REQUIRE_GPIOLIB
 	select HAVE_CLK
+	select MIPS_L1_CACHE_SHIFT_4
 	help
 	 Support for BCM63XX based boards
 
@@ -186,6 +185,7 @@
 	select SYS_SUPPORTS_128HZ
 	select SYS_SUPPORTS_256HZ
 	select SYS_SUPPORTS_1024HZ
+	select MIPS_L1_CACHE_SHIFT_4
 	help
 	  This enables support for DEC's MIPS based workstations.  For details
 	  see the Linux/MIPS FAQ on <http://www.linux-mips.org/> and the
@@ -305,7 +305,7 @@
 	select CEVT_R4K
 	select CSRC_R4K
 	select CSRC_GIC
-	select DMA_NONCOHERENT
+	select DMA_MAYBE_COHERENT
 	select GENERIC_ISA_DMA
 	select HAVE_PCSPKR_PLATFORM
 	select IRQ_CPU
@@ -324,7 +324,6 @@
 	select SYS_HAS_CPU_MIPS64_R2
 	select SYS_HAS_CPU_NEVADA
 	select SYS_HAS_CPU_RM7000
-	select SYS_HAS_EARLY_PRINTK
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
@@ -349,6 +348,7 @@
 	select DMA_NONCOHERENT
 	select IRQ_CPU
 	select IRQ_GIC
+	select LIBFDT
 	select MIPS_MSC
 	select SYS_HAS_CPU_MIPS32_R1
 	select SYS_HAS_CPU_MIPS32_R2
@@ -471,6 +471,7 @@
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_64BIT_KERNEL
 	select SYS_SUPPORTS_BIG_ENDIAN
+	select MIPS_L1_CACHE_SHIFT_7
 	help
	  These are the SGI Indy, Challenge S and Indigo2, as well as certain
 	  OEM variants like the Tandem CMN B006S. To compile a Linux kernel
@@ -491,6 +492,7 @@
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_NUMA
 	select SYS_SUPPORTS_SMP
+	select MIPS_L1_CACHE_SHIFT_7
 	help
	  These are the SGI Origin 200, Origin 2000 and Onyx 2 Graphics
 	  workstations.  To compile a Linux kernel that runs on these, say Y
@@ -697,6 +699,7 @@
 	select SWAP_IO_SPACE
 	select BOOT_RAW
 	select ARCH_REQUIRE_GPIOLIB
+	select MIPS_L1_CACHE_SHIFT_4
 	help
 	  Support the Mikrotik(tm) RouterBoard 532 series,
 	  based on the IDT RC32434 SoC.
@@ -779,6 +782,7 @@
 	select CEVT_R4K
 	select CSRC_R4K
 	select IRQ_CPU
+	select ARCH_SUPPORTS_MSI
 	select ZONE_DMA32 if 64BIT
 	select SYNC_R4K
 	select SYS_HAS_EARLY_PRINTK
@@ -897,6 +901,10 @@
 config ARCH_DMA_ADDR_T_64BIT
 	def_bool (HIGHMEM && 64BIT_PHYS_ADDR) || 64BIT
 
+config DMA_MAYBE_COHERENT
+	select DMA_NONCOHERENT
+	bool
+
 config DMA_COHERENT
 	bool
 
@@ -1091,11 +1099,24 @@
 config BOOT_ELF32
 	bool
 
+config MIPS_L1_CACHE_SHIFT_4
+	bool
+
+config MIPS_L1_CACHE_SHIFT_5
+	bool
+
+config MIPS_L1_CACHE_SHIFT_6
+	bool
+
+config MIPS_L1_CACHE_SHIFT_7
+	bool
+
 config MIPS_L1_CACHE_SHIFT
 	int
-	default "4" if MACH_DECSTATION || MIKROTIK_RB532 || PMC_MSP4200_EVAL || SOC_RT288X
-	default "6" if MIPS_CPU_SCACHE
-	default "7" if SGI_IP22 || SGI_IP27 || SGI_IP28 || SNI_RM || CPU_CAVIUM_OCTEON
+	default "4" if MIPS_L1_CACHE_SHIFT_4
+	default "5" if MIPS_L1_CACHE_SHIFT_5
+	default "6" if MIPS_L1_CACHE_SHIFT_6
+	default "7" if MIPS_L1_CACHE_SHIFT_7
 	default "5"
 
 config HAVE_STD_PC_SERIAL_PORT
@@ -1375,47 +1396,31 @@
 	select LIBFDT
 	select USE_OF
 	select USB_EHCI_BIG_ENDIAN_MMIO
+	select SYS_HAS_DMA_OPS
+	select MIPS_L1_CACHE_SHIFT_7
 	help
 	  The Cavium Octeon processor is a highly integrated chip containing
 	  many ethernet hardware widgets for networking tasks. The processor
 	  can have up to 16 Mips64v2 cores and 8 integrated gigabit ethernets.
 	  Full details can be found at http://www.caviumnetworks.com.
 
-config CPU_BMIPS3300
-	bool "BMIPS3300"
-	depends on SYS_HAS_CPU_BMIPS3300
-	select CPU_BMIPS
-	help
-	  Broadcom BMIPS3300 processors.
-
-config CPU_BMIPS4350
-	bool "BMIPS4350"
-	depends on SYS_HAS_CPU_BMIPS4350
-	select CPU_BMIPS
-	select SYS_SUPPORTS_SMP
-	select SYS_SUPPORTS_HOTPLUG_CPU
-	help
-	  Broadcom BMIPS4350 ("VIPER") processors.
-
-config CPU_BMIPS4380
-	bool "BMIPS4380"
-	depends on SYS_HAS_CPU_BMIPS4380
-	select CPU_BMIPS
-	select SYS_SUPPORTS_SMP
-	select SYS_SUPPORTS_HOTPLUG_CPU
-	help
-	  Broadcom BMIPS4380 processors.
-
-config CPU_BMIPS5000
-	bool "BMIPS5000"
-	depends on SYS_HAS_CPU_BMIPS5000
-	select CPU_BMIPS
+config CPU_BMIPS
+	bool "Broadcom BMIPS"
+	depends on SYS_HAS_CPU_BMIPS
+	select CPU_MIPS32
+	select CPU_BMIPS32_3300 if SYS_HAS_CPU_BMIPS32_3300
+	select CPU_BMIPS4350 if SYS_HAS_CPU_BMIPS4350
+	select CPU_BMIPS4380 if SYS_HAS_CPU_BMIPS4380
+	select CPU_BMIPS5000 if SYS_HAS_CPU_BMIPS5000
+	select CPU_SUPPORTS_32BIT_KERNEL
+	select DMA_NONCOHERENT
+	select IRQ_CPU
+	select SWAP_IO_SPACE
+	select WEAK_ORDERING
 	select CPU_SUPPORTS_HIGHMEM
-	select MIPS_CPU_SCACHE
-	select SYS_SUPPORTS_SMP
-	select SYS_SUPPORTS_HOTPLUG_CPU
+	select CPU_HAS_PREFETCH
 	help
-	  Broadcom BMIPS5000 processors.
+	  Support for BMIPS32/3300/4350/4380 and BMIPS5000 processors.
 
 config CPU_XLR
 	bool "Netlogic XLR SoC"
@@ -1498,14 +1503,25 @@
 	select CPU_SUPPORTS_32BIT_KERNEL
 	select CPU_SUPPORTS_HIGHMEM
 
-config CPU_BMIPS
+config CPU_BMIPS32_3300
+	select SMP_UP if SMP
 	bool
-	select CPU_MIPS32
-	select CPU_SUPPORTS_32BIT_KERNEL
-	select DMA_NONCOHERENT
-	select IRQ_CPU
-	select SWAP_IO_SPACE
-	select WEAK_ORDERING
+
+config CPU_BMIPS4350
+	bool
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_HOTPLUG_CPU
+
+config CPU_BMIPS4380
+	bool
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_HOTPLUG_CPU
+
+config CPU_BMIPS5000
+	bool
+	select MIPS_CPU_SCACHE
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_HOTPLUG_CPU
 
 config SYS_HAS_CPU_LOONGSON2E
 	bool
@@ -1579,17 +1595,24 @@
 config SYS_HAS_CPU_CAVIUM_OCTEON
 	bool
 
-config SYS_HAS_CPU_BMIPS3300
+config SYS_HAS_CPU_BMIPS
 	bool
 
+config SYS_HAS_CPU_BMIPS32_3300
+	bool
+	select SYS_HAS_CPU_BMIPS
+
 config SYS_HAS_CPU_BMIPS4350
 	bool
+	select SYS_HAS_CPU_BMIPS
 
 config SYS_HAS_CPU_BMIPS4380
 	bool
+	select SYS_HAS_CPU_BMIPS
 
 config SYS_HAS_CPU_BMIPS5000
 	bool
+	select SYS_HAS_CPU_BMIPS
 
 config SYS_HAS_CPU_XLR
 	bool
@@ -1797,6 +1820,7 @@
 config MIPS_CPU_SCACHE
 	bool
 	select BOARD_SCACHE
+	select MIPS_L1_CACHE_SHIFT_6
 
 config R5000_CPU_SCACHE
 	bool
@@ -1833,59 +1857,48 @@
 	prompt "MIPS MT options"
 
 config MIPS_MT_DISABLED
-	bool "Disable multithreading support."
+	bool "Disable multithreading support"
 	help
-	  Use this option if your workload can't take advantage of
-	  MIPS hardware multithreading support.  On systems that don't have
-	  the option of an MT-enabled processor this option will be the only
-	  option in this menu.
+	  Use this option if your platform does not support the MT ASE
+	  (hardware multithreading). On systems without an MT-enabled
+	  processor, this will be the only option available in this menu.
 
 config MIPS_MT_SMP
 	bool "Use 1 TC on each available VPE for SMP"
 	depends on SYS_SUPPORTS_MULTITHREADING
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
+	select SYNC_R4K
 	select MIPS_MT
 	select SMP
-	select SYS_SUPPORTS_SCHED_SMT if SMP
-	select SYS_SUPPORTS_SMP
 	select SMP_UP
+	select SYS_SUPPORTS_SMP
+	select SYS_SUPPORTS_SCHED_SMT
 	select MIPS_PERF_SHARED_TC_COUNTERS
 	help
-	  This is a kernel model which is known a VSMP but lately has been
-	  marketesed into SMVP.
-	  Virtual SMP uses the processor's VPEs  to implement virtual
-	  processors. In currently available configuration of the 34K processor
-	  this allows for a dual processor. Both processors will share the same
-	  primary caches; each will obtain the half of the TLB for it's own
-	  exclusive use. For a layman this model can be described as similar to
-	  what Intel calls Hyperthreading.
-
-	  For further information see http://www.linux-mips.org/wiki/34K#VSMP
+	  This is a kernel model which is known as SMVP. This is supported
+	  on cores with the MT ASE and uses the available VPEs to implement
+	  virtual processors which support SMP. This is equivalent to the
+	  Intel Hyperthreading feature. For further information go to
+	  <http://www.imgtec.com/mips/mips-multithreading.asp>.
 
 config MIPS_MT_SMTC
-	bool "SMTC: Use all TCs on all VPEs for SMP"
+	bool "Use all TCs on all VPEs for SMP (DEPRECATED)"
 	depends on CPU_MIPS32_R2
-	#depends on CPU_MIPS64_R2		# once there is hardware ...
 	depends on SYS_SUPPORTS_MULTITHREADING
 	select CPU_MIPSR2_IRQ_VI
 	select CPU_MIPSR2_IRQ_EI
 	select MIPS_MT
-	select NR_CPUS_DEFAULT_8
 	select SMP
-	select SYS_SUPPORTS_SMP
 	select SMP_UP
+	select SYS_SUPPORTS_SMP
+	select NR_CPUS_DEFAULT_8
 	help
-	  This is a kernel model which is known a SMTC or lately has been
-	  marketesed into SMVP.
-	  is presenting the available TC's of the core as processors to Linux.
-	  On currently available 34K processors this means a Linux system will
-	  see up to 5 processors. The implementation of the SMTC kernel differs
-	  significantly from VSMP and cannot efficiently coexist in the same
-	  kernel binary so the choice between VSMP and SMTC is a compile time
-	  decision.
-
-	  For further information see http://www.linux-mips.org/wiki/34K#SMTC
+	  This is a kernel model which is known as SMTC. This is
+	  supported on cores with the MT ASE and presents all TCs
+	  available on all VPEs to support SMP. For further
+	  information see <http://www.linux-mips.org/wiki/34K#SMTC>.
 
 endchoice
 
@@ -1922,6 +1935,16 @@
 	  Includes a loader for loading an elf relocatable object
 	  onto another VPE and running it.
 
+config MIPS_VPE_LOADER_CMP
+	bool
+	default "y"
+	depends on MIPS_VPE_LOADER && MIPS_CMP
+
+config MIPS_VPE_LOADER_MT
+	bool
+	default "y"
+	depends on MIPS_VPE_LOADER && !MIPS_CMP
+
 config MIPS_MT_SMTC_IM_BACKSTOP
 	bool "Use per-TC register bits as backstop for inhibited IM bits"
 	depends on MIPS_MT_SMTC
@@ -1955,24 +1978,29 @@
 	  you to ensure the amount you put in the option and the space your
 	  program requires is less or equal to the amount physically present.
 
-# this should possibly be in drivers/char, but it is rather cpu related. Hmmm
 config MIPS_VPE_APSP_API
 	bool "Enable support for AP/SP API (RTLX)"
 	depends on MIPS_VPE_LOADER
 	help
 
+config MIPS_VPE_APSP_API_CMP
+	bool
+	default "y"
+	depends on MIPS_VPE_APSP_API && MIPS_CMP
+
+config MIPS_VPE_APSP_API_MT
+	bool
+	default "y"
+	depends on MIPS_VPE_APSP_API && !MIPS_CMP
+
 config MIPS_CMP
-	bool "MIPS CMP framework support"
-	depends on SYS_SUPPORTS_MIPS_CMP
-	select SMP
+	bool "MIPS CMP support"
+	depends on SYS_SUPPORTS_MIPS_CMP && MIPS_MT_SMP
 	select SYNC_R4K
-	select SYS_SUPPORTS_SMP
-	select SYS_SUPPORTS_SCHED_SMT if SMP
 	select WEAK_ORDERING
 	default n
 	help
-	  This is a placeholder option for the GCMP work. It will need to
-	  be handled differently...
+	  Enable Coherency Manager processor (CMP) support.
 
 config SB1_PASS_1_WORKAROUNDS
 	bool
@@ -2324,6 +2352,23 @@
 
 	  If unsure, say Y. Only embedded should say N here.
 
+config MIPS_O32_FP64_SUPPORT
+	bool "Support for O32 binaries using 64-bit FP"
+	depends on 32BIT || MIPS32_O32
+	default y
+	help
+	  When this is enabled, the kernel will support use of 64-bit floating
+	  point registers with binaries using the O32 ABI along with the
+	  EF_MIPS_FP64 ELF header flag (typically built with -mfp64). On
+	  32-bit MIPS systems this support is at the cost of increasing the
+	  size and complexity of the compiled FPU emulator. Thus if you are
+	  running a MIPS32 system and know that none of your userland binaries
+	  will require 64-bit floating point, you may wish to reduce the size
+	  of your kernel & potentially improve FP emulation performance by
+	  saying N here.
+
+	  If unsure, say Y.
+
 config USE_OF
 	bool
 	select OF
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index efe50787..9b8556d 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -114,7 +114,7 @@
 cflags-$(CONFIG_CPU_LITTLE_ENDIAN)	+= $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL $(undef-all) $(predef-le))
 
 cflags-$(CONFIG_CPU_HAS_SMARTMIPS)	+= $(call cc-option,-msmartmips)
-cflags-$(CONFIG_CPU_MICROMIPS) += $(call cc-option,-mmicromips -mno-jals)
+cflags-$(CONFIG_CPU_MICROMIPS) += $(call cc-option,-mmicromips)
 
 cflags-$(CONFIG_SB1XXX_CORELIS)	+= $(call cc-option,-mno-sched-prolog) \
 				   -fno-omit-frame-pointer
diff --git a/arch/mips/alchemy/common/power.c b/arch/mips/alchemy/common/power.c
index 0c7fce2..bdb28dee 100644
--- a/arch/mips/alchemy/common/power.c
+++ b/arch/mips/alchemy/common/power.c
@@ -29,7 +29,6 @@
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/sysctl.h>
 #include <linux/jiffies.h>
diff --git a/arch/mips/ar7/time.c b/arch/mips/ar7/time.c
index 22c9321..1dc6c3b 100644
--- a/arch/mips/ar7/time.c
+++ b/arch/mips/ar7/time.c
@@ -18,7 +18,6 @@
  * Setting up the clock on the MIPS boards.
  */
 
-#include <linux/init.h>
 #include <linux/time.h>
 #include <linux/err.h>
 #include <linux/clk.h>
diff --git a/arch/mips/ath79/common.h b/arch/mips/ath79/common.h
index 648d2da..a312071 100644
--- a/arch/mips/ath79/common.h
+++ b/arch/mips/ath79/common.h
@@ -15,7 +15,6 @@
 #define __ATH79_COMMON_H
 
 #include <linux/types.h>
-#include <linux/init.h>
 
 #define ATH79_MEM_SIZE_MIN	(2 * 1024 * 1024)
 #define ATH79_MEM_SIZE_MAX	(128 * 1024 * 1024)
diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig
index 2b8b118..09cb6f7 100644
--- a/arch/mips/bcm47xx/Kconfig
+++ b/arch/mips/bcm47xx/Kconfig
@@ -2,6 +2,7 @@
 
 config BCM47XX_SSB
 	bool "SSB Support for Broadcom BCM47XX"
+	select SYS_HAS_CPU_BMIPS32_3300
 	select SSB
 	select SSB_DRIVER_MIPS
 	select SSB_DRIVER_EXTIF
@@ -11,6 +12,7 @@
 	select SSB_PCICORE_HOSTMODE if PCI
 	select SSB_DRIVER_GPIO
 	select GPIOLIB
+	select LEDS_GPIO_REGISTER
 	default y
 	help
 	 Add support for old Broadcom BCM47xx boards with Sonics Silicon Backplane support.
@@ -20,6 +22,7 @@
 config BCM47XX_BCMA
 	bool "BCMA Support for Broadcom BCM47XX"
 	select SYS_HAS_CPU_MIPS32_R2
+	select CPU_MIPSR2_IRQ_VI
 	select BCMA
 	select BCMA_HOST_SOC
 	select BCMA_DRIVER_MIPS
@@ -27,6 +30,7 @@
 	select BCMA_DRIVER_PCI_HOSTMODE if PCI
 	select BCMA_DRIVER_GPIO
 	select GPIOLIB
+	select LEDS_GPIO_REGISTER
 	default y
 	help
 	 Add support for new Broadcom BCM47xx boards with Broadcom specific Advanced Microcontroller Bus.
diff --git a/arch/mips/bcm47xx/Makefile b/arch/mips/bcm47xx/Makefile
index c52daf9..4688b6a 100644
--- a/arch/mips/bcm47xx/Makefile
+++ b/arch/mips/bcm47xx/Makefile
@@ -4,5 +4,4 @@
 #
 
 obj-y				+= irq.o nvram.o prom.o serial.o setup.o time.o sprom.o
-obj-y				+= board.o
-obj-$(CONFIG_BCM47XX_SSB)	+= wgt634u.o
+obj-y				+= board.o buttons.o leds.o
diff --git a/arch/mips/bcm47xx/bcm47xx_private.h b/arch/mips/bcm47xx/bcm47xx_private.h
new file mode 100644
index 0000000..5c94ace
--- /dev/null
+++ b/arch/mips/bcm47xx/bcm47xx_private.h
@@ -0,0 +1,12 @@
+#ifndef LINUX_BCM47XX_PRIVATE_H_
+#define LINUX_BCM47XX_PRIVATE_H_
+
+#include <linux/kernel.h>
+
+/* buttons.c */
+int __init bcm47xx_buttons_register(void);
+
+/* leds.c */
+void __init bcm47xx_leds_register(void);
+
+#endif
diff --git a/arch/mips/bcm47xx/board.c b/arch/mips/bcm47xx/board.c
index f3f6bfe..6d612e2 100644
--- a/arch/mips/bcm47xx/board.c
+++ b/arch/mips/bcm47xx/board.c
@@ -36,26 +36,32 @@
 struct bcm47xx_board_type_list1 bcm47xx_board_list_model_name[] __initconst = {
 	{{BCM47XX_BOARD_DLINK_DIR130, "D-Link DIR-130"}, "DIR-130"},
 	{{BCM47XX_BOARD_DLINK_DIR330, "D-Link DIR-330"}, "DIR-330"},
-	{ {0}, 0},
+	{ {0}, NULL},
 };
 
 /* model_no */
 static const
 struct bcm47xx_board_type_list1 bcm47xx_board_list_model_no[] __initconst = {
 	{{BCM47XX_BOARD_ASUS_WL700GE, "Asus WL700"}, "WL700"},
-	{ {0}, 0},
+	{ {0}, NULL},
 };
 
 /* machine_name */
 static const
 struct bcm47xx_board_type_list1 bcm47xx_board_list_machine_name[] __initconst = {
 	{{BCM47XX_BOARD_LINKSYS_WRTSL54GS, "Linksys WRTSL54GS"}, "WRTSL54GS"},
-	{ {0}, 0},
+	{ {0}, NULL},
 };
 
 /* hardware_version */
 static const
 struct bcm47xx_board_type_list1 bcm47xx_board_list_hardware_version[] __initconst = {
+	{{BCM47XX_BOARD_ASUS_RTN10U, "Asus RT-N10U"}, "RTN10U"},
+	{{BCM47XX_BOARD_ASUS_RTN12, "Asus RT-N12"}, "RT-N12"},
+	{{BCM47XX_BOARD_ASUS_RTN12B1, "Asus RT-N12B1"}, "RTN12B1"},
+	{{BCM47XX_BOARD_ASUS_RTN12C1, "Asus RT-N12C1"}, "RTN12C1"},
+	{{BCM47XX_BOARD_ASUS_RTN12D1, "Asus RT-N12D1"}, "RTN12D1"},
+	{{BCM47XX_BOARD_ASUS_RTN12HP, "Asus RT-N12HP"}, "RTN12HP"},
 	{{BCM47XX_BOARD_ASUS_RTN16, "Asus RT-N16"}, "RT-N16-"},
 	{{BCM47XX_BOARD_ASUS_WL320GE, "Asus WL320GE"}, "WL320G-"},
 	{{BCM47XX_BOARD_ASUS_WL330GE, "Asus WL330GE"}, "WL330GE-"},
@@ -66,7 +72,7 @@
 	{{BCM47XX_BOARD_ASUS_WL520GC, "Asus WL520GC"}, "WL520GC-"},
 	{{BCM47XX_BOARD_ASUS_WL520GU, "Asus WL520GU"}, "WL520GU-"},
 	{{BCM47XX_BOARD_BELKIN_F7D4301, "Belkin F7D4301"}, "F7D4301"},
-	{ {0}, 0},
+	{ {0}, NULL},
 };
 
 /* productid */
@@ -75,19 +81,13 @@
 	{{BCM47XX_BOARD_ASUS_RTAC66U, "Asus RT-AC66U"}, "RT-AC66U"},
 	{{BCM47XX_BOARD_ASUS_RTN10, "Asus RT-N10"}, "RT-N10"},
 	{{BCM47XX_BOARD_ASUS_RTN10D, "Asus RT-N10D"}, "RT-N10D"},
-	{{BCM47XX_BOARD_ASUS_RTN10U, "Asus RT-N10U"}, "RT-N10U"},
-	{{BCM47XX_BOARD_ASUS_RTN12, "Asus RT-N12"}, "RT-N12"},
-	{{BCM47XX_BOARD_ASUS_RTN12B1, "Asus RT-N12B1"}, "RT-N12B1"},
-	{{BCM47XX_BOARD_ASUS_RTN12C1, "Asus RT-N12C1"}, "RT-N12C1"},
-	{{BCM47XX_BOARD_ASUS_RTN12D1, "Asus RT-N12D1"}, "RT-N12D1"},
-	{{BCM47XX_BOARD_ASUS_RTN12HP, "Asus RT-N12HP"}, "RT-N12HP"},
 	{{BCM47XX_BOARD_ASUS_RTN15U, "Asus RT-N15U"}, "RT-N15U"},
 	{{BCM47XX_BOARD_ASUS_RTN16, "Asus RT-N16"}, "RT-N16"},
 	{{BCM47XX_BOARD_ASUS_RTN53, "Asus RT-N53"}, "RT-N53"},
 	{{BCM47XX_BOARD_ASUS_RTN66U, "Asus RT-N66U"}, "RT-N66U"},
 	{{BCM47XX_BOARD_ASUS_WL300G, "Asus WL300G"}, "WL300g"},
 	{{BCM47XX_BOARD_ASUS_WLHDD, "Asus WLHDD"}, "WLHDD"},
-	{ {0}, 0},
+	{ {0}, NULL},
 };
 
 /* ModelId */
@@ -97,7 +97,7 @@
 	{{BCM47XX_BOARD_MOTOROLA_WE800G, "Motorola WE800G"}, "WE800G"},
 	{{BCM47XX_BOARD_MOTOROLA_WR850GP, "Motorola WR850GP"}, "WR850GP"},
 	{{BCM47XX_BOARD_MOTOROLA_WR850GV2V3, "Motorola WR850G"}, "WR850G"},
-	{ {0}, 0},
+	{ {0}, NULL},
 };
 
 /* melco_id or buf1falo_id */
@@ -112,7 +112,7 @@
 	{{BCM47XX_BOARD_BUFFALO_WZR_G300N, "Buffalo WZR-G300N"}, "31120"},
 	{{BCM47XX_BOARD_BUFFALO_WZR_RS_G54, "Buffalo WZR-RS-G54"}, "30083"},
 	{{BCM47XX_BOARD_BUFFALO_WZR_RS_G54HP, "Buffalo WZR-RS-G54HP"}, "30103"},
-	{ {0}, 0},
+	{ {0}, NULL},
 };
 
 /* boot_hw_model, boot_hw_ver */
@@ -143,7 +143,7 @@
 	{{BCM47XX_BOARD_LINKSYS_WRT54G3GV2, "Linksys WRT54G3GV2-VF"}, "WRT54G3GV2-VF", "1.0"},
 	{{BCM47XX_BOARD_LINKSYS_WRT610NV1, "Linksys WRT610N V1"}, "WRT610N", "1.0"},
 	{{BCM47XX_BOARD_LINKSYS_WRT610NV2, "Linksys WRT610N V2"}, "WRT610N", "2.0"},
-	{ {0}, 0},
+	{ {0}, NULL},
 };
 
 /* board_id */
@@ -165,7 +165,7 @@
 	{{BCM47XX_BOARD_NETGEAR_WNR3500V2, "Netgear WNR3500 V2"}, "U12H127T00_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNR3500V2VC, "Netgear WNR3500 V2vc"}, "U12H127T70_NETGEAR"},
 	{{BCM47XX_BOARD_NETGEAR_WNR834BV2, "Netgear WNR834B V2"}, "U12H081T00_NETGEAR"},
-	{ {0}, 0},
+	{ {0}, NULL},
 };
 
 /* boardtype, boardnum, boardrev */
@@ -174,7 +174,9 @@
 	{{BCM47XX_BOARD_HUAWEI_E970, "Huawei E970"}, "0x048e", "0x5347", "0x11"},
 	{{BCM47XX_BOARD_PHICOMM_M1, "Phicomm M1"}, "0x0590", "80", "0x1104"},
 	{{BCM47XX_BOARD_ZTE_H218N, "ZTE H218N"}, "0x053d", "1234", "0x1305"},
-	{ {0}, 0},
+	{{BCM47XX_BOARD_NETGEAR_WNR3500L, "Netgear WNR3500L"}, "0x04CF", "3500", "02"},
+	{{BCM47XX_BOARD_LINKSYS_WRT54GSV1, "Linksys WRT54GS V1"}, "0x0101", "42", "0x10"},
+	{ {0}, NULL},
 };
 
 static const
diff --git a/arch/mips/bcm47xx/buttons.c b/arch/mips/bcm47xx/buttons.c
new file mode 100644
index 0000000..872c62e
--- /dev/null
+++ b/arch/mips/bcm47xx/buttons.c
@@ -0,0 +1,531 @@
+#include "bcm47xx_private.h"
+
+#include <linux/input.h>
+#include <linux/gpio_keys.h>
+#include <linux/interrupt.h>
+#include <bcm47xx_board.h>
+#include <bcm47xx.h>
+
+/**************************************************
+ * Database
+ **************************************************/
+
+#define BCM47XX_GPIO_KEY(_gpio, _code)					\
+	{								\
+		.code		= _code,				\
+		.gpio		= _gpio,				\
+		.active_low	= 1,					\
+	}
+
+/* Asus */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_rtn12[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(1, KEY_RESTART),
+	BCM47XX_GPIO_KEY(4, BTN_0), /* Router mode */
+	BCM47XX_GPIO_KEY(5, BTN_1), /* Repeater mode */
+	BCM47XX_GPIO_KEY(6, BTN_2), /* AP mode */
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_rtn16[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(8, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_rtn66u[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(9, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl300g[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl320ge[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl330ge[] __initconst = {
+	BCM47XX_GPIO_KEY(2, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl500gd[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl500gpv1[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_RESTART),
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl500gpv2[] __initconst = {
+	BCM47XX_GPIO_KEY(2, KEY_RESTART),
+	BCM47XX_GPIO_KEY(3, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl500w[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+	BCM47XX_GPIO_KEY(7, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl520gc[] __initconst = {
+	BCM47XX_GPIO_KEY(2, KEY_RESTART),
+	BCM47XX_GPIO_KEY(3, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl520gu[] __initconst = {
+	BCM47XX_GPIO_KEY(2, KEY_RESTART),
+	BCM47XX_GPIO_KEY(3, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wl700ge[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_POWER), /* Hard disk power switch */
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON), /* EZSetup */
+	BCM47XX_GPIO_KEY(6, KEY_COPY), /* Copy data from USB to internal disk */
+	BCM47XX_GPIO_KEY(7, KEY_RESTART), /* Hard reset */
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_asus_wlhdd[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+/* Huawei */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_huawei_e970[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+/* Belkin */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_belkin_f7d4301[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+	BCM47XX_GPIO_KEY(8, KEY_WPS_BUTTON),
+};
+
+/* Buffalo */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_buffalo_whr2_a54g54[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_buffalo_whr_g125[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(4, KEY_RESTART),
+	BCM47XX_GPIO_KEY(5, BTN_0), /* Router / AP mode switch */
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_buffalo_whr_g54s[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(4, KEY_RESTART),
+	BCM47XX_GPIO_KEY(5, BTN_0), /* Router / AP mode switch */
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_buffalo_whr_hp_g54[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(4, KEY_RESTART),
+	BCM47XX_GPIO_KEY(5, BTN_0), /* Router / AP mode switch */
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_buffalo_wzr_g300n[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_buffalo_wzr_rs_g54[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(4, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_buffalo_wzr_rs_g54hp[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(4, KEY_RESTART),
+};
+
+/* Dell */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_dell_tm2300[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_RESTART),
+};
+
+/* D-Link */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_dlink_dir130[] __initconst = {
+	BCM47XX_GPIO_KEY(3, KEY_RESTART),
+	BCM47XX_GPIO_KEY(7, KEY_UNKNOWN),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_dlink_dir330[] __initconst = {
+	BCM47XX_GPIO_KEY(3, KEY_RESTART),
+	BCM47XX_GPIO_KEY(7, KEY_UNKNOWN),
+};
+
+/* Linksys */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_e1000v1[] __initconst = {
+	BCM47XX_GPIO_KEY(5, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_e1000v21[] __initconst = {
+	BCM47XX_GPIO_KEY(9, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(10, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_e2000v1[] __initconst = {
+	BCM47XX_GPIO_KEY(5, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(8, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_e3000v1[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_e3200v1[] __initconst = {
+	BCM47XX_GPIO_KEY(5, KEY_RESTART),
+	BCM47XX_GPIO_KEY(8, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_e4200v1[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_wrt150nv1[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_wrt150nv11[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_wrt160nv1[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_wrt160nv3[] __initconst = {
+	BCM47XX_GPIO_KEY(5, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_wrt300nv11[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_UNKNOWN),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_wrt310nv1[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+	BCM47XX_GPIO_KEY(8, KEY_UNKNOWN),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_wrt610nv1[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+	BCM47XX_GPIO_KEY(8, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_linksys_wrt610nv2[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+/* Motorola */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_motorola_we800g[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_motorola_wr850gp[] __initconst = {
+	BCM47XX_GPIO_KEY(5, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_motorola_wr850gv2v3[] __initconst = {
+	BCM47XX_GPIO_KEY(5, KEY_RESTART),
+};
+
+/* Netgear */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_netgear_wndr3400v1[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_RESTART),
+	BCM47XX_GPIO_KEY(6, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(8, KEY_RFKILL),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_netgear_wndr3700v3[] __initconst = {
+	BCM47XX_GPIO_KEY(2, KEY_RFKILL),
+	BCM47XX_GPIO_KEY(3, KEY_RESTART),
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_netgear_wndr4500v1[] __initconst = {
+	BCM47XX_GPIO_KEY(4, KEY_WPS_BUTTON),
+	BCM47XX_GPIO_KEY(5, KEY_RFKILL),
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+static const struct gpio_keys_button
+bcm47xx_buttons_netgear_wnr834bv2[] __initconst = {
+	BCM47XX_GPIO_KEY(6, KEY_RESTART),
+};
+
+/* SimpleTech */
+
+static const struct gpio_keys_button
+bcm47xx_buttons_simpletech_simpleshare[] __initconst = {
+	BCM47XX_GPIO_KEY(0, KEY_RESTART),
+};
+
+/**************************************************
+ * Init
+ **************************************************/
+
+static struct gpio_keys_platform_data bcm47xx_button_pdata;
+
+static struct platform_device bcm47xx_buttons_gpio_keys = {
+	.name = "gpio-keys",
+	.dev = {
+		.platform_data = &bcm47xx_button_pdata,
+	}
+};
+
+/*
+ * Copy data from __initconst: init memory is freed after boot, so the
+ * registered platform device needs its own copy of the button list.
+ */
+static int __init bcm47xx_buttons_copy(const struct gpio_keys_button *buttons,
+				       size_t nbuttons)
+{
+	size_t size = nbuttons * sizeof(*buttons);
+
+	bcm47xx_button_pdata.buttons = kmalloc(size, GFP_KERNEL);
+	if (!bcm47xx_button_pdata.buttons)
+		return -ENOMEM;
+	memcpy(bcm47xx_button_pdata.buttons, buttons, size);
+	bcm47xx_button_pdata.nbuttons = nbuttons;
+
+	return 0;
+}
+
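+/*
+ * Convenience wrapper so every call site passes the matching element count;
+ * this only works because each dev_buttons argument is a real array.
+ */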
+#define bcm47xx_copy_bdata(dev_buttons)					\
+	bcm47xx_buttons_copy(dev_buttons, ARRAY_SIZE(dev_buttons))
+
+int __init bcm47xx_buttons_register(void)
+{
+	enum bcm47xx_board board = bcm47xx_board_get();
+	int err;
+
+	switch (board) {
+	case BCM47XX_BOARD_ASUS_RTN12:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_rtn12);
+		break;
+	case BCM47XX_BOARD_ASUS_RTN16:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_rtn16);
+		break;
+	case BCM47XX_BOARD_ASUS_RTN66U:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_rtn66u);
+		break;
+	case BCM47XX_BOARD_ASUS_WL300G:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl300g);
+		break;
+	case BCM47XX_BOARD_ASUS_WL320GE:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl320ge);
+		break;
+	case BCM47XX_BOARD_ASUS_WL330GE:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl330ge);
+		break;
+	case BCM47XX_BOARD_ASUS_WL500GD:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl500gd);
+		break;
+	case BCM47XX_BOARD_ASUS_WL500GPV1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl500gpv1);
+		break;
+	case BCM47XX_BOARD_ASUS_WL500GPV2:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl500gpv2);
+		break;
+	case BCM47XX_BOARD_ASUS_WL500W:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl500w);
+		break;
+	case BCM47XX_BOARD_ASUS_WL520GC:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl520gc);
+		break;
+	case BCM47XX_BOARD_ASUS_WL520GU:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl520gu);
+		break;
+	case BCM47XX_BOARD_ASUS_WL700GE:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wl700ge);
+		break;
+	case BCM47XX_BOARD_ASUS_WLHDD:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_asus_wlhdd);
+		break;
+
+	case BCM47XX_BOARD_BELKIN_F7D4301:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_belkin_f7d4301);
+		break;
+
+	case BCM47XX_BOARD_BUFFALO_WHR2_A54G54:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_buffalo_whr2_a54g54);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WHR_G125:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_buffalo_whr_g125);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WHR_G54S:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_buffalo_whr_g54s);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WHR_HP_G54:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_buffalo_whr_hp_g54);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WZR_G300N:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_buffalo_wzr_g300n);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WZR_RS_G54:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_buffalo_wzr_rs_g54);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WZR_RS_G54HP:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_buffalo_wzr_rs_g54hp);
+		break;
+
+	case BCM47XX_BOARD_DELL_TM2300:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_dell_tm2300);
+		break;
+
+	case BCM47XX_BOARD_DLINK_DIR130:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_dlink_dir130);
+		break;
+	case BCM47XX_BOARD_DLINK_DIR330:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_dlink_dir330);
+		break;
+
+	case BCM47XX_BOARD_HUAWEI_E970:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_huawei_e970);
+		break;
+
+	case BCM47XX_BOARD_LINKSYS_E1000V1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_e1000v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E1000V21:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_e1000v21);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E2000V1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_e2000v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E3000V1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_e3000v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E3200V1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_e3200v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E4200V1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_e4200v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT150NV1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt150nv1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT150NV11:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt150nv11);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT160NV1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt160nv1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT160NV3:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt160nv3);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT300NV11:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt300nv11);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT310NV1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt310nv1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT610NV1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt610nv1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT610NV2:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_linksys_wrt610nv2);
+		break;
+
+	case BCM47XX_BOARD_MOTOROLA_WE800G:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_motorola_we800g);
+		break;
+	case BCM47XX_BOARD_MOTOROLA_WR850GP:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_motorola_wr850gp);
+		break;
+	case BCM47XX_BOARD_MOTOROLA_WR850GV2V3:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_motorola_wr850gv2v3);
+		break;
+
+	case BCM47XX_BOARD_NETGEAR_WNDR3400V1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wndr3400v1);
+		break;
+	case BCM47XX_BOARD_NETGEAR_WNDR3700V3:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wndr3700v3);
+		break;
+	case BCM47XX_BOARD_NETGEAR_WNDR4500V1:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wndr4500v1);
+		break;
+	case BCM47XX_BOARD_NETGEAR_WNR834BV2:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_netgear_wnr834bv2);
+		break;
+
+	case BCM47XX_BOARD_SIMPLETECH_SIMPLESHARE:
+		err = bcm47xx_copy_bdata(bcm47xx_buttons_simpletech_simpleshare);
+		break;
+
+	default:
+		pr_debug("No buttons configuration found for this device\n");
+		return -ENOTSUPP;
+	}
+
+	if (err)
+		return err;
+
+	err = platform_device_register(&bcm47xx_buttons_gpio_keys);
+	if (err) {
+		pr_err("Failed to register platform device: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
diff --git a/arch/mips/bcm47xx/irq.c b/arch/mips/bcm47xx/irq.c
index 8cf3833..e0585b7 100644
--- a/arch/mips/bcm47xx/irq.c
+++ b/arch/mips/bcm47xx/irq.c
@@ -25,10 +25,11 @@
 #include <linux/types.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
+#include <asm/setup.h>
 #include <asm/irq_cpu.h>
 #include <bcm47xx.h>
 
-void plat_irq_dispatch(void)
+asmlinkage void plat_irq_dispatch(void)
 {
 	u32 cause;
 
@@ -50,6 +51,18 @@
 		do_IRQ(6);
 }
 
+#define DEFINE_HWx_IRQDISPATCH(x)					\
+	static void bcm47xx_hw ## x ## _irqdispatch(void)		\
+	{								\
+		do_IRQ(x);						\
+	}
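+/* Instantiate one trivial dispatch handler per hardware IRQ vector (2-7). */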
+DEFINE_HWx_IRQDISPATCH(2)
+DEFINE_HWx_IRQDISPATCH(3)
+DEFINE_HWx_IRQDISPATCH(4)
+DEFINE_HWx_IRQDISPATCH(5)
+DEFINE_HWx_IRQDISPATCH(6)
+DEFINE_HWx_IRQDISPATCH(7)
+
 void __init arch_init_irq(void)
 {
 #ifdef CONFIG_BCM47XX_BCMA
@@ -64,4 +77,14 @@
 	}
 #endif
 	mips_cpu_irq_init();
+
+	if (cpu_has_vint) {
+		pr_info("Setting up vectored interrupts\n");
+		set_vi_handler(2, bcm47xx_hw2_irqdispatch);
+		set_vi_handler(3, bcm47xx_hw3_irqdispatch);
+		set_vi_handler(4, bcm47xx_hw4_irqdispatch);
+		set_vi_handler(5, bcm47xx_hw5_irqdispatch);
+		set_vi_handler(6, bcm47xx_hw6_irqdispatch);
+		set_vi_handler(7, bcm47xx_hw7_irqdispatch);
+	}
 }
diff --git a/arch/mips/bcm47xx/leds.c b/arch/mips/bcm47xx/leds.c
new file mode 100644
index 0000000..647d155
--- /dev/null
+++ b/arch/mips/bcm47xx/leds.c
@@ -0,0 +1,542 @@
+#include "bcm47xx_private.h"
+
+#include <linux/leds.h>
+#include <bcm47xx_board.h>
+
+/**************************************************
+ * Database
+ **************************************************/
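+
+/*
+ * LED names follow the usual leds-gpio convention:
+ * "<platform>:<color>:<function>", e.g. "bcm47xx:green:power".
+ */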
+
+#define BCM47XX_GPIO_LED(_gpio, _color, _function, _active_low,		\
+			 _default_state)				\
+	{								\
+		.name		= "bcm47xx:" _color ":" _function,	\
+		.gpio		= _gpio,				\
+		.active_low	= _active_low,				\
+		.default_state	= _default_state,			\
+	}
+
+#define BCM47XX_GPIO_LED_TRIGGER(_gpio, _color, _function, _active_low,	\
+				 _default_trigger)			\
+	{								\
+		.name		= "bcm47xx:" _color ":" _function,	\
+		.gpio		= _gpio,				\
+		.active_low	= _active_low,				\
+		.default_state	= LEDS_GPIO_DEFSTATE_OFF,		\
+		.default_trigger	= _default_trigger,		\
+	}
+
+/* Asus */
+
+static const struct gpio_led
+bcm47xx_leds_asus_rtn12[] __initconst = {
+	BCM47XX_GPIO_LED(2, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(7, "unk", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_rtn16[] __initconst = {
+	BCM47XX_GPIO_LED(1, "blue", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(7, "blue", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_rtn66u[] __initconst = {
+	BCM47XX_GPIO_LED(12, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(15, "unk", "usb", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl300g[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl320ge[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(2, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(11, "unk", "link", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl330ge[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl500gd[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl500gpv1[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl500gpv2[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(1, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl500w[] __initconst = {
+	BCM47XX_GPIO_LED(5, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl520gc[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(1, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl520gu[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(1, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wl700ge[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON), /* Labeled "READY" (there is no "power" LED). Originally ON, flashing on USB activity. */
+};
+
+static const struct gpio_led
+bcm47xx_leds_asus_wlhdd[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(2, "unk", "usb", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+/* Belkin */
+
+static const struct gpio_led
+bcm47xx_leds_belkin_f7d4301[] __initconst = {
+	BCM47XX_GPIO_LED(10, "green", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(11, "amber", "power", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(12, "unk", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(13, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(14, "unk", "usb0", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(15, "unk", "usb1", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+/* Buffalo */
+
+static const struct gpio_led
+bcm47xx_leds_buffalo_whr2_a54g54[] __initconst = {
+	BCM47XX_GPIO_LED(7, "unk", "diag", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_buffalo_whr_g125[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "bridge", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(2, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(3, "unk", "internal", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(6, "unk", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(7, "unk", "diag", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_buffalo_whr_g54s[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "bridge", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(2, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(3, "unk", "internal", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(6, "unk", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(7, "unk", "diag", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_buffalo_whr_hp_g54[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "bridge", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(2, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(3, "unk", "internal", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(6, "unk", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(7, "unk", "diag", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_buffalo_wzr_g300n[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "bridge", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(6, "unk", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(7, "unk", "diag", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_buffalo_wzr_rs_g54[] __initconst = {
+	BCM47XX_GPIO_LED(6, "unk", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(1, "unk", "vpn", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(7, "unk", "diag", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_buffalo_wzr_rs_g54hp[] __initconst = {
+	BCM47XX_GPIO_LED(6, "unk", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(1, "unk", "vpn", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(7, "unk", "diag", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+/* Dell */
+
+static const struct gpio_led
+bcm47xx_leds_dell_tm2300[] __initconst = {
+	BCM47XX_GPIO_LED(6, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(7, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+};
+
+/* D-Link */
+
+static const struct gpio_led
+bcm47xx_leds_dlink_dir130[] __initconst = {
+	BCM47XX_GPIO_LED_TRIGGER(0, "green", "status", 1, "timer"), /* In the stock firmware this blinks when the device is ready; it is separate from the "power" LED */
+	BCM47XX_GPIO_LED(6, "blue", "unk", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_dlink_dir330[] __initconst = {
+	BCM47XX_GPIO_LED_TRIGGER(0, "green", "status", 1, "timer"), /* In the stock firmware this blinks when the device is ready; it is separate from the "power" LED */
+	BCM47XX_GPIO_LED(4, "unk", "usb", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(6, "blue", "unk", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+/* Huawei */
+
+static const struct gpio_led
+bcm47xx_leds_huawei_e970[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+/* Linksys */
+
+static const struct gpio_led
+bcm47xx_leds_linksys_e1000v1[] __initconst = {
+	BCM47XX_GPIO_LED(0, "blue", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(1, "blue", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(2, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(4, "blue", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_e1000v21[] __initconst = {
+	BCM47XX_GPIO_LED(5, "unk", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(6, "unk", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(7, "amber", "wps", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(8, "blue", "wps", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_e2000v1[] __initconst = {
+	BCM47XX_GPIO_LED(1, "blue", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(2, "blue", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(3, "blue", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(4, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_e3000v1[] __initconst = {
+	BCM47XX_GPIO_LED(0, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(1, "unk", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(3, "blue", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(5, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(7, "unk", "usb", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_e3200v1[] __initconst = {
+	BCM47XX_GPIO_LED(3, "green", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_e4200v1[] __initconst = {
+	BCM47XX_GPIO_LED(5, "white", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_wrt150nv1[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(3, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(5, "green", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_wrt150nv11[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(3, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(5, "green", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_wrt160nv1[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(3, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(5, "blue", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_wrt160nv3[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(2, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(4, "blue", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_wrt300nv11[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(3, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(5, "green", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_wrt310nv1[] __initconst = {
+	BCM47XX_GPIO_LED(1, "blue", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(3, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(9, "blue", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_wrt610nv1[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "usb",  1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(1, "unk", "power",  0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(3, "amber", "wps",  1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(9, "blue", "wps",  1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_linksys_wrt610nv2[] __initconst = {
+	BCM47XX_GPIO_LED(0, "amber", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(1, "unk", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(3, "blue", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(5, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(7, "unk", "usb", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+/* Motorola */
+
+static const struct gpio_led
+bcm47xx_leds_motorola_we800g[] __initconst = {
+	BCM47XX_GPIO_LED(1, "amber", "wlan", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(2, "unk", "unk", 1, LEDS_GPIO_DEFSTATE_OFF), /* There are only 3 LEDs: Power, Wireless and Device (ethernet) */
+	BCM47XX_GPIO_LED(4, "green", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+};
+
+static const struct gpio_led
+bcm47xx_leds_motorola_wr850gp[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(1, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(6, "unk", "dmz", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(7, "unk", "diag", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_motorola_wr850gv2v3[] __initconst = {
+	BCM47XX_GPIO_LED(0, "unk", "wlan", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(1, "unk", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(7, "unk", "diag", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+/* Netgear */
+
+static const struct gpio_led
+bcm47xx_leds_netgear_wndr3400v1[] __initconst = {
+	BCM47XX_GPIO_LED(2, "green", "usb", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(3, "green", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(7, "amber", "power", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_netgear_wndr4500v1[] __initconst = {
+	BCM47XX_GPIO_LED(1, "green", "wps", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(2, "green", "power", 1, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(3, "amber", "power", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(8, "green", "usb1", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(9, "green", "2ghz", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(11, "blue", "5ghz", 1, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(14, "green", "usb2", 1, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+static const struct gpio_led
+bcm47xx_leds_netgear_wnr834bv2[] __initconst = {
+	BCM47XX_GPIO_LED(2, "green", "power", 0, LEDS_GPIO_DEFSTATE_ON),
+	BCM47XX_GPIO_LED(3, "amber", "power", 0, LEDS_GPIO_DEFSTATE_OFF),
+	BCM47XX_GPIO_LED(7, "unk", "connected", 0, LEDS_GPIO_DEFSTATE_OFF),
+};
+
+/* SimpleTech */
+
+static const struct gpio_led
+bcm47xx_leds_simpletech_simpleshare[] __initconst = {
+	BCM47XX_GPIO_LED(1, "unk", "status", 1, LEDS_GPIO_DEFSTATE_OFF), /* "Ready" LED */
+};
+
+/**************************************************
+ * Init
+ **************************************************/
+
+static struct gpio_led_platform_data bcm47xx_leds_pdata;
+
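+/*
+ * Unlike the buttons code, no runtime copy is made here:
+ * gpio_led_register_device() duplicates the LED array itself, so pointing
+ * the pdata at __initconst data is safe.
+ */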
+#define bcm47xx_set_pdata(dev_leds) do {				\
+	bcm47xx_leds_pdata.leds = dev_leds;				\
+	bcm47xx_leds_pdata.num_leds = ARRAY_SIZE(dev_leds);		\
+} while (0)
+
+void __init bcm47xx_leds_register(void)
+{
+	enum bcm47xx_board board = bcm47xx_board_get();
+
+	switch (board) {
+	case BCM47XX_BOARD_ASUS_RTN12:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_rtn12);
+		break;
+	case BCM47XX_BOARD_ASUS_RTN16:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_rtn16);
+		break;
+	case BCM47XX_BOARD_ASUS_RTN66U:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_rtn66u);
+		break;
+	case BCM47XX_BOARD_ASUS_WL300G:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl300g);
+		break;
+	case BCM47XX_BOARD_ASUS_WL320GE:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl320ge);
+		break;
+	case BCM47XX_BOARD_ASUS_WL330GE:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl330ge);
+		break;
+	case BCM47XX_BOARD_ASUS_WL500GD:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl500gd);
+		break;
+	case BCM47XX_BOARD_ASUS_WL500GPV1:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl500gpv1);
+		break;
+	case BCM47XX_BOARD_ASUS_WL500GPV2:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl500gpv2);
+		break;
+	case BCM47XX_BOARD_ASUS_WL500W:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl500w);
+		break;
+	case BCM47XX_BOARD_ASUS_WL520GC:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl520gc);
+		break;
+	case BCM47XX_BOARD_ASUS_WL520GU:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl520gu);
+		break;
+	case BCM47XX_BOARD_ASUS_WL700GE:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wl700ge);
+		break;
+	case BCM47XX_BOARD_ASUS_WLHDD:
+		bcm47xx_set_pdata(bcm47xx_leds_asus_wlhdd);
+		break;
+
+	case BCM47XX_BOARD_BELKIN_F7D4301:
+		bcm47xx_set_pdata(bcm47xx_leds_belkin_f7d4301);
+		break;
+
+	case BCM47XX_BOARD_BUFFALO_WHR2_A54G54:
+		bcm47xx_set_pdata(bcm47xx_leds_buffalo_whr2_a54g54);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WHR_G125:
+		bcm47xx_set_pdata(bcm47xx_leds_buffalo_whr_g125);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WHR_G54S:
+		bcm47xx_set_pdata(bcm47xx_leds_buffalo_whr_g54s);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WHR_HP_G54:
+		bcm47xx_set_pdata(bcm47xx_leds_buffalo_whr_hp_g54);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WZR_G300N:
+		bcm47xx_set_pdata(bcm47xx_leds_buffalo_wzr_g300n);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WZR_RS_G54:
+		bcm47xx_set_pdata(bcm47xx_leds_buffalo_wzr_rs_g54);
+		break;
+	case BCM47XX_BOARD_BUFFALO_WZR_RS_G54HP:
+		bcm47xx_set_pdata(bcm47xx_leds_buffalo_wzr_rs_g54hp);
+		break;
+
+	case BCM47XX_BOARD_DELL_TM2300:
+		bcm47xx_set_pdata(bcm47xx_leds_dell_tm2300);
+		break;
+
+	case BCM47XX_BOARD_DLINK_DIR130:
+		bcm47xx_set_pdata(bcm47xx_leds_dlink_dir130);
+		break;
+	case BCM47XX_BOARD_DLINK_DIR330:
+		bcm47xx_set_pdata(bcm47xx_leds_dlink_dir330);
+		break;
+
+	case BCM47XX_BOARD_HUAWEI_E970:
+		bcm47xx_set_pdata(bcm47xx_leds_huawei_e970);
+		break;
+
+	case BCM47XX_BOARD_LINKSYS_E1000V1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_e1000v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E1000V21:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_e1000v21);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E2000V1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_e2000v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E3000V1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_e3000v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E3200V1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_e3200v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_E4200V1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_e4200v1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT150NV1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt150nv1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT150NV11:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt150nv11);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT160NV1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt160nv1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT160NV3:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt160nv3);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT300NV11:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt300nv11);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT310NV1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt310nv1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT610NV1:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt610nv1);
+		break;
+	case BCM47XX_BOARD_LINKSYS_WRT610NV2:
+		bcm47xx_set_pdata(bcm47xx_leds_linksys_wrt610nv2);
+		break;
+
+	case BCM47XX_BOARD_MOTOROLA_WE800G:
+		bcm47xx_set_pdata(bcm47xx_leds_motorola_we800g);
+		break;
+	case BCM47XX_BOARD_MOTOROLA_WR850GP:
+		bcm47xx_set_pdata(bcm47xx_leds_motorola_wr850gp);
+		break;
+	case BCM47XX_BOARD_MOTOROLA_WR850GV2V3:
+		bcm47xx_set_pdata(bcm47xx_leds_motorola_wr850gv2v3);
+		break;
+
+	case BCM47XX_BOARD_NETGEAR_WNDR3400V1:
+		bcm47xx_set_pdata(bcm47xx_leds_netgear_wndr3400v1);
+		break;
+	case BCM47XX_BOARD_NETGEAR_WNDR4500V1:
+		bcm47xx_set_pdata(bcm47xx_leds_netgear_wndr4500v1);
+		break;
+	case BCM47XX_BOARD_NETGEAR_WNR834BV2:
+		bcm47xx_set_pdata(bcm47xx_leds_netgear_wnr834bv2);
+		break;
+
+	case BCM47XX_BOARD_SIMPLETECH_SIMPLESHARE:
+		bcm47xx_set_pdata(bcm47xx_leds_simpletech_simpleshare);
+		break;
+
+	default:
+		pr_debug("No LEDs configuration found for this device\n");
+		return;
+	}
+
+	gpio_led_register_device(-1, &bcm47xx_leds_pdata);
+}
diff --git a/arch/mips/bcm47xx/nvram.c b/arch/mips/bcm47xx/nvram.c
index b4c585b..6decb27 100644
--- a/arch/mips/bcm47xx/nvram.c
+++ b/arch/mips/bcm47xx/nvram.c
@@ -11,7 +11,6 @@
  * option) any later version.
  */
 
-#include <linux/init.h>
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/ssb/ssb.h>
@@ -22,11 +21,11 @@
 #include <asm/mach-bcm47xx/bcm47xx.h>
 
 static char nvram_buf[NVRAM_SPACE];
+static const u32 nvram_sizes[] = {0x8000, 0xF000, 0x10000};
 
 static u32 find_nvram_size(u32 end)
 {
 	struct nvram_header *header;
-	u32 nvram_sizes[] = {0x8000, 0xF000, 0x10000};
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(nvram_sizes); i++) {
diff --git a/arch/mips/bcm47xx/prom.c b/arch/mips/bcm47xx/prom.c
index 5cba318..0af808d 100644
--- a/arch/mips/bcm47xx/prom.c
+++ b/arch/mips/bcm47xx/prom.c
@@ -28,126 +28,27 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
+#include <linux/ssb/ssb_driver_chipcommon.h>
+#include <linux/ssb/ssb_regs.h>
 #include <linux/smp.h>
 #include <asm/bootinfo.h>
-#include <asm/fw/cfe/cfe_api.h>
-#include <asm/fw/cfe/cfe_error.h>
 #include <bcm47xx.h>
 #include <bcm47xx_board.h>
 
-static int cfe_cons_handle;
 
-static u16 get_chip_id(void)
-{
-	switch (bcm47xx_bus_type) {
-#ifdef CONFIG_BCM47XX_SSB
-	case BCM47XX_BUS_TYPE_SSB:
-		return bcm47xx_bus.ssb.chip_id;
-#endif
-#ifdef CONFIG_BCM47XX_BCMA
-	case BCM47XX_BUS_TYPE_BCMA:
-		return bcm47xx_bus.bcma.bus.chipinfo.id;
-#endif
-	}
-	return 0;
-}
+static char bcm47xx_system_type[20] = "Broadcom BCM47XX";
 
 const char *get_system_type(void)
 {
-	static char buf[50];
-	u16 chip_id = get_chip_id();
-
-	snprintf(buf, sizeof(buf),
-		 (chip_id > 0x9999) ? "Broadcom BCM%d (%s)" :
-				      "Broadcom BCM%04X (%s)",
-		 chip_id, bcm47xx_board_get_name());
-
-	return buf;
+	return bcm47xx_system_type;
 }
 
-void prom_putchar(char c)
+void __init bcm47xx_set_system_type(u16 chip_id)
 {
-	while (cfe_write(cfe_cons_handle, &c, 1) == 0)
-		;
-}
-
-static __init void prom_init_cfe(void)
-{
-	uint32_t cfe_ept;
-	uint32_t cfe_handle;
-	uint32_t cfe_eptseal;
-	int argc = fw_arg0;
-	char **envp = (char **) fw_arg2;
-	int *prom_vec = (int *) fw_arg3;
-
-	/*
-	 * Check if a loader was used; if NOT, the 4 arguments are
-	 * what CFE gives us (handle, 0, EPT and EPTSEAL)
-	 */
-	if (argc < 0) {
-		cfe_handle = (uint32_t)argc;
-		cfe_ept = (uint32_t)envp;
-		cfe_eptseal = (uint32_t)prom_vec;
-	} else {
-		if ((int)prom_vec < 0) {
-			/*
-			 * Old loader; all it gives us is the handle,
-			 * so use the "known" entrypoint and assume
-			 * the seal.
-			 */
-			cfe_handle = (uint32_t)prom_vec;
-			cfe_ept = 0xBFC00500;
-			cfe_eptseal = CFE_EPTSEAL;
-		} else {
-			/*
-			 * Newer loaders bundle the handle/ept/eptseal
-			 * Note: prom_vec is in the loader's useg
-			 * which is still alive in the TLB.
-			 */
-			cfe_handle = prom_vec[0];
-			cfe_ept = prom_vec[2];
-			cfe_eptseal = prom_vec[3];
-		}
-	}
-
-	if (cfe_eptseal != CFE_EPTSEAL) {
-		/* too early for panic to do any good */
-		printk(KERN_ERR "CFE's entrypoint seal doesn't match.");
-		while (1) ;
-	}
-
-	cfe_init(cfe_handle, cfe_ept);
-}
-
-static __init void prom_init_console(void)
-{
-	/* Initialize CFE console */
-	cfe_cons_handle = cfe_getstdhandle(CFE_STDHANDLE_CONSOLE);
-}
-
-static __init void prom_init_cmdline(void)
-{
-	static char buf[COMMAND_LINE_SIZE] __initdata;
-
-	/* Get the kernel command line from CFE */
-	if (cfe_getenv("LINUX_CMDLINE", buf, COMMAND_LINE_SIZE) >= 0) {
-		buf[COMMAND_LINE_SIZE - 1] = 0;
-		strcpy(arcs_cmdline, buf);
-	}
-
-	/* Force a console handover by adding a console= argument if needed,
-	 * as CFE is not available anymore later in the boot process. */
-	if ((strstr(arcs_cmdline, "console=")) == NULL) {
-		/* Try to read the default serial port used by CFE */
-		if ((cfe_getenv("BOOT_CONSOLE", buf, COMMAND_LINE_SIZE) < 0)
-		    || (strncmp("uart", buf, 4)))
-			/* Default to uart0 */
-			strcpy(buf, "uart0");
-
-		/* Compute the new command line */
-		snprintf(arcs_cmdline, COMMAND_LINE_SIZE, "%s console=ttyS%c,115200",
-			 arcs_cmdline, buf[4]);
-	}
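+	/*
+	 * Chip IDs above 0x9999 do not fit in four hex digits and are
+	 * conventionally quoted in decimal instead.
+	 */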
+	snprintf(bcm47xx_system_type, sizeof(bcm47xx_system_type),
+		 (chip_id > 0x9999) ? "Broadcom BCM%d" :
+				      "Broadcom BCM%04X",
+		 chip_id);
 }
 
 static __init void prom_init_mem(void)
@@ -195,12 +96,16 @@
 	add_memory_region(0, mem, BOOT_MEM_RAM);
 }
 
+/*
+ * This is the first serial port on the ChipCommon core; it sits at this
+ * position on both the sb (ssb) and ai (bcma) buses.
+ */
+#define BCM47XX_SERIAL_ADDR (SSB_ENUM_BASE + SSB_CHIPCO_UART0_DATA)
+
 void __init prom_init(void)
 {
-	prom_init_cfe();
-	prom_init_console();
-	prom_init_cmdline();
 	prom_init_mem();
+	setup_8250_early_printk_port(CKSEG1ADDR(BCM47XX_SERIAL_ADDR), 0, 0);
 }
 
 void __init prom_free_prom_memory(void)
diff --git a/arch/mips/bcm47xx/serial.c b/arch/mips/bcm47xx/serial.c
index b8ef965..2f5bbd6 100644
--- a/arch/mips/bcm47xx/serial.c
+++ b/arch/mips/bcm47xx/serial.c
@@ -31,7 +31,8 @@
 
 	memset(&uart8250_data, 0,  sizeof(uart8250_data));
 
-	for (i = 0; i < mcore->nr_serial_ports; i++) {
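+	/* Stop one short of the array size so the last entry stays zeroed;
+	 * serial8250 platform data relies on an empty terminating entry. */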
+	for (i = 0; i < mcore->nr_serial_ports &&
+		    i < ARRAY_SIZE(uart8250_data) - 1; i++) {
 		struct plat_serial8250_port *p = &(uart8250_data[i]);
 		struct ssb_serial_port *ssb_port = &(mcore->serial_ports[i]);
 
@@ -55,7 +56,8 @@
 
 	memset(&uart8250_data, 0,  sizeof(uart8250_data));
 
-	for (i = 0; i < cc->nr_serial_ports; i++) {
+	for (i = 0; i < cc->nr_serial_ports &&
+		    i < ARRAY_SIZE(uart8250_data) - 1; i++) {
 		struct plat_serial8250_port *p = &(uart8250_data[i]);
 		struct bcma_serial_port *bcma_port;
 		bcma_port = &(cc->serial_ports[i]);
diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index 9057728..025be21 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -26,6 +26,8 @@
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include "bcm47xx_private.h"
+
 #include <linux/export.h>
 #include <linux/types.h>
 #include <linux/ethtool.h>
@@ -35,6 +37,8 @@
 #include <linux/ssb/ssb_embedded.h>
 #include <linux/bcma/bcma_soc.h>
 #include <asm/bootinfo.h>
+#include <asm/idle.h>
+#include <asm/prom.h>
 #include <asm/reboot.h>
 #include <asm/time.h>
 #include <bcm47xx.h>
@@ -213,12 +217,14 @@
 #ifdef CONFIG_BCM47XX_BCMA
 		bcm47xx_bus_type = BCM47XX_BUS_TYPE_BCMA;
 		bcm47xx_register_bcma();
+		bcm47xx_set_system_type(bcm47xx_bus.bcma.bus.chipinfo.id);
 #endif
 	} else {
 		printk(KERN_INFO "bcm47xx: using ssb bus\n");
 #ifdef CONFIG_BCM47XX_SSB
 		bcm47xx_bus_type = BCM47XX_BUS_TYPE_SSB;
 		bcm47xx_register_ssb();
+		bcm47xx_set_system_type(bcm47xx_bus.ssb.chip_id);
 #endif
 	}
 
@@ -226,8 +232,34 @@
 	_machine_halt = bcm47xx_machine_halt;
 	pm_power_off = bcm47xx_machine_halt;
 	bcm47xx_board_detect();
+	mips_set_machine_name(bcm47xx_board_get_name());
 }
 
+static int __init bcm47xx_cpu_fixes(void)
+{
+	switch (bcm47xx_bus_type) {
+#ifdef CONFIG_BCM47XX_SSB
+	case BCM47XX_BUS_TYPE_SSB:
+		/* Nothing to do */
+		break;
+#endif
+#ifdef CONFIG_BCM47XX_BCMA
+	case BCM47XX_BUS_TYPE_BCMA:
+		/* The BCM4706 has a problem with the CPU wait instruction.
+		 * When r4k_wait or r4k_wait_irqoff is used, the CPU just hangs
+		 * and never returns from msleep(). Removing the cpu_wait
+		 * functionality is a workaround for this problem. The BCM4716
+		 * does not have this problem.
+		 */
+		if (bcm47xx_bus.bcma.bus.chipinfo.id == BCMA_CHIP_ID_BCM4706)
+			cpu_wait = NULL;
+		break;
+#endif
+	}
+	return 0;
+}
+arch_initcall(bcm47xx_cpu_fixes);
+
 static struct fixed_phy_status bcm47xx_fixed_phy_status __initdata = {
 	.link	= 1,
 	.speed	= SPEED_100,
@@ -248,6 +280,9 @@
 		break;
 #endif
 	}
+	bcm47xx_buttons_register();
+	bcm47xx_leds_register();
+
 	fixed_phy_add(PHY_POLL, 0, &bcm47xx_fixed_phy_status);
 	return 0;
 }
diff --git a/arch/mips/bcm47xx/sprom.c b/arch/mips/bcm47xx/sprom.c
index ad03c93..a8b5408 100644
--- a/arch/mips/bcm47xx/sprom.c
+++ b/arch/mips/bcm47xx/sprom.c
@@ -135,7 +135,7 @@
 }
 
 static void nvram_read_macaddr(const char *prefix, const char *name,
-			       u8 (*val)[6], bool fallback)
+			       u8 val[6], bool fallback)
 {
 	char buf[100];
 	int err;
@@ -144,11 +144,11 @@
 	if (err < 0)
 		return;
 
-	bcm47xx_nvram_parse_macaddr(buf, *val);
+	bcm47xx_nvram_parse_macaddr(buf, val);
 }
 
 static void nvram_read_alpha2(const char *prefix, const char *name,
-			     char (*val)[2], bool fallback)
+			     char val[2], bool fallback)
 {
 	char buf[10];
 	int err;
@@ -162,7 +162,7 @@
 		pr_warn("alpha2 is too long %s\n", buf);
 		return;
 	}
-	memcpy(val, buf, sizeof(val));
+	memcpy(val, buf, 2);
 }
 
 static void bcm47xx_fill_sprom_r1234589(struct ssb_sprom *sprom,
@@ -180,7 +180,7 @@
 		      fallback);
 	nvram_read_s8(prefix, NULL, "ag1", &sprom->antenna_gain.a1, 0,
 		      fallback);
-	nvram_read_alpha2(prefix, "ccode", &sprom->alpha2, fallback);
+	nvram_read_alpha2(prefix, "ccode", sprom->alpha2, fallback);
 }
 
 static void bcm47xx_fill_sprom_r12389(struct ssb_sprom *sprom,
@@ -633,20 +633,20 @@
 static void bcm47xx_fill_sprom_ethernet(struct ssb_sprom *sprom,
 					const char *prefix, bool fallback)
 {
-	nvram_read_macaddr(prefix, "et0macaddr", &sprom->et0mac, fallback);
+	nvram_read_macaddr(prefix, "et0macaddr", sprom->et0mac, fallback);
 	nvram_read_u8(prefix, NULL, "et0mdcport", &sprom->et0mdcport, 0,
 		      fallback);
 	nvram_read_u8(prefix, NULL, "et0phyaddr", &sprom->et0phyaddr, 0,
 		      fallback);
 
-	nvram_read_macaddr(prefix, "et1macaddr", &sprom->et1mac, fallback);
+	nvram_read_macaddr(prefix, "et1macaddr", sprom->et1mac, fallback);
 	nvram_read_u8(prefix, NULL, "et1mdcport", &sprom->et1mdcport, 0,
 		      fallback);
 	nvram_read_u8(prefix, NULL, "et1phyaddr", &sprom->et1phyaddr, 0,
 		      fallback);
 
-	nvram_read_macaddr(prefix, "macaddr", &sprom->il0mac, fallback);
-	nvram_read_macaddr(prefix, "il0macaddr", &sprom->il0mac, fallback);
+	nvram_read_macaddr(prefix, "macaddr", sprom->il0mac, fallback);
+	nvram_read_macaddr(prefix, "il0macaddr", sprom->il0mac, fallback);
 }
 
 static void bcm47xx_fill_board_data(struct ssb_sprom *sprom, const char *prefix,
diff --git a/arch/mips/bcm47xx/wgt634u.c b/arch/mips/bcm47xx/wgt634u.c
deleted file mode 100644
index c63a4c2..0000000
--- a/arch/mips/bcm47xx/wgt634u.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2007 Aurelien Jarno <aurelien@aurel32.net>
- */
-
-#include <linux/platform_device.h>
-#include <linux/module.h>
-#include <linux/leds.h>
-#include <linux/mtd/physmap.h>
-#include <linux/ssb/ssb.h>
-#include <linux/ssb/ssb_embedded.h>
-#include <linux/interrupt.h>
-#include <linux/reboot.h>
-#include <linux/gpio.h>
-#include <asm/mach-bcm47xx/bcm47xx.h>
-
-/* GPIO definitions for the WGT634U */
-#define WGT634U_GPIO_LED	3
-#define WGT634U_GPIO_RESET	2
-#define WGT634U_GPIO_TP1	7
-#define WGT634U_GPIO_TP2	6
-#define WGT634U_GPIO_TP3	5
-#define WGT634U_GPIO_TP4	4
-#define WGT634U_GPIO_TP5	1
-
-static struct gpio_led wgt634u_leds[] = {
-	{
-		.name = "power",
-		.gpio = WGT634U_GPIO_LED,
-		.active_low = 1,
-		.default_trigger = "heartbeat",
-	},
-};
-
-static struct gpio_led_platform_data wgt634u_led_data = {
-	.num_leds =	ARRAY_SIZE(wgt634u_leds),
-	.leds =		wgt634u_leds,
-};
-
-static struct platform_device wgt634u_gpio_leds = {
-	.name =		"leds-gpio",
-	.id =		-1,
-	.dev = {
-		.platform_data = &wgt634u_led_data,
-	}
-};
-
-
-/* 8MiB flash. The struct mtd_partition matches original Netgear WGT634U
-   firmware. */
-static struct mtd_partition wgt634u_partitions[] = {
-	{
-		.name	    = "cfe",
-		.offset	    = 0,
-		.size	    = 0x60000,		/* 384k */
-		.mask_flags = MTD_WRITEABLE	/* force read-only */
-	},
-	{
-		.name	= "config",
-		.offset = 0x60000,
-		.size	= 0x20000		/* 128k */
-	},
-	{
-		.name	= "linux",
-		.offset = 0x80000,
-		.size	= 0x140000		/* 1280k */
-	},
-	{
-		.name	= "jffs",
-		.offset = 0x1c0000,
-		.size	= 0x620000		/* 6272k */
-	},
-	{
-		.name	= "nvram",
-		.offset = 0x7e0000,
-		.size	= 0x20000		/* 128k */
-	},
-};
-
-static struct physmap_flash_data wgt634u_flash_data = {
-	.parts	  = wgt634u_partitions,
-	.nr_parts = ARRAY_SIZE(wgt634u_partitions)
-};
-
-static struct resource wgt634u_flash_resource = {
-	.flags = IORESOURCE_MEM,
-};
-
-static struct platform_device wgt634u_flash = {
-	.name	       = "physmap-flash",
-	.id	       = 0,
-	.dev	       = { .platform_data = &wgt634u_flash_data, },
-	.resource      = &wgt634u_flash_resource,
-	.num_resources = 1,
-};
-
-/* Platform devices */
-static struct platform_device *wgt634u_devices[] __initdata = {
-	&wgt634u_flash,
-	&wgt634u_gpio_leds,
-};
-
-static irqreturn_t gpio_interrupt(int irq, void *ignored)
-{
-	int state;
-
-	/* Interrupts are shared, check if the current one is
-	   a GPIO interrupt. */
-	if (!ssb_chipco_irq_status(&bcm47xx_bus.ssb.chipco,
-				   SSB_CHIPCO_IRQ_GPIO))
-		return IRQ_NONE;
-
-	state = gpio_get_value(WGT634U_GPIO_RESET);
-
-	/* Interrupt are level triggered, revert the interrupt polarity
-	   to clear the interrupt. */
-	ssb_gpio_polarity(&bcm47xx_bus.ssb, 1 << WGT634U_GPIO_RESET,
-			  state ? 1 << WGT634U_GPIO_RESET : 0);
-
-	if (!state) {
-		printk(KERN_INFO "Reset button pressed");
-		ctrl_alt_del();
-	}
-
-	return IRQ_HANDLED;
-}
-
-static int __init wgt634u_init(void)
-{
-	/* There is no easy way to detect that we are running on a WGT634U
-	 * machine. Use the MAC address as an heuristic. Netgear Inc. has
-	 * been allocated ranges 00:09:5b:xx:xx:xx and 00:0f:b5:xx:xx:xx.
-	 */
-	u8 *et0mac;
-
-	if (bcm47xx_bus_type != BCM47XX_BUS_TYPE_SSB)
-		return -ENODEV;
-
-	et0mac = bcm47xx_bus.ssb.sprom.et0mac;
-
-	if (et0mac[0] == 0x00 &&
-	    ((et0mac[1] == 0x09 && et0mac[2] == 0x5b) ||
-	     (et0mac[1] == 0x0f && et0mac[2] == 0xb5))) {
-		struct ssb_mipscore *mcore = &bcm47xx_bus.ssb.mipscore;
-
-		printk(KERN_INFO "WGT634U machine detected.\n");
-
-		if (!request_irq(gpio_to_irq(WGT634U_GPIO_RESET),
-				 gpio_interrupt, IRQF_SHARED,
-				 "WGT634U GPIO", &bcm47xx_bus.ssb.chipco)) {
-			gpio_direction_input(WGT634U_GPIO_RESET);
-			ssb_gpio_intmask(&bcm47xx_bus.ssb,
-					 1 << WGT634U_GPIO_RESET,
-					 1 << WGT634U_GPIO_RESET);
-			ssb_chipco_irq_mask(&bcm47xx_bus.ssb.chipco,
-					    SSB_CHIPCO_IRQ_GPIO,
-					    SSB_CHIPCO_IRQ_GPIO);
-		}
-
-		wgt634u_flash_data.width = mcore->pflash.buswidth;
-		wgt634u_flash_resource.start = mcore->pflash.window;
-		wgt634u_flash_resource.end = mcore->pflash.window
-					   + mcore->pflash.window_size
-					   - 1;
-		return platform_add_devices(wgt634u_devices,
-					    ARRAY_SIZE(wgt634u_devices));
-	} else
-		return -ENODEV;
-}
-
-module_init(wgt634u_init);
diff --git a/arch/mips/bcm63xx/Kconfig b/arch/mips/bcm63xx/Kconfig
index b78306c..a057fdf1 100644
--- a/arch/mips/bcm63xx/Kconfig
+++ b/arch/mips/bcm63xx/Kconfig
@@ -3,33 +3,41 @@
 
 config BCM63XX_CPU_3368
 	bool "support 3368 CPU"
+	select SYS_HAS_CPU_BMIPS4350
 	select HW_HAS_PCI
 
 config BCM63XX_CPU_6328
 	bool "support 6328 CPU"
+	select SYS_HAS_CPU_BMIPS4350
 	select HW_HAS_PCI
 
 config BCM63XX_CPU_6338
 	bool "support 6338 CPU"
+	select SYS_HAS_CPU_BMIPS32_3300
 	select HW_HAS_PCI
 
 config BCM63XX_CPU_6345
 	bool "support 6345 CPU"
+	select SYS_HAS_CPU_BMIPS32_3300
 
 config BCM63XX_CPU_6348
 	bool "support 6348 CPU"
+	select SYS_HAS_CPU_BMIPS32_3300
 	select HW_HAS_PCI
 
 config BCM63XX_CPU_6358
 	bool "support 6358 CPU"
+	select SYS_HAS_CPU_BMIPS4350
 	select HW_HAS_PCI
 
 config BCM63XX_CPU_6362
 	bool "support 6362 CPU"
+	select SYS_HAS_CPU_BMIPS4350
 	select HW_HAS_PCI
 
 config BCM63XX_CPU_6368
 	bool "support 6368 CPU"
+	select SYS_HAS_CPU_BMIPS4350
 	select HW_HAS_PCI
 endmenu
 
diff --git a/arch/mips/bcm63xx/Makefile b/arch/mips/bcm63xx/Makefile
index ac28073..9019f54 100644
--- a/arch/mips/bcm63xx/Makefile
+++ b/arch/mips/bcm63xx/Makefile
@@ -1,7 +1,7 @@
 obj-y		+= clk.o cpu.o cs.o gpio.o irq.o nvram.o prom.o reset.o \
 		   setup.o timer.o dev-dsp.o dev-enet.o dev-flash.o \
-		   dev-pcmcia.o dev-rng.o dev-spi.o dev-uart.o dev-wdt.o \
-		   dev-usb-usbd.o
+		   dev-pcmcia.o dev-rng.o dev-spi.o dev-hsspi.o dev-uart.o \
+		   dev-wdt.o dev-usb-usbd.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
 obj-y		+= boards/
diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c
index 5b974eb..33727e7 100644
--- a/arch/mips/bcm63xx/boards/board_bcm963xx.c
+++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c
@@ -23,6 +23,7 @@
 #include <bcm63xx_dev_enet.h>
 #include <bcm63xx_dev_dsp.h>
 #include <bcm63xx_dev_flash.h>
+#include <bcm63xx_dev_hsspi.h>
 #include <bcm63xx_dev_pcmcia.h>
 #include <bcm63xx_dev_spi.h>
 #include <bcm63xx_dev_usb_usbd.h>
@@ -915,6 +916,8 @@
 
 	bcm63xx_spi_register();
 
+	bcm63xx_hsspi_register();
+
 	bcm63xx_flash_register();
 
 	bcm63xx_led_data.num_leds = ARRAY_SIZE(board.leds);
diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c
index 43da4ae..6375652 100644
--- a/arch/mips/bcm63xx/clk.c
+++ b/arch/mips/bcm63xx/clk.c
@@ -226,6 +226,28 @@
 };
 
 /*
+ * HSSPI clock
+ */
+static void hsspi_set(struct clk *clk, int enable)
+{
+	u32 mask;
+
+	if (BCMCPU_IS_6328())
+		mask = CKCTL_6328_HSSPI_EN;
+	else if (BCMCPU_IS_6362())
+		mask = CKCTL_6362_HSSPI_EN;
+	else
+		return;
+
+	bcm_hwclock_set(mask, enable);
+}
+
+static struct clk clk_hsspi = {
+	.set	= hsspi_set,
+};
+
+
+/*
  * XTM clock
  */
 static void xtm_set(struct clk *clk, int enable)
@@ -346,6 +368,8 @@
 		return &clk_usbd;
 	if (!strcmp(id, "spi"))
 		return &clk_spi;
+	if (!strcmp(id, "hsspi"))
+		return &clk_hsspi;
 	if (!strcmp(id, "xtm"))
 		return &clk_xtm;
 	if (!strcmp(id, "periph"))
@@ -366,3 +390,21 @@
 }
 
 EXPORT_SYMBOL(clk_put);
+
+#define HSSPI_PLL_HZ_6328	133333333
+#define HSSPI_PLL_HZ_6362	400000000
+
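+/*
+ * The HSSPI block is clocked from a fixed per-SoC PLL, so its rate can be
+ * filled in once at boot rather than computed in hsspi_set().
+ */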
+static int __init bcm63xx_clk_init(void)
+{
+	switch (bcm63xx_get_cpu_id()) {
+	case BCM6328_CPU_ID:
+		clk_hsspi.rate = HSSPI_PLL_HZ_6328;
+		break;
+	case BCM6362_CPU_ID:
+		clk_hsspi.rate = HSSPI_PLL_HZ_6362;
+		break;
+	}
+
+	return 0;
+}
+arch_initcall(bcm63xx_clk_init);
diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c
index b713cd6..1b1b8a8 100644
--- a/arch/mips/bcm63xx/cpu.c
+++ b/arch/mips/bcm63xx/cpu.c
@@ -123,7 +123,9 @@
 
 static unsigned int detect_cpu_clock(void)
 {
-	switch (bcm63xx_get_cpu_id()) {
+	u16 cpu_id = bcm63xx_get_cpu_id();
+
+	switch (cpu_id) {
 	case BCM3368_CPU_ID:
 		return 300000000;
 
@@ -249,7 +251,7 @@
 	}
 
 	default:
-		BUG();
+		panic("Failed to detect clock for CPU with id=%04X\n", cpu_id);
 	}
 }
 
diff --git a/arch/mips/bcm63xx/dev-hsspi.c b/arch/mips/bcm63xx/dev-hsspi.c
new file mode 100644
index 0000000..696abc4
--- /dev/null
+++ b/arch/mips/bcm63xx/dev-hsspi.c
@@ -0,0 +1,47 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 Jonas Gorski <jonas.gorski@gmail.com>
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+
+#include <bcm63xx_cpu.h>
+#include <bcm63xx_dev_hsspi.h>
+#include <bcm63xx_regs.h>
+
+static struct resource spi_resources[] = {
+	{
+		.start		= -1, /* filled at runtime */
+		.end		= -1, /* filled at runtime */
+		.flags		= IORESOURCE_MEM,
+	},
+	{
+		.start		= -1, /* filled at runtime */
+		.flags		= IORESOURCE_IRQ,
+	},
+};
+
+static struct platform_device bcm63xx_hsspi_device = {
+	.name		= "bcm63xx-hsspi",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(spi_resources),
+	.resource	= spi_resources,
+};
+
+int __init bcm63xx_hsspi_register(void)
+{
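+	/* Of the SoCs handled here, only the 6328 and 6362 have HSSPI. */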
+	if (!BCMCPU_IS_6328() && !BCMCPU_IS_6362())
+		return -ENODEV;
+
+	spi_resources[0].start = bcm63xx_regset_address(RSET_HSSPI);
+	spi_resources[0].end = spi_resources[0].start;
+	spi_resources[0].end += RSET_HSSPI_SIZE - 1;
+	spi_resources[1].start = bcm63xx_get_irq_number(IRQ_HSSPI);
+
+	return platform_device_register(&bcm63xx_hsspi_device);
+}
diff --git a/arch/mips/bcm63xx/early_printk.c b/arch/mips/bcm63xx/early_printk.c
index aa8f7f9..6092226 100644
--- a/arch/mips/bcm63xx/early_printk.c
+++ b/arch/mips/bcm63xx/early_printk.c
@@ -6,9 +6,8 @@
  * Copyright (C) 2008 Maxime Bizon <mbizon@freebox.fr>
  */
 
-#include <linux/init.h>
 #include <bcm63xx_io.h>
-#include <bcm63xx_regs.h>
+#include <linux/serial_bcm63xx.h>
 
 static void wait_xfered(void)
 {
diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c
index 8ac4e09..e1f27d6 100644
--- a/arch/mips/bcm63xx/prom.c
+++ b/arch/mips/bcm63xx/prom.c
@@ -59,14 +59,12 @@
 	/* do low level board init */
 	board_prom_init();
 
-	if (IS_ENABLED(CONFIG_CPU_BMIPS4350) && IS_ENABLED(CONFIG_SMP)) {
-		/* set up SMP */
-		register_smp_ops(&bmips_smp_ops);
-
+	/* set up SMP */
+	if (!register_bmips_smp_ops()) {
 		/*
-		 * BCM6328 might not have its second CPU enabled, while BCM6358
-		 * needs special handling for its shared TLB, so disable SMP
-		 * for now.
+		 * BCM6328 might not have its second CPU enabled, while BCM3368
+		 * and BCM6358 need special handling for their shared TLB, so
+		 * disable SMP for now.
 		 */
 		if (BCMCPU_IS_6328()) {
 			reg = bcm_readl(BCM_6328_OTP_BASE +
@@ -74,7 +72,7 @@
 
 			if (reg & OTP_6328_REG3_TP1_DISABLED)
 				bmips_smp_enabled = 0;
-		} else if (BCMCPU_IS_6358()) {
+		} else if (BCMCPU_IS_3368() || BCMCPU_IS_6358()) {
 			bmips_smp_enabled = 0;
 		}
 
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index ca0c343..61af6b6 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -27,10 +27,10 @@
 	-DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
 	-DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
 
-targets := head.o decompress.o dbg.o uart-16550.o uart-alchemy.o
+targets := head.o decompress.o string.o dbg.o uart-16550.o uart-alchemy.o
 
 # decompressor objects (linked with vmlinuz)
-vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/dbg.o
+vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o $(obj)/dbg.o
 
 ifdef CONFIG_DEBUG_ZBOOT
 vmlinuzobjs-$(CONFIG_SYS_SUPPORTS_ZBOOT_UART16550) += $(obj)/uart-16550.o
diff --git a/arch/mips/boot/compressed/dbg.c b/arch/mips/boot/compressed/dbg.c
index 134a616..06c6a5b 100644
--- a/arch/mips/boot/compressed/dbg.c
+++ b/arch/mips/boot/compressed/dbg.c
@@ -6,7 +6,6 @@
  * need to implement your own putc().
  */
 #include <linux/compiler.h>
-#include <linux/init.h>
 #include <linux/types.h>
 
 void __weak putc(char c)
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index a8c6fd6..c00c4dd 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -43,33 +43,11 @@
 /* activate the code for pre-boot environment */
 #define STATIC static
 
-#if defined(CONFIG_KERNEL_GZIP) || defined(CONFIG_KERNEL_XZ) || \
-	defined(CONFIG_KERNEL_LZ4)
-void *memcpy(void *dest, const void *src, size_t n)
-{
-	int i;
-	const char *s = src;
-	char *d = dest;
-
-	for (i = 0; i < n; i++)
-		d[i] = s[i];
-	return dest;
-}
-#endif
 #ifdef CONFIG_KERNEL_GZIP
 #include "../../../../lib/decompress_inflate.c"
 #endif
 
 #ifdef CONFIG_KERNEL_BZIP2
-void *memset(void *s, int c, size_t n)
-{
-	int i;
-	char *ss = s;
-
-	for (i = 0; i < n; i++)
-		ss[i] = c;
-	return s;
-}
 #include "../../../../lib/decompress_bunzip2.c"
 #endif
 
diff --git a/arch/mips/boot/compressed/string.c b/arch/mips/boot/compressed/string.c
new file mode 100644
index 0000000..9de9885
--- /dev/null
+++ b/arch/mips/boot/compressed/string.c
@@ -0,0 +1,28 @@
+/*
+ * arch/mips/boot/compressed/string.c
+ *
+ * Very small subset of simple string routines
+ */
+
+#include <linux/types.h>
+
+void *memcpy(void *dest, const void *src, size_t n)
+{
+	int i;
+	const char *s = src;
+	char *d = dest;
+
+	for (i = 0; i < n; i++)
+		d[i] = s[i];
+	return dest;
+}
+
+void *memset(void *s, int c, size_t n)
+{
+	int i;
+	char *ss = s;
+
+	for (i = 0; i < n; i++)
+		ss[i] = c;
+	return s;
+}
diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c
index c01d343..237494b 100644
--- a/arch/mips/boot/compressed/uart-16550.c
+++ b/arch/mips/boot/compressed/uart-16550.c
@@ -4,7 +4,6 @@
 
 #include <linux/types.h>
 #include <linux/serial_reg.h>
-#include <linux/init.h>
 
 #include <asm/addrspace.h>
 
@@ -19,8 +18,8 @@
 #endif
 
 #ifdef CONFIG_MACH_JZ4740
-#define UART0_BASE  0xB0030000
-#define PORT(offset) (UART0_BASE + (4 * offset))
+#include <asm/mach-jz4740/base.h>
+#define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset))
 #endif
 
 #ifdef CONFIG_CPU_XLR
diff --git a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c
index 132bccc..8241fc6 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c
@@ -47,6 +47,7 @@
  * state. It points to a bootmem named block.
  */
 __cvmx_cmd_queue_all_state_t *__cvmx_cmd_queue_state_ptr;
+EXPORT_SYMBOL_GPL(__cvmx_cmd_queue_state_ptr);
 
 /**
  * Initialize the Global queue state pointer.
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
index 0a1283c..b764df6 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-board.c
@@ -722,3 +722,30 @@
 	}
 	return 0;
 }
+
+/**
+ * Get the clock type used for the USB block based on board type.
+ * Used by the USB code for auto-configuration of the clock type.
+ *
+ * Returns the USB clock type enumeration.
+ */
+enum cvmx_helper_board_usb_clock_types __cvmx_helper_board_usb_get_clock_type(void)
+{
+	switch (cvmx_sysinfo_get()->board_type) {
+	case CVMX_BOARD_TYPE_BBGW_REF:
+	case CVMX_BOARD_TYPE_LANAI2_A:
+	case CVMX_BOARD_TYPE_LANAI2_U:
+	case CVMX_BOARD_TYPE_LANAI2_G:
+	case CVMX_BOARD_TYPE_NIC10E_66:
+	case CVMX_BOARD_TYPE_UBNT_E100:
+		return USB_CLOCK_TYPE_CRYSTAL_12;
+	case CVMX_BOARD_TYPE_NIC10E:
+		return USB_CLOCK_TYPE_REF_12;
+	default:
+		break;
+	}
+	/* Most boards except NIC10e use a 12MHz crystal */
+	if (OCTEON_IS_MODEL(OCTEON_FAM_2))
+		return USB_CLOCK_TYPE_CRYSTAL_12;
+	return USB_CLOCK_TYPE_REF_48;
+}
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-util.c b/arch/mips/cavium-octeon/executive/cvmx-helper-util.c
index 65d2bc9..453d7f6 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper-util.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper-util.c
@@ -251,6 +251,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_setup_red);
 
 /**
  * Setup the common GMX settings that determine the number of
@@ -384,6 +385,7 @@
 	}
 	return -1;
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_get_ipd_port);
 
 /**
  * Returns the interface number for an IPD/PKO port number.
@@ -408,6 +410,7 @@
 
 	return -1;
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_get_interface_num);
 
 /**
  * Returns the interface index number for an IPD/PKO port
@@ -431,3 +434,4 @@
 
 	return -1;
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_get_interface_index_num);
diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper.c b/arch/mips/cavium-octeon/executive/cvmx-helper.c
index d63d20d..8553ad5 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-helper.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-helper.c
@@ -67,7 +67,7 @@
 void (*cvmx_override_ipd_port_setup) (int ipd_port);
 
 /* Port count per interface */
-static int interface_port_count[4] = { 0, 0, 0, 0 };
+static int interface_port_count[5];
 
 /* Port last configured link info index by IPD/PKO port */
 static cvmx_helper_link_info_t
@@ -88,6 +88,7 @@
 	else
 		return 3;
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_get_number_of_interfaces);
 
 /**
  * Return the number of ports on an interface. Depending on the
@@ -102,6 +103,7 @@
 {
 	return interface_port_count[interface];
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_ports_on_interface);
 
 /**
  * Get the operating mode of an interface. Depending on the Octeon
@@ -179,6 +181,7 @@
 			return CVMX_HELPER_INTERFACE_MODE_RGMII;
 	}
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_interface_get_mode);
 
 /**
  * Configure the IPD/PIP tagging and QoS options for a specific
@@ -825,6 +828,7 @@
 		__cvmx_helper_errata_fix_ipd_ptr_alignment();
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_ipd_and_packet_input_enable);
 
 /**
  * Initialize the PIP, IPD, and PKO hardware to support
@@ -903,6 +907,7 @@
 #endif
 	return result;
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_initialize_packet_io_global);
 
 /**
  * Does core local initialization for packet io
@@ -947,6 +952,7 @@
 	 */
 	return port_link_info[ipd_port];
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_link_autoconf);
 
 /**
  * Return the link state of an IPD/PKO port as returned by
@@ -1005,6 +1011,7 @@
 	}
 	return result;
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_link_get);
 
 /**
  * Configure an IPD/PKO port for the specified link state. This
@@ -1060,6 +1067,7 @@
 		port_link_info[ipd_port].u64 = link_info.u64;
 	return result;
 }
+EXPORT_SYMBOL_GPL(cvmx_helper_link_set);
 
 /**
  * Configure a port for internal and/or external loopback. Internal loopback
diff --git a/arch/mips/cavium-octeon/executive/cvmx-pko.c b/arch/mips/cavium-octeon/executive/cvmx-pko.c
index f2c8775..008b881 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-pko.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-pko.c
@@ -140,7 +140,7 @@
 	pko_reg_flags.s.ena_pko = 0;
 	cvmx_write_csr(CVMX_PKO_REG_FLAGS, pko_reg_flags.u64);
 }
-
+EXPORT_SYMBOL_GPL(cvmx_pko_disable);
 
 /**
  * Reset the packet output.
@@ -182,6 +182,7 @@
 	}
 	__cvmx_pko_reset();
 }
+EXPORT_SYMBOL_GPL(cvmx_pko_shutdown);
 
 /**
  * Configure a output port and the associated queues for use.
diff --git a/arch/mips/cavium-octeon/executive/cvmx-spi.c b/arch/mips/cavium-octeon/executive/cvmx-spi.c
index ef5198d..459e3b1 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-spi.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-spi.c
@@ -177,6 +177,7 @@
 
 	return res;
 }
+EXPORT_SYMBOL_GPL(cvmx_spi_restart_interface);
 
 /**
  * Callback to perform SPI4 reset
diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c
index 1830874..6df0f4d 100644
--- a/arch/mips/cavium-octeon/octeon-platform.c
+++ b/arch/mips/cavium-octeon/octeon-platform.c
@@ -171,6 +171,7 @@
 static struct of_device_id __initdata octeon_ids[] = {
 	{ .compatible = "simple-bus", },
 	{ .compatible = "cavium,octeon-6335-uctl", },
+	{ .compatible = "cavium,octeon-5750-usbn", },
 	{ .compatible = "cavium,octeon-3860-bootbus", },
 	{ .compatible = "cavium,mdio-mux", },
 	{ .compatible = "gpio-leds", },
@@ -336,14 +337,14 @@
 	int p;
 	int count = 0;
 
-	if (cvmx_helper_interface_enumerate(idx) == 0)
-		count = cvmx_helper_ports_on_interface(idx);
-
 	snprintf(name_buffer, sizeof(name_buffer), "interface@%d", idx);
 	iface = fdt_subnode_offset(initial_boot_params, pip, name_buffer);
 	if (iface < 0)
 		return;
 
+	if (cvmx_helper_interface_enumerate(idx) == 0)
+		count = cvmx_helper_ports_on_interface(idx);
+
 	for (p = 0; p < 16; p++)
 		octeon_fdt_pip_port(iface, idx, p, count - 1, pmac);
 }
@@ -682,6 +683,37 @@
 		}
 	}
 
+	/* DWC2 USB */
+	alias_prop = fdt_getprop(initial_boot_params, aliases,
+				 "usbn", NULL);
+	if (alias_prop) {
+		int usbn = fdt_path_offset(initial_boot_params, alias_prop);
+
+		if (usbn >= 0 && (current_cpu_type() == CPU_CAVIUM_OCTEON2 ||
+				  !octeon_has_feature(OCTEON_FEATURE_USB))) {
+			pr_debug("Deleting usbn\n");
+			fdt_nop_node(initial_boot_params, usbn);
+			fdt_nop_property(initial_boot_params, aliases, "usbn");
+		} else {
+			__be32 new_f[1];
+			enum cvmx_helper_board_usb_clock_types c;
+			c = __cvmx_helper_board_usb_get_clock_type();
+			switch (c) {
+			case USB_CLOCK_TYPE_REF_48:
+				new_f[0] = cpu_to_be32(48000000);
+				fdt_setprop_inplace(initial_boot_params, usbn,
+						    "refclk-frequency",  new_f, sizeof(new_f));
+				/* Fall through ... */
+			case USB_CLOCK_TYPE_REF_12:
+				/* Missing "refclk-type" defaults to external. */
+				fdt_nop_property(initial_boot_params, usbn, "refclk-type");
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
 	return 0;
 }
 
diff --git a/arch/mips/cavium-octeon/octeon_3xxx.dts b/arch/mips/cavium-octeon/octeon_3xxx.dts
index 88cb42d..fa33115 100644
--- a/arch/mips/cavium-octeon/octeon_3xxx.dts
+++ b/arch/mips/cavium-octeon/octeon_3xxx.dts
@@ -550,6 +550,24 @@
 				big-endian-regs;
 			};
 		};
+
+		usbn: usbn@1180068000000 {
+			compatible = "cavium,octeon-5750-usbn";
+			reg = <0x11800 0x68000000 0x0 0x1000>;
+			ranges; /* Direct mapping */
+			#address-cells = <2>;
+			#size-cells = <2>;
+			/* 12MHz, 24MHz and 48MHz allowed */
+			refclk-frequency = <12000000>;
+			/* Either "crystal" or "external" */
+			refclk-type = "crystal";
+
+			usbc@16f0010000000 {
+				compatible = "cavium,octeon-5750-usbc";
+				reg = <0x16f00 0x10000000 0x0 0x80000>;
+				interrupts = <0 56>;
+			};
+		};
 	};
 
 	aliases {
@@ -566,6 +584,7 @@
 		flash0 = &flash0;
 		cf0 = &cf0;
 		uctl = &uctl;
+		usbn = &usbn;
 		led0 = &led0;
 	};
  };
diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c
index 24a2167..67a078f 100644
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@@ -6,7 +6,6 @@
  * Copyright (C) 2004-2008, 2009, 2010 Cavium Networks
  */
 #include <linux/cpu.h>
-#include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig
index 80e012fa..320772c 100644
--- a/arch/mips/configs/ar7_defconfig
+++ b/arch/mips/configs/ar7_defconfig
@@ -86,7 +86,6 @@
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 # CONFIG_FIRMWARE_IN_KERNEL is not set
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig
index 4ca8e5c..0db4eb3 100644
--- a/arch/mips/configs/bcm47xx_defconfig
+++ b/arch/mips/configs/bcm47xx_defconfig
@@ -1,623 +1,86 @@
 CONFIG_BCM47XX=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_KEXEC=y
-# CONFIG_SECCOMP is not set
-CONFIG_EXPERIMENTAL=y
-# CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
-CONFIG_TINY_RCU=y
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_RELAY=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_UIDGID_STRICT_TYPE_CHECKS=y
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_RD_LZMA=y
-CONFIG_EXPERT=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
 CONFIG_PCI=y
-CONFIG_BINFMT_MISC=m
+# CONFIG_SUSPEND is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_IP_MULTIPLE_TABLES=y
 CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
 CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
 CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
-CONFIG_INET_DIAG=m
 CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_BIC=y
-CONFIG_TCP_CONG_CUBIC=m
-CONFIG_TCP_CONG_HSTCP=m
-CONFIG_TCP_CONG_HYBLA=m
-CONFIG_TCP_CONG_SCALABLE=m
-CONFIG_TCP_CONG_LP=m
-CONFIG_TCP_CONG_VENO=m
-CONFIG_TCP_CONG_YEAH=m
-CONFIG_TCP_CONG_ILLINOIS=m
 CONFIG_IPV6_PRIVACY=y
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
-CONFIG_IPV6_TUNNEL=m
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_IPV6_SUBTREES=y
-CONFIG_NETWORK_SECMARK=y
+CONFIG_IPV6_MROUTE=y
 CONFIG_NETFILTER=y
-CONFIG_NETFILTER_NETLINK_QUEUE=m
-CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CONNTRACK_SECMARK=y
-CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_UDPLITE=m
-CONFIG_NF_CONNTRACK_AMANDA=m
-CONFIG_NF_CONNTRACK_FTP=m
-CONFIG_NF_CONNTRACK_H323=m
-CONFIG_NF_CONNTRACK_IRC=m
-CONFIG_NF_CONNTRACK_NETBIOS_NS=m
-CONFIG_NF_CONNTRACK_PPTP=m
-CONFIG_NF_CONNTRACK_SANE=m
-CONFIG_NF_CONNTRACK_SIP=m
-CONFIG_NF_CONNTRACK_TFTP=m
-CONFIG_NF_CT_NETLINK=m
-CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
-CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
-CONFIG_NETFILTER_XT_TARGET_DSCP=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NETFILTER_XT_TARGET_NFLOG=m
-CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
-CONFIG_NETFILTER_XT_TARGET_TRACE=m
-CONFIG_NETFILTER_XT_TARGET_SECMARK=m
-CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
-CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
-CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
-CONFIG_NETFILTER_XT_MATCH_DSCP=m
-CONFIG_NETFILTER_XT_MATCH_ESP=m
-CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
-CONFIG_NETFILTER_XT_MATCH_HELPER=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_LIMIT=m
-CONFIG_NETFILTER_XT_MATCH_MAC=m
-CONFIG_NETFILTER_XT_MATCH_MARK=m
-CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
-CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
-CONFIG_NETFILTER_XT_MATCH_QUOTA=m
-CONFIG_NETFILTER_XT_MATCH_REALM=m
-CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_MATCH_STRING=m
-CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
-CONFIG_NETFILTER_XT_MATCH_TIME=m
-CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_IP_VS=m
-CONFIG_IP_VS_PROTO_TCP=y
-CONFIG_IP_VS_PROTO_UDP=y
-CONFIG_IP_VS_PROTO_ESP=y
-CONFIG_IP_VS_PROTO_AH=y
-CONFIG_IP_VS_RR=m
-CONFIG_IP_VS_WRR=m
-CONFIG_IP_VS_LC=m
-CONFIG_IP_VS_WLC=m
-CONFIG_IP_VS_LBLC=m
-CONFIG_IP_VS_LBLCR=m
-CONFIG_IP_VS_DH=m
-CONFIG_IP_VS_SH=m
-CONFIG_IP_VS_SED=m
-CONFIG_IP_VS_NQ=m
-CONFIG_IP_VS_FTP=m
-CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
-CONFIG_IP_NF_MATCH_AH=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_NETMAP=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
-CONFIG_NF_NAT_SNMP_BASIC=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_TARGET_TTL=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_AH=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_MH=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_TARGET_REJECT=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_BRIDGE_NF_EBTABLES=m
-CONFIG_BRIDGE_EBT_BROUTE=m
-CONFIG_BRIDGE_EBT_T_FILTER=m
-CONFIG_BRIDGE_EBT_T_NAT=m
-CONFIG_BRIDGE_EBT_802_3=m
-CONFIG_BRIDGE_EBT_AMONG=m
-CONFIG_BRIDGE_EBT_ARP=m
-CONFIG_BRIDGE_EBT_IP=m
-CONFIG_BRIDGE_EBT_LIMIT=m
-CONFIG_BRIDGE_EBT_MARK=m
-CONFIG_BRIDGE_EBT_PKTTYPE=m
-CONFIG_BRIDGE_EBT_STP=m
-CONFIG_BRIDGE_EBT_VLAN=m
-CONFIG_BRIDGE_EBT_ARPREPLY=m
-CONFIG_BRIDGE_EBT_DNAT=m
-CONFIG_BRIDGE_EBT_MARK_T=m
-CONFIG_BRIDGE_EBT_REDIRECT=m
-CONFIG_BRIDGE_EBT_SNAT=m
-CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_BRIDGE_EBT_ULOG=m
-CONFIG_IP_DCCP=m
-CONFIG_TIPC=m
-CONFIG_TIPC_ADVANCED=y
-CONFIG_ATM=m
-CONFIG_ATM_CLIP=m
-CONFIG_ATM_LANE=m
-CONFIG_ATM_MPOA=m
-CONFIG_ATM_BR2684=m
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
+CONFIG_VLAN_8021Q=y
 CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_ATM=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_CLS_U32=m
-CONFIG_CLS_U32_PERF=y
-CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_EMATCH=y
-CONFIG_NET_EMATCH_CMP=m
-CONFIG_NET_EMATCH_NBYTE=m
-CONFIG_NET_EMATCH_U32=m
-CONFIG_NET_EMATCH_META=m
-CONFIG_NET_EMATCH_TEXT=m
-CONFIG_NET_CLS_ACT=y
-CONFIG_NET_ACT_POLICE=m
-CONFIG_NET_ACT_GACT=m
-CONFIG_GACT_PROB=y
-CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
-CONFIG_NET_ACT_NAT=m
-CONFIG_NET_ACT_PEDIT=m
-CONFIG_NET_ACT_SIMP=m
-CONFIG_NET_CLS_IND=y
-CONFIG_NET_PKTGEN=m
-CONFIG_BT=m
-CONFIG_BT_HCIUART=m
-CONFIG_BT_HCIUART_H4=y
-CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_LL=y
-CONFIG_BT_HCIBCM203X=m
-CONFIG_BT_HCIBPA10X=m
-CONFIG_BT_HCIBFUSB=m
-CONFIG_BT_HCIVHCI=m
-CONFIG_CFG80211=m
-CONFIG_MAC80211=m
-CONFIG_MAC80211_RC_PID=y
-CONFIG_MAC80211_RC_DEFAULT_PID=y
-CONFIG_MAC80211_MESH=y
-CONFIG_RFKILL=m
-CONFIG_RFKILL_INPUT=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_FW_LOADER=m
-CONFIG_CONNECTOR=m
+CONFIG_NET_SCH_FQ_CODEL=y
+CONFIG_HAMRADIO=y
+CONFIG_CFG80211=y
+CONFIG_MAC80211=y
 CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_PARTITIONS=y
-CONFIG_MTD_CHAR=y
+CONFIG_MTD_BCM47XX_PARTS=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_CFI_STAA=y
-CONFIG_MTD_RAM=y
-CONFIG_MTD_ROM=y
-CONFIG_MTD_ABSENT=y
+CONFIG_MTD_COMPLEX_MAPPINGS=y
 CONFIG_MTD_PHYSMAP=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=16384
-CONFIG_ATA_OVER_ETH=m
-CONFIG_RAID_ATTRS=m
-CONFIG_SCSI=y
-CONFIG_SCSI_TGT=m
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=m
-CONFIG_CHR_DEV_OSST=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=m
-CONFIG_CHR_DEV_SCH=m
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_SCSI_SCAN_ASYNC=y
-CONFIG_ISCSI_TCP=m
+CONFIG_MTD_BCM47XXSFLASH=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_BCM47XXNFLASH=y
 CONFIG_NETDEVICES=y
-CONFIG_DUMMY=m
-CONFIG_EQUALIZER=m
-CONFIG_TUN=m
-CONFIG_VETH=m
-CONFIG_PHYLIB=m
-CONFIG_MARVELL_PHY=m
-CONFIG_DAVICOM_PHY=m
-CONFIG_QSEMI_PHY=m
-CONFIG_LXT_PHY=m
-CONFIG_CICADA_PHY=m
-CONFIG_VITESSE_PHY=m
-CONFIG_SMSC_PHY=m
-CONFIG_BROADCOM_PHY=m
-CONFIG_ICPLUS_PHY=m
-CONFIG_MDIO_BITBANG=m
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
 CONFIG_B44=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-CONFIG_ATH_COMMON=m
-CONFIG_ATH5K=m
-CONFIG_B43=m
-CONFIG_B43LEGACY=m
-CONFIG_ZD1211RW=m
-CONFIG_USB_CATC=m
-CONFIG_USB_KAWETH=m
-CONFIG_USB_PEGASUS=m
-CONFIG_USB_RTL8150=m
-CONFIG_USB_USBNET=m
-CONFIG_USB_NET_DM9601=m
-CONFIG_USB_NET_GL620A=m
-CONFIG_USB_NET_PLUSB=m
-CONFIG_USB_NET_MCS7830=m
-CONFIG_USB_NET_RNDIS_HOST=m
-CONFIG_USB_ALI_M5632=y
-CONFIG_USB_AN2720=y
-CONFIG_USB_EPSON2888=y
-CONFIG_USB_KC2190=y
-CONFIG_USB_SIERRA_NET=m
-CONFIG_ATM_DUMMY=m
-CONFIG_ATM_TCP=m
-CONFIG_PPP=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_MPPE=m
-CONFIG_PPPOE=m
-CONFIG_PPPOATM=m
-CONFIG_SLIP=m
-CONFIG_INPUT_EVDEV=m
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
+CONFIG_TIGON3=y
+CONFIG_BGMAC=y
+CONFIG_ATH_CARDS=y
+CONFIG_ATH5K=y
+CONFIG_B43=y
+CONFIG_B43LEGACY=y
+CONFIG_BRCMSMAC=y
+CONFIG_ISDN=y
 CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
 CONFIG_SERIAL_8250_CONSOLE=y
 # CONFIG_SERIAL_8250_PCI is not set
 CONFIG_SERIAL_8250_NR_UARTS=2
 CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_HW_RANDOM is not set
-CONFIG_W1=m
-CONFIG_W1_MASTER_MATROX=m
-CONFIG_W1_MASTER_DS2490=m
-CONFIG_W1_SLAVE_THERM=m
-CONFIG_W1_SLAVE_SMEM=m
-CONFIG_W1_SLAVE_DS2433=m
-CONFIG_W1_SLAVE_DS2760=m
-# CONFIG_HWMON is not set
-CONFIG_THERMAL=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_HW_RANDOM=y
+CONFIG_GPIO_SYSFS=y
 CONFIG_WATCHDOG=y
-CONFIG_WATCHDOG_NOWAYOUT=y
 CONFIG_BCM47XX_WDT=y
+CONFIG_SSB_DEBUG=y
 CONFIG_SSB_DRIVER_GIGE=y
-CONFIG_DISPLAY_SUPPORT=m
-CONFIG_SOUND=m
-CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
-CONFIG_SND_MIXER_OSS=m
-CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
-CONFIG_SND_DUMMY=m
-CONFIG_SND_VIRMIDI=m
-CONFIG_SND_USB_AUDIO=m
-CONFIG_HID=m
-CONFIG_USB_HID=m
-CONFIG_USB_HIDDEV=y
+CONFIG_BCMA_DRIVER_GMAC_CMN=y
 CONFIG_USB=y
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_DEVICE_CLASS is not set
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_U132_HCD=m
-CONFIG_USB_R8A66597_HCD=m
-CONFIG_USB_ACM=m
-CONFIG_USB_PRINTER=m
-CONFIG_USB_STORAGE=y
-CONFIG_USB_STORAGE_DATAFAB=y
-CONFIG_USB_STORAGE_FREECOM=y
-CONFIG_USB_STORAGE_USBAT=y
-CONFIG_USB_STORAGE_SDDR09=y
-CONFIG_USB_STORAGE_SDDR55=y
-CONFIG_USB_STORAGE_JUMPSHOT=y
-CONFIG_USB_STORAGE_ALAUDA=y
-CONFIG_USB_STORAGE_ONETOUCH=y
-CONFIG_USB_STORAGE_KARMA=y
-CONFIG_USB_MDC800=m
-CONFIG_USB_MICROTEK=m
-CONFIG_USB_SERIAL=m
-CONFIG_USB_SERIAL_GENERIC=y
-CONFIG_USB_SERIAL_AIRCABLE=m
-CONFIG_USB_SERIAL_ARK3116=m
-CONFIG_USB_SERIAL_BELKIN=m
-CONFIG_USB_SERIAL_CH341=m
-CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
-CONFIG_USB_SERIAL_CYPRESS_M8=m
-CONFIG_USB_SERIAL_EMPEG=m
-CONFIG_USB_SERIAL_FTDI_SIO=m
-CONFIG_USB_SERIAL_FUNSOFT=m
-CONFIG_USB_SERIAL_VISOR=m
-CONFIG_USB_SERIAL_IPAQ=m
-CONFIG_USB_SERIAL_IR=m
-CONFIG_USB_SERIAL_GARMIN=m
-CONFIG_USB_SERIAL_IPW=m
-CONFIG_USB_SERIAL_KEYSPAN_PDA=m
-CONFIG_USB_SERIAL_KLSI=m
-CONFIG_USB_SERIAL_KOBIL_SCT=m
-CONFIG_USB_SERIAL_MCT_U232=m
-CONFIG_USB_SERIAL_MOS7720=m
-CONFIG_USB_SERIAL_MOS7840=m
-CONFIG_USB_SERIAL_NAVMAN=m
-CONFIG_USB_SERIAL_PL2303=m
-CONFIG_USB_SERIAL_OTI6858=m
-CONFIG_USB_SERIAL_HP4X=m
-CONFIG_USB_SERIAL_SAFE=m
-CONFIG_USB_SERIAL_SIERRAWIRELESS=m
-CONFIG_USB_SERIAL_CYBERJACK=m
-CONFIG_USB_SERIAL_XIRCOM=m
-CONFIG_USB_SERIAL_OPTION=m
-CONFIG_USB_SERIAL_OMNINET=m
-CONFIG_USB_SERIAL_DEBUG=m
-CONFIG_USB_ADUTUX=m
-CONFIG_USB_RIO500=m
-CONFIG_USB_LEGOTOWER=m
-CONFIG_USB_LCD=m
-CONFIG_USB_LED=m
-CONFIG_USB_CYPRESS_CY7C63=m
-CONFIG_USB_CYTHERM=m
-CONFIG_USB_IDMOUSE=m
-CONFIG_USB_FTDI_ELAN=m
-CONFIG_USB_SISUSBVGA=m
-CONFIG_USB_LD=m
-CONFIG_USB_TRANCEVIBRATOR=m
-CONFIG_USB_IOWARRIOR=m
-CONFIG_USB_TEST=m
-CONFIG_USB_ATM=m
-CONFIG_USB_SPEEDTOUCH=m
-CONFIG_USB_CXACRU=m
-CONFIG_USB_UEAGLEATM=m
-CONFIG_USB_XUSBATM=m
-CONFIG_USB_GADGET=m
-CONFIG_USB_GADGET_NET2280=y
-CONFIG_USB_ZERO=m
-CONFIG_USB_ETH=m
-CONFIG_USB_GADGETFS=m
-CONFIG_USB_MASS_STORAGE=m
-CONFIG_USB_G_SERIAL=m
-CONFIG_USB_MIDI_GADGET=m
-CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_GPIO=y
+CONFIG_USB_HCD_BCMA=y
+CONFIG_USB_HCD_SSB=y
 CONFIG_LEDS_TRIGGER_TIMER=y
-CONFIG_LEDS_TRIGGER_HEARTBEAT=y
 CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_REISERFS_FS=m
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
-CONFIG_JFS_FS=m
-CONFIG_JFS_POSIX_ACL=y
-CONFIG_JFS_SECURITY=y
-CONFIG_XFS_FS=m
-CONFIG_XFS_QUOTA=y
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_XFS_RT=y
-CONFIG_GFS2_FS=m
-CONFIG_QUOTA=y
-CONFIG_QUOTA_NETLINK_INTERFACE=y
-CONFIG_QFMT_V1=m
-CONFIG_QFMT_V2=m
-CONFIG_AUTOFS_FS=m
-CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=m
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_NTFS_FS=m
-CONFIG_NTFS_RW=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_ADFS_FS=m
-CONFIG_AFFS_FS=m
-CONFIG_HFS_FS=m
-CONFIG_HFSPLUS_FS=m
-CONFIG_BEFS_FS=m
-CONFIG_BFS_FS=m
-CONFIG_EFS_FS=m
-CONFIG_JFFS2_FS=m
-CONFIG_JFFS2_FS_XATTR=y
-CONFIG_CRAMFS=m
-CONFIG_VXFS_FS=m
-CONFIG_MINIX_FS=m
-CONFIG_HPFS_FS=m
-CONFIG_QNX4FS_FS=m
-CONFIG_ROMFS_FS=m
-CONFIG_SYSV_FS=m
-CONFIG_UFS_FS=m
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3_ACL=y
-CONFIG_NFSD_V4=y
-CONFIG_RPCSEC_GSS_SPKM3=m
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NCP_FS=m
-CONFIG_NCPFS_NFS_NS=y
-CONFIG_NCPFS_OS2_NS=y
-CONFIG_NCPFS_NLS=y
-CONFIG_NCPFS_EXTRAS=y
-CONFIG_CODA_FS=m
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_KARMA_PARTITION=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_DLM=m
-CONFIG_DLM_DEBUG=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_REDUCED=y
+CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_LRW=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_XTS=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAMELLIA=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_FCRYPT=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SEED=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRC16=m
-CONFIG_CRC7=m
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0,115200"
+CONFIG_CRC32_SARWATE=y
diff --git a/arch/mips/configs/bcm63xx_defconfig b/arch/mips/configs/bcm63xx_defconfig
index 9190051..3fec264 100644
--- a/arch/mips/configs/bcm63xx_defconfig
+++ b/arch/mips/configs/bcm63xx_defconfig
@@ -44,7 +44,6 @@
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig
index 5419adb..23b6693 100644
--- a/arch/mips/configs/cobalt_defconfig
+++ b/arch/mips/configs/cobalt_defconfig
@@ -19,7 +19,6 @@
 # CONFIG_IPV6 is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLKDEVS=y
 CONFIG_MTD_JEDECPROBE=y
diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig
index fb64589..8f219da 100644
--- a/arch/mips/configs/gpr_defconfig
+++ b/arch/mips/configs/gpr_defconfig
@@ -165,7 +165,6 @@
 CONFIG_CFG80211=y
 CONFIG_MAC80211=y
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
diff --git a/arch/mips/configs/jmr3927_defconfig b/arch/mips/configs/jmr3927_defconfig
index db5705e..9bc08f2 100644
--- a/arch/mips/configs/jmr3927_defconfig
+++ b/arch/mips/configs/jmr3927_defconfig
@@ -22,7 +22,6 @@
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_CFI=y
diff --git a/arch/mips/configs/lasat_defconfig b/arch/mips/configs/lasat_defconfig
index d9f3db2..0179c7f 100644
--- a/arch/mips/configs/lasat_defconfig
+++ b/arch/mips/configs/lasat_defconfig
@@ -31,7 +31,6 @@
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig
index 8a66602..d759318 100644
--- a/arch/mips/configs/maltasmvp_defconfig
+++ b/arch/mips/configs/maltasmvp_defconfig
@@ -4,7 +4,7 @@
 CONFIG_MIPS_MT_SMP=y
 CONFIG_SCHED_SMT=y
 CONFIG_MIPS_CMP=y
-CONFIG_NR_CPUS=8
+CONFIG_NR_CPUS=2
 CONFIG_HZ_100=y
 CONFIG_LOCALVERSION="cmp"
 CONFIG_SYSVIPC=y
@@ -58,7 +58,6 @@
 CONFIG_DEV_APPLETALK=m
 CONFIG_IPDDP=m
 CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP_DECAP=y
 CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
diff --git a/arch/mips/configs/markeins_defconfig b/arch/mips/configs/markeins_defconfig
index 636f82b..4c2c0c4 100644
--- a/arch/mips/configs/markeins_defconfig
+++ b/arch/mips/configs/markeins_defconfig
@@ -124,7 +124,6 @@
 CONFIG_IP6_NF_RAW=m
 CONFIG_FW_LOADER=m
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 9fa8f16..593946a 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -246,7 +246,6 @@
 CONFIG_BT_HCIVHCI=m
 CONFIG_CONNECTOR=m
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
diff --git a/arch/mips/configs/pnx8335_stb225_defconfig b/arch/mips/configs/pnx8335_stb225_defconfig
index f292576..c887066 100644
--- a/arch/mips/configs/pnx8335_stb225_defconfig
+++ b/arch/mips/configs/pnx8335_stb225_defconfig
@@ -31,7 +31,6 @@
 # CONFIG_IPV6 is not set
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig
new file mode 100644
index 0000000..2b96547
--- /dev/null
+++ b/arch/mips/configs/qi_lb60_defconfig
@@ -0,0 +1,188 @@
+CONFIG_MACH_JZ4740=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HZ_100=y
+CONFIG_PREEMPT=y
+# CONFIG_SECCOMP is not set
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+CONFIG_TCP_CONG_ADVANCED=y
+# CONFIG_TCP_CONG_BIC is not set
+# CONFIG_TCP_CONG_CUBIC is not set
+CONFIG_TCP_CONG_WESTWOOD=y
+# CONFIG_TCP_CONG_HTCP is not set
+# CONFIG_IPV6 is not set
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_JZ4740=y
+CONFIG_MTD_UBI=y
+CONFIG_NETDEVICES=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=y
+CONFIG_KEYBOARD_MATRIX=y
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO is not set
+CONFIG_LEGACY_PTY_COUNT=2
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+# CONFIG_SERIAL_8250_DMA is not set
+CONFIG_SERIAL_8250_NR_UARTS=2
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+# CONFIG_HW_RANDOM is not set
+CONFIG_SPI=y
+CONFIG_SPI_GPIO=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_BATTERY_JZ4740=y
+CONFIG_CHARGER_GPIO=y
+# CONFIG_HWMON is not set
+CONFIG_MFD_JZ4740_ADC=y
+CONFIG_REGULATOR=y
+CONFIG_REGULATOR_FIXED_VOLTAGE=y
+CONFIG_FB=y
+CONFIG_FB_JZ4740=y
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+CONFIG_LCD_CLASS_DEVICE=y
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_SOUND=y
+CONFIG_SND=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_VERBOSE_PROCFS is not set
+# CONFIG_SND_DRIVERS is not set
+# CONFIG_SND_SPI is not set
+# CONFIG_SND_MIPS is not set
+CONFIG_SND_SOC=y
+CONFIG_SND_JZ4740_SOC=y
+CONFIG_SND_JZ4740_SOC_QI_LB60=y
+CONFIG_USB=y
+CONFIG_USB_OTG_BLACKLIST_HUB=y
+CONFIG_USB_MUSB_HDRC=y
+CONFIG_USB_MUSB_GADGET=y
+CONFIG_USB_MUSB_JZ4740=y
+CONFIG_NOP_USB_XCEIV=y
+CONFIG_USB_GADGET=y
+CONFIG_USB_GADGET_DEBUG=y
+CONFIG_USB_ETH=y
+# CONFIG_USB_ETH_RNDIS is not set
+CONFIG_MMC=y
+CONFIG_MMC_UNSAFE_RESUME=y
+# CONFIG_MMC_BLOCK_BOUNCE is not set
+CONFIG_MMC_JZ4740=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_JZ4740=y
+CONFIG_DMADEVICES=y
+CONFIG_DMA_JZ4740=y
+CONFIG_PWM=y
+CONFIG_PWM_JZ4740=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+# CONFIG_EXT3_FS_XATTR is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_VFAT_FS=y
+CONFIG_PROC_KCORE=y
+# CONFIG_PROC_PAGE_MONITOR is not set
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+# CONFIG_JFFS2_ZLIB is not set
+CONFIG_UBIFS_FS=y
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_737=y
+CONFIG_NLS_CODEPAGE_775=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_CODEPAGE_852=y
+CONFIG_NLS_CODEPAGE_855=y
+CONFIG_NLS_CODEPAGE_857=y
+CONFIG_NLS_CODEPAGE_860=y
+CONFIG_NLS_CODEPAGE_861=y
+CONFIG_NLS_CODEPAGE_862=y
+CONFIG_NLS_CODEPAGE_863=y
+CONFIG_NLS_CODEPAGE_864=y
+CONFIG_NLS_CODEPAGE_865=y
+CONFIG_NLS_CODEPAGE_866=y
+CONFIG_NLS_CODEPAGE_869=y
+CONFIG_NLS_CODEPAGE_936=y
+CONFIG_NLS_CODEPAGE_950=y
+CONFIG_NLS_CODEPAGE_932=y
+CONFIG_NLS_CODEPAGE_949=y
+CONFIG_NLS_CODEPAGE_874=y
+CONFIG_NLS_ISO8859_8=y
+CONFIG_NLS_CODEPAGE_1250=y
+CONFIG_NLS_CODEPAGE_1251=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_ISO8859_2=y
+CONFIG_NLS_ISO8859_3=y
+CONFIG_NLS_ISO8859_4=y
+CONFIG_NLS_ISO8859_5=y
+CONFIG_NLS_ISO8859_6=y
+CONFIG_NLS_ISO8859_7=y
+CONFIG_NLS_ISO8859_9=y
+CONFIG_NLS_ISO8859_13=y
+CONFIG_NLS_ISO8859_14=y
+CONFIG_NLS_ISO8859_15=y
+CONFIG_NLS_KOI8_R=y
+CONFIG_NLS_KOI8_U=y
+CONFIG_NLS_UTF8=y
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO=y
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_READABLE_ASM=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_PANIC_ON_OOPS=y
+# CONFIG_FTRACE is not set
+CONFIG_KGDB=y
+CONFIG_RUNTIME_DEBUG=y
+CONFIG_CRYPTO_ZLIB=y
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_FONTS=y
+CONFIG_FONT_SUN8x16=y
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
index b85b1213..5d9d708 100644
--- a/arch/mips/configs/rb532_defconfig
+++ b/arch/mips/configs/rb532_defconfig
@@ -114,7 +114,6 @@
 CONFIG_HAMRADIO=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_BLOCK2MTD=y
diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig
index 9cba856..f8bf9b4 100644
--- a/arch/mips/configs/rbtx49xx_defconfig
+++ b/arch/mips/configs/rbtx49xx_defconfig
@@ -35,7 +35,6 @@
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 CONFIG_MTD=y
-CONFIG_MTD_PARTITIONS=y
 CONFIG_MTD_CMDLINE_PARTS=y
 CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=m
diff --git a/arch/mips/fw/arc/file.c b/arch/mips/fw/arc/file.c
index a8b0803..49fd3ff 100644
--- a/arch/mips/fw/arc/file.c
+++ b/arch/mips/fw/arc/file.c
@@ -8,7 +8,6 @@
  * Copyright (C) 1994, 1995, 1996, 1999 Ralf Baechle
  * Copyright (C) 1999 Silicon Graphics, Inc.
  */
-#include <linux/init.h>
 
 #include <asm/fw/arc/types.h>
 #include <asm/sgialib.h>
diff --git a/arch/mips/include/asm/amon.h b/arch/mips/include/asm/amon.h
index c3dc1a6..3cc03c6 100644
--- a/arch/mips/include/asm/amon.h
+++ b/arch/mips/include/asm/amon.h
@@ -1,7 +1,12 @@
 /*
- * Amon support
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Imagination Technologies Ltd.
+ *
+ * Arbitrary Monitor Support (AMON)
  */
-
-int amon_cpu_avail(int);
-void amon_cpu_start(int, unsigned long, unsigned long,
-		    unsigned long, unsigned long);
+int amon_cpu_avail(int cpu);
+int amon_cpu_start(int cpu, unsigned long pc, unsigned long sp,
+		   unsigned long gp, unsigned long a0);
diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h
index 2413afe..70e1f17 100644
--- a/arch/mips/include/asm/asmmacro-32.h
+++ b/arch/mips/include/asm/asmmacro-32.h
@@ -12,27 +12,6 @@
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 
-	.macro	fpu_save_double thread status tmp1=t0
-	cfc1	\tmp1,  fcr31
-	sdc1	$f0,  THREAD_FPR0(\thread)
-	sdc1	$f2,  THREAD_FPR2(\thread)
-	sdc1	$f4,  THREAD_FPR4(\thread)
-	sdc1	$f6,  THREAD_FPR6(\thread)
-	sdc1	$f8,  THREAD_FPR8(\thread)
-	sdc1	$f10, THREAD_FPR10(\thread)
-	sdc1	$f12, THREAD_FPR12(\thread)
-	sdc1	$f14, THREAD_FPR14(\thread)
-	sdc1	$f16, THREAD_FPR16(\thread)
-	sdc1	$f18, THREAD_FPR18(\thread)
-	sdc1	$f20, THREAD_FPR20(\thread)
-	sdc1	$f22, THREAD_FPR22(\thread)
-	sdc1	$f24, THREAD_FPR24(\thread)
-	sdc1	$f26, THREAD_FPR26(\thread)
-	sdc1	$f28, THREAD_FPR28(\thread)
-	sdc1	$f30, THREAD_FPR30(\thread)
-	sw	\tmp1, THREAD_FCR31(\thread)
-	.endm
-
 	.macro	fpu_save_single thread tmp=t0
 	cfc1	\tmp,  fcr31
 	swc1	$f0,  THREAD_FPR0(\thread)
@@ -70,27 +49,6 @@
 	sw	\tmp, THREAD_FCR31(\thread)
 	.endm
 
-	.macro	fpu_restore_double thread status tmp=t0
-	lw	\tmp, THREAD_FCR31(\thread)
-	ldc1	$f0,  THREAD_FPR0(\thread)
-	ldc1	$f2,  THREAD_FPR2(\thread)
-	ldc1	$f4,  THREAD_FPR4(\thread)
-	ldc1	$f6,  THREAD_FPR6(\thread)
-	ldc1	$f8,  THREAD_FPR8(\thread)
-	ldc1	$f10, THREAD_FPR10(\thread)
-	ldc1	$f12, THREAD_FPR12(\thread)
-	ldc1	$f14, THREAD_FPR14(\thread)
-	ldc1	$f16, THREAD_FPR16(\thread)
-	ldc1	$f18, THREAD_FPR18(\thread)
-	ldc1	$f20, THREAD_FPR20(\thread)
-	ldc1	$f22, THREAD_FPR22(\thread)
-	ldc1	$f24, THREAD_FPR24(\thread)
-	ldc1	$f26, THREAD_FPR26(\thread)
-	ldc1	$f28, THREAD_FPR28(\thread)
-	ldc1	$f30, THREAD_FPR30(\thread)
-	ctc1	\tmp, fcr31
-	.endm
-
 	.macro	fpu_restore_single thread tmp=t0
 	lw	\tmp, THREAD_FCR31(\thread)
 	lwc1	$f0,  THREAD_FPR0(\thread)
diff --git a/arch/mips/include/asm/asmmacro-64.h b/arch/mips/include/asm/asmmacro-64.h
index 08a527d..38ea609 100644
--- a/arch/mips/include/asm/asmmacro-64.h
+++ b/arch/mips/include/asm/asmmacro-64.h
@@ -13,102 +13,6 @@
 #include <asm/fpregdef.h>
 #include <asm/mipsregs.h>
 
-	.macro	fpu_save_16even thread tmp=t0
-	cfc1	\tmp, fcr31
-	sdc1	$f0,  THREAD_FPR0(\thread)
-	sdc1	$f2,  THREAD_FPR2(\thread)
-	sdc1	$f4,  THREAD_FPR4(\thread)
-	sdc1	$f6,  THREAD_FPR6(\thread)
-	sdc1	$f8,  THREAD_FPR8(\thread)
-	sdc1	$f10, THREAD_FPR10(\thread)
-	sdc1	$f12, THREAD_FPR12(\thread)
-	sdc1	$f14, THREAD_FPR14(\thread)
-	sdc1	$f16, THREAD_FPR16(\thread)
-	sdc1	$f18, THREAD_FPR18(\thread)
-	sdc1	$f20, THREAD_FPR20(\thread)
-	sdc1	$f22, THREAD_FPR22(\thread)
-	sdc1	$f24, THREAD_FPR24(\thread)
-	sdc1	$f26, THREAD_FPR26(\thread)
-	sdc1	$f28, THREAD_FPR28(\thread)
-	sdc1	$f30, THREAD_FPR30(\thread)
-	sw	\tmp, THREAD_FCR31(\thread)
-	.endm
-
-	.macro	fpu_save_16odd thread
-	sdc1	$f1,  THREAD_FPR1(\thread)
-	sdc1	$f3,  THREAD_FPR3(\thread)
-	sdc1	$f5,  THREAD_FPR5(\thread)
-	sdc1	$f7,  THREAD_FPR7(\thread)
-	sdc1	$f9,  THREAD_FPR9(\thread)
-	sdc1	$f11, THREAD_FPR11(\thread)
-	sdc1	$f13, THREAD_FPR13(\thread)
-	sdc1	$f15, THREAD_FPR15(\thread)
-	sdc1	$f17, THREAD_FPR17(\thread)
-	sdc1	$f19, THREAD_FPR19(\thread)
-	sdc1	$f21, THREAD_FPR21(\thread)
-	sdc1	$f23, THREAD_FPR23(\thread)
-	sdc1	$f25, THREAD_FPR25(\thread)
-	sdc1	$f27, THREAD_FPR27(\thread)
-	sdc1	$f29, THREAD_FPR29(\thread)
-	sdc1	$f31, THREAD_FPR31(\thread)
-	.endm
-
-	.macro	fpu_save_double thread status tmp
-	sll	\tmp, \status, 5
-	bgez	\tmp, 2f
-	fpu_save_16odd \thread
-2:
-	fpu_save_16even \thread \tmp
-	.endm
-
-	.macro	fpu_restore_16even thread tmp=t0
-	lw	\tmp, THREAD_FCR31(\thread)
-	ldc1	$f0,  THREAD_FPR0(\thread)
-	ldc1	$f2,  THREAD_FPR2(\thread)
-	ldc1	$f4,  THREAD_FPR4(\thread)
-	ldc1	$f6,  THREAD_FPR6(\thread)
-	ldc1	$f8,  THREAD_FPR8(\thread)
-	ldc1	$f10, THREAD_FPR10(\thread)
-	ldc1	$f12, THREAD_FPR12(\thread)
-	ldc1	$f14, THREAD_FPR14(\thread)
-	ldc1	$f16, THREAD_FPR16(\thread)
-	ldc1	$f18, THREAD_FPR18(\thread)
-	ldc1	$f20, THREAD_FPR20(\thread)
-	ldc1	$f22, THREAD_FPR22(\thread)
-	ldc1	$f24, THREAD_FPR24(\thread)
-	ldc1	$f26, THREAD_FPR26(\thread)
-	ldc1	$f28, THREAD_FPR28(\thread)
-	ldc1	$f30, THREAD_FPR30(\thread)
-	ctc1	\tmp, fcr31
-	.endm
-
-	.macro	fpu_restore_16odd thread
-	ldc1	$f1,  THREAD_FPR1(\thread)
-	ldc1	$f3,  THREAD_FPR3(\thread)
-	ldc1	$f5,  THREAD_FPR5(\thread)
-	ldc1	$f7,  THREAD_FPR7(\thread)
-	ldc1	$f9,  THREAD_FPR9(\thread)
-	ldc1	$f11, THREAD_FPR11(\thread)
-	ldc1	$f13, THREAD_FPR13(\thread)
-	ldc1	$f15, THREAD_FPR15(\thread)
-	ldc1	$f17, THREAD_FPR17(\thread)
-	ldc1	$f19, THREAD_FPR19(\thread)
-	ldc1	$f21, THREAD_FPR21(\thread)
-	ldc1	$f23, THREAD_FPR23(\thread)
-	ldc1	$f25, THREAD_FPR25(\thread)
-	ldc1	$f27, THREAD_FPR27(\thread)
-	ldc1	$f29, THREAD_FPR29(\thread)
-	ldc1	$f31, THREAD_FPR31(\thread)
-	.endm
-
-	.macro	fpu_restore_double thread status tmp
-	sll	\tmp, \status, 5
-	bgez	\tmp, 1f				# 16 register mode?
-
-	fpu_restore_16odd \thread
-1:	fpu_restore_16even \thread \tmp
-	.endm
-
 	.macro	cpu_save_nonscratch thread
 	LONG_S	s0, THREAD_REG16(\thread)
 	LONG_S	s1, THREAD_REG17(\thread)
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 6c8342a..3220c93 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -62,6 +62,113 @@
 	.endm
 #endif /* CONFIG_MIPS_MT_SMTC */
 
+	.macro	fpu_save_16even thread tmp=t0
+	cfc1	\tmp, fcr31
+	sdc1	$f0,  THREAD_FPR0(\thread)
+	sdc1	$f2,  THREAD_FPR2(\thread)
+	sdc1	$f4,  THREAD_FPR4(\thread)
+	sdc1	$f6,  THREAD_FPR6(\thread)
+	sdc1	$f8,  THREAD_FPR8(\thread)
+	sdc1	$f10, THREAD_FPR10(\thread)
+	sdc1	$f12, THREAD_FPR12(\thread)
+	sdc1	$f14, THREAD_FPR14(\thread)
+	sdc1	$f16, THREAD_FPR16(\thread)
+	sdc1	$f18, THREAD_FPR18(\thread)
+	sdc1	$f20, THREAD_FPR20(\thread)
+	sdc1	$f22, THREAD_FPR22(\thread)
+	sdc1	$f24, THREAD_FPR24(\thread)
+	sdc1	$f26, THREAD_FPR26(\thread)
+	sdc1	$f28, THREAD_FPR28(\thread)
+	sdc1	$f30, THREAD_FPR30(\thread)
+	sw	\tmp, THREAD_FCR31(\thread)
+	.endm
+
+	.macro	fpu_save_16odd thread
+	.set	push
+	.set	mips64r2
+	sdc1	$f1,  THREAD_FPR1(\thread)
+	sdc1	$f3,  THREAD_FPR3(\thread)
+	sdc1	$f5,  THREAD_FPR5(\thread)
+	sdc1	$f7,  THREAD_FPR7(\thread)
+	sdc1	$f9,  THREAD_FPR9(\thread)
+	sdc1	$f11, THREAD_FPR11(\thread)
+	sdc1	$f13, THREAD_FPR13(\thread)
+	sdc1	$f15, THREAD_FPR15(\thread)
+	sdc1	$f17, THREAD_FPR17(\thread)
+	sdc1	$f19, THREAD_FPR19(\thread)
+	sdc1	$f21, THREAD_FPR21(\thread)
+	sdc1	$f23, THREAD_FPR23(\thread)
+	sdc1	$f25, THREAD_FPR25(\thread)
+	sdc1	$f27, THREAD_FPR27(\thread)
+	sdc1	$f29, THREAD_FPR29(\thread)
+	sdc1	$f31, THREAD_FPR31(\thread)
+	.set	pop
+	.endm
+
+	.macro	fpu_save_double thread status tmp
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
+	sll	\tmp, \status, 5
+	bgez	\tmp, 10f
+	fpu_save_16odd \thread
+10:
+#endif
+	fpu_save_16even \thread \tmp
+	.endm
+
+	.macro	fpu_restore_16even thread tmp=t0
+	lw	\tmp, THREAD_FCR31(\thread)
+	ldc1	$f0,  THREAD_FPR0(\thread)
+	ldc1	$f2,  THREAD_FPR2(\thread)
+	ldc1	$f4,  THREAD_FPR4(\thread)
+	ldc1	$f6,  THREAD_FPR6(\thread)
+	ldc1	$f8,  THREAD_FPR8(\thread)
+	ldc1	$f10, THREAD_FPR10(\thread)
+	ldc1	$f12, THREAD_FPR12(\thread)
+	ldc1	$f14, THREAD_FPR14(\thread)
+	ldc1	$f16, THREAD_FPR16(\thread)
+	ldc1	$f18, THREAD_FPR18(\thread)
+	ldc1	$f20, THREAD_FPR20(\thread)
+	ldc1	$f22, THREAD_FPR22(\thread)
+	ldc1	$f24, THREAD_FPR24(\thread)
+	ldc1	$f26, THREAD_FPR26(\thread)
+	ldc1	$f28, THREAD_FPR28(\thread)
+	ldc1	$f30, THREAD_FPR30(\thread)
+	ctc1	\tmp, fcr31
+	.endm
+
+	.macro	fpu_restore_16odd thread
+	.set	push
+	.set	mips64r2
+	ldc1	$f1,  THREAD_FPR1(\thread)
+	ldc1	$f3,  THREAD_FPR3(\thread)
+	ldc1	$f5,  THREAD_FPR5(\thread)
+	ldc1	$f7,  THREAD_FPR7(\thread)
+	ldc1	$f9,  THREAD_FPR9(\thread)
+	ldc1	$f11, THREAD_FPR11(\thread)
+	ldc1	$f13, THREAD_FPR13(\thread)
+	ldc1	$f15, THREAD_FPR15(\thread)
+	ldc1	$f17, THREAD_FPR17(\thread)
+	ldc1	$f19, THREAD_FPR19(\thread)
+	ldc1	$f21, THREAD_FPR21(\thread)
+	ldc1	$f23, THREAD_FPR23(\thread)
+	ldc1	$f25, THREAD_FPR25(\thread)
+	ldc1	$f27, THREAD_FPR27(\thread)
+	ldc1	$f29, THREAD_FPR29(\thread)
+	ldc1	$f31, THREAD_FPR31(\thread)
+	.set	pop
+	.endm
+
+	.macro	fpu_restore_double thread status tmp
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
+	sll	\tmp, \status, 5
+	bgez	\tmp, 10f				# 16 register mode?
+
+	fpu_restore_16odd \thread
+10:
+#endif
+	fpu_restore_16even \thread \tmp
+	.endm
+
 /*
  * Temporary until all gas have MT ASE support
  */
diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h
index 27bd060..cbacceb 100644
--- a/arch/mips/include/asm/bmips.h
+++ b/arch/mips/include/asm/bmips.h
@@ -46,8 +46,35 @@
 
 #include <linux/cpumask.h>
 #include <asm/r4kcache.h>
+#include <asm/smp-ops.h>
 
-extern struct plat_smp_ops bmips_smp_ops;
+extern struct plat_smp_ops bmips43xx_smp_ops;
+extern struct plat_smp_ops bmips5000_smp_ops;
+
+static inline int register_bmips_smp_ops(void)
+{
+#if IS_ENABLED(CONFIG_CPU_BMIPS) && IS_ENABLED(CONFIG_SMP)
+	switch (current_cpu_type()) {
+	case CPU_BMIPS32:
+	case CPU_BMIPS3300:
+		return register_up_smp_ops();
+	case CPU_BMIPS4350:
+	case CPU_BMIPS4380:
+		register_smp_ops(&bmips43xx_smp_ops);
+		break;
+	case CPU_BMIPS5000:
+		register_smp_ops(&bmips5000_smp_ops);
+		break;
+	default:
+		return -ENODEV;
+	}
+
+	return 0;
+#else
+	return -ENODEV;
+#endif
+}
+
 extern char bmips_reset_nmi_vec;
 extern char bmips_reset_nmi_vec_end;
 extern char bmips_smp_movevec;
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index d445d06..6e70b03 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -20,6 +20,13 @@
 #ifndef cpu_has_tlb
 #define cpu_has_tlb		(cpu_data[0].options & MIPS_CPU_TLB)
 #endif
+#ifndef cpu_has_tlbinv
+#define cpu_has_tlbinv		(cpu_data[0].options & MIPS_CPU_TLBINV)
+#endif
+#ifndef cpu_has_segments
+#define cpu_has_segments	(cpu_data[0].options & MIPS_CPU_SEGMENTS)
+#endif
+
 
 /*
  * For the moment we don't consider R6000 and R8000 so we can assume that
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index 21c8e29..8f7adf0 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -52,6 +52,9 @@
 	unsigned int		cputype;
 	int			isa_level;
 	int			tlbsize;
+	int			tlbsizevtlb;
+	int			tlbsizeftlbsets;
+	int			tlbsizeftlbways;
 	struct cache_desc	icache; /* Primary I-cache */
 	struct cache_desc	dcache; /* Primary D or combined I/D cache */
 	struct cache_desc	scache; /* Secondary cache */
diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index 4a402cc..02f591b 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -27,10 +27,7 @@
 #ifdef CONFIG_SYS_HAS_CPU_MIPS32_R1
 	case CPU_4KC:
 	case CPU_ALCHEMY:
-	case CPU_BMIPS3300:
-	case CPU_BMIPS4350:
 	case CPU_PR4450:
-	case CPU_BMIPS32:
 	case CPU_JZRISC:
 #endif
 
@@ -47,6 +44,8 @@
 	case CPU_74K:
 	case CPU_M14KC:
 	case CPU_M14KEC:
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
 #endif
 
 #ifdef CONFIG_SYS_HAS_CPU_MIPS64_R1
@@ -163,6 +162,16 @@
 	case CPU_CAVIUM_OCTEON2:
 #endif
 
+#if defined(CONFIG_SYS_HAS_CPU_BMIPS32_3300) || \
+	defined(CONFIG_SYS_HAS_CPU_MIPS32_R1)
+	case CPU_BMIPS32:
+	case CPU_BMIPS3300:
+#endif
+
+#ifdef CONFIG_SYS_HAS_CPU_BMIPS4350
+	case CPU_BMIPS4350:
+#endif
+
 #ifdef CONFIG_SYS_HAS_CPU_BMIPS4380
 	case CPU_BMIPS4380:
 #endif
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index d2035e1..76411df 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -111,6 +111,10 @@
 #define PRID_IMP_1074K		0x9a00
 #define PRID_IMP_M14KC		0x9c00
 #define PRID_IMP_M14KEC		0x9e00
+#define PRID_IMP_INTERAPTIV_UP	0xa000
+#define PRID_IMP_INTERAPTIV_MP	0xa100
+#define PRID_IMP_PROAPTIV_UP	0xa200
+#define PRID_IMP_PROAPTIV_MP	0xa300
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_SIBYTE
@@ -194,6 +198,7 @@
 #define PRID_IMP_NETLOGIC_XLP8XX	0x1000
 #define PRID_IMP_NETLOGIC_XLP3XX	0x1100
 #define PRID_IMP_NETLOGIC_XLP2XX	0x1200
+#define PRID_IMP_NETLOGIC_XLP9XX	0x1500
 
 /*
  * Particular Revision values for bits 7:0 of the PRId register.
@@ -249,6 +254,8 @@
 
 #define FPIR_IMP_NONE		0x0000
 
+#if !defined(__ASSEMBLY__)
+
 enum cpu_type_enum {
 	CPU_UNKNOWN,
 
@@ -289,7 +296,7 @@
 	CPU_4KC, CPU_4KEC, CPU_4KSC, CPU_24K, CPU_34K, CPU_1004K, CPU_74K,
 	CPU_ALCHEMY, CPU_PR4450, CPU_BMIPS32, CPU_BMIPS3300, CPU_BMIPS4350,
 	CPU_BMIPS4380, CPU_BMIPS5000, CPU_JZRISC, CPU_LOONGSON1, CPU_M14KC,
-	CPU_M14KEC,
+	CPU_M14KEC, CPU_INTERAPTIV, CPU_PROAPTIV,
 
 	/*
 	 * MIPS64 class processors
@@ -301,6 +308,7 @@
 	CPU_LAST
 };
 
+#endif /* !__ASSEMBLY__ */
 
 /*
  * ISA Level encodings
@@ -348,6 +356,8 @@
 #define MIPS_CPU_PCI		0x00400000 /* CPU has Perf Ctr Int indicator */
 #define MIPS_CPU_RIXI		0x00800000 /* CPU has TLB Read/eXec Inhibit */
 #define MIPS_CPU_MICROMIPS	0x01000000 /* CPU has microMIPS capability */
+#define MIPS_CPU_TLBINV		0x02000000 /* CPU supports TLBINV/F */
+#define MIPS_CPU_SEGMENTS	0x04000000 /* CPU supports Segmentation Control registers */
 
 /*
  * CPU ASE encodings
diff --git a/arch/mips/include/asm/dma-coherence.h b/arch/mips/include/asm/dma-coherence.h
index 242cbb3..bc5e85d 100644
--- a/arch/mips/include/asm/dma-coherence.h
+++ b/arch/mips/include/asm/dma-coherence.h
@@ -9,7 +9,16 @@
 #ifndef __ASM_DMA_COHERENCE_H
 #define __ASM_DMA_COHERENCE_H
 
+#ifdef CONFIG_DMA_MAYBE_COHERENT
 extern int coherentio;
 extern int hw_coherentio;
+#else
+#ifdef CONFIG_DMA_COHERENT
+#define coherentio	1
+#else
+#define coherentio	0
+#endif
+#define hw_coherentio	0
+#endif /* CONFIG_DMA_MAYBE_COHERENT */
 
 #endif
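The effect of the new #else branch is that coherentio becomes a compile-time literal 0 or 1 outside CONFIG_DMA_MAYBE_COHERENT, so branches on it fold away. A hedged illustration (dma_cache_wback_inv() is the usual MIPS cache-maintenance helper; the snippet is not from this patch):

	/* Dead-code-eliminated on fully coherent or fully non-coherent
	 * kernels; a real runtime test only under DMA_MAYBE_COHERENT. */
	if (!coherentio)
		dma_cache_wback_inv(addr, size);
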
diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h
index a66359e..d414405 100644
--- a/arch/mips/include/asm/elf.h
+++ b/arch/mips/include/asm/elf.h
@@ -36,6 +36,7 @@
 #define EF_MIPS_ABI2		0x00000020
 #define EF_MIPS_OPTIONS_FIRST	0x00000080
 #define EF_MIPS_32BITMODE	0x00000100
+#define EF_MIPS_FP64		0x00000200
 #define EF_MIPS_ABI		0x0000f000
 #define EF_MIPS_ARCH		0xf0000000
 
@@ -176,6 +177,18 @@
 #ifdef CONFIG_32BIT
 
 /*
+ * To be sure that we don't attempt to execute an O32 binary which
+ * requires 64-bit FP (FR=1) on a system which does not support it, we
+ * refuse to execute any binary which has bits specified by the following
+ * macro set in its ELF header flags.
+ */
+#ifdef CONFIG_MIPS_O32_FP64_SUPPORT
+# define __MIPS_O32_FP64_MUST_BE_ZERO	0
+#else
+# define __MIPS_O32_FP64_MUST_BE_ZERO	EF_MIPS_FP64
+#endif
+
+/*
  * This is used to ensure we don't load something for the wrong architecture.
  */
 #define elf_check_arch(hdr)						\
@@ -192,6 +205,8 @@
 	if (((__h->e_flags & EF_MIPS_ABI) != 0) &&			\
 	    ((__h->e_flags & EF_MIPS_ABI) != EF_MIPS_ABI_O32))		\
 		__res = 0;						\
+	if (__h->e_flags & __MIPS_O32_FP64_MUST_BE_ZERO)		\
+		__res = 0;						\
 									\
 	__res;								\
 })
@@ -249,6 +264,11 @@
 
 #define SET_PERSONALITY(ex)						\
 do {									\
+	if ((ex).e_flags & EF_MIPS_FP64)				\
+		clear_thread_flag(TIF_32BIT_FPREGS);			\
+	else								\
+		set_thread_flag(TIF_32BIT_FPREGS);			\
+									\
 	if (personality(current->personality) != PER_LINUX)		\
 		set_personality(PER_LINUX);				\
 									\
@@ -271,14 +291,18 @@
 #endif
 
 #ifdef CONFIG_MIPS32_O32
-#define __SET_PERSONALITY32_O32()					\
+#define __SET_PERSONALITY32_O32(ex)					\
 	do {								\
 		set_thread_flag(TIF_32BIT_REGS);			\
 		set_thread_flag(TIF_32BIT_ADDR);			\
+									\
+		if (!((ex).e_flags & EF_MIPS_FP64))			\
+			set_thread_flag(TIF_32BIT_FPREGS);		\
+									\
 		current->thread.abi = &mips_abi_32;			\
 	} while (0)
 #else
-#define __SET_PERSONALITY32_O32()					\
+#define __SET_PERSONALITY32_O32(ex)					\
 	do { } while (0)
 #endif
 
@@ -289,7 +313,7 @@
 	     ((ex).e_flags & EF_MIPS_ABI) == 0)				\
 		__SET_PERSONALITY32_N32();				\
 	else								\
-		__SET_PERSONALITY32_O32();				\
+		__SET_PERSONALITY32_O32(ex);				\
 } while (0)
 #else
 #define __SET_PERSONALITY32(ex) do { } while (0)
@@ -300,6 +324,7 @@
 	unsigned int p;							\
 									\
 	clear_thread_flag(TIF_32BIT_REGS);				\
+	clear_thread_flag(TIF_32BIT_FPREGS);				\
 	clear_thread_flag(TIF_32BIT_ADDR);				\
 									\
 	if ((ex).e_ident[EI_CLASS] == ELFCLASS32)			\
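To see what the added elf_check_arch() clause rejects, here is a hedged userspace mirror of the test (illustrative only; would_exec() is a made-up name, and EF_MIPS_FP64 is written as its literal 0x00000200 in case the libc elf.h lacks it):

	#include <elf.h>

	static int would_exec(const Elf32_Ehdr *h, int o32_fp64_support)
	{
		/* Same shape as __MIPS_O32_FP64_MUST_BE_ZERO above. */
		Elf32_Word must_be_zero = o32_fp64_support ? 0 : 0x00000200;

		return !(h->e_flags & must_be_zero);
	}
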
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index d088e5d..cfe092f 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -33,11 +33,48 @@
 extern void _save_fp(struct task_struct *);
 extern void _restore_fp(struct task_struct *);
 
-#define __enable_fpu()							\
-do {									\
-	set_c0_status(ST0_CU1);						\
-	enable_fpu_hazard();						\
-} while (0)
+/*
+ * This enum specifies a mode in which we want the FPU to operate, for cores
+ * which implement the Status.FR bit. Note that FPU_32BIT & FPU_64BIT
+ * purposefully have the values 0 & 1 respectively, so that an integer value
+ * of Status.FR can be trivially cast to the corresponding enum fpu_mode.
+ */
+enum fpu_mode {
+	FPU_32BIT = 0,		/* FR = 0 */
+	FPU_64BIT,		/* FR = 1 */
+	FPU_AS_IS,
+};
+
+static inline int __enable_fpu(enum fpu_mode mode)
+{
+	int fr;
+
+	switch (mode) {
+	case FPU_AS_IS:
+		/* just enable the FPU in its current mode */
+		set_c0_status(ST0_CU1);
+		enable_fpu_hazard();
+		return 0;
+
+	case FPU_64BIT:
+#if !(defined(CONFIG_CPU_MIPS32_R2) || defined(CONFIG_64BIT))
+		/* we only have a 32-bit FPU */
+		return SIGFPE;
+#endif
+		/* fall through */
+	case FPU_32BIT:
+		/* set CU1 & change FR appropriately */
+		fr = (int)mode;
+		change_c0_status(ST0_CU1 | ST0_FR, ST0_CU1 | (fr ? ST0_FR : 0));
+		enable_fpu_hazard();
+
+		/* check FR has the desired value */
+		return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE;
+
+	default:
+		BUG();
+	}
+}
 
 #define __disable_fpu()							\
 do {									\
@@ -45,19 +82,6 @@
 	disable_fpu_hazard();						\
 } while (0)
 
-#define enable_fpu()							\
-do {									\
-	if (cpu_has_fpu)						\
-		__enable_fpu();						\
-} while (0)
-
-#define disable_fpu()							\
-do {									\
-	if (cpu_has_fpu)						\
-		__disable_fpu();					\
-} while (0)
-
-
 #define clear_fpu_owner()	clear_thread_flag(TIF_USEDFPU)
 
 static inline int __is_fpu_owner(void)
@@ -70,27 +94,46 @@
 	return cpu_has_fpu && __is_fpu_owner();
 }
 
-static inline void __own_fpu(void)
+static inline int __own_fpu(void)
 {
-	__enable_fpu();
+	enum fpu_mode mode;
+	int ret;
+
+	mode = !test_thread_flag(TIF_32BIT_FPREGS);
+	ret = __enable_fpu(mode);
+	if (ret)
+		return ret;
+
 	KSTK_STATUS(current) |= ST0_CU1;
+	if (mode == FPU_64BIT)
+		KSTK_STATUS(current) |= ST0_FR;
+	else /* mode == FPU_32BIT */
+		KSTK_STATUS(current) &= ~ST0_FR;
+
 	set_thread_flag(TIF_USEDFPU);
+	return 0;
 }
 
-static inline void own_fpu_inatomic(int restore)
+static inline int own_fpu_inatomic(int restore)
 {
+	int ret = 0;
+
 	if (cpu_has_fpu && !__is_fpu_owner()) {
-		__own_fpu();
-		if (restore)
+		ret = __own_fpu();
+		if (restore && !ret)
 			_restore_fp(current);
 	}
+	return ret;
 }
 
-static inline void own_fpu(int restore)
+static inline int own_fpu(int restore)
 {
+	int ret;
+
 	preempt_disable();
-	own_fpu_inatomic(restore);
+	ret = own_fpu_inatomic(restore);
 	preempt_enable();
+	return ret;
 }
 
 static inline void lose_fpu(int save)
@@ -106,16 +149,21 @@
 	preempt_enable();
 }
 
-static inline void init_fpu(void)
+static inline int init_fpu(void)
 {
+	int ret = 0;
+
 	preempt_disable();
 	if (cpu_has_fpu) {
-		__own_fpu();
-		_init_fpu();
+		ret = __own_fpu();
+		if (!ret)
+			_init_fpu();
 	} else {
 		fpu_emulator_init_fpu();
 	}
+
 	preempt_enable();
+	return ret;
 }
 
 static inline void save_fp(struct task_struct *tsk)
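
A note on the FR handling in __enable_fpu() above: it is a masked
read-modify-write of CP0 Status followed by a read-back, because hardware
without a 64-bit register file may refuse to latch ST0_FR. A toy user-space
model of that sequence (fake status word, but the real MIPS bit positions):

#include <stdio.h>

#define ST0_CU1	(1u << 29)	/* coprocessor 1 usable */
#define ST0_FR	(1u << 26)	/* 64-bit FP register file */

static unsigned int fake_status;	/* stands in for CP0 Status */

/* models change_c0_status(): clear 'mask', then set 'bits' */
static void change_status(unsigned int mask, unsigned int bits)
{
	fake_status = (fake_status & ~mask) | bits;
}

int main(void)
{
	int fr = 1;	/* FPU_64BIT */

	change_status(ST0_CU1 | ST0_FR, ST0_CU1 | (fr ? ST0_FR : 0));
	/* __enable_fpu() returns SIGFPE when this read-back disagrees */
	printf("FR reads back as %d\n", !!(fake_status & ST0_FR));
	return 0;
}
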
diff --git a/arch/mips/include/asm/highmem.h b/arch/mips/include/asm/highmem.h
index b0dd0c8..572e63e 100644
--- a/arch/mips/include/asm/highmem.h
+++ b/arch/mips/include/asm/highmem.h
@@ -19,7 +19,6 @@
 
 #ifdef __KERNEL__
 
-#include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/uaccess.h>
 #include <asm/kmap_types.h>
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 3296696..a995fce 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -391,9 +391,6 @@
 	uint32_t guest_kernel_asid[NR_CPUS];
 	struct mm_struct guest_kernel_mm, guest_user_mm;
 
-	struct kvm_mips_tlb shadow_tlb[NR_CPUS][KVM_MIPS_GUEST_TLB_SIZE];
-
-
 	struct hrtimer comparecount_timer;
 
 	int last_sched_cpu;
@@ -529,7 +526,6 @@
 
 extern void kvm_mips_dump_host_tlbs(void);
 extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu);
-extern void kvm_mips_dump_shadow_tlbs(struct kvm_vcpu *vcpu);
 extern void kvm_mips_flush_host_tlb(int skip_kseg0);
 extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
 extern int kvm_mips_host_tlb_inv_index(struct kvm_vcpu *vcpu, int index);
@@ -541,10 +537,7 @@
 						   unsigned long gva);
 extern void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu,
 				    struct kvm_vcpu *vcpu);
-extern void kvm_shadow_tlb_put(struct kvm_vcpu *vcpu);
-extern void kvm_shadow_tlb_load(struct kvm_vcpu *vcpu);
 extern void kvm_local_flush_tlb_all(void);
-extern void kvm_mips_init_shadow_tlb(struct kvm_vcpu *vcpu);
 extern void kvm_mips_alloc_new_mmu_context(struct kvm_vcpu *vcpu);
 extern void kvm_mips_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 extern void kvm_mips_vcpu_put(struct kvm_vcpu *vcpu);
diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
index b86a125..cd41e93 100644
--- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
+++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
@@ -16,7 +16,6 @@
 #define __ASM_MACH_AR71XX_REGS_H
 
 #include <linux/types.h>
-#include <linux/init.h>
 #include <linux/io.h>
 #include <linux/bitops.h>
 
diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
index cc7563b..7527c1d 100644
--- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
+++ b/arch/mips/include/asm/mach-bcm47xx/bcm47xx.h
@@ -56,4 +56,6 @@
 				 const char *prefix);
 #endif
 
+void bcm47xx_set_system_type(u16 chip_id);
+
 #endif /* __ASM_BCM47XX_H */
diff --git a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h b/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
index 00867dd..40005fb 100644
--- a/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
+++ b/arch/mips/include/asm/mach-bcm47xx/bcm47xx_board.h
@@ -66,6 +66,7 @@
 	BCM47XX_BOARD_LINKSYS_WRT310NV1,
 	BCM47XX_BOARD_LINKSYS_WRT310NV2,
 	BCM47XX_BOARD_LINKSYS_WRT54G3GV2,
+	BCM47XX_BOARD_LINKSYS_WRT54GSV1,
 	BCM47XX_BOARD_LINKSYS_WRT610NV1,
 	BCM47XX_BOARD_LINKSYS_WRT610NV2,
 	BCM47XX_BOARD_LINKSYS_WRTSL54GS,
diff --git a/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h
new file mode 100644
index 0000000..b7992cd
--- /dev/null
+++ b/arch/mips/include/asm/mach-bcm47xx/cpu-feature-overrides.h
@@ -0,0 +1,82 @@
+#ifndef __ASM_MACH_BCM47XX_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_BCM47XX_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_tlb			1
+#define cpu_has_4kex			1
+#define cpu_has_3k_cache		0
+#define cpu_has_4k_cache		1
+#define cpu_has_tx39_cache		0
+#define cpu_has_fpu			0
+#define cpu_has_32fpr			0
+#define cpu_has_counter			1
+#if defined(CONFIG_BCM47XX_BCMA) && !defined(CONFIG_BCM47XX_SSB)
+#define cpu_has_watch			1
+#elif defined(CONFIG_BCM47XX_SSB) && !defined(CONFIG_BCM47XX_BCMA)
+#define cpu_has_watch			0
+#endif
+#define cpu_has_divec			1
+#define cpu_has_vce			0
+#define cpu_has_cache_cdex_p		0
+#define cpu_has_cache_cdex_s		0
+#define cpu_has_prefetch		1
+#define cpu_has_mcheck			1
+#define cpu_has_ejtag			1
+#define cpu_has_llsc			1
+
+/* cpu_has_mips16 */
+#define cpu_has_mdmx			0
+#define cpu_has_mips3d			0
+#define cpu_has_rixi			0
+#define cpu_has_mmips			0
+#define cpu_has_smartmips		0
+#define cpu_has_vtag_icache		0
+/* cpu_has_dc_aliases */
+#define cpu_has_ic_fills_f_dc		0
+#define cpu_has_pindexed_dcache		0
+#define cpu_icache_snoops_remote_store	0
+
+#define cpu_has_mips_2			1
+#define cpu_has_mips_3			0
+#define cpu_has_mips32r1		1
+#if defined(CONFIG_BCM47XX_BCMA) && !defined(CONFIG_BCM47XX_SSB)
+#define cpu_has_mips32r2		1
+#elif defined(CONFIG_BCM47XX_SSB) && !defined(CONFIG_BCM47XX_BCMA)
+#define cpu_has_mips32r2		0
+#endif
+#define cpu_has_mips64r1		0
+#define cpu_has_mips64r2		0
+
+#if defined(CONFIG_BCM47XX_BCMA) && !defined(CONFIG_BCM47XX_SSB)
+#define cpu_has_dsp			1
+#define cpu_has_dsp2			1
+#elif defined(CONFIG_BCM47XX_SSB) && !defined(CONFIG_BCM47XX_BCMA)
+#define cpu_has_dsp			0
+#define cpu_has_dsp2			0
+#endif
+#define cpu_has_mipsmt			0
+/* cpu_has_userlocal */
+
+#define cpu_has_nofpuex			0
+#define cpu_has_64bits			0
+#define cpu_has_64bit_zero_reg		0
+#if defined(CONFIG_BCM47XX_BCMA) && !defined(CONFIG_BCM47XX_SSB)
+#define cpu_has_vint			1
+#elif defined(CONFIG_BCM47XX_SSB) && !defined(CONFIG_BCM47XX_BCMA)
+#define cpu_has_vint			0
+#endif
+#define cpu_has_veic			0
+#define cpu_has_inclusive_pcaches	0
+
+#if defined(CONFIG_BCM47XX_BCMA) && !defined(CONFIG_BCM47XX_SSB)
+#define cpu_dcache_line_size()		32
+#define cpu_icache_line_size()		32
+#define cpu_has_perf_cntr_intr_bit	1
+#elif defined(CONFIG_BCM47XX_SSB) && !defined(CONFIG_BCM47XX_BCMA)
+#define cpu_dcache_line_size()		16
+#define cpu_icache_line_size()		16
+#define cpu_has_perf_cntr_intr_bit	0
+#endif
+#define cpu_scache_line_size()		0
+#define cpu_has_vz			0
+
+#endif /* __ASM_MACH_BCM47XX_CPU_FEATURE_OVERRIDES_H */
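
The point of an override header like this is that every symbol becomes a
compile-time constant, so a feature test compiles down to a single branch;
symbols left undefined (e.g. cpu_has_watch when both BCM47XX_BCMA and
BCM47XX_SSB are enabled) fall back to the generic runtime probe in
cpu-features.h. A toy demonstration of the folding (plain user-space C,
with cpu_has_fpu forced to 0 as in the override above):

#include <stdio.h>

#define cpu_has_fpu	0	/* forced off, as on bcm47xx */

int main(void)
{
	if (cpu_has_fpu)	/* dead branch, discarded at compile time */
		puts("hardware FPU path");
	else
		puts("FPU emulator path");
	return 0;
}
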
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h
index 19f9134..3112f08 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h
@@ -145,6 +145,7 @@
 	RSET_UART1,
 	RSET_GPIO,
 	RSET_SPI,
+	RSET_HSSPI,
 	RSET_UDC0,
 	RSET_OHCI0,
 	RSET_OHCI_PRIV,
@@ -193,6 +194,7 @@
 #define RSET_ENETDMAS_SIZE(chans)	(16 * (chans))
 #define RSET_ENETSW_SIZE		65536
 #define RSET_UART_SIZE			24
+#define RSET_HSSPI_SIZE			1536
 #define RSET_UDC_SIZE			256
 #define RSET_OHCI_SIZE			256
 #define RSET_EHCI_SIZE			256
@@ -265,6 +267,7 @@
 #define BCM_6328_UART1_BASE		(0xb0000120)
 #define BCM_6328_GPIO_BASE		(0xb0000080)
 #define BCM_6328_SPI_BASE		(0xdeadbeef)
+#define BCM_6328_HSSPI_BASE		(0xb0001000)
 #define BCM_6328_UDC0_BASE		(0xdeadbeef)
 #define BCM_6328_USBDMA_BASE		(0xb000c000)
 #define BCM_6328_OHCI0_BASE		(0xb0002600)
@@ -313,6 +316,7 @@
 #define BCM_6338_UART1_BASE		(0xdeadbeef)
 #define BCM_6338_GPIO_BASE		(0xfffe0400)
 #define BCM_6338_SPI_BASE		(0xfffe0c00)
+#define BCM_6338_HSSPI_BASE		(0xdeadbeef)
 #define BCM_6338_UDC0_BASE		(0xdeadbeef)
 #define BCM_6338_USBDMA_BASE		(0xfffe2400)
 #define BCM_6338_OHCI0_BASE		(0xdeadbeef)
@@ -360,6 +364,7 @@
 #define BCM_6345_UART1_BASE		(0xdeadbeef)
 #define BCM_6345_GPIO_BASE		(0xfffe0400)
 #define BCM_6345_SPI_BASE		(0xdeadbeef)
+#define BCM_6345_HSSPI_BASE		(0xdeadbeef)
 #define BCM_6345_UDC0_BASE		(0xdeadbeef)
 #define BCM_6345_USBDMA_BASE		(0xfffe2800)
 #define BCM_6345_ENET0_BASE		(0xfffe1800)
@@ -406,6 +411,7 @@
 #define BCM_6348_UART1_BASE		(0xdeadbeef)
 #define BCM_6348_GPIO_BASE		(0xfffe0400)
 #define BCM_6348_SPI_BASE		(0xfffe0c00)
+#define BCM_6348_HSSPI_BASE		(0xdeadbeef)
 #define BCM_6348_UDC0_BASE		(0xfffe1000)
 #define BCM_6348_USBDMA_BASE		(0xdeadbeef)
 #define BCM_6348_OHCI0_BASE		(0xfffe1b00)
@@ -451,6 +457,7 @@
 #define BCM_6358_UART1_BASE		(0xfffe0120)
 #define BCM_6358_GPIO_BASE		(0xfffe0080)
 #define BCM_6358_SPI_BASE		(0xfffe0800)
+#define BCM_6358_HSSPI_BASE		(0xdeadbeef)
 #define BCM_6358_UDC0_BASE		(0xfffe0800)
 #define BCM_6358_USBDMA_BASE		(0xdeadbeef)
 #define BCM_6358_OHCI0_BASE		(0xfffe1400)
@@ -553,6 +560,7 @@
 #define BCM_6368_UART1_BASE		(0xb0000120)
 #define BCM_6368_GPIO_BASE		(0xb0000080)
 #define BCM_6368_SPI_BASE		(0xb0000800)
+#define BCM_6368_HSSPI_BASE		(0xdeadbeef)
 #define BCM_6368_UDC0_BASE		(0xdeadbeef)
 #define BCM_6368_USBDMA_BASE		(0xb0004800)
 #define BCM_6368_OHCI0_BASE		(0xb0001600)
@@ -604,6 +612,7 @@
 	__GEN_RSET_BASE(__cpu, UART1)					\
 	__GEN_RSET_BASE(__cpu, GPIO)					\
 	__GEN_RSET_BASE(__cpu, SPI)					\
+	__GEN_RSET_BASE(__cpu, HSSPI)					\
 	__GEN_RSET_BASE(__cpu, UDC0)					\
 	__GEN_RSET_BASE(__cpu, OHCI0)					\
 	__GEN_RSET_BASE(__cpu, OHCI_PRIV)				\
@@ -647,6 +656,7 @@
 	[RSET_UART1]		= BCM_## __cpu ##_UART1_BASE,		\
 	[RSET_GPIO]		= BCM_## __cpu ##_GPIO_BASE,		\
 	[RSET_SPI]		= BCM_## __cpu ##_SPI_BASE,		\
+	[RSET_HSSPI]		= BCM_## __cpu ##_HSSPI_BASE,		\
 	[RSET_UDC0]		= BCM_## __cpu ##_UDC0_BASE,		\
 	[RSET_OHCI0]		= BCM_## __cpu ##_OHCI0_BASE,		\
 	[RSET_OHCI_PRIV]	= BCM_## __cpu ##_OHCI_PRIV_BASE,	\
@@ -727,6 +737,7 @@
 	IRQ_ENET0,
 	IRQ_ENET1,
 	IRQ_ENET_PHY,
+	IRQ_HSSPI,
 	IRQ_OHCI0,
 	IRQ_EHCI0,
 	IRQ_USBD,
@@ -815,6 +826,7 @@
 #define BCM_6328_ENET0_IRQ		0
 #define BCM_6328_ENET1_IRQ		0
 #define BCM_6328_ENET_PHY_IRQ		(IRQ_INTERNAL_BASE + 12)
+#define BCM_6328_HSSPI_IRQ		(IRQ_INTERNAL_BASE + 29)
 #define BCM_6328_OHCI0_IRQ		(BCM_6328_HIGH_IRQ_BASE + 9)
 #define BCM_6328_EHCI0_IRQ		(BCM_6328_HIGH_IRQ_BASE + 10)
 #define BCM_6328_USBD_IRQ		(IRQ_INTERNAL_BASE + 4)
@@ -860,6 +872,7 @@
 #define BCM_6338_ENET0_IRQ		(IRQ_INTERNAL_BASE + 8)
 #define BCM_6338_ENET1_IRQ		0
 #define BCM_6338_ENET_PHY_IRQ		(IRQ_INTERNAL_BASE + 9)
+#define BCM_6338_HSSPI_IRQ		0
 #define BCM_6338_OHCI0_IRQ		0
 #define BCM_6338_EHCI0_IRQ		0
 #define BCM_6338_USBD_IRQ		0
@@ -898,6 +911,7 @@
 #define BCM_6345_ENET0_IRQ		(IRQ_INTERNAL_BASE + 8)
 #define BCM_6345_ENET1_IRQ		0
 #define BCM_6345_ENET_PHY_IRQ		(IRQ_INTERNAL_BASE + 12)
+#define BCM_6345_HSSPI_IRQ		0
 #define BCM_6345_OHCI0_IRQ		0
 #define BCM_6345_EHCI0_IRQ		0
 #define BCM_6345_USBD_IRQ		0
@@ -936,6 +950,7 @@
 #define BCM_6348_ENET0_IRQ		(IRQ_INTERNAL_BASE + 8)
 #define BCM_6348_ENET1_IRQ		(IRQ_INTERNAL_BASE + 7)
 #define BCM_6348_ENET_PHY_IRQ		(IRQ_INTERNAL_BASE + 9)
+#define BCM_6348_HSSPI_IRQ		0
 #define BCM_6348_OHCI0_IRQ		(IRQ_INTERNAL_BASE + 12)
 #define BCM_6348_EHCI0_IRQ		0
 #define BCM_6348_USBD_IRQ		0
@@ -974,6 +989,7 @@
 #define BCM_6358_ENET0_IRQ		(IRQ_INTERNAL_BASE + 8)
 #define BCM_6358_ENET1_IRQ		(IRQ_INTERNAL_BASE + 6)
 #define BCM_6358_ENET_PHY_IRQ		(IRQ_INTERNAL_BASE + 9)
+#define BCM_6358_HSSPI_IRQ		0
 #define BCM_6358_OHCI0_IRQ		(IRQ_INTERNAL_BASE + 5)
 #define BCM_6358_EHCI0_IRQ		(IRQ_INTERNAL_BASE + 10)
 #define BCM_6358_USBD_IRQ		0
@@ -1086,6 +1102,7 @@
 #define BCM_6368_ENET0_IRQ		0
 #define BCM_6368_ENET1_IRQ		0
 #define BCM_6368_ENET_PHY_IRQ		(IRQ_INTERNAL_BASE + 15)
+#define BCM_6368_HSSPI_IRQ		0
 #define BCM_6368_OHCI0_IRQ		(IRQ_INTERNAL_BASE + 5)
 #define BCM_6368_EHCI0_IRQ		(IRQ_INTERNAL_BASE + 7)
 #define BCM_6368_USBD_IRQ		(IRQ_INTERNAL_BASE + 8)
@@ -1133,6 +1150,7 @@
 	[IRQ_ENET0]		= BCM_## __cpu ##_ENET0_IRQ,		\
 	[IRQ_ENET1]		= BCM_## __cpu ##_ENET1_IRQ,		\
 	[IRQ_ENET_PHY]		= BCM_## __cpu ##_ENET_PHY_IRQ,		\
+	[IRQ_HSSPI]		= BCM_## __cpu ##_HSSPI_IRQ,		\
 	[IRQ_OHCI0]		= BCM_## __cpu ##_OHCI0_IRQ,		\
 	[IRQ_EHCI0]		= BCM_## __cpu ##_EHCI0_IRQ,		\
 	[IRQ_USBD]		= BCM_## __cpu ##_USBD_IRQ,		\
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_hsspi.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_hsspi.h
new file mode 100644
index 0000000..1b1acaf
--- /dev/null
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_hsspi.h
@@ -0,0 +1,8 @@
+#ifndef BCM63XX_DEV_HSSPI_H
+#define BCM63XX_DEV_HSSPI_H
+
+#include <linux/types.h>
+
+int bcm63xx_hsspi_register(void);
+
+#endif /* BCM63XX_DEV_HSSPI_H */
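
Each BCM_xxxx_HSSPI_BASE above feeds the per-SoC designated-initializer
tables ([RSET_HSSPI] = BCM_*_HSSPI_BASE), so the new register set costs one
slot per chip and 0xdeadbeef marks chips without an HSSPI block. A
standalone miniature of that table pattern, reusing the BCM6328 values from
this patch:

#include <stdio.h>

enum { RSET_SPI, RSET_HSSPI, RSET_MAX };	/* trimmed-down enum */

static const unsigned long bcm6328_bases[RSET_MAX] = {
	[RSET_SPI]   = 0xdeadbeef,	/* 6328 has no legacy SPI block */
	[RSET_HSSPI] = 0xb0001000,
};

int main(void)
{
	printf("HSSPI base: 0x%lx\n", bcm6328_bases[RSET_HSSPI]);
	return 0;
}
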
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
index 9875db3..ab427f8 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h
@@ -463,126 +463,6 @@
 #define WDT_SOFTRESET_REG		0xc
 
 /*************************************************************************
- * _REG relative to RSET_UARTx
- *************************************************************************/
-
-/* UART Control Register */
-#define UART_CTL_REG			0x0
-#define UART_CTL_RXTMOUTCNT_SHIFT	0
-#define UART_CTL_RXTMOUTCNT_MASK	(0x1f << UART_CTL_RXTMOUTCNT_SHIFT)
-#define UART_CTL_RSTTXDN_SHIFT		5
-#define UART_CTL_RSTTXDN_MASK		(1 << UART_CTL_RSTTXDN_SHIFT)
-#define UART_CTL_RSTRXFIFO_SHIFT		6
-#define UART_CTL_RSTRXFIFO_MASK		(1 << UART_CTL_RSTRXFIFO_SHIFT)
-#define UART_CTL_RSTTXFIFO_SHIFT		7
-#define UART_CTL_RSTTXFIFO_MASK		(1 << UART_CTL_RSTTXFIFO_SHIFT)
-#define UART_CTL_STOPBITS_SHIFT		8
-#define UART_CTL_STOPBITS_MASK		(0xf << UART_CTL_STOPBITS_SHIFT)
-#define UART_CTL_STOPBITS_1		(0x7 << UART_CTL_STOPBITS_SHIFT)
-#define UART_CTL_STOPBITS_2		(0xf << UART_CTL_STOPBITS_SHIFT)
-#define UART_CTL_BITSPERSYM_SHIFT	12
-#define UART_CTL_BITSPERSYM_MASK	(0x3 << UART_CTL_BITSPERSYM_SHIFT)
-#define UART_CTL_XMITBRK_SHIFT		14
-#define UART_CTL_XMITBRK_MASK		(1 << UART_CTL_XMITBRK_SHIFT)
-#define UART_CTL_RSVD_SHIFT		15
-#define UART_CTL_RSVD_MASK		(1 << UART_CTL_RSVD_SHIFT)
-#define UART_CTL_RXPAREVEN_SHIFT		16
-#define UART_CTL_RXPAREVEN_MASK		(1 << UART_CTL_RXPAREVEN_SHIFT)
-#define UART_CTL_RXPAREN_SHIFT		17
-#define UART_CTL_RXPAREN_MASK		(1 << UART_CTL_RXPAREN_SHIFT)
-#define UART_CTL_TXPAREVEN_SHIFT		18
-#define UART_CTL_TXPAREVEN_MASK		(1 << UART_CTL_TXPAREVEN_SHIFT)
-#define UART_CTL_TXPAREN_SHIFT		18
-#define UART_CTL_TXPAREN_MASK		(1 << UART_CTL_TXPAREN_SHIFT)
-#define UART_CTL_LOOPBACK_SHIFT		20
-#define UART_CTL_LOOPBACK_MASK		(1 << UART_CTL_LOOPBACK_SHIFT)
-#define UART_CTL_RXEN_SHIFT		21
-#define UART_CTL_RXEN_MASK		(1 << UART_CTL_RXEN_SHIFT)
-#define UART_CTL_TXEN_SHIFT		22
-#define UART_CTL_TXEN_MASK		(1 << UART_CTL_TXEN_SHIFT)
-#define UART_CTL_BRGEN_SHIFT		23
-#define UART_CTL_BRGEN_MASK		(1 << UART_CTL_BRGEN_SHIFT)
-
-/* UART Baudword register */
-#define UART_BAUD_REG			0x4
-
-/* UART Misc Control register */
-#define UART_MCTL_REG			0x8
-#define UART_MCTL_DTR_SHIFT		0
-#define UART_MCTL_DTR_MASK		(1 << UART_MCTL_DTR_SHIFT)
-#define UART_MCTL_RTS_SHIFT		1
-#define UART_MCTL_RTS_MASK		(1 << UART_MCTL_RTS_SHIFT)
-#define UART_MCTL_RXFIFOTHRESH_SHIFT	8
-#define UART_MCTL_RXFIFOTHRESH_MASK	(0xf << UART_MCTL_RXFIFOTHRESH_SHIFT)
-#define UART_MCTL_TXFIFOTHRESH_SHIFT	12
-#define UART_MCTL_TXFIFOTHRESH_MASK	(0xf << UART_MCTL_TXFIFOTHRESH_SHIFT)
-#define UART_MCTL_RXFIFOFILL_SHIFT	16
-#define UART_MCTL_RXFIFOFILL_MASK	(0x1f << UART_MCTL_RXFIFOFILL_SHIFT)
-#define UART_MCTL_TXFIFOFILL_SHIFT	24
-#define UART_MCTL_TXFIFOFILL_MASK	(0x1f << UART_MCTL_TXFIFOFILL_SHIFT)
-
-/* UART External Input Configuration register */
-#define UART_EXTINP_REG			0xc
-#define UART_EXTINP_RI_SHIFT		0
-#define UART_EXTINP_RI_MASK		(1 << UART_EXTINP_RI_SHIFT)
-#define UART_EXTINP_CTS_SHIFT		1
-#define UART_EXTINP_CTS_MASK		(1 << UART_EXTINP_CTS_SHIFT)
-#define UART_EXTINP_DCD_SHIFT		2
-#define UART_EXTINP_DCD_MASK		(1 << UART_EXTINP_DCD_SHIFT)
-#define UART_EXTINP_DSR_SHIFT		3
-#define UART_EXTINP_DSR_MASK		(1 << UART_EXTINP_DSR_SHIFT)
-#define UART_EXTINP_IRSTAT(x)		(1 << (x + 4))
-#define UART_EXTINP_IRMASK(x)		(1 << (x + 8))
-#define UART_EXTINP_IR_RI		0
-#define UART_EXTINP_IR_CTS		1
-#define UART_EXTINP_IR_DCD		2
-#define UART_EXTINP_IR_DSR		3
-#define UART_EXTINP_RI_NOSENSE_SHIFT	16
-#define UART_EXTINP_RI_NOSENSE_MASK	(1 << UART_EXTINP_RI_NOSENSE_SHIFT)
-#define UART_EXTINP_CTS_NOSENSE_SHIFT	17
-#define UART_EXTINP_CTS_NOSENSE_MASK	(1 << UART_EXTINP_CTS_NOSENSE_SHIFT)
-#define UART_EXTINP_DCD_NOSENSE_SHIFT	18
-#define UART_EXTINP_DCD_NOSENSE_MASK	(1 << UART_EXTINP_DCD_NOSENSE_SHIFT)
-#define UART_EXTINP_DSR_NOSENSE_SHIFT	19
-#define UART_EXTINP_DSR_NOSENSE_MASK	(1 << UART_EXTINP_DSR_NOSENSE_SHIFT)
-
-/* UART Interrupt register */
-#define UART_IR_REG			0x10
-#define UART_IR_MASK(x)			(1 << (x + 16))
-#define UART_IR_STAT(x)			(1 << (x))
-#define UART_IR_EXTIP			0
-#define UART_IR_TXUNDER			1
-#define UART_IR_TXOVER			2
-#define UART_IR_TXTRESH			3
-#define UART_IR_TXRDLATCH		4
-#define UART_IR_TXEMPTY			5
-#define UART_IR_RXUNDER			6
-#define UART_IR_RXOVER			7
-#define UART_IR_RXTIMEOUT		8
-#define UART_IR_RXFULL			9
-#define UART_IR_RXTHRESH		10
-#define UART_IR_RXNOTEMPTY		11
-#define UART_IR_RXFRAMEERR		12
-#define UART_IR_RXPARERR		13
-#define UART_IR_RXBRK			14
-#define UART_IR_TXDONE			15
-
-/* UART Fifo register */
-#define UART_FIFO_REG			0x14
-#define UART_FIFO_VALID_SHIFT		0
-#define UART_FIFO_VALID_MASK		0xff
-#define UART_FIFO_FRAMEERR_SHIFT	8
-#define UART_FIFO_FRAMEERR_MASK		(1 << UART_FIFO_FRAMEERR_SHIFT)
-#define UART_FIFO_PARERR_SHIFT		9
-#define UART_FIFO_PARERR_MASK		(1 << UART_FIFO_PARERR_SHIFT)
-#define UART_FIFO_BRKDET_SHIFT		10
-#define UART_FIFO_BRKDET_MASK		(1 << UART_FIFO_BRKDET_SHIFT)
-#define UART_FIFO_ANYERR_MASK		(UART_FIFO_FRAMEERR_MASK |	\
-					UART_FIFO_PARERR_MASK |		\
-					UART_FIFO_BRKDET_MASK)
-
-
-/*************************************************************************
  * _REG relative to RSET_GPIO
  *************************************************************************/
 
diff --git a/arch/mips/include/asm/mach-generic/dma-coherence.h b/arch/mips/include/asm/mach-generic/dma-coherence.h
index a9e8f6b..7629c35 100644
--- a/arch/mips/include/asm/mach-generic/dma-coherence.h
+++ b/arch/mips/include/asm/mach-generic/dma-coherence.h
@@ -49,11 +49,7 @@
 
 static inline int plat_device_is_coherent(struct device *dev)
 {
-#ifdef CONFIG_DMA_COHERENT
-	return 1;
-#else
 	return coherentio;
-#endif
 }
 
 #ifdef CONFIG_SWIOTLB
diff --git a/arch/mips/include/asm/mach-generic/floppy.h b/arch/mips/include/asm/mach-generic/floppy.h
index 5b5cd68..e2561d9 100644
--- a/arch/mips/include/asm/mach-generic/floppy.h
+++ b/arch/mips/include/asm/mach-generic/floppy.h
@@ -9,7 +9,6 @@
 #define __ASM_MACH_GENERIC_FLOPPY_H
 
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/sched.h>
 #include <linux/linkage.h>
diff --git a/arch/mips/include/asm/mach-generic/ide.h b/arch/mips/include/asm/mach-generic/ide.h
index affa66f5..4ae5fbc 100644
--- a/arch/mips/include/asm/mach-generic/ide.h
+++ b/arch/mips/include/asm/mach-generic/ide.h
@@ -23,7 +23,7 @@
 static inline void __ide_flush_prologue(void)
 {
 #ifdef CONFIG_SMP
-	if (cpu_has_dc_aliases)
+	if (cpu_has_dc_aliases || !cpu_has_ic_fills_f_dc)
 		preempt_disable();
 #endif
 }
@@ -31,14 +31,14 @@
 static inline void __ide_flush_epilogue(void)
 {
 #ifdef CONFIG_SMP
-	if (cpu_has_dc_aliases)
+	if (cpu_has_dc_aliases || !cpu_has_ic_fills_f_dc)
 		preempt_enable();
 #endif
 }
 
 static inline void __ide_flush_dcache_range(unsigned long addr, unsigned long size)
 {
-	if (cpu_has_dc_aliases) {
+	if (cpu_has_dc_aliases || !cpu_has_ic_fills_f_dc) {
 		unsigned long end = addr + size;
 
 		while (addr < end) {
diff --git a/arch/mips/include/asm/mach-jazz/floppy.h b/arch/mips/include/asm/mach-jazz/floppy.h
index 62aa1e2..4b86c88a 100644
--- a/arch/mips/include/asm/mach-jazz/floppy.h
+++ b/arch/mips/include/asm/mach-jazz/floppy.h
@@ -9,7 +9,6 @@
 #define __ASM_MACH_JAZZ_FLOPPY_H
 
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/linkage.h>
 #include <linux/types.h>
 #include <linux/mm.h>
diff --git a/arch/mips/include/asm/mach-jz4740/platform.h b/arch/mips/include/asm/mach-jz4740/platform.h
index 05988c2..069b43a 100644
--- a/arch/mips/include/asm/mach-jz4740/platform.h
+++ b/arch/mips/include/asm/mach-jz4740/platform.h
@@ -21,6 +21,7 @@
 
 extern struct platform_device jz4740_usb_ohci_device;
 extern struct platform_device jz4740_udc_device;
+extern struct platform_device jz4740_udc_xceiv_device;
 extern struct platform_device jz4740_mmc_device;
 extern struct platform_device jz4740_rtc_device;
 extern struct platform_device jz4740_i2c_device;
diff --git a/arch/mips/include/asm/mach-netlogic/irq.h b/arch/mips/include/asm/mach-netlogic/irq.h
index 868ed8a..c0dbd53 100644
--- a/arch/mips/include/asm/mach-netlogic/irq.h
+++ b/arch/mips/include/asm/mach-netlogic/irq.h
@@ -9,7 +9,8 @@
 #define __ASM_NETLOGIC_IRQ_H
 
 #include <asm/mach-netlogic/multi-node.h>
-#define NR_IRQS			(64 * NLM_NR_NODES)
+#define NLM_IRQS_PER_NODE	1024
+#define NR_IRQS			(NLM_IRQS_PER_NODE * NLM_NR_NODES)
 
 #define MIPS_CPU_IRQ_BASE	0
 
diff --git a/arch/mips/include/asm/mach-netlogic/multi-node.h b/arch/mips/include/asm/mach-netlogic/multi-node.h
index d62fc77..9ed8dac 100644
--- a/arch/mips/include/asm/mach-netlogic/multi-node.h
+++ b/arch/mips/include/asm/mach-netlogic/multi-node.h
@@ -47,8 +47,37 @@
 #endif
 #endif
 
-#define NLM_CORES_PER_NODE	8
 #define NLM_THREADS_PER_CORE	4
-#define NLM_CPUS_PER_NODE	(NLM_CORES_PER_NODE * NLM_THREADS_PER_CORE)
+#ifdef CONFIG_CPU_XLR
+#define nlm_cores_per_node()	8
+#else
+extern unsigned int xlp_cores_per_node;
+#define nlm_cores_per_node()	xlp_cores_per_node
+#endif
+
+#define nlm_threads_per_node()	(nlm_cores_per_node() * NLM_THREADS_PER_CORE)
+#define nlm_cpuid_to_node(c)	((c) / nlm_threads_per_node())
+
+struct nlm_soc_info {
+	unsigned long	coremask;	/* cores enabled on the soc */
+	unsigned long	ebase;		/* not used now */
+	uint64_t	irqmask;	/* EIMR for the node */
+	uint64_t	sysbase;	/* only for XLP - sys block base */
+	uint64_t	picbase;	/* PIC block base */
+	spinlock_t	piclock;	/* lock for PIC access */
+	cpumask_t	cpumask;	/* logical cpu mask for node */
+	unsigned int	socbus;
+};
+
+extern struct nlm_soc_info nlm_nodes[NLM_NR_NODES];
+#define nlm_get_node(i)		(&nlm_nodes[i])
+#define nlm_node_present(n)	((n) >= 0 && (n) < NLM_NR_NODES && \
+					nlm_get_node(n)->coremask != 0)
+#ifdef CONFIG_CPU_XLR
+#define nlm_current_node()	(&nlm_nodes[0])
+#else
+#define nlm_current_node()	(&nlm_nodes[nlm_nodeid()])
+#endif
+void nlm_node_init(int node);
 
 #endif
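
With the core count per node now a runtime value on XLP (xlp_cores_per_node),
the nlm_cpuid_to_node() mapping above is plain integer division. A quick
standalone check of the arithmetic, assuming 8 cores per node as on the
earlier XLP parts:

#include <stdio.h>

#define NLM_THREADS_PER_CORE 4
static unsigned int xlp_cores_per_node = 8;	/* runtime-detected on XLP */

#define nlm_threads_per_node() (xlp_cores_per_node * NLM_THREADS_PER_CORE)
#define nlm_cpuid_to_node(c)   ((c) / nlm_threads_per_node())

int main(void)
{
	printf("cpu 37 -> node %u\n", nlm_cpuid_to_node(37));	/* node 1 */
	return 0;
}
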
diff --git a/arch/mips/include/asm/mach-netlogic/topology.h b/arch/mips/include/asm/mach-netlogic/topology.h
new file mode 100644
index 0000000..0da99fa
--- /dev/null
+++ b/arch/mips/include/asm/mach-netlogic/topology.h
@@ -0,0 +1,20 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Broadcom Corporation
+ */
+#ifndef _ASM_MACH_NETLOGIC_TOPOLOGY_H
+#define _ASM_MACH_NETLOGIC_TOPOLOGY_H
+
+#include <asm/mach-netlogic/multi-node.h>
+
+#define topology_physical_package_id(cpu)	cpu_to_node(cpu)
+#define topology_core_id(cpu)	(cpu_logical_map(cpu) / NLM_THREADS_PER_CORE)
+#define topology_thread_cpumask(cpu)		(&cpu_sibling_map[cpu])
+#define topology_core_cpumask(cpu)	cpumask_of_node(cpu_to_node(cpu))
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_MACH_NETLOGIC_TOPOLOGY_H */
diff --git a/arch/mips/include/asm/mips-boards/piix4.h b/arch/mips/include/asm/mips-boards/piix4.h
index e332279..836e2ed 100644
--- a/arch/mips/include/asm/mips-boards/piix4.h
+++ b/arch/mips/include/asm/mips-boards/piix4.h
@@ -26,6 +26,10 @@
 #define   PIIX4_FUNC0_PIRQRC_IRQ_ROUTING_DISABLE	(1 << 7)
 #define   PIIX4_FUNC0_PIRQRC_IRQ_ROUTING_MASK		0xf
 #define   PIIX4_FUNC0_PIRQRC_IRQ_ROUTING_MAX		16
+/* SERIRQ Control */
+#define PIIX4_FUNC0_SERIRQC			0x64
+#define   PIIX4_FUNC0_SERIRQC_EN			(1 << 7)
+#define   PIIX4_FUNC0_SERIRQC_CONT			(1 << 6)
 /* Top Of Memory */
 #define PIIX4_FUNC0_TOM				0x69
 #define   PIIX4_FUNC0_TOM_TOP_OF_MEMORY_MASK		0xf0
@@ -34,6 +38,9 @@
 #define   PIIX4_FUNC0_DLC_USBPR_EN			(1 << 2)
 #define   PIIX4_FUNC0_DLC_PASSIVE_RELEASE_EN		(1 << 1)
 #define   PIIX4_FUNC0_DLC_DELAYED_TRANSACTION_EN	(1 << 0)
+/* General Configuration */
+#define PIIX4_FUNC0_GENCFG			0xb0
+#define   PIIX4_FUNC0_GENCFG_SERIRQ			(1 << 16)
 
 /* IDE Timing */
 #define PIIX4_FUNC1_IDETIM_PRIMARY_LO		0x40
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index e033141..bbc3dd4 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -14,6 +14,7 @@
 #define _ASM_MIPSREGS_H
 
 #include <linux/linkage.h>
+#include <linux/types.h>
 #include <asm/hazards.h>
 #include <asm/war.h>
 
@@ -573,7 +574,9 @@
 #define MIPS_CONF1_IA		(_ULCAST_(7) << 16)
 #define MIPS_CONF1_IL		(_ULCAST_(7) << 19)
 #define MIPS_CONF1_IS		(_ULCAST_(7) << 22)
-#define MIPS_CONF1_TLBS		(_ULCAST_(63)<< 25)
+#define MIPS_CONF1_TLBS_SHIFT   (25)
+#define MIPS_CONF1_TLBS_SIZE    (6)
+#define MIPS_CONF1_TLBS         (_ULCAST_(63) << MIPS_CONF1_TLBS_SHIFT)
 
 #define MIPS_CONF2_SA		(_ULCAST_(15)<<	 0)
 #define MIPS_CONF2_SL		(_ULCAST_(15)<<	 4)
@@ -587,21 +590,53 @@
 #define MIPS_CONF3_TL		(_ULCAST_(1) <<	 0)
 #define MIPS_CONF3_SM		(_ULCAST_(1) <<	 1)
 #define MIPS_CONF3_MT		(_ULCAST_(1) <<	 2)
+#define MIPS_CONF3_CDMM		(_ULCAST_(1) <<	 3)
 #define MIPS_CONF3_SP		(_ULCAST_(1) <<	 4)
 #define MIPS_CONF3_VINT		(_ULCAST_(1) <<	 5)
 #define MIPS_CONF3_VEIC		(_ULCAST_(1) <<	 6)
 #define MIPS_CONF3_LPA		(_ULCAST_(1) <<	 7)
+#define MIPS_CONF3_ITL		(_ULCAST_(1) <<	 8)
+#define MIPS_CONF3_CTXTC	(_ULCAST_(1) <<	 9)
 #define MIPS_CONF3_DSP		(_ULCAST_(1) << 10)
 #define MIPS_CONF3_DSP2P	(_ULCAST_(1) << 11)
 #define MIPS_CONF3_RXI		(_ULCAST_(1) << 12)
 #define MIPS_CONF3_ULRI		(_ULCAST_(1) << 13)
 #define MIPS_CONF3_ISA		(_ULCAST_(3) << 14)
 #define MIPS_CONF3_ISA_OE	(_ULCAST_(1) << 16)
+#define MIPS_CONF3_MCU		(_ULCAST_(1) << 17)
+#define MIPS_CONF3_MMAR		(_ULCAST_(7) << 18)
+#define MIPS_CONF3_IPLW		(_ULCAST_(3) << 21)
 #define MIPS_CONF3_VZ		(_ULCAST_(1) << 23)
+#define MIPS_CONF3_PW		(_ULCAST_(1) << 24)
+#define MIPS_CONF3_SC		(_ULCAST_(1) << 25)
+#define MIPS_CONF3_BI		(_ULCAST_(1) << 26)
+#define MIPS_CONF3_BP		(_ULCAST_(1) << 27)
+#define MIPS_CONF3_MSA		(_ULCAST_(1) << 28)
+#define MIPS_CONF3_CMGCR	(_ULCAST_(1) << 29)
+#define MIPS_CONF3_BPG		(_ULCAST_(1) << 30)
 
+#define MIPS_CONF4_MMUSIZEEXT_SHIFT	(0)
 #define MIPS_CONF4_MMUSIZEEXT	(_ULCAST_(255) << 0)
+#define MIPS_CONF4_FTLBSETS_SHIFT	(0)
+#define MIPS_CONF4_FTLBSETS	(_ULCAST_(15) << MIPS_CONF4_FTLBSETS_SHIFT)
+#define MIPS_CONF4_FTLBWAYS_SHIFT	(4)
+#define MIPS_CONF4_FTLBWAYS	(_ULCAST_(15) << MIPS_CONF4_FTLBWAYS_SHIFT)
+#define MIPS_CONF4_FTLBPAGESIZE_SHIFT	(8)
+/* bits 10:8 in FTLB-only configurations */
+#define MIPS_CONF4_FTLBPAGESIZE (_ULCAST_(7) << MIPS_CONF4_FTLBPAGESIZE_SHIFT)
+/* bits 12:8 in VTLB-FTLB only configurations */
+#define MIPS_CONF4_VFTLBPAGESIZE (_ULCAST_(31) << MIPS_CONF4_FTLBPAGESIZE_SHIFT)
 #define MIPS_CONF4_MMUEXTDEF	(_ULCAST_(3) << 14)
 #define MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT (_ULCAST_(1) << 14)
+#define MIPS_CONF4_MMUEXTDEF_FTLBSIZEEXT	(_ULCAST_(2) << 14)
+#define MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT	(_ULCAST_(3) << 14)
+#define MIPS_CONF4_KSCREXIST	(_ULCAST_(255) << 16)
+#define MIPS_CONF4_VTLBSIZEEXT_SHIFT	(24)
+#define MIPS_CONF4_VTLBSIZEEXT	(_ULCAST_(15) << MIPS_CONF4_VTLBSIZEEXT_SHIFT)
+#define MIPS_CONF4_AE		(_ULCAST_(1) << 28)
+#define MIPS_CONF4_IE		(_ULCAST_(3) << 29)
+#define MIPS_CONF4_TLBINV	(_ULCAST_(2) << 29)
 
 #define MIPS_CONF5_NF		(_ULCAST_(1) << 0)
 #define MIPS_CONF5_UFR		(_ULCAST_(1) << 2)
@@ -611,11 +646,15 @@
 #define MIPS_CONF5_K		(_ULCAST_(1) << 30)
 
 #define MIPS_CONF6_SYND		(_ULCAST_(1) << 13)
+/* proAptiv FTLB on/off bit */
+#define MIPS_CONF6_FTLBEN	(_ULCAST_(1) << 15)
 
 #define MIPS_CONF7_WII		(_ULCAST_(1) << 31)
 
 #define MIPS_CONF7_RPS		(_ULCAST_(1) << 2)
 
+/* EntryHI bit definition */
+#define MIPS_ENTRYHI_EHINV	(_ULCAST_(1) << 10)
 
 /*
  * Bits in the MIPS32/64 coprocessor 1 (FPU) revision register.
@@ -628,6 +667,26 @@
 #define MIPS_FPIR_L		(_ULCAST_(1) << 21)
 #define MIPS_FPIR_F64		(_ULCAST_(1) << 22)
 
+/*
+ * Bits in the MIPS32 Memory Segmentation registers.
+ */
+#define MIPS_SEGCFG_PA_SHIFT	9
+#define MIPS_SEGCFG_PA		(_ULCAST_(127) << MIPS_SEGCFG_PA_SHIFT)
+#define MIPS_SEGCFG_AM_SHIFT	4
+#define MIPS_SEGCFG_AM		(_ULCAST_(7) << MIPS_SEGCFG_AM_SHIFT)
+#define MIPS_SEGCFG_EU_SHIFT	3
+#define MIPS_SEGCFG_EU		(_ULCAST_(1) << MIPS_SEGCFG_EU_SHIFT)
+#define MIPS_SEGCFG_C_SHIFT	0
+#define MIPS_SEGCFG_C		(_ULCAST_(7) << MIPS_SEGCFG_C_SHIFT)
+
+#define MIPS_SEGCFG_UUSK	_ULCAST_(7)
+#define MIPS_SEGCFG_USK		_ULCAST_(5)
+#define MIPS_SEGCFG_MUSUK	_ULCAST_(4)
+#define MIPS_SEGCFG_MUSK	_ULCAST_(3)
+#define MIPS_SEGCFG_MSK		_ULCAST_(2)
+#define MIPS_SEGCFG_MK		_ULCAST_(1)
+#define MIPS_SEGCFG_UK		_ULCAST_(0)
+
 #ifndef __ASSEMBLY__
 
 /*
@@ -649,6 +708,19 @@
 }
 
 /*
+ * TLB Invalidate Flush
+ */
+static inline void tlbinvf(void)
+{
+	__asm__ __volatile__(
+		".set push\n\t"
+		".set noreorder\n\t"
+		".word 0x42000004\n\t" /* tlbinvf */
+		".set pop");
+}
+
+/*
  * Functions to access the R10000 performance counters.	 These are basically
  * mfc0 and mtc0 instructions from and to coprocessor register with a 5-bit
  * performance counter number encoded into bits 1 ... 5 of the instruction.
@@ -1102,6 +1174,15 @@
 #define read_c0_ebase()		__read_32bit_c0_register($15, 1)
 #define write_c0_ebase(val)	__write_32bit_c0_register($15, 1, val)
 
+/* MIPSR3 */
+#define read_c0_segctl0()	__read_32bit_c0_register($5, 2)
+#define write_c0_segctl0(val)	__write_32bit_c0_register($5, 2, val)
+
+#define read_c0_segctl1()	__read_32bit_c0_register($5, 3)
+#define write_c0_segctl1(val)	__write_32bit_c0_register($5, 3, val)
+
+#define read_c0_segctl2()	__read_32bit_c0_register($5, 4)
+#define write_c0_segctl2(val)	__write_32bit_c0_register($5, 4, val)
 
 /* Cavium OCTEON (cnMIPS) */
 #define read_c0_cvmcount()	__read_ulong_c0_register($9, 6)
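
The new MIPS_CONF1_TLBS_SHIFT/_SIZE helpers let FTLB code pull the VTLB size
out of Config1: the field occupies bits 30:25 and holds the number of
entries minus one. A worked standalone example with a hypothetical register
value:

#include <stdio.h>
#include <stdint.h>

#define MIPS_CONF1_TLBS_SHIFT	25
#define MIPS_CONF1_TLBS		(63u << MIPS_CONF1_TLBS_SHIFT)

int main(void)
{
	uint32_t config1 = 0x3e000000;	/* hypothetical Config1 contents */
	unsigned int entries =
		((config1 & MIPS_CONF1_TLBS) >> MIPS_CONF1_TLBS_SHIFT) + 1;

	printf("VTLB entries: %u\n", entries);	/* 32 */
	return 0;
}
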
diff --git a/arch/mips/include/asm/netlogic/common.h b/arch/mips/include/asm/netlogic/common.h
index bb68c33..c281f03 100644
--- a/arch/mips/include/asm/netlogic/common.h
+++ b/arch/mips/include/asm/netlogic/common.h
@@ -84,7 +84,6 @@
  */
 void nlm_init_boot_cpu(void);
 unsigned int nlm_get_cpu_frequency(void);
-void nlm_node_init(int node);
 extern struct plat_smp_ops nlm_smp_ops;
 extern char nlm_reset_entry[], nlm_reset_entry_end[];
 
@@ -94,26 +93,16 @@
 extern unsigned int nlm_threads_per_core;
 extern cpumask_t nlm_cpumask;
 
-struct nlm_soc_info {
-	unsigned long coremask; /* cores enabled on the soc */
-	unsigned long ebase;
-	uint64_t irqmask;
-	uint64_t sysbase;	/* only for XLP */
-	uint64_t picbase;
-	spinlock_t piclock;
-};
-
-#define nlm_get_node(i)		(&nlm_nodes[i])
-#ifdef CONFIG_CPU_XLR
-#define nlm_current_node()	(&nlm_nodes[0])
-#else
-#define nlm_current_node()	(&nlm_nodes[nlm_nodeid()])
-#endif
-
 struct irq_data;
 uint64_t nlm_pci_irqmask(int node);
+void nlm_setup_pic_irq(int node, int picirq, int irq, int irt);
 void nlm_set_pic_extra_ack(int node, int irq,  void (*xack)(struct irq_data *));
 
+#ifdef CONFIG_PCI_MSI
+void nlm_dispatch_msi(int node, int lirq);
+void nlm_dispatch_msix(int node, int msixirq);
+#endif
+
 /*
  * The NR_IRQs is divided between nodes, each of them has a separate irq space
  */
@@ -122,7 +111,6 @@
 	return node * NR_IRQS / NLM_NR_NODES + irq;
 }
 
-extern struct nlm_soc_info nlm_nodes[NLM_NR_NODES];
 extern int nlm_cpu_ready[];
 #endif
 #endif /* _NETLOGIC_COMMON_H_ */
diff --git a/arch/mips/include/asm/netlogic/mips-extns.h b/arch/mips/include/asm/netlogic/mips-extns.h
index f299d31..de9aada 100644
--- a/arch/mips/include/asm/netlogic/mips-extns.h
+++ b/arch/mips/include/asm/netlogic/mips-extns.h
@@ -146,7 +146,12 @@
 
 static inline int nlm_nodeid(void)
 {
-	return (__read_32bit_c0_register($15, 1) >> 5) & 0x3;
+	uint32_t prid = read_c0_prid();
+
+	if ((prid & 0xff00) == PRID_IMP_NETLOGIC_XLP9XX)
+		return (__read_32bit_c0_register($15, 1) >> 7) & 0x7;
+	else
+		return (__read_32bit_c0_register($15, 1) >> 5) & 0x3;
 }
 
 static inline unsigned int nlm_core_id(void)
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/bridge.h b/arch/mips/include/asm/netlogic/xlp-hal/bridge.h
index 4e8eacb..3067f98 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/bridge.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/bridge.h
@@ -69,44 +69,9 @@
 #define BRIDGE_FLASH_LIMIT3		0x13
 
 #define BRIDGE_DRAM_BAR(i)		(0x14 + (i))
-#define BRIDGE_DRAM_BAR0		0x14
-#define BRIDGE_DRAM_BAR1		0x15
-#define BRIDGE_DRAM_BAR2		0x16
-#define BRIDGE_DRAM_BAR3		0x17
-#define BRIDGE_DRAM_BAR4		0x18
-#define BRIDGE_DRAM_BAR5		0x19
-#define BRIDGE_DRAM_BAR6		0x1a
-#define BRIDGE_DRAM_BAR7		0x1b
-
 #define BRIDGE_DRAM_LIMIT(i)		(0x1c + (i))
-#define BRIDGE_DRAM_LIMIT0		0x1c
-#define BRIDGE_DRAM_LIMIT1		0x1d
-#define BRIDGE_DRAM_LIMIT2		0x1e
-#define BRIDGE_DRAM_LIMIT3		0x1f
-#define BRIDGE_DRAM_LIMIT4		0x20
-#define BRIDGE_DRAM_LIMIT5		0x21
-#define BRIDGE_DRAM_LIMIT6		0x22
-#define BRIDGE_DRAM_LIMIT7		0x23
-
 #define BRIDGE_DRAM_NODE_TRANSLN(i)	(0x24 + (i))
-#define BRIDGE_DRAM_NODE_TRANSLN0	0x24
-#define BRIDGE_DRAM_NODE_TRANSLN1	0x25
-#define BRIDGE_DRAM_NODE_TRANSLN2	0x26
-#define BRIDGE_DRAM_NODE_TRANSLN3	0x27
-#define BRIDGE_DRAM_NODE_TRANSLN4	0x28
-#define BRIDGE_DRAM_NODE_TRANSLN5	0x29
-#define BRIDGE_DRAM_NODE_TRANSLN6	0x2a
-#define BRIDGE_DRAM_NODE_TRANSLN7	0x2b
-
 #define BRIDGE_DRAM_CHNL_TRANSLN(i)	(0x2c + (i))
-#define BRIDGE_DRAM_CHNL_TRANSLN0	0x2c
-#define BRIDGE_DRAM_CHNL_TRANSLN1	0x2d
-#define BRIDGE_DRAM_CHNL_TRANSLN2	0x2e
-#define BRIDGE_DRAM_CHNL_TRANSLN3	0x2f
-#define BRIDGE_DRAM_CHNL_TRANSLN4	0x30
-#define BRIDGE_DRAM_CHNL_TRANSLN5	0x31
-#define BRIDGE_DRAM_CHNL_TRANSLN6	0x32
-#define BRIDGE_DRAM_CHNL_TRANSLN7	0x33
 
 #define BRIDGE_PCIEMEM_BASE0		0x34
 #define BRIDGE_PCIEMEM_BASE1		0x35
@@ -178,12 +143,42 @@
 #define BRIDGE_GIO_WEIGHT		0x2cb
 #define BRIDGE_FLASH_WEIGHT		0x2cc
 
+/* FIXME verify */
+#define BRIDGE_9XX_FLASH_BAR(i)		(0x11 + (i))
+#define BRIDGE_9XX_FLASH_BAR_LIMIT(i)	(0x15 + (i))
+
+#define BRIDGE_9XX_DRAM_BAR(i)		(0x19 + (i))
+#define BRIDGE_9XX_DRAM_LIMIT(i)	(0x29 + (i))
+#define BRIDGE_9XX_DRAM_NODE_TRANSLN(i)	(0x39 + (i))
+#define BRIDGE_9XX_DRAM_CHNL_TRANSLN(i)	(0x49 + (i))
+
+#define BRIDGE_9XX_ADDRESS_ERROR0	0x9d
+#define BRIDGE_9XX_ADDRESS_ERROR1	0x9e
+#define BRIDGE_9XX_ADDRESS_ERROR2	0x9f
+
+#define BRIDGE_9XX_PCIEMEM_BASE0	0x59
+#define BRIDGE_9XX_PCIEMEM_BASE1	0x5a
+#define BRIDGE_9XX_PCIEMEM_BASE2	0x5b
+#define BRIDGE_9XX_PCIEMEM_BASE3	0x5c
+#define BRIDGE_9XX_PCIEMEM_LIMIT0	0x5d
+#define BRIDGE_9XX_PCIEMEM_LIMIT1	0x5e
+#define BRIDGE_9XX_PCIEMEM_LIMIT2	0x5f
+#define BRIDGE_9XX_PCIEMEM_LIMIT3	0x60
+#define BRIDGE_9XX_PCIEIO_BASE0		0x61
+#define BRIDGE_9XX_PCIEIO_BASE1		0x62
+#define BRIDGE_9XX_PCIEIO_BASE2		0x63
+#define BRIDGE_9XX_PCIEIO_BASE3		0x64
+#define BRIDGE_9XX_PCIEIO_LIMIT0	0x65
+#define BRIDGE_9XX_PCIEIO_LIMIT1	0x66
+#define BRIDGE_9XX_PCIEIO_LIMIT2	0x67
+#define BRIDGE_9XX_PCIEIO_LIMIT3	0x68
+
 #ifndef __ASSEMBLY__
 
 #define nlm_read_bridge_reg(b, r)	nlm_read_reg(b, r)
 #define nlm_write_bridge_reg(b, r, v)	nlm_write_reg(b, r, v)
-#define nlm_get_bridge_pcibase(node)	\
-			nlm_pcicfg_base(XLP_IO_BRIDGE_OFFSET(node))
+#define nlm_get_bridge_pcibase(node)	nlm_pcicfg_base(cpu_is_xlp9xx() ? \
+		XLP9XX_IO_BRIDGE_OFFSET(node) : XLP_IO_BRIDGE_OFFSET(node))
 #define nlm_get_bridge_regbase(node)	\
 			(nlm_get_bridge_pcibase(node) + XLP_IO_PCI_HDRSZ)
 
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/iomap.h b/arch/mips/include/asm/netlogic/xlp-hal/iomap.h
index 55eee77..1f23dfa 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/iomap.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/iomap.h
@@ -48,8 +48,10 @@
 #define XLP_IO_SIZE			(64 << 20)	/* ECFG space size */
 #define XLP_IO_PCI_HDRSZ		0x100
 #define XLP_IO_DEV(node, dev)		((dev) + (node) * 8)
-#define XLP_HDR_OFFSET(node, bus, dev, fn)	(((bus) << 20) | \
-				((XLP_IO_DEV(node, dev)) << 15) | ((fn) << 12))
+#define XLP_IO_PCI_OFFSET(b, d, f)	(((b) << 20) | ((d) << 15) | ((f) << 12))
+
+#define XLP_HDR_OFFSET(node, bus, dev, fn) \
+		XLP_IO_PCI_OFFSET(bus, XLP_IO_DEV(node, dev), fn)
 
 #define XLP_IO_BRIDGE_OFFSET(node)	XLP_HDR_OFFSET(node, 0, 0, 0)
 /* coherent inter chip */
@@ -109,6 +111,36 @@
 #define XLP_IO_MMC_OFFSET(node, slot)	\
 		((XLP_IO_SD_OFFSET(node))+(slot*0x100)+XLP_IO_PCI_HDRSZ)
 
+/* Things have changed drastically in XLP 9XX */
+#define XLP9XX_HDR_OFFSET(n, d, f)	\
+			XLP_IO_PCI_OFFSET(xlp9xx_get_socbus(n), d, f)
+
+#define XLP9XX_IO_BRIDGE_OFFSET(node)	XLP_IO_PCI_OFFSET(0, 0, node)
+#define XLP9XX_IO_PIC_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 2, 0)
+#define XLP9XX_IO_UART_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 2, 2)
+#define XLP9XX_IO_SYS_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 6, 0)
+#define XLP9XX_IO_FUSE_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 6, 1)
+#define XLP9XX_IO_JTAG_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 6, 4)
+
+#define XLP9XX_IO_PCIE_OFFSET(node, i)	XLP9XX_HDR_OFFSET(node, 1, i)
+#define XLP9XX_IO_PCIE0_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 1, 0)
+#define XLP9XX_IO_PCIE2_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 1, 2)
+#define XLP9XX_IO_PCIE3_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 1, 3)
+
+/* XLP9xx USB block */
+#define XLP9XX_IO_USB_OFFSET(node, i)		XLP9XX_HDR_OFFSET(node, 4, i)
+#define XLP9XX_IO_USB_XHCI0_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 4, 1)
+#define XLP9XX_IO_USB_XHCI1_OFFSET(node)	XLP9XX_HDR_OFFSET(node, 4, 2)
+
+/* XLP9XX on-chip SATA controller */
+#define XLP9XX_IO_SATA_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 3, 2)
+
+#define XLP9XX_IO_NOR_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 7, 0)
+#define XLP9XX_IO_NAND_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 7, 1)
+#define XLP9XX_IO_SPI_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 7, 2)
+/* SD flash */
+#define XLP9XX_IO_MMCSD_OFFSET(node)		XLP9XX_HDR_OFFSET(node, 7, 3)
+
 /* PCI config header register id's */
 #define XLP_PCI_CFGREG0			0x00
 #define XLP_PCI_CFGREG1			0x01
@@ -156,11 +188,23 @@
 #define PCI_DEVICE_ID_NLM_MMC		0x1018
 #define PCI_DEVICE_ID_NLM_XHCI		0x101d
 
+#define PCI_DEVICE_ID_XLP9XX_SATA	0x901A
+#define PCI_DEVICE_ID_XLP9XX_XHCI	0x901D
+
 #ifndef __ASSEMBLY__
 
 #define nlm_read_pci_reg(b, r)		nlm_read_reg(b, r)
 #define nlm_write_pci_reg(b, r, v)	nlm_write_reg(b, r, v)
 
+static inline int xlp9xx_get_socbus(int node)
+{
+	uint64_t socbridge;
+
+	if (node == 0)
+		return 1;
+	socbridge = nlm_pcicfg_base(XLP9XX_IO_BRIDGE_OFFSET(node));
+	return (nlm_read_pci_reg(socbridge, 0x6) >> 8) & 0xff;
+}
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __NLM_HAL_IOMAP_H__ */
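
xlp9xx_get_socbus() above works because the node's bridge exposes standard
PCI bus numbers: config dword 6 (byte offset 0x18 of a type 1 header) holds
the primary/secondary/subordinate bus numbers, and bits 15:8 are the
secondary bus behind the bridge. The same extraction, standalone and with a
made-up dword:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t cfg_dword6 = 0x00aa0200;	/* hypothetical config dword 6 */

	/* same shift/mask as xlp9xx_get_socbus() */
	printf("SoC bus: %u\n", (cfg_dword6 >> 8) & 0xff);	/* 2 */
	return 0;
}
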
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h b/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h
index b559cb9..d4deb87 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/pcibus.h
@@ -52,25 +52,48 @@
 #define PCIE_BYTE_SWAP_MEM_LIM		0x248
 #define PCIE_BYTE_SWAP_IO_BASE		0x249
 #define PCIE_BYTE_SWAP_IO_LIM		0x24A
+
+#define PCIE_BRIDGE_MSIX_ADDR_BASE	0x24F
+#define PCIE_BRIDGE_MSIX_ADDR_LIMIT	0x250
 #define PCIE_MSI_STATUS			0x25A
 #define PCIE_MSI_EN			0x25B
+#define PCIE_MSIX_STATUS		0x25D
+#define PCIE_INT_STATUS0		0x25F
+#define PCIE_INT_STATUS1		0x260
 #define PCIE_INT_EN0			0x261
+#define PCIE_INT_EN1			0x262
 
-/* PCIE_MSI_EN */
-#define PCIE_MSI_VECTOR_INT_EN		0xFFFFFFFF
+/* XLP9XX has basic changes */
+#define PCIE_9XX_BYTE_SWAP_MEM_BASE	0x25c
+#define PCIE_9XX_BYTE_SWAP_MEM_LIM	0x25d
+#define PCIE_9XX_BYTE_SWAP_IO_BASE	0x25e
+#define PCIE_9XX_BYTE_SWAP_IO_LIM	0x25f
 
-/* PCIE_INT_EN0 */
-#define PCIE_MSI_INT_EN			(1 << 9)
+/* other */
+#define PCIE_NLINKS			4
 
+/* MSI addresses */
+#define MSI_ADDR_BASE			0xfffee00000ULL
+#define MSI_ADDR_SZ			0x10000
+#define MSI_LINK_ADDR(n, l)		(MSI_ADDR_BASE + \
+				(PCIE_NLINKS * (n) + (l)) * MSI_ADDR_SZ)
+#define MSIX_ADDR_BASE			0xfffef00000ULL
+#define MSIX_LINK_ADDR(n, l)		(MSIX_ADDR_BASE + \
+				(PCIE_NLINKS * (n) + (l)) * MSI_ADDR_SZ)
 #ifndef __ASSEMBLY__
 
 #define nlm_read_pcie_reg(b, r)		nlm_read_reg(b, r)
 #define nlm_write_pcie_reg(b, r, v)	nlm_write_reg(b, r, v)
-#define nlm_get_pcie_base(node, inst)	\
-			nlm_pcicfg_base(XLP_IO_PCIE_OFFSET(node, inst))
-#define nlm_get_pcie_regbase(node, inst)	\
-			(nlm_get_pcie_base(node, inst) + XLP_IO_PCI_HDRSZ)
+#define nlm_get_pcie_base(node, inst)	nlm_pcicfg_base(cpu_is_xlp9xx() ? \
+	XLP9XX_IO_PCIE_OFFSET(node, inst) : XLP_IO_PCIE_OFFSET(node, inst))
 
-int xlp_pcie_link_irt(int link);
+#ifdef CONFIG_PCI_MSI
+void xlp_init_node_msi_irqs(int node, int link);
+#else
+static inline void xlp_init_node_msi_irqs(int node, int link) {}
+#endif
+
+struct pci_dev *xlp_get_pcie_link(const struct pci_dev *dev);
+
 #endif
 #endif /* __NLM_HAL_PCIBUS_H__ */
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/pic.h b/arch/mips/include/asm/netlogic/xlp-hal/pic.h
index 105389b..f10bf3b 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/pic.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/pic.h
@@ -150,12 +150,19 @@
 #define PIC_IRT0		0x74
 #define PIC_IRT(i)		(PIC_IRT0 + ((i) * 2))
 
-#define TIMER_CYCLES_MAXVAL	0xffffffffffffffffULL
+#define PIC_9XX_PENDING_0	0x6
+#define PIC_9XX_PENDING_1	0x8
+#define PIC_9XX_PENDING_2	0xa
+#define PIC_9XX_PENDING_3	0xc
+
+#define PIC_9XX_IRT0		0x1c0
+#define PIC_9XX_IRT(i)		(PIC_9XX_IRT0 + ((i) * 2))
 
 /*
  *    IRT Map
  */
 #define PIC_NUM_IRTS		160
+#define PIC_9XX_NUM_IRTS	256
 
 #define PIC_IRT_WD_0_INDEX	0
 #define PIC_IRT_WD_1_INDEX	1
@@ -193,14 +200,9 @@
 #define PIC_IRT_PCIE_LINK_INDEX(num)	((num) + PIC_IRT_PCIE_LINK_0_INDEX)
 
 #define PIC_CLOCK_TIMER			7
-#define PIC_IRQ_BASE			8
 
 #if !defined(LOCORE) && !defined(__ASSEMBLY__)
 
-#define PIC_IRT_FIRST_IRQ		(PIC_IRQ_BASE)
-#define PIC_IRT_LAST_IRQ		63
-#define PIC_IRQ_IS_IRT(irq)		((irq) >= PIC_IRT_FIRST_IRQ)
-
 /*
  *   Misc
  */
@@ -210,30 +212,26 @@
 
 #define nlm_read_pic_reg(b, r)	nlm_read_reg64(b, r)
 #define nlm_write_pic_reg(b, r, v) nlm_write_reg64(b, r, v)
-#define nlm_get_pic_pcibase(node) nlm_pcicfg_base(XLP_IO_PIC_OFFSET(node))
+#define nlm_get_pic_pcibase(node)	nlm_pcicfg_base(cpu_is_xlp9xx() ? \
+		XLP9XX_IO_PIC_OFFSET(node) : XLP_IO_PIC_OFFSET(node))
 #define nlm_get_pic_regbase(node) (nlm_get_pic_pcibase(node) + XLP_IO_PCI_HDRSZ)
 
 /* We use PIC on node 0 as a timer */
 #define pic_timer_freq()		nlm_get_pic_frequency(0)
 
 /* IRT and h/w interrupt routines */
-static inline int
-nlm_pic_read_irt(uint64_t base, int irt_index)
-{
-	return nlm_read_pic_reg(base, PIC_IRT(irt_index));
-}
-
 static inline void
-nlm_set_irt_to_cpu(uint64_t base, int irt, int cpu)
+nlm_9xx_pic_write_irt(uint64_t base, int irt_num, int en, int nmi,
+	int sch, int vec, int dt, int db, int cpu)
 {
 	uint64_t val;
 
-	val = nlm_read_pic_reg(base, PIC_IRT(irt));
-	/* clear cpuset and mask */
-	val &= ~((0x7ull << 16) | 0xffff);
-	/* set DB, cpuset and cpumask */
-	val |= (1 << 19) | ((cpu >> 4) << 16) | (1 << (cpu & 0xf));
-	nlm_write_pic_reg(base, PIC_IRT(irt), val);
+	val = (((uint64_t)en & 0x1) << 22) | ((nmi & 0x1) << 23) |
+			((0 /*mc*/) << 20) | ((vec & 0x3f) << 24) |
+			((dt & 0x1) << 21) | (0 /*ptr*/ << 16) |
+			(cpu & 0x3ff);
+
+	nlm_write_pic_reg(base, PIC_9XX_IRT(irt_num), val);
 }
 
 static inline void
@@ -254,9 +252,13 @@
 nlm_pic_write_irt_direct(uint64_t base, int irt_num, int en, int nmi,
 	int sch, int vec, int cpu)
 {
-	nlm_pic_write_irt(base, irt_num, en, nmi, sch, vec, 1,
-		(cpu >> 4),		/* thread group */
-		1 << (cpu & 0xf));	/* thread mask */
+	if (cpu_is_xlp9xx())
+		nlm_9xx_pic_write_irt(base, irt_num, en, nmi, sch, vec,
+							1, 0, cpu);
+	else
+		nlm_pic_write_irt(base, irt_num, en, nmi, sch, vec, 1,
+			(cpu >> 4),		/* thread group */
+			1 << (cpu & 0xf));	/* thread mask */
 }
 
 static inline uint64_t
@@ -298,8 +300,13 @@
 {
 	uint64_t reg;
 
-	reg = nlm_read_pic_reg(base, PIC_IRT(irt));
-	nlm_write_pic_reg(base, PIC_IRT(irt), reg | (1u << 31));
+	if (cpu_is_xlp9xx()) {
+		reg = nlm_read_pic_reg(base, PIC_9XX_IRT(irt));
+		nlm_write_pic_reg(base, PIC_9XX_IRT(irt), reg | (1 << 22));
+	} else {
+		reg = nlm_read_pic_reg(base, PIC_IRT(irt));
+		nlm_write_pic_reg(base, PIC_IRT(irt), reg | (1u << 31));
+	}
 }
 
 static inline void
@@ -307,8 +314,15 @@
 {
 	uint64_t reg;
 
-	reg = nlm_read_pic_reg(base, PIC_IRT(irt));
-	nlm_write_pic_reg(base, PIC_IRT(irt), reg & ~((uint64_t)1 << 31));
+	if (cpu_is_xlp9xx()) {
+		reg = nlm_read_pic_reg(base, PIC_9XX_IRT(irt));
+		reg &= ~((uint64_t)1 << 22);
+		nlm_write_pic_reg(base, PIC_9XX_IRT(irt), reg);
+	} else {
+		reg = nlm_read_pic_reg(base, PIC_IRT(irt));
+		reg &= ~((uint64_t)1 << 31);
+		nlm_write_pic_reg(base, PIC_IRT(irt), reg);
+	}
 }
 
 static inline void
@@ -316,8 +330,13 @@
 {
 	uint64_t ipi;
 
-	ipi = ((uint64_t)nmi << 31) | (irq << 20);
-	ipi |= ((hwt >> 4) << 16) | (1 << (hwt & 0xf)); /* cpuset and mask */
+	if (cpu_is_xlp9xx())
+		ipi = (nmi << 23) | (irq << 24) |
+			(0/*mcm*/ << 20) | (0/*ptr*/ << 16) | hwt;
+	else
+		ipi = ((uint64_t)nmi << 31) | (irq << 20) |
+			((hwt >> 4) << 16) | (1 << (hwt & 0xf));
+
 	nlm_write_pic_reg(base, PIC_IPI_CTL, ipi);
 }
 
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/sys.h b/arch/mips/include/asm/netlogic/xlp-hal/sys.h
index fcf2833c..d9b107f 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/sys.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/sys.h
@@ -147,13 +147,29 @@
 #define SYS_SYS_PLL_MEM_REQ			0x2a3
 #define SYS_PLL_MEM_STAT			0x2a4
 
+/* Registers changed on 9XX */
+#define SYS_9XX_POWER_ON_RESET_CFG		0x00
+#define SYS_9XX_CHIP_RESET			0x01
+#define SYS_9XX_CPU_RESET			0x02
+#define SYS_9XX_CPU_NONCOHERENT_MODE		0x03
+
+/* XLP 9XX fuse block registers */
+#define FUSE_9XX_DEVCFG6			0xc6
+
 #ifndef __ASSEMBLY__
 
 #define nlm_read_sys_reg(b, r)		nlm_read_reg(b, r)
 #define nlm_write_sys_reg(b, r, v)	nlm_write_reg(b, r, v)
-#define nlm_get_sys_pcibase(node) nlm_pcicfg_base(XLP_IO_SYS_OFFSET(node))
+#define nlm_get_sys_pcibase(node)	nlm_pcicfg_base(cpu_is_xlp9xx() ? \
+		XLP9XX_IO_SYS_OFFSET(node) : XLP_IO_SYS_OFFSET(node))
 #define nlm_get_sys_regbase(node) (nlm_get_sys_pcibase(node) + XLP_IO_PCI_HDRSZ)
 
+/* XLP9XX fuse block */
+#define nlm_get_fuse_pcibase(node)	\
+			nlm_pcicfg_base(XLP9XX_IO_FUSE_OFFSET(node))
+#define nlm_get_fuse_regbase(node)	\
+			(nlm_get_fuse_pcibase(node) + XLP_IO_PCI_HDRSZ)
+
 unsigned int nlm_get_pic_frequency(int node);
 #endif
 #endif
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/uart.h b/arch/mips/include/asm/netlogic/xlp-hal/uart.h
index 86d16e1..a6c5442 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/uart.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/uart.h
@@ -94,7 +94,8 @@
 #define nlm_read_uart_reg(b, r)		nlm_read_reg(b, r)
 #define nlm_write_uart_reg(b, r, v)	nlm_write_reg(b, r, v)
 #define nlm_get_uart_pcibase(node, inst)	\
-		nlm_pcicfg_base(XLP_IO_UART_OFFSET(node, inst))
+	nlm_pcicfg_base(cpu_is_xlp9xx() ?  XLP9XX_IO_UART_OFFSET(node) : \
+						XLP_IO_UART_OFFSET(node, inst))
 #define nlm_get_uart_regbase(node, inst)	\
 			(nlm_get_uart_pcibase(node, inst) + XLP_IO_PCI_HDRSZ)
 
diff --git a/arch/mips/include/asm/netlogic/xlp-hal/xlp.h b/arch/mips/include/asm/netlogic/xlp-hal/xlp.h
index 470f209..2b0c959 100644
--- a/arch/mips/include/asm/netlogic/xlp-hal/xlp.h
+++ b/arch/mips/include/asm/netlogic/xlp-hal/xlp.h
@@ -37,10 +37,9 @@
 
 #define PIC_UART_0_IRQ			17
 #define PIC_UART_1_IRQ			18
-#define PIC_PCIE_LINK_0_IRQ		19
-#define PIC_PCIE_LINK_1_IRQ		20
-#define PIC_PCIE_LINK_2_IRQ		21
-#define PIC_PCIE_LINK_3_IRQ		22
+
+#define PIC_PCIE_LINK_LEGACY_IRQ_BASE	19
+#define PIC_PCIE_LINK_LEGACY_IRQ(i)	(19 + (i))
 
 #define PIC_EHCI_0_IRQ			23
 #define PIC_EHCI_1_IRQ			24
@@ -51,6 +50,8 @@
 #define PIC_2XX_XHCI_0_IRQ		23
 #define PIC_2XX_XHCI_1_IRQ		24
 #define PIC_2XX_XHCI_2_IRQ		25
+#define PIC_9XX_XHCI_0_IRQ		23
+#define PIC_9XX_XHCI_1_IRQ		24
 
 #define PIC_MMC_IRQ			29
 #define PIC_I2C_0_IRQ			30
@@ -58,6 +59,23 @@
 #define PIC_I2C_2_IRQ			32
 #define PIC_I2C_3_IRQ			33
 
+#define PIC_PCIE_LINK_MSI_IRQ_BASE	44	/* 44 - 47 MSI IRQ */
+#define PIC_PCIE_LINK_MSI_IRQ(i)	(44 + (i))
+
+/* MSI-X with second link-level dispatch */
+#define PIC_PCIE_MSIX_IRQ_BASE		48	/* 48 - 51 MSI-X IRQ */
+#define PIC_PCIE_MSIX_IRQ(i)		(48 + (i))
+
+#define NLM_MSIX_VEC_BASE		96	/* 96 - 127 - MSIX mapped */
+#define NLM_MSI_VEC_BASE		128	/* 128 - 255 - MSI mapped */
+
+#define NLM_PIC_INDIRECT_VEC_BASE	512
+#define NLM_GPIO_VEC_BASE		768
+
+#define PIC_IRQ_BASE			8
+#define PIC_IRT_FIRST_IRQ		PIC_IRQ_BASE
+#define PIC_IRT_LAST_IRQ		63
+
 #ifndef __ASSEMBLY__
 
 /* SMP support functions */
@@ -68,6 +86,9 @@
 void nlm_hal_init(void);
 int xlp_get_dram_map(int n, uint64_t *dram_map);
 
+struct pci_dev;
+int xlp_socdev_to_node(const struct pci_dev *dev);
+
 /* Device tree related */
 void xlp_early_init_devtree(void);
 void *xlp_dt_init(void *fdtp);
@@ -76,8 +97,15 @@
 {
 	int chip = read_c0_prid() & 0xff00;
 
-	return chip == PRID_IMP_NETLOGIC_XLP2XX;
+	return chip == PRID_IMP_NETLOGIC_XLP2XX ||
+		chip == PRID_IMP_NETLOGIC_XLP9XX;
 }
 
+static inline int cpu_is_xlp9xx(void)
+{
+	int chip = read_c0_prid() & 0xff00;
+
+	return chip == PRID_IMP_NETLOGIC_XLP9XX;
+}
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_NLM_XLP_H */
diff --git a/arch/mips/include/asm/netlogic/xlr/xlr.h b/arch/mips/include/asm/netlogic/xlr/xlr.h
index c1667e0..ceb991c 100644
--- a/arch/mips/include/asm/netlogic/xlr/xlr.h
+++ b/arch/mips/include/asm/netlogic/xlr/xlr.h
@@ -35,11 +35,6 @@
 #ifndef _ASM_NLM_XLR_H
 #define _ASM_NLM_XLR_H
 
-/* Platform UART functions */
-struct uart_port;
-unsigned int nlm_xlr_uart_in(struct uart_port *, int);
-void nlm_xlr_uart_out(struct uart_port *, int, int);
-
 /* SMP helpers */
 void xlr_wakeup_secondary_cpus(void);
 
diff --git a/arch/mips/include/asm/octeon/cvmx-helper-board.h b/arch/mips/include/asm/octeon/cvmx-helper-board.h
index 41785dd..8933203 100644
--- a/arch/mips/include/asm/octeon/cvmx-helper-board.h
+++ b/arch/mips/include/asm/octeon/cvmx-helper-board.h
@@ -36,6 +36,13 @@
 
 #include <asm/octeon/cvmx-helper.h>
 
+enum cvmx_helper_board_usb_clock_types {
+	USB_CLOCK_TYPE_REF_12,
+	USB_CLOCK_TYPE_REF_24,
+	USB_CLOCK_TYPE_REF_48,
+	USB_CLOCK_TYPE_CRYSTAL_12,
+};
+
 typedef enum {
 	set_phy_link_flags_autoneg = 0x1,
 	set_phy_link_flags_flow_control_dont_touch = 0x0 << 1,
@@ -154,4 +161,6 @@
  */
 extern int __cvmx_helper_board_hardware_enable(int interface);
 
+enum cvmx_helper_board_usb_clock_types __cvmx_helper_board_usb_get_clock_type(void);
+
 #endif /* __CVMX_HELPER_BOARD_H__ */
diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h
index f6be474..5e08bcc 100644
--- a/arch/mips/include/asm/page.h
+++ b/arch/mips/include/asm/page.h
@@ -11,6 +11,8 @@
 
 #include <spaces.h>
 #include <linux/const.h>
+#include <linux/kernel.h>
+#include <asm/mipsregs.h>
 
 /*
  * PAGE_SHIFT determines the page size
@@ -33,6 +35,29 @@
 #define PAGE_SIZE	(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~((1 << PAGE_SHIFT) - 1))
 
+/*
+ * This is used for calculating the real page sizes
+ * for FTLB or VTLB + FTLB configurations.
+ */
+static inline unsigned int page_size_ftlb(unsigned int mmuextdef)
+{
+	switch (mmuextdef) {
+	case MIPS_CONF4_MMUEXTDEF_FTLBSIZEEXT:
+		if (PAGE_SIZE == (1 << 30))
+			return 5;
+		if (PAGE_SIZE == (1llu << 32))
+			return 6;
+		if (PAGE_SIZE > (256 << 10))
+			return 7; /* reserved */
+		/* fall through */
+	case MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT:
+		return (PAGE_SHIFT - 10) / 2;
+	default:
+		panic("Invalid FTLB configuration with Conf4_mmuextdef=%d\n",
+		      mmuextdef >> 14);
+	}
+}
+
 #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT
 #define HPAGE_SHIFT	(PAGE_SHIFT + PAGE_SHIFT - 3)
 #define HPAGE_SIZE	(_AC(1,UL) << HPAGE_SHIFT)
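
For the VTLBSIZEEXT case in page_size_ftlb() above, the encoding works out
to (PAGE_SHIFT - 10) / 2: 4 KiB base pages give 1, 16 KiB give 2, and so on
up the even shifts. A quick standalone check of that formula:

#include <stdio.h>

int main(void)
{
	for (int shift = 12; shift <= 20; shift += 2)
		printf("PAGE_SHIFT %d -> ftlb pagesize field %d\n",
		       shift, (shift - 10) / 2);
	return 0;
}
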
diff --git a/arch/mips/include/asm/rtlx.h b/arch/mips/include/asm/rtlx.h
index 90985b6..c102065 100644
--- a/arch/mips/include/asm/rtlx.h
+++ b/arch/mips/include/asm/rtlx.h
@@ -1,13 +1,18 @@
 /*
- * Copyright (C) 2004, 2005 MIPS Technologies, Inc.  All rights reserved.
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  *
+ * Copyright (C) 2004, 2005 MIPS Technologies, Inc.  All rights reserved.
+ * Copyright (C) 2013 Imagination Technologies Ltd.
  */
-
 #ifndef __ASM_RTLX_H_
 #define __ASM_RTLX_H_
 
 #include <irq.h>
 
+#define RTLX_MODULE_NAME "rtlx"
+
 #define LX_NODE_BASE 10
 
 #define MIPS_CPU_RTLX_IRQ 0
@@ -15,18 +20,31 @@
 #define RTLX_VERSION 2
 #define RTLX_xID 0x12345600
 #define RTLX_ID (RTLX_xID | RTLX_VERSION)
+#define RTLX_BUFFER_SIZE 2048
 #define RTLX_CHANNELS 8
 
 #define RTLX_CHANNEL_STDIO	0
 #define RTLX_CHANNEL_DBG	1
 #define RTLX_CHANNEL_SYSIO	2
 
-extern int rtlx_open(int index, int can_sleep);
-extern int rtlx_release(int index);
-extern ssize_t rtlx_read(int index, void __user *buff, size_t count);
-extern ssize_t rtlx_write(int index, const void __user *buffer, size_t count);
-extern unsigned int rtlx_read_poll(int index, int can_sleep);
-extern unsigned int rtlx_write_poll(int index);
+void rtlx_starting(int vpe);
+void rtlx_stopping(int vpe);
+
+int rtlx_open(int index, int can_sleep);
+int rtlx_release(int index);
+ssize_t rtlx_read(int index, void __user *buff, size_t count);
+ssize_t rtlx_write(int index, const void __user *buffer, size_t count);
+unsigned int rtlx_read_poll(int index, int can_sleep);
+unsigned int rtlx_write_poll(int index);
+
+int __init rtlx_module_init(void);
+void __exit rtlx_module_exit(void);
+
+void _interrupt_sp(void);
+
+extern struct vpe_notifications rtlx_notify;
+extern const struct file_operations rtlx_fops;
+extern void (*aprp_hook)(void);
 
 enum rtlx_state {
 	RTLX_STATE_UNUSED = 0,
@@ -35,10 +53,15 @@
 	RTLX_STATE_OPENED
 };
 
-#define RTLX_BUFFER_SIZE 2048
+extern struct chan_waitqueues {
+	wait_queue_head_t rt_queue;
+	wait_queue_head_t lx_queue;
+	atomic_t in_open;
+	struct mutex mutex;
+} channel_wqs[RTLX_CHANNELS];
 
 /* each channel supports read and write.
-   linux (vpe0) reads lx_buffer	 and writes rt_buffer
+   linux (vpe0) reads lx_buffer and writes rt_buffer
    SP (vpe1) reads rt_buffer and writes lx_buffer
 */
 struct rtlx_channel {
@@ -55,11 +78,11 @@
 	char *lx_buffer;
 };
 
-struct rtlx_info {
+extern struct rtlx_info {
 	unsigned long id;
 	enum rtlx_state state;
+	int ap_int_pending;	/* Status of 0 or 1 for CONFIG_MIPS_CMP only */
 
 	struct rtlx_channel channel[RTLX_CHANNELS];
-};
-
+} *rtlx;
 #endif /* __ASM_RTLX_H_ */
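
For orientation, the channels declared here surface to userspace as
character devices ("rtlx0" .. "rtlx7"; see rtlx_module_init() later in this
series). A minimal, hypothetical client follows: the /dev path assumes udev
creates the nodes, and error handling is abbreviated.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char msg[] = "hello sp";
	char reply[128];
	ssize_t n;
	int fd;

	fd = open("/dev/rtlx0", O_RDWR);	/* RTLX_CHANNEL_STDIO */
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, msg, strlen(msg)) < 0)	/* rt_buffer: Linux -> SP */
		perror("write");
	n = read(fd, reply, sizeof(reply) - 1);	/* lx_buffer: SP -> Linux */
	if (n > 0) {
		reply[n] = '\0';
		printf("SP replied: %s\n", reply);
	}
	close(fd);
	return 0;
}
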
diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h
index eb0af15..278d45a 100644
--- a/arch/mips/include/asm/switch_to.h
+++ b/arch/mips/include/asm/switch_to.h
@@ -19,11 +19,19 @@
 
 struct task_struct;
 
-/*
- * switch_to(n) should switch tasks to task nr n, first
- * checking that n isn't the current task, in which case it does nothing.
+/**
+ * resume - resume execution of a task
+ * @prev:	The task previously executed.
+ * @next:	The task to begin executing.
+ * @next_ti:	task_thread_info(next).
+ * @usedfpu:	Non-zero if prev's FP context should be saved.
+ *
+ * This function is used whilst scheduling to save the context of prev & load
+ * the context of next. Returns prev.
  */
-extern asmlinkage void *resume(void *last, void *next, void *next_ti, u32 __usedfpu);
+extern asmlinkage struct task_struct *resume(struct task_struct *prev,
+		struct task_struct *next, struct thread_info *next_ti,
+		u32 usedfpu);
 
 extern unsigned int ll_bit;
 extern struct task_struct *ll_task;
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 81c8913..33e8dbf 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -29,7 +29,7 @@
 static inline unsigned long mips_get_syscall_arg(unsigned long *arg,
 	struct task_struct *task, struct pt_regs *regs, unsigned int n)
 {
-	unsigned long usp = regs->regs[29];
+	unsigned long usp __maybe_unused = regs->regs[29];
 
 	switch (n) {
 	case 0: case 1: case 2: case 3:
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 4f58ef6..24846f9 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -110,11 +110,12 @@
 #define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_FIXADE		20	/* Fix address errors in software */
 #define TIF_LOGADE		21	/* Log address errors to syslog */
-#define TIF_32BIT_REGS		22	/* also implies 16/32 fprs */
+#define TIF_32BIT_REGS		22	/* 32-bit general purpose registers */
 #define TIF_32BIT_ADDR		23	/* 32-bit address space (o32/n32) */
 #define TIF_FPUBOUND		24	/* thread bound to FPU-full CPU set */
 #define TIF_LOAD_WATCH		25	/* If set, load watch registers */
 #define TIF_SYSCALL_TRACEPOINT	26	/* syscall tracepoint instrumentation */
+#define TIF_32BIT_FPREGS	27	/* 32-bit floating point registers */
 #define TIF_SYSCALL_TRACE	31	/* syscall trace active */
 
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -131,6 +132,7 @@
 #define _TIF_32BIT_ADDR		(1<<TIF_32BIT_ADDR)
 #define _TIF_FPUBOUND		(1<<TIF_FPUBOUND)
 #define _TIF_LOAD_WATCH		(1<<TIF_LOAD_WATCH)
+#define _TIF_32BIT_FPREGS	(1<<TIF_32BIT_FPREGS)
 #define _TIF_SYSCALL_TRACEPOINT	(1<<TIF_SYSCALL_TRACEPOINT)
 
 #define _TIF_WORK_SYSCALL_ENTRY	(_TIF_NOHZ | _TIF_SYSCALL_TRACE |	\
diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h
index c67842b..4a23493 100644
--- a/arch/mips/include/asm/tlb.h
+++ b/arch/mips/include/asm/tlb.h
@@ -18,6 +18,10 @@
  */
 #define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
 
+#define UNIQUE_ENTRYHI(idx)						\
+		((CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) |		\
+		 (cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0))
+
 #include <asm-generic/tlb.h>
 
 #endif /* __ASM_TLB_H */
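
A short worked example of the UNIQUE_ENTRYHI() macro introduced above,
assuming MIPS32 (CKSEG0 == 0x80000000), 4K pages and no tlbinv support: each
TLB index maps to a distinct kseg0 address with a stride of two pages (one
page per EntryLo pair), so an entry written this way can never match a live
lookup; on cores with tlbinv, the EHINV bit marks the entry hardware-invalid
as well.

#include <stdio.h>

/* Hedged sketch of UNIQUE_ENTRYHI() with illustrative constants. */
#define CKSEG0		0x80000000u
#define PAGE_SHIFT	12

#define UNIQUE_ENTRYHI(idx)	(CKSEG0 + ((idx) << (PAGE_SHIFT + 1)))

int main(void)
{
	unsigned int idx;

	for (idx = 0; idx < 4; idx++)
		printf("TLB index %u -> EntryHi 0x%08x\n",
		       idx, UNIQUE_ENTRYHI(idx));
	return 0;	/* prints 0x80000000, 0x80002000, 0x80004000, ... */
}
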
diff --git a/arch/mips/include/asm/vpe.h b/arch/mips/include/asm/vpe.h
index 0880fe8..7849f39 100644
--- a/arch/mips/include/asm/vpe.h
+++ b/arch/mips/include/asm/vpe.h
@@ -1,24 +1,95 @@
 /*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
  * Copyright (C) 2005 MIPS Technologies, Inc.  All rights reserved.
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
+ * Copyright (C) 2013 Imagination Technologies Ltd.
  */
-
 #ifndef _ASM_VPE_H
 #define _ASM_VPE_H
 
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+
+#define VPE_MODULE_NAME "vpe"
+#define VPE_MODULE_MINOR 1
+
+/* grab the likely amount of memory we will need. */
+#ifdef CONFIG_MIPS_VPE_LOADER_TOM
+#define P_SIZE (2 * 1024 * 1024)
+#else
+/* add an overhead to the max kmalloc size for non-stripped symbols/etc */
+#define P_SIZE (256 * 1024)
+#endif
+
+#define MAX_VPES 16
+#define VPE_PATH_MAX 256
+
+static inline int aprp_cpu_index(void)
+{
+#ifdef CONFIG_MIPS_CMP
+	return setup_max_cpus;
+#else
+	extern int tclimit;
+	return tclimit;
+#endif
+}
+
+enum vpe_state {
+	VPE_STATE_UNUSED = 0,
+	VPE_STATE_INUSE,
+	VPE_STATE_RUNNING
+};
+
+enum tc_state {
+	TC_STATE_UNUSED = 0,
+	TC_STATE_INUSE,
+	TC_STATE_RUNNING,
+	TC_STATE_DYNAMIC
+};
+
+struct vpe {
+	enum vpe_state state;
+
+	/* (device) minor associated with this vpe */
+	int minor;
+
+	/* elfloader stuff */
+	void *load_addr;
+	unsigned long len;
+	char *pbuffer;
+	unsigned long plen;
+	char cwd[VPE_PATH_MAX];
+
+	unsigned long __start;
+
+	/* tc's associated with this vpe */
+	struct list_head tc;
+
+	/* The list of VPEs */
+	struct list_head list;
+
+	/* shared symbol address */
+	void *shared_ptr;
+
+	/* the list of who wants to know when something major happens */
+	struct list_head notify;
+
+	unsigned int ntcs;
+};
+
+struct tc {
+	enum tc_state state;
+	int index;
+
+	struct vpe *pvpe;	/* parent VPE */
+	struct list_head tc;	/* The list of TCs with this VPE */
+	struct list_head list;	/* The global list of TCs */
+};
+
 struct vpe_notifications {
 	void (*start)(int vpe);
 	void (*stop)(int vpe);
@@ -26,10 +97,34 @@
 	struct list_head list;
 };
 
+struct vpe_control {
+	spinlock_t vpe_list_lock;
+	struct list_head vpe_list;      /* Virtual processing elements */
+	spinlock_t tc_list_lock;
+	struct list_head tc_list;       /* Thread contexts */
+};
 
-extern int vpe_notify(int index, struct vpe_notifications *notify);
+extern unsigned long physical_memsize;
+extern struct vpe_control vpecontrol;
+extern const struct file_operations vpe_fops;
 
-extern void *vpe_get_shared(int index);
-extern char *vpe_getcwd(int index);
+int vpe_notify(int index, struct vpe_notifications *notify);
 
+void *vpe_get_shared(int index);
+char *vpe_getcwd(int index);
+
+struct vpe *get_vpe(int minor);
+struct tc *get_tc(int index);
+struct vpe *alloc_vpe(int minor);
+struct tc *alloc_tc(int index);
+void release_vpe(struct vpe *v);
+
+void *alloc_progmem(unsigned long len);
+void release_progmem(void *ptr);
+
+int __weak vpe_run(struct vpe *v);
+void cleanup_tc(struct tc *tc);
+
+int __init vpe_module_init(void);
+void __exit vpe_module_exit(void);
 #endif /* _ASM_VPE_H */
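
The notification API above is what the RTLX code hooks into later in this
series; a minimal sketch of a kernel-side consumer follows. The handler
names and bodies are illustrative, not taken from the tree.

#include <linux/init.h>
#include <linux/printk.h>
#include <asm/vpe.h>

/* Hedged sketch: registering for VPE start/stop callbacks. Real users
 * re-arm their shared state here (cf. rtlx_starting/rtlx_stopping). */
static void example_vpe_started(int vpe)
{
	pr_info("VPE %d started\n", vpe);
}

static void example_vpe_stopped(int vpe)
{
	pr_info("VPE %d stopped\n", vpe);
}

static struct vpe_notifications example_notify = {
	.start = example_vpe_started,
	.stop  = example_vpe_stopped,
};

static int __init example_init(void)
{
	/* aprp_cpu_index() resolves to the CPU/TC reserved for the SP side */
	return vpe_notify(aprp_cpu_index(), &example_notify);
}
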
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index e5a676e..b39ba25 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -98,8 +98,9 @@
  */
 enum cop_op {
 	mfc_op	      = 0x00, dmfc_op	    = 0x01,
-	cfc_op	      = 0x02, mtc_op	    = 0x04,
-	dmtc_op	      = 0x05, ctc_op	    = 0x06,
+	cfc_op	      = 0x02, mfhc_op	    = 0x03,
+	mtc_op        = 0x04, dmtc_op	    = 0x05,
+	ctc_op	      = 0x06, mthc_op	    = 0x07,
 	bc_op	      = 0x08, cop_op	    = 0x10,
 	copm_op	      = 0x18
 };
@@ -397,8 +398,10 @@
 	mm_movt1_op = 0xa5,
 	mm_ftruncw_op = 0xac,
 	mm_fneg1_op = 0xad,
+	mm_mfhc1_op = 0xc0,
 	mm_froundl_op = 0xcc,
 	mm_fcvtd1_op = 0xcd,
+	mm_mthc1_op = 0xe0,
 	mm_froundw_op = 0xec,
 	mm_fcvts1_op = 0xed,
 };
diff --git a/arch/mips/jz4740/board-qi_lb60.c b/arch/mips/jz4740/board-qi_lb60.c
index 8a5ec0e..c01900e 100644
--- a/arch/mips/jz4740/board-qi_lb60.c
+++ b/arch/mips/jz4740/board-qi_lb60.c
@@ -427,6 +427,7 @@
 
 static struct platform_device *jz_platform_devices[] __initdata = {
 	&jz4740_udc_device,
+	&jz4740_udc_xceiv_device,
 	&jz4740_mmc_device,
 	&jz4740_nand_device,
 	&qi_lb60_keypad,
diff --git a/arch/mips/jz4740/platform.c b/arch/mips/jz4740/platform.c
index df65677..a447101 100644
--- a/arch/mips/jz4740/platform.c
+++ b/arch/mips/jz4740/platform.c
@@ -14,13 +14,14 @@
  */
 
 #include <linux/device.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/resource.h>
 
 #include <linux/dma-mapping.h>
 
+#include <linux/usb/musb.h>
+
 #include <asm/mach-jz4740/platform.h>
 #include <asm/mach-jz4740/base.h>
 #include <asm/mach-jz4740/irq.h>
@@ -56,29 +57,35 @@
 	.resource	= jz4740_usb_ohci_resources,
 };
 
-/* UDC (USB gadget controller) */
-static struct resource jz4740_usb_gdt_resources[] = {
-	{
-		.start	= JZ4740_UDC_BASE_ADDR,
-		.end	= JZ4740_UDC_BASE_ADDR + 0x1000 - 1,
-		.flags	= IORESOURCE_MEM,
+/* USB Device Controller */
+struct platform_device jz4740_udc_xceiv_device = {
+	.name = "usb_phy_gen_xceiv",
+	.id   = 0,
+};
+
+static struct resource jz4740_udc_resources[] = {
+	[0] = {
+		.start = JZ4740_UDC_BASE_ADDR,
+		.end   = JZ4740_UDC_BASE_ADDR + 0x10000 - 1,
+		.flags = IORESOURCE_MEM,
 	},
-	{
-		.start	= JZ4740_IRQ_UDC,
-		.end	= JZ4740_IRQ_UDC,
-		.flags	= IORESOURCE_IRQ,
+	[1] = {
+		.start = JZ4740_IRQ_UDC,
+		.end   = JZ4740_IRQ_UDC,
+		.flags = IORESOURCE_IRQ,
+		.name  = "mc",
 	},
 };
 
 struct platform_device jz4740_udc_device = {
-	.name		= "jz-udc",
-	.id		= -1,
-	.dev = {
-		.dma_mask = &jz4740_udc_device.dev.coherent_dma_mask,
+	.name = "musb-jz4740",
+	.id   = -1,
+	.dev  = {
+		.dma_mask          = &jz4740_udc_device.dev.coherent_dma_mask,
 		.coherent_dma_mask = DMA_BIT_MASK(32),
 	},
-	.num_resources	= ARRAY_SIZE(jz4740_usb_gdt_resources),
-	.resource	= jz4740_usb_gdt_resources,
+	.num_resources = ARRAY_SIZE(jz4740_udc_resources),
+	.resource      = jz4740_udc_resources,
 };
 
 /* MMC/SD controller */
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 1c1b717..26c6175 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -30,6 +30,7 @@
 obj-$(CONFIG_CSRC_SB1250)	+= csrc-sb1250.o
 obj-$(CONFIG_SYNC_R4K)		+= sync-r4k.o
 
+obj-$(CONFIG_DEBUG_FS)		+= segment.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_MODULES)		+= mips_ksyms.o module.o
 obj-$(CONFIG_MODULES_USE_ELF_RELA) += module-rela.o
@@ -55,7 +56,11 @@
 obj-$(CONFIG_CPU_MIPSR2)	+= spram.o
 
 obj-$(CONFIG_MIPS_VPE_LOADER)	+= vpe.o
+obj-$(CONFIG_MIPS_VPE_LOADER_CMP) += vpe-cmp.o
+obj-$(CONFIG_MIPS_VPE_LOADER_MT) += vpe-mt.o
 obj-$(CONFIG_MIPS_VPE_APSP_API) += rtlx.o
+obj-$(CONFIG_MIPS_VPE_APSP_API_CMP) += rtlx-cmp.o
+obj-$(CONFIG_MIPS_VPE_APSP_API_MT) += rtlx-mt.o
 
 obj-$(CONFIG_I8259)		+= i8259.o
 obj-$(CONFIG_IRQ_CPU)		+= irq_cpu.o
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index 202e581..7faf5f2 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -28,6 +28,18 @@
 typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
 
 /*
+ * In order to be sure that we don't attempt to execute an O32 binary which
+ * requires 64 bit FP (FR=1) on a system which does not support it we refuse
+ * to execute any binary which has bits specified by the following macro set
+ * in its ELF header flags.
+ */
+#ifdef CONFIG_MIPS_O32_FP64_SUPPORT
+# define __MIPS_O32_FP64_MUST_BE_ZERO	0
+#else
+# define __MIPS_O32_FP64_MUST_BE_ZERO	EF_MIPS_FP64
+#endif
+
+/*
  * This is used to ensure we don't load something for the wrong architecture.
  */
 #define elf_check_arch(hdr)						\
@@ -44,6 +56,8 @@
 	if (((__h->e_flags & EF_MIPS_ABI) != 0) &&			\
 	    ((__h->e_flags & EF_MIPS_ABI) != EF_MIPS_ABI_O32))		\
 		__res = 0;						\
+	if (__h->e_flags & __MIPS_O32_FP64_MUST_BE_ZERO)		\
+		__res = 0;						\
 									\
 	__res;								\
 })
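
The effect of the new test, in isolation: with CONFIG_MIPS_O32_FP64_SUPPORT
disabled the mask equals EF_MIPS_FP64, so an O32 binary built with -mfp64 is
rejected by elf_check_arch() and execve() fails with ENOEXEC. A standalone
sketch of the predicate (EF_MIPS_FP64 is 0x00000200 in the MIPS ELF ABI):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EF_MIPS_FP64	0x00000200	/* o32 binary requires FR=1 */

/* Hedged sketch mirroring the __MIPS_O32_FP64_MUST_BE_ZERO check above. */
static bool o32_binary_loadable(uint32_t e_flags, bool kernel_has_o32_fp64)
{
	uint32_t must_be_zero = kernel_has_o32_fp64 ? 0 : EF_MIPS_FP64;

	return (e_flags & must_be_zero) == 0;
}

int main(void)
{
	printf("-mfp64 binary, no kernel support: %s\n",
	       o32_binary_loadable(EF_MIPS_FP64, false) ? "loads" : "rejected");
	printf("-mfp64 binary, with support: %s\n",
	       o32_binary_loadable(EF_MIPS_FP64, true) ? "loads" : "rejected");
	return 0;
}
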
diff --git a/arch/mips/kernel/bmips_vec.S b/arch/mips/kernel/bmips_vec.S
index bd79c4f..a5bf73d 100644
--- a/arch/mips/kernel/bmips_vec.S
+++ b/arch/mips/kernel/bmips_vec.S
@@ -8,11 +8,11 @@
  * Reset/NMI/re-entry vectors for BMIPS processors
  */
 
-#include <linux/init.h>
 
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cacheops.h>
+#include <asm/cpu.h>
 #include <asm/regdef.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
@@ -91,12 +91,18 @@
 	beqz	k0, bmips_smp_entry
 
 #if defined(CONFIG_CPU_BMIPS5000)
+	mfc0	k0, CP0_PRID
+	li	k1, PRID_IMP_BMIPS5000
+	andi	k0, 0xff00
+	bne	k0, k1, 1f
+
 	/* if we're not on core 0, this must be the SMP boot signal */
 	li	k1, (3 << 25)
 	mfc0	k0, $22
 	and	k0, k1
 	bnez	k0, bmips_smp_entry
-#endif
+1:
+#endif /* CONFIG_CPU_BMIPS5000 */
 #endif /* CONFIG_SMP */
 
 	/* nope, it's just a regular NMI */
@@ -139,7 +145,12 @@
 	xori	k0, 0x04
 	mtc0	k0, CP0_CONFIG
 
+	mfc0	k0, CP0_PRID
+	andi	k0, 0xff00
 #if defined(CONFIG_CPU_BMIPS4350) || defined(CONFIG_CPU_BMIPS4380)
+	li	k1, PRID_IMP_BMIPS43XX
+	bne	k0, k1, 2f
+
 	/* initialize CPU1's local I-cache */
 	li	k0, 0x80000000
 	li	k1, 0x80010000
@@ -150,14 +161,21 @@
 1:	cache	Index_Store_Tag_I, 0(k0)
 	addiu	k0, 16
 	bne	k0, k1, 1b
-#elif defined(CONFIG_CPU_BMIPS5000)
+
+	b	3f
+2:
+#endif /* CONFIG_CPU_BMIPS4350 || CONFIG_CPU_BMIPS4380 */
+#if defined(CONFIG_CPU_BMIPS5000)
 	/* set exception vector base */
+	li	k1, PRID_IMP_BMIPS5000
+	bne	k0, k1, 3f
+
 	la	k0, ebase
 	lw	k0, 0(k0)
 	mtc0	k0, $15, 1
 	BARRIER
-#endif
-
+#endif /* CONFIG_CPU_BMIPS5000 */
+3:
 	/* jump back to kseg0 in case we need to remap the kseg1 area */
 	la	k0, 1f
 	jr	k0
@@ -221,8 +239,18 @@
 LEAF(bmips_enable_xks01)
 
 #if defined(CONFIG_XKS01)
-
+	mfc0	t0, CP0_PRID
+	andi	t2, t0, 0xff00
 #if defined(CONFIG_CPU_BMIPS4380)
+	li	t1, PRID_IMP_BMIPS43XX
+	bne	t2, t1, 1f
+
+	andi	t0, 0xff
+	addiu	t1, t0, -PRID_REV_BMIPS4380_HI
+	bgtz	t1, 2f
+	addiu	t0, -PRID_REV_BMIPS4380_LO
+	bltz	t0, 2f
+
 	mfc0	t0, $22, 3
 	li	t1, 0x1ff0
 	li	t2, (1 << 12) | (1 << 9)
@@ -231,7 +259,13 @@
 	or	t0, t2
 	mtc0	t0, $22, 3
 	BARRIER
-#elif defined(CONFIG_CPU_BMIPS5000)
+	b	2f
+1:
+#endif /* CONFIG_CPU_BMIPS4380 */
+#if defined(CONFIG_CPU_BMIPS5000)
+	li	t1, PRID_IMP_BMIPS5000
+	bne	t2, t1, 2f
+
 	mfc0	t0, $22, 5
 	li	t1, 0x01ff
 	li	t2, (1 << 8) | (1 << 5)
@@ -240,12 +274,8 @@
 	or	t0, t2
 	mtc0	t0, $22, 5
 	BARRIER
-#else
-
-#error Missing XKS01 setup
-
-#endif
-
+#endif /* CONFIG_CPU_BMIPS5000 */
+2:
 #endif /* defined(CONFIG_XKS01) */
 
 	jr	ra
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index c814287..530f832 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -112,7 +112,7 @@
 	unsigned long tmp, fpu_id;
 
 	tmp = read_c0_status();
-	__enable_fpu();
+	__enable_fpu(FPU_AS_IS);
 	fpu_id = read_32bit_cp1_register(CP1_REVISION);
 	write_c0_status(tmp);
 	return fpu_id;
@@ -163,6 +163,25 @@
 static char unknown_isa[] = KERN_ERR \
 	"Unsupported ISA type, c0.config0: %d.";
 
+static void set_ftlb_enable(struct cpuinfo_mips *c, int enable)
+{
+	unsigned int config6;
+	/*
+	 * Config6 is implementation dependent and it's currently only
+	 * used by proAptiv
+	 */
+	if (c->cputype == CPU_PROAPTIV) {
+		config6 = read_c0_config6();
+		if (enable)
+			/* Enable FTLB */
+			write_c0_config6(config6 | MIPS_CONF6_FTLBEN);
+		else
+			/* Disable FTLB */
+			write_c0_config6(config6 & ~MIPS_CONF6_FTLBEN);
+		back_to_back_c0_hazard();
+	}
+}
+
 static inline unsigned int decode_config0(struct cpuinfo_mips *c)
 {
 	unsigned int config0;
@@ -170,8 +189,13 @@
 
 	config0 = read_c0_config();
 
-	if (((config0 & MIPS_CONF_MT) >> 7) == 1)
+	/*
+	 * Look for Standard TLB or Dual VTLB and FTLB
+	 */
+	if ((((config0 & MIPS_CONF_MT) >> 7) == 1) ||
+	    (((config0 & MIPS_CONF_MT) >> 7) == 4))
 		c->options |= MIPS_CPU_TLB;
+
 	isa = (config0 & MIPS_CONF_AT) >> 13;
 	switch (isa) {
 	case 0:
@@ -226,8 +250,11 @@
 		c->options |= MIPS_CPU_FPU;
 		c->options |= MIPS_CPU_32FPR;
 	}
-	if (cpu_has_tlb)
+	if (cpu_has_tlb) {
 		c->tlbsize = ((config1 & MIPS_CONF1_TLBS) >> 25) + 1;
+		c->tlbsizevtlb = c->tlbsize;
+		c->tlbsizeftlbsets = 0;
+	}
 
 	return config1 & MIPS_CONF_M;
 }
@@ -272,6 +299,8 @@
 		c->options |= MIPS_CPU_MICROMIPS;
 	if (config3 & MIPS_CONF3_VZ)
 		c->ases |= MIPS_ASE_VZ;
+	if (config3 & MIPS_CONF3_SC)
+		c->options |= MIPS_CPU_SEGMENTS;
 
 	return config3 & MIPS_CONF_M;
 }
@@ -279,12 +308,51 @@
 static inline unsigned int decode_config4(struct cpuinfo_mips *c)
 {
 	unsigned int config4;
+	unsigned int newcf4;
+	unsigned int mmuextdef;
+	unsigned int ftlb_page = MIPS_CONF4_FTLBPAGESIZE;
 
 	config4 = read_c0_config4();
 
-	if ((config4 & MIPS_CONF4_MMUEXTDEF) == MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT
-	    && cpu_has_tlb)
-		c->tlbsize += (config4 & MIPS_CONF4_MMUSIZEEXT) * 0x40;
+	if (cpu_has_tlb) {
+		if (((config4 & MIPS_CONF4_IE) >> 29) == 2)
+			c->options |= MIPS_CPU_TLBINV;
+		mmuextdef = config4 & MIPS_CONF4_MMUEXTDEF;
+		switch (mmuextdef) {
+		case MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT:
+			c->tlbsize += (config4 & MIPS_CONF4_MMUSIZEEXT) * 0x40;
+			c->tlbsizevtlb = c->tlbsize;
+			break;
+		case MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT:
+			c->tlbsizevtlb +=
+				((config4 & MIPS_CONF4_VTLBSIZEEXT) >>
+				  MIPS_CONF4_VTLBSIZEEXT_SHIFT) * 0x40;
+			c->tlbsize = c->tlbsizevtlb;
+			ftlb_page = MIPS_CONF4_VFTLBPAGESIZE;
+			/* fall through */
+		case MIPS_CONF4_MMUEXTDEF_FTLBSIZEEXT:
+			newcf4 = (config4 & ~ftlb_page) |
+				(page_size_ftlb(mmuextdef) <<
+				 MIPS_CONF4_FTLBPAGESIZE_SHIFT);
+			write_c0_config4(newcf4);
+			back_to_back_c0_hazard();
+			config4 = read_c0_config4();
+			if (config4 != newcf4) {
+				pr_err("PAGE_SIZE 0x%lx is not supported by FTLB (config4=0x%x)\n",
+				       PAGE_SIZE, config4);
+				/* Switch FTLB off */
+				set_ftlb_enable(c, 0);
+				break;
+			}
+			c->tlbsizeftlbsets = 1 <<
+				((config4 & MIPS_CONF4_FTLBSETS) >>
+				 MIPS_CONF4_FTLBSETS_SHIFT);
+			c->tlbsizeftlbways = ((config4 & MIPS_CONF4_FTLBWAYS) >>
+					      MIPS_CONF4_FTLBWAYS_SHIFT) + 2;
+			c->tlbsize += c->tlbsizeftlbways * c->tlbsizeftlbsets;
+			break;
+		}
+	}
 
 	c->kscratch_mask = (config4 >> 16) & 0xff;
 
@@ -312,6 +380,9 @@
 
 	c->scache.flags = MIPS_CACHE_NOT_PRESENT;
 
+	/* Enable FTLB if present */
+	set_ftlb_enable(c, 1);
+
 	ok = decode_config0(c);			/* Read Config registers.  */
 	BUG_ON(!ok);				/* Arch spec violation!	 */
 	if (ok)
@@ -675,7 +746,6 @@
 
 static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu)
 {
-	decode_configs(c);
 	switch (c->processor_id & PRID_IMP_MASK) {
 	case PRID_IMP_4KC:
 		c->cputype = CPU_4KC;
@@ -739,8 +809,26 @@
 		c->cputype = CPU_74K;
 		__cpu_name[cpu] = "MIPS 1074Kc";
 		break;
+	case PRID_IMP_INTERAPTIV_UP:
+		c->cputype = CPU_INTERAPTIV;
+		__cpu_name[cpu] = "MIPS interAptiv";
+		break;
+	case PRID_IMP_INTERAPTIV_MP:
+		c->cputype = CPU_INTERAPTIV;
+		__cpu_name[cpu] = "MIPS interAptiv (multi)";
+		break;
+	case PRID_IMP_PROAPTIV_UP:
+		c->cputype = CPU_PROAPTIV;
+		__cpu_name[cpu] = "MIPS proAptiv";
+		break;
+	case PRID_IMP_PROAPTIV_MP:
+		c->cputype = CPU_PROAPTIV;
+		__cpu_name[cpu] = "MIPS proAptiv (multi)";
+		break;
 	}
 
+	decode_configs(c);
+
 	spram_config();
 }
 
@@ -943,6 +1031,7 @@
 
 	switch (c->processor_id & PRID_IMP_MASK) {
 	case PRID_IMP_NETLOGIC_XLP2XX:
+	case PRID_IMP_NETLOGIC_XLP9XX:
 		c->cputype = CPU_XLP;
 		__cpu_name[cpu] = "Broadcom XLPII";
 		break;
diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
index 93aa302..d212646 100644
--- a/arch/mips/kernel/crash.c
+++ b/arch/mips/kernel/crash.c
@@ -5,7 +5,6 @@
 #include <linux/bootmem.h>
 #include <linux/crash_dump.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/types.h>
 #include <linux/sched.h>
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 47d7583..d84f6a5 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -476,6 +476,7 @@
 	BUILD_HANDLER ov ov sti silent			/* #12 */
 	BUILD_HANDLER tr tr sti silent			/* #13 */
 	BUILD_HANDLER fpe fpe fpe silent		/* #15 */
+	BUILD_HANDLER ftlb ftlb none silent		/* #16 */
 	BUILD_HANDLER mdmx mdmx sti silent		/* #22 */
 #ifdef	CONFIG_HARDWARE_WATCHPOINTS
 	/*
diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c
index f7991d9..3553243 100644
--- a/arch/mips/kernel/idle.c
+++ b/arch/mips/kernel/idle.c
@@ -184,6 +184,8 @@
 	case CPU_24K:
 	case CPU_34K:
 	case CPU_1004K:
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
 		cpu_wait = r4k_wait;
 		if (read_c0_config7() & MIPS_CONF7_WII)
 			cpu_wait = r4k_wait_irqoff;
diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
index 8c58d8a..00d2097 100644
--- a/arch/mips/kernel/proc.c
+++ b/arch/mips/kernel/proc.c
@@ -65,26 +65,25 @@
 				cpu_data[n].watch_reg_masks[i]);
 		seq_printf(m, "]\n");
 	}
-	if (cpu_has_mips_r) {
-		seq_printf(m, "isa\t\t\t: mips1");
-		if (cpu_has_mips_2)
-			seq_printf(m, "%s", " mips2");
-		if (cpu_has_mips_3)
-			seq_printf(m, "%s", " mips3");
-		if (cpu_has_mips_4)
-			seq_printf(m, "%s", " mips4");
-		if (cpu_has_mips_5)
-			seq_printf(m, "%s", " mips5");
-		if (cpu_has_mips32r1)
-			seq_printf(m, "%s", " mips32r1");
-		if (cpu_has_mips32r2)
-			seq_printf(m, "%s", " mips32r2");
-		if (cpu_has_mips64r1)
-			seq_printf(m, "%s", " mips64r1");
-		if (cpu_has_mips64r2)
-			seq_printf(m, "%s", " mips64r2");
-		seq_printf(m, "\n");
-	}
+
+	seq_printf(m, "isa\t\t\t: mips1");
+	if (cpu_has_mips_2)
+		seq_printf(m, "%s", " mips2");
+	if (cpu_has_mips_3)
+		seq_printf(m, "%s", " mips3");
+	if (cpu_has_mips_4)
+		seq_printf(m, "%s", " mips4");
+	if (cpu_has_mips_5)
+		seq_printf(m, "%s", " mips5");
+	if (cpu_has_mips32r1)
+		seq_printf(m, "%s", " mips32r1");
+	if (cpu_has_mips32r2)
+		seq_printf(m, "%s", " mips32r2");
+	if (cpu_has_mips64r1)
+		seq_printf(m, "%s", " mips64r1");
+	if (cpu_has_mips64r2)
+		seq_printf(m, "%s", " mips64r2");
+	seq_printf(m, "\n");
 
 	seq_printf(m, "ASEs implemented\t:");
 	if (cpu_has_mips16)	seq_printf(m, "%s", " mips16");
@@ -107,7 +106,14 @@
 	seq_printf(m, "kscratch registers\t: %d\n",
 		      hweight8(cpu_data[n].kscratch_mask));
 	seq_printf(m, "core\t\t\t: %d\n", cpu_data[n].core);
-
+#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC)
+	if (cpu_has_mipsmt) {
+		seq_printf(m, "VPE\t\t\t: %d\n", cpu_data[n].vpe_id);
+#if defined(CONFIG_MIPS_MT_SMTC)
+		seq_printf(m, "TC\t\t\t: %d\n", cpu_data[n].tc_id);
+#endif
+	}
+#endif
 	sprintf(fmt, "VCE%%c exceptions\t\t: %s\n",
 		      cpu_has_vce ? "%u" : "not available");
 	seq_printf(m, fmt, 'D', vced_count);
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index ddc7610..6ae540e 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -60,15 +60,11 @@
 
 	/* New thread loses kernel privileges. */
 	status = regs->cp0_status & ~(ST0_CU0|ST0_CU1|ST0_FR|KU_MASK);
-#ifdef CONFIG_64BIT
-	status |= test_thread_flag(TIF_32BIT_REGS) ? 0 : ST0_FR;
-#endif
 	status |= KU_USER;
 	regs->cp0_status = status;
 	clear_used_math();
 	clear_fpu_owner();
-	if (cpu_has_dsp)
-		__init_dsp();
+	init_dsp();
 	regs->cp0_epc = pc;
 	regs->regs[29] = sp;
 }
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index b52e1d2..7da9b76 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -137,13 +137,13 @@
 		if (cpu_has_mipsmt) {
 			unsigned int vpflags = dvpe();
 			flags = read_c0_status();
-			__enable_fpu();
+			__enable_fpu(FPU_AS_IS);
 			__asm__ __volatile__("cfc1\t%0,$0" : "=r" (tmp));
 			write_c0_status(flags);
 			evpe(vpflags);
 		} else {
 			flags = read_c0_status();
-			__enable_fpu();
+			__enable_fpu(FPU_AS_IS);
 			__asm__ __volatile__("cfc1\t%0,$0" : "=r" (tmp));
 			write_c0_status(flags);
 		}
@@ -408,6 +408,7 @@
 	/* Read the word at location addr in the USER area. */
 	case PTRACE_PEEKUSR: {
 		struct pt_regs *regs;
+		fpureg_t *fregs;
 		unsigned long tmp = 0;
 
 		regs = task_pt_regs(child);
@@ -418,26 +419,28 @@
 			tmp = regs->regs[addr];
 			break;
 		case FPR_BASE ... FPR_BASE + 31:
-			if (tsk_used_math(child)) {
-				fpureg_t *fregs = get_fpu_regs(child);
+			if (!tsk_used_math(child)) {
+				/* FP not yet used */
+				tmp = -1;
+				break;
+			}
+			fregs = get_fpu_regs(child);
 
 #ifdef CONFIG_32BIT
+			if (test_thread_flag(TIF_32BIT_FPREGS)) {
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even
 				 * registers - unless we're using r2k_switch.S.
 				 */
 				if (addr & 1)
-					tmp = (unsigned long) (fregs[((addr & ~1) - 32)] >> 32);
+					tmp = fregs[(addr & ~1) - 32] >> 32;
 				else
-					tmp = (unsigned long) (fregs[(addr - 32)] & 0xffffffff);
-#endif
-#ifdef CONFIG_64BIT
-				tmp = fregs[addr - FPR_BASE];
-#endif
-			} else {
-				tmp = -1;	/* FP not yet used  */
+					tmp = fregs[addr - 32];
+				break;
 			}
+#endif
+			tmp = fregs[addr - FPR_BASE];
 			break;
 		case PC:
 			tmp = regs->cp0_epc;
@@ -483,13 +486,13 @@
 			if (cpu_has_mipsmt) {
 				unsigned int vpflags = dvpe();
 				flags = read_c0_status();
-				__enable_fpu();
+				__enable_fpu(FPU_AS_IS);
 				__asm__ __volatile__("cfc1\t%0,$0": "=r" (tmp));
 				write_c0_status(flags);
 				evpe(vpflags);
 			} else {
 				flags = read_c0_status();
-				__enable_fpu();
+				__enable_fpu(FPU_AS_IS);
 				__asm__ __volatile__("cfc1\t%0,$0": "=r" (tmp));
 				write_c0_status(flags);
 			}
@@ -554,22 +557,25 @@
 				child->thread.fpu.fcr31 = 0;
 			}
 #ifdef CONFIG_32BIT
-			/*
-			 * The odd registers are actually the high order bits
-			 * of the values stored in the even registers - unless
-			 * we're using r2k_switch.S.
-			 */
-			if (addr & 1) {
-				fregs[(addr & ~1) - FPR_BASE] &= 0xffffffff;
-				fregs[(addr & ~1) - FPR_BASE] |= ((unsigned long long) data) << 32;
-			} else {
-				fregs[addr - FPR_BASE] &= ~0xffffffffLL;
-				fregs[addr - FPR_BASE] |= data;
+			if (test_thread_flag(TIF_32BIT_FPREGS)) {
+				/*
+				 * The odd registers are actually the high
+				 * order bits of the values stored in the even
+				 * registers - unless we're using r2k_switch.S.
+				 */
+				if (addr & 1) {
+					fregs[(addr & ~1) - FPR_BASE] &=
+						0xffffffff;
+					fregs[(addr & ~1) - FPR_BASE] |=
+						((u64)data) << 32;
+				} else {
+					fregs[addr - FPR_BASE] &= ~0xffffffffLL;
+					fregs[addr - FPR_BASE] |= data;
+				}
+				break;
 			}
 #endif
-#ifdef CONFIG_64BIT
 			fregs[addr - FPR_BASE] = data;
-#endif
 			break;
 		}
 		case PC:
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 9486055..b8aa2dd 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -80,6 +80,7 @@
 	/* Read the word at location addr in the USER area. */
 	case PTRACE_PEEKUSR: {
 		struct pt_regs *regs;
+		fpureg_t *fregs;
 		unsigned int tmp;
 
 		regs = task_pt_regs(child);
@@ -90,21 +91,25 @@
 			tmp = regs->regs[addr];
 			break;
 		case FPR_BASE ... FPR_BASE + 31:
-			if (tsk_used_math(child)) {
-				fpureg_t *fregs = get_fpu_regs(child);
-
+			if (!tsk_used_math(child)) {
+				/* FP not yet used */
+				tmp = -1;
+				break;
+			}
+			fregs = get_fpu_regs(child);
+			if (test_thread_flag(TIF_32BIT_FPREGS)) {
 				/*
 				 * The odd registers are actually the high
 				 * order bits of the values stored in the even
 				 * registers - unless we're using r2k_switch.S.
 				 */
 				if (addr & 1)
-					tmp = (unsigned long) (fregs[((addr & ~1) - 32)] >> 32);
+					tmp = fregs[(addr & ~1) - 32] >> 32;
 				else
-					tmp = (unsigned long) (fregs[(addr - 32)] & 0xffffffff);
-			} else {
-				tmp = -1;	/* FP not yet used  */
+					tmp = fregs[addr - 32];
+				break;
 			}
+			tmp = fregs[addr - FPR_BASE];
 			break;
 		case PC:
 			tmp = regs->cp0_epc;
@@ -147,13 +152,13 @@
 			if (cpu_has_mipsmt) {
 				unsigned int vpflags = dvpe();
 				flags = read_c0_status();
-				__enable_fpu();
+				__enable_fpu(FPU_AS_IS);
 				__asm__ __volatile__("cfc1\t%0,$0": "=r" (tmp));
 				write_c0_status(flags);
 				evpe(vpflags);
 			} else {
 				flags = read_c0_status();
-				__enable_fpu();
+				__enable_fpu(FPU_AS_IS);
 				__asm__ __volatile__("cfc1\t%0,$0": "=r" (tmp));
 				write_c0_status(flags);
 			}
@@ -236,20 +241,24 @@
 				       sizeof(child->thread.fpu));
 				child->thread.fpu.fcr31 = 0;
 			}
-			/*
-			 * The odd registers are actually the high order bits
-			 * of the values stored in the even registers - unless
-			 * we're using r2k_switch.S.
-			 */
-			if (addr & 1) {
-				fregs[(addr & ~1) - FPR_BASE] &= 0xffffffff;
-				fregs[(addr & ~1) - FPR_BASE] |= ((unsigned long long) data) << 32;
-			} else {
-				fregs[addr - FPR_BASE] &= ~0xffffffffLL;
-				/* Must cast, lest sign extension fill upper
-				   bits!  */
-				fregs[addr - FPR_BASE] |= (unsigned int)data;
+			if (test_thread_flag(TIF_32BIT_FPREGS)) {
+				/*
+				 * The odd registers are actually the high
+				 * order bits of the values stored in the even
+				 * registers - unless we're using r2k_switch.S.
+				 */
+				if (addr & 1) {
+					fregs[(addr & ~1) - FPR_BASE] &=
+						0xffffffff;
+					fregs[(addr & ~1) - FPR_BASE] |=
+						((u64)data) << 32;
+				} else {
+					fregs[addr - FPR_BASE] &= ~0xffffffffLL;
+					fregs[addr - FPR_BASE] |= data;
+				}
+				break;
 			}
+			fregs[addr - FPR_BASE] = data;
 			break;
 		}
 		case PC:
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 55ffe14..253b2fb 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -35,7 +35,15 @@
 LEAF(_save_fp_context)
 	cfc1	t1, fcr31
 
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_64BIT) || defined(CONFIG_MIPS32_R2)
+	.set	push
+#ifdef CONFIG_MIPS32_R2
+	.set	mips64r2
+	mfc0	t0, CP0_STATUS
+	sll	t0, t0, 5
+	bgez	t0, 1f			# skip storing odd if FR=0
+	 nop
+#endif
 	/* Store the 16 odd double precision registers */
 	EX	sdc1 $f1, SC_FPREGS+8(a0)
 	EX	sdc1 $f3, SC_FPREGS+24(a0)
@@ -53,6 +61,7 @@
 	EX	sdc1 $f27, SC_FPREGS+216(a0)
 	EX	sdc1 $f29, SC_FPREGS+232(a0)
 	EX	sdc1 $f31, SC_FPREGS+248(a0)
+1:	.set	pop
 #endif
 
 	/* Store the 16 even double precision registers */
@@ -82,7 +91,31 @@
 LEAF(_save_fp_context32)
 	cfc1	t1, fcr31
 
-	EX	sdc1 $f0, SC32_FPREGS+0(a0)
+	mfc0	t0, CP0_STATUS
+	sll	t0, t0, 5
+	bgez	t0, 1f			# skip storing odd if FR=0
+	 nop
+
+	/* Store the 16 odd double precision registers */
+	EX      sdc1 $f1, SC32_FPREGS+8(a0)
+	EX      sdc1 $f3, SC32_FPREGS+24(a0)
+	EX      sdc1 $f5, SC32_FPREGS+40(a0)
+	EX      sdc1 $f7, SC32_FPREGS+56(a0)
+	EX      sdc1 $f9, SC32_FPREGS+72(a0)
+	EX      sdc1 $f11, SC32_FPREGS+88(a0)
+	EX      sdc1 $f13, SC32_FPREGS+104(a0)
+	EX      sdc1 $f15, SC32_FPREGS+120(a0)
+	EX      sdc1 $f17, SC32_FPREGS+136(a0)
+	EX      sdc1 $f19, SC32_FPREGS+152(a0)
+	EX      sdc1 $f21, SC32_FPREGS+168(a0)
+	EX      sdc1 $f23, SC32_FPREGS+184(a0)
+	EX      sdc1 $f25, SC32_FPREGS+200(a0)
+	EX      sdc1 $f27, SC32_FPREGS+216(a0)
+	EX      sdc1 $f29, SC32_FPREGS+232(a0)
+	EX      sdc1 $f31, SC32_FPREGS+248(a0)
+
+	/* Store the 16 even double precision registers */
+1:	EX	sdc1 $f0, SC32_FPREGS+0(a0)
 	EX	sdc1 $f2, SC32_FPREGS+16(a0)
 	EX	sdc1 $f4, SC32_FPREGS+32(a0)
 	EX	sdc1 $f6, SC32_FPREGS+48(a0)
@@ -114,7 +147,16 @@
  */
 LEAF(_restore_fp_context)
 	EX	lw t0, SC_FPC_CSR(a0)
-#ifdef CONFIG_64BIT
+
+#if defined(CONFIG_64BIT) || defined(CONFIG_MIPS32_R2)
+	.set	push
+#ifdef CONFIG_MIPS32_R2
+	.set	mips64r2
+	mfc0	t0, CP0_STATUS
+	sll	t0, t0, 5
+	bgez	t0, 1f			# skip loading odd if FR=0
+	 nop
+#endif
 	EX	ldc1 $f1, SC_FPREGS+8(a0)
 	EX	ldc1 $f3, SC_FPREGS+24(a0)
 	EX	ldc1 $f5, SC_FPREGS+40(a0)
@@ -131,6 +173,7 @@
 	EX	ldc1 $f27, SC_FPREGS+216(a0)
 	EX	ldc1 $f29, SC_FPREGS+232(a0)
 	EX	ldc1 $f31, SC_FPREGS+248(a0)
+1:	.set pop
 #endif
 	EX	ldc1 $f0, SC_FPREGS+0(a0)
 	EX	ldc1 $f2, SC_FPREGS+16(a0)
@@ -157,7 +200,30 @@
 LEAF(_restore_fp_context32)
 	/* Restore an o32 sigcontext.  */
 	EX	lw t0, SC32_FPC_CSR(a0)
-	EX	ldc1 $f0, SC32_FPREGS+0(a0)
+
+	mfc0	t0, CP0_STATUS
+	sll	t0, t0, 5
+	bgez	t0, 1f			# skip loading odd if FR=0
+	 nop
+
+	EX      ldc1 $f1, SC32_FPREGS+8(a0)
+	EX      ldc1 $f3, SC32_FPREGS+24(a0)
+	EX      ldc1 $f5, SC32_FPREGS+40(a0)
+	EX      ldc1 $f7, SC32_FPREGS+56(a0)
+	EX      ldc1 $f9, SC32_FPREGS+72(a0)
+	EX      ldc1 $f11, SC32_FPREGS+88(a0)
+	EX      ldc1 $f13, SC32_FPREGS+104(a0)
+	EX      ldc1 $f15, SC32_FPREGS+120(a0)
+	EX      ldc1 $f17, SC32_FPREGS+136(a0)
+	EX      ldc1 $f19, SC32_FPREGS+152(a0)
+	EX      ldc1 $f21, SC32_FPREGS+168(a0)
+	EX      ldc1 $f23, SC32_FPREGS+184(a0)
+	EX      ldc1 $f25, SC32_FPREGS+200(a0)
+	EX      ldc1 $f27, SC32_FPREGS+216(a0)
+	EX      ldc1 $f29, SC32_FPREGS+232(a0)
+	EX      ldc1 $f31, SC32_FPREGS+248(a0)
+
+1:	EX	ldc1 $f0, SC32_FPREGS+0(a0)
 	EX	ldc1 $f2, SC32_FPREGS+16(a0)
 	EX	ldc1 $f4, SC32_FPREGS+32(a0)
 	EX	ldc1 $f6, SC32_FPREGS+48(a0)
diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S
index 078de5ea..cc78dd9 100644
--- a/arch/mips/kernel/r4k_switch.S
+++ b/arch/mips/kernel/r4k_switch.S
@@ -123,7 +123,7 @@
  * Save a thread's fp context.
  */
 LEAF(_save_fp)
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
 	mfc0	t0, CP0_STATUS
 #endif
 	fpu_save_double a0 t0 t1		# clobbers t1
@@ -134,7 +134,7 @@
  * Restore a thread's fp context.
  */
 LEAF(_restore_fp)
-#ifdef CONFIG_64BIT
+#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPS32_R2)
 	mfc0	t0, CP0_STATUS
 #endif
 	fpu_restore_double a0 t0 t1		# clobbers t1
@@ -228,6 +228,47 @@
 	mtc1	t1, $f29
 	mtc1	t1, $f30
 	mtc1	t1, $f31
+
+#ifdef CONFIG_CPU_MIPS32_R2
+	.set    push
+	.set    mips64r2
+	sll     t0, t0, 5			# is Status.FR set?
+	bgez    t0, 1f				# no: skip setting upper 32b
+
+	mthc1   t1, $f0
+	mthc1   t1, $f1
+	mthc1   t1, $f2
+	mthc1   t1, $f3
+	mthc1   t1, $f4
+	mthc1   t1, $f5
+	mthc1   t1, $f6
+	mthc1   t1, $f7
+	mthc1   t1, $f8
+	mthc1   t1, $f9
+	mthc1   t1, $f10
+	mthc1   t1, $f11
+	mthc1   t1, $f12
+	mthc1   t1, $f13
+	mthc1   t1, $f14
+	mthc1   t1, $f15
+	mthc1   t1, $f16
+	mthc1   t1, $f17
+	mthc1   t1, $f18
+	mthc1   t1, $f19
+	mthc1   t1, $f20
+	mthc1   t1, $f21
+	mthc1   t1, $f22
+	mthc1   t1, $f23
+	mthc1   t1, $f24
+	mthc1   t1, $f25
+	mthc1   t1, $f26
+	mthc1   t1, $f27
+	mthc1   t1, $f28
+	mthc1   t1, $f29
+	mthc1   t1, $f30
+	mthc1   t1, $f31
+1:	.set    pop
+#endif /* CONFIG_CPU_MIPS32_R2 */
 #else
 	.set	mips3
 	dmtc1	t1, $f0
diff --git a/arch/mips/kernel/rtlx-cmp.c b/arch/mips/kernel/rtlx-cmp.c
new file mode 100644
index 0000000..56dc696
--- /dev/null
+++ b/arch/mips/kernel/rtlx-cmp.c
@@ -0,0 +1,116 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2005 MIPS Technologies, Inc.  All rights reserved.
+ * Copyright (C) 2013 Imagination Technologies Ltd.
+ */
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/mips_mt.h>
+#include <asm/vpe.h>
+#include <asm/rtlx.h>
+
+static int major;
+
+static void rtlx_interrupt(void)
+{
+	int i;
+	struct rtlx_info *info;
+	struct rtlx_info **p = vpe_get_shared(aprp_cpu_index());
+
+	if (p == NULL || *p == NULL)
+		return;
+
+	info = *p;
+
+	if (info->ap_int_pending == 1 && smp_processor_id() == 0) {
+		for (i = 0; i < RTLX_CHANNELS; i++) {
+			wake_up(&channel_wqs[i].lx_queue);
+			wake_up(&channel_wqs[i].rt_queue);
+		}
+		info->ap_int_pending = 0;
+	}
+}
+
+void _interrupt_sp(void)
+{
+	smp_send_reschedule(aprp_cpu_index());
+}
+
+int __init rtlx_module_init(void)
+{
+	struct device *dev;
+	int i, err;
+
+	if (!cpu_has_mipsmt) {
+		pr_warn("VPE loader: not a MIPS MT capable processor\n");
+		return -ENODEV;
+	}
+
+	if (num_possible_cpus() - aprp_cpu_index() < 1) {
+		pr_warn("No TCs reserved for AP/SP, not initializing RTLX.\n"
+			"Pass maxcpus=<n> argument as kernel argument\n");
+
+		return -ENODEV;
+	}
+
+	major = register_chrdev(0, RTLX_MODULE_NAME, &rtlx_fops);
+	if (major < 0) {
+		pr_err("rtlx_module_init: unable to register device\n");
+		return major;
+	}
+
+	/* initialise the wait queues */
+	for (i = 0; i < RTLX_CHANNELS; i++) {
+		init_waitqueue_head(&channel_wqs[i].rt_queue);
+		init_waitqueue_head(&channel_wqs[i].lx_queue);
+		atomic_set(&channel_wqs[i].in_open, 0);
+		mutex_init(&channel_wqs[i].mutex);
+
+		dev = device_create(mt_class, NULL, MKDEV(major, i), NULL,
+				    "%s%d", RTLX_MODULE_NAME, i);
+		if (IS_ERR(dev)) {
+			err = PTR_ERR(dev);
+			goto out_chrdev;
+		}
+	}
+
+	/* set up notifiers */
+	rtlx_notify.start = rtlx_starting;
+	rtlx_notify.stop = rtlx_stopping;
+	vpe_notify(aprp_cpu_index(), &rtlx_notify);
+
+	if (cpu_has_vint) {
+		aprp_hook = rtlx_interrupt;
+	} else {
+		pr_err("APRP RTLX init on non-vectored-interrupt processor\n");
+		err = -ENODEV;
+		goto out_class;
+	}
+
+	return 0;
+
+out_class:
+	for (i = 0; i < RTLX_CHANNELS; i++)
+		device_destroy(mt_class, MKDEV(major, i));
+out_chrdev:
+	unregister_chrdev(major, RTLX_MODULE_NAME);
+
+	return err;
+}
+
+void __exit rtlx_module_exit(void)
+{
+	int i;
+
+	for (i = 0; i < RTLX_CHANNELS; i++)
+		device_destroy(mt_class, MKDEV(major, i));
+	unregister_chrdev(major, RTLX_MODULE_NAME);
+}
diff --git a/arch/mips/kernel/rtlx-mt.c b/arch/mips/kernel/rtlx-mt.c
new file mode 100644
index 0000000..91d61ba
--- /dev/null
+++ b/arch/mips/kernel/rtlx-mt.c
@@ -0,0 +1,148 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2005 MIPS Technologies, Inc.  All rights reserved.
+ * Copyright (C) 2013 Imagination Technologies Ltd.
+ */
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/err.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+
+#include <asm/mips_mt.h>
+#include <asm/vpe.h>
+#include <asm/rtlx.h>
+
+static int major;
+
+static void rtlx_dispatch(void)
+{
+	if (read_c0_cause() & read_c0_status() & C_SW0)
+		do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_RTLX_IRQ);
+}
+
+/*
+ * Interrupt handler may be called before rtlx_init has otherwise had
+ * a chance to run.
+ */
+static irqreturn_t rtlx_interrupt(int irq, void *dev_id)
+{
+	unsigned int vpeflags;
+	unsigned long flags;
+	int i;
+
+	/* Ought not to be strictly necessary for SMTC builds */
+	local_irq_save(flags);
+	vpeflags = dvpe();
+	set_c0_status(0x100 << MIPS_CPU_RTLX_IRQ);
+	irq_enable_hazard();
+	evpe(vpeflags);
+	local_irq_restore(flags);
+
+	for (i = 0; i < RTLX_CHANNELS; i++) {
+		wake_up(&channel_wqs[i].lx_queue);
+		wake_up(&channel_wqs[i].rt_queue);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction rtlx_irq = {
+	.handler	= rtlx_interrupt,
+	.name		= "RTLX",
+};
+
+static int rtlx_irq_num = MIPS_CPU_IRQ_BASE + MIPS_CPU_RTLX_IRQ;
+
+void _interrupt_sp(void)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	dvpe();
+	settc(1);
+	write_vpe_c0_cause(read_vpe_c0_cause() | C_SW0);
+	evpe(EVPE_ENABLE);
+	local_irq_restore(flags);
+}
+
+int __init rtlx_module_init(void)
+{
+	struct device *dev;
+	int i, err;
+
+	if (!cpu_has_mipsmt) {
+		pr_warn("VPE loader: not a MIPS MT capable processor\n");
+		return -ENODEV;
+	}
+
+	if (aprp_cpu_index() == 0) {
+		pr_warn("No TCs reserved for AP/SP, not initializing RTLX.\n"
+			"Pass maxtcs=<n> argument as kernel argument\n");
+
+		return -ENODEV;
+	}
+
+	major = register_chrdev(0, RTLX_MODULE_NAME, &rtlx_fops);
+	if (major < 0) {
+		pr_err("rtlx_module_init: unable to register device\n");
+		return major;
+	}
+
+	/* initialise the wait queues */
+	for (i = 0; i < RTLX_CHANNELS; i++) {
+		init_waitqueue_head(&channel_wqs[i].rt_queue);
+		init_waitqueue_head(&channel_wqs[i].lx_queue);
+		atomic_set(&channel_wqs[i].in_open, 0);
+		mutex_init(&channel_wqs[i].mutex);
+
+		dev = device_create(mt_class, NULL, MKDEV(major, i), NULL,
+				    "%s%d", RTLX_MODULE_NAME, i);
+		if (IS_ERR(dev)) {
+			err = PTR_ERR(dev);
+			goto out_chrdev;
+		}
+	}
+
+	/* set up notifiers */
+	rtlx_notify.start = rtlx_starting;
+	rtlx_notify.stop = rtlx_stopping;
+	vpe_notify(aprp_cpu_index(), &rtlx_notify);
+
+	if (cpu_has_vint) {
+		aprp_hook = rtlx_dispatch;
+	} else {
+		pr_err("APRP RTLX init on non-vectored-interrupt processor\n");
+		err = -ENODEV;
+		goto out_class;
+	}
+
+	rtlx_irq.dev_id = rtlx;
+	err = setup_irq(rtlx_irq_num, &rtlx_irq);
+	if (err)
+		goto out_class;
+
+	return 0;
+
+out_class:
+	for (i = 0; i < RTLX_CHANNELS; i++)
+		device_destroy(mt_class, MKDEV(major, i));
+out_chrdev:
+	unregister_chrdev(major, RTLX_MODULE_NAME);
+
+	return err;
+}
+
+void __exit rtlx_module_exit(void)
+{
+	int i;
+
+	for (i = 0; i < RTLX_CHANNELS; i++)
+		device_destroy(mt_class, MKDEV(major, i));
+	unregister_chrdev(major, RTLX_MODULE_NAME);
+}
diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c
index 2c12ea1..31b1b76 100644
--- a/arch/mips/kernel/rtlx.c
+++ b/arch/mips/kernel/rtlx.c
@@ -1,114 +1,51 @@
 /*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
  * Copyright (C) 2005 MIPS Technologies, Inc.  All rights reserved.
  * Copyright (C) 2005, 06 Ralf Baechle (ralf@linux-mips.org)
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
+ * Copyright (C) 2013 Imagination Technologies Ltd.
  */
-
-#include <linux/device.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
-#include <linux/init.h>
-#include <asm/uaccess.h>
-#include <linux/list.h>
-#include <linux/vmalloc.h>
-#include <linux/elf.h>
-#include <linux/seq_file.h>
 #include <linux/syscalls.h>
 #include <linux/moduleloader.h>
-#include <linux/interrupt.h>
-#include <linux/poll.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
+#include <linux/atomic.h>
 #include <asm/mipsmtregs.h>
 #include <asm/mips_mt.h>
-#include <asm/cacheflush.h>
-#include <linux/atomic.h>
-#include <asm/cpu.h>
 #include <asm/processor.h>
-#include <asm/vpe.h>
 #include <asm/rtlx.h>
 #include <asm/setup.h>
+#include <asm/vpe.h>
 
-static struct rtlx_info *rtlx;
-static int major;
-static char module_name[] = "rtlx";
-
-static struct chan_waitqueues {
-	wait_queue_head_t rt_queue;
-	wait_queue_head_t lx_queue;
-	atomic_t in_open;
-	struct mutex mutex;
-} channel_wqs[RTLX_CHANNELS];
-
-static struct vpe_notifications notify;
 static int sp_stopping;
-
-extern void *vpe_get_shared(int index);
-
-static void rtlx_dispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_RTLX_IRQ);
-}
-
-
-/* Interrupt handler may be called before rtlx_init has otherwise had
-   a chance to run.
-*/
-static irqreturn_t rtlx_interrupt(int irq, void *dev_id)
-{
-	unsigned int vpeflags;
-	unsigned long flags;
-	int i;
-
-	/* Ought not to be strictly necessary for SMTC builds */
-	local_irq_save(flags);
-	vpeflags = dvpe();
-	set_c0_status(0x100 << MIPS_CPU_RTLX_IRQ);
-	irq_enable_hazard();
-	evpe(vpeflags);
-	local_irq_restore(flags);
-
-	for (i = 0; i < RTLX_CHANNELS; i++) {
-			wake_up(&channel_wqs[i].lx_queue);
-			wake_up(&channel_wqs[i].rt_queue);
-	}
-
-	return IRQ_HANDLED;
-}
+struct rtlx_info *rtlx;
+struct chan_waitqueues channel_wqs[RTLX_CHANNELS];
+struct vpe_notifications rtlx_notify;
+void (*aprp_hook)(void) = NULL;
+EXPORT_SYMBOL(aprp_hook);
 
 static void __used dump_rtlx(void)
 {
 	int i;
 
-	printk("id 0x%lx state %d\n", rtlx->id, rtlx->state);
+	pr_info("id 0x%lx state %d\n", rtlx->id, rtlx->state);
 
 	for (i = 0; i < RTLX_CHANNELS; i++) {
 		struct rtlx_channel *chan = &rtlx->channel[i];
 
-		printk(" rt_state %d lx_state %d buffer_size %d\n",
-		       chan->rt_state, chan->lx_state, chan->buffer_size);
+		pr_info(" rt_state %d lx_state %d buffer_size %d\n",
+			chan->rt_state, chan->lx_state, chan->buffer_size);
 
-		printk(" rt_read %d rt_write %d\n",
-		       chan->rt_read, chan->rt_write);
+		pr_info(" rt_read %d rt_write %d\n",
+			chan->rt_read, chan->rt_write);
 
-		printk(" lx_read %d lx_write %d\n",
-		       chan->lx_read, chan->lx_write);
+		pr_info(" lx_read %d lx_write %d\n",
+			chan->lx_read, chan->lx_write);
 
-		printk(" rt_buffer <%s>\n", chan->rt_buffer);
-		printk(" lx_buffer <%s>\n", chan->lx_buffer);
+		pr_info(" rt_buffer <%s>\n", chan->rt_buffer);
+		pr_info(" lx_buffer <%s>\n", chan->lx_buffer);
 	}
 }
 
@@ -116,8 +53,7 @@
 static int rtlx_init(struct rtlx_info *rtlxi)
 {
 	if (rtlxi->id != RTLX_ID) {
-		printk(KERN_ERR "no valid RTLX id at 0x%p 0x%lx\n",
-			rtlxi, rtlxi->id);
+		pr_err("no valid RTLX id at 0x%p 0x%lx\n", rtlxi, rtlxi->id);
 		return -ENOEXEC;
 	}
 
@@ -127,20 +63,20 @@
 }
 
 /* notifications */
-static void starting(int vpe)
+void rtlx_starting(int vpe)
 {
 	int i;
 	sp_stopping = 0;
 
 	/* force a reload of rtlx */
-	rtlx=NULL;
+	rtlx = NULL;
 
 	/* wake up any sleeping rtlx_open's */
 	for (i = 0; i < RTLX_CHANNELS; i++)
 		wake_up_interruptible(&channel_wqs[i].lx_queue);
 }
 
-static void stopping(int vpe)
+void rtlx_stopping(int vpe)
 {
 	int i;
 
@@ -158,31 +94,30 @@
 	int ret = 0;
 
 	if (index >= RTLX_CHANNELS) {
-		printk(KERN_DEBUG "rtlx_open index out of range\n");
+		pr_debug(KERN_DEBUG "rtlx_open index out of range\n");
 		return -ENOSYS;
 	}
 
 	if (atomic_inc_return(&channel_wqs[index].in_open) > 1) {
-		printk(KERN_DEBUG "rtlx_open channel %d already opened\n",
-		       index);
+		pr_debug(KERN_DEBUG "rtlx_open channel %d already opened\n", index);
 		ret = -EBUSY;
 		goto out_fail;
 	}
 
 	if (rtlx == NULL) {
-		if( (p = vpe_get_shared(tclimit)) == NULL) {
-		    if (can_sleep) {
-			ret = __wait_event_interruptible(
+		p = vpe_get_shared(aprp_cpu_index());
+		if (p == NULL) {
+			if (can_sleep) {
+				ret = __wait_event_interruptible(
 					channel_wqs[index].lx_queue,
-					(p = vpe_get_shared(tclimit)));
-			if (ret)
+					(p = vpe_get_shared(aprp_cpu_index())));
+				if (ret)
+					goto out_fail;
+			} else {
+				pr_debug("No SP program loaded, and device opened with O_NONBLOCK\n");
+				ret = -ENOSYS;
 				goto out_fail;
-		    } else {
-			printk(KERN_DEBUG "No SP program loaded, and device "
-					"opened with O_NONBLOCK\n");
-			ret = -ENOSYS;
-			goto out_fail;
-		    }
+			}
 		}
 
 		smp_rmb();
@@ -204,24 +139,24 @@
 					ret = -ERESTARTSYS;
 					goto out_fail;
 				}
-				finish_wait(&channel_wqs[index].lx_queue, &wait);
+				finish_wait(&channel_wqs[index].lx_queue,
+					    &wait);
 			} else {
-				pr_err(" *vpe_get_shared is NULL. "
-				       "Has an SP program been loaded?\n");
+				pr_err(" *vpe_get_shared is NULL. Has an SP program been loaded?\n");
 				ret = -ENOSYS;
 				goto out_fail;
 			}
 		}
 
 		if ((unsigned int)*p < KSEG0) {
-			printk(KERN_WARNING "vpe_get_shared returned an "
-			       "invalid pointer maybe an error code %d\n",
-			       (int)*p);
+			pr_warn("vpe_get_shared returned an invalid pointer maybe an error code %d\n",
+				(int)*p);
 			ret = -ENOSYS;
 			goto out_fail;
 		}
 
-		if ((ret = rtlx_init(*p)) < 0)
+		ret = rtlx_init(*p);
+		if (ret < 0)
 			goto out_ret;
 	}
 
@@ -352,7 +287,7 @@
 	size_t fl;
 
 	if (rtlx == NULL)
-		return(-ENOSYS);
+		return -ENOSYS;
 
 	rt = &rtlx->channel[index];
 
@@ -361,8 +296,8 @@
 	rt_read = rt->rt_read;
 
 	/* total number of bytes to copy */
-	count = min(count, (size_t)write_spacefree(rt_read, rt->rt_write,
-							rt->buffer_size));
+	count = min_t(size_t, count, write_spacefree(rt_read, rt->rt_write,
+						     rt->buffer_size));
 
 	/* first bit from write pointer to the end of the buffer, or count */
 	fl = min(count, (size_t) rt->buffer_size - rt->rt_write);
@@ -372,9 +307,8 @@
 		goto out;
 
 	/* if there's any left copy to the beginning of the buffer */
-	if (count - fl) {
+	if (count - fl)
 		failed = copy_from_user(rt->rt_buffer, buffer + fl, count - fl);
-	}
 
 out:
 	count -= failed;
@@ -384,6 +318,8 @@
 	smp_wmb();
 	mutex_unlock(&channel_wqs[index].mutex);
 
+	_interrupt_sp();
+
 	return count;
 }
 
@@ -398,7 +334,7 @@
 	return rtlx_release(iminor(inode));
 }
 
-static unsigned int file_poll(struct file *file, poll_table * wait)
+static unsigned int file_poll(struct file *file, poll_table *wait)
 {
 	int minor = iminor(file_inode(file));
 	unsigned int mask = 0;
@@ -420,21 +356,20 @@
 	return mask;
 }
 
-static ssize_t file_read(struct file *file, char __user * buffer, size_t count,
-			 loff_t * ppos)
+static ssize_t file_read(struct file *file, char __user *buffer, size_t count,
+			 loff_t *ppos)
 {
 	int minor = iminor(file_inode(file));
 
 	/* data available? */
-	if (!rtlx_read_poll(minor, (file->f_flags & O_NONBLOCK) ? 0 : 1)) {
-		return 0;	// -EAGAIN makes cat whinge
-	}
+	if (!rtlx_read_poll(minor, (file->f_flags & O_NONBLOCK) ? 0 : 1))
+		return 0;	/* -EAGAIN makes 'cat' whine */
 
 	return rtlx_read(minor, buffer, count);
 }
 
-static ssize_t file_write(struct file *file, const char __user * buffer,
-			  size_t count, loff_t * ppos)
+static ssize_t file_write(struct file *file, const char __user *buffer,
+			  size_t count, loff_t *ppos)
 {
 	int minor = iminor(file_inode(file));
 
@@ -454,100 +389,16 @@
 	return rtlx_write(minor, buffer, count);
 }
 
-static const struct file_operations rtlx_fops = {
+const struct file_operations rtlx_fops = {
 	.owner =   THIS_MODULE,
-	.open =	   file_open,
+	.open =    file_open,
 	.release = file_release,
 	.write =   file_write,
-	.read =	   file_read,
-	.poll =	   file_poll,
+	.read =    file_read,
+	.poll =    file_poll,
 	.llseek =  noop_llseek,
 };
 
-static struct irqaction rtlx_irq = {
-	.handler	= rtlx_interrupt,
-	.name		= "RTLX",
-};
-
-static int rtlx_irq_num = MIPS_CPU_IRQ_BASE + MIPS_CPU_RTLX_IRQ;
-
-static char register_chrdev_failed[] __initdata =
-	KERN_ERR "rtlx_module_init: unable to register device\n";
-
-static int __init rtlx_module_init(void)
-{
-	struct device *dev;
-	int i, err;
-
-	if (!cpu_has_mipsmt) {
-		printk("VPE loader: not a MIPS MT capable processor\n");
-		return -ENODEV;
-	}
-
-	if (tclimit == 0) {
-		printk(KERN_WARNING "No TCs reserved for AP/SP, not "
-		       "initializing RTLX.\nPass maxtcs=<n> argument as kernel "
-		       "argument\n");
-
-		return -ENODEV;
-	}
-
-	major = register_chrdev(0, module_name, &rtlx_fops);
-	if (major < 0) {
-		printk(register_chrdev_failed);
-		return major;
-	}
-
-	/* initialise the wait queues */
-	for (i = 0; i < RTLX_CHANNELS; i++) {
-		init_waitqueue_head(&channel_wqs[i].rt_queue);
-		init_waitqueue_head(&channel_wqs[i].lx_queue);
-		atomic_set(&channel_wqs[i].in_open, 0);
-		mutex_init(&channel_wqs[i].mutex);
-
-		dev = device_create(mt_class, NULL, MKDEV(major, i), NULL,
-				    "%s%d", module_name, i);
-		if (IS_ERR(dev)) {
-			err = PTR_ERR(dev);
-			goto out_chrdev;
-		}
-	}
-
-	/* set up notifiers */
-	notify.start = starting;
-	notify.stop = stopping;
-	vpe_notify(tclimit, &notify);
-
-	if (cpu_has_vint)
-		set_vi_handler(MIPS_CPU_RTLX_IRQ, rtlx_dispatch);
-	else {
-		pr_err("APRP RTLX init on non-vectored-interrupt processor\n");
-		err = -ENODEV;
-		goto out_chrdev;
-	}
-
-	rtlx_irq.dev_id = rtlx;
-	setup_irq(rtlx_irq_num, &rtlx_irq);
-
-	return 0;
-
-out_chrdev:
-	for (i = 0; i < RTLX_CHANNELS; i++)
-		device_destroy(mt_class, MKDEV(major, i));
-
-	return err;
-}
-
-static void __exit rtlx_module_exit(void)
-{
-	int i;
-
-	for (i = 0; i < RTLX_CHANNELS; i++)
-		device_destroy(mt_class, MKDEV(major, i));
-
-	unregister_chrdev(major, module_name);
-}
-
 module_init(rtlx_module_init);
 module_exit(rtlx_module_exit);
 
diff --git a/arch/mips/kernel/segment.c b/arch/mips/kernel/segment.c
new file mode 100644
index 0000000..076ead2
--- /dev/null
+++ b/arch/mips/kernel/segment.c
@@ -0,0 +1,110 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 Imagination Technologies Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <asm/cpu.h>
+#include <asm/mipsregs.h>
+
+static void build_segment_config(char *str, unsigned int cfg)
+{
+	unsigned int am;
+	static const char * const am_str[] = {
+		"UK", "MK", "MSK", "MUSK", "MUSUK", "USK",
+		"RSRVD", "UUSK"};
+
+	/* Segment access mode. */
+	am = (cfg & MIPS_SEGCFG_AM) >> MIPS_SEGCFG_AM_SHIFT;
+	str += sprintf(str, "%-5s", am_str[am]);
+
+	/*
+	 * Access modes MK, MSK and MUSK are mapped segments. Therefore
+	 * there is no direct physical address mapping.
+	 */
+	if ((am == 0) || (am > 3)) {
+		str += sprintf(str, "         %03lx",
+			((cfg & MIPS_SEGCFG_PA) >> MIPS_SEGCFG_PA_SHIFT));
+		str += sprintf(str, "         %01ld",
+			((cfg & MIPS_SEGCFG_C) >> MIPS_SEGCFG_C_SHIFT));
+	} else {
+		str += sprintf(str, "         UND");
+		str += sprintf(str, "         U");
+	}
+
+	/* Exception configuration. */
+	str += sprintf(str, "       %01ld\n",
+		((cfg & MIPS_SEGCFG_EU) >> MIPS_SEGCFG_EU_SHIFT));
+}
+
+static int show_segments(struct seq_file *m, void *v)
+{
+	unsigned int segcfg;
+	char str[42];
+
+	seq_puts(m, "Segment   Virtual    Size   Access Mode   Physical   Caching   EU\n");
+	seq_puts(m, "-------   -------    ----   -----------   --------   -------   --\n");
+
+	segcfg = read_c0_segctl0();
+	build_segment_config(str, segcfg);
+	seq_printf(m, "   0      e0000000   512M      %s", str);
+
+	segcfg >>= 16;
+	build_segment_config(str, segcfg);
+	seq_printf(m, "   1      c0000000   512M      %s", str);
+
+	segcfg = read_c0_segctl1();
+	build_segment_config(str, segcfg);
+	seq_printf(m, "   2      a0000000   512M      %s", str);
+
+	segcfg >>= 16;
+	build_segment_config(str, segcfg);
+	seq_printf(m, "   3      80000000   512M      %s", str);
+
+	segcfg = read_c0_segctl2();
+	build_segment_config(str, segcfg);
+	seq_printf(m, "   4      40000000    1G       %s", str);
+
+	segcfg >>= 16;
+	build_segment_config(str, segcfg);
+	seq_printf(m, "   5      00000000    1G       %s\n", str);
+
+	return 0;
+}
+
+static int segments_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_segments, NULL);
+}
+
+static const struct file_operations segments_fops = {
+	.open		= segments_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init segments_info(void)
+{
+	extern struct dentry *mips_debugfs_dir;
+	struct dentry *segments;
+
+	if (cpu_has_segments) {
+		if (!mips_debugfs_dir)
+			return -ENODEV;
+
+		segments = debugfs_create_file("segments", S_IRUGO,
+					       mips_debugfs_dir, NULL,
+					       &segments_fops);
+		if (!segments)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+device_initcall(segments_info);
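
Once this lands, the table built by show_segments() can be read from
userspace; a minimal sketch, assuming debugfs is mounted at
/sys/kernel/debug and that mips_debugfs_dir is the usual "mips" directory:

#include <stdio.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/sys/kernel/debug/mips/segments", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return 0;
}
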
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 2f285ab..5199563 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -71,8 +71,9 @@
 	int err;
 	while (1) {
 		lock_fpu_owner();
-		own_fpu_inatomic(1);
-		err = save_fp_context(sc); /* this might fail */
+		err = own_fpu_inatomic(1);
+		if (!err)
+			err = save_fp_context(sc); /* this might fail */
 		unlock_fpu_owner();
 		if (likely(!err))
 			break;
@@ -91,8 +92,9 @@
 	int err, tmp __maybe_unused;
 	while (1) {
 		lock_fpu_owner();
-		own_fpu_inatomic(0);
-		err = restore_fp_context(sc); /* this might fail */
+		err = own_fpu_inatomic(0);
+		if (!err)
+			err = restore_fp_context(sc); /* this might fail */
 		unlock_fpu_owner();
 		if (likely(!err))
 			break;
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index 1905a41..3d60f77 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -85,8 +85,9 @@
 	int err;
 	while (1) {
 		lock_fpu_owner();
-		own_fpu_inatomic(1);
-		err = save_fp_context32(sc); /* this might fail */
+		err = own_fpu_inatomic(1);
+		if (!err)
+			err = save_fp_context32(sc); /* this might fail */
 		unlock_fpu_owner();
 		if (likely(!err))
 			break;
@@ -105,8 +106,9 @@
 	int err, tmp __maybe_unused;
 	while (1) {
 		lock_fpu_owner();
-		own_fpu_inatomic(0);
-		err = restore_fp_context32(sc); /* this might fail */
+		err = own_fpu_inatomic(0);
+		if (!err)
+			err = restore_fp_context32(sc); /* this might fail */
 		unlock_fpu_owner();
 		if (likely(!err))
 			break;
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index 2362665..ea4c2dc 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -49,8 +49,10 @@
 unsigned long bmips_smp_boot_sp;
 unsigned long bmips_smp_boot_gp;
 
-static void bmips_send_ipi_single(int cpu, unsigned int action);
-static irqreturn_t bmips_ipi_interrupt(int irq, void *dev_id);
+static void bmips43xx_send_ipi_single(int cpu, unsigned int action);
+static void bmips5000_send_ipi_single(int cpu, unsigned int action);
+static irqreturn_t bmips43xx_ipi_interrupt(int irq, void *dev_id);
+static irqreturn_t bmips5000_ipi_interrupt(int irq, void *dev_id);
 
 /* SW interrupts 0,1 are used for interprocessor signaling */
 #define IPI0_IRQ			(MIPS_CPU_IRQ_BASE + 0)
@@ -64,49 +66,58 @@
 static void __init bmips_smp_setup(void)
 {
 	int i, cpu = 1, boot_cpu = 0;
-
-#if defined(CONFIG_CPU_BMIPS4350) || defined(CONFIG_CPU_BMIPS4380)
 	int cpu_hw_intr;
 
-	/* arbitration priority */
-	clear_c0_brcm_cmt_ctrl(0x30);
+	switch (current_cpu_type()) {
+	case CPU_BMIPS4350:
+	case CPU_BMIPS4380:
+		/* arbitration priority */
+		clear_c0_brcm_cmt_ctrl(0x30);
 
-	/* NBK and weak order flags */
-	set_c0_brcm_config_0(0x30000);
+		/* NBK and weak order flags */
+		set_c0_brcm_config_0(0x30000);
 
-	/* Find out if we are running on TP0 or TP1 */
-	boot_cpu = !!(read_c0_brcm_cmt_local() & (1 << 31));
+		/* Find out if we are running on TP0 or TP1 */
+		boot_cpu = !!(read_c0_brcm_cmt_local() & (1 << 31));
 
-	/*
-	 * MIPS interrupts 0,1 (SW INT 0,1) cross over to the other thread
-	 * MIPS interrupt 2 (HW INT 0) is the CPU0 L1 controller output
-	 * MIPS interrupt 3 (HW INT 1) is the CPU1 L1 controller output
-	 */
-	if (boot_cpu == 0)
-		cpu_hw_intr = 0x02;
-	else
-		cpu_hw_intr = 0x1d;
+		/*
+		 * MIPS interrupts 0,1 (SW INT 0,1) cross over to the other
+		 * thread
+		 * MIPS interrupt 2 (HW INT 0) is the CPU0 L1 controller output
+		 * MIPS interrupt 3 (HW INT 1) is the CPU1 L1 controller output
+		 */
+		if (boot_cpu == 0)
+			cpu_hw_intr = 0x02;
+		else
+			cpu_hw_intr = 0x1d;
 
-	change_c0_brcm_cmt_intr(0xf8018000, (cpu_hw_intr << 27) | (0x03 << 15));
+		change_c0_brcm_cmt_intr(0xf8018000,
+					(cpu_hw_intr << 27) | (0x03 << 15));
 
-	/* single core, 2 threads (2 pipelines) */
-	max_cpus = 2;
-#elif defined(CONFIG_CPU_BMIPS5000)
-	/* enable raceless SW interrupts */
-	set_c0_brcm_config(0x03 << 22);
+		/* single core, 2 threads (2 pipelines) */
+		max_cpus = 2;
 
-	/* route HW interrupt 0 to CPU0, HW interrupt 1 to CPU1 */
-	change_c0_brcm_mode(0x1f << 27, 0x02 << 27);
+		break;
+	case CPU_BMIPS5000:
+		/* enable raceless SW interrupts */
+		set_c0_brcm_config(0x03 << 22);
 
-	/* N cores, 2 threads per core */
-	max_cpus = (((read_c0_brcm_config() >> 6) & 0x03) + 1) << 1;
+		/* route HW interrupt 0 to CPU0, HW interrupt 1 to CPU1 */
+		change_c0_brcm_mode(0x1f << 27, 0x02 << 27);
 
-	/* clear any pending SW interrupts */
-	for (i = 0; i < max_cpus; i++) {
-		write_c0_brcm_action(ACTION_CLR_IPI(i, 0));
-		write_c0_brcm_action(ACTION_CLR_IPI(i, 1));
+		/* N cores, 2 threads per core */
+		max_cpus = (((read_c0_brcm_config() >> 6) & 0x03) + 1) << 1;
+
+		/* clear any pending SW interrupts */
+		for (i = 0; i < max_cpus; i++) {
+			write_c0_brcm_action(ACTION_CLR_IPI(i, 0));
+			write_c0_brcm_action(ACTION_CLR_IPI(i, 1));
+		}
+
+		break;
+	default:
+		max_cpus = 1;
 	}
-#endif
 
 	if (!bmips_smp_enabled)
 		max_cpus = 1;
@@ -134,6 +145,20 @@
  */
 static void bmips_prepare_cpus(unsigned int max_cpus)
 {
+	irqreturn_t (*bmips_ipi_interrupt)(int irq, void *dev_id);
+
+	switch (current_cpu_type()) {
+	case CPU_BMIPS4350:
+	case CPU_BMIPS4380:
+		bmips_ipi_interrupt = bmips43xx_ipi_interrupt;
+		break;
+	case CPU_BMIPS5000:
+		bmips_ipi_interrupt = bmips5000_ipi_interrupt;
+		break;
+	default:
+		return;
+	}
+
 	if (request_irq(IPI0_IRQ, bmips_ipi_interrupt, IRQF_PERCPU,
 			"smp_ipi0", NULL))
 		panic("Can't request IPI0 interrupt");
@@ -168,26 +193,39 @@
 
 	pr_info("SMP: Booting CPU%d...\n", cpu);
 
-	if (cpumask_test_cpu(cpu, &bmips_booted_mask))
-		bmips_send_ipi_single(cpu, 0);
-	else {
-#if defined(CONFIG_CPU_BMIPS4350) || defined(CONFIG_CPU_BMIPS4380)
-		/* Reset slave TP1 if booting from TP0 */
-		if (cpu_logical_map(cpu) == 1)
-			set_c0_brcm_cmt_ctrl(0x01);
-#elif defined(CONFIG_CPU_BMIPS5000)
-		if (cpu & 0x01)
-			write_c0_brcm_action(ACTION_BOOT_THREAD(cpu));
-		else {
-			/*
-			 * core N thread 0 was already booted; just
-			 * pulse the NMI line
-			 */
-			bmips_write_zscm_reg(0x210, 0xc0000000);
-			udelay(10);
-			bmips_write_zscm_reg(0x210, 0x00);
+	if (cpumask_test_cpu(cpu, &bmips_booted_mask)) {
+		switch (current_cpu_type()) {
+		case CPU_BMIPS4350:
+		case CPU_BMIPS4380:
+			bmips43xx_send_ipi_single(cpu, 0);
+			break;
+		case CPU_BMIPS5000:
+			bmips5000_send_ipi_single(cpu, 0);
+			break;
 		}
-#endif
+	} else {
+		switch (current_cpu_type()) {
+		case CPU_BMIPS4350:
+		case CPU_BMIPS4380:
+			/* Reset slave TP1 if booting from TP0 */
+			if (cpu_logical_map(cpu) == 1)
+				set_c0_brcm_cmt_ctrl(0x01);
+			break;
+		case CPU_BMIPS5000:
+			if (cpu & 0x01)
+				write_c0_brcm_action(ACTION_BOOT_THREAD(cpu));
+			else {
+				/*
+				 * core N thread 0 was already booted; just
+				 * pulse the NMI line
+				 */
+				bmips_write_zscm_reg(0x210, 0xc0000000);
+				udelay(10);
+				bmips_write_zscm_reg(0x210, 0x00);
+			}
+			break;
+		}
 		cpumask_set_cpu(cpu, &bmips_booted_mask);
 	}
 }
@@ -199,26 +237,32 @@
 {
 	/* move NMI vector to kseg0, in case XKS01 is enabled */
 
-#if defined(CONFIG_CPU_BMIPS4350) || defined(CONFIG_CPU_BMIPS4380)
-	void __iomem *cbr = BMIPS_GET_CBR();
+	void __iomem *cbr;
 	unsigned long old_vec;
 	unsigned long relo_vector;
 	int boot_cpu;
 
-	boot_cpu = !!(read_c0_brcm_cmt_local() & (1 << 31));
-	relo_vector = boot_cpu ? BMIPS_RELO_VECTOR_CONTROL_0 :
-			  BMIPS_RELO_VECTOR_CONTROL_1;
+	switch (current_cpu_type()) {
+	case CPU_BMIPS4350:
+	case CPU_BMIPS4380:
+		cbr = BMIPS_GET_CBR();
 
-	old_vec = __raw_readl(cbr + relo_vector);
-	__raw_writel(old_vec & ~0x20000000, cbr + relo_vector);
+		boot_cpu = !!(read_c0_brcm_cmt_local() & (1 << 31));
+		relo_vector = boot_cpu ? BMIPS_RELO_VECTOR_CONTROL_0 :
+				  BMIPS_RELO_VECTOR_CONTROL_1;
 
-	clear_c0_cause(smp_processor_id() ? C_SW1 : C_SW0);
-#elif defined(CONFIG_CPU_BMIPS5000)
-	write_c0_brcm_bootvec(read_c0_brcm_bootvec() &
-		(smp_processor_id() & 0x01 ? ~0x20000000 : ~0x2000));
+		old_vec = __raw_readl(cbr + relo_vector);
+		__raw_writel(old_vec & ~0x20000000, cbr + relo_vector);
 
-	write_c0_brcm_action(ACTION_CLR_IPI(smp_processor_id(), 0));
-#endif
+		clear_c0_cause(smp_processor_id() ? C_SW1 : C_SW0);
+		break;
+	case CPU_BMIPS5000:
+		write_c0_brcm_bootvec(read_c0_brcm_bootvec() &
+			(smp_processor_id() & 0x01 ? ~0x20000000 : ~0x2000));
+
+		write_c0_brcm_action(ACTION_CLR_IPI(smp_processor_id(), 0));
+		break;
+	}
 }
 
 /*
@@ -243,8 +287,6 @@
 {
 }
 
-#if defined(CONFIG_CPU_BMIPS5000)
-
 /*
  * BMIPS5000 raceless IPIs
  *
@@ -253,12 +295,12 @@
  * IPI1 is used for SMP_CALL_FUNCTION
  */
 
-static void bmips_send_ipi_single(int cpu, unsigned int action)
+static void bmips5000_send_ipi_single(int cpu, unsigned int action)
 {
 	write_c0_brcm_action(ACTION_SET_IPI(cpu, action == SMP_CALL_FUNCTION));
 }
 
-static irqreturn_t bmips_ipi_interrupt(int irq, void *dev_id)
+static irqreturn_t bmips5000_ipi_interrupt(int irq, void *dev_id)
 {
 	int action = irq - IPI0_IRQ;
 
@@ -272,7 +314,14 @@
 	return IRQ_HANDLED;
 }
 
-#else
+static void bmips5000_send_ipi_mask(const struct cpumask *mask,
+	unsigned int action)
+{
+	unsigned int i;
+
+	for_each_cpu(i, mask)
+		bmips5000_send_ipi_single(i, action);
+}
 
 /*
  * BMIPS43xx racey IPIs
@@ -287,7 +336,7 @@
 static DEFINE_SPINLOCK(ipi_lock);
 static DEFINE_PER_CPU(int, ipi_action_mask);
 
-static void bmips_send_ipi_single(int cpu, unsigned int action)
+static void bmips43xx_send_ipi_single(int cpu, unsigned int action)
 {
 	unsigned long flags;
 
@@ -298,7 +347,7 @@
 	spin_unlock_irqrestore(&ipi_lock, flags);
 }
 
-static irqreturn_t bmips_ipi_interrupt(int irq, void *dev_id)
+static irqreturn_t bmips43xx_ipi_interrupt(int irq, void *dev_id)
 {
 	unsigned long flags;
 	int action, cpu = irq - IPI0_IRQ;
@@ -317,15 +366,13 @@
 	return IRQ_HANDLED;
 }
 
-#endif /* BMIPS type */
-
-static void bmips_send_ipi_mask(const struct cpumask *mask,
+static void bmips43xx_send_ipi_mask(const struct cpumask *mask,
 	unsigned int action)
 {
 	unsigned int i;
 
 	for_each_cpu(i, mask)
-		bmips_send_ipi_single(i, action);
+		bmips43xx_send_ipi_single(i, action);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -381,15 +428,30 @@
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
-struct plat_smp_ops bmips_smp_ops = {
+struct plat_smp_ops bmips43xx_smp_ops = {
 	.smp_setup		= bmips_smp_setup,
 	.prepare_cpus		= bmips_prepare_cpus,
 	.boot_secondary		= bmips_boot_secondary,
 	.smp_finish		= bmips_smp_finish,
 	.init_secondary		= bmips_init_secondary,
 	.cpus_done		= bmips_cpus_done,
-	.send_ipi_single	= bmips_send_ipi_single,
-	.send_ipi_mask		= bmips_send_ipi_mask,
+	.send_ipi_single	= bmips43xx_send_ipi_single,
+	.send_ipi_mask		= bmips43xx_send_ipi_mask,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_disable		= bmips_cpu_disable,
+	.cpu_die		= bmips_cpu_die,
+#endif
+};
+
+struct plat_smp_ops bmips5000_smp_ops = {
+	.smp_setup		= bmips_smp_setup,
+	.prepare_cpus		= bmips_prepare_cpus,
+	.boot_secondary		= bmips_boot_secondary,
+	.smp_finish		= bmips_smp_finish,
+	.init_secondary		= bmips_init_secondary,
+	.cpus_done		= bmips_cpus_done,
+	.send_ipi_single	= bmips5000_send_ipi_single,
+	.send_ipi_mask		= bmips5000_send_ipi_mask,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable		= bmips_cpu_disable,
 	.cpu_die		= bmips_cpu_die,
@@ -427,43 +489,47 @@
 
 	BUG_ON(ebase != CKSEG0);
 
-#if defined(CONFIG_CPU_BMIPS4350)
-	/*
-	 * BMIPS4350 cannot relocate the normal vectors, but it
-	 * can relocate the BEV=1 vectors.  So CPU1 starts up at
-	 * the relocated BEV=1, IV=0 general exception vector @
-	 * 0xa000_0380.
-	 *
-	 * set_uncached_handler() is used here because:
-	 *  - CPU1 will run this from uncached space
-	 *  - None of the cacheflush functions are set up yet
-	 */
-	set_uncached_handler(BMIPS_WARM_RESTART_VEC - CKSEG0,
-		&bmips_smp_int_vec, 0x80);
-	__sync();
-	return;
-#elif defined(CONFIG_CPU_BMIPS4380)
-	/*
-	 * 0x8000_0000: reset/NMI (initially in kseg1)
-	 * 0x8000_0400: normal vectors
-	 */
-	new_ebase = 0x80000400;
-	cbr = BMIPS_GET_CBR();
-	__raw_writel(0x80080800, cbr + BMIPS_RELO_VECTOR_CONTROL_0);
-	__raw_writel(0xa0080800, cbr + BMIPS_RELO_VECTOR_CONTROL_1);
-#elif defined(CONFIG_CPU_BMIPS5000)
-	/*
-	 * 0x8000_0000: reset/NMI (initially in kseg1)
-	 * 0x8000_1000: normal vectors
-	 */
-	new_ebase = 0x80001000;
-	write_c0_brcm_bootvec(0xa0088008);
-	write_c0_ebase(new_ebase);
-	if (max_cpus > 2)
-		bmips_write_zscm_reg(0xa0, 0xa008a008);
-#else
-	return;
-#endif
+	switch (current_cpu_type()) {
+	case CPU_BMIPS4350:
+		/*
+		 * BMIPS4350 cannot relocate the normal vectors, but it
+		 * can relocate the BEV=1 vectors.  So CPU1 starts up at
+		 * the relocated BEV=1, IV=0 general exception vector @
+		 * 0xa000_0380.
+		 *
+		 * set_uncached_handler() is used here because:
+		 *  - CPU1 will run this from uncached space
+		 *  - None of the cacheflush functions are set up yet
+		 */
+		set_uncached_handler(BMIPS_WARM_RESTART_VEC - CKSEG0,
+			&bmips_smp_int_vec, 0x80);
+		__sync();
+		return;
+	case CPU_BMIPS4380:
+		/*
+		 * 0x8000_0000: reset/NMI (initially in kseg1)
+		 * 0x8000_0400: normal vectors
+		 */
+		new_ebase = 0x80000400;
+		cbr = BMIPS_GET_CBR();
+		__raw_writel(0x80080800, cbr + BMIPS_RELO_VECTOR_CONTROL_0);
+		__raw_writel(0xa0080800, cbr + BMIPS_RELO_VECTOR_CONTROL_1);
+		break;
+	case CPU_BMIPS5000:
+		/*
+		 * 0x8000_0000: reset/NMI (initially in kseg1)
+		 * 0x8000_1000: normal vectors
+		 */
+		new_ebase = 0x80001000;
+		write_c0_brcm_bootvec(0xa0088008);
+		write_c0_ebase(new_ebase);
+		if (max_cpus > 2)
+			bmips_write_zscm_reg(0xa0, 0xa008a008);
+		break;
+	default:
+		return;
+	}
+
 	board_nmi_handler_setup = &bmips_nmi_handler_setup;
 	ebase = new_ebase;
 }
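The former compile-time #ifdef selection is now runtime dispatch, so one
kernel image can drive both BMIPS families. A hedged sketch of how board code
might pick the ops table at boot - register_smp_ops() and current_cpu_type()
are existing kernel interfaces, while the wrapper function itself is
illustrative:

	#include <asm/cpu-type.h>	/* current_cpu_type() */
	#include <asm/smp-ops.h>	/* register_smp_ops() */

	static void __init bmips_register_smp_ops(void)	/* hypothetical helper */
	{
		switch (current_cpu_type()) {
		case CPU_BMIPS4350:
		case CPU_BMIPS4380:
			register_smp_ops(&bmips43xx_smp_ops);
			break;
		case CPU_BMIPS5000:
			register_smp_ops(&bmips5000_smp_ops);
			break;
		default:
			break;	/* no SMP ops registered: stay uniprocessor */
		}
	}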
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
index 5969f1e..1b925d8 100644
--- a/arch/mips/kernel/smp-cmp.c
+++ b/arch/mips/kernel/smp-cmp.c
@@ -199,11 +199,14 @@
 	pr_debug("SMPCMP: CPU%d: %s max_cpus=%d\n",
 		 smp_processor_id(), __func__, max_cpus);
 
+#ifdef CONFIG_MIPS_MT
 	/*
 	 * FIXME: some of these options are per-system, some per-core and
 	 * some per-cpu
 	 */
 	mips_mt_set_cpuoptions();
+#endif
 }
 
 struct plat_smp_ops cmp_smp_ops = {
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 57a3f7a..0fb8cef 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -71,6 +71,7 @@
 
 		/* Record this as available CPU */
 		set_cpu_possible(tc, true);
+		set_cpu_present(tc, true);
 		__cpu_number_map[tc]	= ++ncpu;
 		__cpu_logical_map[ncpu] = tc;
 	}
@@ -112,12 +113,39 @@
 	write_tc_c0_tchalt(TCHALT_H);
 }
 
+#ifdef CONFIG_IRQ_GIC
+static void mp_send_ipi_single(int cpu, unsigned int action)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	switch (action) {
+	case SMP_CALL_FUNCTION:
+		gic_send_ipi(plat_ipi_call_int_xlate(cpu));
+		break;
+
+	case SMP_RESCHEDULE_YOURSELF:
+		gic_send_ipi(plat_ipi_resched_int_xlate(cpu));
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+#endif
+
 static void vsmp_send_ipi_single(int cpu, unsigned int action)
 {
 	int i;
 	unsigned long flags;
 	int vpflags;
 
+#ifdef CONFIG_IRQ_GIC
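+	/*
+	 * Prefer GIC-based IPIs whenever a GIC is present; otherwise
+	 * fall back to the cross-VPE software interrupts below.
+	 */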
+	if (gic_present) {
+		mp_send_ipi_single(cpu, action);
+		return;
+	}
+#endif
 	local_irq_save(flags);
 
 	vpflags = dvpe();	/* can't access the other CPU's registers whilst MVPE enabled */
diff --git a/arch/mips/kernel/spram.c b/arch/mips/kernel/spram.c
index 93f8681..b242e2c 100644
--- a/arch/mips/kernel/spram.c
+++ b/arch/mips/kernel/spram.c
@@ -8,7 +8,6 @@
  *
  * Copyright (C) 2007, 2008 MIPS Technologies, Inc.
  */
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/ptrace.h>
 #include <linux/stddef.h>
@@ -206,6 +205,8 @@
 	case CPU_34K:
 	case CPU_74K:
 	case CPU_1004K:
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
 		config0 = read_c0_config();
 		/* FIXME: addresses are Malta specific */
 		if (config0 & (1<<24)) {
diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
index 84536bf..c24ad5f 100644
--- a/arch/mips/kernel/sync-r4k.c
+++ b/arch/mips/kernel/sync-r4k.c
@@ -11,7 +11,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/irqflags.h>
 #include <linux/cpumask.h>
 
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index f9c8746..e0b4996 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -78,6 +78,7 @@
 extern asmlinkage void handle_ov(void);
 extern asmlinkage void handle_tr(void);
 extern asmlinkage void handle_fpe(void);
+extern asmlinkage void handle_ftlb(void);
 extern asmlinkage void handle_mdmx(void);
 extern asmlinkage void handle_watch(void);
 extern asmlinkage void handle_mt(void);
@@ -1080,7 +1081,7 @@
 	unsigned long old_epc, old31;
 	unsigned int opcode;
 	unsigned int cpid;
-	int status;
+	int status, err;
 	unsigned long __maybe_unused flags;
 
 	prev_state = exception_enter();
@@ -1153,19 +1154,19 @@
 
 	case 1:
 		if (used_math())	/* Using the FPU again.	 */
-			own_fpu(1);
+			err = own_fpu(1);
 		else {			/* First time FPU user.	 */
-			init_fpu();
+			err = init_fpu();
 			set_used_math();
 		}
 
-		if (!raw_cpu_has_fpu) {
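+		/*
+		 * If the FPU could not be claimed or initialized, fall
+		 * back to the emulator even on FPU-capable cores.
+		 */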
+		if (!raw_cpu_has_fpu || err) {
 			int sig;
 			void __user *fault_addr = NULL;
 			sig = fpu_emulator_cop1Handler(regs,
 						       &current->thread.fpu,
 						       0, &fault_addr);
-			if (!process_fpemu_return(sig, fault_addr))
+			if (!process_fpemu_return(sig, fault_addr) && !err)
 				mt_ase_fp_affinity();
 		}
 
@@ -1336,6 +1337,8 @@
 	case CPU_34K:
 	case CPU_74K:
 	case CPU_1004K:
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
 		{
 #define ERRCTL_PE	0x80000000
 #define ERRCTL_L2P	0x00800000
@@ -1425,14 +1428,27 @@
 	printk("Decoded c0_cacheerr: %s cache fault in %s reference.\n",
 	       reg_val & (1<<30) ? "secondary" : "primary",
 	       reg_val & (1<<31) ? "data" : "insn");
-	printk("Error bits: %s%s%s%s%s%s%s\n",
-	       reg_val & (1<<29) ? "ED " : "",
-	       reg_val & (1<<28) ? "ET " : "",
-	       reg_val & (1<<26) ? "EE " : "",
-	       reg_val & (1<<25) ? "EB " : "",
-	       reg_val & (1<<24) ? "EI " : "",
-	       reg_val & (1<<23) ? "E1 " : "",
-	       reg_val & (1<<22) ? "E0 " : "");
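+	/*
+	 * R2 cores from MIPS Technologies report an additional ES bit
+	 * (bit 27) in CacheErr, hence the wider decode.
+	 */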
+	if (cpu_has_mips_r2 &&
+	    ((current_cpu_data.processor_id & 0xff0000) == PRID_COMP_MIPS)) {
+		pr_err("Error bits: %s%s%s%s%s%s%s%s\n",
+			reg_val & (1<<29) ? "ED " : "",
+			reg_val & (1<<28) ? "ET " : "",
+			reg_val & (1<<27) ? "ES " : "",
+			reg_val & (1<<26) ? "EE " : "",
+			reg_val & (1<<25) ? "EB " : "",
+			reg_val & (1<<24) ? "EI " : "",
+			reg_val & (1<<23) ? "E1 " : "",
+			reg_val & (1<<22) ? "E0 " : "");
+	} else {
+		pr_err("Error bits: %s%s%s%s%s%s%s\n",
+			reg_val & (1<<29) ? "ED " : "",
+			reg_val & (1<<28) ? "ET " : "",
+			reg_val & (1<<26) ? "EE " : "",
+			reg_val & (1<<25) ? "EB " : "",
+			reg_val & (1<<24) ? "EI " : "",
+			reg_val & (1<<23) ? "E1 " : "",
+			reg_val & (1<<22) ? "E0 " : "");
+	}
 	printk("IDX: 0x%08x\n", reg_val & ((1<<22)-1));
 
 #if defined(CONFIG_CPU_MIPS32) || defined(CONFIG_CPU_MIPS64)
@@ -1446,6 +1462,34 @@
 	panic("Can't handle the cache error!");
 }
 
+asmlinkage void do_ftlb(void)
+{
+	const int field = 2 * sizeof(unsigned long);
+	unsigned int reg_val;
+
+	/* For the moment, report the problem and hang. */
+	if (cpu_has_mips_r2 &&
+	    ((current_cpu_data.processor_id & 0xff0000) == PRID_COMP_MIPS)) {
+		pr_err("FTLB error exception, cp0_ecc=0x%08x:\n",
+		       read_c0_ecc());
+		pr_err("cp0_errorepc == %0*lx\n", field, read_c0_errorepc());
+		reg_val = read_c0_cacheerr();
+		pr_err("c0_cacheerr == %08x\n", reg_val);
+
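+		/*
+		 * Bits 31:30 both set flags an FTLB parity error; any
+		 * other combination decodes like an ordinary cache error.
+		 */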
+		if ((reg_val & 0xc0000000) == 0xc0000000) {
+			pr_err("Decoded c0_cacheerr: FTLB parity error\n");
+		} else {
+			pr_err("Decoded c0_cacheerr: %s cache fault in %s reference.\n",
+			       reg_val & (1<<30) ? "secondary" : "primary",
+			       reg_val & (1<<31) ? "data" : "insn");
+		}
+	} else {
+		pr_err("FTLB error exception\n");
+	}
+	/* Just print the cacheerr bits for now */
+	cache_parity_error();
+}
+
 /*
  * SDBBP EJTAG debug exception handler.
  * We skip the instruction and return to the next instruction.
@@ -1995,6 +2039,7 @@
 	if (cpu_has_fpu && !cpu_has_nofpuex)
 		set_except_vector(15, handle_fpe);
 
+	set_except_vector(16, handle_ftlb);
 	set_except_vector(22, handle_mdmx);
 
 	if (cpu_has_mcheck)
diff --git a/arch/mips/kernel/vpe-cmp.c b/arch/mips/kernel/vpe-cmp.c
new file mode 100644
index 0000000..9268ebc
--- /dev/null
+++ b/arch/mips/kernel/vpe-cmp.c
@@ -0,0 +1,180 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004, 2005 MIPS Technologies, Inc.  All rights reserved.
+ * Copyright (C) 2013 Imagination Technologies Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <asm/vpe.h>
+
+static int major;
+
+void cleanup_tc(struct tc *tc)
+{
+}
+
+static ssize_t store_kill(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t len)
+{
+	struct vpe *vpe = get_vpe(aprp_cpu_index());
+	struct vpe_notifications *notifier;
+
+	list_for_each_entry(notifier, &vpe->notify, list)
+		notifier->stop(aprp_cpu_index());
+
+	release_progmem(vpe->load_addr);
+	vpe->state = VPE_STATE_UNUSED;
+
+	return len;
+}
+static DEVICE_ATTR(kill, S_IWUSR, NULL, store_kill);
+
+static ssize_t ntcs_show(struct device *cd, struct device_attribute *attr,
+			 char *buf)
+{
+	struct vpe *vpe = get_vpe(aprp_cpu_index());
+
+	return sprintf(buf, "%d\n", vpe->ntcs);
+}
+
+static ssize_t ntcs_store(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t len)
+{
+	struct vpe *vpe = get_vpe(aprp_cpu_index());
+	unsigned long new;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &new);
+	if (ret < 0)
+		return ret;
+
+	/* APRP can only reserve one TC in a VPE and no more. */
+	if (new != 1)
+		return -EINVAL;
+
+	vpe->ntcs = new;
+
+	return len;
+}
+static DEVICE_ATTR_RW(ntcs);
+
+static struct attribute *vpe_attrs[] = {
+	&dev_attr_kill.attr,
+	&dev_attr_ntcs.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vpe);
+
+static void vpe_device_release(struct device *cd)
+{
+	kfree(cd);
+}
+
+static struct class vpe_class = {
+	.name = "vpe",
+	.owner = THIS_MODULE,
+	.dev_release = vpe_device_release,
+	.dev_groups = vpe_groups,
+};
+
+static struct device vpe_device;
+
+int __init vpe_module_init(void)
+{
+	struct vpe *v = NULL;
+	struct tc *t;
+	int err;
+
+	if (!cpu_has_mipsmt) {
+		pr_warn("VPE loader: not a MIPS MT capable processor\n");
+		return -ENODEV;
+	}
+
+	if (num_possible_cpus() - aprp_cpu_index() < 1) {
+		pr_warn("No VPEs reserved for AP/SP, not initialize VPE loader\n"
+			"Pass maxcpus=<n> argument as kernel argument\n");
+		return -ENODEV;
+	}
+
+	major = register_chrdev(0, VPE_MODULE_NAME, &vpe_fops);
+	if (major < 0) {
+		pr_warn("VPE loader: unable to register character device\n");
+		return major;
+	}
+
+	err = class_register(&vpe_class);
+	if (err) {
+		pr_err("vpe_class registration failed\n");
+		goto out_chrdev;
+	}
+
+	device_initialize(&vpe_device);
+	vpe_device.class	= &vpe_class;
+	vpe_device.parent	= NULL;
+	dev_set_name(&vpe_device, "vpe_sp");
+	vpe_device.devt = MKDEV(major, VPE_MODULE_MINOR);
+	err = device_add(&vpe_device);
+	if (err) {
+		pr_err("Adding vpe_device failed\n");
+		goto out_class;
+	}
+
+	t = alloc_tc(aprp_cpu_index());
+	if (!t) {
+		pr_warn("VPE: unable to allocate TC\n");
+		err = -ENOMEM;
+		goto out_dev;
+	}
+
+	/* VPE */
+	v = alloc_vpe(aprp_cpu_index());
+	if (v == NULL) {
+		pr_warn("VPE: unable to allocate VPE\n");
+		kfree(t);
+		err = -ENOMEM;
+		goto out_dev;
+	}
+
+	v->ntcs = 1;
+
+	/* add the tc to the list of this vpe's tc's. */
+	list_add(&t->tc, &v->tc);
+
+	/* TC */
+	t->pvpe = v;	/* set the parent vpe */
+
+	return 0;
+
+out_dev:
+	device_del(&vpe_device);
+
+out_class:
+	class_unregister(&vpe_class);
+
+out_chrdev:
+	unregister_chrdev(major, VPE_MODULE_NAME);
+
+	return err;
+}
+
+void __exit vpe_module_exit(void)
+{
+	struct vpe *v, *n;
+
+	device_del(&vpe_device);
+	class_unregister(&vpe_class);
+	unregister_chrdev(major, VPE_MODULE_NAME);
+
+	/* No locking needed here */
+	list_for_each_entry_safe(v, n, &vpecontrol.vpe_list, list)
+		if (v->state != VPE_STATE_UNUSED)
+			release_vpe(v);
+}
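Both APRP backends fire the same vpe_notifications callbacks when an SP
program is killed. A consumer sketch, assuming vpe_notify() keeps its usual
asm/vpe.h signature (the handler names and bodies are illustrative):

	static void my_sp_started(int index)
	{
		pr_info("SP program on VPE %d started\n", index);
	}

	static void my_sp_stopped(int index)
	{
		pr_info("SP program on VPE %d stopped\n", index);
	}

	static struct vpe_notifications my_notify = {
		.start	= my_sp_started,
		.stop	= my_sp_stopped,
	};

	/* register for start/stop events on the AP/SP cpu index */
	vpe_notify(aprp_cpu_index(), &my_notify);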
diff --git a/arch/mips/kernel/vpe-mt.c b/arch/mips/kernel/vpe-mt.c
new file mode 100644
index 0000000..949ae0e
--- /dev/null
+++ b/arch/mips/kernel/vpe-mt.c
@@ -0,0 +1,523 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004, 2005 MIPS Technologies, Inc.  All rights reserved.
+ * Copyright (C) 2013 Imagination Technologies Ltd.
+ */
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+
+#include <asm/mipsregs.h>
+#include <asm/mipsmtregs.h>
+#include <asm/mips_mt.h>
+#include <asm/vpe.h>
+
+static int major;
+
+/* The number of TCs and VPEs physically available on the core */
+static int hw_tcs, hw_vpes;
+
+/* We are prepared so configure and start the VPE... */
+int vpe_run(struct vpe *v)
+{
+	unsigned long flags, val, dmt_flag;
+	struct vpe_notifications *notifier;
+	unsigned int vpeflags;
+	struct tc *t;
+
+	/* check we are the Master VPE */
+	local_irq_save(flags);
+	val = read_c0_vpeconf0();
+	if (!(val & VPECONF0_MVP)) {
+		pr_warn("VPE loader: only Master VPE's are able to config MT\n");
+		local_irq_restore(flags);
+
+		return -1;
+	}
+
+	dmt_flag = dmt();
+	vpeflags = dvpe();
+
+	if (list_empty(&v->tc)) {
+		evpe(vpeflags);
+		emt(dmt_flag);
+		local_irq_restore(flags);
+
+		pr_warn("VPE loader: No TC's associated with VPE %d\n",
+			v->minor);
+
+		return -ENOEXEC;
+	}
+
+	t = list_first_entry(&v->tc, struct tc, tc);
+
+	/* Put MVPE's into 'configuration state' */
+	set_c0_mvpcontrol(MVPCONTROL_VPC);
+
+	settc(t->index);
+
+	/* should check it is halted, and not activated */
+	if ((read_tc_c0_tcstatus() & TCSTATUS_A) ||
+	   !(read_tc_c0_tchalt() & TCHALT_H)) {
+		evpe(vpeflags);
+		emt(dmt_flag);
+		local_irq_restore(flags);
+
+		pr_warn("VPE loader: TC %d is already active!\n",
+			t->index);
+
+		return -ENOEXEC;
+	}
+
+	/*
+	 * Write the address we want it to start running from in the TCPC
+	 * register.
+	 */
+	write_tc_c0_tcrestart((unsigned long)v->__start);
+	write_tc_c0_tccontext((unsigned long)0);
+
+	/*
+	 * Mark the TC as activated, not interrupt exempt and not dynamically
+	 * allocatable
+	 */
+	val = read_tc_c0_tcstatus();
+	val = (val & ~(TCSTATUS_DA | TCSTATUS_IXMT)) | TCSTATUS_A;
+	write_tc_c0_tcstatus(val);
+
+	write_tc_c0_tchalt(read_tc_c0_tchalt() & ~TCHALT_H);
+
+	/*
+	 * The sde-kit passes 'memsize' to __start in $a3, so set something
+	 * here...  Or set $a3 to zero and define DFLT_STACK_SIZE and
+	 * DFLT_HEAP_SIZE when you compile your program
+	 */
+	mttgpr(6, v->ntcs);
+	mttgpr(7, physical_memsize);
+
+	/* set up VPE1 */
+	/*
+	 * bind the TC to VPE 1 as late as possible so we only have the final
+	 * VPE registers to set up, and so an EJTAG probe can trigger on it
+	 */
+	write_tc_c0_tcbind((read_tc_c0_tcbind() & ~TCBIND_CURVPE) | 1);
+
+	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() & ~(VPECONF0_VPA));
+
+	back_to_back_c0_hazard();
+
+	/* Set up the XTC bit in vpeconf0 to point at our tc */
+	write_vpe_c0_vpeconf0((read_vpe_c0_vpeconf0() & ~(VPECONF0_XTC))
+			      | (t->index << VPECONF0_XTC_SHIFT));
+
+	back_to_back_c0_hazard();
+
+	/* enable this VPE */
+	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);
+
+	/* clear out any left overs from a previous program */
+	write_vpe_c0_status(0);
+	write_vpe_c0_cause(0);
+
+	/* take system out of configuration state */
+	clear_c0_mvpcontrol(MVPCONTROL_VPC);
+
+	/*
+	 * SMTC/SMVP kernels manage VPE enable independently,
+	 * but uniprocessor kernels need to turn it on, even
+	 * if that wasn't the pre-dvpe() state.
+	 */
+#ifdef CONFIG_SMP
+	evpe(vpeflags);
+#else
+	evpe(EVPE_ENABLE);
+#endif
+	emt(dmt_flag);
+	local_irq_restore(flags);
+
+	list_for_each_entry(notifier, &v->notify, list)
+		notifier->start(VPE_MODULE_MINOR);
+
+	return 0;
+}
+
+void cleanup_tc(struct tc *tc)
+{
+	unsigned long flags;
+	unsigned int mtflags, vpflags;
+	int tmp;
+
+	local_irq_save(flags);
+	mtflags = dmt();
+	vpflags = dvpe();
+	/* Put MVPE's into 'configuration state' */
+	set_c0_mvpcontrol(MVPCONTROL_VPC);
+
+	settc(tc->index);
+	tmp = read_tc_c0_tcstatus();
+
+	/* mark not allocated and not dynamically allocatable */
+	tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
+	tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
+	write_tc_c0_tcstatus(tmp);
+
+	write_tc_c0_tchalt(TCHALT_H);
+	mips_ihb();
+
+	clear_c0_mvpcontrol(MVPCONTROL_VPC);
+	evpe(vpflags);
+	emt(mtflags);
+	local_irq_restore(flags);
+}
+
+/* module wrapper entry points */
+/* give me a vpe */
+void *vpe_alloc(void)
+{
+	int i;
+	struct vpe *v;
+
+	/* find a vpe */
+	for (i = 1; i < MAX_VPES; i++) {
+		v = get_vpe(i);
+		if (v != NULL) {
+			v->state = VPE_STATE_INUSE;
+			return v;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(vpe_alloc);
+
+/* start running from here */
+int vpe_start(void *vpe, unsigned long start)
+{
+	struct vpe *v = vpe;
+
+	v->__start = start;
+	return vpe_run(v);
+}
+EXPORT_SYMBOL(vpe_start);
+
+/* halt it for now */
+int vpe_stop(void *vpe)
+{
+	struct vpe *v = vpe;
+	struct tc *t;
+	unsigned int evpe_flags;
+
+	evpe_flags = dvpe();
+
+	t = list_entry(v->tc.next, struct tc, tc);
+	if (t != NULL) {
+		settc(t->index);
+		write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() & ~VPECONF0_VPA);
+	}
+
+	evpe(evpe_flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(vpe_stop);
+
+/* I've done with it thank you */
+int vpe_free(void *vpe)
+{
+	struct vpe *v = vpe;
+	struct tc *t;
+	unsigned int evpe_flags;
+
+	t = list_entry(v->tc.next, struct tc, tc);
+	if (t == NULL)
+		return -ENOEXEC;
+
+	evpe_flags = dvpe();
+
+	/* Put MVPE's into 'configuration state' */
+	set_c0_mvpcontrol(MVPCONTROL_VPC);
+
+	settc(t->index);
+	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() & ~VPECONF0_VPA);
+
+	/* halt the TC */
+	write_tc_c0_tchalt(TCHALT_H);
+	mips_ihb();
+
+	/* mark the TC unallocated */
+	write_tc_c0_tcstatus(read_tc_c0_tcstatus() & ~TCSTATUS_A);
+
+	v->state = VPE_STATE_UNUSED;
+
+	clear_c0_mvpcontrol(MVPCONTROL_VPC);
+	evpe(evpe_flags);
+
+	return 0;
+}
+EXPORT_SYMBOL(vpe_free);
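+
+/*
+ * Typical consumer flow (sketch): v = vpe_alloc(); vpe_start(v, entry);
+ * ...; vpe_stop(v); vpe_free(v). The entry address must be the loaded
+ * program's __start symbol as resolved by the ELF loader.
+ */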
+
+static ssize_t store_kill(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t len)
+{
+	struct vpe *vpe = get_vpe(aprp_cpu_index());
+	struct vpe_notifications *notifier;
+
+	list_for_each_entry(notifier, &vpe->notify, list)
+		notifier->stop(aprp_cpu_index());
+
+	release_progmem(vpe->load_addr);
+	cleanup_tc(get_tc(aprp_cpu_index()));
+	vpe_stop(vpe);
+	vpe_free(vpe);
+
+	return len;
+}
+static DEVICE_ATTR(kill, S_IWUSR, NULL, store_kill);
+
+static ssize_t ntcs_show(struct device *cd, struct device_attribute *attr,
+			 char *buf)
+{
+	struct vpe *vpe = get_vpe(aprp_cpu_index());
+
+	return sprintf(buf, "%d\n", vpe->ntcs);
+}
+
+static ssize_t ntcs_store(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t len)
+{
+	struct vpe *vpe = get_vpe(aprp_cpu_index());
+	unsigned long new;
+	int ret;
+
+	ret = kstrtoul(buf, 0, &new);
+	if (ret < 0)
+		return ret;
+
+	if (new == 0 || new > (hw_tcs - aprp_cpu_index()))
+		return -EINVAL;
+
+	vpe->ntcs = new;
+
+	return len;
+}
+static DEVICE_ATTR_RW(ntcs);
+
+static struct attribute *vpe_attrs[] = {
+	&dev_attr_kill.attr,
+	&dev_attr_ntcs.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(vpe);
+
+static void vpe_device_release(struct device *cd)
+{
+	kfree(cd);
+}
+
+static struct class vpe_class = {
+	.name = "vpe",
+	.owner = THIS_MODULE,
+	.dev_release = vpe_device_release,
+	.dev_groups = vpe_groups,
+};
+
+static struct device vpe_device;
+
+int __init vpe_module_init(void)
+{
+	unsigned int mtflags, vpflags;
+	unsigned long flags, val;
+	struct vpe *v = NULL;
+	struct tc *t;
+	int tc, err;
+
+	if (!cpu_has_mipsmt) {
+		pr_warn("VPE loader: not a MIPS MT capable processor\n");
+		return -ENODEV;
+	}
+
+	if (vpelimit == 0) {
+		pr_warn("No VPEs reserved for AP/SP, not initialize VPE loader\n"
+			"Pass maxvpes=<n> argument as kernel argument\n");
+
+		return -ENODEV;
+	}
+
+	if (aprp_cpu_index() == 0) {
+		pr_warn("No TCs reserved for AP/SP, not initialize VPE loader\n"
+			"Pass maxtcs=<n> argument as kernel argument\n");
+
+		return -ENODEV;
+	}
+
+	major = register_chrdev(0, VPE_MODULE_NAME, &vpe_fops);
+	if (major < 0) {
+		pr_warn("VPE loader: unable to register character device\n");
+		return major;
+	}
+
+	err = class_register(&vpe_class);
+	if (err) {
+		pr_err("vpe_class registration failed\n");
+		goto out_chrdev;
+	}
+
+	device_initialize(&vpe_device);
+	vpe_device.class	= &vpe_class;
+	vpe_device.parent	= NULL;
+	dev_set_name(&vpe_device, "vpe1");
+	vpe_device.devt = MKDEV(major, VPE_MODULE_MINOR);
+	err = device_add(&vpe_device);
+	if (err) {
+		pr_err("Adding vpe_device failed\n");
+		goto out_class;
+	}
+
+	local_irq_save(flags);
+	mtflags = dmt();
+	vpflags = dvpe();
+
+	/* Put MVPE's into 'configuration state' */
+	set_c0_mvpcontrol(MVPCONTROL_VPC);
+
+	val = read_c0_mvpconf0();
+	hw_tcs = (val & MVPCONF0_PTC) + 1;
+	hw_vpes = ((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
+
+	for (tc = aprp_cpu_index(); tc < hw_tcs; tc++) {
+		/*
+		 * Must re-enable multithreading temporarily or in case we
+		 * reschedule send IPIs or similar we might hang.
+		 */
+		clear_c0_mvpcontrol(MVPCONTROL_VPC);
+		evpe(vpflags);
+		emt(mtflags);
+		local_irq_restore(flags);
+		t = alloc_tc(tc);
+		if (!t) {
+			err = -ENOMEM;
+			goto out_dev;
+		}
+
+		local_irq_save(flags);
+		mtflags = dmt();
+		vpflags = dvpe();
+		set_c0_mvpcontrol(MVPCONTROL_VPC);
+
+		/* VPE's */
+		if (tc < hw_tcs) {
+			settc(tc);
+
+			v = alloc_vpe(tc);
+			if (v == NULL) {
+				pr_warn("VPE: unable to allocate VPE\n");
+				goto out_reenable;
+			}
+
+			v->ntcs = hw_tcs - aprp_cpu_index();
+
+			/* add the tc to the list of this vpe's tc's. */
+			list_add(&t->tc, &v->tc);
+
+			/* deactivate all but vpe0 */
+			if (tc >= aprp_cpu_index()) {
+				unsigned long tmp = read_vpe_c0_vpeconf0();
+
+				tmp &= ~VPECONF0_VPA;
+
+				/* master VPE */
+				tmp |= VPECONF0_MVP;
+				write_vpe_c0_vpeconf0(tmp);
+			}
+
+			/* disable multi-threading with TC's */
+			write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() &
+						~VPECONTROL_TE);
+
+			if (tc >= vpelimit) {
+				/*
+				 * Set config to be the same as vpe0,
+				 * particularly kseg0 coherency alg
+				 */
+				write_vpe_c0_config(read_c0_config());
+			}
+		}
+
+		/* TC's */
+		t->pvpe = v;	/* set the parent vpe */
+
+		if (tc >= aprp_cpu_index()) {
+			unsigned long tmp;
+
+			settc(tc);
+
+			/* Any TC that is bound to VPE0 gets left as is - in
+			 * case we are running SMTC on VPE0. A TC that is bound
+			 * to any other VPE gets bound to VPE0, ideally I'd like
+			 * to make it homeless but it doesn't appear to let me
+			 * bind a TC to a non-existent VPE. Which is perfectly
+			 * reasonable.
+			 *
+			 * The (un)bound state is visible to an EJTAG probe so
+			 * may notify GDB...
+			 */
+			tmp = read_tc_c0_tcbind();
+			if (tmp & TCBIND_CURVPE) {
+				/* tc is bound >vpe0 */
+				write_tc_c0_tcbind(tmp & ~TCBIND_CURVPE);
+
+				t->pvpe = get_vpe(0);	/* set the parent vpe */
+			}
+
+			/* halt the TC */
+			write_tc_c0_tchalt(TCHALT_H);
+			mips_ihb();
+
+			tmp = read_tc_c0_tcstatus();
+
+			/* mark not activated and not dynamically allocatable */
+			tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
+			tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
+			write_tc_c0_tcstatus(tmp);
+		}
+	}
+
+out_reenable:
+	/* release config state */
+	clear_c0_mvpcontrol(MVPCONTROL_VPC);
+
+	evpe(vpflags);
+	emt(mtflags);
+	local_irq_restore(flags);
+
+	return 0;
+
+out_dev:
+	device_del(&vpe_device);
+
+out_class:
+	class_unregister(&vpe_class);
+
+out_chrdev:
+	unregister_chrdev(major, VPE_MODULE_NAME);
+
+	return err;
+}
+
+void __exit vpe_module_exit(void)
+{
+	struct vpe *v, *n;
+
+	device_del(&vpe_device);
+	class_unregister(&vpe_class);
+	unregister_chrdev(major, VPE_MODULE_NAME);
+
+	/* No locking needed here */
+	list_for_each_entry_safe(v, n, &vpecontrol.vpe_list, list) {
+		if (v->state != VPE_STATE_UNUSED)
+			release_vpe(v);
+	}
+}
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 2d5c142..11da314 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -1,37 +1,22 @@
 /*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
  * Copyright (C) 2004, 2005 MIPS Technologies, Inc.  All rights reserved.
+ * Copyright (C) 2013 Imagination Technologies Ltd.
  *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- */
-
-/*
- * VPE support module
- *
- * Provides support for loading a MIPS SP program on VPE1.
- * The SP environment is rather simple, no tlb's.  It needs to be relocatable
- * (or partially linked). You should initialise your stack in the startup
- * code. This loader looks for the symbol __start and sets up
- * execution to resume from there. The MIPS SDE kit contains suitable examples.
- *
- * To load and run, simply cat a SP 'program file' to /dev/vpe1.
- * i.e cat spapp >/dev/vpe1.
+ * VPE support module for loading a MIPS SP program into VPE1. The SP
+ * environment is rather simple since there are no TLBs. It needs
+ * to be relocatable (or partially linked). Initialize your stack in
+ * the startup code. The loader looks for the symbol __start and sets
+ * up execution to resume from there. To load and run, simply cat an
+ * SP 'binary' to the /dev/vpe1 device.
  */
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/fs.h>
 #include <linux/init.h>
-#include <asm/uaccess.h>
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/vmalloc.h>
@@ -46,13 +31,10 @@
 #include <asm/mipsmtregs.h>
 #include <asm/cacheflush.h>
 #include <linux/atomic.h>
-#include <asm/cpu.h>
 #include <asm/mips_mt.h>
 #include <asm/processor.h>
 #include <asm/vpe.h>
 
-typedef void *vpe_handle;
-
 #ifndef ARCH_SHF_SMALL
 #define ARCH_SHF_SMALL 0
 #endif
@@ -60,95 +42,15 @@
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
-/*
- * The number of TCs and VPEs physically available on the core
- */
-static int hw_tcs, hw_vpes;
-static char module_name[] = "vpe";
-static int major;
-static const int minor = 1;	/* fixed for now  */
-
-/* grab the likely amount of memory we will need. */
-#ifdef CONFIG_MIPS_VPE_LOADER_TOM
-#define P_SIZE (2 * 1024 * 1024)
-#else
-/* add an overhead to the max kmalloc size for non-striped symbols/etc */
-#define P_SIZE (256 * 1024)
-#endif
-
-extern unsigned long physical_memsize;
-
-#define MAX_VPES 16
-#define VPE_PATH_MAX 256
-
-enum vpe_state {
-	VPE_STATE_UNUSED = 0,
-	VPE_STATE_INUSE,
-	VPE_STATE_RUNNING
-};
-
-enum tc_state {
-	TC_STATE_UNUSED = 0,
-	TC_STATE_INUSE,
-	TC_STATE_RUNNING,
-	TC_STATE_DYNAMIC
-};
-
-struct vpe {
-	enum vpe_state state;
-
-	/* (device) minor associated with this vpe */
-	int minor;
-
-	/* elfloader stuff */
-	void *load_addr;
-	unsigned long len;
-	char *pbuffer;
-	unsigned long plen;
-	char cwd[VPE_PATH_MAX];
-
-	unsigned long __start;
-
-	/* tc's associated with this vpe */
-	struct list_head tc;
-
-	/* The list of vpe's */
-	struct list_head list;
-
-	/* shared symbol address */
-	void *shared_ptr;
-
-	/* the list of who wants to know when something major happens */
-	struct list_head notify;
-
-	unsigned int ntcs;
-};
-
-struct tc {
-	enum tc_state state;
-	int index;
-
-	struct vpe *pvpe;	/* parent VPE */
-	struct list_head tc;	/* The list of TC's with this VPE */
-	struct list_head list;	/* The global list of tc's */
-};
-
-struct {
-	spinlock_t vpe_list_lock;
-	struct list_head vpe_list;	/* Virtual processing elements */
-	spinlock_t tc_list_lock;
-	struct list_head tc_list;	/* Thread contexts */
-} vpecontrol = {
+struct vpe_control vpecontrol = {
 	.vpe_list_lock	= __SPIN_LOCK_UNLOCKED(vpe_list_lock),
 	.vpe_list	= LIST_HEAD_INIT(vpecontrol.vpe_list),
 	.tc_list_lock	= __SPIN_LOCK_UNLOCKED(tc_list_lock),
 	.tc_list	= LIST_HEAD_INIT(vpecontrol.tc_list)
 };
 
-static void release_progmem(void *ptr);
-
 /* get the vpe associated with this minor */
-static struct vpe *get_vpe(int minor)
+struct vpe *get_vpe(int minor)
 {
 	struct vpe *res, *v;
 
@@ -158,7 +60,7 @@
 	res = NULL;
 	spin_lock(&vpecontrol.vpe_list_lock);
 	list_for_each_entry(v, &vpecontrol.vpe_list, list) {
-		if (v->minor == minor) {
+		if (v->minor == VPE_MODULE_MINOR) {
 			res = v;
 			break;
 		}
@@ -169,7 +71,7 @@
 }
 
 /* get the vpe associated with this minor */
-static struct tc *get_tc(int index)
+struct tc *get_tc(int index)
 {
 	struct tc *res, *t;
 
@@ -187,12 +89,13 @@
 }
 
 /* allocate a vpe and associate it with this minor (or index) */
-static struct vpe *alloc_vpe(int minor)
+struct vpe *alloc_vpe(int minor)
 {
 	struct vpe *v;
 
-	if ((v = kzalloc(sizeof(struct vpe), GFP_KERNEL)) == NULL)
-		return NULL;
+	v = kzalloc(sizeof(struct vpe), GFP_KERNEL);
+	if (v == NULL)
+		goto out;
 
 	INIT_LIST_HEAD(&v->tc);
 	spin_lock(&vpecontrol.vpe_list_lock);
@@ -200,17 +103,19 @@
 	spin_unlock(&vpecontrol.vpe_list_lock);
 
 	INIT_LIST_HEAD(&v->notify);
-	v->minor = minor;
+	v->minor = VPE_MODULE_MINOR;
 
+out:
 	return v;
 }
 
 /* allocate a tc. At startup only tc0 is running, all other can be halted. */
-static struct tc *alloc_tc(int index)
+struct tc *alloc_tc(int index)
 {
 	struct tc *tc;
 
-	if ((tc = kzalloc(sizeof(struct tc), GFP_KERNEL)) == NULL)
+	tc = kzalloc(sizeof(struct tc), GFP_KERNEL);
+	if (tc == NULL)
 		goto out;
 
 	INIT_LIST_HEAD(&tc->tc);
@@ -225,7 +130,7 @@
 }
 
 /* clean up and free everything */
-static void release_vpe(struct vpe *v)
+void release_vpe(struct vpe *v)
 {
 	list_del(&v->list);
 	if (v->load_addr)
@@ -233,28 +138,8 @@
 	kfree(v);
 }
 
-static void __maybe_unused dump_mtregs(void)
-{
-	unsigned long val;
-
-	val = read_c0_config3();
-	printk("config3 0x%lx MT %ld\n", val,
-	       (val & CONFIG3_MT) >> CONFIG3_MT_SHIFT);
-
-	val = read_c0_mvpcontrol();
-	printk("MVPControl 0x%lx, STLB %ld VPC %ld EVP %ld\n", val,
-	       (val & MVPCONTROL_STLB) >> MVPCONTROL_STLB_SHIFT,
-	       (val & MVPCONTROL_VPC) >> MVPCONTROL_VPC_SHIFT,
-	       (val & MVPCONTROL_EVP));
-
-	val = read_c0_mvpconf0();
-	printk("mvpconf0 0x%lx, PVPE %ld PTC %ld M %ld\n", val,
-	       (val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT,
-	       val & MVPCONF0_PTC, (val & MVPCONF0_M) >> MVPCONF0_M_SHIFT);
-}
-
-/* Find some VPE program space	*/
-static void *alloc_progmem(unsigned long len)
+/* Find some VPE program space */
+void *alloc_progmem(unsigned long len)
 {
 	void *addr;
 
@@ -273,7 +158,7 @@
 	return addr;
 }
 
-static void release_progmem(void *ptr)
+void release_progmem(void *ptr)
 {
 #ifndef CONFIG_MIPS_VPE_LOADER_TOM
 	kfree(ptr);
@@ -281,7 +166,7 @@
 }
 
 /* Update size with this section: return offset. */
-static long get_offset(unsigned long *size, Elf_Shdr * sechdr)
+static long get_offset(unsigned long *size, Elf_Shdr *sechdr)
 {
 	long ret;
 
@@ -294,8 +179,8 @@
    might -- code, read-only data, read-write data, small data.	Tally
    sizes, and place the offsets into sh_entsize fields: high bit means it
    belongs in init. */
-static void layout_sections(struct module *mod, const Elf_Ehdr * hdr,
-			    Elf_Shdr * sechdrs, const char *secstrings)
+static void layout_sections(struct module *mod, const Elf_Ehdr *hdr,
+			    Elf_Shdr *sechdrs, const char *secstrings)
 {
 	static unsigned long const masks[][2] = {
 		/* NOTE: all executable code must be the first section
@@ -315,7 +200,6 @@
 		for (i = 0; i < hdr->e_shnum; ++i) {
 			Elf_Shdr *s = &sechdrs[i];
 
-			//  || strncmp(secstrings + s->sh_name, ".init", 5) == 0)
 			if ((s->sh_flags & masks[m][0]) != masks[m][0]
 			    || (s->sh_flags & masks[m][1])
 			    || s->sh_entsize != ~0UL)
@@ -330,7 +214,6 @@
 	}
 }
 
-
 /* from module-elf32.c, but subverted a little */
 
 struct mips_hi16 {
@@ -353,20 +236,18 @@
 {
 	int rel;
 
-	if( !(*location & 0xffff) ) {
+	if (!(*location & 0xffff)) {
 		rel = (int)v - gp_addr;
-	}
-	else {
+	} else {
 		/* .sbss + gp(relative) + offset */
 		/* kludge! */
 		rel =  (int)(short)((int)v + gp_offs +
 				    (int)(short)(*location & 0xffff) - gp_addr);
 	}
 
-	if( (rel > 32768) || (rel < -32768) ) {
-		printk(KERN_DEBUG "VPE loader: apply_r_mips_gprel16: "
-		       "relative address 0x%x out of range of gp register\n",
-		       rel);
+	if ((rel > 32768) || (rel < -32768)) {
+		pr_debug("VPE loader: apply_r_mips_gprel16: relative address 0x%x out of range of gp register\n",
+			 rel);
 		return -ENOEXEC;
 	}
 
@@ -380,12 +261,12 @@
 {
 	int rel;
 	rel = (((unsigned int)v - (unsigned int)location));
-	rel >>= 2;		// because the offset is in _instructions_ not bytes.
-	rel -= 1;		// and one instruction less due to the branch delay slot.
+	rel >>= 2; /* because the offset is in _instructions_ not bytes. */
+	rel -= 1;  /* and one instruction less due to the branch delay slot. */
 
-	if( (rel > 32768) || (rel < -32768) ) {
-		printk(KERN_DEBUG "VPE loader: "
-		       "apply_r_mips_pc16: relative address out of range 0x%x\n", rel);
+	if ((rel > 32768) || (rel < -32768)) {
+		pr_debug("VPE loader: apply_r_mips_pc16: relative address out of range 0x%x\n",
+			 rel);
 		return -ENOEXEC;
 	}
 
@@ -406,8 +287,7 @@
 			   Elf32_Addr v)
 {
 	if (v % 4) {
-		printk(KERN_DEBUG "VPE loader: apply_r_mips_26 "
-		       " unaligned relocation\n");
+		pr_debug("VPE loader: apply_r_mips_26: unaligned relocation\n");
 		return -ENOEXEC;
 	}
 
@@ -438,7 +318,7 @@
 	 * the carry we need to add.  Save the information, and let LO16 do the
 	 * actual relocation.
 	 */
-	n = kmalloc(sizeof *n, GFP_KERNEL);
+	n = kmalloc(sizeof(*n), GFP_KERNEL);
 	if (!n)
 		return -ENOMEM;
 
@@ -470,9 +350,7 @@
 			 * The value for the HI16 had best be the same.
 			 */
 			if (v != l->value) {
-				printk(KERN_DEBUG "VPE loader: "
-				       "apply_r_mips_lo16/hi16: \t"
-				       "inconsistent value information\n");
+				pr_debug("VPE loader: apply_r_mips_lo16/hi16: inconsistent value information\n");
 				goto out_free;
 			}
 
@@ -568,20 +446,19 @@
 			+ ELF32_R_SYM(r_info);
 
 		if (!sym->st_value) {
-			printk(KERN_DEBUG "%s: undefined weak symbol %s\n",
-			       me->name, strtab + sym->st_name);
+			pr_debug("%s: undefined weak symbol %s\n",
+				 me->name, strtab + sym->st_name);
 			/* just print the warning, dont barf */
 		}
 
 		v = sym->st_value;
 
 		res = reloc_handlers[ELF32_R_TYPE(r_info)](me, location, v);
-		if( res ) {
+		if (res) {
 			char *r = rstrs[ELF32_R_TYPE(r_info)];
-			printk(KERN_WARNING "VPE loader: .text+0x%x "
-			       "relocation type %s for symbol \"%s\" failed\n",
-			       rel[i].r_offset, r ? r : "UNKNOWN",
-			       strtab + sym->st_name);
+			pr_warn("VPE loader: .text+0x%x relocation type %s for symbol \"%s\" failed\n",
+				rel[i].r_offset, r ? r : "UNKNOWN",
+				strtab + sym->st_name);
 			return res;
 		}
 	}
@@ -596,10 +473,8 @@
 }
 /* end module-elf32.c */
 
-
-
 /* Change all symbols so that sh_value encodes the pointer directly. */
-static void simplify_symbols(Elf_Shdr * sechdrs,
+static void simplify_symbols(Elf_Shdr *sechdrs,
 			    unsigned int symindex,
 			    const char *strtab,
 			    const char *secstrings,
@@ -640,18 +515,16 @@
 			break;
 
 		case SHN_MIPS_SCOMMON:
-			printk(KERN_DEBUG "simplify_symbols: ignoring SHN_MIPS_SCOMMON "
-			       "symbol <%s> st_shndx %d\n", strtab + sym[i].st_name,
-			       sym[i].st_shndx);
-			// .sbss section
+			pr_debug("simplify_symbols: ignoring SHN_MIPS_SCOMMON symbol <%s> st_shndx %d\n",
+				 strtab + sym[i].st_name, sym[i].st_shndx);
+			/* .sbss section */
 			break;
 
 		default:
 			secbase = sechdrs[sym[i].st_shndx].sh_addr;
 
-			if (strncmp(strtab + sym[i].st_name, "_gp", 3) == 0) {
+			if (strncmp(strtab + sym[i].st_name, "_gp", 3) == 0)
 				save_gp_address(secbase, sym[i].st_value);
-			}
 
 			sym[i].st_value += secbase;
 			break;
@@ -660,142 +533,21 @@
 }
 
 #ifdef DEBUG_ELFLOADER
-static void dump_elfsymbols(Elf_Shdr * sechdrs, unsigned int symindex,
+static void dump_elfsymbols(Elf_Shdr *sechdrs, unsigned int symindex,
 			    const char *strtab, struct module *mod)
 {
 	Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
 	unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
 
-	printk(KERN_DEBUG "dump_elfsymbols: n %d\n", n);
+	pr_debug("dump_elfsymbols: n %d\n", n);
 	for (i = 1; i < n; i++) {
-		printk(KERN_DEBUG " i %d name <%s> 0x%x\n", i,
-		       strtab + sym[i].st_name, sym[i].st_value);
+		pr_debug(" i %d name <%s> 0x%x\n", i, strtab + sym[i].st_name,
+			 sym[i].st_value);
 	}
 }
 #endif
 
-/* We are prepared so configure and start the VPE... */
-static int vpe_run(struct vpe * v)
-{
-	unsigned long flags, val, dmt_flag;
-	struct vpe_notifications *n;
-	unsigned int vpeflags;
-	struct tc *t;
-
-	/* check we are the Master VPE */
-	local_irq_save(flags);
-	val = read_c0_vpeconf0();
-	if (!(val & VPECONF0_MVP)) {
-		printk(KERN_WARNING
-		       "VPE loader: only Master VPE's are allowed to configure MT\n");
-		local_irq_restore(flags);
-
-		return -1;
-	}
-
-	dmt_flag = dmt();
-	vpeflags = dvpe();
-
-	if (list_empty(&v->tc)) {
-		evpe(vpeflags);
-		emt(dmt_flag);
-		local_irq_restore(flags);
-
-		printk(KERN_WARNING
-		       "VPE loader: No TC's associated with VPE %d\n",
-		       v->minor);
-
-		return -ENOEXEC;
-	}
-
-	t = list_first_entry(&v->tc, struct tc, tc);
-
-	/* Put MVPE's into 'configuration state' */
-	set_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	settc(t->index);
-
-	/* should check it is halted, and not activated */
-	if ((read_tc_c0_tcstatus() & TCSTATUS_A) || !(read_tc_c0_tchalt() & TCHALT_H)) {
-		evpe(vpeflags);
-		emt(dmt_flag);
-		local_irq_restore(flags);
-
-		printk(KERN_WARNING "VPE loader: TC %d is already active!\n",
-		       t->index);
-
-		return -ENOEXEC;
-	}
-
-	/* Write the address we want it to start running from in the TCPC register. */
-	write_tc_c0_tcrestart((unsigned long)v->__start);
-	write_tc_c0_tccontext((unsigned long)0);
-
-	/*
-	 * Mark the TC as activated, not interrupt exempt and not dynamically
-	 * allocatable
-	 */
-	val = read_tc_c0_tcstatus();
-	val = (val & ~(TCSTATUS_DA | TCSTATUS_IXMT)) | TCSTATUS_A;
-	write_tc_c0_tcstatus(val);
-
-	write_tc_c0_tchalt(read_tc_c0_tchalt() & ~TCHALT_H);
-
-	/*
-	 * The sde-kit passes 'memsize' to __start in $a3, so set something
-	 * here...  Or set $a3 to zero and define DFLT_STACK_SIZE and
-	 * DFLT_HEAP_SIZE when you compile your program
-	 */
-	mttgpr(6, v->ntcs);
-	mttgpr(7, physical_memsize);
-
-	/* set up VPE1 */
-	/*
-	 * bind the TC to VPE 1 as late as possible so we only have the final
-	 * VPE registers to set up, and so an EJTAG probe can trigger on it
-	 */
-	write_tc_c0_tcbind((read_tc_c0_tcbind() & ~TCBIND_CURVPE) | 1);
-
-	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() & ~(VPECONF0_VPA));
-
-	back_to_back_c0_hazard();
-
-	/* Set up the XTC bit in vpeconf0 to point at our tc */
-	write_vpe_c0_vpeconf0( (read_vpe_c0_vpeconf0() & ~(VPECONF0_XTC))
-			      | (t->index << VPECONF0_XTC_SHIFT));
-
-	back_to_back_c0_hazard();
-
-	/* enable this VPE */
-	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);
-
-	/* clear out any left overs from a previous program */
-	write_vpe_c0_status(0);
-	write_vpe_c0_cause(0);
-
-	/* take system out of configuration state */
-	clear_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	/*
-	 * SMTC/SMVP kernels manage VPE enable independently,
-	 * but uniprocessor kernels need to turn it on, even
-	 * if that wasn't the pre-dvpe() state.
-	 */
-#ifdef CONFIG_SMP
-	evpe(vpeflags);
-#else
-	evpe(EVPE_ENABLE);
-#endif
-	emt(dmt_flag);
-	local_irq_restore(flags);
-
-	list_for_each_entry(n, &v->notify, list)
-		n->start(minor);
-
-	return 0;
-}
-
-static int find_vpe_symbols(struct vpe * v, Elf_Shdr * sechdrs,
+static int find_vpe_symbols(struct vpe *v, Elf_Shdr *sechdrs,
 				      unsigned int symindex, const char *strtab,
 				      struct module *mod)
 {
@@ -803,16 +555,14 @@
 	unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
 
 	for (i = 1; i < n; i++) {
-		if (strcmp(strtab + sym[i].st_name, "__start") == 0) {
+		if (strcmp(strtab + sym[i].st_name, "__start") == 0)
 			v->__start = sym[i].st_value;
-		}
 
-		if (strcmp(strtab + sym[i].st_name, "vpe_shared") == 0) {
+		if (strcmp(strtab + sym[i].st_name, "vpe_shared") == 0)
 			v->shared_ptr = (void *)sym[i].st_value;
-		}
 	}
 
-	if ( (v->__start == 0) || (v->shared_ptr == NULL))
+	if ((v->__start == 0) || (v->shared_ptr == NULL))
 		return -1;
 
 	return 0;
@@ -823,14 +573,14 @@
  * contents of the program (p)buffer performing relocatations/etc, free's it
  * when finished.
  */
-static int vpe_elfload(struct vpe * v)
+static int vpe_elfload(struct vpe *v)
 {
 	Elf_Ehdr *hdr;
 	Elf_Shdr *sechdrs;
 	long err = 0;
 	char *secstrings, *strtab = NULL;
 	unsigned int len, i, symindex = 0, strindex = 0, relocate = 0;
-	struct module mod;	// so we can re-use the relocations code
+	struct module mod; /* so we can re-use the relocations code */
 
 	memset(&mod, 0, sizeof(struct module));
 	strcpy(mod.name, "VPE loader");
@@ -844,8 +594,7 @@
 	    || (hdr->e_type != ET_REL && hdr->e_type != ET_EXEC)
 	    || !elf_check_arch(hdr)
 	    || hdr->e_shentsize != sizeof(*sechdrs)) {
-		printk(KERN_WARNING
-		       "VPE loader: program wrong arch or weird elf version\n");
+		pr_warn("VPE loader: program wrong arch or weird elf version\n");
 
 		return -ENOEXEC;
 	}
@@ -854,8 +603,7 @@
 		relocate = 1;
 
 	if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) {
-		printk(KERN_ERR "VPE loader: program length %u truncated\n",
-		       len);
+		pr_err("VPE loader: program length %u truncated\n", len);
 
 		return -ENOEXEC;
 	}
@@ -870,22 +618,24 @@
 
 	if (relocate) {
 		for (i = 1; i < hdr->e_shnum; i++) {
-			if (sechdrs[i].sh_type != SHT_NOBITS
-			    && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) {
-				printk(KERN_ERR "VPE program length %u truncated\n",
+			if ((sechdrs[i].sh_type != SHT_NOBITS) &&
+			    (len < sechdrs[i].sh_offset + sechdrs[i].sh_size)) {
+				pr_err("VPE program length %u truncated\n",
 				       len);
 				return -ENOEXEC;
 			}
 
 			/* Mark all sections sh_addr with their address in the
 			   temporary image. */
-			sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
+			sechdrs[i].sh_addr = (size_t) hdr +
+				sechdrs[i].sh_offset;
 
 			/* Internal symbols and strings. */
 			if (sechdrs[i].sh_type == SHT_SYMTAB) {
 				symindex = i;
 				strindex = sechdrs[i].sh_link;
-				strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+				strtab = (char *)hdr +
+					sechdrs[strindex].sh_offset;
 			}
 		}
 		layout_sections(&mod, hdr, sechdrs, secstrings);
@@ -912,8 +662,9 @@
 			/* Update sh_addr to point to copy in image. */
 			sechdrs[i].sh_addr = (unsigned long)dest;
 
-			printk(KERN_DEBUG " section sh_name %s sh_addr 0x%x\n",
-			       secstrings + sechdrs[i].sh_name, sechdrs[i].sh_addr);
+			pr_debug(" section sh_name %s sh_addr 0x%x\n",
+				 secstrings + sechdrs[i].sh_name,
+				 sechdrs[i].sh_addr);
 		}
 
 		/* Fix up syms, so that st_value is a pointer to location. */
@@ -934,17 +685,18 @@
 				continue;
 
 			if (sechdrs[i].sh_type == SHT_REL)
-				err = apply_relocations(sechdrs, strtab, symindex, i,
-							&mod);
+				err = apply_relocations(sechdrs, strtab,
+							symindex, i, &mod);
 			else if (sechdrs[i].sh_type == SHT_RELA)
-				err = apply_relocate_add(sechdrs, strtab, symindex, i,
-							 &mod);
+				err = apply_relocate_add(sechdrs, strtab,
+							 symindex, i, &mod);
 			if (err < 0)
 				return err;
 
 		}
 	} else {
-		struct elf_phdr *phdr = (struct elf_phdr *) ((char *)hdr + hdr->e_phoff);
+		struct elf_phdr *phdr = (struct elf_phdr *)
+						((char *)hdr + hdr->e_phoff);
 
 		for (i = 0; i < hdr->e_phnum; i++) {
 			if (phdr->p_type == PT_LOAD) {
@@ -962,11 +714,15 @@
 			if (sechdrs[i].sh_type == SHT_SYMTAB) {
 				symindex = i;
 				strindex = sechdrs[i].sh_link;
-				strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+				strtab = (char *)hdr +
+					sechdrs[strindex].sh_offset;
 
-				/* mark the symtab's address for when we try to find the
-				   magic symbols */
-				sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
+				/*
+				 * mark symtab's address for when we try
+				 * to find the magic symbols
+				 */
+				sechdrs[i].sh_addr = (size_t) hdr +
+					sechdrs[i].sh_offset;
 			}
 		}
 	}
@@ -977,53 +733,19 @@
 
 	if ((find_vpe_symbols(v, sechdrs, symindex, strtab, &mod)) < 0) {
 		if (v->__start == 0) {
-			printk(KERN_WARNING "VPE loader: program does not contain "
-			       "a __start symbol\n");
+			pr_warn("VPE loader: program does not contain a __start symbol\n");
 			return -ENOEXEC;
 		}
 
 		if (v->shared_ptr == NULL)
-			printk(KERN_WARNING "VPE loader: "
-			       "program does not contain vpe_shared symbol.\n"
-			       " Unable to use AMVP (AP/SP) facilities.\n");
+			pr_warn("VPE loader: program does not contain vpe_shared symbol.\n"
+				" Unable to use AMVP (AP/SP) facilities.\n");
 	}
 
-	printk(" elf loaded\n");
+	pr_info(" elf loaded\n");
 	return 0;
 }
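
The truncation check near the top of the relocatable path is the load-bearing
part of this loader: no section may be trusted until its file extent is known
to fit inside the received image. A standalone, compilable sketch of that
check, assuming the 32-bit ELF types from <elf.h> (the kernel itself uses its
own Elf_Shdr aliases):

	#include <elf.h>

	/*
	 * Hedged sketch mirroring the loop in vpe_elfload(): reject any
	 * non-NOBITS section whose file extent runs past the end of the
	 * len-byte image. SHT_NOBITS sections (.bss) occupy no file space,
	 * so they are exempt.
	 */
	int sections_fit(const Elf32_Ehdr *hdr, const Elf32_Shdr *sechdrs,
			 unsigned int len)
	{
		unsigned int i;

		for (i = 1; i < hdr->e_shnum; i++)
			if (sechdrs[i].sh_type != SHT_NOBITS &&
			    len < sechdrs[i].sh_offset + sechdrs[i].sh_size)
				return 0;	/* truncated image */
		return 1;
	}
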
 
-static void cleanup_tc(struct tc *tc)
-{
-	unsigned long flags;
-	unsigned int mtflags, vpflags;
-	int tmp;
-
-	local_irq_save(flags);
-	mtflags = dmt();
-	vpflags = dvpe();
-	/* Put MVPE's into 'configuration state' */
-	set_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	settc(tc->index);
-	tmp = read_tc_c0_tcstatus();
-
-	/* mark not allocated and not dynamically allocatable */
-	tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
-	tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
-	write_tc_c0_tcstatus(tmp);
-
-	write_tc_c0_tchalt(TCHALT_H);
-	mips_ihb();
-
-	/* bind it to anything other than VPE1 */
-//	write_tc_c0_tcbind(read_tc_c0_tcbind() & ~TCBIND_CURVPE); // | TCBIND_CURVPE
-
-	clear_c0_mvpcontrol(MVPCONTROL_VPC);
-	evpe(vpflags);
-	emt(mtflags);
-	local_irq_restore(flags);
-}
-
 static int getcwd(char *buff, int size)
 {
 	mm_segment_t old_fs;
@@ -1043,39 +765,39 @@
 static int vpe_open(struct inode *inode, struct file *filp)
 {
 	enum vpe_state state;
-	struct vpe_notifications *not;
+	struct vpe_notifications *notifier;
 	struct vpe *v;
 	int ret;
 
-	if (minor != iminor(inode)) {
+	if (VPE_MODULE_MINOR != iminor(inode)) {
 		/* assume only 1 device at the moment. */
-		pr_warning("VPE loader: only vpe1 is supported\n");
+		pr_warn("VPE loader: only vpe1 is supported\n");
 
 		return -ENODEV;
 	}
 
-	if ((v = get_vpe(tclimit)) == NULL) {
-		pr_warning("VPE loader: unable to get vpe\n");
+	v = get_vpe(aprp_cpu_index());
+	if (v == NULL) {
+		pr_warn("VPE loader: unable to get vpe\n");
 
 		return -ENODEV;
 	}
 
 	state = xchg(&v->state, VPE_STATE_INUSE);
 	if (state != VPE_STATE_UNUSED) {
-		printk(KERN_DEBUG "VPE loader: tc in use dumping regs\n");
+		pr_debug("VPE loader: tc in use dumping regs\n");
 
-		list_for_each_entry(not, &v->notify, list) {
-			not->stop(tclimit);
-		}
+		list_for_each_entry(notifier, &v->notify, list)
+			notifier->stop(aprp_cpu_index());
 
 		release_progmem(v->load_addr);
-		cleanup_tc(get_tc(tclimit));
+		cleanup_tc(get_tc(aprp_cpu_index()));
 	}
 
 	/* this of course trashes what was there before... */
 	v->pbuffer = vmalloc(P_SIZE);
 	if (!v->pbuffer) {
-		pr_warning("VPE loader: unable to allocate memory\n");
+		pr_warn("VPE loader: unable to allocate memory\n");
 		return -ENOMEM;
 	}
 	v->plen = P_SIZE;
@@ -1085,7 +807,7 @@
 	v->cwd[0] = 0;
 	ret = getcwd(v->cwd, VPE_PATH_MAX);
 	if (ret < 0)
-		printk(KERN_WARNING "VPE loader: open, getcwd returned %d\n", ret);
+		pr_warn("VPE loader: open, getcwd returned %d\n", ret);
 
 	v->shared_ptr = NULL;
 	v->__start = 0;
@@ -1099,20 +821,20 @@
 	Elf_Ehdr *hdr;
 	int ret = 0;
 
-	v = get_vpe(tclimit);
+	v = get_vpe(aprp_cpu_index());
 	if (v == NULL)
 		return -ENODEV;
 
 	hdr = (Elf_Ehdr *) v->pbuffer;
 	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) == 0) {
-		if (vpe_elfload(v) >= 0) {
+		if ((vpe_elfload(v) >= 0) && vpe_run) {
 			vpe_run(v);
 		} else {
-			printk(KERN_WARNING "VPE loader: ELF load failed.\n");
+			pr_warn("VPE loader: ELF load failed.\n");
 			ret = -ENOEXEC;
 		}
 	} else {
-		printk(KERN_WARNING "VPE loader: only elf files are supported\n");
+		pr_warn("VPE loader: only elf files are supported\n");
 		ret = -ENOEXEC;
 	}
 
@@ -1130,22 +852,22 @@
 	return ret;
 }
 
-static ssize_t vpe_write(struct file *file, const char __user * buffer,
-			 size_t count, loff_t * ppos)
+static ssize_t vpe_write(struct file *file, const char __user *buffer,
+			 size_t count, loff_t *ppos)
 {
 	size_t ret = count;
 	struct vpe *v;
 
-	if (iminor(file_inode(file)) != minor)
+	if (iminor(file_inode(file)) != VPE_MODULE_MINOR)
 		return -ENODEV;
 
-	v = get_vpe(tclimit);
+	v = get_vpe(aprp_cpu_index());
+
 	if (v == NULL)
 		return -ENODEV;
 
 	if ((count + v->len) > v->plen) {
-		printk(KERN_WARNING
-		       "VPE loader: elf size too big. Perhaps strip uneeded symbols\n");
+		pr_warn("VPE loader: elf size too big. Perhaps strip uneeded symbols\n");
 		return -ENOMEM;
 	}
 
@@ -1157,7 +879,7 @@
 	return ret;
 }
 
-static const struct file_operations vpe_fops = {
+const struct file_operations vpe_fops = {
 	.owner = THIS_MODULE,
 	.open = vpe_open,
 	.release = vpe_release,
@@ -1165,396 +887,40 @@
 	.llseek = noop_llseek,
 };
 
-/* module wrapper entry points */
-/* give me a vpe */
-vpe_handle vpe_alloc(void)
-{
-	int i;
-	struct vpe *v;
-
-	/* find a vpe */
-	for (i = 1; i < MAX_VPES; i++) {
-		if ((v = get_vpe(i)) != NULL) {
-			v->state = VPE_STATE_INUSE;
-			return v;
-		}
-	}
-	return NULL;
-}
-
-EXPORT_SYMBOL(vpe_alloc);
-
-/* start running from here */
-int vpe_start(vpe_handle vpe, unsigned long start)
-{
-	struct vpe *v = vpe;
-
-	v->__start = start;
-	return vpe_run(v);
-}
-
-EXPORT_SYMBOL(vpe_start);
-
-/* halt it for now */
-int vpe_stop(vpe_handle vpe)
-{
-	struct vpe *v = vpe;
-	struct tc *t;
-	unsigned int evpe_flags;
-
-	evpe_flags = dvpe();
-
-	if ((t = list_entry(v->tc.next, struct tc, tc)) != NULL) {
-
-		settc(t->index);
-		write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() & ~VPECONF0_VPA);
-	}
-
-	evpe(evpe_flags);
-
-	return 0;
-}
-
-EXPORT_SYMBOL(vpe_stop);
-
-/* I've done with it thank you */
-int vpe_free(vpe_handle vpe)
-{
-	struct vpe *v = vpe;
-	struct tc *t;
-	unsigned int evpe_flags;
-
-	if ((t = list_entry(v->tc.next, struct tc, tc)) == NULL) {
-		return -ENOEXEC;
-	}
-
-	evpe_flags = dvpe();
-
-	/* Put MVPE's into 'configuration state' */
-	set_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	settc(t->index);
-	write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() & ~VPECONF0_VPA);
-
-	/* halt the TC */
-	write_tc_c0_tchalt(TCHALT_H);
-	mips_ihb();
-
-	/* mark the TC unallocated */
-	write_tc_c0_tcstatus(read_tc_c0_tcstatus() & ~TCSTATUS_A);
-
-	v->state = VPE_STATE_UNUSED;
-
-	clear_c0_mvpcontrol(MVPCONTROL_VPC);
-	evpe(evpe_flags);
-
-	return 0;
-}
-
-EXPORT_SYMBOL(vpe_free);
-
 void *vpe_get_shared(int index)
 {
-	struct vpe *v;
+	struct vpe *v = get_vpe(index);
 
-	if ((v = get_vpe(index)) == NULL)
+	if (v == NULL)
 		return NULL;
 
 	return v->shared_ptr;
 }
-
 EXPORT_SYMBOL(vpe_get_shared);
 
 int vpe_notify(int index, struct vpe_notifications *notify)
 {
-	struct vpe *v;
+	struct vpe *v = get_vpe(index);
 
-	if ((v = get_vpe(index)) == NULL)
+	if (v == NULL)
 		return -1;
 
 	list_add(&notify->list, &v->notify);
 	return 0;
 }
-
 EXPORT_SYMBOL(vpe_notify);
 
 char *vpe_getcwd(int index)
 {
-	struct vpe *v;
+	struct vpe *v = get_vpe(index);
 
-	if ((v = get_vpe(index)) == NULL)
+	if (v == NULL)
 		return NULL;
 
 	return v->cwd;
 }
-
 EXPORT_SYMBOL(vpe_getcwd);
 
-static ssize_t store_kill(struct device *dev, struct device_attribute *attr,
-			  const char *buf, size_t len)
-{
-	struct vpe *vpe = get_vpe(tclimit);
-	struct vpe_notifications *not;
-
-	list_for_each_entry(not, &vpe->notify, list) {
-		not->stop(tclimit);
-	}
-
-	release_progmem(vpe->load_addr);
-	cleanup_tc(get_tc(tclimit));
-	vpe_stop(vpe);
-	vpe_free(vpe);
-
-	return len;
-}
-static DEVICE_ATTR(kill, S_IWUSR, NULL, store_kill);
-
-static ssize_t ntcs_show(struct device *cd, struct device_attribute *attr,
-			 char *buf)
-{
-	struct vpe *vpe = get_vpe(tclimit);
-
-	return sprintf(buf, "%d\n", vpe->ntcs);
-}
-
-static ssize_t ntcs_store(struct device *dev, struct device_attribute *attr,
-			  const char *buf, size_t len)
-{
-	struct vpe *vpe = get_vpe(tclimit);
-	unsigned long new;
-	char *endp;
-
-	new = simple_strtoul(buf, &endp, 0);
-	if (endp == buf)
-		goto out_einval;
-
-	if (new == 0 || new > (hw_tcs - tclimit))
-		goto out_einval;
-
-	vpe->ntcs = new;
-
-	return len;
-
-out_einval:
-	return -EINVAL;
-}
-static DEVICE_ATTR_RW(ntcs);
-
-static struct attribute *vpe_attrs[] = {
-	&dev_attr_kill.attr,
-	&dev_attr_ntcs.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(vpe);
-
-static void vpe_device_release(struct device *cd)
-{
-	kfree(cd);
-}
-
-struct class vpe_class = {
-	.name = "vpe",
-	.owner = THIS_MODULE,
-	.dev_release = vpe_device_release,
-	.dev_groups = vpe_groups,
-};
-
-struct device vpe_device;
-
-static int __init vpe_module_init(void)
-{
-	unsigned int mtflags, vpflags;
-	unsigned long flags, val;
-	struct vpe *v = NULL;
-	struct tc *t;
-	int tc, err;
-
-	if (!cpu_has_mipsmt) {
-		printk("VPE loader: not a MIPS MT capable processor\n");
-		return -ENODEV;
-	}
-
-	if (vpelimit == 0) {
-		printk(KERN_WARNING "No VPEs reserved for AP/SP, not "
-		       "initializing VPE loader.\nPass maxvpes=<n> argument as "
-		       "kernel argument\n");
-
-		return -ENODEV;
-	}
-
-	if (tclimit == 0) {
-		printk(KERN_WARNING "No TCs reserved for AP/SP, not "
-		       "initializing VPE loader.\nPass maxtcs=<n> argument as "
-		       "kernel argument\n");
-
-		return -ENODEV;
-	}
-
-	major = register_chrdev(0, module_name, &vpe_fops);
-	if (major < 0) {
-		printk("VPE loader: unable to register character device\n");
-		return major;
-	}
-
-	err = class_register(&vpe_class);
-	if (err) {
-		printk(KERN_ERR "vpe_class registration failed\n");
-		goto out_chrdev;
-	}
-
-	device_initialize(&vpe_device);
-	vpe_device.class	= &vpe_class,
-	vpe_device.parent	= NULL,
-	dev_set_name(&vpe_device, "vpe1");
-	vpe_device.devt = MKDEV(major, minor);
-	err = device_add(&vpe_device);
-	if (err) {
-		printk(KERN_ERR "Adding vpe_device failed\n");
-		goto out_class;
-	}
-
-	local_irq_save(flags);
-	mtflags = dmt();
-	vpflags = dvpe();
-
-	/* Put MVPE's into 'configuration state' */
-	set_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	/* dump_mtregs(); */
-
-	val = read_c0_mvpconf0();
-	hw_tcs = (val & MVPCONF0_PTC) + 1;
-	hw_vpes = ((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
-
-	for (tc = tclimit; tc < hw_tcs; tc++) {
-		/*
-		 * Must re-enable multithreading temporarily or in case we
-		 * reschedule send IPIs or similar we might hang.
-		 */
-		clear_c0_mvpcontrol(MVPCONTROL_VPC);
-		evpe(vpflags);
-		emt(mtflags);
-		local_irq_restore(flags);
-		t = alloc_tc(tc);
-		if (!t) {
-			err = -ENOMEM;
-			goto out;
-		}
-
-		local_irq_save(flags);
-		mtflags = dmt();
-		vpflags = dvpe();
-		set_c0_mvpcontrol(MVPCONTROL_VPC);
-
-		/* VPE's */
-		if (tc < hw_tcs) {
-			settc(tc);
-
-			if ((v = alloc_vpe(tc)) == NULL) {
-				printk(KERN_WARNING "VPE: unable to allocate VPE\n");
-
-				goto out_reenable;
-			}
-
-			v->ntcs = hw_tcs - tclimit;
-
-			/* add the tc to the list of this vpe's tc's. */
-			list_add(&t->tc, &v->tc);
-
-			/* deactivate all but vpe0 */
-			if (tc >= tclimit) {
-				unsigned long tmp = read_vpe_c0_vpeconf0();
-
-				tmp &= ~VPECONF0_VPA;
-
-				/* master VPE */
-				tmp |= VPECONF0_MVP;
-				write_vpe_c0_vpeconf0(tmp);
-			}
-
-			/* disable multi-threading with TC's */
-			write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() & ~VPECONTROL_TE);
-
-			if (tc >= vpelimit) {
-				/*
-				 * Set config to be the same as vpe0,
-				 * particularly kseg0 coherency alg
-				 */
-				write_vpe_c0_config(read_c0_config());
-			}
-		}
-
-		/* TC's */
-		t->pvpe = v;	/* set the parent vpe */
-
-		if (tc >= tclimit) {
-			unsigned long tmp;
-
-			settc(tc);
-
-			/* Any TC that is bound to VPE0 gets left as is - in case
-			   we are running SMTC on VPE0. A TC that is bound to any
-			   other VPE gets bound to VPE0, ideally I'd like to make
-			   it homeless but it doesn't appear to let me bind a TC
-			   to a non-existent VPE. Which is perfectly reasonable.
-
-			   The (un)bound state is visible to an EJTAG probe so may
-			   notify GDB...
-			*/
-
-			if (((tmp = read_tc_c0_tcbind()) & TCBIND_CURVPE)) {
-				/* tc is bound >vpe0 */
-				write_tc_c0_tcbind(tmp & ~TCBIND_CURVPE);
-
-				t->pvpe = get_vpe(0);	/* set the parent vpe */
-			}
-
-			/* halt the TC */
-			write_tc_c0_tchalt(TCHALT_H);
-			mips_ihb();
-
-			tmp = read_tc_c0_tcstatus();
-
-			/* mark not activated and not dynamically allocatable */
-			tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
-			tmp |= TCSTATUS_IXMT;	/* interrupt exempt */
-			write_tc_c0_tcstatus(tmp);
-		}
-	}
-
-out_reenable:
-	/* release config state */
-	clear_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	evpe(vpflags);
-	emt(mtflags);
-	local_irq_restore(flags);
-
-	return 0;
-
-out_class:
-	class_unregister(&vpe_class);
-out_chrdev:
-	unregister_chrdev(major, module_name);
-
-out:
-	return err;
-}
-
-static void __exit vpe_module_exit(void)
-{
-	struct vpe *v, *n;
-
-	device_del(&vpe_device);
-	unregister_chrdev(major, module_name);
-
-	/* No locking needed here */
-	list_for_each_entry_safe(v, n, &vpecontrol.vpe_list, list) {
-		if (v->state != VPE_STATE_UNUSED)
-			release_vpe(v);
-	}
-}
-
 module_init(vpe_module_init);
 module_exit(vpe_module_exit);
 MODULE_DESCRIPTION("MIPS VPE Loader");
diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c
index 73b3482..da5186f 100644
--- a/arch/mips/kvm/kvm_mips.c
+++ b/arch/mips/kvm/kvm_mips.c
@@ -1001,7 +1001,6 @@
 	hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_REL);
 	vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup;
-	kvm_mips_init_shadow_tlb(vcpu);
 	return 0;
 }
 
diff --git a/arch/mips/kvm/kvm_tlb.c b/arch/mips/kvm/kvm_tlb.c
index c777dd3..50ab9c4 100644
--- a/arch/mips/kvm/kvm_tlb.c
+++ b/arch/mips/kvm/kvm_tlb.c
@@ -10,7 +10,6 @@
 * Authors: Sanjay Lal <sanjayl@kymasys.com>
 */
 
-#include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -25,6 +24,7 @@
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/cacheflush.h>
+#include <asm/tlb.h>
 
 #undef CONFIG_MIPS_MT
 #include <asm/r4kcache.h>
@@ -35,9 +35,6 @@
 
 #define PRIx64 "llx"
 
-/* Use VZ EntryHi.EHINV to invalidate TLB entries */
-#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1)))
-
 atomic_t kvm_mips_instance;
 EXPORT_SYMBOL(kvm_mips_instance);
 
@@ -147,30 +144,6 @@
 	}
 }
 
-void kvm_mips_dump_shadow_tlbs(struct kvm_vcpu *vcpu)
-{
-	int i;
-	volatile struct kvm_mips_tlb tlb;
-
-	printk("Shadow TLBs:\n");
-	for (i = 0; i < KVM_MIPS_GUEST_TLB_SIZE; i++) {
-		tlb = vcpu->arch.shadow_tlb[smp_processor_id()][i];
-		printk("TLB%c%3d Hi 0x%08lx ",
-		       (tlb.tlb_lo0 | tlb.tlb_lo1) & MIPS3_PG_V ? ' ' : '*',
-		       i, tlb.tlb_hi);
-		printk("Lo0=0x%09" PRIx64 " %c%c attr %lx ",
-		       (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo0),
-		       (tlb.tlb_lo0 & MIPS3_PG_D) ? 'D' : ' ',
-		       (tlb.tlb_lo0 & MIPS3_PG_G) ? 'G' : ' ',
-		       (tlb.tlb_lo0 >> 3) & 7);
-		printk("Lo1=0x%09" PRIx64 " %c%c attr %lx sz=%lx\n",
-		       (uint64_t) mips3_tlbpfn_to_paddr(tlb.tlb_lo1),
-		       (tlb.tlb_lo1 & MIPS3_PG_D) ? 'D' : ' ',
-		       (tlb.tlb_lo1 & MIPS3_PG_G) ? 'G' : ' ',
-		       (tlb.tlb_lo1 >> 3) & 7, tlb.tlb_mask);
-	}
-}
-
 static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn)
 {
 	int srcu_idx, err = 0;
@@ -657,70 +630,6 @@
 	cpu_context(cpu, mm) = asid_cache(cpu) = asid;
 }
 
-void kvm_shadow_tlb_put(struct kvm_vcpu *vcpu)
-{
-	unsigned long flags;
-	unsigned long old_entryhi;
-	unsigned long old_pagemask;
-	int entry = 0;
-	int cpu = smp_processor_id();
-
-	local_irq_save(flags);
-
-	old_entryhi = read_c0_entryhi();
-	old_pagemask = read_c0_pagemask();
-
-	for (entry = 0; entry < current_cpu_data.tlbsize; entry++) {
-		write_c0_index(entry);
-		mtc0_tlbw_hazard();
-		tlb_read();
-		tlbw_use_hazard();
-
-		vcpu->arch.shadow_tlb[cpu][entry].tlb_hi = read_c0_entryhi();
-		vcpu->arch.shadow_tlb[cpu][entry].tlb_lo0 = read_c0_entrylo0();
-		vcpu->arch.shadow_tlb[cpu][entry].tlb_lo1 = read_c0_entrylo1();
-		vcpu->arch.shadow_tlb[cpu][entry].tlb_mask = read_c0_pagemask();
-	}
-
-	write_c0_entryhi(old_entryhi);
-	write_c0_pagemask(old_pagemask);
-	mtc0_tlbw_hazard();
-
-	local_irq_restore(flags);
-
-}
-
-void kvm_shadow_tlb_load(struct kvm_vcpu *vcpu)
-{
-	unsigned long flags;
-	unsigned long old_ctx;
-	int entry;
-	int cpu = smp_processor_id();
-
-	local_irq_save(flags);
-
-	old_ctx = read_c0_entryhi();
-
-	for (entry = 0; entry < current_cpu_data.tlbsize; entry++) {
-		write_c0_entryhi(vcpu->arch.shadow_tlb[cpu][entry].tlb_hi);
-		mtc0_tlbw_hazard();
-		write_c0_entrylo0(vcpu->arch.shadow_tlb[cpu][entry].tlb_lo0);
-		write_c0_entrylo1(vcpu->arch.shadow_tlb[cpu][entry].tlb_lo1);
-
-		write_c0_index(entry);
-		mtc0_tlbw_hazard();
-
-		tlb_write_indexed();
-		tlbw_use_hazard();
-	}
-
-	tlbw_use_hazard();
-	write_c0_entryhi(old_ctx);
-	mtc0_tlbw_hazard();
-	local_irq_restore(flags);
-}
-
-
 void kvm_local_flush_tlb_all(void)
 {
 	unsigned long flags;
@@ -749,30 +658,6 @@
 	local_irq_restore(flags);
 }
 
-void kvm_mips_init_shadow_tlb(struct kvm_vcpu *vcpu)
-{
-	int cpu, entry;
-
-	for_each_possible_cpu(cpu) {
-		for (entry = 0; entry < current_cpu_data.tlbsize; entry++) {
-			vcpu->arch.shadow_tlb[cpu][entry].tlb_hi =
-			    UNIQUE_ENTRYHI(entry);
-			vcpu->arch.shadow_tlb[cpu][entry].tlb_lo0 = 0x0;
-			vcpu->arch.shadow_tlb[cpu][entry].tlb_lo1 = 0x0;
-			vcpu->arch.shadow_tlb[cpu][entry].tlb_mask =
-			    read_c0_pagemask();
-#ifdef DEBUG
-			kvm_debug
-			    ("shadow_tlb[%d][%d]: tlb_hi: %#lx, lo0: %#lx, lo1: %#lx\n",
-			     cpu, entry,
-			     vcpu->arch.shadow_tlb[cpu][entry].tlb_hi,
-			     vcpu->arch.shadow_tlb[cpu][entry].tlb_lo0,
-			     vcpu->arch.shadow_tlb[cpu][entry].tlb_lo1);
-#endif
-		}
-	}
-}
-
 /* Restore ASID once we are scheduled back after preemption */
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
@@ -810,14 +695,6 @@
 			 vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id);
 	}
 
-	/* Only reload shadow host TLB if new ASIDs haven't been allocated */
-#if 0
-	if ((atomic_read(&kvm_mips_instance) > 1) && !newasid) {
-		kvm_mips_flush_host_tlb(0);
-		kvm_shadow_tlb_load(vcpu);
-	}
-#endif
-
 	if (!newasid) {
 		/* If we preempted while the guest was executing, then reload the pre-empted ASID */
 		if (current->flags & PF_VCPU) {
@@ -863,12 +740,6 @@
 	vcpu->arch.preempt_entryhi = read_c0_entryhi();
 	vcpu->arch.last_sched_cpu = cpu;
 
-#if 0
-	if ((atomic_read(&kvm_mips_instance) > 1)) {
-		kvm_shadow_tlb_put(vcpu);
-	}
-#endif
-
 	if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
 	     ASID_VERSION_MASK)) {
 		kvm_debug("%s: Dropping MMU Context:  %#lx\n", __func__,
@@ -930,10 +801,8 @@
 }
 
 EXPORT_SYMBOL(kvm_local_flush_tlb_all);
-EXPORT_SYMBOL(kvm_shadow_tlb_put);
 EXPORT_SYMBOL(kvm_mips_handle_mapped_seg_tlb_fault);
 EXPORT_SYMBOL(kvm_mips_handle_commpage_tlb_fault);
-EXPORT_SYMBOL(kvm_mips_init_shadow_tlb);
 EXPORT_SYMBOL(kvm_mips_dump_host_tlbs);
 EXPORT_SYMBOL(kvm_mips_handle_kseg0_tlb_fault);
 EXPORT_SYMBOL(kvm_mips_host_tlb_lookup);
@@ -941,8 +810,6 @@
 EXPORT_SYMBOL(kvm_mips_guest_tlb_lookup);
 EXPORT_SYMBOL(kvm_mips_host_tlb_inv);
 EXPORT_SYMBOL(kvm_mips_translate_guest_kseg0_to_hpa);
-EXPORT_SYMBOL(kvm_shadow_tlb_load);
-EXPORT_SYMBOL(kvm_mips_dump_shadow_tlbs);
 EXPORT_SYMBOL(kvm_mips_dump_guest_tlbs);
 EXPORT_SYMBOL(kvm_get_inst);
 EXPORT_SYMBOL(kvm_arch_vcpu_load);
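
This series also deletes three duplicate definitions of UNIQUE_ENTRYHI()
(here, in mm/init.c and in mm/tlb-r4k.c) in favour of a single copy in
<asm/tlb.h>. A standalone sketch of what the macro guarantees, with
CKSEG0/PAGE_SHIFT values assumed for a 32-bit kernel with 4 KiB pages:

	#include <stdio.h>

	#define CKSEG0		0x80000000ul
	#define PAGE_SHIFT	12
	#define UNIQUE_ENTRYHI(idx)	(CKSEG0 + ((idx) << (PAGE_SHIFT + 1)))

	int main(void)
	{
		int idx;

		/*
		 * Each TLB index gets its own even/odd page pair in kseg0,
		 * so no two "invalidated" entries share a VPN2 and the
		 * hardware never sees a duplicate match.
		 */
		for (idx = 0; idx < 4; idx++)
			printf("entry %d -> EntryHi %#lx\n",
			       idx, UNIQUE_ENTRYHI(idx));
		return 0;
	}
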
diff --git a/arch/mips/lantiq/xway/clk.c b/arch/mips/lantiq/xway/clk.c
index 1ab576d..8750dc0 100644
--- a/arch/mips/lantiq/xway/clk.c
+++ b/arch/mips/lantiq/xway/clk.c
@@ -8,7 +8,6 @@
 
 #include <linux/io.h>
 #include <linux/export.h>
-#include <linux/init.h>
 #include <linux/clk.h>
 
 #include <asm/time.h>
diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c
index 08f7ebd..78a91fa 100644
--- a/arch/mips/lantiq/xway/dma.c
+++ b/arch/mips/lantiq/xway/dma.c
@@ -220,10 +220,6 @@
 	int i;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		panic("Failed to get dma resource");
-
-	/* remap dma register range */
 	ltq_dma_membase = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(ltq_dma_membase))
 		panic("Failed to remap dma resource");
diff --git a/arch/mips/lasat/at93c.c b/arch/mips/lasat/at93c.c
index 793e234..942f32b 100644
--- a/arch/mips/lasat/at93c.c
+++ b/arch/mips/lasat/at93c.c
@@ -8,7 +8,6 @@
 #include <linux/delay.h>
 #include <asm/lasat/lasat.h>
 #include <linux/module.h>
-#include <linux/init.h>
 
 #include "at93c.h"
 
diff --git a/arch/mips/lasat/picvue.c b/arch/mips/lasat/picvue.c
index 7eb3348..d613b97 100644
--- a/arch/mips/lasat/picvue.c
+++ b/arch/mips/lasat/picvue.c
@@ -9,7 +9,6 @@
 #include <asm/bootinfo.h>
 #include <asm/lasat/lasat.h>
 #include <linux/module.h>
-#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 
diff --git a/arch/mips/lib/uncached.c b/arch/mips/lib/uncached.c
index d8522f8..09d5dee 100644
--- a/arch/mips/lib/uncached.c
+++ b/arch/mips/lib/uncached.c
@@ -8,7 +8,6 @@
  *	Author: Maciej W. Rozycki <macro@mips.com>
  */
 
-#include <linux/init.h>
 
 #include <asm/addrspace.h>
 #include <asm/bug.h>
diff --git a/arch/mips/loongson/lemote-2f/clock.c b/arch/mips/loongson/lemote-2f/clock.c
index 4dc2f5f..aed32b8 100644
--- a/arch/mips/loongson/lemote-2f/clock.c
+++ b/arch/mips/loongson/lemote-2f/clock.c
@@ -10,7 +10,6 @@
 #include <linux/cpufreq.h>
 #include <linux/errno.h>
 #include <linux/export.h>
-#include <linux/init.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index efe0088..506925b 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -417,14 +417,20 @@
 			case mm_mtc1_op:
 			case mm_cfc1_op:
 			case mm_ctc1_op:
+			case mm_mfhc1_op:
+			case mm_mthc1_op:
 				if (insn.mm_fp1_format.op == mm_mfc1_op)
 					op = mfc_op;
 				else if (insn.mm_fp1_format.op == mm_mtc1_op)
 					op = mtc_op;
 				else if (insn.mm_fp1_format.op == mm_cfc1_op)
 					op = cfc_op;
-				else
+				else if (insn.mm_fp1_format.op == mm_ctc1_op)
 					op = ctc_op;
+				else if (insn.mm_fp1_format.op == mm_mfhc1_op)
+					op = mfhc_op;
+				else
+					op = mthc_op;
 				mips32_insn.fp1_format.opcode = cop1_op;
 				mips32_insn.fp1_format.op = op;
 				mips32_insn.fp1_format.rt =
@@ -853,20 +859,20 @@
  * In the Linux kernel, we support selection of FPR format on the
  * basis of the Status.FR bit.	If an FPU is not present, the FR bit
  * is hardwired to zero, which would imply a 32-bit FPU even for
- * 64-bit CPUs so we rather look at TIF_32BIT_REGS.
+ * 64-bit CPUs so we rather look at TIF_32BIT_FPREGS.
  * FPU emu is slow and bulky and optimizing this function offers fairly
  * sizeable benefits so we try to be clever and make this function return
  * a constant whenever possible, that is on 64-bit kernels without O32
- * compatibility enabled and on 32-bit kernels.
+ * compatibility enabled and on 32-bit without 64-bit FPU support.
  */
 static inline int cop1_64bit(struct pt_regs *xcp)
 {
 #if defined(CONFIG_64BIT) && !defined(CONFIG_MIPS32_O32)
 	return 1;
-#elif defined(CONFIG_64BIT) && defined(CONFIG_MIPS32_O32)
-	return !test_thread_flag(TIF_32BIT_REGS);
-#else
+#elif defined(CONFIG_32BIT) && !defined(CONFIG_MIPS_O32_FP64_SUPPORT)
 	return 0;
+#else
+	return !test_thread_flag(TIF_32BIT_FPREGS);
 #endif
 }
 
@@ -878,6 +884,10 @@
 			ctx->fpr[x & ~1] >> 32 << 32 | (u32)(si) : \
 			ctx->fpr[x & ~1] << 32 >> 32 | (u64)(si) << 32)
 
+#define SIFROMHREG(si, x)	((si) = (int)(ctx->fpr[x] >> 32))
+#define SITOHREG(si, x)		(ctx->fpr[x] = \
+				ctx->fpr[x] << 32 >> 32 | (u64)(si) << 32)
+
 #define DIFROMREG(di, x) ((di) = ctx->fpr[x & ~(cop1_64bit(xcp) == 0)])
 #define DITOREG(di, x)	(ctx->fpr[x & ~(cop1_64bit(xcp) == 0)] = (di))
 
@@ -1055,6 +1065,25 @@
 			break;
 #endif
 
+		case mfhc_op:
+			if (!cpu_has_mips_r2)
+				goto sigill;
+
+			/* copregister rd -> gpr[rt] */
+			if (MIPSInst_RT(ir) != 0) {
+				SIFROMHREG(xcp->regs[MIPSInst_RT(ir)],
+					MIPSInst_RD(ir));
+			}
+			break;
+
+		case mthc_op:
+			if (!cpu_has_mips_r2)
+				goto sigill;
+
+			/* copregister rd <- gpr[rt] */
+			SITOHREG(xcp->regs[MIPSInst_RT(ir)], MIPSInst_RD(ir));
+			break;
+
 		case mfc_op:
 			/* copregister rd -> gpr[rt] */
 			if (MIPSInst_RT(ir) != 0) {
@@ -1263,6 +1292,7 @@
 #endif
 
 	default:
+sigill:
 		return SIGILL;
 	}
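
The new SIFROMHREG()/SITOHREG() accessors are what make mfhc1/mthc1
emulation possible: they touch only bits 63:32 of a 64-bit FPR. A standalone
sketch of the same bit manipulation:

	#include <stdio.h>
	#include <stdint.h>

	/* mfhc1: read the high half of a 64-bit FPR */
	static int32_t sifromhreg(uint64_t fpr)
	{
		return (int32_t)(fpr >> 32);
	}

	/* mthc1: replace the high half, keep bits 31:0 intact */
	static uint64_t sitohreg(uint64_t fpr, int32_t si)
	{
		return (fpr << 32 >> 32) | ((uint64_t)(uint32_t)si << 32);
	}

	int main(void)
	{
		uint64_t fpr = 0x1122334455667788ull;

		printf("mfhc1 -> %#x\n", (unsigned int)sifromhreg(fpr));
		fpr = sitohreg(fpr, (int32_t)0xdeadbeef);
		printf("after mthc1 -> %#llx\n", (unsigned long long)fpr);
		return 0;
	}
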
 
diff --git a/arch/mips/math-emu/kernel_linkage.c b/arch/mips/math-emu/kernel_linkage.c
index 1c58657..3aeae07 100644
--- a/arch/mips/math-emu/kernel_linkage.c
+++ b/arch/mips/math-emu/kernel_linkage.c
@@ -89,8 +89,9 @@
 {
 	int i;
 	int err = 0;
+	int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
 
-	for (i = 0; i < 32; i+=2) {
+	for (i = 0; i < 32; i += inc) {
 		err |=
 		    __put_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
 	}
@@ -103,8 +104,9 @@
 {
 	int i;
 	int err = 0;
+	int inc = test_thread_flag(TIF_32BIT_FPREGS) ? 2 : 1;
 
-	for (i = 0; i < 32; i+=2) {
+	for (i = 0; i < 32; i += inc) {
 		err |=
 		    __get_user(current->thread.fpu.fpr[i], &sc->sc_fpregs[i]);
 	}
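
The sigcontext save/restore stride now follows the FPR mode: with
TIF_32BIT_FPREGS set (FR=0) the odd FPRs alias the high halves of the even
ones, so only every second 64-bit slot carries state; with FR=1 all 32
registers are distinct. A toy standalone sketch of the slot count:

	#include <stdio.h>

	int main(void)
	{
		int fpregs32;	/* stand-in for test_thread_flag(TIF_32BIT_FPREGS) */

		for (fpregs32 = 0; fpregs32 <= 1; fpregs32++) {
			int inc = fpregs32 ? 2 : 1, i, n = 0;

			for (i = 0; i < 32; i += inc)
				n++;
			/* FR=1: 32 slots copied, FR=0: 16 slots copied */
			printf("FR=%d: %d slots copied\n", !fpregs32, n);
		}
		return 0;
	}
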
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index c8efdb5..f41a5c5 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -6,7 +6,6 @@
  * Copyright (C) 2005-2007 Cavium Networks
  */
 #include <linux/export.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
diff --git a/arch/mips/mm/c-r3k.c b/arch/mips/mm/c-r3k.c
index 2fcde0c..135ec31 100644
--- a/arch/mips/mm/c-r3k.c
+++ b/arch/mips/mm/c-r3k.c
@@ -9,7 +9,6 @@
  * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov
  * Copyright (C) 2001, 2004, 2007  Maciej W. Rozycki
  */
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 49e572d..c14259e 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -1020,10 +1020,14 @@
 		 */
 		config1 = read_c0_config1();
 
-		if ((lsize = ((config1 >> 19) & 7)))
-			c->icache.linesz = 2 << lsize;
-		else
-			c->icache.linesz = lsize;
+		lsize = (config1 >> 19) & 7;
+
+		/* IL == 7 is reserved */
+		if (lsize == 7)
+			panic("Invalid icache line size");
+
+		c->icache.linesz = lsize ? 2 << lsize : 0;
+
 		c->icache.sets = 32 << (((config1 >> 22) + 1) & 7);
 		c->icache.ways = 1 + ((config1 >> 16) & 7);
 
@@ -1040,10 +1044,14 @@
 		 */
 		c->dcache.flags = 0;
 
-		if ((lsize = ((config1 >> 10) & 7)))
-			c->dcache.linesz = 2 << lsize;
-		else
-			c->dcache.linesz= lsize;
+		lsize = (config1 >> 10) & 7;
+
+		/* DL == 7 is reserved */
+		if (lsize == 7)
+			panic("Invalid dcache line size");
+
+		c->dcache.linesz = lsize ? 2 << lsize : 0;
+
 		c->dcache.sets = 32 << (((config1 >> 13) + 1) & 7);
 		c->dcache.ways = 1 + ((config1 >> 7) & 7);
 
@@ -1105,6 +1113,8 @@
 	case CPU_34K:
 	case CPU_74K:
 	case CPU_1004K:
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
 		if (current_cpu_type() == CPU_74K)
 			alias_74k_erratum(c);
 		if ((read_c0_config7() & (1 << 16))) {
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 15f813c..fde7e56 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -8,7 +8,6 @@
  */
 #include <linux/fs.h>
 #include <linux/fcntl.h>
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/linkage.h>
 #include <linux/module.h>
diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S
index 191cf6e..5d5f296 100644
--- a/arch/mips/mm/cex-sb1.S
+++ b/arch/mips/mm/cex-sb1.S
@@ -15,7 +15,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  */
-#include <linux/init.h>
 
 #include <asm/asm.h>
 #include <asm/regdef.h>
diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c
index 2e94185..44b6dff 100644
--- a/arch/mips/mm/dma-default.c
+++ b/arch/mips/mm/dma-default.c
@@ -23,6 +23,7 @@
 
 #include <dma-coherence.h>
 
+#ifdef CONFIG_DMA_MAYBE_COHERENT
 int coherentio = 0;	/* User defined DMA coherency from command line. */
 EXPORT_SYMBOL_GPL(coherentio);
 int hw_coherentio = 0;	/* Actual hardware supported DMA coherency setting. */
@@ -42,6 +43,7 @@
 	return 0;
 }
 early_param("nocoherentio", setnocoherentio);
+#endif
 
 static inline struct page *dma_addr_to_page(struct device *dev,
 	dma_addr_t dma_addr)
diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c
index 01fda44..77e0ae0 100644
--- a/arch/mips/mm/hugetlbpage.c
+++ b/arch/mips/mm/hugetlbpage.c
@@ -11,7 +11,6 @@
  * Copyright (C) 2008, 2009 Cavium Networks, Inc.
  */
 
-#include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 1215617..6b59617 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -171,8 +171,6 @@
 	return (void*) vaddr;
 }
 
-#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1)))
-
 void kunmap_coherent(void)
 {
 #ifndef CONFIG_MIPS_MT_SMTC
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
index cbd81d1..58033c4 100644
--- a/arch/mips/mm/page.c
+++ b/arch/mips/mm/page.c
@@ -8,7 +8,6 @@
  * Copyright (C) 2008  Thiemo Seufer
  * Copyright (C) 2012  MIPS Technologies, Inc.
  */
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c
index 08d05ae..7a56aee 100644
--- a/arch/mips/mm/sc-mips.c
+++ b/arch/mips/mm/sc-mips.c
@@ -76,6 +76,8 @@
 	case CPU_34K:
 	case CPU_74K:
 	case CPU_1004K:
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
 	case CPU_BMIPS5000:
 		if (config2 & (1 << 12))
 			return 0;
diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c
index aaffbba..9ac1efc 100644
--- a/arch/mips/mm/sc-rm7k.c
+++ b/arch/mips/mm/sc-rm7k.c
@@ -6,7 +6,6 @@
 
 #undef DEBUG
 
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/bitops.h>
diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c
index 9aca109..d657493 100644
--- a/arch/mips/mm/tlb-r3k.c
+++ b/arch/mips/mm/tlb-r3k.c
@@ -10,7 +10,6 @@
  * Copyright (C) 2002  Ralf Baechle
  * Copyright (C) 2002  Maciej W. Rozycki
  */
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index da3b0b9..ae4ca24 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -20,16 +20,11 @@
 #include <asm/bootinfo.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
+#include <asm/tlb.h>
 #include <asm/tlbmisc.h>
 
 extern void build_tlb_refill_handler(void);
 
-/*
- * Make sure all entries differ.  If they're not different
- * MIPS32 will take revenge ...
- */
-#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1)))
-
 /* Atomicity and interruptability */
 #ifdef CONFIG_MIPS_MT_SMTC
 
@@ -77,7 +72,7 @@
 {
 	unsigned long flags;
 	unsigned long old_ctx;
-	int entry;
+	int entry, ftlbhighset;
 
 	ENTER_CRITICAL(flags);
 	/* Save old context and create impossible VPN2 value */
@@ -88,13 +83,30 @@
 	entry = read_c0_wired();
 
 	/* Blast 'em all away. */
-	while (entry < current_cpu_data.tlbsize) {
-		/* Make sure all entries differ. */
-		write_c0_entryhi(UNIQUE_ENTRYHI(entry));
-		write_c0_index(entry);
-		mtc0_tlbw_hazard();
-		tlb_write_indexed();
-		entry++;
+	if (cpu_has_tlbinv) {
+		if (current_cpu_data.tlbsizevtlb) {
+			write_c0_index(0);
+			mtc0_tlbw_hazard();
+			tlbinvf();  /* invalidate VTLB */
+		}
+		ftlbhighset = current_cpu_data.tlbsizevtlb +
+			current_cpu_data.tlbsizeftlbsets;
+		for (entry = current_cpu_data.tlbsizevtlb;
+		     entry < ftlbhighset;
+		     entry++) {
+			write_c0_index(entry);
+			mtc0_tlbw_hazard();
+			tlbinvf();  /* invalidate one FTLB set */
+		}
+	} else {
+		while (entry < current_cpu_data.tlbsize) {
+			/* Make sure all entries differ. */
+			write_c0_entryhi(UNIQUE_ENTRYHI(entry));
+			write_c0_index(entry);
+			mtc0_tlbw_hazard();
+			tlb_write_indexed();
+			entry++;
+		}
 	}
 	tlbw_use_hazard();
 	write_c0_entryhi(old_ctx);
@@ -133,7 +145,9 @@
 		start = round_down(start, PAGE_SIZE << 1);
 		end = round_up(end, PAGE_SIZE << 1);
 		size = (end - start) >> (PAGE_SHIFT + 1);
-		if (size <= current_cpu_data.tlbsize/2) {
+		if (size <= (current_cpu_data.tlbsizeftlbsets ?
+			     current_cpu_data.tlbsize / 8 :
+			     current_cpu_data.tlbsize / 2)) {
 			int oldpid = read_c0_entryhi();
 			int newpid = cpu_asid(cpu, mm);
 
@@ -172,7 +186,9 @@
 	ENTER_CRITICAL(flags);
 	size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 	size = (size + 1) >> 1;
-	if (size <= current_cpu_data.tlbsize / 2) {
+	if (size <= (current_cpu_data.tlbsizeftlbsets ?
+		     current_cpu_data.tlbsize / 8 :
+		     current_cpu_data.tlbsize / 2)) {
 		int pid = read_c0_entryhi();
 
 		start &= (PAGE_MASK << 1);
diff --git a/arch/mips/mm/tlb-r8k.c b/arch/mips/mm/tlb-r8k.c
index 6a99733..138a2ec 100644
--- a/arch/mips/mm/tlb-r8k.c
+++ b/arch/mips/mm/tlb-r8k.c
@@ -8,7 +8,6 @@
  * Carsten Langgaard, carstenl@mips.com
  * Copyright (C) 2002 MIPS Technologies, Inc.  All rights reserved.
  */
-#include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 183f2b5..b234b1b 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -26,7 +26,6 @@
 #include <linux/types.h>
 #include <linux/smp.h>
 #include <linux/string.h>
-#include <linux/init.h>
 #include <linux/cache.h>
 
 #include <asm/cacheflush.h>
@@ -510,6 +509,7 @@
 		switch (current_cpu_type()) {
 		case CPU_M14KC:
 		case CPU_74K:
+		case CPU_PROAPTIV:
 			break;
 
 		default:
diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c
index 060000f..b8d580c 100644
--- a/arch/mips/mm/uasm-micromips.c
+++ b/arch/mips/mm/uasm-micromips.c
@@ -15,7 +15,6 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/init.h>
 
 #include <asm/inst.h>
 #include <asm/elf.h>
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 0c72458..3abd609 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -15,7 +15,6 @@
 
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/init.h>
 
 #include <asm/inst.h>
 #include <asm/elf.h>
diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile
index 72fdedb..eae0ba3 100644
--- a/arch/mips/mti-malta/Makefile
+++ b/arch/mips/mti-malta/Makefile
@@ -9,7 +9,5 @@
 				   malta-int.o malta-memory.o malta-platform.o \
 				   malta-reset.o malta-setup.o malta-time.o
 
-obj-$(CONFIG_EARLY_PRINTK)	+= malta-console.o
-
 # FIXME FIXME FIXME
 obj-$(CONFIG_MIPS_MT_SMTC)	+= malta-smtc.o
diff --git a/arch/mips/mti-malta/malta-amon.c b/arch/mips/mti-malta/malta-amon.c
index 1e47844..592ac04 100644
--- a/arch/mips/mti-malta/malta-amon.c
+++ b/arch/mips/mti-malta/malta-amon.c
@@ -1,30 +1,20 @@
 /*
- * Copyright (C) 2007  MIPS Technologies, Inc.
- *	All rights reserved.
-
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
+ * Copyright (C) 2007 MIPS Technologies, Inc.  All rights reserved.
+ * Copyright (C) 2013 Imagination Technologies Ltd.
  *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Arbitrary Monitor interface
+ * Arbitrary Monitor Interface
  */
-
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/addrspace.h>
-#include <asm/mips-boards/launch.h>
 #include <asm/mipsmtregs.h>
+#include <asm/mips-boards/launch.h>
+#include <asm/vpe.h>
 
 int amon_cpu_avail(int cpu)
 {
@@ -48,7 +38,7 @@
 	return 1;
 }
 
-void amon_cpu_start(int cpu,
+int amon_cpu_start(int cpu,
 		    unsigned long pc, unsigned long sp,
 		    unsigned long gp, unsigned long a0)
 {
@@ -56,10 +46,10 @@
 		(struct cpulaunch  *)CKSEG0ADDR(CPULAUNCH);
 
 	if (!amon_cpu_avail(cpu))
-		return;
+		return -1;
 	if (cpu == smp_processor_id()) {
 		pr_debug("launch: I am cpu%d!\n", cpu);
-		return;
+		return -1;
 	}
 	launch += cpu;
 
@@ -78,4 +68,21 @@
 		;
 	smp_rmb();	/* Target will be updating flags soon */
 	pr_debug("launch: cpu%d gone!\n", cpu);
+
+	return 0;
 }
+
+#ifdef CONFIG_MIPS_VPE_LOADER
+int vpe_run(struct vpe *v)
+{
+	struct vpe_notifications *n;
+
+	if (amon_cpu_start(aprp_cpu_index(), v->__start, 0, 0, 0) < 0)
+		return -1;
+
+	list_for_each_entry(n, &v->notify, list)
+		n->start(VPE_MODULE_MINOR);
+
+	return 0;
+}
+#endif
diff --git a/arch/mips/mti-malta/malta-console.c b/arch/mips/mti-malta/malta-console.c
deleted file mode 100644
index 43bcfb4..0000000
--- a/arch/mips/mti-malta/malta-console.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 1999,2000 MIPS Technologies, Inc.  All rights reserved.
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
- * Putting things on the screen/serial line using YAMONs facilities.
- */
-#include <linux/console.h>
-#include <linux/init.h>
-#include <linux/serial_reg.h>
-#include <asm/io.h>
-
-
-#define PORT(offset) (0x3f8 + (offset))
-
-
-static inline unsigned int serial_in(int offset)
-{
-	return inb(PORT(offset));
-}
-
-static inline void serial_out(int offset, int value)
-{
-	outb(value, PORT(offset));
-}
-
-int prom_putchar(char c)
-{
-	while ((serial_in(UART_LSR) & UART_LSR_THRE) == 0)
-		;
-
-	serial_out(UART_TX, c);
-
-	return 1;
-}
diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c
index ff8caff..fcebfce 100644
--- a/arch/mips/mti-malta/malta-init.c
+++ b/arch/mips/mti-malta/malta-init.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <linux/serial_8250.h>
 
 #include <asm/cacheflush.h>
 #include <asm/smp-ops.h>
@@ -44,32 +45,39 @@
 	char parity = '\0', bits = '\0', flow = '\0';
 	char *s;
 
-	if ((strstr(fw_getcmdline(), "console=")) == NULL) {
-		s = fw_getenv("modetty0");
-		if (s) {
-			while (*s >= '0' && *s <= '9')
-				baud = baud*10 + *s++ - '0';
-			if (*s == ',')
-				s++;
-			if (*s)
-				parity = *s++;
-			if (*s == ',')
-				s++;
-			if (*s)
-				bits = *s++;
-			if (*s == ',')
-				s++;
-			if (*s == 'h')
-				flow = 'r';
-		}
-		if (baud == 0)
-			baud = 38400;
-		if (parity != 'n' && parity != 'o' && parity != 'e')
-			parity = 'n';
-		if (bits != '7' && bits != '8')
-			bits = '8';
-		if (flow == '\0')
+	s = fw_getenv("modetty0");
+	if (s) {
+		while (*s >= '0' && *s <= '9')
+			baud = baud*10 + *s++ - '0';
+		if (*s == ',')
+			s++;
+		if (*s)
+			parity = *s++;
+		if (*s == ',')
+			s++;
+		if (*s)
+			bits = *s++;
+		if (*s == ',')
+			s++;
+		if (*s == 'h')
 			flow = 'r';
+	}
+	if (baud == 0)
+		baud = 38400;
+	if (parity != 'n' && parity != 'o' && parity != 'e')
+		parity = 'n';
+	if (bits != '7' && bits != '8')
+		bits = '8';
+	if (flow == '\0')
+		flow = 'r';
+
+	if ((strstr(fw_getcmdline(), "earlycon=")) == NULL) {
+		sprintf(console_string, "uart8250,io,0x3f8,%d%c%c", baud,
+			parity, bits);
+		setup_early_serial8250_console(console_string);
+	}
+
+	if ((strstr(fw_getcmdline(), "console=")) == NULL) {
 		sprintf(console_string, " console=ttyS0,%d%c%c%c", baud,
 			parity, bits, flow);
 		strcat(fw_getcmdline(), console_string);
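
The modetty0 parsing itself is unchanged; it is hoisted out of the
"console=" guard so the new earlycon path can reuse its result. A standalone
sketch of the parser and its 38400 8n1-with-flow-control fallbacks (the
"baud,parity,bits,flow" field layout is taken from the code above):

	#include <stdio.h>

	static void parse_modetty(const char *s, int *baud, char *parity,
				  char *bits, char *flow)
	{
		*baud = 0;
		*parity = *bits = *flow = '\0';

		while (*s >= '0' && *s <= '9')
			*baud = *baud * 10 + *s++ - '0';
		if (*s == ',')
			s++;
		if (*s)
			*parity = *s++;
		if (*s == ',')
			s++;
		if (*s)
			*bits = *s++;
		if (*s == ',')
			s++;
		if (*s == 'h')
			*flow = 'r';	/* hardware handshake -> RTS/CTS */

		if (*baud == 0)
			*baud = 38400;
		if (*parity != 'n' && *parity != 'o' && *parity != 'e')
			*parity = 'n';
		if (*bits != '7' && *bits != '8')
			*bits = '8';
		if (*flow == '\0')
			*flow = 'r';
	}

	int main(void)
	{
		int baud;
		char parity, bits, flow;

		parse_modetty("115200,n,8,hw", &baud, &parity, &bits, &flow);
		printf("console=ttyS0,%d%c%c%c\n", baud, parity, bits, flow);
		return 0;
	}
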
diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c
index 0892575..ca3e3a4 100644
--- a/arch/mips/mti-malta/malta-int.c
+++ b/arch/mips/mti-malta/malta-int.c
@@ -1,25 +1,16 @@
 /*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
  * Carsten Langgaard, carstenl@mips.com
  * Copyright (C) 2000, 2001, 2004 MIPS Technologies, Inc.
  * Copyright (C) 2001 Ralf Baechle
- *
- *  This program is free software; you can distribute it and/or modify it
- *  under the terms of the GNU General Public License (Version 2) as
- *  published by the Free Software Foundation.
- *
- *  This program is distributed in the hope it will be useful, but WITHOUT
- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- *  for more details.
- *
- *  You should have received a copy of the GNU General Public License along
- *  with this program; if not, write to the Free Software Foundation, Inc.,
- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ * Copyright (C) 2013 Imagination Technologies Ltd.
  *
  * Routines for generic manipulation of the interrupts found on the MIPS
- * Malta board.
- * The interrupt controller is located in the South Bridge a PIIX4 device
- * with two internal 82C95 interrupt controllers.
+ * Malta board. The interrupt controller is located in the South Bridge,
+ * a PIIX4 device with two internal 82C95 interrupt controllers.
  */
 #include <linux/init.h>
 #include <linux/irq.h>
@@ -44,6 +35,7 @@
 #include <asm/gic.h>
 #include <asm/gcmpregs.h>
 #include <asm/setup.h>
+#include <asm/rtlx.h>
 
 int gcmp_present = -1;
 static unsigned long _msc01_biu_base;
@@ -90,7 +82,7 @@
 		BONITO_PCIMAP_CFG = 0;
 		break;
 	default:
-		printk(KERN_WARNING "Unknown system controller.\n");
+		pr_emerg("Unknown system controller.\n");
 		return -1;
 	}
 	return irq;
@@ -126,6 +118,11 @@
 	}
 
 	do_IRQ(MALTA_INT_BASE + irq);
+
+#ifdef MIPS_VPE_APSP_API
+	if (aprp_hook)
+		aprp_hook();
+#endif
 }
 
 static void malta_ipi_irqdispatch(void)
@@ -149,11 +146,11 @@
 	unsigned int intrcause, datalo, datahi;
 	struct pt_regs *regs = get_irq_regs();
 
-	printk(KERN_EMERG "CoreHI interrupt, shouldn't happen, we die here!\n");
-	printk(KERN_EMERG "epc	 : %08lx\nStatus: %08lx\n"
-			"Cause : %08lx\nbadVaddr : %08lx\n",
-			regs->cp0_epc, regs->cp0_status,
-			regs->cp0_cause, regs->cp0_badvaddr);
+	pr_emerg("CoreHI interrupt, shouldn't happen, we die here!\n");
+	pr_emerg("epc	 : %08lx\nStatus: %08lx\n"
+		 "Cause : %08lx\nbadVaddr : %08lx\n",
+		 regs->cp0_epc, regs->cp0_status,
+		 regs->cp0_cause, regs->cp0_badvaddr);
 
 	/* Read all the registers and then print them as there is a
 	   problem with interspersed printk's upsetting the Bonito controller.
@@ -171,8 +168,8 @@
 		intrcause = GT_READ(GT_INTRCAUSE_OFS);
 		datalo = GT_READ(GT_CPUERR_ADDRLO_OFS);
 		datahi = GT_READ(GT_CPUERR_ADDRHI_OFS);
-		printk(KERN_EMERG "GT_INTRCAUSE = %08x\n", intrcause);
-		printk(KERN_EMERG "GT_CPUERR_ADDR = %02x%08x\n",
+		pr_emerg("GT_INTRCAUSE = %08x\n", intrcause);
+		pr_emerg("GT_CPUERR_ADDR = %02x%08x\n",
 				datahi, datalo);
 		break;
 	case MIPS_REVISION_SCON_BONITO:
@@ -184,14 +181,14 @@
 		intedge = BONITO_INTEDGE;
 		intsteer = BONITO_INTSTEER;
 		pcicmd = BONITO_PCICMD;
-		printk(KERN_EMERG "BONITO_INTISR = %08x\n", intisr);
-		printk(KERN_EMERG "BONITO_INTEN = %08x\n", inten);
-		printk(KERN_EMERG "BONITO_INTPOL = %08x\n", intpol);
-		printk(KERN_EMERG "BONITO_INTEDGE = %08x\n", intedge);
-		printk(KERN_EMERG "BONITO_INTSTEER = %08x\n", intsteer);
-		printk(KERN_EMERG "BONITO_PCICMD = %08x\n", pcicmd);
-		printk(KERN_EMERG "BONITO_PCIBADADDR = %08x\n", pcibadaddr);
-		printk(KERN_EMERG "BONITO_PCIMSTAT = %08x\n", pcimstat);
+		pr_emerg("BONITO_INTISR = %08x\n", intisr);
+		pr_emerg("BONITO_INTEN = %08x\n", inten);
+		pr_emerg("BONITO_INTPOL = %08x\n", intpol);
+		pr_emerg("BONITO_INTEDGE = %08x\n", intedge);
+		pr_emerg("BONITO_INTSTEER = %08x\n", intsteer);
+		pr_emerg("BONITO_PCICMD = %08x\n", pcicmd);
+		pr_emerg("BONITO_PCIBADADDR = %08x\n", pcibadaddr);
+		pr_emerg("BONITO_PCIMSTAT = %08x\n", pcimstat);
 		break;
 	}
 
@@ -313,6 +310,11 @@
 
 static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
 {
+#ifdef MIPS_VPE_APSP_API
+	if (aprp_hook)
+		aprp_hook();
+#endif
+
 	scheduler_ipi();
 
 	return IRQ_HANDLED;
@@ -365,13 +367,13 @@
 	.flags = IRQF_NO_THREAD,
 };
 
-static msc_irqmap_t __initdata msc_irqmap[] = {
+static msc_irqmap_t msc_irqmap[] __initdata = {
 	{MSC01C_INT_TMR,		MSC01_IRQ_EDGE, 0},
 	{MSC01C_INT_PCI,		MSC01_IRQ_LEVEL, 0},
 };
-static int __initdata msc_nr_irqs = ARRAY_SIZE(msc_irqmap);
+static int msc_nr_irqs __initdata = ARRAY_SIZE(msc_irqmap);
 
-static msc_irqmap_t __initdata msc_eicirqmap[] = {
+static msc_irqmap_t msc_eicirqmap[] __initdata = {
 	{MSC01E_INT_SW0,		MSC01_IRQ_LEVEL, 0},
 	{MSC01E_INT_SW1,		MSC01_IRQ_LEVEL, 0},
 	{MSC01E_INT_I8259A,		MSC01_IRQ_LEVEL, 0},
@@ -384,7 +386,7 @@
 	{MSC01E_INT_CPUCTR,		MSC01_IRQ_LEVEL, 0}
 };
 
-static int __initdata msc_nr_eicirqs = ARRAY_SIZE(msc_eicirqmap);
+static int msc_nr_eicirqs __initdata = ARRAY_SIZE(msc_eicirqmap);
 
 /*
  * This GIC specific tabular array defines the association between External
@@ -431,9 +433,12 @@
 	if (gcmp_present >= 0)
 		return gcmp_present;
 
-	_gcmp_base = (unsigned long) ioremap_nocache(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ);
-	_msc01_biu_base = (unsigned long) ioremap_nocache(MSC01_BIU_REG_BASE, MSC01_BIU_ADDRSPACE_SZ);
-	gcmp_present = (GCMPGCB(GCMPB) & GCMP_GCB_GCMPB_GCMPBASE_MSK) == GCMP_BASE_ADDR;
+	_gcmp_base = (unsigned long) ioremap_nocache(GCMP_BASE_ADDR,
+		GCMP_ADDRSPACE_SZ);
+	_msc01_biu_base = (unsigned long) ioremap_nocache(MSC01_BIU_REG_BASE,
+		MSC01_BIU_ADDRSPACE_SZ);
+	gcmp_present = ((GCMPGCB(GCMPB) & GCMP_GCB_GCMPB_GCMPBASE_MSK) ==
+		GCMP_BASE_ADDR);
 
 	if (gcmp_present)
 		pr_debug("GCMP present\n");
@@ -443,9 +448,8 @@
 /* Return the number of IOCU's present */
 int __init gcmp_niocu(void)
 {
-  return gcmp_present ?
-    (GCMPGCB(GC) & GCMP_GCB_GC_NUMIOCU_MSK) >> GCMP_GCB_GC_NUMIOCU_SHF :
-    0;
+	return gcmp_present ? ((GCMPGCB(GC) & GCMP_GCB_GC_NUMIOCU_MSK) >>
+		GCMP_GCB_GC_NUMIOCU_SHF) : 0;
 }
 
 /* Set GCMP region attributes */
@@ -594,11 +598,14 @@
 			set_vi_handler(MIPSCPU_INT_IPI1, malta_ipi_irqdispatch);
 		}
 		/* Argh.. this really needs sorting out.. */
-		printk("CPU%d: status register was %08x\n", smp_processor_id(), read_c0_status());
+		pr_info("CPU%d: status register was %08x\n",
+			smp_processor_id(), read_c0_status());
 		write_c0_status(read_c0_status() | STATUSF_IP3 | STATUSF_IP4);
-		printk("CPU%d: status register now %08x\n", smp_processor_id(), read_c0_status());
+		pr_info("CPU%d: status register now %08x\n",
+			smp_processor_id(), read_c0_status());
 		write_c0_status(0x1100dc00);
-		printk("CPU%d: status register frc %08x\n", smp_processor_id(), read_c0_status());
+		pr_info("CPU%d: status register frc %08x\n",
+			smp_processor_id(), read_c0_status());
 		for (i = 0; i < nr_cpu_ids; i++) {
 			arch_init_ipiirq(MIPS_GIC_IRQ_BASE +
 					 GIC_RESCHED_INT(i), &irq_resched);
@@ -616,11 +623,15 @@
 			cpu_ipi_call_irq = MSC01E_INT_SW1;
 		} else {
 			if (cpu_has_vint) {
-				set_vi_handler (MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
-				set_vi_handler (MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
+				set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ,
+					ipi_resched_dispatch);
+				set_vi_handler(MIPS_CPU_IPI_CALL_IRQ,
+					ipi_call_dispatch);
 			}
-			cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ;
-			cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ;
+			cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE +
+				MIPS_CPU_IPI_RESCHED_IRQ;
+			cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE +
+				MIPS_CPU_IPI_CALL_IRQ;
 		}
 		arch_init_ipiirq(cpu_ipi_resched_irq, &irq_resched);
 		arch_init_ipiirq(cpu_ipi_call_irq, &irq_call);
@@ -630,9 +641,7 @@
 
 void malta_be_init(void)
 {
-	if (gcmp_present) {
-		/* Could change CM error mask register */
-	}
+	/* Could change CM error mask register. */
 }
 
 
@@ -712,14 +721,14 @@
 			if (cause < 16) {
 				unsigned long cca_bits = (cm_error >> 15) & 7;
 				unsigned long tr_bits = (cm_error >> 12) & 7;
-				unsigned long mcmd_bits = (cm_error >> 7) & 0x1f;
+				unsigned long cmd_bits = (cm_error >> 7) & 0x1f;
 				unsigned long stag_bits = (cm_error >> 3) & 15;
 				unsigned long sport_bits = (cm_error >> 0) & 7;
 
 				snprintf(buf, sizeof(buf),
 					 "CCA=%lu TR=%s MCmd=%s STag=%lu "
 					 "SPort=%lu\n",
-					 cca_bits, tr[tr_bits], mcmd[mcmd_bits],
+					 cca_bits, tr[tr_bits], mcmd[cmd_bits],
 					 stag_bits, sport_bits);
 			} else {
 				/* glob state & sresp together */
@@ -728,7 +737,7 @@
 				unsigned long c1_bits = (cm_error >> 12) & 7;
 				unsigned long c0_bits = (cm_error >> 9) & 7;
 				unsigned long sc_bit = (cm_error >> 8) & 1;
-				unsigned long mcmd_bits = (cm_error >> 3) & 0x1f;
+				unsigned long cmd_bits = (cm_error >> 3) & 0x1f;
 				unsigned long sport_bits = (cm_error >> 0) & 7;
 				snprintf(buf, sizeof(buf),
 					 "C3=%s C2=%s C1=%s C0=%s SC=%s "
@@ -736,16 +745,16 @@
 					 core[c3_bits], core[c2_bits],
 					 core[c1_bits], core[c0_bits],
 					 sc_bit ? "True" : "False",
-					 mcmd[mcmd_bits], sport_bits);
+					 mcmd[cmd_bits], sport_bits);
 			}
 
 			ocause = (cm_other & GCMP_GCB_GMEO_ERROR_2ND_MSK) >>
 				 GCMP_GCB_GMEO_ERROR_2ND_SHF;
 
-			printk("CM_ERROR=%08lx %s <%s>\n", cm_error,
+			pr_err("CM_ERROR=%08lx %s <%s>\n", cm_error,
 			       causes[cause], buf);
-			printk("CM_ADDR =%08lx\n", cm_addr);
-			printk("CM_OTHER=%08lx %s\n", cm_other, causes[ocause]);
+			pr_err("CM_ADDR =%08lx\n", cm_addr);
+			pr_err("CM_OTHER=%08lx %s\n", cm_other, causes[ocause]);
 
 			/* reprime cause register */
 			GCMPGCB(GCMEC) = 0;
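
Among the cosmetic changes above, gcmp_niocu() is only re-indented; the
bitfield extraction is unchanged. A standalone sketch of that decode (the
mask/shift constants are assumptions matching how the code above uses them,
not quoted from gcmpregs.h):

	#include <stdio.h>
	#include <stdint.h>

	#define GCMP_GCB_GC_NUMIOCU_SHF	8		/* assumed */
	#define GCMP_GCB_GC_NUMIOCU_MSK	(0xfu << 8)	/* assumed */

	static int gcmp_niocu(uint32_t gc, int gcmp_present)
	{
		if (!gcmp_present)
			return 0;
		return (gc & GCMP_GCB_GC_NUMIOCU_MSK) >> GCMP_GCB_GC_NUMIOCU_SHF;
	}

	int main(void)
	{
		printf("%d IOCU(s)\n", gcmp_niocu(1u << 8, 1));	/* 1 */
		return 0;
	}
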
diff --git a/arch/mips/mti-malta/malta-platform.c b/arch/mips/mti-malta/malta-platform.c
index 132f866..e1dd1c1 100644
--- a/arch/mips/mti-malta/malta-platform.c
+++ b/arch/mips/mti-malta/malta-platform.c
@@ -47,6 +47,7 @@
 static struct plat_serial8250_port uart8250_data[] = {
 	SMC_PORT(0x3F8, 4),
 	SMC_PORT(0x2F8, 3),
+#ifndef CONFIG_MIPS_CMP
 	{
 		.mapbase	= 0x1f000900,	/* The CBUS UART */
 		.irq		= MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB2,
@@ -55,6 +56,7 @@
 		.flags		= CBUS_UART_FLAGS,
 		.regshift	= 3,
 	},
+#endif
 	{ },
 };
 
diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c
index a18af5f..3190099 100644
--- a/arch/mips/mti-malta/malta-time.c
+++ b/arch/mips/mti-malta/malta-time.c
@@ -42,8 +42,6 @@
 #include <asm/mips-boards/generic.h>
 #include <asm/mips-boards/maltaint.h>
 
-unsigned long cpu_khz;
-
 static int mips_cpu_timer_irq;
 static int mips_cpu_perf_irq;
 extern int cp0_perfcount_irq;
@@ -168,11 +166,24 @@
 	return mips_cpu_timer_irq;
 }
 
+static void __init init_rtc(void)
+{
+	/* stop the clock whilst setting it up */
+	CMOS_WRITE(RTC_SET | RTC_24H, RTC_CONTROL);
+
+	/* 32KHz time base */
+	CMOS_WRITE(RTC_REF_CLCK_32KHZ, RTC_FREQ_SELECT);
+
+	/* start the clock */
+	CMOS_WRITE(RTC_24H, RTC_CONTROL);
+}
+
 void __init plat_time_init(void)
 {
 	unsigned int prid = read_c0_prid() & (PRID_COMP_MASK | PRID_IMP_MASK);
 	unsigned int freq;
 
+	init_rtc();
 	estimate_frequencies();
 
 	freq = mips_hpt_frequency;
@@ -182,7 +193,6 @@
 	freq = freqround(freq, 5000);
 	printk("CPU frequency %d.%02d MHz\n", freq/1000000,
 	       (freq%1000000)*100/1000000);
-	cpu_khz = freq / 1000;
 
 	mips_scroll_message();
 
diff --git a/arch/mips/mti-sead3/Makefile b/arch/mips/mti-sead3/Makefile
index be11420..071786f 100644
--- a/arch/mips/mti-sead3/Makefile
+++ b/arch/mips/mti-sead3/Makefile
@@ -21,5 +21,7 @@
 obj-$(CONFIG_USB_EHCI_HCD)	+= sead3-ehci.o
 obj-$(CONFIG_OF)		+= sead3.dtb.o
 
+CFLAGS_sead3-setup.o = -I$(src)/../../../scripts/dtc/libfdt
+
 $(obj)/%.dtb: $(obj)/%.dts
 	$(call if_changed,dtc)
diff --git a/arch/mips/mti-sead3/sead3-pic32-bus.c b/arch/mips/mti-sead3/sead3-pic32-bus.c
index eb2bf93..3b12aa5 100644
--- a/arch/mips/mti-sead3/sead3-pic32-bus.c
+++ b/arch/mips/mti-sead3/sead3-pic32-bus.c
@@ -8,7 +8,6 @@
 #include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/spinlock.h>
-#include <linux/init.h>
 #include <linux/io.h>
 #include <linux/errno.h>
 
diff --git a/arch/mips/mti-sead3/sead3-setup.c b/arch/mips/mti-sead3/sead3-setup.c
index 928ba84..bf7fe48 100644
--- a/arch/mips/mti-sead3/sead3-setup.c
+++ b/arch/mips/mti-sead3/sead3-setup.c
@@ -4,13 +4,15 @@
  * for more details.
  *
  * Copyright (C) 2012 MIPS Technologies, Inc.  All rights reserved.
+ * Copyright (C) 2013 Imagination Technologies Ltd.
  */
 #include <linux/init.h>
+#include <linux/libfdt.h>
 #include <linux/of_platform.h>
 #include <linux/of_fdt.h>
-#include <linux/bootmem.h>
 
 #include <asm/prom.h>
+#include <asm/fw/fw.h>
 
 #include <asm/mips-boards/generic.h>
 
@@ -19,8 +21,73 @@
 	return "MIPS SEAD3";
 }
 
+static uint32_t get_memsize_from_cmdline(void)
+{
+	int memsize = 0;
+	char *p = arcs_cmdline;
+	char *s = "memsize=";
+
+	p = strstr(p, s);
+	if (p) {
+		p += strlen(s);
+		memsize = memparse(p, NULL);
+	}
+
+	return memsize;
+}
+
+static uint32_t get_memsize_from_env(void)
+{
+	int memsize = 0;
+	char *p;
+
+	p = fw_getenv("memsize");
+	if (p)
+		memsize = memparse(p, NULL);
+
+	return memsize;
+}
+
+static uint32_t get_memsize(void)
+{
+	uint32_t memsize;
+
+	memsize = get_memsize_from_cmdline();
+	if (memsize)
+		return memsize;
+
+	return get_memsize_from_env();
+}
+
+static void __init parse_memsize_param(void)
+{
+	int offset;
+	const uint64_t *prop_value;
+	int prop_len;
+	uint32_t memsize = get_memsize();
+
+	if (!memsize)
+		return;
+
+	offset = fdt_path_offset(&__dtb_start, "/memory");
+	if (offset > 0) {
+		uint64_t new_value;
+		/*
+		 * reg contains 2 32-bits BE values, offset and size. We just
+		 * want to replace the size value without affecting the offset
+		 */
+		prop_value = fdt_getprop(&__dtb_start, offset, "reg", &prop_len);
+		new_value = be64_to_cpu(*prop_value);
+		new_value =  (new_value & ~0xffffffffllu) | memsize;
+		fdt_setprop_inplace_u64(&__dtb_start, offset, "reg", new_value);
+	}
+}
+
 void __init plat_mem_setup(void)
 {
+	/* allow command line/bootloader env to override memory size in DT */
+	parse_memsize_param();
+
 	/*
 	 * Load the builtin devicetree. This causes the chosen node to be
 	 * parsed resulting in our memory appearing
@@ -30,16 +97,15 @@
 
 void __init device_tree_init(void)
 {
-	unsigned long base, size;
-
 	if (!initial_boot_params)
 		return;
 
-	base = virt_to_phys((void *)initial_boot_params);
-	size = be32_to_cpu(initial_boot_params->totalsize);
-
-	/* Before we do anything, lets reserve the dt blob */
-	reserve_bootmem(base, size, BOOTMEM_DEFAULT);
-
-	unflatten_device_tree();
+	unflatten_and_copy_device_tree();
 }
+
+static int __init customize_machine(void)
+{
+	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+	return 0;
+}
+arch_initcall(customize_machine);
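
SEAD-3 now lets "memsize=" on the kernel command line (or in the boot
environment) override the size cell of the device tree's /memory node before
the blob is parsed, with the command line taking precedence. The size
strings go through memparse(); a minimal standalone stand-in for that helper
and the lookup order:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* minimal memparse() stand-in: number plus optional K/M/G suffix */
	static unsigned long long parse_size(const char *p)
	{
		char *end;
		unsigned long long v = strtoull(p, &end, 0);

		switch (*end) {
		case 'G': case 'g': v <<= 10;	/* fall through */
		case 'M': case 'm': v <<= 10;	/* fall through */
		case 'K': case 'k': v <<= 10;
		}
		return v;
	}

	int main(void)
	{
		const char *cmdline = "console=ttyS0 memsize=128M";
		const char *env = "0x4000000";	/* fw_getenv("memsize") stand-in */
		const char *p = strstr(cmdline, "memsize=");
		unsigned long long memsize;

		if (p)
			memsize = parse_size(p + strlen("memsize="));
		else
			memsize = parse_size(env);
		printf("memsize = %llu bytes\n", memsize);	/* 134217728 */
		return 0;
	}
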
diff --git a/arch/mips/mti-sead3/sead3-time.c b/arch/mips/mti-sead3/sead3-time.c
index 552d26c..678d03d 100644
--- a/arch/mips/mti-sead3/sead3-time.c
+++ b/arch/mips/mti-sead3/sead3-time.c
@@ -13,8 +13,6 @@
 #include <asm/irq.h>
 #include <asm/mips-boards/generic.h>
 
-unsigned long cpu_khz;
-
 static int mips_cpu_timer_irq;
 static int mips_cpu_perf_irq;
 
@@ -109,8 +107,6 @@
 	pr_debug("CPU frequency %d.%02d MHz\n", (est_freq / 1000000),
 		(est_freq % 1000000) * 100 / 1000000);
 
-	cpu_khz = est_freq / 1000;
-
 	mips_scroll_message();
 
 	plat_perf_setup();
diff --git a/arch/mips/mti-sead3/sead3.dts b/arch/mips/mti-sead3/sead3.dts
index 658f437..e4b317d 100644
--- a/arch/mips/mti-sead3/sead3.dts
+++ b/arch/mips/mti-sead3/sead3.dts
@@ -15,10 +15,6 @@
 		};
 	};
 
-	chosen {
-		bootargs = "console=ttyS1,38400 rootdelay=10 root=/dev/sda3";
-	};
-
 	memory {
 		device_type = "memory";
 		reg = <0x0 0x08000000>;
diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig
index 852a4ee..4eb683a 100644
--- a/arch/mips/netlogic/Kconfig
+++ b/arch/mips/netlogic/Kconfig
@@ -28,6 +28,15 @@
 	  pointer to the kernel.  The corresponding DTS file is at
 	  arch/mips/netlogic/dts/xlp_fvp.dts
 
+config DT_XLP_GVP
+	bool "Built-in device tree for XLP GVP boards"
+	default y
+	help
+	  Add an FDT blob for XLP GVP board into the kernel.
+	  This DTB will be used if the firmware does not pass in a DTB
+	  pointer to the kernel.  The corresponding DTS file is at
+	  arch/mips/netlogic/dts/xlp_gvp.dts
+
 config NLM_MULTINODE
 	bool "Support for multi-chip boards"
 	depends on NLM_XLP_BOARD
diff --git a/arch/mips/netlogic/common/earlycons.c b/arch/mips/netlogic/common/earlycons.c
index 1902fa2..769f930 100644
--- a/arch/mips/netlogic/common/earlycons.c
+++ b/arch/mips/netlogic/common/earlycons.c
@@ -37,9 +37,11 @@
 
 #include <asm/mipsregs.h>
 #include <asm/netlogic/haldefs.h>
+#include <asm/netlogic/common.h>
 
 #if defined(CONFIG_CPU_XLP)
 #include <asm/netlogic/xlp-hal/iomap.h>
+#include <asm/netlogic/xlp-hal/xlp.h>
 #include <asm/netlogic/xlp-hal/uart.h>
 #elif defined(CONFIG_CPU_XLR)
 #include <asm/netlogic/xlr/iomap.h>
diff --git a/arch/mips/netlogic/common/irq.c b/arch/mips/netlogic/common/irq.c
index 1c7e3a1..5afc4b7 100644
--- a/arch/mips/netlogic/common/irq.c
+++ b/arch/mips/netlogic/common/irq.c
@@ -180,6 +180,7 @@
 #endif
 }
 
+
 void nlm_setup_pic_irq(int node, int picirq, int irq, int irt)
 {
 	struct nlm_pic_irq *pic_data;
@@ -207,32 +208,32 @@
 
 static void nlm_init_node_irqs(int node)
 {
-	int i, irt;
-	uint64_t irqmask;
 	struct nlm_soc_info *nodep;
+	int i, irt;
 
 	pr_info("Init IRQ for node %d\n", node);
 	nodep = nlm_get_node(node);
-	irqmask = PERCPU_IRQ_MASK;
+	nodep->irqmask = PERCPU_IRQ_MASK;
 	for (i = PIC_IRT_FIRST_IRQ; i <= PIC_IRT_LAST_IRQ; i++) {
 		irt = nlm_irq_to_irt(i);
-		if (irt == -1)
+		if (irt == -1)		/* unused irq */
 			continue;
-		nlm_setup_pic_irq(node, i, i, irt);
-		/* set interrupts to first cpu in node */
+		nodep->irqmask |= 1ull << i;
+		if (irt == -2)		/* not a direct PIC irq */
+			continue;
+
 		nlm_pic_init_irt(nodep->picbase, irt, i,
-					node * NLM_CPUS_PER_NODE, 0);
-		irqmask |= (1ull << i);
+				node * nlm_threads_per_node(), 0);
+		nlm_setup_pic_irq(node, i, i, irt);
 	}
-	nodep->irqmask = irqmask;
 }
 
 void nlm_smp_irq_init(int hwcpuid)
 {
 	int node, cpu;
 
-	node = hwcpuid / NLM_CPUS_PER_NODE;
-	cpu  = hwcpuid % NLM_CPUS_PER_NODE;
+	node = nlm_cpuid_to_node(hwcpuid);
+	cpu  = hwcpuid % nlm_threads_per_node();
 
 	if (cpu == 0 && node != 0)
 		nlm_init_node_irqs(node);
@@ -256,13 +257,23 @@
 		return;
 	}
 
+#if defined(CONFIG_PCI_MSI) && defined(CONFIG_CPU_XLP)
+	/* PCI interrupts need a second level dispatch for MSI bits */
+	if (i >= PIC_PCIE_LINK_MSI_IRQ(0) && i <= PIC_PCIE_LINK_MSI_IRQ(3)) {
+		nlm_dispatch_msi(node, i);
+		return;
+	}
+	if (i >= PIC_PCIE_MSIX_IRQ(0) && i <= PIC_PCIE_MSIX_IRQ(3)) {
+		nlm_dispatch_msix(node, i);
+		return;
+	}
+
+#endif
 	/* top level irq handling */
 	do_IRQ(nlm_irq_to_xirq(node, i));
 }
 
 #ifdef CONFIG_OF
-static struct irq_domain *xlp_pic_domain;
-
 static const struct irq_domain_ops xlp_pic_irq_domain_ops = {
 	.xlate = irq_domain_xlate_onetwocell,
 };
@@ -271,8 +282,9 @@
 					struct device_node *parent)
 {
 	const int n_picirqs = PIC_IRT_LAST_IRQ - PIC_IRQ_BASE + 1;
+	struct irq_domain *xlp_pic_domain;
 	struct resource res;
-	int socid, ret;
+	int socid, ret, bus;
 
 	/* we need a hack to get the PIC's SoC chip id */
 	ret = of_address_to_resource(node, 0, &res);
@@ -280,7 +292,29 @@
 		pr_err("PIC %s: reg property not found!\n", node->name);
 		return -EINVAL;
 	}
-	socid = (res.start >> 18) & 0x3;
+
+	if (cpu_is_xlp9xx()) {
+		bus = (res.start >> 20) & 0xf;
+		for (socid = 0; socid < NLM_NR_NODES; socid++) {
+			if (!nlm_node_present(socid))
+				continue;
+			if (nlm_get_node(socid)->socbus == bus)
+				break;
+		}
+		if (socid == NLM_NR_NODES) {
+			pr_err("PIC %s: Node mapping for bus %d not found!\n",
+					node->name, bus);
+			return -EINVAL;
+		}
+	} else {
+		socid = (res.start >> 18) & 0x3;
+		if (!nlm_node_present(socid)) {
+			pr_err("PIC %s: node %d does not exist!\n",
+							node->name, socid);
+			return -EINVAL;
+		}
+	}
+
 	xlp_pic_domain = irq_domain_add_legacy(node, n_picirqs,
 		nlm_irq_to_xirq(socid, PIC_IRQ_BASE), PIC_IRQ_BASE,
 		&xlp_pic_irq_domain_ops, NULL);
@@ -288,8 +327,7 @@
 		pr_err("PIC %s: Creating legacy domain failed!\n", node->name);
 		return -EINVAL;
 	}
-	pr_info("Node %d: IRQ domain created for PIC@%pa\n", socid,
-							&res.start);
+	pr_info("Node %d: IRQ domain created for PIC@%pR\n", socid, &res);
 	return 0;
 }
 
diff --git a/arch/mips/netlogic/common/reset.S b/arch/mips/netlogic/common/reset.S
index adb1828..b231fe1 100644
--- a/arch/mips/netlogic/common/reset.S
+++ b/arch/mips/netlogic/common/reset.S
@@ -32,10 +32,10 @@
  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/init.h>
 
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
+#include <asm/cacheops.h>
 #include <asm/regdef.h>
 #include <asm/mipsregs.h>
 #include <asm/stackframe.h>
@@ -50,8 +50,8 @@
 #include <asm/netlogic/xlp-hal/cpucontrol.h>
 
 #define CP0_EBASE	$15
-#define SYS_CPU_COHERENT_BASE(node)	CKSEG1ADDR(XLP_DEFAULT_IO_BASE) + \
-			XLP_IO_SYS_OFFSET(node) + XLP_IO_PCI_HDRSZ + \
+#define SYS_CPU_COHERENT_BASE	CKSEG1ADDR(XLP_DEFAULT_IO_BASE) + \
+			XLP_IO_SYS_OFFSET(0) + XLP_IO_PCI_HDRSZ + \
 			SYS_CPU_NONCOHERENT_MODE * 4
 
 /* Enable XLP features and workarounds in the LSU */
@@ -74,35 +74,55 @@
 .endm
 
 /*
- * Low level flush for L1D cache on XLP, the normal cache ops does
- * not do the complete and correct cache flush.
+ * L1D cache has to be flushed before enabling threads in XLP.
+ * On XLP8xx/XLP3xx, we do a low-level flush using processor control
+ * registers. On XLPII CPUs, the usual cache instructions work.
  */
 .macro	xlp_flush_l1_dcache
+	mfc0	t0, CP0_EBASE, 0
+	andi	t0, t0, 0xff00
+	slt	t1, t0, 0x1200
+	beqz	t1, 15f
+	nop
+
+	/* XLP8xx low level cache flush */
 	li	t0, LSU_DEBUG_DATA0
 	li	t1, LSU_DEBUG_ADDR
 	li	t2, 0		/* index */
 	li	t3, 0x1000	/* loop count */
-1:
+11:
 	sll	v0, t2, 5
 	mtcr	zero, t0
 	ori	v1, v0, 0x3	/* way0 | write_enable | write_active */
 	mtcr	v1, t1
-2:
+12:
 	mfcr	v1, t1
 	andi	v1, 0x1		/* wait for write_active == 0 */
-	bnez	v1, 2b
+	bnez	v1, 12b
 	nop
 	mtcr	zero, t0
 	ori	v1, v0, 0x7	/* way1 | write_enable | write_active */
 	mtcr	v1, t1
-3:
+13:
 	mfcr	v1, t1
 	andi	v1, 0x1		/* wait for write_active == 0 */
-	bnez	v1, 3b
+	bnez	v1, 13b
 	nop
 	addi	t2, 1
-	bne	t3, t2, 1b
+	bne	t3, t2, 11b
 	nop
+	b	17f
+	nop
+
+	/* XLPII CPUs: invalidate all 64K of L1 D-cache */
+15:
+	li	t0, 0x80000000
+	li	t1, 0x80010000
+16:	cache	Index_Writeback_Inv_D, 0(t0)
+	addiu	t0, t0, 32
+	bne	t0, t1, 16b
+	nop
+17:
 .endm
 
 /*
@@ -138,6 +158,13 @@
 	nop
 
 1:	/* Entry point on core wakeup */
+	mfc0	t0, CP0_EBASE, 0	/* processor ID */
+	andi	t0, 0xff00
+	li	t1, 0x1500		/* XLP 9xx */
+	beq	t0, t1, 2f		/* does not need to set coherent */
+	nop
+
+	/* set bit in SYS coherent register for the core */
 	mfc0	t0, CP0_EBASE, 1
 	mfc0	t1, CP0_EBASE, 1
 	srl	t1, 5
@@ -149,7 +176,7 @@
 	li	t1, 0x1
 	sll	t0, t1, t0
 	nor	t0, t0, zero		/* t0 <- ~(1 << core) */
-	li	t2, SYS_CPU_COHERENT_BASE(0)
+	li	t2, SYS_CPU_COHERENT_BASE
 	add	t2, t2, t3		/* t2 <- SYS offset for node */
 	lw	t1, 0(t2)
 	and	t1, t1, t0
@@ -159,13 +186,13 @@
 	lw	t1, 0(t2)
 	sync
 
+2:
 	/* Configure LSU on Non-0 Cores. */
 	xlp_config_lsu
 	/* FALL THROUGH */
 
 /*
- * Wake up sibling threads from the initial thread in
- * a core.
+ * Wake up sibling threads from the initial thread in a core.
  */
 EXPORT(nlm_boot_siblings)
 	/* core L1D flush before enable threads */
@@ -181,8 +208,10 @@
 	/*
 	 * The new hardware thread starts at the next instruction.
 	 * For all the cases other than core 0 thread 0, we will
-	* jump to the secondary wait function.
-	*/
+	 * jump to the secondary wait function.
+	 *
+	 * NOTE: All GPR contents are lost after the mtcr above!
+	 */
 	mfc0	v0, CP0_EBASE, 1
 	andi	v0, 0x3ff		/* v0 <- node/core */
 
@@ -196,7 +225,7 @@
 #endif
 	mtc0	t1, CP0_STATUS
 
-	/* mark CPU ready, careful here, previous mtcr trashed registers */
+	/* mark CPU ready */
 	li	t3, CKSEG1ADDR(RESET_DATA_PHYS)
 	ADDIU	t1, t3, BOOT_CPU_READY
 	sll	v1, v0, 2
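
Both the flush macro and the wakeup entry point dispatch on the
processor ID read from $15 select 0: an implementation field (bits
15:8) below 0x1200 is an XLP8xx/3xx that needs the LSU debug-register
flush, and 0x1500 is XLP9xx. A hedged C rendering of those checks, with
the constants taken from the assembly above:

    #include <stdint.h>
    #include <stdio.h>

    #define PRID_IMP_MASK   0xff00
    #define PRID_XLP2XX     0x1200  /* first XLPII implementation */
    #define PRID_XLP9XX     0x1500

    /* XLPII parts use the normal cache instructions; older parts
     * need the low-level LSU flush */
    static int needs_lsu_flush(uint32_t prid)
    {
        return (prid & PRID_IMP_MASK) < PRID_XLP2XX;
    }

    static int is_xlp9xx(uint32_t prid)
    {
        return (prid & PRID_IMP_MASK) == PRID_XLP9XX;
    }

    int main(void)
    {
        printf("%d %d\n", needs_lsu_flush(0x1105),  /* 1: XLP8xx */
               is_xlp9xx(0x1500));                  /* 1 */
        return 0;
    }
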
diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c
index c0eded0..6baae15 100644
--- a/arch/mips/netlogic/common/smp.c
+++ b/arch/mips/netlogic/common/smp.c
@@ -63,7 +63,7 @@
 	uint64_t picbase;
 
 	cpu = cpu_logical_map(logical_cpu);
-	node = cpu / NLM_CPUS_PER_NODE;
+	node = nlm_cpuid_to_node(cpu);
 	picbase = nlm_get_node(node)->picbase;
 
 	if (action & SMP_CALL_FUNCTION)
@@ -152,7 +152,7 @@
 	int cpu, node;
 
 	cpu = cpu_logical_map(logical_cpu);
-	node = cpu / NLM_CPUS_PER_NODE;
+	node = nlm_cpuid_to_node(logical_cpu);
 	nlm_next_sp = (unsigned long)__KSTK_TOS(idle);
 	nlm_next_gp = (unsigned long)task_thread_info(idle);
 
@@ -164,7 +164,7 @@
 void __init nlm_smp_setup(void)
 {
 	unsigned int boot_cpu;
-	int num_cpus, i, ncore;
+	int num_cpus, i, ncore, node;
 	volatile u32 *cpu_ready = nlm_get_boot_data(BOOT_CPU_READY);
 	char buf[64];
 
@@ -187,6 +187,8 @@
 			__cpu_number_map[i] = num_cpus;
 			__cpu_logical_map[num_cpus] = i;
 			set_cpu_possible(num_cpus, true);
+			node = nlm_cpuid_to_node(i);
+			cpumask_set_cpu(num_cpus, &nlm_get_node(node)->cpumask);
 			++num_cpus;
 		}
 	}
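
The IPI and boot paths now derive the node from the hardware cpuid at
runtime instead of the fixed NLM_CPUS_PER_NODE divide, so 9xx parts
with 32 cores per node work too. A sketch of the arithmetic, assuming
the plain divide/modulo scheme and 4 hardware threads per core (the
numbers below are illustrative):

    #include <stdio.h>

    #define THREADS_PER_CORE 4

    int main(void)
    {
        int cores_per_node = 8;     /* 32 on XLP9xx */
        int tpn = cores_per_node * THREADS_PER_CORE;
        int hwcpuid = 37;

        /* hwcpuid 37 -> node 1, local cpu 5 (core 1, thread 1) */
        printf("node=%d cpu=%d core=%d thread=%d\n",
               hwcpuid / tpn, hwcpuid % tpn,
               (hwcpuid % tpn) / THREADS_PER_CORE,
               hwcpuid % THREADS_PER_CORE);
        return 0;
    }
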
diff --git a/arch/mips/netlogic/common/smpboot.S b/arch/mips/netlogic/common/smpboot.S
index aa6cff0..8597657 100644
--- a/arch/mips/netlogic/common/smpboot.S
+++ b/arch/mips/netlogic/common/smpboot.S
@@ -32,7 +32,6 @@
  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/init.h>
 
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
@@ -98,7 +97,7 @@
 * In case of the RMIboot bootloader, which is used on XLR boards, the
 * CPUs may already be woken up and waiting in bootloader code.
  * This will get them out of the bootloader code and into linux. Needed
- *  because the bootloader area will be taken and initialized by linux.
+ * because the bootloader area will be taken and initialized by linux.
  */
 NESTED(nlm_rmiboot_preboot, 16, sp)
 	mfc0	t0, $15, 1	/* read ebase */
@@ -133,6 +132,7 @@
 	or	t1, t2, v1	/* put in new value */
 	mtcr	t1, t0		/* update core control */
 
+	/* wait for NMI to hit */
 1:	wait
 	b	1b
 	nop
diff --git a/arch/mips/netlogic/dts/Makefile b/arch/mips/netlogic/dts/Makefile
index 0b9be5f..25c8e87 100644
--- a/arch/mips/netlogic/dts/Makefile
+++ b/arch/mips/netlogic/dts/Makefile
@@ -1,3 +1,4 @@
 obj-$(CONFIG_DT_XLP_EVP) := xlp_evp.dtb.o
 obj-$(CONFIG_DT_XLP_SVP) += xlp_svp.dtb.o
 obj-$(CONFIG_DT_XLP_FVP) += xlp_fvp.dtb.o
+obj-$(CONFIG_DT_XLP_GVP) += xlp_gvp.dtb.o
diff --git a/arch/mips/netlogic/dts/xlp_gvp.dts b/arch/mips/netlogic/dts/xlp_gvp.dts
new file mode 100644
index 0000000..047d27f
--- /dev/null
+++ b/arch/mips/netlogic/dts/xlp_gvp.dts
@@ -0,0 +1,76 @@
+/*
+ * XLP9XX Device Tree Source for GVP boards
+ */
+
+/dts-v1/;
+/ {
+	model = "netlogic,XLP-GVP";
+	compatible = "netlogic,xlp";
+	#address-cells = <2>;
+	#size-cells = <2>;
+
+	soc {
+		#address-cells = <2>;
+		#size-cells = <1>;
+		compatible = "simple-bus";
+		ranges = <0 0  0 0x18000000  0x04000000   // PCIe CFG
+			  1 0  0 0x16000000  0x02000000>; // GBU chipselects
+
+		serial0: serial@30000 {
+			device_type = "serial";
+			compatible = "ns16550";
+			reg = <0 0x112100 0xa00>;
+			reg-shift = <2>;
+			reg-io-width = <4>;
+			clock-frequency = <125000000>;
+			interrupt-parent = <&pic>;
+			interrupts = <17>;
+		};
+		pic: pic@4000 {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <1>;
+			reg = <0 0x110000 0x200>;
+		};
+
+		nor_flash@1,0 {
+			compatible = "cfi-flash";
+			#address-cells = <1>;
+			#size-cells = <1>;
+			bank-width = <2>;
+			reg = <1 0 0x1000000>;
+
+			partition@0 {
+				label = "x-loader";
+				reg = <0x0 0x100000>; /* 1M */
+				read-only;
+			};
+
+			partition@100000 {
+				label = "u-boot";
+				reg = <0x100000 0x100000>; /* 1M */
+			};
+
+			partition@200000 {
+				label = "kernel";
+				reg = <0x200000 0x500000>; /* 5M */
+			};
+
+			partition@700000 {
+				label = "rootfs";
+				reg = <0x700000 0x800000>; /* 8M */
+			};
+
+			partition@f00000 {
+				label = "env";
+				reg = <0xf00000 0x100000>; /* 1M */
+				read-only;
+			};
+		};
+
+	};
+
+	chosen {
+		bootargs = "console=ttyS0,115200 rdinit=/sbin/init";
+	};
+};
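
In the soc node above, the two ranges entries map child address space 0
onto the PCIe configuration window at 0x18000000 and space 1 onto the
GBU chipselects at 0x16000000, which is how the NOR flash at
reg = <1 0 0x1000000> lands at 0x16000000. A simplified sketch of that
translation, hard-coded to the two entries above:

    #include <stdint.h>
    #include <stdio.h>

    /* child space 0 -> PCIe CFG, child space 1 -> GBU chipselects */
    static uint64_t soc_translate(uint32_t space, uint32_t addr)
    {
        return (space ? 0x16000000ULL : 0x18000000ULL) + addr;
    }

    int main(void)
    {
        /* the u-boot partition at <1 0x100000> -> 0x16100000 */
        printf("0x%llx\n",
               (unsigned long long)soc_translate(1, 0x100000));
        return 0;
    }
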
diff --git a/arch/mips/netlogic/xlp/dt.c b/arch/mips/netlogic/xlp/dt.c
index 8316d54..5754097 100644
--- a/arch/mips/netlogic/xlp/dt.c
+++ b/arch/mips/netlogic/xlp/dt.c
@@ -42,13 +42,18 @@
 #include <asm/prom.h>
 
 extern u32 __dtb_xlp_evp_begin[], __dtb_xlp_svp_begin[],
-	__dtb_xlp_fvp_begin[], __dtb_start[];
+	__dtb_xlp_fvp_begin[], __dtb_xlp_gvp_begin[], __dtb_start[];
 static void *xlp_fdt_blob;
 
 void __init *xlp_dt_init(void *fdtp)
 {
 	if (!fdtp) {
 		switch (current_cpu_data.processor_id & 0xff00) {
+#ifdef CONFIG_DT_XLP_GVP
+		case PRID_IMP_NETLOGIC_XLP9XX:
+			fdtp = __dtb_xlp_gvp_begin;
+			break;
+#endif
 #ifdef CONFIG_DT_XLP_FVP
 		case PRID_IMP_NETLOGIC_XLP2XX:
 			fdtp = __dtb_xlp_fvp_begin;
diff --git a/arch/mips/netlogic/xlp/nlm_hal.c b/arch/mips/netlogic/xlp/nlm_hal.c
index 56c50ba..997cd9e 100644
--- a/arch/mips/netlogic/xlp/nlm_hal.c
+++ b/arch/mips/netlogic/xlp/nlm_hal.c
@@ -57,6 +57,10 @@
 	nodep->sysbase = nlm_get_sys_regbase(node);
 	nodep->picbase = nlm_get_pic_regbase(node);
 	nodep->ebase = read_c0_ebase() & (~((1 << 12) - 1));
+	if (cpu_is_xlp9xx())
+		nodep->socbus = xlp9xx_get_socbus(node);
+	else
+		nodep->socbus = 0;
 	spin_lock_init(&nodep->piclock);
 }
 
@@ -65,6 +69,26 @@
 	uint64_t pcibase;
 	int devoff, irt;
 
+	/* bypass for 9xx */
+	if (cpu_is_xlp9xx()) {
+		switch (irq) {
+		case PIC_9XX_XHCI_0_IRQ:
+			return 114;
+		case PIC_9XX_XHCI_1_IRQ:
+			return 115;
+		case PIC_UART_0_IRQ:
+			return 133;
+		case PIC_UART_1_IRQ:
+			return 134;
+		case PIC_PCIE_LINK_LEGACY_IRQ(0):
+		case PIC_PCIE_LINK_LEGACY_IRQ(1):
+		case PIC_PCIE_LINK_LEGACY_IRQ(2):
+		case PIC_PCIE_LINK_LEGACY_IRQ(3):
+			return 191 + irq - PIC_PCIE_LINK_LEGACY_IRQ_BASE;
+		}
+		return -1;
+	}
+
 	devoff = 0;
 	switch (irq) {
 	case PIC_UART_0_IRQ:
@@ -135,9 +159,17 @@
 		case PIC_I2C_3_IRQ:
 			irt = irt + 3; break;
 		}
-	} else if (irq >= PIC_PCIE_LINK_0_IRQ && irq <= PIC_PCIE_LINK_3_IRQ) {
+	} else if (irq >= PIC_PCIE_LINK_LEGACY_IRQ(0) &&
+			irq <= PIC_PCIE_LINK_LEGACY_IRQ(3)) {
 		/* HW bug, PCI IRT entries are bad on early silicon, fix */
-		irt = PIC_IRT_PCIE_LINK_INDEX(irq - PIC_PCIE_LINK_0_IRQ);
+		irt = PIC_IRT_PCIE_LINK_INDEX(irq -
+					PIC_PCIE_LINK_LEGACY_IRQ_BASE);
+	} else if (irq >= PIC_PCIE_LINK_MSI_IRQ(0) &&
+			irq <= PIC_PCIE_LINK_MSI_IRQ(3)) {
+		irt = -2;
+	} else if (irq >= PIC_PCIE_MSIX_IRQ(0) &&
+			irq <= PIC_PCIE_MSIX_IRQ(3)) {
+		irt = -2;
 	} else {
 		irt = -1;
 	}
@@ -151,7 +183,10 @@
 	uint64_t num, sysbase;
 
 	sysbase = nlm_get_node(node)->sysbase;
-	rstval = nlm_read_sys_reg(sysbase, SYS_POWER_ON_RESET_CFG);
+	if (cpu_is_xlp9xx())
+		rstval = nlm_read_sys_reg(sysbase, SYS_9XX_POWER_ON_RESET_CFG);
+	else
+		rstval = nlm_read_sys_reg(sysbase, SYS_POWER_ON_RESET_CFG);
 	if (cpu_is_xlpii()) {
 		num = 1000000ULL * (400 * 3 + 100 * (rstval >> 26));
 		denom = 3;
@@ -265,6 +300,10 @@
 
 unsigned int nlm_get_pic_frequency(int node)
 {
+	/* TODO: calculate the PIC frequency as done for 2xx */
+	if (cpu_is_xlp9xx())
+		return 250000000;
+
 	if (cpu_is_xlpii())
 		return nlm_2xx_get_pic_frequency(node);
 	else
@@ -284,21 +323,33 @@
 {
 	uint64_t bridgebase, base, lim;
 	uint32_t val;
+	unsigned int barreg, limreg, xlatreg;
 	int i, node, rv;
 
 	/* Look only at mapping on Node 0, we don't handle crazy configs */
 	bridgebase = nlm_get_bridge_regbase(0);
 	rv = 0;
 	for (i = 0; i < 8; i++) {
-		val = nlm_read_bridge_reg(bridgebase,
-					BRIDGE_DRAM_NODE_TRANSLN(i));
-		node = (val >> 1) & 0x3;
-		if (n >= 0 && n != node)
-			continue;
-		val = nlm_read_bridge_reg(bridgebase, BRIDGE_DRAM_BAR(i));
+		if (cpu_is_xlp9xx()) {
+			barreg = BRIDGE_9XX_DRAM_BAR(i);
+			limreg = BRIDGE_9XX_DRAM_LIMIT(i);
+			xlatreg = BRIDGE_9XX_DRAM_NODE_TRANSLN(i);
+		} else {
+			barreg = BRIDGE_DRAM_BAR(i);
+			limreg = BRIDGE_DRAM_LIMIT(i);
+			xlatreg = BRIDGE_DRAM_NODE_TRANSLN(i);
+		}
+		if (n >= 0) {
+			/* node specified, get node mapping of BAR */
+			val = nlm_read_bridge_reg(bridgebase, xlatreg);
+			node = (val >> 1) & 0x3;
+			if (n != node)
+				continue;
+		}
+		val = nlm_read_bridge_reg(bridgebase, barreg);
 		val = (val >>  12) & 0xfffff;
 		base = (uint64_t) val << 20;
-		val = nlm_read_bridge_reg(bridgebase, BRIDGE_DRAM_LIMIT(i));
+		val = nlm_read_bridge_reg(bridgebase, limreg);
 		val = (val >>  12) & 0xfffff;
 		if (val == 0)   /* BAR not used */
 			continue;
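
The DRAM map decode reads a 20-bit field from bits 31:12 of the BAR and
LIMIT registers and scales it by 1MB. A small sketch of the conversion,
with the shift constants taken from the hunk above:

    #include <stdint.h>
    #include <stdio.h>

    /* BAR/LIMIT hold a megabyte index in bits 31:12 */
    static uint64_t dram_reg_to_bytes(uint32_t val)
    {
        return (uint64_t)((val >> 12) & 0xfffff) << 20;
    }

    int main(void)
    {
        /* index 256 -> 256MB -> 0x10000000 */
        printf("0x%llx\n",
               (unsigned long long)dram_reg_to_bytes(256u << 12));
        return 0;
    }
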
diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index 54e75c7..8c60a2d 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -51,12 +51,16 @@
 struct nlm_soc_info nlm_nodes[NLM_NR_NODES];
 cpumask_t nlm_cpumask = CPU_MASK_CPU0;
 unsigned int nlm_threads_per_core;
+unsigned int xlp_cores_per_node;
 
 static void nlm_linux_exit(void)
 {
 	uint64_t sysbase = nlm_get_node(0)->sysbase;
 
-	nlm_write_sys_reg(sysbase, SYS_CHIP_RESET, 1);
+	if (cpu_is_xlp9xx())
+		nlm_write_sys_reg(sysbase, SYS_9XX_CHIP_RESET, 1);
+	else
+		nlm_write_sys_reg(sysbase, SYS_CHIP_RESET, 1);
 	for ( ; ; )
 		cpu_wait();
 }
@@ -92,6 +96,14 @@
 
 void __init plat_mem_setup(void)
 {
+#ifdef CONFIG_SMP
+	nlm_wakeup_secondary_cpus();
+
+	/* update TLB size after waking up threads */
+	current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1;
+
+	register_smp_ops(&nlm_smp_ops);
+#endif
 	_machine_restart = (void (*)(char *))nlm_linux_exit;
 	_machine_halt	= nlm_linux_exit;
 	pm_power_off	= nlm_linux_exit;
@@ -110,6 +122,7 @@
 const char *get_system_type(void)
 {
 	switch (read_c0_prid() & 0xff00) {
+	case PRID_IMP_NETLOGIC_XLP9XX:
 	case PRID_IMP_NETLOGIC_XLP2XX:
 		return "Broadcom XLPII Series";
 	default:
@@ -149,6 +162,10 @@
 	void *reset_vec;
 
 	nlm_io_base = CKSEG1ADDR(XLP_DEFAULT_IO_BASE);
+	if (cpu_is_xlp9xx())
+		xlp_cores_per_node = 32;
+	else
+		xlp_cores_per_node = 8;
 	nlm_init_boot_cpu();
 	xlp_mmu_init();
 	nlm_node_init(0);
@@ -162,11 +179,5 @@
 
 #ifdef CONFIG_SMP
 	cpumask_setall(&nlm_cpumask);
-	nlm_wakeup_secondary_cpus();
-
-	/* update TLB size after waking up threads */
-	current_cpu_data.tlbsize = ((read_c0_config6() >> 16) & 0xffff) + 1;
-
-	register_smp_ops(&nlm_smp_ops);
 #endif
 }
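
plat_mem_setup() now rereads the TLB size after waking the sibling
threads, presumably because the per-thread TLB partitioning changes
once the extra threads are enabled. A sketch of the Config6 decode,
with the field layout assumed from the expression in the diff:

    #include <stdint.h>
    #include <stdio.h>

    /* bits 31:16 of Config6 hold (number of TLB entries - 1) */
    static unsigned int tlb_entries(uint32_t config6)
    {
        return ((config6 >> 16) & 0xffff) + 1;
    }

    int main(void)
    {
        printf("%u\n", tlb_entries(0x03ff0000));    /* 1024 */
        return 0;
    }
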
diff --git a/arch/mips/netlogic/xlp/usb-init-xlp2.c b/arch/mips/netlogic/xlp/usb-init-xlp2.c
index 36e9c22..17ade1c 100644
--- a/arch/mips/netlogic/xlp/usb-init-xlp2.c
+++ b/arch/mips/netlogic/xlp/usb-init-xlp2.c
@@ -37,6 +37,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/pci.h>
+#include <linux/pci_ids.h>
 #include <linux/platform_device.h>
 #include <linux/irq.h>
 
@@ -83,12 +84,14 @@
 #define nlm_read_usb_reg(b, r)		nlm_read_reg(b, r)
 #define nlm_write_usb_reg(b, r, v)	nlm_write_reg(b, r, v)
 
-#define nlm_xlpii_get_usb_pcibase(node, inst)		\
-	nlm_pcicfg_base(XLP2XX_IO_USB_OFFSET(node, inst))
+#define nlm_xlpii_get_usb_pcibase(node, inst)			\
+			nlm_pcicfg_base(cpu_is_xlp9xx() ?	\
+			XLP9XX_IO_USB_OFFSET(node, inst) :	\
+			XLP2XX_IO_USB_OFFSET(node, inst))
 #define nlm_xlpii_get_usb_regbase(node, inst)		\
 	(nlm_xlpii_get_usb_pcibase(node, inst) + XLP_IO_PCI_HDRSZ)
 
-static void xlpii_usb_ack(struct irq_data *data)
+static void xlp2xx_usb_ack(struct irq_data *data)
 {
 	u64 port_addr;
 
@@ -109,6 +112,29 @@
 	nlm_write_usb_reg(port_addr, XLPII_USB3_INT_REG, 0xffffffff);
 }
 
+static void xlp9xx_usb_ack(struct irq_data *data)
+{
+	u64 port_addr;
+	int node, irq;
+
+	/* Find the node and irq on the node */
+	irq = data->irq % NLM_IRQS_PER_NODE;
+	node = data->irq / NLM_IRQS_PER_NODE;
+
+	switch (irq) {
+	case PIC_9XX_XHCI_0_IRQ:
+		port_addr = nlm_xlpii_get_usb_regbase(node, 1);
+		break;
+	case PIC_9XX_XHCI_1_IRQ:
+		port_addr = nlm_xlpii_get_usb_regbase(node, 2);
+		break;
+	default:
+		pr_err("No matching USB irq %d node %d!\n", irq, node);
+		return;
+	}
+	nlm_write_usb_reg(port_addr, XLPII_USB3_INT_REG, 0xffffffff);
+}
+
 static void nlm_xlpii_usb_hw_reset(int node, int port)
 {
 	u64 port_addr, xhci_base, pci_base;
@@ -178,17 +204,33 @@
 
 static int __init nlm_platform_xlpii_usb_init(void)
 {
+	int node;
+
 	if (!cpu_is_xlpii())
 		return 0;
 
-	pr_info("Initializing 2XX USB Interface\n");
-	nlm_xlpii_usb_hw_reset(0, 1);
-	nlm_xlpii_usb_hw_reset(0, 2);
-	nlm_xlpii_usb_hw_reset(0, 3);
-	nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_0_IRQ, xlpii_usb_ack);
-	nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_1_IRQ, xlpii_usb_ack);
-	nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_2_IRQ, xlpii_usb_ack);
+	if (!cpu_is_xlp9xx()) {
+		/* XLP 2XX single node */
+		pr_info("Initializing 2XX USB Interface\n");
+		nlm_xlpii_usb_hw_reset(0, 1);
+		nlm_xlpii_usb_hw_reset(0, 2);
+		nlm_xlpii_usb_hw_reset(0, 3);
+		nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_0_IRQ, xlp2xx_usb_ack);
+		nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_1_IRQ, xlp2xx_usb_ack);
+		nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_2_IRQ, xlp2xx_usb_ack);
+		return 0;
+	}
 
+	/* XLP 9XX, multi-node */
+	pr_info("Initializing 9XX USB Interface\n");
+	for (node = 0; node < NLM_NR_NODES; node++) {
+		if (!nlm_node_present(node))
+			continue;
+		nlm_xlpii_usb_hw_reset(node, 1);
+		nlm_xlpii_usb_hw_reset(node, 2);
+		nlm_set_pic_extra_ack(node, PIC_9XX_XHCI_0_IRQ, xlp9xx_usb_ack);
+		nlm_set_pic_extra_ack(node, PIC_9XX_XHCI_1_IRQ, xlp9xx_usb_ack);
+	}
 	return 0;
 }
 
@@ -196,8 +238,26 @@
 
 static u64 xlp_usb_dmamask = ~(u32)0;
 
-/* Fixup IRQ for USB devices on XLP the SoC PCIe bus */
-static void nlm_usb_fixup_final(struct pci_dev *dev)
+/* Fix up the IRQ for USB devices on the XLP9XX SoC PCIe bus */
+static void nlm_xlp9xx_usb_fixup_final(struct pci_dev *dev)
+{
+	int node;
+
+	node = xlp_socdev_to_node(dev);
+	dev->dev.dma_mask		= &xlp_usb_dmamask;
+	dev->dev.coherent_dma_mask	= DMA_BIT_MASK(32);
+	switch (dev->devfn) {
+	case 0x21:
+		dev->irq = nlm_irq_to_xirq(node, PIC_9XX_XHCI_0_IRQ);
+		break;
+	case 0x22:
+		dev->irq = nlm_irq_to_xirq(node, PIC_9XX_XHCI_1_IRQ);
+		break;
+	}
+}
+
+/* Fix up the IRQ for USB devices on the XLP2XX SoC PCIe bus */
+static void nlm_xlp2xx_usb_fixup_final(struct pci_dev *dev)
 {
 	dev->dev.dma_mask		= &xlp_usb_dmamask;
 	dev->dev.coherent_dma_mask	= DMA_BIT_MASK(32);
@@ -214,5 +274,7 @@
 	}
 }
 
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_XLP9XX_XHCI,
+		nlm_xlp9xx_usb_fixup_final);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_NETLOGIC, PCI_DEVICE_ID_NLM_XHCI,
-		nlm_usb_fixup_final);
+		nlm_xlp2xx_usb_fixup_final);
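
The fixups key off dev->devfn, which packs the PCI slot in bits 7:3 and
the function in bits 2:0, so the 0x21 and 0x22 cases above are slot 4,
functions 1 and 2. A quick check:

    #include <stdio.h>

    #define SLOT(devfn) (((devfn) >> 3) & 0x1f)
    #define FUNC(devfn) ((devfn) & 0x07)

    int main(void)
    {
        /* 0x21 -> 4.1 (xhci0 irq), 0x22 -> 4.2 (xhci1 irq) */
        printf("0x21 -> %d.%d, 0x22 -> %d.%d\n",
               SLOT(0x21), FUNC(0x21), SLOT(0x22), FUNC(0x22));
        return 0;
    }
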
diff --git a/arch/mips/netlogic/xlp/wakeup.c b/arch/mips/netlogic/xlp/wakeup.c
index 682d563..9a92617 100644
--- a/arch/mips/netlogic/xlp/wakeup.c
+++ b/arch/mips/netlogic/xlp/wakeup.c
@@ -32,7 +32,6 @@
  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/threads.h>
 
@@ -47,14 +46,14 @@
 #include <asm/netlogic/mips-extns.h>
 
 #include <asm/netlogic/xlp-hal/iomap.h>
-#include <asm/netlogic/xlp-hal/pic.h>
 #include <asm/netlogic/xlp-hal/xlp.h>
+#include <asm/netlogic/xlp-hal/pic.h>
 #include <asm/netlogic/xlp-hal/sys.h>
 
 static int xlp_wakeup_core(uint64_t sysbase, int node, int core)
 {
 	uint32_t coremask, value;
-	int count;
+	int count, resetreg;
 
 	coremask = (1 << core);
 
@@ -65,12 +64,24 @@
 		nlm_write_sys_reg(sysbase, SYS_CORE_DFS_DIS_CTRL, value);
 	}
 
-	/* Remove CPU Reset */
-	value = nlm_read_sys_reg(sysbase, SYS_CPU_RESET);
-	value &= ~coremask;
-	nlm_write_sys_reg(sysbase, SYS_CPU_RESET, value);
+	/* On 9XX, mark coherent first */
+	if (cpu_is_xlp9xx()) {
+		value = nlm_read_sys_reg(sysbase, SYS_9XX_CPU_NONCOHERENT_MODE);
+		value &= ~coremask;
+		nlm_write_sys_reg(sysbase, SYS_9XX_CPU_NONCOHERENT_MODE, value);
+	}
 
-	/* Poll for CPU to mark itself coherent */
+	/* Remove CPU Reset */
+	resetreg = cpu_is_xlp9xx() ? SYS_9XX_CPU_RESET : SYS_CPU_RESET;
+	value = nlm_read_sys_reg(sysbase, resetreg);
+	value &= ~coremask;
+	nlm_write_sys_reg(sysbase, resetreg, value);
+
+	/* We are done on 9XX */
+	if (cpu_is_xlp9xx())
+		return 1;
+
+	/* Poll for CPU to mark itself coherent on other type of XLP */
 	count = 100000;
 	do {
 		value = nlm_read_sys_reg(sysbase, SYS_CPU_NONCOHERENT_MODE);
@@ -84,7 +95,7 @@
 	volatile uint32_t *cpu_ready = nlm_get_boot_data(BOOT_CPU_READY);
 	int i, count, notready;
 
-	count = 0x20000000;
+	count = 0x800000;
 	do {
 		notready = nlm_threads_per_core;
 		for (i = 0; i < nlm_threads_per_core; i++)
@@ -98,27 +109,62 @@
 static void xlp_enable_secondary_cores(const cpumask_t *wakeup_mask)
 {
 	struct nlm_soc_info *nodep;
-	uint64_t syspcibase;
-	uint32_t syscoremask;
+	uint64_t syspcibase, fusebase;
+	uint32_t syscoremask, mask, fusemask;
 	int core, n, cpu;
 
 	for (n = 0; n < NLM_NR_NODES; n++) {
-		syspcibase = nlm_get_sys_pcibase(n);
-		if (nlm_read_reg(syspcibase, 0) == 0xffffffff)
-			break;
-
-		/* read cores in reset from SYS */
-		if (n != 0)
+		if (n != 0) {
+			/* check if node exists and is online */
+			if (cpu_is_xlp9xx()) {
+				int b = xlp9xx_get_socbus(n);
+				pr_info("Node %d SoC PCI bus %d.\n", n, b);
+				if (b == 0)
+					break;
+			} else {
+				syspcibase = nlm_get_sys_pcibase(n);
+				if (nlm_read_reg(syspcibase, 0) == 0xffffffff)
+					break;
+			}
 			nlm_node_init(n);
-		nodep = nlm_get_node(n);
-		syscoremask = nlm_read_sys_reg(nodep->sysbase, SYS_CPU_RESET);
-		/* The boot cpu */
-		if (n == 0) {
-			syscoremask |= 1;
-			nodep->coremask = 1;
 		}
 
-		for (core = 0; core < NLM_CORES_PER_NODE; core++) {
+		/* read cores in reset from SYS */
+		nodep = nlm_get_node(n);
+
+		if (cpu_is_xlp9xx()) {
+			fusebase = nlm_get_fuse_regbase(n);
+			fusemask = nlm_read_reg(fusebase, FUSE_9XX_DEVCFG6);
+			mask = 0xfffff;
+		} else {
+			fusemask = nlm_read_sys_reg(nodep->sysbase,
+						SYS_EFUSE_DEVICE_CFG_STATUS0);
+			switch (read_c0_prid() & 0xff00) {
+			case PRID_IMP_NETLOGIC_XLP3XX:
+				mask = 0xf;
+				break;
+			case PRID_IMP_NETLOGIC_XLP2XX:
+				mask = 0x3;
+				break;
+			case PRID_IMP_NETLOGIC_XLP8XX:
+			default:
+				mask = 0xff;
+				break;
+			}
+		}
+
+		/*
+		 * Fused-out cores are set in the fusemask, and the remaining
+		 * cores are renumbered to the range 0 .. nactive-1.
+		 */
+		syscoremask = (1 << hweight32(~fusemask & mask)) - 1;
+
+		/* The boot cpu */
+		if (n == 0)
+			nodep->coremask = 1;
+
+		pr_info("Node %d - SYS/FUSE coremask %x\n", n, syscoremask);
+		for (core = 0; core < nlm_cores_per_node(); core++) {
 			/* we will be on node 0 core 0 */
 			if (n == 0 && core == 0)
 				continue;
@@ -128,7 +174,7 @@
 				continue;
 
 			/* see if at least the first hw thread is enabled */
-			cpu = (n * NLM_CORES_PER_NODE + core)
+			cpu = (n * nlm_cores_per_node() + core)
 						* NLM_THREADS_PER_CORE;
 			if (!cpumask_test_cpu(cpu, wakeup_mask))
 				continue;
@@ -141,7 +187,8 @@
 			nodep->coremask |= 1u << core;
 
 			/* spin until the hw threads sets their ready */
-			wait_for_cpus(cpu, 0);
+			if (!wait_for_cpus(cpu, 0))
+				pr_err("Node %d : timeout core %d\n", n, core);
 		}
 	}
 }
@@ -153,7 +200,8 @@
 	 * first wakeup core 0 threads
 	 */
 	xlp_boot_core0_siblings();
-	wait_for_cpus(0, 0);
+	if (!wait_for_cpus(0, 0))
+		pr_err("Node 0 : timeout core 0\n");
 
 	/* now get other cores out of reset */
 	xlp_enable_secondary_cores(&nlm_cpumask);
diff --git a/arch/mips/netlogic/xlr/platform.c b/arch/mips/netlogic/xlr/platform.c
index 7b96a91..4785932 100644
--- a/arch/mips/netlogic/xlr/platform.c
+++ b/arch/mips/netlogic/xlr/platform.c
@@ -23,7 +23,7 @@
 #include <asm/netlogic/xlr/pic.h>
 #include <asm/netlogic/xlr/xlr.h>
 
-unsigned int nlm_xlr_uart_in(struct uart_port *p, int offset)
+static unsigned int nlm_xlr_uart_in(struct uart_port *p, int offset)
 {
 	uint64_t uartbase;
 	unsigned int value;
@@ -41,7 +41,7 @@
 	return value;
 }
 
-void nlm_xlr_uart_out(struct uart_port *p, int offset, int value)
+static void nlm_xlr_uart_out(struct uart_port *p, int offset, int value)
 {
 	uint64_t uartbase;
 
diff --git a/arch/mips/netlogic/xlr/setup.c b/arch/mips/netlogic/xlr/setup.c
index 921be5f..d118b9a 100644
--- a/arch/mips/netlogic/xlr/setup.c
+++ b/arch/mips/netlogic/xlr/setup.c
@@ -60,25 +60,6 @@
 struct nlm_soc_info nlm_nodes[NLM_NR_NODES];
 cpumask_t nlm_cpumask = CPU_MASK_CPU0;
 
-static void __init nlm_early_serial_setup(void)
-{
-	struct uart_port s;
-	unsigned long uart_base;
-
-	uart_base = (unsigned long)nlm_mmio_base(NETLOGIC_IO_UART_0_OFFSET);
-	memset(&s, 0, sizeof(s));
-	s.flags		= ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST;
-	s.iotype	= UPIO_MEM32;
-	s.regshift	= 2;
-	s.irq		= PIC_UART_0_IRQ;
-	s.uartclk	= PIC_CLK_HZ;
-	s.serial_in	= nlm_xlr_uart_in;
-	s.serial_out	= nlm_xlr_uart_out;
-	s.mapbase	= uart_base;
-	s.membase	= (unsigned char __iomem *)uart_base;
-	early_serial_setup(&s);
-}
-
 static void nlm_linux_exit(void)
 {
 	uint64_t gpiobase;
@@ -214,7 +195,6 @@
 	memcpy(reset_vec, (void *)nlm_reset_entry,
 			(nlm_reset_entry_end - nlm_reset_entry));
 
-	nlm_early_serial_setup();
 	build_arcs_cmdline(argv);
 	prom_add_memory();
 
diff --git a/arch/mips/netlogic/xlr/wakeup.c b/arch/mips/netlogic/xlr/wakeup.c
index 9fb81fa..d61cba1 100644
--- a/arch/mips/netlogic/xlr/wakeup.c
+++ b/arch/mips/netlogic/xlr/wakeup.c
@@ -32,7 +32,6 @@
  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/threads.h>
 
@@ -70,7 +69,7 @@
 
 	/* Fill up the coremask early */
 	nodep->coremask = 1;
-	for (i = 1; i < NLM_CORES_PER_NODE; i++) {
+	for (i = 1; i < nlm_cores_per_node(); i++) {
 		for (j = 1000000; j > 0; j--) {
 			if (cpu_ready[i * NLM_THREADS_PER_CORE])
 				break;
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index 4d1736f..2a86e38 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -86,6 +86,8 @@
 	case CPU_34K:
 	case CPU_1004K:
 	case CPU_74K:
+	case CPU_INTERAPTIV:
+	case CPU_PROAPTIV:
 	case CPU_LOONGSON1:
 	case CPU_SB1:
 	case CPU_SB1A:
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index 3a2b6e9..4d94d75 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -376,6 +376,14 @@
 		op_model_mipsxx_ops.cpu_type = "mips/74K";
 		break;
 
+	case CPU_INTERAPTIV:
+		op_model_mipsxx_ops.cpu_type = "mips/interAptiv";
+		break;
+
+	case CPU_PROAPTIV:
+		op_model_mipsxx_ops.cpu_type = "mips/proAptiv";
+		break;
+
 	case CPU_5KC:
 		op_model_mipsxx_ops.cpu_type = "mips/5K";
 		break;
diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile
index 719e455..137f2a6 100644
--- a/arch/mips/pci/Makefile
+++ b/arch/mips/pci/Makefile
@@ -60,4 +60,5 @@
 
 ifdef CONFIG_PCI_MSI
 obj-$(CONFIG_CAVIUM_OCTEON_SOC) += msi-octeon.o
+obj-$(CONFIG_CPU_XLP)		+= msi-xlp.o
 endif
diff --git a/arch/mips/pci/fixup-malta.c b/arch/mips/pci/fixup-malta.c
index df36e23..7a0eda7 100644
--- a/arch/mips/pci/fixup-malta.c
+++ b/arch/mips/pci/fixup-malta.c
@@ -54,6 +54,7 @@
 static void malta_piix_func0_fixup(struct pci_dev *pdev)
 {
 	unsigned char reg_val;
+	u32 reg_val32;
 	/* PIIX PIRQC[A:D] irq mappings */
 	static int piixirqmap[PIIX4_FUNC0_PIRQRC_IRQ_ROUTING_MAX] = {
 		0,  0,	0,  3,
@@ -83,6 +84,16 @@
 		pci_write_config_byte(pdev, PIIX4_FUNC0_TOM, reg_val |
 				PIIX4_FUNC0_TOM_TOP_OF_MEMORY_MASK);
 	}
+
+	/* Mux SERIRQ to its pin */
+	pci_read_config_dword(pdev, PIIX4_FUNC0_GENCFG, &reg_val32);
+	pci_write_config_dword(pdev, PIIX4_FUNC0_GENCFG,
+			       reg_val32 | PIIX4_FUNC0_GENCFG_SERIRQ);
+
+	/* Enable SERIRQ */
+	pci_read_config_byte(pdev, PIIX4_FUNC0_SERIRQC, &reg_val);
+	reg_val |= PIIX4_FUNC0_SERIRQC_EN | PIIX4_FUNC0_SERIRQC_CONT;
+	pci_write_config_byte(pdev, PIIX4_FUNC0_SERIRQC, reg_val);
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82371AB_0,
diff --git a/arch/mips/pci/fixup-rc32434.c b/arch/mips/pci/fixup-rc32434.c
index d0f6ecb..7fcafd5 100644
--- a/arch/mips/pci/fixup-rc32434.c
+++ b/arch/mips/pci/fixup-rc32434.c
@@ -27,7 +27,6 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 
 #include <asm/mach-rc32434/rc32434.h>
 #include <asm/mach-rc32434/irq.h>
diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c
index 1441bec..8feae91 100644
--- a/arch/mips/pci/fixup-sb1250.c
+++ b/arch/mips/pci/fixup-sb1250.c
@@ -8,7 +8,6 @@
  *	2 of the License, or (at your option) any later version.
  */
 
-#include <linux/init.h>
 #include <linux/pci.h>
 
 /*
diff --git a/arch/mips/pci/msi-xlp.c b/arch/mips/pci/msi-xlp.c
new file mode 100644
index 0000000..afd8405
--- /dev/null
+++ b/arch/mips/pci/msi-xlp.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2003-2012 Broadcom Corporation
+ * All Rights Reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the Broadcom
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/msi.h>
+#include <linux/mm.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/console.h>
+
+#include <asm/io.h>
+
+#include <asm/netlogic/interrupt.h>
+#include <asm/netlogic/haldefs.h>
+#include <asm/netlogic/common.h>
+#include <asm/netlogic/mips-extns.h>
+
+#include <asm/netlogic/xlp-hal/iomap.h>
+#include <asm/netlogic/xlp-hal/xlp.h>
+#include <asm/netlogic/xlp-hal/pic.h>
+#include <asm/netlogic/xlp-hal/pcibus.h>
+#include <asm/netlogic/xlp-hal/bridge.h>
+
+#define XLP_MSIVEC_PER_LINK	32
+#define XLP_MSIXVEC_TOTAL	32
+#define XLP_MSIXVEC_PER_LINK	8
+
+/* 128 MSI irqs per node, mapped starting at NLM_MSI_VEC_BASE */
+static inline int nlm_link_msiirq(int link, int msivec)
+{
+	return NLM_MSI_VEC_BASE + link * XLP_MSIVEC_PER_LINK + msivec;
+}
+
+static inline int nlm_irq_msivec(int irq)
+{
+	return irq % XLP_MSIVEC_PER_LINK;
+}
+
+static inline int nlm_irq_msilink(int irq)
+{
+	return (irq % (XLP_MSIVEC_PER_LINK * PCIE_NLINKS)) /
+						XLP_MSIVEC_PER_LINK;
+}
+
+/*
+ * Only 32 MSI-X vectors are possible because there are only 32 PIC
+ * interrupts for MSI. We split them statically and use 8 MSI-X vectors
+ * per link - this keeps the allocation and lookup simple.
+ */
+static inline int nlm_link_msixirq(int link, int bit)
+{
+	return NLM_MSIX_VEC_BASE + link * XLP_MSIXVEC_PER_LINK + bit;
+}
+
+static inline int nlm_irq_msixvec(int irq)
+{
+	return irq % XLP_MSIXVEC_TOTAL;  /* works when given xirq */
+}
+
+static inline int nlm_irq_msixlink(int irq)
+{
+	return nlm_irq_msixvec(irq) / XLP_MSIXVEC_PER_LINK;
+}
+
+/*
+ * Per link MSI and MSI-X information, set as IRQ handler data for
+ * MSI and MSI-X interrupts.
+ */
+struct xlp_msi_data {
+	struct nlm_soc_info *node;
+	uint64_t	lnkbase;
+	uint32_t	msi_enabled_mask;
+	uint32_t	msi_alloc_mask;
+	uint32_t	msix_alloc_mask;
+	spinlock_t	msi_lock;
+};
+
+/*
+ * MSI Chip definitions
+ *
+ * On XLP, there is a PIC interrupt associated with each PCIe link on the
+ * chip (which appears as a PCI bridge to us). This gives us 32 MSI irqs
+ * per link and 128 overall.
+ *
+ * When a device connected to the link raises an MSI interrupt, we get a
+ * link interrupt and then have to look at the PCIE_MSI_STATUS register
+ * on the bridge to map it to the IRQ.
+ */
+static void xlp_msi_enable(struct irq_data *d)
+{
+	struct xlp_msi_data *md = irq_data_get_irq_handler_data(d);
+	unsigned long flags;
+	int vec;
+
+	vec = nlm_irq_msivec(d->irq);
+	spin_lock_irqsave(&md->msi_lock, flags);
+	md->msi_enabled_mask |= 1u << vec;
+	nlm_write_reg(md->lnkbase, PCIE_MSI_EN, md->msi_enabled_mask);
+	spin_unlock_irqrestore(&md->msi_lock, flags);
+}
+
+static void xlp_msi_disable(struct irq_data *d)
+{
+	struct xlp_msi_data *md = irq_data_get_irq_handler_data(d);
+	unsigned long flags;
+	int vec;
+
+	vec = nlm_irq_msivec(d->irq);
+	spin_lock_irqsave(&md->msi_lock, flags);
+	md->msi_enabled_mask &= ~(1u << vec);
+	nlm_write_reg(md->lnkbase, PCIE_MSI_EN, md->msi_enabled_mask);
+	spin_unlock_irqrestore(&md->msi_lock, flags);
+}
+
+static void xlp_msi_mask_ack(struct irq_data *d)
+{
+	struct xlp_msi_data *md = irq_data_get_irq_handler_data(d);
+	int link, vec;
+
+	link = nlm_irq_msilink(d->irq);
+	vec = nlm_irq_msivec(d->irq);
+	xlp_msi_disable(d);
+
+	/* Ack MSI on bridge */
+	nlm_write_reg(md->lnkbase, PCIE_MSI_STATUS, 1u << vec);
+
+	/* Ack at eirr and PIC */
+	ack_c0_eirr(PIC_PCIE_LINK_MSI_IRQ(link));
+	nlm_pic_ack(md->node->picbase, PIC_IRT_PCIE_LINK_INDEX(link));
+}
+
+static struct irq_chip xlp_msi_chip = {
+	.name		= "XLP-MSI",
+	.irq_enable	= xlp_msi_enable,
+	.irq_disable	= xlp_msi_disable,
+	.irq_mask_ack	= xlp_msi_mask_ack,
+	.irq_unmask	= xlp_msi_enable,
+};
+
+/*
+ * The MSI-X interrupt handling is different from MSI: there are 32
+ * MSI-X interrupts generated by the PIC, and each of these corresponds
+ * to an MSI-X vector (0-31) that can be assigned.
+ *
+ * We divide the MSI-X vectors into 8 per link and do per-link
+ * allocation.
+ *
+ * Enable and disable are done using the standard MSI functions.
+ */
+static void xlp_msix_mask_ack(struct irq_data *d)
+{
+	struct xlp_msi_data *md = irq_data_get_irq_handler_data(d);
+	int link, msixvec;
+
+	msixvec = nlm_irq_msixvec(d->irq);
+	link = nlm_irq_msixlink(d->irq);
+	mask_msi_irq(d);
+
+	/* Ack MSI on bridge */
+	nlm_write_reg(md->lnkbase, PCIE_MSIX_STATUS, 1u << msixvec);
+
+	/* Ack at eirr and PIC */
+	ack_c0_eirr(PIC_PCIE_MSIX_IRQ(link));
+	nlm_pic_ack(md->node->picbase, PIC_IRT_PCIE_MSIX_INDEX(msixvec));
+}
+
+static struct irq_chip xlp_msix_chip = {
+	.name		= "XLP-MSIX",
+	.irq_enable	= unmask_msi_irq,
+	.irq_disable	= mask_msi_irq,
+	.irq_mask_ack	= xlp_msix_mask_ack,
+	.irq_unmask	= unmask_msi_irq,
+};
+
+void destroy_irq(unsigned int irq)
+{
+	/* nothing to do yet */
+}
+
+void arch_teardown_msi_irq(unsigned int irq)
+{
+	destroy_irq(irq);
+}
+
+/*
+ * Set up a PCIe link for MSI.  By default, the links are in
+ * legacy interrupt mode.  We will switch them to MSI mode
+ * at the first MSI request.
+ */
+static void xlp_config_link_msi(uint64_t lnkbase, int lirq, uint64_t msiaddr)
+{
+	u32 val;
+
+	val = nlm_read_reg(lnkbase, PCIE_INT_EN0);
+	if ((val & 0x200) == 0) {
+		val |= 0x200;		/* MSI Interrupt enable */
+		nlm_write_reg(lnkbase, PCIE_INT_EN0, val);
+	}
+
+	val = nlm_read_reg(lnkbase, 0x1);	/* CMD */
+	if ((val & 0x0400) == 0) {
+		val |= 0x0400;
+		nlm_write_reg(lnkbase, 0x1, val);
+	}
+
+	/* Update IRQ in the PCI irq reg */
+	val = nlm_read_pci_reg(lnkbase, 0xf);
+	val &= ~0x1fu;
+	val |= (1 << 8) | lirq;
+	nlm_write_pci_reg(lnkbase, 0xf, val);
+
+	/* MSI addr */
+	nlm_write_reg(lnkbase, PCIE_BRIDGE_MSI_ADDRH, msiaddr >> 32);
+	nlm_write_reg(lnkbase, PCIE_BRIDGE_MSI_ADDRL, msiaddr & 0xffffffff);
+
+	/* MSI cap for bridge */
+	val = nlm_read_reg(lnkbase, PCIE_BRIDGE_MSI_CAP);
+	if ((val & (1 << 16)) == 0) {
+		val |= 0xb << 16;		/* mmc32, msi enable */
+		nlm_write_reg(lnkbase, PCIE_BRIDGE_MSI_CAP, val);
+	}
+}
+
+/*
+ * Allocate an MSI vector on a link
+ */
+static int xlp_setup_msi(uint64_t lnkbase, int node, int link,
+	struct msi_desc *desc)
+{
+	struct xlp_msi_data *md;
+	struct msi_msg msg;
+	unsigned long flags;
+	int msivec, irt, lirq, xirq, ret;
+	uint64_t msiaddr;
+
+	/* Get MSI data for the link */
+	lirq = PIC_PCIE_LINK_MSI_IRQ(link);
+	xirq = nlm_irq_to_xirq(node, nlm_link_msiirq(link, 0));
+	md = irq_get_handler_data(xirq);
+	msiaddr = MSI_LINK_ADDR(node, link);
+
+	spin_lock_irqsave(&md->msi_lock, flags);
+	if (md->msi_alloc_mask == 0) {
+		/* switch the link IRQ to MSI range */
+		xlp_config_link_msi(lnkbase, lirq, msiaddr);
+		irt = PIC_IRT_PCIE_LINK_INDEX(link);
+		nlm_setup_pic_irq(node, lirq, lirq, irt);
+		nlm_pic_init_irt(nlm_get_node(node)->picbase, irt, lirq,
+				 node * nlm_threads_per_node(), 1 /*en */);
+	}
+
+	/* allocate an MSI vec, and tell the bridge about it */
+	msivec = fls(md->msi_alloc_mask);
+	if (msivec == XLP_MSIVEC_PER_LINK) {
+		spin_unlock_irqrestore(&md->msi_lock, flags);
+		return -ENOMEM;
+	}
+	md->msi_alloc_mask |= (1u << msivec);
+	spin_unlock_irqrestore(&md->msi_lock, flags);
+
+	msg.address_hi = msiaddr >> 32;
+	msg.address_lo = msiaddr & 0xffffffff;
+	msg.data = 0xc00 | msivec;
+
+	xirq = xirq + msivec;		/* msi mapped to global irq space */
+	ret = irq_set_msi_desc(xirq, desc);
+	if (ret < 0) {
+		destroy_irq(xirq);
+		return ret;
+	}
+
+	write_msi_msg(xirq, &msg);
+	return 0;
+}
+
+/*
+ * Switch a link to MSI-X mode
+ */
+static void xlp_config_link_msix(uint64_t lnkbase, int lirq, uint64_t msixaddr)
+{
+	u32 val;
+
+	val = nlm_read_reg(lnkbase, 0x2C);
+	if ((val & 0x80000000U) == 0) {
+		val |= 0x80000000U;
+		nlm_write_reg(lnkbase, 0x2C, val);
+	}
+	val = nlm_read_reg(lnkbase, PCIE_INT_EN0);
+	if ((val & 0x200) == 0) {
+		val |= 0x200;		/* MSI Interrupt enable */
+		nlm_write_reg(lnkbase, PCIE_INT_EN0, val);
+	}
+
+	val = nlm_read_reg(lnkbase, 0x1);	/* CMD */
+	if ((val & 0x0400) == 0) {
+		val |= 0x0400;
+		nlm_write_reg(lnkbase, 0x1, val);
+	}
+
+	/* Update IRQ in the PCI irq reg */
+	val = nlm_read_pci_reg(lnkbase, 0xf);
+	val &= ~0x1fu;
+	val |= (1 << 8) | lirq;
+	nlm_write_pci_reg(lnkbase, 0xf, val);
+
+	/* MSI-X addresses */
+	nlm_write_reg(lnkbase, PCIE_BRIDGE_MSIX_ADDR_BASE, msixaddr >> 8);
+	nlm_write_reg(lnkbase, PCIE_BRIDGE_MSIX_ADDR_LIMIT,
+					(msixaddr + MSI_ADDR_SZ) >> 8);
+}
+
+/*
+ * Allocate an MSI-X vector
+ */
+static int xlp_setup_msix(uint64_t lnkbase, int node, int link,
+	struct msi_desc *desc)
+{
+	struct xlp_msi_data *md;
+	struct msi_msg msg;
+	unsigned long flags;
+	int t, msixvec, lirq, xirq, ret;
+	uint64_t msixaddr;
+
+	/* Get MSI data for the link */
+	lirq = PIC_PCIE_MSIX_IRQ(link);
+	xirq = nlm_irq_to_xirq(node, nlm_link_msixirq(link, 0));
+	md = irq_get_handler_data(xirq);
+	msixaddr = MSIX_LINK_ADDR(node, link);
+
+	spin_lock_irqsave(&md->msi_lock, flags);
+	/* switch the PCIe link to MSI-X mode at the first alloc */
+	if (md->msix_alloc_mask == 0)
+		xlp_config_link_msix(lnkbase, lirq, msixaddr);
+
+	/* allocate an MSI-X vec, and tell the bridge about it */
+	t = fls(md->msix_alloc_mask);
+	if (t == XLP_MSIXVEC_PER_LINK) {
+		spin_unlock_irqrestore(&md->msi_lock, flags);
+		return -ENOMEM;
+	}
+	md->msix_alloc_mask |= (1u << t);
+	spin_unlock_irqrestore(&md->msi_lock, flags);
+
+	xirq += t;
+	msixvec = nlm_irq_msixvec(xirq);
+	msg.address_hi = msixaddr >> 32;
+	msg.address_lo = msixaddr & 0xffffffff;
+	msg.data = 0xc00 | msixvec;
+
+	ret = irq_set_msi_desc(xirq, desc);
+	if (ret < 0) {
+		destroy_irq(xirq);
+		return ret;
+	}
+
+	write_msi_msg(xirq, &msg);
+	return 0;
+}
+
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+{
+	struct pci_dev *lnkdev;
+	uint64_t lnkbase;
+	int node, link, slot;
+
+	lnkdev = xlp_get_pcie_link(dev);
+	if (lnkdev == NULL) {
+		dev_err(&dev->dev, "Could not find bridge\n");
+		return 1;
+	}
+	slot = PCI_SLOT(lnkdev->devfn);
+	link = PCI_FUNC(lnkdev->devfn);
+	node = slot / 8;
+	lnkbase = nlm_get_pcie_base(node, link);
+
+	if (desc->msi_attrib.is_msix)
+		return xlp_setup_msix(lnkbase, node, link, desc);
+	else
+		return xlp_setup_msi(lnkbase, node, link, desc);
+}
+
+void __init xlp_init_node_msi_irqs(int node, int link)
+{
+	struct nlm_soc_info *nodep;
+	struct xlp_msi_data *md;
+	int irq, i, irt, msixvec;
+
+	pr_info("[%d %d] Init node PCI IRT\n", node, link);
+	nodep = nlm_get_node(node);
+
+	/* Alloc an MSI block for the link */
+	md = kzalloc(sizeof(*md), GFP_KERNEL);
+	spin_lock_init(&md->msi_lock);
+	md->msi_enabled_mask = 0;
+	md->msi_alloc_mask = 0;
+	md->msix_alloc_mask = 0;
+	md->node = nodep;
+	md->lnkbase = nlm_get_pcie_base(node, link);
+
+	/* extended space for MSI interrupts */
+	irq = nlm_irq_to_xirq(node, nlm_link_msiirq(link, 0));
+	for (i = irq; i < irq + XLP_MSIVEC_PER_LINK; i++) {
+		irq_set_chip_and_handler(i, &xlp_msi_chip, handle_level_irq);
+		irq_set_handler_data(i, md);
+	}
+
+	for (i = 0; i < XLP_MSIXVEC_PER_LINK; i++) {
+		/* Initialize MSI-X irts to generate one interrupt per link */
+		msixvec = link * XLP_MSIXVEC_PER_LINK + i;
+		irt = PIC_IRT_PCIE_MSIX_INDEX(msixvec);
+		nlm_pic_init_irt(nodep->picbase, irt, PIC_PCIE_MSIX_IRQ(link),
+			node * nlm_threads_per_node(), 1 /* enable */);
+
+		/* Initialize MSI-X extended irq space for the link  */
+		irq = nlm_irq_to_xirq(node, nlm_link_msixirq(link, i));
+		irq_set_chip_and_handler(irq, &xlp_msix_chip, handle_level_irq);
+		irq_set_handler_data(irq, md);
+	}
+}
+
+void nlm_dispatch_msi(int node, int lirq)
+{
+	struct xlp_msi_data *md;
+	int link, i, irqbase;
+	u32 status;
+
+	link = lirq - PIC_PCIE_LINK_MSI_IRQ_BASE;
+	irqbase = nlm_irq_to_xirq(node, nlm_link_msiirq(link, 0));
+	md = irq_get_handler_data(irqbase);
+	status = nlm_read_reg(md->lnkbase, PCIE_MSI_STATUS) &
+						md->msi_enabled_mask;
+	while (status) {
+		i = __ffs(status);
+		do_IRQ(irqbase + i);
+		status &= status - 1;
+	}
+}
+
+void nlm_dispatch_msix(int node, int lirq)
+{
+	struct xlp_msi_data *md;
+	int link, i, irqbase;
+	u32 status;
+
+	link = lirq - PIC_PCIE_MSIX_IRQ_BASE;
+	irqbase = nlm_irq_to_xirq(node, nlm_link_msixirq(link, 0));
+	md = irq_get_handler_data(irqbase);
+	status = nlm_read_reg(md->lnkbase, PCIE_MSIX_STATUS);
+
+	/* narrow it down to the MSI-X vectors for our link */
+	status = (status >> (link * XLP_MSIXVEC_PER_LINK)) &
+			((1 << XLP_MSIXVEC_PER_LINK) - 1);
+
+	while (status) {
+		i = __ffs(status);
+		do_IRQ(irqbase + i);
+		status &= status - 1;
+	}
+}
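
The irq mapping helpers at the top of this file only round-trip if
NLM_MSI_VEC_BASE is a multiple of the per-node MSI range (32 vectors
times 4 links); the sketch below uses an assumed base of 128 to show
the recovery of link and vector from a global irq:

    #include <stdio.h>

    #define MSI_VEC_BASE    128 /* assumed; must be a multiple of
                                 * VEC_PER_LINK * NLINKS */
    #define VEC_PER_LINK    32
    #define NLINKS          4

    static int link_msiirq(int link, int vec)
    {
        return MSI_VEC_BASE + link * VEC_PER_LINK + vec;
    }

    int main(void)
    {
        int irq = link_msiirq(2, 5);    /* 197 */

        /* recovers link 2, vector 5 */
        printf("link=%d vec=%d\n",
               (irq % (VEC_PER_LINK * NLINKS)) / VEC_PER_LINK,
               irq % VEC_PER_LINK);
        return 0;
    }
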
diff --git a/arch/mips/pci/ops-bcm63xx.c b/arch/mips/pci/ops-bcm63xx.c
index 6144bb3..13eea69 100644
--- a/arch/mips/pci/ops-bcm63xx.c
+++ b/arch/mips/pci/ops-bcm63xx.c
@@ -9,7 +9,6 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/io.h>
 
diff --git a/arch/mips/pci/ops-bonito64.c b/arch/mips/pci/ops-bonito64.c
index 830352e..c06205a 100644
--- a/arch/mips/pci/ops-bonito64.c
+++ b/arch/mips/pci/ops-bonito64.c
@@ -22,7 +22,6 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 
 #include <asm/mips-boards/bonito64.h>
 
diff --git a/arch/mips/pci/ops-lantiq.c b/arch/mips/pci/ops-lantiq.c
index 16e7c25..e5738ee 100644
--- a/arch/mips/pci/ops-lantiq.c
+++ b/arch/mips/pci/ops-lantiq.c
@@ -9,7 +9,6 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/mm.h>
 #include <asm/addrspace.h>
diff --git a/arch/mips/pci/ops-loongson2.c b/arch/mips/pci/ops-loongson2.c
index 98254af..24138bb 100644
--- a/arch/mips/pci/ops-loongson2.c
+++ b/arch/mips/pci/ops-loongson2.c
@@ -14,7 +14,6 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/export.h>
 
 #include <loongson.h>
diff --git a/arch/mips/pci/ops-mace.c b/arch/mips/pci/ops-mace.c
index 1cfb558..6b5821f 100644
--- a/arch/mips/pci/ops-mace.c
+++ b/arch/mips/pci/ops-mace.c
@@ -6,7 +6,6 @@
  * Copyright (C) 2000, 2001 Keith M Wesolowski
  */
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/pci.h>
 #include <linux/types.h>
 #include <asm/pci.h>
diff --git a/arch/mips/pci/ops-msc.c b/arch/mips/pci/ops-msc.c
index 92a8543..dbbf365 100644
--- a/arch/mips/pci/ops-msc.c
+++ b/arch/mips/pci/ops-msc.c
@@ -24,7 +24,6 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/kernel.h>
-#include <linux/init.h>
 
 #include <asm/mips-boards/msc01_pci.h>
 
diff --git a/arch/mips/pci/ops-nile4.c b/arch/mips/pci/ops-nile4.c
index 499e35c..a1a7c9f 100644
--- a/arch/mips/pci/ops-nile4.c
+++ b/arch/mips/pci/ops-nile4.c
@@ -1,5 +1,4 @@
 #include <linux/kernel.h>
-#include <linux/init.h>
 #include <linux/pci.h>
 #include <asm/bootinfo.h>
 
diff --git a/arch/mips/pci/ops-rc32434.c b/arch/mips/pci/ops-rc32434.c
index 7c7182e..874ed6d 100644
--- a/arch/mips/pci/ops-rc32434.c
+++ b/arch/mips/pci/ops-rc32434.c
@@ -26,7 +26,6 @@
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/io.h>
 #include <linux/pci.h>
 #include <linux/types.h>
diff --git a/arch/mips/pci/pci-ip27.c b/arch/mips/pci/pci-ip27.c
index 162b4cb..0f09eaf 100644
--- a/arch/mips/pci/pci-ip27.c
+++ b/arch/mips/pci/pci-ip27.c
@@ -7,7 +7,6 @@
  * Copyright (C) 1999, 2000, 04 Ralf Baechle (ralf@linux-mips.org)
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  */
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/pci.h>
diff --git a/arch/mips/pci/pci-malta.c b/arch/mips/pci/pci-malta.c
index 37134dd..f1a7389 100644
--- a/arch/mips/pci/pci-malta.c
+++ b/arch/mips/pci/pci-malta.c
@@ -241,9 +241,9 @@
 		return;
 	}
 
-	/* Change start address to avoid conflicts with ACPI and SMB devices */
-	if (controller->io_resource->start < 0x00002000UL)
-		controller->io_resource->start = 0x00002000UL;
+	/* PIIX4 ACPI starts at 0x1000 */
+	if (controller->io_resource->start < 0x00001000UL)
+		controller->io_resource->start = 0x00001000UL;
 
 	iomem_resource.end &= 0xfffffffffULL;			/* 64 GB */
 	ioport_resource.end = controller->io_resource->end;
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
index adeff2b..72919ae 100644
--- a/arch/mips/pci/pci-rt3883.c
+++ b/arch/mips/pci/pci-rt3883.c
@@ -436,9 +436,6 @@
 		return -ENOMEM;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -EINVAL;
-
 	rpc->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(rpc->base))
 		return PTR_ERR(rpc->base);
diff --git a/arch/mips/pci/pci-xlp.c b/arch/mips/pci/pci-xlp.c
index 653d2db..7babf01 100644
--- a/arch/mips/pci/pci-xlp.c
+++ b/arch/mips/pci/pci-xlp.c
@@ -47,10 +47,11 @@
 #include <asm/netlogic/interrupt.h>
 #include <asm/netlogic/haldefs.h>
 #include <asm/netlogic/common.h>
+#include <asm/netlogic/mips-extns.h>
 
 #include <asm/netlogic/xlp-hal/iomap.h>
-#include <asm/netlogic/xlp-hal/pic.h>
 #include <asm/netlogic/xlp-hal/xlp.h>
+#include <asm/netlogic/xlp-hal/pic.h>
 #include <asm/netlogic/xlp-hal/pcibus.h>
 #include <asm/netlogic/xlp-hal/bridge.h>
 
@@ -66,9 +67,22 @@
 	u32 *cfgaddr;
 
 	where &= ~3;
-	if (bus->number == 0 && PCI_SLOT(devfn) == 1 && where == 0x954)
+	if (cpu_is_xlp9xx()) {
+		/* be very careful on SoC buses */
+		if (bus->number == 0) {
+			/* Scan only existing nodes - uboot bug? */
+			if (PCI_SLOT(devfn) != 0 ||
+					   !nlm_node_present(PCI_FUNC(devfn)))
+				return 0xffffffff;
+		} else if (bus->parent->number == 0) {	/* SoC bus */
+			if (PCI_SLOT(devfn) == 0)	/* b.0.0 hangs */
+				return 0xffffffff;
+			if (devfn == 44)		/* b.5.4 hangs */
+				return 0xffffffff;
+		}
+	} else if (bus->number == 0 && PCI_SLOT(devfn) == 1 && where == 0x954) {
 		return 0xffffffff;
-
+	}
 	cfgaddr = (u32 *)(pci_config_base +
 			pci_cfg_addr(bus->number, devfn, where));
 	data = *cfgaddr;
@@ -162,27 +176,39 @@
 	.io_offset	= 0x00000000UL,
 };
 
-static struct pci_dev *xlp_get_pcie_link(const struct pci_dev *dev)
+struct pci_dev *xlp_get_pcie_link(const struct pci_dev *dev)
 {
 	struct pci_bus *bus, *p;
 
-	/* Find the bridge on bus 0 */
 	bus = dev->bus;
-	for (p = bus->parent; p && p->number != 0; p = p->parent)
-		bus = p;
 
-	return p ? bus->self : NULL;
+	if (cpu_is_xlp9xx()) {
+		/* find bus with grand parent number == 0 */
+		for (p = bus->parent; p && p->parent && p->parent->number != 0;
+				p = p->parent)
+			bus = p;
+		return (p && p->parent) ? bus->self : NULL;
+	} else {
+		/* Find the bridge on bus 0 */
+		for (p = bus->parent; p && p->number != 0; p = p->parent)
+			bus = p;
+
+		return p ? bus->self : NULL;
+	}
 }
 
-static inline int nlm_pci_link_to_irq(int link)
+int xlp_socdev_to_node(const struct pci_dev *lnkdev)
 {
-	return PIC_PCIE_LINK_0_IRQ + link;
+	if (cpu_is_xlp9xx())
+		return PCI_FUNC(lnkdev->bus->self->devfn);
+	else
+		return PCI_SLOT(lnkdev->devfn) / 8;
 }
 
 int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
 	struct pci_dev *lnkdev;
-	int lnkslot, lnkfunc;
+	int lnkfunc, node;
 
 	/*
 	 * For XLP PCIe, there is an IRQ per Link, find out which
@@ -191,9 +217,11 @@
 	lnkdev = xlp_get_pcie_link(dev);
 	if (lnkdev == NULL)
 		return 0;
+
 	lnkfunc = PCI_FUNC(lnkdev->devfn);
-	lnkslot = PCI_SLOT(lnkdev->devfn);
-	return nlm_irq_to_xirq(lnkslot / 8, nlm_pci_link_to_irq(lnkfunc));
+	node = xlp_socdev_to_node(lnkdev);
+
+	return nlm_irq_to_xirq(node, PIC_PCIE_LINK_LEGACY_IRQ(lnkfunc));
 }
 
 /* Do platform specific device initialization at pci_enable_device() time */
@@ -220,17 +248,38 @@
 	 *  Enable byte swap in hardware. Program each link's PCIe SWAP regions
 	 * from the link's address ranges.
 	 */
-	reg = nlm_read_bridge_reg(nbubase, BRIDGE_PCIEMEM_BASE0 + link);
-	nlm_write_pci_reg(lnkbase, PCIE_BYTE_SWAP_MEM_BASE, reg);
+	if (cpu_is_xlp9xx()) {
+		reg = nlm_read_bridge_reg(nbubase,
+				BRIDGE_9XX_PCIEMEM_BASE0 + link);
+		nlm_write_pci_reg(lnkbase, PCIE_9XX_BYTE_SWAP_MEM_BASE, reg);
 
-	reg = nlm_read_bridge_reg(nbubase, BRIDGE_PCIEMEM_LIMIT0 + link);
-	nlm_write_pci_reg(lnkbase, PCIE_BYTE_SWAP_MEM_LIM, reg | 0xfff);
+		reg = nlm_read_bridge_reg(nbubase,
+				BRIDGE_9XX_PCIEMEM_LIMIT0 + link);
+		nlm_write_pci_reg(lnkbase,
+				PCIE_9XX_BYTE_SWAP_MEM_LIM, reg | 0xfff);
 
-	reg = nlm_read_bridge_reg(nbubase, BRIDGE_PCIEIO_BASE0 + link);
-	nlm_write_pci_reg(lnkbase, PCIE_BYTE_SWAP_IO_BASE, reg);
+		reg = nlm_read_bridge_reg(nbubase,
+				BRIDGE_9XX_PCIEIO_BASE0 + link);
+		nlm_write_pci_reg(lnkbase, PCIE_9XX_BYTE_SWAP_IO_BASE, reg);
 
-	reg = nlm_read_bridge_reg(nbubase, BRIDGE_PCIEIO_LIMIT0 + link);
-	nlm_write_pci_reg(lnkbase, PCIE_BYTE_SWAP_IO_LIM, reg | 0xfff);
+		reg = nlm_read_bridge_reg(nbubase,
+				BRIDGE_9XX_PCIEIO_LIMIT0 + link);
+		nlm_write_pci_reg(lnkbase,
+				PCIE_9XX_BYTE_SWAP_IO_LIM, reg | 0xfff);
+	} else {
+		reg = nlm_read_bridge_reg(nbubase, BRIDGE_PCIEMEM_BASE0 + link);
+		nlm_write_pci_reg(lnkbase, PCIE_BYTE_SWAP_MEM_BASE, reg);
+
+		reg = nlm_read_bridge_reg(nbubase,
+					BRIDGE_PCIEMEM_LIMIT0 + link);
+		nlm_write_pci_reg(lnkbase, PCIE_BYTE_SWAP_MEM_LIM, reg | 0xfff);
+
+		reg = nlm_read_bridge_reg(nbubase, BRIDGE_PCIEIO_BASE0 + link);
+		nlm_write_pci_reg(lnkbase, PCIE_BYTE_SWAP_IO_BASE, reg);
+
+		reg = nlm_read_bridge_reg(nbubase, BRIDGE_PCIEIO_LIMIT0 + link);
+		nlm_write_pci_reg(lnkbase, PCIE_BYTE_SWAP_IO_LIM, reg | 0xfff);
+	}
 }
 #else
 /* Swap configuration not needed in little-endian mode */
@@ -239,7 +288,6 @@
 
 static int __init pcibios_init(void)
 {
-	struct nlm_soc_info *nodep;
 	uint64_t pciebase;
 	int link, n;
 	u32 reg;
@@ -253,20 +301,20 @@
 	ioport_resource.end   = ~0;
 
 	for (n = 0; n < NLM_NR_NODES; n++) {
-		nodep = nlm_get_node(n);
-		if (!nodep->coremask)
-			continue;	/* node does not exist */
+		if (!nlm_node_present(n))
+			continue;
 
-		for (link = 0; link < 4; link++) {
+		for (link = 0; link < PCIE_NLINKS; link++) {
 			pciebase = nlm_get_pcie_base(n, link);
 			if (nlm_read_pci_reg(pciebase, 0) == 0xffffffff)
 				continue;
 			xlp_config_pci_bswap(n, link);
+			xlp_init_node_msi_irqs(n, link);
 
 			/* put in intpin and irq - u-boot does not */
 			reg = nlm_read_pci_reg(pciebase, 0xf);
-			reg &= ~0x1fu;
-			reg |= (1 << 8) | nlm_pci_link_to_irq(link);
+			reg &= ~0x1ffu;
+			reg |= (1 << 8) | PIC_PCIE_LINK_LEGACY_IRQ(link);
 			nlm_write_pci_reg(pciebase, 0xf, reg);
 			pr_info("XLP PCIe: Link %d-%d initialized.\n", n, link);
 		}
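
xlp_get_pcie_link() climbs the pci_bus parent chain to the bridge that
sits directly on the root bus (one level deeper on 9xx, where the SoC
buses hang off per-node buses). A minimal sketch of the non-9xx walk,
with a stand-in struct for the kernel's struct pci_bus:

    #include <stdio.h>

    struct bus {
        int number;
        struct bus *parent;
        const char *self;   /* bridge device, just a name here */
    };

    /* climb until the bus whose parent is bus 0; its bridge is the
     * SoC PCIe link */
    static const char *find_link(struct bus *bus)
    {
        struct bus *p;

        for (p = bus->parent; p && p->number != 0; p = p->parent)
            bus = p;
        return p ? bus->self : NULL;
    }

    int main(void)
    {
        struct bus root = { 0, NULL, "host" };
        struct bus link = { 1, &root, "pcie-link.2" };
        struct bus dev  = { 2, &link, "endpoint" };

        printf("%s\n", find_link(&dev));    /* pcie-link.2 */
        return 0;
    }
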
diff --git a/arch/mips/pmcs-msp71xx/Kconfig b/arch/mips/pmcs-msp71xx/Kconfig
index 3482b8c..6073ca4 100644
--- a/arch/mips/pmcs-msp71xx/Kconfig
+++ b/arch/mips/pmcs-msp71xx/Kconfig
@@ -6,6 +6,7 @@
 	bool "PMC-Sierra MSP4200 Eval Board"
 	select IRQ_MSP_SLP
 	select HW_HAS_PCI
+	select MIPS_L1_CACHE_SHIFT_4
 
 config PMC_MSP4200_GW
 	bool "PMC-Sierra MSP4200 VoIP Gateway"
diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig
index 424f034..1bfd1c1 100644
--- a/arch/mips/ralink/Kconfig
+++ b/arch/mips/ralink/Kconfig
@@ -15,6 +15,7 @@
 
 	config SOC_RT288X
 		bool "RT288x"
+		select MIPS_L1_CACHE_SHIFT_4
 
 	config SOC_RT305X
 		bool "RT305x"
diff --git a/arch/mips/sgi-ip27/ip27-console.c b/arch/mips/sgi-ip27/ip27-console.c
index b952d5b..45fdfbc 100644
--- a/arch/mips/sgi-ip27/ip27-console.c
+++ b/arch/mips/sgi-ip27/ip27-console.c
@@ -5,7 +5,6 @@
  *
  * Copyright (C) 2001, 2002 Ralf Baechle
  */
-#include <linux/init.h>
 
 #include <asm/page.h>
 #include <asm/sn/addrs.h>
diff --git a/arch/mips/sgi-ip27/ip27-irq-pci.c b/arch/mips/sgi-ip27/ip27-irq-pci.c
index ec22ec5..2a1c407 100644
--- a/arch/mips/sgi-ip27/ip27-irq-pci.c
+++ b/arch/mips/sgi-ip27/ip27-irq-pci.c
@@ -8,7 +8,6 @@
 
 #undef DEBUG
 
-#include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
diff --git a/arch/mips/sgi-ip27/ip27-klconfig.c b/arch/mips/sgi-ip27/ip27-klconfig.c
index 7afe146..c873d62 100644
--- a/arch/mips/sgi-ip27/ip27-klconfig.c
+++ b/arch/mips/sgi-ip27/ip27-klconfig.c
@@ -2,7 +2,6 @@
  * Copyright (C) 1999, 2000 Ralf Baechle (ralf@gnu.org)
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  */
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
diff --git a/arch/mips/sgi-ip27/ip27-xtalk.c b/arch/mips/sgi-ip27/ip27-xtalk.c
index d59b820..20f582a 100644
--- a/arch/mips/sgi-ip27/ip27-xtalk.c
+++ b/arch/mips/sgi-ip27/ip27-xtalk.c
@@ -7,7 +7,6 @@
  * Generic XTALK initialization code
  */
 
-#include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/smp.h>
 #include <asm/sn/types.h>
diff --git a/arch/mn10300/Makefile b/arch/mn10300/Makefile
index a3d0fef..3f1ea5d 100644
--- a/arch/mn10300/Makefile
+++ b/arch/mn10300/Makefile
@@ -92,14 +92,6 @@
   echo  '* zImage        - Compressed kernel image (arch/$(ARCH)/boot/zImage)'
 endef
 
-# If you make sure the .S files get compiled with debug info,
-# uncomment the following to disable optimisations
-# that are unhelpful whilst debugging.
-ifdef CONFIG_DEBUG_INFO
-#KBUILD_CFLAGS	+= -O1
-KBUILD_AFLAGS	+= -Wa,--gdwarf2
-endif
-
 #
 # include the appropriate processor- and unit-specific headers
 #
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index d8a455e..fec8bf9 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -853,38 +853,45 @@
 
 /* ========================================================[ return ] === */
 
-_work_pending:
-	/*
-	 * if (current_thread_info->flags & _TIF_NEED_RESCHED)
-	 *     schedule();
-	 */
-	l.lwz   r5,TI_FLAGS(r10)
-	l.andi	r3,r5,_TIF_NEED_RESCHED
-	l.sfnei r3,0
-	l.bnf   _work_notifysig
-	 l.nop
-	l.jal   schedule
-	 l.nop
-	l.j	_resume_userspace
-	 l.nop
-
-/* Handle pending signals and notify-resume requests.
- * do_notify_resume must be passed the latest pushed pt_regs, not
- * necessarily the "userspace" ones.  Also, pt_regs->syscallno
- * must be set so that the syscall restart functionality works.
- */
-_work_notifysig:
-	l.jal	do_notify_resume
-	 l.ori	r3,r1,0		  /* pt_regs */
-
 _resume_userspace:
 	DISABLE_INTERRUPTS(r3,r4)
-	l.lwz	r3,TI_FLAGS(r10)
-	l.andi	r3,r3,_TIF_WORK_MASK
-	l.sfnei	r3,0
-	l.bf	_work_pending
+	l.lwz	r4,TI_FLAGS(r10)
+	l.andi	r13,r4,_TIF_WORK_MASK
+	l.sfeqi	r13,0
+	l.bf	_restore_all
 	 l.nop
 
+_work_pending:
+	l.lwz	r5,PT_ORIG_GPR11(r1)
+	l.sfltsi r5,0
+	l.bnf	1f
+	 l.nop
+	l.andi	r5,r5,0
+1:
+	l.jal	do_work_pending
+	 l.ori	r3,r1,0			/* pt_regs */
+
+	l.sfeqi	r11,0
+	l.bf	_restore_all
+	 l.nop
+	l.sfltsi r11,0
+	l.bnf	1f
+	 l.nop
+	l.and	r11,r11,r0
+	l.ori	r11,r11,__NR_restart_syscall
+	l.j	_syscall_check_trace_enter
+	 l.nop
+1:
+	l.lwz	r11,PT_ORIG_GPR11(r1)
+	/* Restore arg registers */
+	l.lwz	r3,PT_GPR3(r1)
+	l.lwz	r4,PT_GPR4(r1)
+	l.lwz	r5,PT_GPR5(r1)
+	l.lwz	r6,PT_GPR6(r1)
+	l.lwz	r7,PT_GPR7(r1)
+	l.j	_syscall_check_trace_enter
+	 l.lwz	r8,PT_GPR8(r1)
+
 _restore_all:
 	RESTORE_ALL
 	/* This returns to userspace code */
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index ae167f7..66775bc 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -28,24 +28,24 @@
 #include <linux/tracehook.h>
 
 #include <asm/processor.h>
+#include <asm/syscall.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 
 #define DEBUG_SIG 0
 
 struct rt_sigframe {
-	struct siginfo *pinfo;
-	void *puc;
 	struct siginfo info;
 	struct ucontext uc;
 	unsigned char retcode[16];	/* trampoline code */
 };
 
-static int restore_sigcontext(struct pt_regs *regs, struct sigcontext *sc)
+static int restore_sigcontext(struct pt_regs *regs,
+			      struct sigcontext __user *sc)
 {
-	unsigned int err = 0;
+	int err = 0;
 
-	/* Alwys make any pending restarted system call return -EINTR */
+	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
 	/*
@@ -53,25 +53,21 @@
 	 * (sc is already checked for VERIFY_READ since the sigframe was
 	 *  checked in sys_sigreturn previously)
 	 */
-	if (__copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long)))
-		goto badframe;
-	if (__copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long)))
-		goto badframe;
-	if (__copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long)))
-		goto badframe;
+	err |= __copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long));
+	err |= __copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long));
+	err |= __copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long));
 
 	/* make sure the SM-bit is cleared so user-mode cannot fool us */
 	regs->sr &= ~SPR_SR_SM;
 
+	regs->orig_gpr11 = -1;	/* Avoid syscall restart checks */
+
 	/* TODO: the other ports use regs->orig_XX to disable syscall checks
 	 * after this completes, but we don't use that mechanism. maybe we can
 	 * use it now ?
 	 */
 
 	return err;
-
-badframe:
-	return 1;
 }
 
 asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs)
@@ -111,21 +107,18 @@
  * Set up a signal frame.
  */
 
-static int setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs,
-			    unsigned long mask)
+static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 {
 	int err = 0;
 
 	/* copy the regs */
-
+	/* There should be no need to save callee-saved registers here...
+	 * ...but we save them anyway.  Revisit this
+	 */
 	err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long));
 	err |= __copy_to_user(&sc->regs.pc, &regs->pc, sizeof(unsigned long));
 	err |= __copy_to_user(&sc->regs.sr, &regs->sr, sizeof(unsigned long));
 
-	/* then some other stuff */
-
-	err |= __put_user(mask, &sc->oldmask);
-
 	return err;
 }
 
@@ -173,55 +166,53 @@
  * trampoline which performs the syscall sigreturn, or a provided
  * user-mode trampoline.
  */
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
-			  sigset_t *set, struct pt_regs *regs)
+static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+			  struct pt_regs *regs)
 {
 	struct rt_sigframe *frame;
 	unsigned long return_ip;
 	int err = 0;
 
-	frame = get_sigframe(ka, regs, sizeof(*frame));
+	frame = get_sigframe(&ksig->ka, regs, sizeof(*frame));
 
 	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;
 
-	err |= __put_user(&frame->info, &frame->pinfo);
-	err |= __put_user(&frame->uc, &frame->puc);
+	/* Create siginfo.  */
+	if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+		err |= copy_siginfo_to_user(&frame->info, &ksig->info);
 
-	if (ka->sa.sa_flags & SA_SIGINFO)
-		err |= copy_siginfo_to_user(&frame->info, info);
-	if (err)
-		goto give_sigsegv;
-
-	/* Clear all the bits of the ucontext we don't use.  */
-	err |= __clear_user(&frame->uc, offsetof(struct ucontext, uc_mcontext));
+	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
 	err |= __put_user(NULL, &frame->uc.uc_link);
 	err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0]);
+	err |= setup_sigcontext(regs, &frame->uc.uc_mcontext);
 
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* trampoline - the desired return ip is the retcode itself */
 	return_ip = (unsigned long)&frame->retcode;
-	/* This is l.ori r11,r0,__NR_sigreturn, l.sys 1 */
-	err |= __put_user(0xa960, (short *)(frame->retcode + 0));
-	err |= __put_user(__NR_rt_sigreturn, (short *)(frame->retcode + 2));
+	/* This is:
+		l.ori r11,r0,__NR_sigreturn
+		l.sys 1
+	 */
+	err |= __put_user(0xa960,             (short *)(frame->retcode + 0));
+	err |= __put_user(__NR_rt_sigreturn,  (short *)(frame->retcode + 2));
 	err |= __put_user(0x20000001, (unsigned long *)(frame->retcode + 4));
 	err |= __put_user(0x15000000, (unsigned long *)(frame->retcode + 8));
 
 	if (err)
-		goto give_sigsegv;
+		return -EFAULT;
 
 	/* TODO what is the current->exec_domain stuff and invmap ? */
 
 	/* Set up registers for signal handler */
-	regs->pc = (unsigned long)ka->sa.sa_handler; /* what we enter NOW */
+	regs->pc = (unsigned long)ksig->ka.sa.sa_handler; /* what we enter NOW */
 	regs->gpr[9] = (unsigned long)return_ip;     /* what we enter LATER */
-	regs->gpr[3] = (unsigned long)sig;           /* arg 1: signo */
+	regs->gpr[3] = (unsigned long)ksig->sig;           /* arg 1: signo */
 	regs->gpr[4] = (unsigned long)&frame->info;  /* arg 2: (siginfo_t*) */
 	regs->gpr[5] = (unsigned long)&frame->uc;    /* arg 3: ucontext */
 
@@ -229,25 +220,16 @@
 	regs->sp = (unsigned long)frame;
 
 	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
 }
 
 static inline void
-handle_signal(unsigned long sig,
-	      siginfo_t *info, struct k_sigaction *ka,
-	      struct pt_regs *regs)
+handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 {
 	int ret;
 
-	ret = setup_rt_frame(sig, ka, info, sigmask_to_save(), regs);
-	if (ret)
-		return;
+	ret = setup_rt_frame(ksig, sigmask_to_save(), regs);
 
-	signal_delivered(sig, info, ka, regs,
-				 test_thread_flag(TIF_SINGLESTEP));
+	signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP));
 }
 
 /*
@@ -262,82 +244,99 @@
  * mode below.
  */
 
-void do_signal(struct pt_regs *regs)
+int do_signal(struct pt_regs *regs, int syscall)
 {
-	siginfo_t info;
-	int signr;
-	struct k_sigaction ka;
+	struct ksignal ksig;
+	unsigned long continue_addr = 0;
+	unsigned long restart_addr = 0;
+	unsigned long retval = 0;
+	int restart = 0;
+
+	if (syscall) {
+		continue_addr = regs->pc;
+		restart_addr = continue_addr - 4;
+		retval = regs->gpr[11];
+
+		/*
+		 * Setup syscall restart here so that a debugger will
+		 * see the already changed PC.
+		 */
+		switch (retval) {
+		case -ERESTART_RESTARTBLOCK:
+			restart = -2;
+			/* Fall through */
+		case -ERESTARTNOHAND:
+		case -ERESTARTSYS:
+		case -ERESTARTNOINTR:
+			restart++;
+			regs->gpr[11] = regs->orig_gpr11;
+			regs->pc = restart_addr;
+			break;
+		}
+	}
 
 	/*
-	 * We want the common case to go fast, which
-	 * is why we may in certain cases get here from
-	 * kernel mode. Just return without doing anything
-	 * if so.
+	 * Get the signal to deliver.  During the call to get_signal the
+	 * debugger may change all our registers so we may need to revert
+	 * the decision to restart the syscall; specifically, if the PC is
+	 * changed, don't restart the syscall.
 	 */
-	if (!user_mode(regs))
-		return;
-
-	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
-
-	/* If we are coming out of a syscall then we need
-	 * to check if the syscall was interrupted and wants to be
-	 * restarted after handling the signal.  If so, the original
-	 * syscall number is put back into r11 and the PC rewound to
-	 * point at the l.sys instruction that resulted in the
-	 * original syscall.  Syscall results other than the four
-	 * below mean that the syscall executed to completion and no
-	 * restart is necessary.
-	 */
-	if (regs->orig_gpr11) {
-		int restart = 0;
-
-		switch (regs->gpr[11]) {
-		case -ERESTART_RESTARTBLOCK:
-		case -ERESTARTNOHAND:
-			/* Restart if there is no signal handler */
-			restart = (signr <= 0);
-			break;
-		case -ERESTARTSYS:
-			/* Restart if there no signal handler or
-			 * SA_RESTART flag is set */
-			restart = (signr <= 0 || (ka.sa.sa_flags & SA_RESTART));
-			break;
-		case -ERESTARTNOINTR:
-			/* Always restart */
-			restart = 1;
-			break;
+	if (get_signal(&ksig)) {
+		if (unlikely(restart) && regs->pc == restart_addr) {
+			if (retval == -ERESTARTNOHAND ||
+			    retval == -ERESTART_RESTARTBLOCK
+			    || (retval == -ERESTARTSYS
+			        && !(ksig.ka.sa.sa_flags & SA_RESTART))) {
+				/* No automatic restart */
+				regs->gpr[11] = -EINTR;
+				regs->pc = continue_addr;
+			}
 		}
-
-		if (restart) {
-			if (regs->gpr[11] == -ERESTART_RESTARTBLOCK)
-				regs->gpr[11] = __NR_restart_syscall;
-			else
-				regs->gpr[11] = regs->orig_gpr11;
-			regs->pc -= 4;
-		} else {
-			regs->gpr[11] = -EINTR;
-		}
-	}
-
-	if (signr <= 0) {
-		/* no signal to deliver so we just put the saved sigmask
-		 * back */
+		handle_signal(&ksig, regs);
+	} else {
+		/* no handler */
 		restore_saved_sigmask();
-	} else {		/* signr > 0 */
-		/* Whee!  Actually deliver the signal.  */
-		handle_signal(signr, &info, &ka, regs);
+		/*
+		 * Restore pt_regs PC as syscall restart will be handled by
+		 * kernel without return to userspace
+		 */
+		if (unlikely(restart) && regs->pc == restart_addr) {
+			regs->pc = continue_addr;
+			return restart;
+		}
 	}
 
-	return;
+	return 0;
 }
 
-asmlinkage void do_notify_resume(struct pt_regs *regs)
+asmlinkage int
+do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 {
-	if (current_thread_info()->flags & _TIF_SIGPENDING)
-		do_signal(regs);
-
-	if (current_thread_info()->flags & _TIF_NOTIFY_RESUME) {
-		clear_thread_flag(TIF_NOTIFY_RESUME);
-		tracehook_notify_resume(regs);
-	}
+	do {
+		if (likely(thread_flags & _TIF_NEED_RESCHED)) {
+			schedule();
+		} else {
+			if (unlikely(!user_mode(regs)))
+				return 0;
+			local_irq_enable();
+			if (thread_flags & _TIF_SIGPENDING) {
+				int restart = do_signal(regs, syscall);
+				if (unlikely(restart)) {
+					/*
+					 * Restart without handlers.
+					 * Deal with it without leaving
+					 * the kernel space.
+					 */
+					return restart;
+				}
+				syscall = 0;
+			} else {
+				clear_thread_flag(TIF_NOTIFY_RESUME);
+				tracehook_notify_resume(regs);
+			}
+		}
+		local_irq_disable();
+		thread_flags = current_thread_info()->flags;
+	} while (thread_flags & _TIF_WORK_MASK);
+	return 0;
 }
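
The restart handling above follows the standard Linux semantics for the -ERESTART* codes; do_signal() reverts the restart when a handler is about to run and the code demands it. A standalone sketch of that decision table (generic kernel behaviour, not literal patch code):

#include <linux/errno.h>
#include <linux/types.h>

/* 1 = redo with the original syscall number, 2 = redo via
 * __NR_restart_syscall, 0 = deliver -EINTR / the final value.
 */
static int restart_action(long retval, bool handler, bool sa_restart)
{
	switch (retval) {
	case -ERESTARTNOINTR:		/* always restarted */
		return 1;
	case -ERESTARTSYS:		/* restarted if no handler or SA_RESTART */
		return (!handler || sa_restart) ? 1 : 0;
	case -ERESTARTNOHAND:		/* never restarted across a handler */
		return handler ? 0 : 1;
	case -ERESTART_RESTARTBLOCK:	/* handled by sys_restart_syscall */
		return handler ? 0 : 2;
	default:
		return 0;		/* syscall completed; nothing to do */
	}
}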
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 25493a0..a5e5d2e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -532,6 +532,7 @@
 
 config PPC_64K_PAGES
 	bool "64k page size" if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64
+	depends on !PPC_FSL_BOOK3E
 	select PPC_HAS_HASH_64K if PPC_STD_MMU_64
 
 config PPC_256K_PAGES
@@ -1045,11 +1046,6 @@
 
 source "crypto/Kconfig"
 
-config PPC_CLOCK
-	bool
-	default n
-	select HAVE_CLK
-
 config PPC_LIB_RHEAP
 	bool
 
diff --git a/arch/powerpc/boot/dts/ac14xx.dts b/arch/powerpc/boot/dts/ac14xx.dts
index a543c40..a1b8837 100644
--- a/arch/powerpc/boot/dts/ac14xx.dts
+++ b/arch/powerpc/boot/dts/ac14xx.dts
@@ -139,7 +139,14 @@
 		};
 	};
 
+	clocks {
+		osc {
+			clock-frequency = <25000000>;
+		};
+	};
+
 	soc@80000000 {
+		bus-frequency = <80000000>;	/* 80 MHz ips bus */
 
 		clock@f00 {
 			compatible = "fsl,mpc5121rev2-clock", "fsl,mpc5121-clock";
diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
index 2d7cb04..2c0e155 100644
--- a/arch/powerpc/boot/dts/mpc5121.dtsi
+++ b/arch/powerpc/boot/dts/mpc5121.dtsi
@@ -9,6 +9,8 @@
  * option) any later version.
  */
 
+#include <dt-bindings/clock/mpc512x-clock.h>
+
 /dts-v1/;
 
 / {
@@ -49,6 +51,10 @@
 		compatible = "fsl,mpc5121-mbx";
 		reg = <0x20000000 0x4000>;
 		interrupts = <66 0x8>;
+		clocks = <&clks MPC512x_CLK_MBX_BUS>,
+			 <&clks MPC512x_CLK_MBX_3D>,
+			 <&clks MPC512x_CLK_MBX>;
+		clock-names = "mbx-bus", "mbx-3d", "mbx";
 	};
 
 	sram@30000000 {
@@ -62,6 +68,8 @@
 		interrupts = <6 8>;
 		#address-cells = <1>;
 		#size-cells = <1>;
+		clocks = <&clks MPC512x_CLK_NFC>;
+		clock-names = "ipg";
 	};
 
 	localbus@80000020 {
@@ -73,6 +81,17 @@
 		ranges = <0x0 0x0 0xfc000000 0x04000000>;
 	};
 
+	clocks {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		osc: osc {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <33000000>;
+		};
+	};
+
 	soc@80000000 {
 		compatible = "fsl,mpc5121-immr";
 		#address-cells = <1>;
@@ -117,9 +136,12 @@
 		};
 
 		/* Clock control */
-		clock@f00 {
+		clks: clock@f00 {
 			compatible = "fsl,mpc5121-clock";
 			reg = <0xf00 0x100>;
+			#clock-cells = <1>;
+			clocks = <&osc>;
+			clock-names = "osc";
 		};
 
 		/* Power Management Controller */
@@ -139,12 +161,24 @@
 			compatible = "fsl,mpc5121-mscan";
 			reg = <0x1300 0x80>;
 			interrupts = <12 0x8>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN0_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
 		};
 
 		can@1380 {
 			compatible = "fsl,mpc5121-mscan";
 			reg = <0x1380 0x80>;
 			interrupts = <13 0x8>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN1_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
 		};
 
 		sdhc@1500 {
@@ -153,6 +187,9 @@
 			interrupts = <8 0x8>;
 			dmas = <&dma0 30>;
 			dma-names = "rx-tx";
+			clocks = <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SDHC>;
+			clock-names = "ipg", "per";
 		};
 
 		i2c@1700 {
@@ -161,6 +198,8 @@
 			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
 			reg = <0x1700 0x20>;
 			interrupts = <9 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
 		};
 
 		i2c@1720 {
@@ -169,6 +208,8 @@
 			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
 			reg = <0x1720 0x20>;
 			interrupts = <10 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
 		};
 
 		i2c@1740 {
@@ -177,6 +218,8 @@
 			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
 			reg = <0x1740 0x20>;
 			interrupts = <11 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
 		};
 
 		i2ccontrol@1760 {
@@ -188,30 +231,48 @@
 			compatible = "fsl,mpc5121-axe";
 			reg = <0x2000 0x100>;
 			interrupts = <42 0x8>;
+			clocks = <&clks MPC512x_CLK_AXE>;
+			clock-names = "ipg";
 		};
 
 		display@2100 {
 			compatible = "fsl,mpc5121-diu";
 			reg = <0x2100 0x100>;
 			interrupts = <64 0x8>;
+			clocks = <&clks MPC512x_CLK_DIU>;
+			clock-names = "ipg";
 		};
 
 		can@2300 {
 			compatible = "fsl,mpc5121-mscan";
 			reg = <0x2300 0x80>;
 			interrupts = <90 0x8>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN2_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
 		};
 
 		can@2380 {
 			compatible = "fsl,mpc5121-mscan";
 			reg = <0x2380 0x80>;
 			interrupts = <91 0x8>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN3_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
 		};
 
 		viu@2400 {
 			compatible = "fsl,mpc5121-viu";
 			reg = <0x2400 0x400>;
 			interrupts = <67 0x8>;
+			clocks = <&clks MPC512x_CLK_VIU>;
+			clock-names = "ipg";
 		};
 
 		mdio@2800 {
@@ -219,6 +280,8 @@
 			reg = <0x2800 0x800>;
 			#address-cells = <1>;
 			#size-cells = <0>;
+			clocks = <&clks MPC512x_CLK_FEC>;
+			clock-names = "per";
 		};
 
 		eth0: ethernet@2800 {
@@ -227,6 +290,8 @@
 			reg = <0x2800 0x800>;
 			local-mac-address = [ 00 00 00 00 00 00 ];
 			interrupts = <4 0x8>;
+			clocks = <&clks MPC512x_CLK_FEC>;
+			clock-names = "per";
 		};
 
 		/* USB1 using external ULPI PHY */
@@ -238,6 +303,8 @@
 			interrupts = <43 0x8>;
 			dr_mode = "otg";
 			phy_type = "ulpi";
+			clocks = <&clks MPC512x_CLK_USB1>;
+			clock-names = "ipg";
 		};
 
 		/* USB0 using internal UTMI PHY */
@@ -249,6 +316,8 @@
 			interrupts = <44 0x8>;
 			dr_mode = "otg";
 			phy_type = "utmi_wide";
+			clocks = <&clks MPC512x_CLK_USB2>;
+			clock-names = "ipg";
 		};
 
 		/* IO control */
@@ -267,6 +336,8 @@
 			compatible = "fsl,mpc5121-pata";
 			reg = <0x10200 0x100>;
 			interrupts = <5 0x8>;
+			clocks = <&clks MPC512x_CLK_PATA>;
+			clock-names = "ipg";
 		};
 
 		/* 512x PSCs are not 52xx PSC compatible */
@@ -278,6 +349,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC0>,
+				 <&clks MPC512x_CLK_PSC0_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC1 */
@@ -287,6 +361,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC1>,
+				 <&clks MPC512x_CLK_PSC1_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC2 */
@@ -296,6 +373,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC2>,
+				 <&clks MPC512x_CLK_PSC2_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC3 */
@@ -305,6 +385,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC3>,
+				 <&clks MPC512x_CLK_PSC3_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC4 */
@@ -314,6 +397,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC4>,
+				 <&clks MPC512x_CLK_PSC4_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC5 */
@@ -323,6 +409,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC5>,
+				 <&clks MPC512x_CLK_PSC5_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC6 */
@@ -332,6 +421,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC6>,
+				 <&clks MPC512x_CLK_PSC6_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC7 */
@@ -341,6 +433,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC7>,
+				 <&clks MPC512x_CLK_PSC7_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC8 */
@@ -350,6 +445,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC8>,
+				 <&clks MPC512x_CLK_PSC8_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC9 */
@@ -359,6 +457,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC9>,
+				 <&clks MPC512x_CLK_PSC9_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC10 */
@@ -368,6 +469,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC10>,
+				 <&clks MPC512x_CLK_PSC10_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		/* PSC11 */
@@ -377,12 +481,17 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC11>,
+				 <&clks MPC512x_CLK_PSC11_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		pscfifo@11f00 {
 			compatible = "fsl,mpc5121-psc-fifo";
 			reg = <0x11f00 0x100>;
 			interrupts = <40 0x8>;
+			clocks = <&clks MPC512x_CLK_PSC_FIFO>;
+			clock-names = "ipg";
 		};
 
 		dma0: dma@14000 {
@@ -400,6 +509,8 @@
 		#address-cells = <3>;
 		#size-cells = <2>;
 		#interrupt-cells = <1>;
+		clocks = <&clks MPC512x_CLK_PCI>;
+		clock-names = "ipg";
 
 		reg = <0x80008500 0x100	/* internal registers */
 		       0x80008300 0x8>;	/* config space access registers */
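
With the clocks/clock-names pairs added above, peripheral drivers can look their clocks up by connection name instead of assuming fixed rates. A hedged consumer-side sketch (probe context and device pointer assumed):

#include <linux/clk.h>
#include <linux/device.h>
#include <linux/err.h>

static int example_enable_ipg_clock(struct device *dev)
{
	struct clk *ipg;

	ipg = devm_clk_get(dev, "ipg");	/* matches clock-names = "ipg" */
	if (IS_ERR(ipg))
		return PTR_ERR(ipg);

	/* balanced by clk_disable_unprepare() on the remove path */
	return clk_prepare_enable(ipg);
}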
diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts
index a618dfc..e4f2974 100644
--- a/arch/powerpc/boot/dts/mpc5125twr.dts
+++ b/arch/powerpc/boot/dts/mpc5125twr.dts
@@ -12,6 +12,8 @@
  * option) any later version.
  */
 
+#include <dt-bindings/clock/mpc512x-clock.h>
+
 /dts-v1/;
 
 / {
@@ -54,6 +56,17 @@
 		reg = <0x30000000 0x08000>;		// 32K at 0x30000000
 	};
 
+	clocks {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		osc: osc {
+			compatible = "fixed-clock";
+			#clock-cells = <0>;
+			clock-frequency = <33000000>;
+		};
+	};
+
 	soc@80000000 {
 		compatible = "fsl,mpc5121-immr";
 		#address-cells = <1>;
@@ -87,9 +100,12 @@
 			reg = <0xe00 0x100>;
 		};
 
-		clock@f00 {	// Clock control
+		clks: clock@f00 {	// Clock control
 			compatible = "fsl,mpc5121-clock";
 			reg = <0xf00 0x100>;
+			#clock-cells = <1>;
+			clocks = <&osc>;
+			clock-names = "osc";
 		};
 
 		pmc@1000{  // Power Management Controller
@@ -114,18 +130,33 @@
 			compatible = "fsl,mpc5121-mscan";
 			interrupts = <12 0x8>;
 			reg = <0x1300 0x80>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN0_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
 		};
 
 		can@1380 {
 			compatible = "fsl,mpc5121-mscan";
 			interrupts = <13 0x8>;
 			reg = <0x1380 0x80>;
+			clocks = <&clks MPC512x_CLK_BDLC>,
+				 <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SYS>,
+				 <&clks MPC512x_CLK_REF>,
+				 <&clks MPC512x_CLK_MSCAN1_MCLK>;
+			clock-names = "ipg", "ips", "sys", "ref", "mclk";
 		};
 
 		sdhc@1500 {
 			compatible = "fsl,mpc5121-sdhc";
 			interrupts = <8 0x8>;
 			reg = <0x1500 0x100>;
+			clocks = <&clks MPC512x_CLK_IPS>,
+				 <&clks MPC512x_CLK_SDHC>;
+			clock-names = "ipg", "per";
 		};
 
 		i2c@1700 {
@@ -134,6 +165,8 @@
 			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
 			reg = <0x1700 0x20>;
 			interrupts = <0x9 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
 		};
 
 		i2c@1720 {
@@ -142,6 +175,8 @@
 			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
 			reg = <0x1720 0x20>;
 			interrupts = <0xa 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
 		};
 
 		i2c@1740 {
@@ -150,6 +185,8 @@
 			compatible = "fsl,mpc5121-i2c", "fsl-i2c";
 			reg = <0x1740 0x20>;
 			interrupts = <0xb 0x8>;
+			clocks = <&clks MPC512x_CLK_I2C>;
+			clock-names = "ipg";
 		};
 
 		i2ccontrol@1760 {
@@ -161,6 +198,8 @@
 			compatible = "fsl,mpc5121-diu";
 			reg = <0x2100 0x100>;
 			interrupts = <64 0x8>;
+			clocks = <&clks MPC512x_CLK_DIU>;
+			clock-names = "ipg";
 		};
 
 		mdio@2800 {
@@ -180,6 +219,8 @@
 			interrupts = <4 0x8>;
 			phy-handle = < &phy0 >;
 			phy-connection-type = "rmii";
+			clocks = <&clks MPC512x_CLK_FEC>;
+			clock-names = "per";
 		};
 
 		// IO control
@@ -200,6 +241,8 @@
 			interrupts = <43 0x8>;
 			dr_mode = "host";
 			phy_type = "ulpi";
+			clocks = <&clks MPC512x_CLK_USB1>;
+			clock-names = "ipg";
 			status = "disabled";
 		};
 
@@ -211,6 +254,9 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC1>,
+				 <&clks MPC512x_CLK_PSC1_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		// PSC9 uart1 aka ttyPSC1
@@ -220,12 +266,17 @@
 			interrupts = <40 0x8>;
 			fsl,rx-fifo-size = <16>;
 			fsl,tx-fifo-size = <16>;
+			clocks = <&clks MPC512x_CLK_PSC9>,
+				 <&clks MPC512x_CLK_PSC9_MCLK>;
+			clock-names = "ipg", "mclk";
 		};
 
 		pscfifo@11f00 {
 			compatible = "fsl,mpc5121-psc-fifo";
 			reg = <0x11f00 0x100>;
 			interrupts = <40 0x8>;
+			clocks = <&clks MPC512x_CLK_PSC_FIFO>;
+			clock-names = "ipg";
 		};
 
 		dma@14000 {
diff --git a/arch/powerpc/include/asm/clk_interface.h b/arch/powerpc/include/asm/clk_interface.h
deleted file mode 100644
index ab1882c..0000000
--- a/arch/powerpc/include/asm/clk_interface.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __ASM_POWERPC_CLK_INTERFACE_H
-#define __ASM_POWERPC_CLK_INTERFACE_H
-
-#include <linux/clk.h>
-
-struct clk_interface {
-	struct clk*	(*clk_get)	(struct device *dev, const char *id);
-	int		(*clk_enable)	(struct clk *clk);
-	void		(*clk_disable)	(struct clk *clk);
-	unsigned long	(*clk_get_rate)	(struct clk *clk);
-	void		(*clk_put)	(struct clk *clk);
-	long		(*clk_round_rate) (struct clk *clk, unsigned long rate);
-	int 		(*clk_set_rate)	(struct clk *clk, unsigned long rate);
-	int		(*clk_set_parent) (struct clk *clk, struct clk *parent);
-	struct clk*	(*clk_get_parent) (struct clk *clk);
-};
-
-extern struct clk_interface clk_functions;
-
-#endif /* __ASM_POWERPC_CLK_INTERFACE_H */
diff --git a/arch/powerpc/include/asm/mpc5121.h b/arch/powerpc/include/asm/mpc5121.h
index 887d3d6..4a69cd1 100644
--- a/arch/powerpc/include/asm/mpc5121.h
+++ b/arch/powerpc/include/asm/mpc5121.h
@@ -37,7 +37,12 @@
 	u32	cccr;	/* CFM Clock Control Register */
 	u32	dccr;	/* DIU Clock Control Register */
 	u32	mscan_ccr[4];	/* MSCAN Clock Control Registers */
-	u8	res[0x98]; /* Reserved */
+	u32	out_ccr[4];	/* OUT CLK Configure Registers */
+	u32	rsv0[2];	/* Reserved */
+	u32	scfr3;		/* System Clock Frequency Register 3 */
+	u32	rsv1[3];	/* Reserved */
+	u32	spll_lock_cnt;	/* System PLL Lock Counter */
+	u8	res[0x6c];	/* Reserved */
 };
 
 /*
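
Consistency check (an editorial note, not part of the patch): the new fields plus the shrunken pad occupy exactly the 0x98 bytes the old res[] covered, so the offsets of all later members are unchanged: 4*4 (out_ccr) + 2*4 (rsv0) + 4 (scfr3) + 3*4 (rsv1) + 4 (spll_lock_cnt) + 0x6c = 0x98.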
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index d27960c..bc141c9 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -560,9 +560,9 @@
 			    pmd_t *pmdp);
 
 #define pmd_move_must_withdraw pmd_move_must_withdraw
-typedef struct spinlock spinlock_t;
-static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
-					 spinlock_t *old_pmd_ptl)
+struct spinlock;
+static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+					 struct spinlock *old_pmd_ptl)
 {
 	/*
 	 * Archs like ppc64 use pgtable to store per pmd
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 8ca20ac..b62de43 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -450,13 +450,6 @@
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 extern void power7_nap(void);
-
-#ifdef CONFIG_PSERIES_IDLE
-extern void update_smt_snooze_delay(int cpu, int residency);
-#else
-static inline void update_smt_snooze_delay(int cpu, int residency) {}
-#endif
-
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
index 43523fe..3ddf702 100644
--- a/arch/powerpc/include/asm/systbl.h
+++ b/arch/powerpc/include/asm/systbl.h
@@ -359,3 +359,5 @@
 COMPAT_SYS(process_vm_writev)
 SYSCALL(finit_module)
 SYSCALL(ni_syscall) /* sys_kcmp */
+SYSCALL_SPU(sched_setattr)
+SYSCALL_SPU(sched_getattr)
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 3ca819f..4494f02 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,7 +12,7 @@
 #include <uapi/asm/unistd.h>
 
 
-#define __NR_syscalls		355
+#define __NR_syscalls		357
 
 #define __NR__exit __NR_exit
 #define NR_syscalls	__NR_syscalls
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 74cb4d7..881bf2e 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -377,6 +377,7 @@
 #define __NR_process_vm_writev	352
 #define __NR_finit_module	353
 #define __NR_kcmp		354
-
+#define __NR_sched_setattr	355
+#define __NR_sched_getattr	356
 
 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
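
With __NR_syscalls bumped to 357 above, the highest number (356) stays in range. A hedged userspace sketch invoking the new entry point by number before a libc wrapper exists (argument order per the generic sched_getattr prototype):

#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

#ifndef __NR_sched_getattr
#define __NR_sched_getattr 356	/* powerpc number added above */
#endif

/* attr points to a struct sched_attr of 'size' bytes; flags must be 0 */
static long my_sched_getattr(pid_t pid, void *attr, unsigned int size)
{
	return syscall(__NR_sched_getattr, pid, attr, size, 0u);
}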
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 904d713..fcc9a89 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -48,7 +48,6 @@
 obj-$(CONFIG_PPC_970_NAP)	+= idle_power4.o
 obj-$(CONFIG_PPC_P7_NAP)	+= idle_power7.o
 obj-$(CONFIG_PPC_OF)		+= of_platform.o prom_parse.o
-obj-$(CONFIG_PPC_CLOCK)		+= clock.o
 procfs-y			:= proc_powerpc.o
 obj-$(CONFIG_PROC_FS)		+= $(procfs-y)
 rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI)	:= rtas_pci.o
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index abfa011..2912b87 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -793,6 +793,9 @@
 {
 	remove_index_dirs(cache_dir);
 
+	/* Remove cache dir from sysfs */
+	kobject_del(cache_dir->kobj);
+
 	kobject_put(cache_dir->kobj);
 
 	kfree(cache_dir);
diff --git a/arch/powerpc/kernel/clock.c b/arch/powerpc/kernel/clock.c
deleted file mode 100644
index a764b47..0000000
--- a/arch/powerpc/kernel/clock.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Dummy clk implementations for powerpc.
- * These need to be overridden in platform code.
- */
-
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/export.h>
-#include <asm/clk_interface.h>
-
-struct clk_interface clk_functions;
-
-struct clk *clk_get(struct device *dev, const char *id)
-{
-	if (clk_functions.clk_get)
-		return clk_functions.clk_get(dev, id);
-	return ERR_PTR(-ENOSYS);
-}
-EXPORT_SYMBOL(clk_get);
-
-void clk_put(struct clk *clk)
-{
-	if (clk_functions.clk_put)
-		clk_functions.clk_put(clk);
-}
-EXPORT_SYMBOL(clk_put);
-
-int clk_enable(struct clk *clk)
-{
-	if (clk_functions.clk_enable)
-		return clk_functions.clk_enable(clk);
-	return -ENOSYS;
-}
-EXPORT_SYMBOL(clk_enable);
-
-void clk_disable(struct clk *clk)
-{
-	if (clk_functions.clk_disable)
-		clk_functions.clk_disable(clk);
-}
-EXPORT_SYMBOL(clk_disable);
-
-unsigned long clk_get_rate(struct clk *clk)
-{
-	if (clk_functions.clk_get_rate)
-		return clk_functions.clk_get_rate(clk);
-	return 0;
-}
-EXPORT_SYMBOL(clk_get_rate);
-
-long clk_round_rate(struct clk *clk, unsigned long rate)
-{
-	if (clk_functions.clk_round_rate)
-		return clk_functions.clk_round_rate(clk, rate);
-	return -ENOSYS;
-}
-EXPORT_SYMBOL(clk_round_rate);
-
-int clk_set_rate(struct clk *clk, unsigned long rate)
-{
-	if (clk_functions.clk_set_rate)
-		return clk_functions.clk_set_rate(clk, rate);
-	return -ENOSYS;
-}
-EXPORT_SYMBOL(clk_set_rate);
-
-struct clk *clk_get_parent(struct clk *clk)
-{
-	if (clk_functions.clk_get_parent)
-		return clk_functions.clk_get_parent(clk);
-	return ERR_PTR(-ENOSYS);
-}
-EXPORT_SYMBOL(clk_get_parent);
-
-int clk_set_parent(struct clk *clk, struct clk *parent)
-{
-	if (clk_functions.clk_set_parent)
-		return clk_functions.clk_set_parent(clk, parent);
-	return -ENOSYS;
-}
-EXPORT_SYMBOL(clk_set_parent);
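
The deleted indirection is superseded by the common clock framework: drivers/clk/ now provides the clk_get()/clk_enable() consumer API, and platforms register clocks rather than filling in clk_functions. A minimal registration sketch (the names are illustrative):

#include <linux/clk-provider.h>
#include <linux/clkdev.h>

static void example_register_osc(void)
{
	struct clk *clk;

	/* fixed 33 MHz root clock, like the 512x oscillator nodes above */
	clk = clk_register_fixed_rate(NULL, "osc", NULL, CLK_IS_ROOT,
				      33000000);

	/* optional clkdev lookup so non-DT consumers can clk_get() it */
	clk_register_clkdev(clk, "osc", NULL);
}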
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 64b7a6e..8d4c247f1 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -811,7 +811,7 @@
  * schedule DABR
  */
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
-	if (unlikely(hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
+	if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
 		set_breakpoint(&new->thread.hw_brk);
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 6ce69e6..a67e00a 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1022,29 +1022,24 @@
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	tm_frame = &rt_sf->uc_transact.uc_mcontext;
 	if (MSR_TM_ACTIVE(regs->msr)) {
+		if (__put_user((unsigned long)&rt_sf->uc_transact,
+			       &rt_sf->uc.uc_link) ||
+		    __put_user((unsigned long)tm_frame,
+			       &rt_sf->uc_transact.uc_regs))
+			goto badframe;
 		if (save_tm_user_regs(regs, frame, tm_frame, sigret))
 			goto badframe;
 	}
 	else
 #endif
 	{
+		if (__put_user(0, &rt_sf->uc.uc_link))
+			goto badframe;
 		if (save_user_regs(regs, frame, tm_frame, sigret, 1))
 			goto badframe;
 	}
 	regs->link = tramp;
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-	if (MSR_TM_ACTIVE(regs->msr)) {
-		if (__put_user((unsigned long)&rt_sf->uc_transact,
-			       &rt_sf->uc.uc_link)
-		    || __put_user((unsigned long)tm_frame, &rt_sf->uc_transact.uc_regs))
-			goto badframe;
-	}
-	else
-#endif
-		if (__put_user(0, &rt_sf->uc.uc_link))
-			goto badframe;
-
 	current->thread.fp_state.fpscr = 0;	/* turn off all fp exceptions */
 
 	/* create a stack frame for the caller of the handler */
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index d4a43e6..97e1dc9 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -51,8 +51,6 @@
 		return -EINVAL;
 
 	per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
-	update_smt_snooze_delay(cpu->dev.id, snooze);
-
 	return count;
 }
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 90bb6d9..eb92365 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -472,12 +472,13 @@
 {
 	struct hugepd_freelist **batchp;
 
-	batchp = &__get_cpu_var(hugepd_freelist_cur);
+	batchp = &get_cpu_var(hugepd_freelist_cur);
 
 	if (atomic_read(&tlb->mm->mm_users) < 2 ||
 	    cpumask_equal(mm_cpumask(tlb->mm),
 			  cpumask_of(smp_processor_id()))) {
 		kmem_cache_free(hugepte_cache, hugepte);
+		put_cpu_var(hugepd_freelist_cur);
 		return;
 	}
 
@@ -491,6 +492,7 @@
 		call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
 		*batchp = NULL;
 	}
+	put_cpu_var(hugepd_freelist_cur);
 }
 #endif
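
get_cpu_var() disables preemption, so the reference must be dropped on every exit path, which is what the added put_cpu_var() calls do for both the early return and the fall-through. The pattern in isolation (illustrative names):

#include <linux/percpu.h>
#include <linux/types.h>

static DEFINE_PER_CPU(int, example_counter);

static void example_touch(bool early_out)
{
	int *p = &get_cpu_var(example_counter);	/* preemption now disabled */

	if (early_out) {
		put_cpu_var(example_counter);	/* balance before returning */
		return;
	}
	(*p)++;
	put_cpu_var(example_counter);		/* re-enables preemption */
}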
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 86a63de..30a42e2 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1785,7 +1785,7 @@
 static int topology_update_init(void)
 {
 	start_topology_update();
-	proc_create("powerpc/topology_updates", 644, NULL, &topology_ops);
+	proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops);
 
 	return 0;
 }
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 7ce9cf3..b0c75cc 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -408,7 +408,7 @@
 	if (fixed && (addr & ((1ul << pshift) - 1)))
 		return -EINVAL;
 	if (fixed && addr > (mm->task_size - len))
-		return -EINVAL;
+		return -ENOMEM;
 
 	/* If hint, make sure it matches our alignment restrictions */
 	if (!fixed && addr) {
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index 16250b1..c95eb32 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -240,6 +240,7 @@
 	beq	tlb_miss_common_bolted
 	b	itlb_miss_kernel_bolted
 
+#ifdef CONFIG_PPC_FSL_BOOK3E
 /*
  * TLB miss handling for e6500 and derivatives, using hardware tablewalk.
  *
@@ -409,7 +410,7 @@
 	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
 	tlb_epilog_bolted
 	b	exc_instruction_storage_book3e
-
+#endif /* CONFIG_PPC_FSL_BOOK3E */
 
 /**********************************************************************
  *                                                                    *
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 735839b..b37a58e 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -557,10 +557,12 @@
 		patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
 		patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
 		break;
+#ifdef CONFIG_PPC_FSL_BOOK3E
 	case PPC_HTW_E6500:
 		patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
 		patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
 		break;
+#endif
 	}
 	pr_info("MMU: Book3E HW tablewalk %s\n",
 		book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig
index fc9c1cb..5aa3f4b 100644
--- a/arch/powerpc/platforms/512x/Kconfig
+++ b/arch/powerpc/platforms/512x/Kconfig
@@ -1,9 +1,9 @@
 config PPC_MPC512x
 	bool "512x-based boards"
 	depends on 6xx
+	select COMMON_CLK
 	select FSL_SOC
 	select IPIC
-	select PPC_CLOCK
 	select PPC_PCI_CHOICE
 	select FSL_PCI if PCI
 	select ARCH_WANT_OPTIONAL_GPIOLIB
diff --git a/arch/powerpc/platforms/512x/Makefile b/arch/powerpc/platforms/512x/Makefile
index 72fb934..0169312 100644
--- a/arch/powerpc/platforms/512x/Makefile
+++ b/arch/powerpc/platforms/512x/Makefile
@@ -1,7 +1,8 @@
 #
 # Makefile for the Freescale PowerPC 512x linux kernel.
 #
-obj-y				+= clock.o mpc512x_shared.o
+obj-$(CONFIG_COMMON_CLK)	+= clock-commonclk.o
+obj-y				+= mpc512x_shared.o
 obj-$(CONFIG_MPC5121_ADS)	+= mpc5121_ads.o mpc5121_ads_cpld.o
 obj-$(CONFIG_MPC512x_GENERIC)	+= mpc512x_generic.o
 obj-$(CONFIG_PDM360NG)		+= pdm360ng.o
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
new file mode 100644
index 0000000..6eb614a
--- /dev/null
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -0,0 +1,1221 @@
+/*
+ * Copyright (C) 2013 DENX Software Engineering
+ *
+ * Gerhard Sittig, <gsi@denx.de>
+ *
+ * common clock driver support for the MPC512x platform
+ *
+ * This is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <asm/mpc5121.h>
+#include <dt-bindings/clock/mpc512x-clock.h>
+
+#include "mpc512x.h"		/* our public mpc5121_clk_init() API */
+
+/* helpers to keep the MCLK intermediates "somewhere" in our table */
+enum {
+	MCLK_IDX_MUX0,
+	MCLK_IDX_EN0,
+	MCLK_IDX_DIV0,
+	MCLK_MAX_IDX,
+};
+
+#define NR_PSCS			12
+#define NR_MSCANS		4
+#define NR_SPDIFS		1
+#define NR_OUTCLK		4
+#define NR_MCLKS		(NR_PSCS + NR_MSCANS + NR_SPDIFS + NR_OUTCLK)
+
+/* extend the public set of clocks by adding internal slots for management */
+enum {
+	/* arrange for adjacent numbers after the public set */
+	MPC512x_CLK_START_PRIVATE = MPC512x_CLK_LAST_PUBLIC,
+	/* clocks which aren't announced to the public */
+	MPC512x_CLK_DDR,
+	MPC512x_CLK_MEM,
+	MPC512x_CLK_IIM,
+	/* intermediates in div+gate combos or fractional dividers */
+	MPC512x_CLK_DDR_UG,
+	MPC512x_CLK_SDHC_x4,
+	MPC512x_CLK_SDHC_UG,
+	MPC512x_CLK_SDHC2_UG,
+	MPC512x_CLK_DIU_x4,
+	MPC512x_CLK_DIU_UG,
+	MPC512x_CLK_MBX_BUS_UG,
+	MPC512x_CLK_MBX_UG,
+	MPC512x_CLK_MBX_3D_UG,
+	MPC512x_CLK_PCI_UG,
+	MPC512x_CLK_NFC_UG,
+	MPC512x_CLK_LPC_UG,
+	MPC512x_CLK_SPDIF_TX_IN,
+	/* intermediates for the mux+gate+div+mux MCLK generation */
+	MPC512x_CLK_MCLKS_FIRST,
+	MPC512x_CLK_MCLKS_LAST = MPC512x_CLK_MCLKS_FIRST
+				+ NR_MCLKS * MCLK_MAX_IDX,
+	/* internal, symbolic spec for the number of slots */
+	MPC512x_CLK_LAST_PRIVATE,
+};
+
+/* data required for the OF clock provider registration */
+static struct clk *clks[MPC512x_CLK_LAST_PRIVATE];
+static struct clk_onecell_data clk_data;
+
+/* CCM register access */
+static struct mpc512x_ccm __iomem *clkregs;
+static DEFINE_SPINLOCK(clklock);
+
+/* SoC variants {{{ */
+
+/*
+ * tell SoC variants apart as they are rather similar yet not identical,
+ * cache the result in an enum to not repeatedly run the expensive OF test
+ *
+ * MPC5123 is an MPC5121 without the MBX graphics accelerator
+ *
+ * MPC5125 has many more differences: no MBX, no AXE, no VIU, no SPDIF,
+ * no PATA, no SATA, no PCI, two FECs (of different compatibility name),
+ * only 10 PSCs (of different compatibility name), two SDHCs, different
+ * NFC IP block, output clocks, system PLL status query, different CPMF
+ * interpretation, no CFM, different fourth PSC/CAN mux0 input -- yet
+ * those differences can get folded into this clock provider support
+ * code and don't warrant a separate highly redundant implementation
+ */
+
+static enum soc_type {
+	MPC512x_SOC_MPC5121,
+	MPC512x_SOC_MPC5123,
+	MPC512x_SOC_MPC5125,
+} soc;
+
+static void mpc512x_clk_determine_soc(void)
+{
+	if (of_machine_is_compatible("fsl,mpc5121")) {
+		soc = MPC512x_SOC_MPC5121;
+		return;
+	}
+	if (of_machine_is_compatible("fsl,mpc5123")) {
+		soc = MPC512x_SOC_MPC5123;
+		return;
+	}
+	if (of_machine_is_compatible("fsl,mpc5125")) {
+		soc = MPC512x_SOC_MPC5125;
+		return;
+	}
+}
+
+static bool soc_has_mbx(void)
+{
+	if (soc == MPC512x_SOC_MPC5121)
+		return true;
+	return false;
+}
+
+static bool soc_has_axe(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return false;
+	return true;
+}
+
+static bool soc_has_viu(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return false;
+	return true;
+}
+
+static bool soc_has_spdif(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return false;
+	return true;
+}
+
+static bool soc_has_pata(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return false;
+	return true;
+}
+
+static bool soc_has_sata(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return false;
+	return true;
+}
+
+static bool soc_has_pci(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return false;
+	return true;
+}
+
+static bool soc_has_fec2(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return true;
+	return false;
+}
+
+static int soc_max_pscnum(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return 10;
+	return 12;
+}
+
+static bool soc_has_sdhc2(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return true;
+	return false;
+}
+
+static bool soc_has_nfc_5125(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return true;
+	return false;
+}
+
+static bool soc_has_outclk(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return true;
+	return false;
+}
+
+static bool soc_has_cpmf_0_bypass(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return true;
+	return false;
+}
+
+static bool soc_has_mclk_mux0_canin(void)
+{
+	if (soc == MPC512x_SOC_MPC5125)
+		return true;
+	return false;
+}
+
+/* }}} SoC variants */
+/* common clk API wrappers {{{ */
+
+/* convenience wrappers around the common clk API */
+static inline struct clk *mpc512x_clk_fixed(const char *name, int rate)
+{
+	return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate);
+}
+
+static inline struct clk *mpc512x_clk_factor(
+	const char *name, const char *parent_name,
+	int mul, int div)
+{
+	int clkflags;
+
+	clkflags = CLK_SET_RATE_PARENT;
+	return clk_register_fixed_factor(NULL, name, parent_name, clkflags,
+					 mul, div);
+}
+
+static inline struct clk *mpc512x_clk_divider(
+	const char *name, const char *parent_name, u8 clkflags,
+	u32 __iomem *reg, u8 pos, u8 len, int divflags)
+{
+	return clk_register_divider(NULL, name, parent_name, clkflags,
+				    reg, pos, len, divflags, &clklock);
+}
+
+static inline struct clk *mpc512x_clk_divtable(
+	const char *name, const char *parent_name,
+	u32 __iomem *reg, u8 pos, u8 len,
+	const struct clk_div_table *divtab)
+{
+	u8 divflags;
+
+	divflags = 0;
+	return clk_register_divider_table(NULL, name, parent_name, 0,
+					  reg, pos, len, divflags,
+					  divtab, &clklock);
+}
+
+static inline struct clk *mpc512x_clk_gated(
+	const char *name, const char *parent_name,
+	u32 __iomem *reg, u8 pos)
+{
+	int clkflags;
+
+	clkflags = CLK_SET_RATE_PARENT;
+	return clk_register_gate(NULL, name, parent_name, clkflags,
+				 reg, pos, 0, &clklock);
+}
+
+static inline struct clk *mpc512x_clk_muxed(const char *name,
+	const char **parent_names, int parent_count,
+	u32 __iomem *reg, u8 pos, u8 len)
+{
+	int clkflags;
+	u8 muxflags;
+
+	clkflags = CLK_SET_RATE_PARENT;
+	muxflags = 0;
+	return clk_register_mux(NULL, name,
+				parent_names, parent_count, clkflags,
+				reg, pos, len, muxflags, &clklock);
+}
+
+/* }}} common clk API wrappers */
+
+/* helper to isolate a bit field from a register */
+static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len)
+{
+	uint32_t val;
+
+	val = in_be32(reg);
+	val >>= pos;
+	val &= (1 << len) - 1;
+	return val;
+}
+
+/* get the SPMF and translate it into the "sys pll" multiplier */
+static int get_spmf_mult(void)
+{
+	static int spmf_to_mult[] = {
+		68, 1, 12, 16, 20, 24, 28, 32,
+		36, 40, 44, 48, 52, 56, 60, 64,
+	};
+	int spmf;
+
+	spmf = get_bit_field(&clkregs->spmr, 24, 4);
+	return spmf_to_mult[spmf];
+}
+
+/*
+ * get the SYS_DIV value and translate it into a divide factor
+ *
+ * values returned from here are a multiple of the real factor since the
+ * divide ratio is fractional
+ */
+static int get_sys_div_x2(void)
+{
+	static int sysdiv_code_to_x2[] = {
+		4, 5, 6, 7, 8, 9, 10, 14,
+		12, 16, 18, 22, 20, 24, 26, 30,
+		28, 32, 34, 38, 36, 40, 42, 46,
+		44, 48, 50, 54, 52, 56, 58, 62,
+		60, 64, 66,
+	};
+	int divcode;
+
+	divcode = get_bit_field(&clkregs->scfr2, 26, 6);
+	return sysdiv_code_to_x2[divcode];
+}
+
+/*
+ * get the CPMF value and translate it into a multiplier factor
+ *
+ * values returned from here are a multiple of the real factor since the
+ * multiplier ratio is fractional
+ */
+static int get_cpmf_mult_x2(void)
+{
+	static int cpmf_to_mult_x36[] = {
+		/* 0b000 is "times 36" */
+		72, 2, 2, 3, 4, 5, 6, 7,
+	};
+	static int cpmf_to_mult_0by[] = {
+		/* 0b000 is "bypass" */
+		2, 2, 2, 3, 4, 5, 6, 7,
+	};
+
+	int *cpmf_to_mult;
+	int cpmf;
+
+	cpmf = get_bit_field(&clkregs->spmr, 16, 4);
+	if (soc_has_cpmf_0_bypass())
+		cpmf_to_mult = cpmf_to_mult_0by;
+	else
+		cpmf_to_mult = cpmf_to_mult_x36;
+	return cpmf_to_mult[cpmf];
+}
+
+/*
+ * some of the clock dividers do scale in a linear way, yet not all of
+ * their bit combinations are legal; use a divider table to get a
+ * resulting set of applicable divider values
+ */
+
+/* applies to the IPS_DIV, and PCI_DIV values */
+static struct clk_div_table divtab_2346[] = {
+	{ .val = 2, .div = 2, },
+	{ .val = 3, .div = 3, },
+	{ .val = 4, .div = 4, },
+	{ .val = 6, .div = 6, },
+	{ .div = 0, },
+};
+
+/* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */
+static struct clk_div_table divtab_1234[] = {
+	{ .val = 1, .div = 1, },
+	{ .val = 2, .div = 2, },
+	{ .val = 3, .div = 3, },
+	{ .val = 4, .div = 4, },
+	{ .div = 0, },
+};
+
+static int get_freq_from_dt(char *propname)
+{
+	struct device_node *np;
+	const unsigned int *prop;
+	int val;
+
+	val = 0;
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr");
+	if (np) {
+		prop = of_get_property(np, propname, NULL);
+		if (prop)
+			val = *prop;
+		of_node_put(np);
+	}
+	return val;
+}
+
+static void mpc512x_clk_preset_data(void)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(clks); i++)
+		clks[i] = ERR_PTR(-ENODEV);
+}
+
+/*
+ * - receives the "bus frequency" from the caller (that's the IPS clock
+ *   rate, the historical source of clock information)
+ * - fetches the system PLL multiplier and divider values as well as the
+ *   IPS divider value from hardware
+ * - determines the REF clock rate either from the XTAL/OSC spec (if
+ *   there is a device tree node describing the oscillator) or from the
+ *   IPS bus clock (supported for backwards compatibility, such that
+ *   setups without XTAL/OSC specs keep working)
+ * - creates the "ref" clock item in the clock tree, such that
+ *   subsequent code can create the remainder of the hierarchy (REF ->
+ *   SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div
+ *   values
+ */
+static void mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq,
+					int *sys_mul, int *sys_div,
+					int *ips_div)
+{
+	struct clk *osc_clk;
+	int calc_freq;
+
+	/* fetch mul/div factors from the hardware */
+	*sys_mul = get_spmf_mult();
+	*sys_mul *= 2;		/* compensate for the fractional divider */
+	*sys_div = get_sys_div_x2();
+	*ips_div = get_bit_field(&clkregs->scfr1, 23, 3);
+
+	/* lookup the oscillator clock for its rate */
+	osc_clk = of_clk_get_by_name(np, "osc");
+
+	/*
+	 * either descend from OSC to REF (and in bypassing verify the
+	 * IPS rate), or backtrack from IPS and multiplier values that
+	 * were fetched from hardware to REF and thus to the OSC value
+	 *
+	 * in either case the REF clock gets created here and the
+	 * remainder of the clock tree can get spanned from there
+	 */
+	if (!IS_ERR(osc_clk)) {
+		clks[MPC512x_CLK_REF] = mpc512x_clk_factor("ref", "osc", 1, 1);
+		calc_freq = clk_get_rate(clks[MPC512x_CLK_REF]);
+		calc_freq *= *sys_mul;
+		calc_freq /= *sys_div;
+		calc_freq /= 2;
+		calc_freq /= *ips_div;
+		if (bus_freq && calc_freq != bus_freq)
+			pr_warn("calc rate %d != OF spec %d\n",
+				calc_freq, bus_freq);
+	} else {
+		calc_freq = bus_freq;	/* start with IPS */
+		calc_freq *= *ips_div;	/* IPS -> CSB */
+		calc_freq *= 2;		/* CSB -> SYS */
+		calc_freq *= *sys_div;	/* SYS -> PLL out */
+		calc_freq /= *sys_mul;	/* PLL out -> REF == OSC */
+		clks[MPC512x_CLK_REF] = mpc512x_clk_fixed("ref", calc_freq);
+	}
+}
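
/*
 * Editorial worked example for the backtracking branch above, with
 * hypothetical register values (not taken from the patch):
 * bus_freq (IPS) = 66 MHz, ips_div = 2, sys_div = 4 (the x2 encoding
 * of a real divider of 2), sys_mul = 32 (SPMF multiplier 16, doubled
 * for the fractional divider):
 *
 *    66 MHz * 2 (ips_div)       -> CSB 132 MHz
 *   132 MHz * 2                 -> SYS 264 MHz
 *   264 MHz * 4 / 32 (div/mul)  -> REF 33 MHz, the osc rate in the DTS
 */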
+
+/* MCLK helpers {{{ */
+
+/*
+ * helper code for the MCLK subtree setup
+ *
+ * the overview in section 5.2.4 of the MPC5121e Reference Manual rev4
+ * suggests that all instances of the "PSC clock generation" are equal,
+ * and that one might re-use the PSC setup for MSCAN clock generation
+ * (section 5.2.5) as well, at least the logic if not the data for
+ * description
+ *
+ * the details (starting at page 5-20) show differences in the specific
+ * inputs of the first mux stage ("can clk in", "spdif tx"), and the
+ * factual non-availability of the second mux stage (it's present yet
+ * only one input is valid)
+ *
+ * the MSCAN clock related registers (starting at page 5-35) all
+ * reference "spdif clk" at the first mux stage and don't mention any
+ * "can clk" at all, which somehow is unexpected
+ *
+ * TODO re-check the document, and clarify whether the RM is correct in
+ * the overview or in the details, and whether the difference is a
+ * clipboard induced error or results from chip revisions
+ *
+ * it turns out that the RM rev4 as of 2012-06 talks about "can" for the
+ * PSCs while RM rev3 as of 2008-10 talks about "spdif", so I guess that
+ * first a doc update is required which better reflects reality in the
+ * SoC before the implementation should follow while no questions remain
+ */
+
+/*
+ * note that this declaration raises a checkpatch warning, but
+ * it's the very data type dictated by <linux/clk-provider.h>,
+ * "fixing" this warning will break compilation
+ */
+static const char *parent_names_mux0_spdif[] = {
+	"sys", "ref", "psc-mclk-in", "spdif-tx",
+};
+
+static const char *parent_names_mux0_canin[] = {
+	"sys", "ref", "psc-mclk-in", "can-clk-in",
+};
+
+enum mclk_type {
+	MCLK_TYPE_PSC,
+	MCLK_TYPE_MSCAN,
+	MCLK_TYPE_SPDIF,
+	MCLK_TYPE_OUTCLK,
+};
+
+struct mclk_setup_data {
+	enum mclk_type type;
+	bool has_mclk1;
+	const char *name_mux0;
+	const char *name_en0;
+	const char *name_div0;
+	const char *parent_names_mux1[2];
+	const char *name_mclk;
+};
+
+#define MCLK_SETUP_DATA_PSC(id) { \
+	MCLK_TYPE_PSC, 0, \
+	"psc" #id "-mux0", \
+	"psc" #id "-en0", \
+	"psc" #id "_mclk_div", \
+	{ "psc" #id "_mclk_div", "dummy", }, \
+	"psc" #id "_mclk", \
+}
+
+#define MCLK_SETUP_DATA_MSCAN(id) { \
+	MCLK_TYPE_MSCAN, 0, \
+	"mscan" #id "-mux0", \
+	"mscan" #id "-en0", \
+	"mscan" #id "_mclk_div", \
+	{ "mscan" #id "_mclk_div", "dummy", }, \
+	"mscan" #id "_mclk", \
+}
+
+#define MCLK_SETUP_DATA_SPDIF { \
+	MCLK_TYPE_SPDIF, 1, \
+	"spdif-mux0", \
+	"spdif-en0", \
+	"spdif_mclk_div", \
+	{ "spdif_mclk_div", "spdif-rx", }, \
+	"spdif_mclk", \
+}
+
+#define MCLK_SETUP_DATA_OUTCLK(id) { \
+	MCLK_TYPE_OUTCLK, 0, \
+	"out" #id "-mux0", \
+	"out" #id "-en0", \
+	"out" #id "_mclk_div", \
+	{ "out" #id "_mclk_div", "dummy", }, \
+	"out" #id "_clk", \
+}
+
+static struct mclk_setup_data mclk_psc_data[] = {
+	MCLK_SETUP_DATA_PSC(0),
+	MCLK_SETUP_DATA_PSC(1),
+	MCLK_SETUP_DATA_PSC(2),
+	MCLK_SETUP_DATA_PSC(3),
+	MCLK_SETUP_DATA_PSC(4),
+	MCLK_SETUP_DATA_PSC(5),
+	MCLK_SETUP_DATA_PSC(6),
+	MCLK_SETUP_DATA_PSC(7),
+	MCLK_SETUP_DATA_PSC(8),
+	MCLK_SETUP_DATA_PSC(9),
+	MCLK_SETUP_DATA_PSC(10),
+	MCLK_SETUP_DATA_PSC(11),
+};
+
+static struct mclk_setup_data mclk_mscan_data[] = {
+	MCLK_SETUP_DATA_MSCAN(0),
+	MCLK_SETUP_DATA_MSCAN(1),
+	MCLK_SETUP_DATA_MSCAN(2),
+	MCLK_SETUP_DATA_MSCAN(3),
+};
+
+static struct mclk_setup_data mclk_spdif_data[] = {
+	MCLK_SETUP_DATA_SPDIF,
+};
+
+static struct mclk_setup_data mclk_outclk_data[] = {
+	MCLK_SETUP_DATA_OUTCLK(0),
+	MCLK_SETUP_DATA_OUTCLK(1),
+	MCLK_SETUP_DATA_OUTCLK(2),
+	MCLK_SETUP_DATA_OUTCLK(3),
+};
+
+/* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */
+static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
+{
+	size_t clks_idx_pub, clks_idx_int;
+	u32 __iomem *mccr_reg;	/* MCLK control register (mux, en, div) */
+	int div;
+
+	/* derive a few parameters from the component type and index */
+	switch (entry->type) {
+	case MCLK_TYPE_PSC:
+		clks_idx_pub = MPC512x_CLK_PSC0_MCLK + idx;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (idx) * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->psc_ccr[idx];
+		break;
+	case MCLK_TYPE_MSCAN:
+		clks_idx_pub = MPC512x_CLK_MSCAN0_MCLK + idx;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (NR_PSCS + idx) * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->mscan_ccr[idx];
+		break;
+	case MCLK_TYPE_SPDIF:
+		clks_idx_pub = MPC512x_CLK_SPDIF_MCLK;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (NR_PSCS + NR_MSCANS) * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->spccr;
+		break;
+	case MCLK_TYPE_OUTCLK:
+		clks_idx_pub = MPC512x_CLK_OUT0_CLK + idx;
+		clks_idx_int = MPC512x_CLK_MCLKS_FIRST
+			     + (NR_PSCS + NR_MSCANS + NR_SPDIFS + idx)
+			     * MCLK_MAX_IDX;
+		mccr_reg = &clkregs->out_ccr[idx];
+		break;
+	default:
+		return;
+	}
+
+	/*
+	 * this was grabbed from the PPC_CLOCK implementation, which
+	 * enforced a specific MCLK divider while the clock was gated
+	 * during setup (that's a documented hardware requirement)
+	 *
+	 * the PPC_CLOCK implementation might even have violated the
+	 * "MCLK <= IPS" constraint, the fixed divider value of 1
+	 * results in a divider of 2 and thus MCLK = SYS/2 which equals
+	 * CSB which is greater than IPS; the serial port setup may have
+	 * adjusted the divider which the clock setup might have left in
+	 * an undesirable state
+	 *
+	 * initial setup is:
+	 * - MCLK 0 from SYS
+	 * - MCLK DIV such to not exceed the IPS clock
+	 * - MCLK 0 enabled
+	 * - MCLK 1 from MCLK DIV
+	 */
+	div = clk_get_rate(clks[MPC512x_CLK_SYS]);
+	div /= clk_get_rate(clks[MPC512x_CLK_IPS]);
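+	/* gate the MCLK, adjust the divider while gated, then ungate */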
+	out_be32(mccr_reg, (0 << 16));
+	out_be32(mccr_reg, (0 << 16) | ((div - 1) << 17));
+	out_be32(mccr_reg, (1 << 16) | ((div - 1) << 17));
+
+	/*
+	 * create the 'struct clk' items of the MCLK's clock subtree
+	 *
+	 * note that by design we always create all nodes and won't take
+	 * shortcuts here, because
+	 * - the "internal" MCLK_DIV and MCLK_OUT signal in turn are
+	 *   selectable inputs to the CFM while those who "actually use"
+	 *   the PSC/MSCAN/SPDIF (serial drivers et al) need the MCLK
+	 *   for their bitrate
+	 * - in the absence of "aliases" for clocks we need to create
+	 *   individial 'struct clk' items for whatever might get
+	 *   referenced or looked up, even if several of those items are
+	 *   identical from the logical POV (their rate value)
+	 * - for easier future maintenance and for better reflection of
+	 *   the SoC's documentation, it appears appropriate to generate
+	 *   clock items even for those muxers which actually are NOPs
+	 *   (those with two inputs of which one is reserved)
+	 */
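+	/*
+	 * MCCR bitfield layout: mux0 select in bits 15:14, gate in
+	 * bit 16, divider in bits 31:17, mux1 select in bit 7
+	 */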
+	clks[clks_idx_int + MCLK_IDX_MUX0] = mpc512x_clk_muxed(
+			entry->name_mux0,
+			soc_has_mclk_mux0_canin()
+				? &parent_names_mux0_canin[0]
+				: &parent_names_mux0_spdif[0],
+			ARRAY_SIZE(parent_names_mux0_spdif),
+			mccr_reg, 14, 2);
+	clks[clks_idx_int + MCLK_IDX_EN0] = mpc512x_clk_gated(
+			entry->name_en0, entry->name_mux0,
+			mccr_reg, 16);
+	clks[clks_idx_int + MCLK_IDX_DIV0] = mpc512x_clk_divider(
+			entry->name_div0,
+			entry->name_en0, CLK_SET_RATE_GATE,
+			mccr_reg, 17, 15, 0);
+	if (entry->has_mclk1) {
+		clks[clks_idx_pub] = mpc512x_clk_muxed(
+				entry->name_mclk,
+				&entry->parent_names_mux1[0],
+				ARRAY_SIZE(entry->parent_names_mux1),
+				mccr_reg, 7, 1);
+	} else {
+		clks[clks_idx_pub] = mpc512x_clk_factor(
+				entry->name_mclk,
+				entry->parent_names_mux1[0],
+				1, 1);
+	}
+}
+
+/* }}} MCLK helpers */
+
+static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
+{
+	int sys_mul, sys_div, ips_div;
+	int mul, div;
+	size_t mclk_idx;
+	int freq;
+
+	/*
+	 * developer's notes:
+	 * - consider whether to handle clocks which have both gates and
+	 *   dividers via intermediates or by means of composites
+	 * - fractional dividers appear to not map well to composites
+	 *   since they can be seen as a fixed multiplier and an
+	 *   adjustable divider, while composites can only combine at
+	 *   most one of a mux, div, and gate each into one 'struct clk'
+	 *   item
+	 * - PSC/MSCAN/SPDIF clock generation OTOH already is very
+	 *   specific and cannot get mapped to composites (at least not
+	 *   to a single one, maybe to two of them, but then some of
+	 *   these intermediate clock signals get referenced elsewhere,
+	 *   e.g. in the clock frequency measurement (CFM), and thus
+	 *   need publicly available names)
+	 * - the current source layout appropriately reflects the
+	 *   hardware setup, and it works, so it's questionable whether
+	 *   further changes would result in a big enough benefit
+	 */
+
+	/* regardless of whether XTAL/OSC exists, have REF created */
+	mpc512x_clk_setup_ref_clock(np, busfreq, &sys_mul, &sys_div, &ips_div);
+
+	/* now set up the REF -> SYS -> CSB -> IPS hierarchy */
+	clks[MPC512x_CLK_SYS] = mpc512x_clk_factor("sys", "ref",
+						   sys_mul, sys_div);
+	clks[MPC512x_CLK_CSB] = mpc512x_clk_factor("csb", "sys", 1, 2);
+	clks[MPC512x_CLK_IPS] = mpc512x_clk_divtable("ips", "csb",
+						     &clkregs->scfr1, 23, 3,
+						     divtab_2346);
+	/* now set up anything below SYS and CSB and IPS */
+
+	clks[MPC512x_CLK_DDR_UG] = mpc512x_clk_factor("ddr-ug", "sys", 1, 2);
+
+	/*
+	 * the Reference Manual states that for SDHC only even divide
+	 * ratios are supported, because clock domain synchronization
+	 * between 'per' and 'ipg' is broken;
+	 * keep the divider's bit 0 cleared (per the reset value), and
+	 * only allow the divider's bits 7:1 to be set up, which ensures
+	 * that only even divide ratios can get configured upon rate
+	 * changes;
+	 * keep the "x4" name because this bit shift hack is an internal
+	 * implementation detail, the "fractional divider with quarters"
+	 * semantics remains
+	 */
+	clks[MPC512x_CLK_SDHC_x4] = mpc512x_clk_factor("sdhc-x4", "csb", 2, 1);
+	clks[MPC512x_CLK_SDHC_UG] = mpc512x_clk_divider("sdhc-ug", "sdhc-x4", 0,
+							&clkregs->scfr2, 1, 7,
+							CLK_DIVIDER_ONE_BASED);
+	if (soc_has_sdhc2()) {
+		clks[MPC512x_CLK_SDHC2_UG] = mpc512x_clk_divider(
+				"sdhc2-ug", "sdhc-x4", 0, &clkregs->scfr2,
+				9, 7, CLK_DIVIDER_ONE_BASED);
+	}
+
+	clks[MPC512x_CLK_DIU_x4] = mpc512x_clk_factor("diu-x4", "csb", 4, 1);
+	clks[MPC512x_CLK_DIU_UG] = mpc512x_clk_divider("diu-ug", "diu-x4", 0,
+						       &clkregs->scfr1, 0, 8,
+						       CLK_DIVIDER_ONE_BASED);
+
+	/*
+	 * the "power architecture PLL" was setup from data which was
+	 * sampled from the reset config word, at this point in time the
+	 * configuration can be considered fixed and read only (i.e. no
+	 * longer adjustable, or no longer in need of adjustment), which
+	 * is why we don't register a PLL here but assume fixed factors
+	 */
+	mul = get_cpmf_mult_x2();
+	div = 2;	/* compensate for the fractional factor */
+	clks[MPC512x_CLK_E300] = mpc512x_clk_factor("e300", "csb", mul, div);
+
+	if (soc_has_mbx()) {
+		clks[MPC512x_CLK_MBX_BUS_UG] = mpc512x_clk_factor(
+				"mbx-bus-ug", "csb", 1, 2);
+		clks[MPC512x_CLK_MBX_UG] = mpc512x_clk_divtable(
+				"mbx-ug", "mbx-bus-ug", &clkregs->scfr1,
+				14, 3, divtab_1234);
+		clks[MPC512x_CLK_MBX_3D_UG] = mpc512x_clk_factor(
+				"mbx-3d-ug", "mbx-ug", 1, 1);
+	}
+	if (soc_has_pci()) {
+		clks[MPC512x_CLK_PCI_UG] = mpc512x_clk_divtable(
+				"pci-ug", "csb", &clkregs->scfr1,
+				20, 3, divtab_2346);
+	}
+	if (soc_has_nfc_5125()) {
+		/*
+		 * XXX TODO implement 5125 NFC clock setup logic,
+		 * with high/low period counters in clkregs->scfr3,
+		 * currently there are no users, so lookups yield -ENOTSUPP
+		 */
+		clks[MPC512x_CLK_NFC_UG] = ERR_PTR(-ENOTSUPP);
+	} else {
+		clks[MPC512x_CLK_NFC_UG] = mpc512x_clk_divtable(
+				"nfc-ug", "ips", &clkregs->scfr1,
+				8, 3, divtab_1234);
+	}
+	clks[MPC512x_CLK_LPC_UG] = mpc512x_clk_divtable("lpc-ug", "ips",
+							&clkregs->scfr1, 11, 3,
+							divtab_1234);
+
+	clks[MPC512x_CLK_LPC] = mpc512x_clk_gated("lpc", "lpc-ug",
+						  &clkregs->sccr1, 30);
+	clks[MPC512x_CLK_NFC] = mpc512x_clk_gated("nfc", "nfc-ug",
+						  &clkregs->sccr1, 29);
+	if (soc_has_pata()) {
+		clks[MPC512x_CLK_PATA] = mpc512x_clk_gated(
+				"pata", "ips", &clkregs->sccr1, 28);
+	}
+	/* for PSCs there is a "registers" gate and a bitrate MCLK subtree */
+	for (mclk_idx = 0; mclk_idx < soc_max_pscnum(); mclk_idx++) {
+		char name[12];
+		snprintf(name, sizeof(name), "psc%d", mclk_idx);
+		clks[MPC512x_CLK_PSC0 + mclk_idx] = mpc512x_clk_gated(
+				name, "ips", &clkregs->sccr1, 27 - mclk_idx);
+		mpc512x_clk_setup_mclk(&mclk_psc_data[mclk_idx], mclk_idx);
+	}
+	clks[MPC512x_CLK_PSC_FIFO] = mpc512x_clk_gated("psc-fifo", "ips",
+						       &clkregs->sccr1, 15);
+	if (soc_has_sata()) {
+		clks[MPC512x_CLK_SATA] = mpc512x_clk_gated(
+				"sata", "ips", &clkregs->sccr1, 14);
+	}
+	clks[MPC512x_CLK_FEC] = mpc512x_clk_gated("fec", "ips",
+						  &clkregs->sccr1, 13);
+	if (soc_has_pci()) {
+		clks[MPC512x_CLK_PCI] = mpc512x_clk_gated(
+				"pci", "pci-ug", &clkregs->sccr1, 11);
+	}
+	clks[MPC512x_CLK_DDR] = mpc512x_clk_gated("ddr", "ddr-ug",
+						  &clkregs->sccr1, 10);
+	if (soc_has_fec2()) {
+		clks[MPC512x_CLK_FEC2] = mpc512x_clk_gated(
+				"fec2", "ips", &clkregs->sccr1, 9);
+	}
+
+	clks[MPC512x_CLK_DIU] = mpc512x_clk_gated("diu", "diu-ug",
+						  &clkregs->sccr2, 31);
+	if (soc_has_axe()) {
+		clks[MPC512x_CLK_AXE] = mpc512x_clk_gated(
+				"axe", "csb", &clkregs->sccr2, 30);
+	}
+	clks[MPC512x_CLK_MEM] = mpc512x_clk_gated("mem", "ips",
+						  &clkregs->sccr2, 29);
+	clks[MPC512x_CLK_USB1] = mpc512x_clk_gated("usb1", "csb",
+						   &clkregs->sccr2, 28);
+	clks[MPC512x_CLK_USB2] = mpc512x_clk_gated("usb2", "csb",
+						   &clkregs->sccr2, 27);
+	clks[MPC512x_CLK_I2C] = mpc512x_clk_gated("i2c", "ips",
+						  &clkregs->sccr2, 26);
+	/* MSCAN differs from PSC with just one gate for multiple components */
+	clks[MPC512x_CLK_BDLC] = mpc512x_clk_gated("bdlc", "ips",
+						   &clkregs->sccr2, 25);
+	for (mclk_idx = 0; mclk_idx < ARRAY_SIZE(mclk_mscan_data); mclk_idx++)
+		mpc512x_clk_setup_mclk(&mclk_mscan_data[mclk_idx], mclk_idx);
+	clks[MPC512x_CLK_SDHC] = mpc512x_clk_gated("sdhc", "sdhc-ug",
+						   &clkregs->sccr2, 24);
+	/* there is only one SPDIF component, which shares MCLK support code */
+	if (soc_has_spdif()) {
+		clks[MPC512x_CLK_SPDIF] = mpc512x_clk_gated(
+				"spdif", "ips", &clkregs->sccr2, 23);
+		mpc512x_clk_setup_mclk(&mclk_spdif_data[0], 0);
+	}
+	if (soc_has_mbx()) {
+		clks[MPC512x_CLK_MBX_BUS] = mpc512x_clk_gated(
+				"mbx-bus", "mbx-bus-ug", &clkregs->sccr2, 22);
+		clks[MPC512x_CLK_MBX] = mpc512x_clk_gated(
+				"mbx", "mbx-ug", &clkregs->sccr2, 21);
+		clks[MPC512x_CLK_MBX_3D] = mpc512x_clk_gated(
+				"mbx-3d", "mbx-3d-ug", &clkregs->sccr2, 20);
+	}
+	clks[MPC512x_CLK_IIM] = mpc512x_clk_gated("iim", "csb",
+						  &clkregs->sccr2, 19);
+	if (soc_has_viu()) {
+		clks[MPC512x_CLK_VIU] = mpc512x_clk_gated(
+				"viu", "csb", &clkregs->sccr2, 18);
+	}
+	if (soc_has_sdhc2()) {
+		clks[MPC512x_CLK_SDHC2] = mpc512x_clk_gated(
+				"sdhc-2", "sdhc2-ug", &clkregs->sccr2, 17);
+	}
+
+	if (soc_has_outclk()) {
+		size_t idx;	/* used as mclk_idx, just to trim line length */
+		for (idx = 0; idx < ARRAY_SIZE(mclk_outclk_data); idx++)
+			mpc512x_clk_setup_mclk(&mclk_outclk_data[idx], idx);
+	}
+
+	/*
+	 * externally provided clocks (when implemented in hardware,
+	 * the device tree may specify rates which otherwise are unknown)
+	 */
+	freq = get_freq_from_dt("psc_mclk_in");
+	if (!freq)
+		freq = 25000000;
+	clks[MPC512x_CLK_PSC_MCLK_IN] = mpc512x_clk_fixed("psc_mclk_in", freq);
+	if (soc_has_mclk_mux0_canin()) {
+		freq = get_freq_from_dt("can_clk_in");
+		clks[MPC512x_CLK_CAN_CLK_IN] = mpc512x_clk_fixed(
+				"can_clk_in", freq);
+	} else {
+		freq = get_freq_from_dt("spdif_tx_in");
+		clks[MPC512x_CLK_SPDIF_TX_IN] = mpc512x_clk_fixed(
+				"spdif_tx_in", freq);
+		freq = get_freq_from_dt("spdif_rx_in");
+		clks[MPC512x_CLK_SPDIF_RX_IN] = mpc512x_clk_fixed(
+				"spdif_rx_in", freq);
+	}
+
+	/* fixed frequency for AC97, always 24.567 MHz */
+	clks[MPC512x_CLK_AC97] = mpc512x_clk_fixed("ac97", 24567000);
+
+	/*
+	 * pre-enable those "internal" clock items which never get
+	 * claimed by any peripheral driver, to not have the clock
+	 * subsystem disable them late at startup
+	 */
+	clk_prepare_enable(clks[MPC512x_CLK_DUMMY]);
+	clk_prepare_enable(clks[MPC512x_CLK_E300]);	/* PowerPC CPU */
+	clk_prepare_enable(clks[MPC512x_CLK_DDR]);	/* DRAM */
+	clk_prepare_enable(clks[MPC512x_CLK_MEM]);	/* SRAM */
+	clk_prepare_enable(clks[MPC512x_CLK_IPS]);	/* SoC periph */
+	clk_prepare_enable(clks[MPC512x_CLK_LPC]);	/* boot media */
+}
+
+/*
+ * registers the set of public clocks (those listed in the dt-bindings/
+ * header file) for OF lookups, keeps the intermediates private to us
+ */
+static void mpc5121_clk_register_of_provider(struct device_node *np)
+{
+	clk_data.clks = clks;
+	clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1;	/* _not_ ARRAY_SIZE() */
+	of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data);
+}
+
+/*
+ * temporary support for the period of time between introduction of CCF
+ * support and the adjustment of peripheral drivers to OF based lookups
+ */
+static void mpc5121_clk_provide_migration_support(void)
+{
+	/*
+	 * pre-enable those clock items which are not yet appropriately
+	 * acquired by their peripheral driver
+	 *
+	 * the PCI clock cannot get acquired by its peripheral driver,
+	 * because for this platform the driver won't probe(), instead
+	 * initialization is done from within the .setup_arch() routine
+	 * at a point in time where the clock provider has not been
+	 * set up yet and thus isn't available yet
+	 *
+	 * so we "pre-enable" the clock here, to not have the clock
+	 * subsystem automatically disable this item in a late init call
+	 *
+	 * this PCI clock pre-enable workaround only applies when there
+	 * are device tree nodes for PCI and thus the peripheral driver
+	 * has attached to bridges, otherwise the PCI clock remains
+	 * unused and so it gets disabled
+	 */
+	clk_prepare_enable(clks[MPC512x_CLK_PSC3_MCLK]);/* serial console */
+	if (of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci"))
+		clk_prepare_enable(clks[MPC512x_CLK_PCI]);
+}
+
+/*
+ * these macros are not exactly pretty, but they encapsulate a lot
+ * of copy'n'paste heavy code which is even more ugly, and they
+ * reduce the potential for inconsistencies across the many copies
+ */
+#define FOR_NODES(compatname) \
+	for_each_compatible_node(np, NULL, compatname)
+
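+/*
+ * NODE_PREP derives the "%08x.%s" style device name (base address plus
+ * node name) which the clkdev registration below will use
+ */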
+#define NODE_PREP do { \
+	of_address_to_resource(np, 0, &res); \
+	snprintf(devname, sizeof(devname), "%08x.%s", res.start, np->name); \
+} while (0)
+
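+/*
+ * NODE_CHK tests whether an OF based lookup for "clkname" succeeds, and
+ * only registers the fallback "clkitem" (as a clkdev alias under the
+ * device name, and optionally under the node name) when the device
+ * tree lacks the clock spec; a "did register" flag records the fact
+ * for later diagnostics
+ */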
+#define NODE_CHK(clkname, clkitem, regnode, regflag) do { \
+	struct clk *clk; \
+	clk = of_clk_get_by_name(np, clkname); \
+	if (IS_ERR(clk)) { \
+		clk = clkitem; \
+		clk_register_clkdev(clk, clkname, devname); \
+		if (regnode) \
+			clk_register_clkdev(clk, clkname, np->name); \
+		did_register |= DID_REG_ ## regflag; \
+		pr_debug("clock alias name '%s' for dev '%s' pointer %p\n", \
+			 clkname, devname, clk); \
+	} else { \
+		clk_put(clk); \
+	} \
+} while (0)
+
+/*
+ * register source code provided fallback results for clock lookups,
+ * these get consulted when OF based clock lookup fails (that is in the
+ * case of not yet adjusted device tree data, where clock related specs
+ * are missing)
+ */
+static void mpc5121_clk_provide_backwards_compat(void)
+{
+	enum did_reg_flags {
+		DID_REG_PSC	= BIT(0),
+		DID_REG_PSCFIFO	= BIT(1),
+		DID_REG_NFC	= BIT(2),
+		DID_REG_CAN	= BIT(3),
+		DID_REG_I2C	= BIT(4),
+		DID_REG_DIU	= BIT(5),
+		DID_REG_VIU	= BIT(6),
+		DID_REG_FEC	= BIT(7),
+		DID_REG_USB	= BIT(8),
+		DID_REG_PATA	= BIT(9),
+	};
+
+	int did_register;
+	struct device_node *np;
+	struct resource res;
+	int idx;
+	char devname[32];
+
+	did_register = 0;
+
+	FOR_NODES(mpc512x_select_psc_compat()) {
+		NODE_PREP;
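+		/* the PSC index resides in bits 11:8 of the base address */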
+		idx = (res.start >> 8) & 0xf;
+		NODE_CHK("ipg", clks[MPC512x_CLK_PSC0 + idx], 0, PSC);
+		NODE_CHK("mclk", clks[MPC512x_CLK_PSC0_MCLK + idx], 0, PSC);
+	}
+
+	FOR_NODES("fsl,mpc5121-psc-fifo") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_PSC_FIFO], 1, PSCFIFO);
+	}
+
+	FOR_NODES("fsl,mpc5121-nfc") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_NFC], 0, NFC);
+	}
+
+	FOR_NODES("fsl,mpc5121-mscan") {
+		NODE_PREP;
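+		/* the MSCAN index derives from bits 13 and 7 of the address */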
+		idx = 0;
+		idx += (res.start & 0x2000) ? 2 : 0;
+		idx += (res.start & 0x0080) ? 1 : 0;
+		NODE_CHK("ipg", clks[MPC512x_CLK_BDLC], 0, CAN);
+		NODE_CHK("mclk", clks[MPC512x_CLK_MSCAN0_MCLK + idx], 0, CAN);
+	}
+
+	/*
+	 * do register the 'ips', 'sys', and 'ref' names globally
+	 * instead of inside each individual CAN node, as there is no
+	 * potential for a name conflict (in contrast to 'ipg' and 'mclk')
+	 */
+	if (did_register & DID_REG_CAN) {
+		clk_register_clkdev(clks[MPC512x_CLK_IPS], "ips", NULL);
+		clk_register_clkdev(clks[MPC512x_CLK_SYS], "sys", NULL);
+		clk_register_clkdev(clks[MPC512x_CLK_REF], "ref", NULL);
+	}
+
+	FOR_NODES("fsl,mpc5121-i2c") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_I2C], 0, I2C);
+	}
+
+	/*
+	 * workaround for the fact that the I2C driver does an "anonymous"
+	 * lookup (NULL name spec, which yields the first clock spec) for
+	 * which we cannot register an alias -- a _global_ 'ipg' alias that
+	 * is not bound to any device name and returns the I2C clock item
+	 * is not a good idea
+	 *
+	 * so we have the lookup in the peripheral driver fail, which is
+	 * silent and non-fatal, and pre-enable the clock item here such
+	 * that register access is possible
+	 *
+	 * see commit b3bfce2b "i2c: mpc: cleanup clock API use" for
+	 * details, adjusting s/NULL/"ipg"/ in i2c-mpc.c would make this
+	 * workaround obsolete
+	 */
+	if (did_register & DID_REG_I2C)
+		clk_prepare_enable(clks[MPC512x_CLK_I2C]);
+
+	FOR_NODES("fsl,mpc5121-diu") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_DIU], 1, DIU);
+	}
+
+	FOR_NODES("fsl,mpc5121-viu") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_VIU], 0, VIU);
+	}
+
+	/*
+	 * note that 2771399a "fs_enet: cleanup clock API use" used the
+	 * "per" string for the clock lookup, in contrast to the "ipg"
+	 * name which most other nodes use -- this is not fatal, but
+	 * something to keep in mind when doing compatibility
+	 * registration; it's a non-issue with up-to-date device tree
+	 * data
+	 */
+	FOR_NODES("fsl,mpc5121-fec") {
+		NODE_PREP;
+		NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC);
+	}
+	FOR_NODES("fsl,mpc5121-fec-mdio") {
+		NODE_PREP;
+		NODE_CHK("per", clks[MPC512x_CLK_FEC], 0, FEC);
+	}
+	/*
+	 * MPC5125 has two FECs: FEC1 at 0x2800, FEC2 at 0x4800;
+	 * the clock items don't "form an array" since FEC2 was
+	 * added only later and was not allowed to shift all other
+	 * clock item indices, so the numbers aren't adjacent
+	 */
+	FOR_NODES("fsl,mpc5125-fec") {
+		NODE_PREP;
+		if (res.start & 0x4000)
+			idx = MPC512x_CLK_FEC2;
+		else
+			idx = MPC512x_CLK_FEC;
+		NODE_CHK("per", clks[idx], 0, FEC);
+	}
+
+	FOR_NODES("fsl,mpc5121-usb2-dr") {
+		NODE_PREP;
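+		/* bit 14 of the base address tells the two controllers apart */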
+		idx = (res.start & 0x4000) ? 1 : 0;
+		NODE_CHK("ipg", clks[MPC512x_CLK_USB1 + idx], 0, USB);
+	}
+
+	FOR_NODES("fsl,mpc5121-pata") {
+		NODE_PREP;
+		NODE_CHK("ipg", clks[MPC512x_CLK_PATA], 0, PATA);
+	}
+
+	/*
+	 * try to collapse diagnostics into a single line of output yet
+	 * provide a full list of what is missing, to avoid noise in the
+	 * absence of up-to-date device tree data -- backwards
+	 * compatibility with old DTBs is a requirement, updates may be
+	 * desirable or preferable but are not at all mandatory
+	 */
+	if (did_register) {
+		pr_notice("device tree lacks clock specs, adding fallbacks (0x%x,%s%s%s%s%s%s%s%s%s%s)\n",
+			  did_register,
+			  (did_register & DID_REG_PSC) ? " PSC" : "",
+			  (did_register & DID_REG_PSCFIFO) ? " PSCFIFO" : "",
+			  (did_register & DID_REG_NFC) ? " NFC" : "",
+			  (did_register & DID_REG_CAN) ? " CAN" : "",
+			  (did_register & DID_REG_I2C) ? " I2C" : "",
+			  (did_register & DID_REG_DIU) ? " DIU" : "",
+			  (did_register & DID_REG_VIU) ? " VIU" : "",
+			  (did_register & DID_REG_FEC) ? " FEC" : "",
+			  (did_register & DID_REG_USB) ? " USB" : "",
+			  (did_register & DID_REG_PATA) ? " PATA" : "");
+	} else {
+		pr_debug("device tree has clock specs, no fallbacks added\n");
+	}
+}
+
+int __init mpc5121_clk_init(void)
+{
+	struct device_node *clk_np;
+	int busfreq;
+
+	/* map the clock control registers */
+	clk_np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock");
+	if (!clk_np)
+		return -ENODEV;
+	clkregs = of_iomap(clk_np, 0);
+	WARN_ON(!clkregs);
+
+	/* determine the SoC variant we run on */
+	mpc512x_clk_determine_soc();
+
+	/* invalidate all not yet registered clock slots */
+	mpc512x_clk_preset_data();
+
+	/*
+	 * have the device tree scanned for "fixed-clock" nodes (which
+	 * includes the oscillator node if the board's DT provides one)
+	 */
+	of_clk_init(NULL);
+
+	/*
+	 * add a dummy clock for those situations where a clock spec is
+	 * required yet no real clock is involved
+	 */
+	clks[MPC512x_CLK_DUMMY] = mpc512x_clk_fixed("dummy", 0);
+
+	/*
+	 * have all the real nodes in the clock tree populated from REF
+	 * down to all leaves, either starting from the OSC node or from
+	 * a REF root that was created from the IPS bus clock input
+	 */
+	busfreq = get_freq_from_dt("bus-frequency");
+	mpc512x_clk_setup_clock_tree(clk_np, busfreq);
+
+	/* register as an OF clock provider */
+	mpc5121_clk_register_of_provider(clk_np);
+
+	/*
+	 * unbreak not yet adjusted peripheral drivers during migration
+	 * towards fully operational common clock support, and allow
+	 * operation in the absence of clock related device tree specs
+	 */
+	mpc5121_clk_provide_migration_support();
+	mpc5121_clk_provide_backwards_compat();
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c
deleted file mode 100644
index fd8a376..0000000
--- a/arch/powerpc/platforms/512x/clock.c
+++ /dev/null
@@ -1,754 +0,0 @@
-/*
- * Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: John Rigby <jrigby@freescale.com>
- *
- * Implements the clk api defined in include/linux/clk.h
- *
- *    Original based on linux/arch/arm/mach-integrator/clock.c
- *
- *    Copyright (C) 2004 ARM Limited.
- *    Written by Deep Blue Solutions Limited.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/kernel.h>
-#include <linux/list.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/clk.h>
-#include <linux/mutex.h>
-#include <linux/io.h>
-
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-#include <asm/mpc5xxx.h>
-#include <asm/mpc5121.h>
-#include <asm/clk_interface.h>
-
-#include "mpc512x.h"
-
-#undef CLK_DEBUG
-
-static int clocks_initialized;
-
-#define CLK_HAS_RATE	0x1	/* has rate in MHz */
-#define CLK_HAS_CTRL	0x2	/* has control reg and bit */
-
-struct clk {
-	struct list_head node;
-	char name[32];
-	int flags;
-	struct device *dev;
-	unsigned long rate;
-	struct module *owner;
-	void (*calc) (struct clk *);
-	struct clk *parent;
-	int reg, bit;		/* CLK_HAS_CTRL */
-	int div_shift;		/* only used by generic_div_clk_calc */
-};
-
-static LIST_HEAD(clocks);
-static DEFINE_MUTEX(clocks_mutex);
-
-static struct clk *mpc5121_clk_get(struct device *dev, const char *id)
-{
-	struct clk *p, *clk = ERR_PTR(-ENOENT);
-	int dev_match;
-	int id_match;
-
-	if (dev == NULL || id == NULL)
-		return clk;
-
-	mutex_lock(&clocks_mutex);
-	list_for_each_entry(p, &clocks, node) {
-		dev_match = id_match = 0;
-
-		if (dev == p->dev)
-			dev_match++;
-		if (strcmp(id, p->name) == 0)
-			id_match++;
-		if ((dev_match || id_match) && try_module_get(p->owner)) {
-			clk = p;
-			break;
-		}
-	}
-	mutex_unlock(&clocks_mutex);
-
-	return clk;
-}
-
-#ifdef CLK_DEBUG
-static void dump_clocks(void)
-{
-	struct clk *p;
-
-	mutex_lock(&clocks_mutex);
-	printk(KERN_INFO "CLOCKS:\n");
-	list_for_each_entry(p, &clocks, node) {
-		pr_info("  %s=%ld", p->name, p->rate);
-		if (p->parent)
-			pr_cont(" %s=%ld", p->parent->name,
-			       p->parent->rate);
-		if (p->flags & CLK_HAS_CTRL)
-			pr_cont(" reg/bit=%d/%d", p->reg, p->bit);
-		pr_cont("\n");
-	}
-	mutex_unlock(&clocks_mutex);
-}
-#define	DEBUG_CLK_DUMP() dump_clocks()
-#else
-#define	DEBUG_CLK_DUMP()
-#endif
-
-
-static void mpc5121_clk_put(struct clk *clk)
-{
-	module_put(clk->owner);
-}
-
-#define NRPSC 12
-
-struct mpc512x_clockctl {
-	u32 spmr;		/* System PLL Mode Reg */
-	u32 sccr[2];		/* System Clk Ctrl Reg 1 & 2 */
-	u32 scfr1;		/* System Clk Freq Reg 1 */
-	u32 scfr2;		/* System Clk Freq Reg 2 */
-	u32 reserved;
-	u32 bcr;		/* Bread Crumb Reg */
-	u32 pccr[NRPSC];	/* PSC Clk Ctrl Reg 0-11 */
-	u32 spccr;		/* SPDIF Clk Ctrl Reg */
-	u32 cccr;		/* CFM Clk Ctrl Reg */
-	u32 dccr;		/* DIU Clk Cnfg Reg */
-};
-
-static struct mpc512x_clockctl __iomem *clockctl;
-
-static int mpc5121_clk_enable(struct clk *clk)
-{
-	unsigned int mask;
-
-	if (clk->flags & CLK_HAS_CTRL) {
-		mask = in_be32(&clockctl->sccr[clk->reg]);
-		mask |= 1 << clk->bit;
-		out_be32(&clockctl->sccr[clk->reg], mask);
-	}
-	return 0;
-}
-
-static void mpc5121_clk_disable(struct clk *clk)
-{
-	unsigned int mask;
-
-	if (clk->flags & CLK_HAS_CTRL) {
-		mask = in_be32(&clockctl->sccr[clk->reg]);
-		mask &= ~(1 << clk->bit);
-		out_be32(&clockctl->sccr[clk->reg], mask);
-	}
-}
-
-static unsigned long mpc5121_clk_get_rate(struct clk *clk)
-{
-	if (clk->flags & CLK_HAS_RATE)
-		return clk->rate;
-	else
-		return 0;
-}
-
-static long mpc5121_clk_round_rate(struct clk *clk, unsigned long rate)
-{
-	return rate;
-}
-
-static int mpc5121_clk_set_rate(struct clk *clk, unsigned long rate)
-{
-	return 0;
-}
-
-static int clk_register(struct clk *clk)
-{
-	mutex_lock(&clocks_mutex);
-	list_add(&clk->node, &clocks);
-	mutex_unlock(&clocks_mutex);
-	return 0;
-}
-
-static unsigned long spmf_mult(void)
-{
-	/*
-	 * Convert spmf to multiplier
-	 */
-	static int spmf_to_mult[] = {
-		68, 1, 12, 16,
-		20, 24, 28, 32,
-		36, 40, 44, 48,
-		52, 56, 60, 64
-	};
-	int spmf = (in_be32(&clockctl->spmr) >> 24) & 0xf;
-	return spmf_to_mult[spmf];
-}
-
-static unsigned long sysdiv_div_x_2(void)
-{
-	/*
-	 * Convert sysdiv to divisor x 2
-	 * Some divisors have fractional parts so
-	 * multiply by 2 then divide by this value
-	 */
-	static int sysdiv_to_div_x_2[] = {
-		4, 5, 6, 7,
-		8, 9, 10, 14,
-		12, 16, 18, 22,
-		20, 24, 26, 30,
-		28, 32, 34, 38,
-		36, 40, 42, 46,
-		44, 48, 50, 54,
-		52, 56, 58, 62,
-		60, 64, 66,
-	};
-	int sysdiv = (in_be32(&clockctl->scfr2) >> 26) & 0x3f;
-	return sysdiv_to_div_x_2[sysdiv];
-}
-
-static unsigned long ref_to_sys(unsigned long rate)
-{
-	rate *= spmf_mult();
-	rate *= 2;
-	rate /= sysdiv_div_x_2();
-
-	return rate;
-}
-
-static unsigned long sys_to_ref(unsigned long rate)
-{
-	rate *= sysdiv_div_x_2();
-	rate /= 2;
-	rate /= spmf_mult();
-
-	return rate;
-}
-
-static long ips_to_ref(unsigned long rate)
-{
-	int ips_div = (in_be32(&clockctl->scfr1) >> 23) & 0x7;
-
-	rate *= ips_div;	/* csb_clk = ips_clk * ips_div */
-	rate *= 2;		/* sys_clk = csb_clk * 2 */
-	return sys_to_ref(rate);
-}
-
-static unsigned long devtree_getfreq(char *clockname)
-{
-	struct device_node *np;
-	const unsigned int *prop;
-	unsigned int val = 0;
-
-	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-immr");
-	if (np) {
-		prop = of_get_property(np, clockname, NULL);
-		if (prop)
-			val = *prop;
-	    of_node_put(np);
-	}
-	return val;
-}
-
-static void ref_clk_calc(struct clk *clk)
-{
-	unsigned long rate;
-
-	rate = devtree_getfreq("bus-frequency");
-	if (rate == 0) {
-		printk(KERN_ERR "No bus-frequency in dev tree\n");
-		clk->rate = 0;
-		return;
-	}
-	clk->rate = ips_to_ref(rate);
-}
-
-static struct clk ref_clk = {
-	.name = "ref_clk",
-	.calc = ref_clk_calc,
-};
-
-
-static void sys_clk_calc(struct clk *clk)
-{
-	clk->rate = ref_to_sys(ref_clk.rate);
-}
-
-static struct clk sys_clk = {
-	.name = "sys_clk",
-	.calc = sys_clk_calc,
-};
-
-static void diu_clk_calc(struct clk *clk)
-{
-	int diudiv_x_2 = in_be32(&clockctl->scfr1) & 0xff;
-	unsigned long rate;
-
-	rate = sys_clk.rate;
-
-	rate *= 2;
-	rate /= diudiv_x_2;
-
-	clk->rate = rate;
-}
-
-static void viu_clk_calc(struct clk *clk)
-{
-	unsigned long rate;
-
-	rate = sys_clk.rate;
-	rate /= 2;
-	clk->rate = rate;
-}
-
-static void half_clk_calc(struct clk *clk)
-{
-	clk->rate = clk->parent->rate / 2;
-}
-
-static void generic_div_clk_calc(struct clk *clk)
-{
-	int div = (in_be32(&clockctl->scfr1) >> clk->div_shift) & 0x7;
-
-	clk->rate = clk->parent->rate / div;
-}
-
-static void unity_clk_calc(struct clk *clk)
-{
-	clk->rate = clk->parent->rate;
-}
-
-static struct clk csb_clk = {
-	.name = "csb_clk",
-	.calc = half_clk_calc,
-	.parent = &sys_clk,
-};
-
-static void e300_clk_calc(struct clk *clk)
-{
-	int spmf = (in_be32(&clockctl->spmr) >> 16) & 0xf;
-	int ratex2 = clk->parent->rate * spmf;
-
-	clk->rate = ratex2 / 2;
-}
-
-static struct clk e300_clk = {
-	.name = "e300_clk",
-	.calc = e300_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk ips_clk = {
-	.name = "ips_clk",
-	.calc = generic_div_clk_calc,
-	.parent = &csb_clk,
-	.div_shift = 23,
-};
-
-/*
- * Clocks controlled by SCCR1 (.reg = 0)
- */
-static struct clk lpc_clk = {
-	.name = "lpc_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 30,
-	.calc = generic_div_clk_calc,
-	.parent = &ips_clk,
-	.div_shift = 11,
-};
-
-static struct clk nfc_clk = {
-	.name = "nfc_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 29,
-	.calc = generic_div_clk_calc,
-	.parent = &ips_clk,
-	.div_shift = 8,
-};
-
-static struct clk pata_clk = {
-	.name = "pata_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 28,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-/*
- * PSC clocks (bits 27 - 16)
- * are setup elsewhere
- */
-
-static struct clk sata_clk = {
-	.name = "sata_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 14,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk fec_clk = {
-	.name = "fec_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 13,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk pci_clk = {
-	.name = "pci_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 0,
-	.bit = 11,
-	.calc = generic_div_clk_calc,
-	.parent = &csb_clk,
-	.div_shift = 20,
-};
-
-/*
- * Clocks controlled by SCCR2 (.reg = 1)
- */
-static struct clk diu_clk = {
-	.name = "diu_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 31,
-	.calc = diu_clk_calc,
-};
-
-static struct clk viu_clk = {
-	.name = "viu_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 18,
-	.calc = viu_clk_calc,
-};
-
-static struct clk axe_clk = {
-	.name = "axe_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 30,
-	.calc = unity_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk usb1_clk = {
-	.name = "usb1_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 28,
-	.calc = unity_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk usb2_clk = {
-	.name = "usb2_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 27,
-	.calc = unity_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk i2c_clk = {
-	.name = "i2c_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 26,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk mscan_clk = {
-	.name = "mscan_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 25,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk sdhc_clk = {
-	.name = "sdhc_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 24,
-	.calc = unity_clk_calc,
-	.parent = &ips_clk,
-};
-
-static struct clk mbx_bus_clk = {
-	.name = "mbx_bus_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 22,
-	.calc = half_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk mbx_clk = {
-	.name = "mbx_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 21,
-	.calc = unity_clk_calc,
-	.parent = &csb_clk,
-};
-
-static struct clk mbx_3d_clk = {
-	.name = "mbx_3d_clk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 20,
-	.calc = generic_div_clk_calc,
-	.parent = &mbx_bus_clk,
-	.div_shift = 14,
-};
-
-static void psc_mclk_in_calc(struct clk *clk)
-{
-	clk->rate = devtree_getfreq("psc_mclk_in");
-	if (!clk->rate)
-		clk->rate = 25000000;
-}
-
-static struct clk psc_mclk_in = {
-	.name = "psc_mclk_in",
-	.calc = psc_mclk_in_calc,
-};
-
-static struct clk spdif_txclk = {
-	.name = "spdif_txclk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 23,
-};
-
-static struct clk spdif_rxclk = {
-	.name = "spdif_rxclk",
-	.flags = CLK_HAS_CTRL,
-	.reg = 1,
-	.bit = 23,
-};
-
-static void ac97_clk_calc(struct clk *clk)
-{
-	/* ac97 bit clock is always 24.567 MHz */
-	clk->rate = 24567000;
-}
-
-static struct clk ac97_clk = {
-	.name = "ac97_clk_in",
-	.calc = ac97_clk_calc,
-};
-
-static struct clk *rate_clks[] = {
-	&ref_clk,
-	&sys_clk,
-	&diu_clk,
-	&viu_clk,
-	&csb_clk,
-	&e300_clk,
-	&ips_clk,
-	&fec_clk,
-	&sata_clk,
-	&pata_clk,
-	&nfc_clk,
-	&lpc_clk,
-	&mbx_bus_clk,
-	&mbx_clk,
-	&mbx_3d_clk,
-	&axe_clk,
-	&usb1_clk,
-	&usb2_clk,
-	&i2c_clk,
-	&mscan_clk,
-	&sdhc_clk,
-	&pci_clk,
-	&psc_mclk_in,
-	&spdif_txclk,
-	&spdif_rxclk,
-	&ac97_clk,
-	NULL
-};
-
-static void rate_clk_init(struct clk *clk)
-{
-	if (clk->calc) {
-		clk->calc(clk);
-		clk->flags |= CLK_HAS_RATE;
-		clk_register(clk);
-	} else {
-		printk(KERN_WARNING
-		       "Could not initialize clk %s without a calc routine\n",
-		       clk->name);
-	}
-}
-
-static void rate_clks_init(void)
-{
-	struct clk **cpp, *clk;
-
-	cpp = rate_clks;
-	while ((clk = *cpp++))
-		rate_clk_init(clk);
-}
-
-/*
- * There are two clk enable registers with 32 enable bits each
- * psc clocks and device clocks are all stored in dev_clks
- */
-static struct clk dev_clks[2][32];
-
-/*
- * Given a psc number return the dev_clk
- * associated with it
- */
-static struct clk *psc_dev_clk(int pscnum)
-{
-	int reg, bit;
-	struct clk *clk;
-
-	reg = 0;
-	bit = 27 - pscnum;
-
-	clk = &dev_clks[reg][bit];
-	clk->reg = 0;
-	clk->bit = bit;
-	return clk;
-}
-
-/*
- * PSC clock rate calculation
- */
-static void psc_calc_rate(struct clk *clk, int pscnum, struct device_node *np)
-{
-	unsigned long mclk_src = sys_clk.rate;
-	unsigned long mclk_div;
-
-	/*
-	 * Can only change value of mclk divider
-	 * when the divider is disabled.
-	 *
-	 * Zero is not a valid divider so minimum
-	 * divider is 1
-	 *
-	 * disable/set divider/enable
-	 */
-	out_be32(&clockctl->pccr[pscnum], 0);
-	out_be32(&clockctl->pccr[pscnum], 0x00020000);
-	out_be32(&clockctl->pccr[pscnum], 0x00030000);
-
-	if (in_be32(&clockctl->pccr[pscnum]) & 0x80) {
-		clk->rate = spdif_rxclk.rate;
-		return;
-	}
-
-	switch ((in_be32(&clockctl->pccr[pscnum]) >> 14) & 0x3) {
-	case 0:
-		mclk_src = sys_clk.rate;
-		break;
-	case 1:
-		mclk_src = ref_clk.rate;
-		break;
-	case 2:
-		mclk_src = psc_mclk_in.rate;
-		break;
-	case 3:
-		mclk_src = spdif_txclk.rate;
-		break;
-	}
-
-	mclk_div = ((in_be32(&clockctl->pccr[pscnum]) >> 17) & 0x7fff) + 1;
-	clk->rate = mclk_src / mclk_div;
-}
-
-/*
- * Find all psc nodes in device tree and assign a clock
- * with name "psc%d_mclk" and dev pointing at the device
- * returned from of_find_device_by_node
- */
-static void psc_clks_init(void)
-{
-	struct device_node *np;
-	struct platform_device *ofdev;
-	u32 reg;
-	const char *psc_compat;
-
-	psc_compat = mpc512x_select_psc_compat();
-	if (!psc_compat)
-		return;
-
-	for_each_compatible_node(np, NULL, psc_compat) {
-		if (!of_property_read_u32(np, "reg", &reg)) {
-			int pscnum = (reg & 0xf00) >> 8;
-			struct clk *clk = psc_dev_clk(pscnum);
-
-			clk->flags = CLK_HAS_RATE | CLK_HAS_CTRL;
-			ofdev = of_find_device_by_node(np);
-			clk->dev = &ofdev->dev;
-			/*
-			 * AC97 is special rate clock does
-			 * not go through normal path
-			 */
-			if (of_device_is_compatible(np, "fsl,mpc5121-psc-ac97"))
-				clk->rate = ac97_clk.rate;
-			else
-				psc_calc_rate(clk, pscnum, np);
-			sprintf(clk->name, "psc%d_mclk", pscnum);
-			clk_register(clk);
-			clk_enable(clk);
-		}
-	}
-}
-
-static struct clk_interface mpc5121_clk_functions = {
-	.clk_get		= mpc5121_clk_get,
-	.clk_enable		= mpc5121_clk_enable,
-	.clk_disable		= mpc5121_clk_disable,
-	.clk_get_rate		= mpc5121_clk_get_rate,
-	.clk_put		= mpc5121_clk_put,
-	.clk_round_rate		= mpc5121_clk_round_rate,
-	.clk_set_rate		= mpc5121_clk_set_rate,
-	.clk_set_parent		= NULL,
-	.clk_get_parent		= NULL,
-};
-
-int __init mpc5121_clk_init(void)
-{
-	struct device_node *np;
-
-	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock");
-	if (np) {
-		clockctl = of_iomap(np, 0);
-		of_node_put(np);
-	}
-
-	if (!clockctl) {
-		printk(KERN_ERR "Could not map clock control registers\n");
-		return 0;
-	}
-
-	rate_clks_init();
-	psc_clks_init();
-
-	/* leave clockctl mapped forever */
-	/*iounmap(clockctl); */
-	DEBUG_CLK_DUMP();
-	clocks_initialized++;
-	clk_functions = mpc5121_clk_functions;
-	return 0;
-}
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 36b5652..adb95f03 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -12,6 +12,7 @@
  * (at your option) any later version.
  */
 
+#include <linux/clk.h>
 #include <linux/kernel.h>
 #include <linux/io.h>
 #include <linux/irq.h>
@@ -68,98 +69,112 @@
 	bool		in_use;
 };
 
-#define DIU_DIV_MASK	0x000000ff
+/* receives a pixel clock spec in picoseconds, adjusts the DIU clock rate */
 static void mpc512x_set_pixel_clock(unsigned int pixclock)
 {
-	unsigned long bestval, bestfreq, speed, busfreq;
-	unsigned long minpixclock, maxpixclock, pixval;
-	struct mpc512x_ccm __iomem *ccm;
 	struct device_node *np;
-	u32 temp;
-	long err;
-	int i;
+	struct clk *clk_diu;
+	unsigned long epsilon, minpixclock, maxpixclock;
+	unsigned long offset, want, got, delta;
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-clock");
+	/* lookup and enable the DIU clock */
+	np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-diu");
 	if (!np) {
-		pr_err("Can't find clock control module.\n");
+		pr_err("Could not find DIU device tree node.\n");
 		return;
 	}
-
-	ccm = of_iomap(np, 0);
+	clk_diu = of_clk_get(np, 0);
+	if (IS_ERR(clk_diu)) {
+		/* backwards compat with device trees that lack clock specs */
+		clk_diu = clk_get_sys(np->name, "ipg");
+	}
 	of_node_put(np);
-	if (!ccm) {
-		pr_err("Can't map clock control module reg.\n");
+	if (IS_ERR(clk_diu)) {
+		pr_err("Could not lookup DIU clock.\n");
+		return;
+	}
+	if (clk_prepare_enable(clk_diu)) {
+		pr_err("Could not enable DIU clock.\n");
 		return;
 	}
 
-	np = of_find_node_by_type(NULL, "cpu");
-	if (np) {
-		const unsigned int *prop =
-			of_get_property(np, "bus-frequency", NULL);
+	/*
+	 * convert the picoseconds spec into the desired clock rate,
+	 * determine the acceptable clock range for the monitor (+/- 5%),
+	 * do the calculation in steps to avoid integer overflow
+	 */
+	pr_debug("DIU pixclock in ps - %u\n", pixclock);
+	pixclock = (1000000000 / pixclock) * 1000;
+	pr_debug("DIU pixclock freq  - %u\n", pixclock);
+	epsilon = pixclock / 20; /* pixclock * 0.05 */
+	pr_debug("DIU deviation      - %lu\n", epsilon);
+	minpixclock = pixclock - epsilon;
+	maxpixclock = pixclock + epsilon;
+	pr_debug("DIU minpixclock    - %lu\n", minpixclock);
+	pr_debug("DIU maxpixclock    - %lu\n", maxpixclock);
 
-		of_node_put(np);
-		if (prop) {
-			busfreq = *prop;
-		} else {
-			pr_err("Can't get bus-frequency property\n");
-			return;
-		}
-	} else {
-		pr_err("Can't find 'cpu' node.\n");
+	/*
+	 * check whether the DIU supports the desired pixel clock
+	 *
+	 * - simply request the desired clock and see what the
+	 *   platform's clock driver will make of it, assuming that it
+	 *   will setup the best approximation of the requested value
+	 * - try other candidate frequencies in the order of decreasing
+	 *   preference (i.e. with increasing distance from the desired
+	 *   pixel clock, and checking the lower frequency before the
+	 *   higher frequency to not overload the hardware) until the
+	 *   first match is found -- any potential subsequent match
+	 *   would only be as good as the former match or typically
+	 *   would be less preferable
+	 *
+	 * the offset increment of pixelclock divided by 64 is an
+	 * arbitrary choice -- it's simple to calculate, in the typical
+	 * case we expect the first check to succeed already, in the
+	 * worst case seven frequencies get tested (the exact center and
+	 * three more values each to the left and to the right) before
+	 * the 5% tolerance window is exceeded, resulting in fast enough
+	 * execution yet high enough probability of finding a suitable
+	 * value, while the error rate will be on the order of a few
+	 * percent
+	 */
+	for (offset = 0; offset <= epsilon; offset += pixclock / 64) {
+		want = pixclock - offset;
+		pr_debug("DIU checking clock - %lu\n", want);
+		clk_set_rate(clk_diu, want);
+		got = clk_get_rate(clk_diu);
+		delta = abs(pixclock - got);
+		if (delta < epsilon)
+			break;
+		if (!offset)
+			continue;
+		want = pixclock + offset;
+		pr_debug("DIU checking clock - %lu\n", want);
+		clk_set_rate(clk_diu, want);
+		got = clk_get_rate(clk_diu);
+		delta = abs(pixclock - got);
+		if (delta < epsilon)
+			break;
+	}
+	if (offset <= epsilon) {
+		pr_debug("DIU clock accepted - %lu\n", want);
+		pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n",
+			 pixclock, got, delta, epsilon);
 		return;
 	}
+	pr_warn("DIU pixclock auto search unsuccessful\n");
 
-	/* Pixel Clock configuration */
-	pr_debug("DIU: Bus Frequency = %lu\n", busfreq);
-	speed = busfreq * 4; /* DIU_DIV ratio is 4 * CSB_CLK / DIU_CLK */
-
-	/* Calculate the pixel clock with the smallest error */
-	/* calculate the following in steps to avoid overflow */
-	pr_debug("DIU pixclock in ps - %d\n", pixclock);
-	temp = (1000000000 / pixclock) * 1000;
-	pixclock = temp;
-	pr_debug("DIU pixclock freq - %u\n", pixclock);
-
-	temp = temp / 20; /* pixclock * 0.05 */
-	pr_debug("deviation = %d\n", temp);
-	minpixclock = pixclock - temp;
-	maxpixclock = pixclock + temp;
-	pr_debug("DIU minpixclock - %lu\n", minpixclock);
-	pr_debug("DIU maxpixclock - %lu\n", maxpixclock);
-	pixval = speed/pixclock;
-	pr_debug("DIU pixval = %lu\n", pixval);
-
-	err = LONG_MAX;
-	bestval = pixval;
-	pr_debug("DIU bestval = %lu\n", bestval);
-
-	bestfreq = 0;
-	for (i = -1; i <= 1; i++) {
-		temp = speed / (pixval+i);
-		pr_debug("DIU test pixval i=%d, pixval=%lu, temp freq. = %u\n",
-			i, pixval, temp);
-		if ((temp < minpixclock) || (temp > maxpixclock))
-			pr_debug("DIU exceeds monitor range (%lu to %lu)\n",
-				minpixclock, maxpixclock);
-		else if (abs(temp - pixclock) < err) {
-			pr_debug("Entered the else if block %d\n", i);
-			err = abs(temp - pixclock);
-			bestval = pixval + i;
-			bestfreq = temp;
-		}
-	}
-
-	pr_debug("DIU chose = %lx\n", bestval);
-	pr_debug("DIU error = %ld\n NomPixClk ", err);
-	pr_debug("DIU: Best Freq = %lx\n", bestfreq);
-	/* Modify DIU_DIV in CCM SCFR1 */
-	temp = in_be32(&ccm->scfr1);
-	pr_debug("DIU: Current value of SCFR1: 0x%08x\n", temp);
-	temp &= ~DIU_DIV_MASK;
-	temp |= (bestval & DIU_DIV_MASK);
-	out_be32(&ccm->scfr1, temp);
-	pr_debug("DIU: Modified value of SCFR1: 0x%08x\n", temp);
-	iounmap(ccm);
+	/*
+	 * what is the most appropriate action to take when the search
+	 * for an available pixel clock which is acceptable to the
+	 * monitor has failed?  disable the DIU (clock) or just provide
+	 * a "best effort"?  we go with the latter
+	 */
+	pr_warn("DIU pixclock best effort fallback (backend's choice)\n");
+	clk_set_rate(clk_diu, pixclock);
+	got = clk_get_rate(clk_diu);
+	delta = abs(pixclock - got);
+	pr_debug("DIU pixclock want %u, got %lu, delta %lu, eps %lu\n",
+		 pixclock, got, delta, epsilon);
 }
 
 static enum fsl_diu_monitor_port
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index af54174..b625a2c 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -1,7 +1,7 @@
 config PPC_MPC52xx
 	bool "52xx-based boards"
 	depends on 6xx
-	select PPC_CLOCK
+	select COMMON_CLK
 	select PPC_PCI_CHOICE
 
 config PPC_MPC5200_SIMPLE
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index a932feb..21166f6 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -26,6 +26,7 @@
 #include <linux/of_fdt.h>
 #include <linux/interrupt.h>
 #include <linux/bug.h>
+#include <linux/cpuidle.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
@@ -216,6 +217,16 @@
 	return 1;
 }
 
+void powernv_idle(void)
+{
+	/*
+	 * Hook into the cpuidle framework if available, otherwise
+	 * fall back to the default platform idle code.
+	 */
+	if (cpuidle_idle_call())
+		power7_idle();
+}
+
 define_machine(powernv) {
 	.name			= "PowerNV",
 	.probe			= pnv_probe,
@@ -225,7 +236,7 @@
 	.show_cpuinfo		= pnv_show_cpuinfo,
 	.progress		= pnv_progress,
 	.machine_shutdown	= pnv_shutdown,
-	.power_save             = power7_idle,
+	.power_save             = powernv_idle,
 	.calibrate_decr		= generic_calibrate_decr,
 #ifdef CONFIG_KEXEC
 	.kexec_cpu_down		= pnv_kexec_cpu_down,
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index e666432..37300f6 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -119,12 +119,3 @@
 	  which are accessible through a debugfs file.
 
 	  Say N if you are unsure.
-
-config PSERIES_IDLE
-	bool "Cpuidle driver for pSeries platforms"
-	depends on CPU_IDLE
-	depends on PPC_PSERIES
-	default y
-	help
-	  Select this option to enable processor idle state management
-	  through cpuidle subsystem.
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index fbccac9..0348079 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -21,7 +21,6 @@
 obj-$(CONFIG_CMM)		+= cmm.o
 obj-$(CONFIG_DTL)		+= dtl.o
 obj-$(CONFIG_IO_EVENT_IRQ)	+= io_event_irq.o
-obj-$(CONFIG_PSERIES_IDLE)	+= processor_idle.o
 obj-$(CONFIG_LPARCFG)		+= lparcfg.o
 
 ifeq ($(CONFIG_PPC_PSERIES),y)
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 1c16141..47b6b9f 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -109,27 +109,28 @@
 	struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
 	unsigned long phys_mem, phys_end;
 	void *user_mem;
-	struct bio_vec *vec;
+	struct bio_vec vec;
 	unsigned int transfered;
-	unsigned short idx;
+	struct bvec_iter iter;
 
-	phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT);
+	phys_mem = bank->io_addr + (bio->bi_iter.bi_sector <<
+				    AXON_RAM_SECTOR_SHIFT);
 	phys_end = bank->io_addr + bank->size;
 	transfered = 0;
-	bio_for_each_segment(vec, bio, idx) {
-		if (unlikely(phys_mem + vec->bv_len > phys_end)) {
+	bio_for_each_segment(vec, bio, iter) {
+		if (unlikely(phys_mem + vec.bv_len > phys_end)) {
 			bio_io_error(bio);
 			return;
 		}
 
-		user_mem = page_address(vec->bv_page) + vec->bv_offset;
+		user_mem = page_address(vec.bv_page) + vec.bv_offset;
 		if (bio_data_dir(bio) == READ)
-			memcpy(user_mem, (void *) phys_mem, vec->bv_len);
+			memcpy(user_mem, (void *) phys_mem, vec.bv_len);
 		else
-			memcpy((void *) phys_mem, user_mem, vec->bv_len);
+			memcpy((void *) phys_mem, user_mem, vec.bv_len);
 
-		phys_mem += vec->bv_len;
-		transfered += vec->bv_len;
+		phys_mem += vec.bv_len;
+		transfered += vec.bv_len;
 	}
 	bio_endio(bio, 0);
 }
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index bd968a4..62c47bb 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -292,6 +292,7 @@
 	iommu_table_dart.it_offset = 0;
 	/* it_size is in number of entries */
 	iommu_table_dart.it_size = dart_tablesize / sizeof(u32);
+	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;
 
 	/* Initialize the common IOMMU code */
 	iommu_table_dart.it_base = (unsigned long)dart_vbase;
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 13b22e0..eeda43a 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -11,6 +11,28 @@
         KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif
 
+# How to compile the 16-bit code.  Note we always compile for -march=i386;
+# that way we can complain to the user if the CPU is insufficient.
+#
+# The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For
+# older versions of GCC, we need to play evil and unreliable tricks to
+# attempt to ensure that our asm(".code16gcc") is first in the asm
+# output.
+CODE16GCC_CFLAGS := -m32 -include $(srctree)/arch/x86/boot/code16gcc.h \
+		    $(call cc-option, -fno-toplevel-reorder,\
+		      $(call cc-option, -fno-unit-at-a-time))
+M16_CFLAGS	 := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
+
+REALMODE_CFLAGS	:= $(M16_CFLAGS) -g -Os -D__KERNEL__ \
+		   -DDISABLE_BRANCH_PROFILING \
+		   -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
+		   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+		   -mno-mmx -mno-sse \
+		   $(call cc-option, -ffreestanding) \
+		   $(call cc-option, -fno-stack-protector) \
+		   $(call cc-option, -mpreferred-stack-boundary=2)
+export REALMODE_CFLAGS
+
 # BITS is used as extension for files which are available in a 32 bit
 # and a 64 bit version to simplify shared Makefiles.
 # e.g.: obj-y += foo_$(BITS).o
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index de70669..878df7e 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -51,20 +51,7 @@
 
 # ---------------------------------------------------------------------------
 
-# How to compile the 16-bit code.  Note we always compile for -march=i386,
-# that way we can complain to the user if the CPU is insufficient.
-KBUILD_CFLAGS	:= $(USERINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ \
-		   -DDISABLE_BRANCH_PROFILING \
-		   -Wall -Wstrict-prototypes \
-		   -march=i386 -mregparm=3 \
-		   -include $(srctree)/$(src)/code16gcc.h \
-		   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-		   -mno-mmx -mno-sse \
-		   $(call cc-option, -ffreestanding) \
-		   $(call cc-option, -fno-toplevel-reorder,\
-		   $(call cc-option, -fno-unit-at-a-time)) \
-		   $(call cc-option, -fno-stack-protector) \
-		   $(call cc-option, -mpreferred-stack-boundary=2)
+KBUILD_CFLAGS	:= $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
 
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c
index a9fcb7c..431fa5f 100644
--- a/arch/x86/boot/cpuflags.c
+++ b/arch/x86/boot/cpuflags.c
@@ -28,20 +28,35 @@
 	return fsw == 0 && (fcw & 0x103f) == 0x003f;
 }
 
+/*
+ * For building the 16-bit code we want to explicitly specify 32-bit
+ * push/pop operations, rather than just saying 'pushf' or 'popf' and
+ * letting the compiler choose. But this is also included from the
+ * compressed/ directory where it may be 64-bit code, and thus needs
+ * to be 'pushfq' or 'popfq' in that case.
+ */
+#ifdef __x86_64__
+#define PUSHF "pushfq"
+#define POPF "popfq"
+#else
+#define PUSHF "pushfl"
+#define POPF "popfl"
+#endif
+
 int has_eflag(unsigned long mask)
 {
 	unsigned long f0, f1;
 
-	asm volatile("pushf	\n\t"
-		     "pushf	\n\t"
+	asm volatile(PUSHF "	\n\t"
+		     PUSHF "	\n\t"
 		     "pop %0	\n\t"
 		     "mov %0,%1	\n\t"
 		     "xor %2,%1	\n\t"
 		     "push %1	\n\t"
-		     "popf	\n\t"
-		     "pushf	\n\t"
+		     POPF "	\n\t"
+		     PUSHF "	\n\t"
 		     "pop %1	\n\t"
-		     "popf"
+		     POPF
 		     : "=&r" (f0), "=&r" (f1)
 		     : "ri" (mask));
 
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index ff339c5..0bb2549 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -80,7 +80,7 @@
 	u16 xmode_n;		/* Size of unprobed mode range */
 };
 
-#define __videocard struct card_info __attribute__((section(".videocards")))
+#define __videocard struct card_info __attribute__((used,section(".videocards")))
 extern struct card_info video_cards[], video_cards_end[];
 
 int mode_defined(u16 mode);	/* video.c */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 401f350..cd6e161 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -781,9 +781,9 @@
  */
 #define PV_CALLEE_SAVE_REGS_THUNK(func)					\
 	extern typeof(func) __raw_callee_save_##func;			\
-	static void *__##func##__ __used = func;			\
 									\
 	asm(".pushsection .text;"					\
+	    ".globl __raw_callee_save_" #func " ; "			\
 	    "__raw_callee_save_" #func ": "				\
 	    PV_SAVE_ALL_CALLER_REGS					\
 	    "call " #func ";"						\
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index aab8f67..7549b8b 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -388,10 +388,11 @@
 	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
 
 /* Simple instruction patching code. */
-#define DEF_NATIVE(ops, name, code) 					\
-	extern const char start_##ops##_##name[] __visible,		\
-			  end_##ops##_##name[] __visible;		\
-	asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":")
+#define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
+
+#define DEF_NATIVE(ops, name, code)					\
+	__visible extern const char start_##ops##_##name[], end_##ops##_##name[];	\
+	asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name))
 
 unsigned paravirt_patch_nop(void);
 unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len);
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index a83aa44..1aa9ccd 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -121,7 +121,8 @@
 
 /* Set of bits not changed in pte_modify */
 #define _PAGE_CHG_MASK	(PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT |		\
-			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
+			 _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY |	\
+			 _PAGE_SOFT_DIRTY)
 #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE)
 
 #define _PAGE_CACHE_MASK	(_PAGE_PCD | _PAGE_PWT)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 3ba3de4..e1940c0 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -163,9 +163,11 @@
  */
 #ifndef __ASSEMBLY__
 
-
-/* how to get the current stack pointer from C */
-register unsigned long current_stack_pointer asm("esp") __used;
+#define current_stack_pointer ({		\
+	unsigned long sp;			\
+	asm("mov %%esp,%0" : "=g" (sp));	\
+	sp;					\
+})
 
 /* how to get the thread information struct from C */
 static inline struct thread_info *current_thread_info(void)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 6dd802c..cd1b362 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -673,7 +673,7 @@
 /* Track spinlock on which a cpu is waiting */
 static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting);
 
-static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
+__visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
 	struct kvm_lock_waiting *w;
 	int cpu;
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index 992f890..f6584a9 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -33,7 +33,7 @@
  * and vice versa.
  */
 
-static unsigned long vsmp_save_fl(void)
+asmlinkage unsigned long vsmp_save_fl(void)
 {
 	unsigned long flags = native_save_fl();
 
@@ -43,7 +43,7 @@
 }
 PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
 
-static void vsmp_restore_fl(unsigned long flags)
+__visible void vsmp_restore_fl(unsigned long flags)
 {
 	if (flags & X86_EFLAGS_IF)
 		flags &= ~X86_EFLAGS_AC;
@@ -53,7 +53,7 @@
 }
 PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
 
-static void vsmp_irq_disable(void)
+asmlinkage void vsmp_irq_disable(void)
 {
 	unsigned long flags = native_save_fl();
 
@@ -61,7 +61,7 @@
 }
 PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
 
-static void vsmp_irq_enable(void)
+asmlinkage void vsmp_irq_enable(void)
 {
 	unsigned long flags = native_save_fl();
 
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index bdf8532..ad1fb5f 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -233,13 +233,13 @@
  * flags word contains all kind of stuff, but in practice Linux only cares
  * about the interrupt flag.  Our "save_flags()" just returns that.
  */
-static unsigned long save_fl(void)
+asmlinkage unsigned long lguest_save_fl(void)
 {
 	return lguest_data.irq_enabled;
 }
 
 /* Interrupts go off... */
-static void irq_disable(void)
+asmlinkage void lguest_irq_disable(void)
 {
 	lguest_data.irq_enabled = 0;
 }
@@ -253,8 +253,8 @@
  * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the
  * C function, then restores it.
  */
-PV_CALLEE_SAVE_REGS_THUNK(save_fl);
-PV_CALLEE_SAVE_REGS_THUNK(irq_disable);
+PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl);
+PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable);
 /*:*/
 
 /* These are in i386_head.S */
@@ -1291,9 +1291,9 @@
 	 */
 
 	/* Interrupt-related operations */
-	pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
+	pv_irq_ops.save_fl = PV_CALLEE_SAVE(lguest_save_fl);
 	pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
-	pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
+	pv_irq_ops.irq_disable = PV_CALLEE_SAVE(lguest_irq_disable);
 	pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable);
 	pv_irq_ops.safe_halt = lguest_safe_halt;
 
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index 59d353d..a544908 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -330,11 +330,6 @@
 
 	RE_ENTRANT_CHECK_OFF;
 	if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) {
-#ifdef PRINT_MESSAGES
-		/* My message from the sponsor */
-		printk(FPU_VERSION " " __DATE__ " (C) W. Metzenthen.\n");
-#endif /* PRINT_MESSAGES */
-
 		/* Get a name string for error reporting */
 		for (i = 0; exception_names[i].type; i++)
 			if ((exception_names[i].type & n) ==
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile
index 9cac825..3497f14 100644
--- a/arch/x86/realmode/rm/Makefile
+++ b/arch/x86/realmode/rm/Makefile
@@ -64,20 +64,7 @@
 
 # ---------------------------------------------------------------------------
 
-# How to compile the 16-bit code.  Note we always compile for -march=i386,
-# that way we can complain to the user if the CPU is insufficient.
-KBUILD_CFLAGS	:= $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \
-		   -I$(srctree)/arch/x86/boot \
-		   -DDISABLE_BRANCH_PROFILING \
-		   -Wall -Wstrict-prototypes \
-		   -march=i386 -mregparm=3 \
-		   -include $(srctree)/$(src)/../../boot/code16gcc.h \
-		   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-		   -mno-mmx -mno-sse \
-		   $(call cc-option, -ffreestanding) \
-		   $(call cc-option, -fno-toplevel-reorder,\
-		   $(call cc-option, -fno-unit-at-a-time)) \
-		   $(call cc-option, -fno-stack-protector) \
-		   $(call cc-option, -mpreferred-stack-boundary=2)
+KBUILD_CFLAGS	:= $(LINUXINCLUDE) $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \
+		   -I$(srctree)/arch/x86/boot
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 11f9285..cfbdbdb 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -1025,6 +1025,29 @@
 	}
 }
 
+/*
+ * As an aid to debugging problems with different linkers,
+ * print summary information about the relocs.
+ * Since different linkers tend to emit the sections in
+ * different orders we use the section names in the output.
+ */
+static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
+				const char *symname)
+{
+	printf("%s\t%s\t%s\t%s\n",
+		sec_name(sec->shdr.sh_info),
+		rel_type(ELF_R_TYPE(rel->r_info)),
+		symname,
+		sec_name(sym->st_shndx));
+	return 0;
+}
+
+static void print_reloc_info(void)
+{
+	printf("reloc section\treloc type\tsymbol\tsymbol section\n");
+	walk_relocs(do_reloc_info);
+}
+
 #if ELF_BITS == 64
 # define process process_64
 #else
@@ -1032,7 +1055,8 @@
 #endif
 
 void process(FILE *fp, int use_real_mode, int as_text,
-	     int show_absolute_syms, int show_absolute_relocs)
+	     int show_absolute_syms, int show_absolute_relocs,
+	     int show_reloc_info)
 {
 	regex_init(use_real_mode);
 	read_ehdr(fp);
@@ -1050,5 +1074,9 @@
 		print_absolute_relocs();
 		return;
 	}
+	if (show_reloc_info) {
+		print_reloc_info();
+		return;
+	}
 	emit_relocs(as_text, use_real_mode);
 }
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index 07cdb1e..f595906 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -29,8 +29,9 @@
 };
 
 void process_32(FILE *fp, int use_real_mode, int as_text,
-		int show_absolute_syms, int show_absolute_relocs);
+		int show_absolute_syms, int show_absolute_relocs,
+		int show_reloc_info);
 void process_64(FILE *fp, int use_real_mode, int as_text,
-		int show_absolute_syms, int show_absolute_relocs);
-
+		int show_absolute_syms, int show_absolute_relocs,
+		int show_reloc_info);
 #endif /* RELOCS_H */
diff --git a/arch/x86/tools/relocs_common.c b/arch/x86/tools/relocs_common.c
index 44d3968..acab636b 100644
--- a/arch/x86/tools/relocs_common.c
+++ b/arch/x86/tools/relocs_common.c
@@ -11,12 +11,13 @@
 
 static void usage(void)
 {
-	die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n");
+	die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode]" \
+	    " vmlinux\n");
 }
 
 int main(int argc, char **argv)
 {
-	int show_absolute_syms, show_absolute_relocs;
+	int show_absolute_syms, show_absolute_relocs, show_reloc_info;
 	int as_text, use_real_mode;
 	const char *fname;
 	FILE *fp;
@@ -25,6 +26,7 @@
 
 	show_absolute_syms = 0;
 	show_absolute_relocs = 0;
+	show_reloc_info = 0;
 	as_text = 0;
 	use_real_mode = 0;
 	fname = NULL;
@@ -39,6 +41,10 @@
 				show_absolute_relocs = 1;
 				continue;
 			}
+			if (strcmp(arg, "--reloc-info") == 0) {
+				show_reloc_info = 1;
+				continue;
+			}
 			if (strcmp(arg, "--text") == 0) {
 				as_text = 1;
 				continue;
@@ -67,10 +73,12 @@
 	rewind(fp);
 	if (e_ident[EI_CLASS] == ELFCLASS64)
 		process_64(fp, use_real_mode, as_text,
-			   show_absolute_syms, show_absolute_relocs);
+			   show_absolute_syms, show_absolute_relocs,
+			   show_reloc_info);
 	else
 		process_32(fp, use_real_mode, as_text,
-			   show_absolute_syms, show_absolute_relocs);
+			   show_absolute_syms, show_absolute_relocs,
+			   show_reloc_info);
 	fclose(fp);
 	return 0;
 }
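
For reference, a usage sketch of the new flag; the output header comes verbatim from print_reloc_info() above, with one tab-separated line per relocation following it:

	$ arch/x86/tools/relocs --reloc-info vmlinux
	reloc section	reloc type	symbol	symbol section
	...
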
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 76ca326..08f763d 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -23,7 +23,7 @@
 	(void)HYPERVISOR_xen_version(0, NULL);
 }
 
-static unsigned long xen_save_fl(void)
+asmlinkage unsigned long xen_save_fl(void)
 {
 	struct vcpu_info *vcpu;
 	unsigned long flags;
@@ -41,7 +41,7 @@
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl);
 
-static void xen_restore_fl(unsigned long flags)
+__visible void xen_restore_fl(unsigned long flags)
 {
 	struct vcpu_info *vcpu;
 
@@ -63,7 +63,7 @@
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl);
 
-static void xen_irq_disable(void)
+asmlinkage void xen_irq_disable(void)
 {
 	/* There's a one instruction preempt window here.  We need to
 	   make sure we don't switch CPUs between getting the vcpu
@@ -74,7 +74,7 @@
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable);
 
-static void xen_irq_enable(void)
+asmlinkage void xen_irq_enable(void)
 {
 	struct vcpu_info *vcpu;
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c1d406f..2423ef0 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -431,7 +431,7 @@
 	return val;
 }
 
-static pteval_t xen_pte_val(pte_t pte)
+__visible pteval_t xen_pte_val(pte_t pte)
 {
 	pteval_t pteval = pte.pte;
 #if 0
@@ -448,7 +448,7 @@
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
 
-static pgdval_t xen_pgd_val(pgd_t pgd)
+__visible pgdval_t xen_pgd_val(pgd_t pgd)
 {
 	return pte_mfn_to_pfn(pgd.pgd);
 }
@@ -479,7 +479,7 @@
 	WARN_ON(pat != 0x0007010600070106ull);
 }
 
-static pte_t xen_make_pte(pteval_t pte)
+__visible pte_t xen_make_pte(pteval_t pte)
 {
 	phys_addr_t addr = (pte & PTE_PFN_MASK);
 #if 0
@@ -514,14 +514,14 @@
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
 
-static pgd_t xen_make_pgd(pgdval_t pgd)
+__visible pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	pgd = pte_pfn_to_mfn(pgd);
 	return native_make_pgd(pgd);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
 
-static pmdval_t xen_pmd_val(pmd_t pmd)
+__visible pmdval_t xen_pmd_val(pmd_t pmd)
 {
 	return pte_mfn_to_pfn(pmd.pmd);
 }
@@ -580,7 +580,7 @@
 }
 #endif	/* CONFIG_X86_PAE */
 
-static pmd_t xen_make_pmd(pmdval_t pmd)
+__visible pmd_t xen_make_pmd(pmdval_t pmd)
 {
 	pmd = pte_pfn_to_mfn(pmd);
 	return native_make_pmd(pmd);
@@ -588,13 +588,13 @@
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
 
 #if PAGETABLE_LEVELS == 4
-static pudval_t xen_pud_val(pud_t pud)
+__visible pudval_t xen_pud_val(pud_t pud)
 {
 	return pte_mfn_to_pfn(pud.pud);
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
 
-static pud_t xen_make_pud(pudval_t pud)
+__visible pud_t xen_make_pud(pudval_t pud)
 {
 	pud = pte_pfn_to_mfn(pud);
 
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index dd5f905..0982233 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -35,7 +35,7 @@
 extern const char xen_hypervisor_callback[];
 extern const char xen_failsafe_callback[];
 #ifdef CONFIG_X86_64
-extern const char nmi[];
+extern asmlinkage void nmi(void);
 #endif
 extern void xen_sysenter_target(void);
 extern void xen_syscall_target(void);
@@ -577,7 +577,7 @@
 void xen_enable_nmi(void)
 {
 #ifdef CONFIG_X86_64
-	if (register_callback(CALLBACKTYPE_nmi, nmi))
+	if (register_callback(CALLBACKTYPE_nmi, (char *)nmi))
 		BUG();
 #endif
 }
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 0e36cde..581521c 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -106,7 +106,7 @@
 static cpumask_t waiting_cpus;
 
 static bool xen_pvspin = true;
-static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
+__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
 	int irq = __this_cpu_read(lock_kicker_irq);
 	struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 8c6e819..48eebac 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -103,18 +103,18 @@
 
 static int simdisk_xfer_bio(struct simdisk *dev, struct bio *bio)
 {
-	int i;
-	struct bio_vec *bvec;
-	sector_t sector = bio->bi_sector;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+	sector_t sector = bio->bi_iter.bi_sector;
 
-	bio_for_each_segment(bvec, bio, i) {
-		char *buffer = __bio_kmap_atomic(bio, i);
-		unsigned len = bvec->bv_len >> SECTOR_SHIFT;
+	bio_for_each_segment(bvec, bio, iter) {
+		char *buffer = __bio_kmap_atomic(bio, iter);
+		unsigned len = bvec.bv_len >> SECTOR_SHIFT;
 
 		simdisk_transfer(dev, sector, len, buffer,
 				bio_data_dir(bio) == WRITE);
 		sector += len;
-		__bio_kunmap_atomic(bio);
+		__bio_kunmap_atomic(buffer);
 	}
 	return 0;
 }
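
The simdisk conversion is the template repeated across the drivers below: the loop variable is a struct bio_vec held by value, the position state lives in an on-stack bvec_iter, and the mapping helpers take the iterator rather than an index. A minimal sketch of the same idiom, assuming a lowmem-only bio (highmem needs the kmap helpers used above):

	static void copy_bio_lowmem(struct bio *bio, void *dst)
	{
		struct bio_vec bvec;
		struct bvec_iter iter;

		bio_for_each_segment(bvec, bio, iter) {
			memcpy(dst, page_address(bvec.bv_page) + bvec.bv_offset,
			       bvec.bv_len);
			dst += bvec.bv_len;
		}
	}
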
diff --git a/block/blk-core.c b/block/blk-core.c
index 8bdd012..c00e0bd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -38,6 +38,7 @@
 
 #include "blk.h"
 #include "blk-cgroup.h"
+#include "blk-mq.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -130,7 +131,7 @@
 	bio_advance(bio, nbytes);
 
 	/* don't actually finish bio if it's part of flush sequence */
-	if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+	if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
 		bio_endio(bio, error);
 }
 
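
A hedged sketch of the completion model behind the hunk above: partially finishing a bio is now just a bio_advance() on its iterator, and the bio is complete exactly when bi_iter.bi_size reaches zero (the flush-sequence test being the one special case):

	static void complete_part(struct bio *bio, unsigned int nbytes, int error)
	{
		/* bio_advance() updates bi_sector, bi_size and bi_idx together */
		bio_advance(bio, nbytes);
		if (bio->bi_iter.bi_size == 0)
			bio_endio(bio, error);
	}
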
@@ -245,7 +246,16 @@
 void blk_sync_queue(struct request_queue *q)
 {
 	del_timer_sync(&q->timeout);
-	cancel_delayed_work_sync(&q->delay_work);
+
+	if (q->mq_ops) {
+		struct blk_mq_hw_ctx *hctx;
+		int i;
+
+		queue_for_each_hw_ctx(q, hctx, i)
+			cancel_delayed_work_sync(&hctx->delayed_work);
+	} else {
+		cancel_delayed_work_sync(&q->delay_work);
+	}
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
@@ -497,8 +507,13 @@
 	 * Drain all requests queued before DYING marking. Set DEAD flag to
 	 * prevent that q->request_fn() gets invoked after draining finished.
 	 */
-	spin_lock_irq(lock);
-	__blk_drain_queue(q, true);
+	if (q->mq_ops) {
+		blk_mq_drain_queue(q);
+		spin_lock_irq(lock);
+	} else {
+		spin_lock_irq(lock);
+		__blk_drain_queue(q, true);
+	}
 	queue_flag_set(QUEUE_FLAG_DEAD, q);
 	spin_unlock_irq(lock);
 
@@ -1326,7 +1341,7 @@
 	bio->bi_io_vec->bv_offset = 0;
 	bio->bi_io_vec->bv_len = len;
 
-	bio->bi_size = len;
+	bio->bi_iter.bi_size = len;
 	bio->bi_vcnt = 1;
 	bio->bi_phys_segments = 1;
 
@@ -1351,7 +1366,7 @@
 
 	req->biotail->bi_next = bio;
 	req->biotail = bio;
-	req->__data_len += bio->bi_size;
+	req->__data_len += bio->bi_iter.bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	blk_account_io_start(req, false);
@@ -1380,8 +1395,8 @@
 	 * not touch req->buffer either...
 	 */
 	req->buffer = bio_data(bio);
-	req->__sector = bio->bi_sector;
-	req->__data_len += bio->bi_size;
+	req->__sector = bio->bi_iter.bi_sector;
+	req->__data_len += bio->bi_iter.bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	blk_account_io_start(req, false);
@@ -1459,7 +1474,7 @@
 		req->cmd_flags |= REQ_FAILFAST_MASK;
 
 	req->errors = 0;
-	req->__sector = bio->bi_sector;
+	req->__sector = bio->bi_iter.bi_sector;
 	req->ioprio = bio_prio(bio);
 	blk_rq_bio_prep(req->q, req, bio);
 }
@@ -1583,12 +1598,12 @@
 	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
 
-		bio->bi_sector += p->start_sect;
+		bio->bi_iter.bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
 
 		trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
 				      bdev->bd_dev,
-				      bio->bi_sector - p->start_sect);
+				      bio->bi_iter.bi_sector - p->start_sect);
 	}
 }
 
@@ -1654,7 +1669,7 @@
 	/* Test device or partition size, when known. */
 	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
 	if (maxsector) {
-		sector_t sector = bio->bi_sector;
+		sector_t sector = bio->bi_iter.bi_sector;
 
 		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
 			/*
@@ -1690,7 +1705,7 @@
 		       "generic_make_request: Trying to access "
 			"nonexistent block-device %s (%Lu)\n",
 			bdevname(bio->bi_bdev, b),
-			(long long) bio->bi_sector);
+			(long long) bio->bi_iter.bi_sector);
 		goto end_io;
 	}
 
@@ -1704,9 +1719,9 @@
 	}
 
 	part = bio->bi_bdev->bd_part;
-	if (should_fail_request(part, bio->bi_size) ||
+	if (should_fail_request(part, bio->bi_iter.bi_size) ||
 	    should_fail_request(&part_to_disk(part)->part0,
-				bio->bi_size))
+				bio->bi_iter.bi_size))
 		goto end_io;
 
 	/*
@@ -1865,7 +1880,7 @@
 		if (rw & WRITE) {
 			count_vm_events(PGPGOUT, count);
 		} else {
-			task_io_account_read(bio->bi_size);
+			task_io_account_read(bio->bi_iter.bi_size);
 			count_vm_events(PGPGIN, count);
 		}
 
@@ -1874,7 +1889,7 @@
 			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
 			current->comm, task_pid_nr(current),
 				(rw & WRITE) ? "WRITE" : "READ",
-				(unsigned long long)bio->bi_sector,
+				(unsigned long long)bio->bi_iter.bi_sector,
 				bdevname(bio->bi_bdev, b),
 				count);
 		}
@@ -2007,7 +2022,7 @@
 	for (bio = rq->bio; bio; bio = bio->bi_next) {
 		if ((bio->bi_rw & ff) != ff)
 			break;
-		bytes += bio->bi_size;
+		bytes += bio->bi_iter.bi_size;
 	}
 
 	/* this could lead to infinite loop */
@@ -2378,9 +2393,9 @@
 	total_bytes = 0;
 	while (req->bio) {
 		struct bio *bio = req->bio;
-		unsigned bio_bytes = min(bio->bi_size, nr_bytes);
+		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-		if (bio_bytes == bio->bi_size)
+		if (bio_bytes == bio->bi_iter.bi_size)
 			req->bio = bio->bi_next;
 
 		req_bio_endio(req, bio, bio_bytes, error);
@@ -2728,7 +2743,7 @@
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 		rq->buffer = bio_data(bio);
 	}
-	rq->__data_len = bio->bi_size;
+	rq->__data_len = bio->bi_iter.bi_size;
 	rq->bio = rq->biotail = bio;
 
 	if (bio->bi_bdev)
@@ -2746,10 +2761,10 @@
 void rq_flush_dcache_pages(struct request *rq)
 {
 	struct req_iterator iter;
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
 
 	rq_for_each_segment(bvec, rq, iter)
-		flush_dcache_page(bvec->bv_page);
+		flush_dcache_page(bvec.bv_page);
 }
 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
 #endif
diff --git a/block/blk-exec.c b/block/blk-exec.c
index c3edf9d..bbfc072 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -60,6 +60,10 @@
 	rq->rq_disk = bd_disk;
 	rq->end_io = done;
 
+	/*
+	 * Don't check the dying flag for MQ because the request won't
+	 * be reused after the dying flag is set.
+	 */
 	if (q->mq_ops) {
 		blk_mq_insert_request(q, rq, true);
 		return;
diff --git a/block/blk-flush.c b/block/blk-flush.c
index fb6f3c0..9288aaf 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -548,7 +548,7 @@
 	 * copied from blk_rq_pos(rq).
 	 */
 	if (error_sector)
-		*error_sector = bio->bi_sector;
+		*error_sector = bio->bi_iter.bi_sector;
 
 	bio_put(bio);
 	return ret;
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 03cf717..7fbab84 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -43,30 +43,32 @@
  */
 int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
 {
-	struct bio_vec *iv, *ivprv = NULL;
+	struct bio_vec iv, ivprv = { NULL };
 	unsigned int segments = 0;
 	unsigned int seg_size = 0;
-	unsigned int i = 0;
+	struct bvec_iter iter;
+	int prev = 0;
 
-	bio_for_each_integrity_vec(iv, bio, i) {
+	bio_for_each_integrity_vec(iv, bio, iter) {
 
-		if (ivprv) {
-			if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+		if (prev) {
+			if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
 				goto new_segment;
 
-			if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv))
+			if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
 				goto new_segment;
 
-			if (seg_size + iv->bv_len > queue_max_segment_size(q))
+			if (seg_size + iv.bv_len > queue_max_segment_size(q))
 				goto new_segment;
 
-			seg_size += iv->bv_len;
+			seg_size += iv.bv_len;
 		} else {
 new_segment:
 			segments++;
-			seg_size = iv->bv_len;
+			seg_size = iv.bv_len;
 		}
 
+		prev = 1;
 		ivprv = iv;
 	}
 
@@ -87,24 +89,25 @@
 int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
 			    struct scatterlist *sglist)
 {
-	struct bio_vec *iv, *ivprv = NULL;
+	struct bio_vec iv, ivprv = { NULL };
 	struct scatterlist *sg = NULL;
 	unsigned int segments = 0;
-	unsigned int i = 0;
+	struct bvec_iter iter;
+	int prev = 0;
 
-	bio_for_each_integrity_vec(iv, bio, i) {
+	bio_for_each_integrity_vec(iv, bio, iter) {
 
-		if (ivprv) {
-			if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+		if (prev) {
+			if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
 				goto new_segment;
 
-			if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv))
+			if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
 				goto new_segment;
 
-			if (sg->length + iv->bv_len > queue_max_segment_size(q))
+			if (sg->length + iv.bv_len > queue_max_segment_size(q))
 				goto new_segment;
 
-			sg->length += iv->bv_len;
+			sg->length += iv.bv_len;
 		} else {
 new_segment:
 			if (!sg)
@@ -114,10 +117,11 @@
 				sg = sg_next(sg);
 			}
 
-			sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset);
+			sg_set_page(sg, iv.bv_page, iv.bv_len, iv.bv_offset);
 			segments++;
 		}
 
+		prev = 1;
 		ivprv = iv;
 	}
 
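
One detail worth isolating from both loops above: with the previous biovec held by value there is no NULL pointer left to mean "no previous segment", hence the explicit prev flag. The bare shape, with the merge helpers hypothetical:

	struct bio_vec v, prev_v = { NULL };
	struct bvec_iter it;
	int have_prev = 0;

	bio_for_each_segment(v, bio, it) {
		if (have_prev && can_merge(&prev_v, &v))	/* hypothetical */
			grow_segment(&v);			/* hypothetical */
		else
			new_segment(&v);			/* hypothetical */
		have_prev = 1;
		prev_v = v;
	}
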
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9b5b561..2da76c9 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -108,12 +108,12 @@
 			req_sects = end_sect - sector;
 		}
 
-		bio->bi_sector = sector;
+		bio->bi_iter.bi_sector = sector;
 		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
 		bio->bi_private = &bb;
 
-		bio->bi_size = req_sects << 9;
+		bio->bi_iter.bi_size = req_sects << 9;
 		nr_sects -= req_sects;
 		sector = end_sect;
 
@@ -174,7 +174,7 @@
 			break;
 		}
 
-		bio->bi_sector = sector;
+		bio->bi_iter.bi_sector = sector;
 		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
 		bio->bi_private = &bb;
@@ -184,11 +184,11 @@
 		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
 
 		if (nr_sects > max_write_same_sectors) {
-			bio->bi_size = max_write_same_sectors << 9;
+			bio->bi_iter.bi_size = max_write_same_sectors << 9;
 			nr_sects -= max_write_same_sectors;
 			sector += max_write_same_sectors;
 		} else {
-			bio->bi_size = nr_sects << 9;
+			bio->bi_iter.bi_size = nr_sects << 9;
 			nr_sects = 0;
 		}
 
@@ -240,7 +240,7 @@
 			break;
 		}
 
-		bio->bi_sector = sector;
+		bio->bi_iter.bi_sector = sector;
 		bio->bi_bdev   = bdev;
 		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_private = &bb;
diff --git a/block/blk-map.c b/block/blk-map.c
index 623e1cd..ae4ae10 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -20,7 +20,7 @@
 		rq->biotail->bi_next = bio;
 		rq->biotail = bio;
 
-		rq->__data_len += bio->bi_size;
+		rq->__data_len += bio->bi_iter.bi_size;
 	}
 	return 0;
 }
@@ -76,7 +76,7 @@
 
 	ret = blk_rq_append_bio(q, rq, bio);
 	if (!ret)
-		return bio->bi_size;
+		return bio->bi_iter.bi_size;
 
 	/* if it was bounced we must call the end io function */
 	bio_endio(bio, 0);
@@ -220,7 +220,7 @@
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
-	if (bio->bi_size != len) {
+	if (bio->bi_iter.bi_size != len) {
 		/*
 		 * Grab an extra reference to this bio, as bio_unmap_user()
 		 * expects to be able to drop it twice as it happens on the
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1ffc589..8f8adaa 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -12,10 +12,11 @@
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 					     struct bio *bio)
 {
-	struct bio_vec *bv, *bvprv = NULL;
-	int cluster, i, high, highprv = 1;
+	struct bio_vec bv, bvprv = { NULL };
+	int cluster, high, highprv = 1;
 	unsigned int seg_size, nr_phys_segs;
 	struct bio *fbio, *bbio;
+	struct bvec_iter iter;
 
 	if (!bio)
 		return 0;
@@ -25,25 +26,23 @@
 	seg_size = 0;
 	nr_phys_segs = 0;
 	for_each_bio(bio) {
-		bio_for_each_segment(bv, bio, i) {
+		bio_for_each_segment(bv, bio, iter) {
 			/*
 			 * the trick here is making sure that a high page is
 			 * never considered part of another segment, since that
 			 * might change with the bounce page.
 			 */
-			high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q);
-			if (high || highprv)
-				goto new_segment;
-			if (cluster) {
-				if (seg_size + bv->bv_len
+			high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q);
+			if (!high && !highprv && cluster) {
+				if (seg_size + bv.bv_len
 				    > queue_max_segment_size(q))
 					goto new_segment;
-				if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
+				if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
 					goto new_segment;
-				if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
+				if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
 					goto new_segment;
 
-				seg_size += bv->bv_len;
+				seg_size += bv.bv_len;
 				bvprv = bv;
 				continue;
 			}
@@ -54,7 +53,7 @@
 
 			nr_phys_segs++;
 			bvprv = bv;
-			seg_size = bv->bv_len;
+			seg_size = bv.bv_len;
 			highprv = high;
 		}
 		bbio = bio;
@@ -87,6 +86,9 @@
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 				   struct bio *nxt)
 {
+	struct bio_vec end_bv = { NULL }, nxt_bv;
+	struct bvec_iter iter;
+
 	if (!blk_queue_cluster(q))
 		return 0;
 
@@ -97,34 +99,40 @@
 	if (!bio_has_data(bio))
 		return 1;
 
-	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+	bio_for_each_segment(end_bv, bio, iter)
+		if (end_bv.bv_len == iter.bi_size)
+			break;
+
+	nxt_bv = bio_iovec(nxt);
+
+	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
 		return 0;
 
 	/*
 	 * bio and nxt are contiguous in memory; check if the queue allows
 	 * these two to be merged into one
 	 */
-	if (BIO_SEG_BOUNDARY(q, bio, nxt))
+	if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
 		return 1;
 
 	return 0;
 }
 
-static void
+static inline void
 __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
-		     struct scatterlist *sglist, struct bio_vec **bvprv,
+		     struct scatterlist *sglist, struct bio_vec *bvprv,
 		     struct scatterlist **sg, int *nsegs, int *cluster)
 {
 
 	int nbytes = bvec->bv_len;
 
-	if (*bvprv && *cluster) {
+	if (*sg && *cluster) {
 		if ((*sg)->length + nbytes > queue_max_segment_size(q))
 			goto new_segment;
 
-		if (!BIOVEC_PHYS_MERGEABLE(*bvprv, bvec))
+		if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
 			goto new_segment;
-		if (!BIOVEC_SEG_BOUNDARY(q, *bvprv, bvec))
+		if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
 			goto new_segment;
 
 		(*sg)->length += nbytes;
@@ -150,7 +158,7 @@
 		sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
 		(*nsegs)++;
 	}
-	*bvprv = bvec;
+	*bvprv = *bvec;
 }
 
 /*
@@ -160,7 +168,7 @@
 int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 		  struct scatterlist *sglist)
 {
-	struct bio_vec *bvec, *bvprv;
+	struct bio_vec bvec, bvprv = { NULL };
 	struct req_iterator iter;
 	struct scatterlist *sg;
 	int nsegs, cluster;
@@ -171,10 +179,9 @@
 	/*
 	 * for each bio in rq
 	 */
-	bvprv = NULL;
 	sg = NULL;
 	rq_for_each_segment(bvec, rq, iter) {
-		__blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg,
+		__blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
 				     &nsegs, &cluster);
 	} /* segments in rq */
 
@@ -223,18 +230,17 @@
 int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
 		   struct scatterlist *sglist)
 {
-	struct bio_vec *bvec, *bvprv;
+	struct bio_vec bvec, bvprv = { NULL };
 	struct scatterlist *sg;
 	int nsegs, cluster;
-	unsigned long i;
+	struct bvec_iter iter;
 
 	nsegs = 0;
 	cluster = blk_queue_cluster(q);
 
-	bvprv = NULL;
 	sg = NULL;
-	bio_for_each_segment(bvec, bio, i) {
-		__blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg,
+	bio_for_each_segment(bvec, bio, iter) {
+		__blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
 				     &nsegs, &cluster);
 	} /* segments in bio */
 
@@ -543,9 +549,9 @@
 
 int blk_try_merge(struct request *rq, struct bio *bio)
 {
-	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector)
+	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
 		return ELEVATOR_BACK_MERGE;
-	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector)
+	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
 		return ELEVATOR_FRONT_MERGE;
 	return ELEVATOR_NO_MERGE;
 }
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 0045ace..3146bef 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -28,36 +28,6 @@
 	return NOTIFY_OK;
 }
 
-static void blk_mq_cpu_notify(void *data, unsigned long action,
-			      unsigned int cpu)
-{
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		/*
-		 * If the CPU goes away, ensure that we run any pending
-		 * completions.
-		 */
-		struct llist_node *node;
-		struct request *rq;
-
-		local_irq_disable();
-
-		node = llist_del_all(&per_cpu(ipi_lists, cpu));
-		while (node) {
-			struct llist_node *next = node->next;
-
-			rq = llist_entry(node, struct request, ll_list);
-			__blk_mq_end_io(rq, rq->errors);
-			node = next;
-		}
-
-		local_irq_enable();
-	}
-}
-
-static struct notifier_block __cpuinitdata blk_mq_main_cpu_notifier = {
-	.notifier_call	= blk_mq_main_cpu_notify,
-};
-
 void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 {
 	BUG_ON(!notifier->notify);
@@ -82,12 +52,7 @@
 	notifier->data = data;
 }
 
-static struct blk_mq_cpu_notifier __cpuinitdata cpu_notifier = {
-	.notify = blk_mq_cpu_notify,
-};
-
 void __init blk_mq_cpu_init(void)
 {
-	register_hotcpu_notifier(&blk_mq_main_cpu_notifier);
-	blk_mq_register_cpu_notifier(&cpu_notifier);
+	hotcpu_notifier(blk_mq_main_cpu_notify, 0);
 }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c79126e..57039fc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -27,8 +27,6 @@
 
 static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
 
-DEFINE_PER_CPU(struct llist_head, ipi_lists);
-
 static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
 					   unsigned int cpu)
 {
@@ -106,10 +104,13 @@
 
 	spin_lock_irq(q->queue_lock);
 	ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
-		!blk_queue_bypass(q), *q->queue_lock);
+		!blk_queue_bypass(q) || blk_queue_dying(q),
+		*q->queue_lock);
 	/* inc usage with lock held to avoid freeze_queue running here */
-	if (!ret)
+	if (!ret && !blk_queue_dying(q))
 		__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
+	else if (blk_queue_dying(q))
+		ret = -ENODEV;
 	spin_unlock_irq(q->queue_lock);
 
 	return ret;
@@ -120,6 +121,22 @@
 	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
 }
 
+static void __blk_mq_drain_queue(struct request_queue *q)
+{
+	while (true) {
+		s64 count;
+
+		spin_lock_irq(q->queue_lock);
+		count = percpu_counter_sum(&q->mq_usage_counter);
+		spin_unlock_irq(q->queue_lock);
+
+		if (count == 0)
+			break;
+		blk_mq_run_queues(q, false);
+		msleep(10);
+	}
+}
+
 /*
  * Guarantee no request is in use, so we can change any data structure of
  * the queue afterward.
@@ -133,21 +150,13 @@
 	queue_flag_set(QUEUE_FLAG_BYPASS, q);
 	spin_unlock_irq(q->queue_lock);
 
-	if (!drain)
-		return;
+	if (drain)
+		__blk_mq_drain_queue(q);
+}
 
-	while (true) {
-		s64 count;
-
-		spin_lock_irq(q->queue_lock);
-		count = percpu_counter_sum(&q->mq_usage_counter);
-		spin_unlock_irq(q->queue_lock);
-
-		if (count == 0)
-			break;
-		blk_mq_run_queues(q, false);
-		msleep(10);
-	}
+void blk_mq_drain_queue(struct request_queue *q)
+{
+	__blk_mq_drain_queue(q);
 }
 
 static void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -179,6 +188,8 @@
 
 	rq->mq_ctx = ctx;
 	rq->cmd_flags = rw_flags;
+	rq->start_time = jiffies;
+	set_start_time_ns(rq);
 	ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
 }
 
@@ -305,7 +316,7 @@
 		struct bio *next = bio->bi_next;
 
 		bio->bi_next = NULL;
-		bytes += bio->bi_size;
+		bytes += bio->bi_iter.bi_size;
 		blk_mq_bio_endio(rq, bio, error);
 		bio = next;
 	}
@@ -326,56 +337,13 @@
 		blk_mq_complete_request(rq, error);
 }
 
-#if defined(CONFIG_SMP)
-
-/*
- * Called with interrupts disabled.
- */
-static void ipi_end_io(void *data)
+static void blk_mq_end_io_remote(void *data)
 {
-	struct llist_head *list = &per_cpu(ipi_lists, smp_processor_id());
-	struct llist_node *entry, *next;
-	struct request *rq;
+	struct request *rq = data;
 
-	entry = llist_del_all(list);
-
-	while (entry) {
-		next = entry->next;
-		rq = llist_entry(entry, struct request, ll_list);
-		__blk_mq_end_io(rq, rq->errors);
-		entry = next;
-	}
+	__blk_mq_end_io(rq, rq->errors);
 }
 
-static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
-			  struct request *rq, const int error)
-{
-	struct call_single_data *data = &rq->csd;
-
-	rq->errors = error;
-	rq->ll_list.next = NULL;
-
-	/*
-	 * If the list is non-empty, an existing IPI must already
-	 * be "in flight". If that is the case, we need not schedule
-	 * a new one.
-	 */
-	if (llist_add(&rq->ll_list, &per_cpu(ipi_lists, ctx->cpu))) {
-		data->func = ipi_end_io;
-		data->flags = 0;
-		__smp_call_function_single(ctx->cpu, data, 0);
-	}
-
-	return true;
-}
-#else /* CONFIG_SMP */
-static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
-			  struct request *rq, const int error)
-{
-	return false;
-}
-#endif
-
 /*
  * End IO on this request on a multiqueue enabled driver. We'll either do
  * it directly inline, or punt to a local IPI handler on the matching
@@ -390,11 +358,15 @@
 		return __blk_mq_end_io(rq, error);
 
 	cpu = get_cpu();
-
-	if (cpu == ctx->cpu || !cpu_online(ctx->cpu) ||
-	    !ipi_remote_cpu(ctx, cpu, rq, error))
+	if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
+		rq->errors = error;
+		rq->csd.func = blk_mq_end_io_remote;
+		rq->csd.info = rq;
+		rq->csd.flags = 0;
+		__smp_call_function_single(ctx->cpu, &rq->csd, 0);
+	} else {
 		__blk_mq_end_io(rq, error);
-
+	}
 	put_cpu();
 }
 EXPORT_SYMBOL(blk_mq_end_io);
@@ -1091,8 +1063,8 @@
 	struct page *page;
 
 	while (!list_empty(&hctx->page_list)) {
-		page = list_first_entry(&hctx->page_list, struct page, list);
-		list_del_init(&page->list);
+		page = list_first_entry(&hctx->page_list, struct page, lru);
+		list_del_init(&page->lru);
 		__free_pages(page, page->private);
 	}
 
@@ -1156,7 +1128,7 @@
 			break;
 
 		page->private = this_order;
-		list_add_tail(&page->list, &hctx->page_list);
+		list_add_tail(&page->lru, &hctx->page_list);
 
 		p = page_address(page);
 		entries_per_page = order_to_size(this_order) / rq_size;
@@ -1429,7 +1401,6 @@
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		cancel_delayed_work_sync(&hctx->delayed_work);
 		kfree(hctx->ctx_map);
 		kfree(hctx->ctxs);
 		blk_mq_free_rq_map(hctx);
@@ -1451,7 +1422,6 @@
 	list_del_init(&q->all_q_node);
 	mutex_unlock(&all_q_mutex);
 }
-EXPORT_SYMBOL(blk_mq_free_queue);
 
 /* Basically redo blk_mq_init_queue with queue frozen */
 static void blk_mq_queue_reinit(struct request_queue *q)
@@ -1495,11 +1465,6 @@
 
 static int __init blk_mq_init(void)
 {
-	unsigned int i;
-
-	for_each_possible_cpu(i)
-		init_llist_head(&per_cpu(ipi_lists, i));
-
 	blk_mq_cpu_init();
 
 	/* Must be called after percpu_counter_hotcpu_callback() */
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 52bf1f9..5c39179 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -27,6 +27,8 @@
 void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_init_flush(struct request_queue *q);
+void blk_mq_drain_queue(struct request_queue *q);
+void blk_mq_free_queue(struct request_queue *q);
 
 /*
  * CPU hotplug helpers
@@ -38,7 +40,6 @@
 void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
 void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
 void blk_mq_cpu_init(void);
-DECLARE_PER_CPU(struct llist_head, ipi_lists);
 
 /*
  * CPU -> queue mappings
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 05e8267..5d21239 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -592,6 +592,10 @@
 		ret = -1;
 	}
 
+	t->raid_partial_stripes_expensive =
+		max(t->raid_partial_stripes_expensive,
+		    b->raid_partial_stripes_expensive);
+
 	/* Find lowest common alignment_offset */
 	t->alignment_offset = lcm(t->alignment_offset, alignment)
 		& (max(t->physical_block_size, t->io_min) - 1);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9777952..8095c4a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -11,6 +11,7 @@
 
 #include "blk.h"
 #include "blk-cgroup.h"
+#include "blk-mq.h"
 
 struct queue_sysfs_entry {
 	struct attribute attr;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a760857..1474c3a 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -877,14 +877,14 @@
 	do_div(tmp, HZ);
 	bytes_allowed = tmp;
 
-	if (tg->bytes_disp[rw] + bio->bi_size <= bytes_allowed) {
+	if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) {
 		if (wait)
 			*wait = 0;
 		return 1;
 	}
 
 	/* Calc approx time to dispatch */
-	extra_bytes = tg->bytes_disp[rw] + bio->bi_size - bytes_allowed;
+	extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
 	jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]);
 
 	if (!jiffy_wait)
@@ -987,7 +987,7 @@
 	bool rw = bio_data_dir(bio);
 
 	/* Charge the bio to the group */
-	tg->bytes_disp[rw] += bio->bi_size;
+	tg->bytes_disp[rw] += bio->bi_iter.bi_size;
 	tg->io_disp[rw]++;
 
 	/*
@@ -1003,8 +1003,8 @@
 	 */
 	if (!(bio->bi_rw & REQ_THROTTLED)) {
 		bio->bi_rw |= REQ_THROTTLED;
-		throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size,
-					     bio->bi_rw);
+		throtl_update_dispatch_stats(tg_to_blkg(tg),
+					     bio->bi_iter.bi_size, bio->bi_rw);
 	}
 }
 
@@ -1503,7 +1503,7 @@
 	if (tg) {
 		if (!tg->has_rules[rw]) {
 			throtl_update_dispatch_stats(tg_to_blkg(tg),
-						     bio->bi_size, bio->bi_rw);
+					bio->bi_iter.bi_size, bio->bi_rw);
 			goto out_unlock_rcu;
 		}
 	}
@@ -1559,7 +1559,7 @@
 	/* out-of-limit, queue to @tg */
 	throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
 		   rw == READ ? 'R' : 'W',
-		   tg->bytes_disp[rw], bio->bi_size, tg->bps[rw],
+		   tg->bytes_disp[rw], bio->bi_iter.bi_size, tg->bps[rw],
 		   tg->io_disp[rw], tg->iops[rw],
 		   sq->nr_queued[READ], sq->nr_queued[WRITE]);
 
diff --git a/block/cmdline-parser.c b/block/cmdline-parser.c
index cc2637f..9dbc67e 100644
--- a/block/cmdline-parser.c
+++ b/block/cmdline-parser.c
@@ -4,8 +4,7 @@
  * Written by Cai Zhiyong <caizhiyong@huawei.com>
  *
  */
-#include <linux/buffer_head.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/cmdline-parser.h>
 
 static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
@@ -159,6 +158,7 @@
 		*parts = next_parts;
 	}
 }
+EXPORT_SYMBOL(cmdline_parts_free);
 
 int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline)
 {
@@ -206,6 +206,7 @@
 	cmdline_parts_free(parts);
 	goto done;
 }
+EXPORT_SYMBOL(cmdline_parts_parse);
 
 struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
 					 const char *bdev)
@@ -214,17 +215,17 @@
 		parts = parts->next_parts;
 	return parts;
 }
+EXPORT_SYMBOL(cmdline_parts_find);
 
 /*
  *  add_part()
  *    0 success.
  *    1 cannot add so many partitions.
  */
-void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
-		       int slot,
-		       int (*add_part)(int, struct cmdline_subpart *, void *),
-		       void *param)
-
+int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
+		      int slot,
+		      int (*add_part)(int, struct cmdline_subpart *, void *),
+		      void *param)
 {
 	sector_t from = 0;
 	struct cmdline_subpart *subpart;
@@ -247,4 +248,7 @@
 		if (add_part(slot, subpart, param))
 			break;
 	}
+
+	return slot;
 }
+EXPORT_SYMBOL(cmdline_parts_set);
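
With the entry points now exported, a hedged usage sketch; the device name and slot numbering here are illustrative:

	static int add_one(int slot, struct cmdline_subpart *subpart, void *param)
	{
		/* register partition 'slot' from subpart's start/size here */
		return 0;	/* non-zero stops cmdline_parts_set() early */
	}

	static void parse_example(const char *cmdline, sector_t disk_size)
	{
		struct cmdline_parts *head, *p;

		if (cmdline_parts_parse(&head, cmdline))
			return;
		p = cmdline_parts_find(head, "mmcblk0");	/* hypothetical bdev */
		if (p)
			cmdline_parts_set(p, disk_size, 1, add_one, NULL);
		cmdline_parts_free(&head);
	}
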
diff --git a/block/elevator.c b/block/elevator.c
index b7ff286..42c45a7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -440,7 +440,7 @@
 	/*
 	 * See if our hash lookup can find a potential backmerge.
 	 */
-	__rq = elv_rqhash_find(q, bio->bi_sector);
+	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
 	if (__rq && elv_rq_merge_ok(__rq, bio)) {
 		*req = __rq;
 		return ELEVATOR_BACK_MERGE;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 625e3e4..2648797 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -323,12 +323,14 @@
 
 	if (hdr->iovec_count) {
 		size_t iov_data_len;
-		struct iovec *iov;
+		struct iovec *iov = NULL;
 
 		ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
 					    0, NULL, &iov);
-		if (ret < 0)
+		if (ret < 0) {
+			kfree(iov);
 			goto out;
+		}
 
 		iov_data_len = ret;
 		ret = 0;
diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index 7c081b3..0ee48be 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -75,6 +75,7 @@
 config BCMA_DRIVER_GPIO
 	bool "BCMA GPIO driver"
 	depends on BCMA && GPIOLIB
+	select IRQ_DOMAIN if BCMA_HOST_SOC
 	help
 	  Driver to provide access to the GPIO pins of the bcma bus.
 
diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c
index 45f0996..25f9887 100644
--- a/drivers/bcma/driver_gpio.c
+++ b/drivers/bcma/driver_gpio.c
@@ -9,6 +9,9 @@
  */
 
 #include <linux/gpio.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/export.h>
 #include <linux/bcma/bcma.h>
 
@@ -73,19 +76,136 @@
 	bcma_chipco_gpio_pullup(cc, 1 << gpio, 0);
 }
 
+#if IS_BUILTIN(CONFIG_BCMA_HOST_SOC)
 static int bcma_gpio_to_irq(struct gpio_chip *chip, unsigned gpio)
 {
 	struct bcma_drv_cc *cc = bcma_gpio_get_cc(chip);
 
 	if (cc->core->bus->hosttype == BCMA_HOSTTYPE_SOC)
-		return bcma_core_irq(cc->core);
+		return irq_find_mapping(cc->irq_domain, gpio);
 	else
 		return -EINVAL;
 }
 
+static void bcma_gpio_irq_unmask(struct irq_data *d)
+{
+	struct bcma_drv_cc *cc = irq_data_get_irq_chip_data(d);
+	int gpio = irqd_to_hwirq(d);
+	u32 val = bcma_chipco_gpio_in(cc, BIT(gpio));
+
+	bcma_chipco_gpio_polarity(cc, BIT(gpio), val);
+	bcma_chipco_gpio_intmask(cc, BIT(gpio), BIT(gpio));
+}
+
+static void bcma_gpio_irq_mask(struct irq_data *d)
+{
+	struct bcma_drv_cc *cc = irq_data_get_irq_chip_data(d);
+	int gpio = irqd_to_hwirq(d);
+
+	bcma_chipco_gpio_intmask(cc, BIT(gpio), 0);
+}
+
+static struct irq_chip bcma_gpio_irq_chip = {
+	.name		= "BCMA-GPIO",
+	.irq_mask	= bcma_gpio_irq_mask,
+	.irq_unmask	= bcma_gpio_irq_unmask,
+};
+
+static irqreturn_t bcma_gpio_irq_handler(int irq, void *dev_id)
+{
+	struct bcma_drv_cc *cc = dev_id;
+	u32 val = bcma_cc_read32(cc, BCMA_CC_GPIOIN);
+	u32 mask = bcma_cc_read32(cc, BCMA_CC_GPIOIRQ);
+	u32 pol = bcma_cc_read32(cc, BCMA_CC_GPIOPOL);
+	unsigned long irqs = (val ^ pol) & mask;
+	int gpio;
+
+	if (!irqs)
+		return IRQ_NONE;
+
+	for_each_set_bit(gpio, &irqs, cc->gpio.ngpio)
+		generic_handle_irq(bcma_gpio_to_irq(&cc->gpio, gpio));
+	bcma_chipco_gpio_polarity(cc, irqs, val & irqs);
+
+	return IRQ_HANDLED;
+}
+
+static int bcma_gpio_irq_domain_init(struct bcma_drv_cc *cc)
+{
+	struct gpio_chip *chip = &cc->gpio;
+	int gpio, hwirq, err;
+
+	if (cc->core->bus->hosttype != BCMA_HOSTTYPE_SOC)
+		return 0;
+
+	cc->irq_domain = irq_domain_add_linear(NULL, chip->ngpio,
+					       &irq_domain_simple_ops, cc);
+	if (!cc->irq_domain) {
+		err = -ENODEV;
+		goto err_irq_domain;
+	}
+	for (gpio = 0; gpio < chip->ngpio; gpio++) {
+		int irq = irq_create_mapping(cc->irq_domain, gpio);
+
+		irq_set_chip_data(irq, cc);
+		irq_set_chip_and_handler(irq, &bcma_gpio_irq_chip,
+					 handle_simple_irq);
+	}
+
+	hwirq = bcma_core_irq(cc->core);
+	err = request_irq(hwirq, bcma_gpio_irq_handler, IRQF_SHARED, "gpio",
+			  cc);
+	if (err)
+		goto err_req_irq;
+
+	bcma_chipco_gpio_intmask(cc, ~0, 0);
+	bcma_cc_set32(cc, BCMA_CC_IRQMASK, BCMA_CC_IRQ_GPIO);
+
+	return 0;
+
+err_req_irq:
+	for (gpio = 0; gpio < chip->ngpio; gpio++) {
+		int irq = irq_find_mapping(cc->irq_domain, gpio);
+
+		irq_dispose_mapping(irq);
+	}
+	irq_domain_remove(cc->irq_domain);
+err_irq_domain:
+	return err;
+}
+
+static void bcma_gpio_irq_domain_exit(struct bcma_drv_cc *cc)
+{
+	struct gpio_chip *chip = &cc->gpio;
+	int gpio;
+
+	if (cc->core->bus->hosttype != BCMA_HOSTTYPE_SOC)
+		return;
+
+	bcma_cc_mask32(cc, BCMA_CC_IRQMASK, ~BCMA_CC_IRQ_GPIO);
+	free_irq(bcma_core_irq(cc->core), cc);
+	for (gpio = 0; gpio < chip->ngpio; gpio++) {
+		int irq = irq_find_mapping(cc->irq_domain, gpio);
+
+		irq_dispose_mapping(irq);
+	}
+	irq_domain_remove(cc->irq_domain);
+}
+#else
+static int bcma_gpio_irq_domain_init(struct bcma_drv_cc *cc)
+{
+	return 0;
+}
+
+static void bcma_gpio_irq_domain_exit(struct bcma_drv_cc *cc)
+{
+}
+#endif
+
 int bcma_gpio_init(struct bcma_drv_cc *cc)
 {
 	struct gpio_chip *chip = &cc->gpio;
+	int err;
 
 	chip->label		= "bcma_gpio";
 	chip->owner		= THIS_MODULE;
@@ -95,7 +215,9 @@
 	chip->set		= bcma_gpio_set_value;
 	chip->direction_input	= bcma_gpio_direction_input;
 	chip->direction_output	= bcma_gpio_direction_output;
+#if IS_BUILTIN(CONFIG_BCMA_HOST_SOC)
 	chip->to_irq		= bcma_gpio_to_irq;
+#endif
 	chip->ngpio		= 16;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
@@ -105,10 +227,21 @@
 	else
 		chip->base		= -1;
 
-	return gpiochip_add(chip);
+	err = bcma_gpio_irq_domain_init(cc);
+	if (err)
+		return err;
+
+	err = gpiochip_add(chip);
+	if (err) {
+		bcma_gpio_irq_domain_exit(cc);
+		return err;
+	}
+
+	return 0;
 }
 
 int bcma_gpio_unregister(struct bcma_drv_cc *cc)
 {
+	bcma_gpio_irq_domain_exit(cc);
 	return gpiochip_remove(&cc->gpio);
 }
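
The flow added above, summarized: the shared chipcommon IRQ handler computes (GPIOIN ^ GPIOPOL) & GPIOIRQ to find the active pins, dispatches each through the linear domain with generic_handle_irq(), then flips the polarity so the line does not immediately retrigger. Consumers reach it through the standard gpiolib mapping; a hedged sketch with my_handler and dev hypothetical:

	int irq = gpio_to_irq(gpio);	/* ends up in bcma_gpio_to_irq() */

	if (irq >= 0)
		err = request_irq(irq, my_handler, 0, "my-gpio", dev);
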
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 9ffa90c..014a1cf 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -108,6 +108,8 @@
 
 source "drivers/block/mtip32xx/Kconfig"
 
+source "drivers/block/zram/Kconfig"
+
 config BLK_CPQ_DA
 	tristate "Compaq SMART2 support"
 	depends on PCI && VIRT_TO_BUS && 0
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 816d979..02b688d 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -42,6 +42,7 @@
 
 obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/
 obj-$(CONFIG_BLK_DEV_NULL_BLK)	+= null_blk.o
+obj-$(CONFIG_ZRAM) += zram/
 
 nvme-y		:= nvme-core.o nvme-scsi.o
 skd-y		:= skd_main.o
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 14a9d19..9220f8e 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -100,11 +100,8 @@
 
 struct buf {
 	ulong nframesout;
-	ulong resid;
-	ulong bv_resid;
-	sector_t sector;
 	struct bio *bio;
-	struct bio_vec *bv;
+	struct bvec_iter iter;
 	struct request *rq;
 };
 
@@ -120,13 +117,10 @@
 	ulong waited;
 	ulong waited_total;
 	struct aoetgt *t;		/* parent target I belong to */
-	sector_t lba;
 	struct sk_buff *skb;		/* command skb freed on module exit */
 	struct sk_buff *r_skb;		/* response skb for async processing */
 	struct buf *buf;
-	struct bio_vec *bv;
-	ulong bcnt;
-	ulong bv_off;
+	struct bvec_iter iter;
 	char flags;
 };
 
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index d251543..8184451 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -196,8 +196,7 @@
 
 	t = f->t;
 	f->buf = NULL;
-	f->lba = 0;
-	f->bv = NULL;
+	memset(&f->iter, 0, sizeof(f->iter));
 	f->r_skb = NULL;
 	f->flags = 0;
 	list_add(&f->head, &t->ffree);
@@ -295,21 +294,14 @@
 }
 
 static void
-skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt)
+skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
 {
 	int frag = 0;
-	ulong fcnt;
-loop:
-	fcnt = bv->bv_len - (off - bv->bv_offset);
-	if (fcnt > cnt)
-		fcnt = cnt;
-	skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);
-	cnt -= fcnt;
-	if (cnt <= 0)
-		return;
-	bv++;
-	off = bv->bv_offset;
-	goto loop;
+	struct bio_vec bv;
+
+	__bio_for_each_segment(bv, bio, iter, iter)
+		skb_fill_page_desc(skb, frag++, bv.bv_page,
+				   bv.bv_offset, bv.bv_len);
 }
 
 static void
@@ -346,12 +338,10 @@
 	t->nout++;
 	f->waited = 0;
 	f->waited_total = 0;
-	if (f->buf)
-		f->lba = f->buf->sector;
 
 	/* set up ata header */
-	ah->scnt = f->bcnt >> 9;
-	put_lba(ah, f->lba);
+	ah->scnt = f->iter.bi_size >> 9;
+	put_lba(ah, f->iter.bi_sector);
 	if (t->d->flags & DEVFL_EXT) {
 		ah->aflags |= AOEAFL_EXT;
 	} else {
@@ -360,11 +350,11 @@
 		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
 	}
 	if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
-		skb_fillup(skb, f->bv, f->bv_off, f->bcnt);
+		skb_fillup(skb, f->buf->bio, f->iter);
 		ah->aflags |= AOEAFL_WRITE;
-		skb->len += f->bcnt;
-		skb->data_len = f->bcnt;
-		skb->truesize += f->bcnt;
+		skb->len += f->iter.bi_size;
+		skb->data_len = f->iter.bi_size;
+		skb->truesize += f->iter.bi_size;
 		t->wpkts++;
 	} else {
 		t->rpkts++;
@@ -382,7 +372,6 @@
 	struct buf *buf;
 	struct sk_buff *skb;
 	struct sk_buff_head queue;
-	ulong bcnt, fbcnt;
 
 	buf = nextbuf(d);
 	if (buf == NULL)
@@ -390,39 +379,22 @@
 	f = newframe(d);
 	if (f == NULL)
 		return 0;
-	bcnt = d->maxbcnt;
-	if (bcnt == 0)
-		bcnt = DEFAULTBCNT;
-	if (bcnt > buf->resid)
-		bcnt = buf->resid;
-	fbcnt = bcnt;
-	f->bv = buf->bv;
-	f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
-	do {
-		if (fbcnt < buf->bv_resid) {
-			buf->bv_resid -= fbcnt;
-			buf->resid -= fbcnt;
-			break;
-		}
-		fbcnt -= buf->bv_resid;
-		buf->resid -= buf->bv_resid;
-		if (buf->resid == 0) {
-			d->ip.buf = NULL;
-			break;
-		}
-		buf->bv++;
-		buf->bv_resid = buf->bv->bv_len;
-		WARN_ON(buf->bv_resid == 0);
-	} while (fbcnt);
 
 	/* initialize the headers & frame */
 	f->buf = buf;
-	f->bcnt = bcnt;
-	ata_rw_frameinit(f);
+	f->iter = buf->iter;
+	f->iter.bi_size = min_t(unsigned long,
+				d->maxbcnt ?: DEFAULTBCNT,
+				f->iter.bi_size);
+	bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);
+
+	if (!buf->iter.bi_size)
+		d->ip.buf = NULL;
 
 	/* mark all tracking fields and load out */
 	buf->nframesout += 1;
-	buf->sector += bcnt >> 9;
+
+	ata_rw_frameinit(f);
 
 	skb = skb_clone(f->skb, GFP_ATOMIC);
 	if (skb) {
@@ -613,10 +585,7 @@
 	skb = nf->skb;
 	nf->skb = f->skb;
 	nf->buf = f->buf;
-	nf->bcnt = f->bcnt;
-	nf->lba = f->lba;
-	nf->bv = f->bv;
-	nf->bv_off = f->bv_off;
+	nf->iter = f->iter;
 	nf->waited = 0;
 	nf->waited_total = f->waited_total;
 	nf->sent = f->sent;
@@ -648,19 +617,19 @@
 	}
 	f->flags |= FFL_PROBE;
 	ifrotate(t);
-	f->bcnt = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
+	f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
 	ata_rw_frameinit(f);
 	skb = f->skb;
-	for (frag = 0, n = f->bcnt; n > 0; ++frag, n -= m) {
+	for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
 		if (n < PAGE_SIZE)
 			m = n;
 		else
 			m = PAGE_SIZE;
 		skb_fill_page_desc(skb, frag, empty_page, 0, m);
 	}
-	skb->len += f->bcnt;
-	skb->data_len = f->bcnt;
-	skb->truesize += f->bcnt;
+	skb->len += f->iter.bi_size;
+	skb->data_len = f->iter.bi_size;
+	skb->truesize += f->iter.bi_size;
 
 	skb = skb_clone(f->skb, GFP_ATOMIC);
 	if (skb) {
@@ -897,15 +866,15 @@
 static void
 bio_pageinc(struct bio *bio)
 {
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	struct page *page;
-	int i;
+	struct bvec_iter iter;
 
-	bio_for_each_segment(bv, bio, i) {
+	bio_for_each_segment(bv, bio, iter) {
 		/* Non-zero page count for non-head members of
 		 * compound pages is no longer allowed by the kernel.
 		 */
-		page = compound_trans_head(bv->bv_page);
+		page = compound_trans_head(bv.bv_page);
 		atomic_inc(&page->_count);
 	}
 }
@@ -913,12 +882,12 @@
 static void
 bio_pagedec(struct bio *bio)
 {
-	struct bio_vec *bv;
 	struct page *page;
-	int i;
+	struct bio_vec bv;
+	struct bvec_iter iter;
 
-	bio_for_each_segment(bv, bio, i) {
-		page = compound_trans_head(bv->bv_page);
+	bio_for_each_segment(bv, bio, iter) {
+		page = compound_trans_head(bv.bv_page);
 		atomic_dec(&page->_count);
 	}
 }
@@ -929,12 +898,8 @@
 	memset(buf, 0, sizeof(*buf));
 	buf->rq = rq;
 	buf->bio = bio;
-	buf->resid = bio->bi_size;
-	buf->sector = bio->bi_sector;
+	buf->iter = bio->bi_iter;
 	bio_pageinc(bio);
-	buf->bv = bio_iovec(bio);
-	buf->bv_resid = buf->bv->bv_len;
-	WARN_ON(buf->bv_resid == 0);
 }
 
 static struct buf *
@@ -1119,24 +1084,18 @@
 }
 
 static void
-bvcpy(struct bio_vec *bv, ulong off, struct sk_buff *skb, long cnt)
+bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
 {
-	ulong fcnt;
-	char *p;
 	int soff = 0;
-loop:
-	fcnt = bv->bv_len - (off - bv->bv_offset);
-	if (fcnt > cnt)
-		fcnt = cnt;
-	p = page_address(bv->bv_page) + off;
-	skb_copy_bits(skb, soff, p, fcnt);
-	soff += fcnt;
-	cnt -= fcnt;
-	if (cnt <= 0)
-		return;
-	bv++;
-	off = bv->bv_offset;
-	goto loop;
+	struct bio_vec bv;
+
+	iter.bi_size = cnt;
+
+	__bio_for_each_segment(bv, bio, iter, iter) {
+		char *p = page_address(bv.bv_page) + bv.bv_offset;
+		skb_copy_bits(skb, soff, p, bv.bv_len);
+		soff += bv.bv_len;
+	}
 }
 
 void
@@ -1152,7 +1111,7 @@
 	do {
 		bio = rq->bio;
 		bok = !fastfail && test_bit(BIO_UPTODATE, &bio->bi_flags);
-	} while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_size));
+	} while (__blk_end_request(rq, bok ? 0 : -EIO, bio->bi_iter.bi_size));
 
 	/* cf. http://lkml.org/lkml/2006/10/31/28 */
 	if (!fastfail)
@@ -1229,7 +1188,15 @@
 			clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
 			break;
 		}
-		bvcpy(f->bv, f->bv_off, skb, n);
+		if (n > f->iter.bi_size) {
+			pr_err_ratelimited("%s e%ld.%d.  bytes=%ld need=%u\n",
+				"aoe: too-large data size in read from",
+				(long) d->aoemajor, d->aoeminor,
+				n, f->iter.bi_size);
+			clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
+			break;
+		}
+		bvcpy(skb, f->buf->bio, f->iter, n);
 	case ATA_CMD_PIO_WRITE:
 	case ATA_CMD_PIO_WRITE_EXT:
 		spin_lock_irq(&d->lock);
@@ -1272,7 +1239,7 @@
 
 	aoe_freetframe(f);
 
-	if (buf && --buf->nframesout == 0 && buf->resid == 0)
+	if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
 		aoe_end_buf(d, buf);
 
 	spin_unlock_irq(&d->lock);
@@ -1727,7 +1694,7 @@
 {
 	if (buf == NULL)
 		return;
-	buf->resid = 0;
+	buf->iter.bi_size = 0;
 	clear_bit(BIO_UPTODATE, &buf->bio->bi_flags);
 	if (buf->nframesout == 0)
 		aoe_end_buf(d, buf);
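
The bvcpy()/skb_fillup() rewrites rely on bvec_iter being a small value type: clamping a copy's bi_size bounds the walk without touching the bio. A minimal sketch of that trick:

	/* visit only the first cnt bytes of the range 'start' describes */
	static unsigned int walk_first(struct bio *bio, struct bvec_iter start,
				       unsigned int cnt)
	{
		struct bio_vec bv;
		struct bvec_iter iter = start;
		unsigned int seen = 0;

		iter.bi_size = cnt;	/* clamp the copy; bio is untouched */
		__bio_for_each_segment(bv, bio, iter, iter)
			seen += bv.bv_len;
		return seen;
	}
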
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index d91f1a5..e73b85c 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -328,18 +328,18 @@
 	struct block_device *bdev = bio->bi_bdev;
 	struct brd_device *brd = bdev->bd_disk->private_data;
 	int rw;
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
 	sector_t sector;
-	int i;
+	struct bvec_iter iter;
 	int err = -EIO;
 
-	sector = bio->bi_sector;
+	sector = bio->bi_iter.bi_sector;
 	if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
 		goto out;
 
 	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
 		err = 0;
-		discard_from_brd(brd, sector, bio->bi_size);
+		discard_from_brd(brd, sector, bio->bi_iter.bi_size);
 		goto out;
 	}
 
@@ -347,10 +347,10 @@
 	if (rw == READA)
 		rw = READ;
 
-	bio_for_each_segment(bvec, bio, i) {
-		unsigned int len = bvec->bv_len;
-		err = brd_do_bvec(brd, bvec->bv_page, len,
-					bvec->bv_offset, rw, sector);
+	bio_for_each_segment(bvec, bio, iter) {
+		unsigned int len = bvec.bv_len;
+		err = brd_do_bvec(brd, bvec.bv_page, len,
+					bvec.bv_offset, rw, sector);
 		if (err)
 			break;
 		sector += len >> SECTOR_SHIFT;
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index b35fc4f..036e8ab 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -5004,7 +5004,7 @@
 
 	i = alloc_cciss_hba(pdev);
 	if (i < 0)
-		return -1;
+		return -ENOMEM;
 
 	h = hba[i];
 	h->pdev = pdev;
@@ -5205,7 +5205,7 @@
 	 */
 	pci_set_drvdata(pdev, NULL);
 	free_hba(h);
-	return -1;
+	return -ENODEV;
 }
 
 static void cciss_shutdown(struct pci_dev *pdev)
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 28c73ca..a9b13f2 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -159,7 +159,7 @@
 
 	bio = bio_alloc_drbd(GFP_NOIO);
 	bio->bi_bdev = bdev->md_bdev;
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	err = -EIO;
 	if (bio_add_page(bio, page, size, 0) != size)
 		goto out;
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index b12c11e..597f111 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1028,7 +1028,7 @@
 	} else
 		page = b->bm_pages[page_nr];
 	bio->bi_bdev = mdev->ldev->md_bdev;
-	bio->bi_sector = on_disk_sector;
+	bio->bi_iter.bi_sector = on_disk_sector;
 	/* bio_add_page of a single page to an empty bio will always succeed,
 	 * according to api.  Do we want to assert that? */
 	bio_add_page(bio, page, len, 0);
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 9e3818b..929468e 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1537,15 +1537,17 @@
 
 static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
 {
-	struct bio_vec *bvec;
-	int i;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+
 	/* hint all but last page with MSG_MORE */
-	bio_for_each_segment(bvec, bio, i) {
+	bio_for_each_segment(bvec, bio, iter) {
 		int err;
 
-		err = _drbd_no_send_page(mdev, bvec->bv_page,
-					 bvec->bv_offset, bvec->bv_len,
-					 i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
+		err = _drbd_no_send_page(mdev, bvec.bv_page,
+					 bvec.bv_offset, bvec.bv_len,
+					 bio_iter_last(bvec, iter)
+					 ? 0 : MSG_MORE);
 		if (err)
 			return err;
 	}
@@ -1554,15 +1556,16 @@
 
 static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
 {
-	struct bio_vec *bvec;
-	int i;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+
 	/* hint all but last page with MSG_MORE */
-	bio_for_each_segment(bvec, bio, i) {
+	bio_for_each_segment(bvec, bio, iter) {
 		int err;
 
-		err = _drbd_send_page(mdev, bvec->bv_page,
-				      bvec->bv_offset, bvec->bv_len,
-				      i == bio->bi_vcnt - 1 ? 0 : MSG_MORE);
+		err = _drbd_send_page(mdev, bvec.bv_page,
+				      bvec.bv_offset, bvec.bv_len,
+				      bio_iter_last(bvec, iter) ? 0 : MSG_MORE);
 		if (err)
 			return err;
 	}
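
bio_iter_last() above replaces the old i == bi_vcnt - 1 test, which loses meaning once iteration can start partway through the vector; the helper instead checks whether the current bvec exhausts what the iterator has left. The common send-loop shape, with send_page() hypothetical:

	struct bio_vec bvec;
	struct bvec_iter iter;

	bio_for_each_segment(bvec, bio, iter) {
		/* MSG_MORE on every segment except the final one */
		int flags = bio_iter_last(bvec, iter) ? 0 : MSG_MORE;

		send_page(bvec.bv_page, bvec.bv_offset, bvec.bv_len, flags);	/* hypothetical */
	}
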
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 6fa6673..d073305 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1333,7 +1333,7 @@
 		goto fail;
 	}
 	/* > peer_req->i.sector, unless this is the first bio */
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	bio->bi_bdev = mdev->ldev->backing_bdev;
 	bio->bi_rw = rw;
 	bio->bi_private = peer_req;
@@ -1353,7 +1353,7 @@
 				dev_err(DEV,
 					"bio_add_page failed for len=%u, "
 					"bi_vcnt=0 (bi_sector=%llu)\n",
-					len, (unsigned long long)bio->bi_sector);
+					len, (uint64_t)bio->bi_iter.bi_sector);
 				err = -ENOSPC;
 				goto fail;
 			}
@@ -1595,9 +1595,10 @@
 static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
 			   sector_t sector, int data_size)
 {
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	struct bio *bio;
-	int dgs, err, i, expect;
+	int dgs, err, expect;
 	void *dig_in = mdev->tconn->int_dig_in;
 	void *dig_vv = mdev->tconn->int_dig_vv;
 
@@ -1615,13 +1616,13 @@
 	mdev->recv_cnt += data_size>>9;
 
 	bio = req->master_bio;
-	D_ASSERT(sector == bio->bi_sector);
+	D_ASSERT(sector == bio->bi_iter.bi_sector);
 
-	bio_for_each_segment(bvec, bio, i) {
-		void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
-		expect = min_t(int, data_size, bvec->bv_len);
+	bio_for_each_segment(bvec, bio, iter) {
+		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
+		expect = min_t(int, data_size, bvec.bv_len);
 		err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
-		kunmap(bvec->bv_page);
+		kunmap(bvec.bv_page);
 		if (err)
 			return err;
 		data_size -= expect;
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index fec7bef..104a040 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -77,8 +77,8 @@
 	req->epoch       = 0;
 
 	drbd_clear_interval(&req->i);
-	req->i.sector     = bio_src->bi_sector;
-	req->i.size      = bio_src->bi_size;
+	req->i.sector     = bio_src->bi_iter.bi_sector;
+	req->i.size      = bio_src->bi_iter.bi_size;
 	req->i.local = true;
 	req->i.waiting = false;
 
@@ -1280,7 +1280,7 @@
 	/*
 	 * what we "blindly" assume:
 	 */
-	D_ASSERT(IS_ALIGNED(bio->bi_size, 512));
+	D_ASSERT(IS_ALIGNED(bio->bi_iter.bi_size, 512));
 
 	inc_ap_bio(mdev);
 	__drbd_make_request(mdev, bio, start_time);
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 978cb1a..28e15d9 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -269,7 +269,7 @@
 
 /* Short lived temporary struct on the stack.
  * We could squirrel the error to be returned into
- * bio->bi_size, or similar. But that would be too ugly. */
+ * bio->bi_iter.bi_size, or similar. But that would be too ugly. */
 struct bio_and_error {
 	struct bio *bio;
 	int error;
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 891c0ec..84d3175 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -313,8 +313,8 @@
 {
 	struct hash_desc desc;
 	struct scatterlist sg;
-	struct bio_vec *bvec;
-	int i;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 
 	desc.tfm = tfm;
 	desc.flags = 0;
@@ -322,8 +322,8 @@
 	sg_init_table(&sg, 1);
 	crypto_hash_init(&desc);
 
-	bio_for_each_segment(bvec, bio, i) {
-		sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
+	bio_for_each_segment(bvec, bio, iter) {
+		sg_set_page(&sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
 		crypto_hash_update(&desc, &sg, sg.length);
 	}
 	crypto_hash_final(&desc, digest);
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 000abe2..2023043 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2351,7 +2351,7 @@
 /* Compute maximal contiguous buffer size. */
 static int buffer_chain_size(void)
 {
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	int size;
 	struct req_iterator iter;
 	char *base;
@@ -2360,10 +2360,10 @@
 	size = 0;
 
 	rq_for_each_segment(bv, current_req, iter) {
-		if (page_address(bv->bv_page) + bv->bv_offset != base + size)
+		if (page_address(bv.bv_page) + bv.bv_offset != base + size)
 			break;
 
-		size += bv->bv_len;
+		size += bv.bv_len;
 	}
 
 	return size >> 9;
@@ -2389,7 +2389,7 @@
 static void copy_buffer(int ssize, int max_sector, int max_sector_2)
 {
 	int remaining;		/* number of transferred 512-byte sectors */
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	char *buffer;
 	char *dma_buffer;
 	int size;
@@ -2427,10 +2427,10 @@
 		if (!remaining)
 			break;
 
-		size = bv->bv_len;
+		size = bv.bv_len;
 		SUPBOUND(size, remaining);
 
-		buffer = page_address(bv->bv_page) + bv->bv_offset;
+		buffer = page_address(bv.bv_page) + bv.bv_offset;
 		if (dma_buffer + size >
 		    floppy_track_buffer + (max_buffer_sectors << 10) ||
 		    dma_buffer < floppy_track_buffer) {
@@ -3691,9 +3691,12 @@
 	if (!(mode & FMODE_NDELAY)) {
 		if (mode & (FMODE_READ|FMODE_WRITE)) {
 			UDRS->last_checked = 0;
+			clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
 			check_disk_change(bdev);
 			if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags))
 				goto out;
+			if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags))
+				goto out;
 		}
 		res = -EROFS;
 		if ((mode & FMODE_WRITE) &&
@@ -3746,17 +3749,29 @@
  * a disk in the drive, and whether that disk is writable.
  */
 
-static void floppy_rb0_complete(struct bio *bio, int err)
+struct rb0_cbdata {
+	int drive;
+	struct completion complete;
+};
+
+static void floppy_rb0_cb(struct bio *bio, int err)
 {
-	complete((struct completion *)bio->bi_private);
+	struct rb0_cbdata *cbdata = (struct rb0_cbdata *)bio->bi_private;
+	int drive = cbdata->drive;
+
+	if (err) {
+		pr_info("floppy: error %d while reading block 0\n", err);
+		set_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags);
+	}
+	complete(&cbdata->complete);
 }
 
-static int __floppy_read_block_0(struct block_device *bdev)
+static int __floppy_read_block_0(struct block_device *bdev, int drive)
 {
 	struct bio bio;
 	struct bio_vec bio_vec;
-	struct completion complete;
 	struct page *page;
+	struct rb0_cbdata cbdata;
 	size_t size;
 
 	page = alloc_page(GFP_NOIO);
@@ -3769,23 +3784,26 @@
 	if (!size)
 		size = 1024;
 
+	cbdata.drive = drive;
+
 	bio_init(&bio);
 	bio.bi_io_vec = &bio_vec;
 	bio_vec.bv_page = page;
 	bio_vec.bv_len = size;
 	bio_vec.bv_offset = 0;
 	bio.bi_vcnt = 1;
-	bio.bi_size = size;
+	bio.bi_iter.bi_size = size;
 	bio.bi_bdev = bdev;
-	bio.bi_sector = 0;
+	bio.bi_iter.bi_sector = 0;
 	bio.bi_flags = (1 << BIO_QUIET);
-	init_completion(&complete);
-	bio.bi_private = &complete;
-	bio.bi_end_io = floppy_rb0_complete;
+	init_completion(&cbdata.complete);
+	bio.bi_private = &cbdata;
+	bio.bi_end_io = floppy_rb0_cb;
 
 	submit_bio(READ, &bio);
 	process_fd_request();
-	wait_for_completion(&complete);
+
+	wait_for_completion(&cbdata.complete);
 
 	__free_page(page);
 
@@ -3827,7 +3845,7 @@
 			UDRS->generation++;
 		if (drive_no_geom(drive)) {
 			/* auto-sensing */
-			res = __floppy_read_block_0(opened_bdev[drive]);
+			res = __floppy_read_block_0(opened_bdev[drive], drive);
 		} else {
 			if (cf)
 				poll_drive(false, FD_RAW_NEED_DISK);
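
The rewritten __floppy_read_block_0() is an instance of the synchronous
on-stack-bio pattern. A hedged sketch, with the end_io callback reduced to its
essence (floppy_rb0_cb above additionally latches the error into the drive
flags); note that the completion is initialised before submit_bio(), since a
fast device may otherwise call complete() on an uninitialised completion:

static void sync_read_end_io(struct bio *bio, int err)
{
	complete(bio->bi_private);	/* wake the submitter */
}

/* Illustrative only: read 'size' bytes from sector 0 into 'page'. */
static void sync_read_page(struct block_device *bdev, struct page *page,
			   size_t size)
{
	struct bio bio;
	struct bio_vec bvec;
	struct completion done;

	bio_init(&bio);
	bio.bi_io_vec = &bvec;
	bvec.bv_page = page;
	bvec.bv_len = size;
	bvec.bv_offset = 0;
	bio.bi_vcnt = 1;
	bio.bi_iter.bi_size = size;
	bio.bi_iter.bi_sector = 0;
	bio.bi_bdev = bdev;

	init_completion(&done);		/* must precede submit_bio() */
	bio.bi_private = &done;
	bio.bi_end_io = sync_read_end_io;

	submit_bio(READ, &bio);
	wait_for_completion(&done);
}
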
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index c8dac73..66e8c3b 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -288,9 +288,10 @@
 {
 	int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
 			struct page *page);
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	struct page *page = NULL;
-	int i, ret = 0;
+	int ret = 0;
 
 	if (lo->transfer != transfer_none) {
 		page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
@@ -302,11 +303,11 @@
 		do_lo_send = do_lo_send_direct_write;
 	}
 
-	bio_for_each_segment(bvec, bio, i) {
-		ret = do_lo_send(lo, bvec, pos, page);
+	bio_for_each_segment(bvec, bio, iter) {
+		ret = do_lo_send(lo, &bvec, pos, page);
 		if (ret < 0)
 			break;
-		pos += bvec->bv_len;
+		pos += bvec.bv_len;
 	}
 	if (page) {
 		kunmap(page);
@@ -392,20 +393,20 @@
 static int
 lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
 {
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	ssize_t s;
-	int i;
 
-	bio_for_each_segment(bvec, bio, i) {
-		s = do_lo_receive(lo, bvec, bsize, pos);
+	bio_for_each_segment(bvec, bio, iter) {
+		s = do_lo_receive(lo, &bvec, bsize, pos);
 		if (s < 0)
 			return s;
 
-		if (s != bvec->bv_len) {
+		if (s != bvec.bv_len) {
 			zero_fill_bio(bio);
 			break;
 		}
-		pos += bvec->bv_len;
+		pos += bvec.bv_len;
 	}
 	return 0;
 }
@@ -415,7 +416,7 @@
 	loff_t pos;
 	int ret;
 
-	pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
+	pos = ((loff_t) bio->bi_iter.bi_sector << 9) + lo->lo_offset;
 
 	if (bio_rw(bio) == WRITE) {
 		struct file *file = lo->lo_backing_file;
@@ -444,7 +445,7 @@
 				goto out;
 			}
 			ret = file->f_op->fallocate(file, mode, pos,
-						    bio->bi_size);
+						    bio->bi_iter.bi_size);
 			if (unlikely(ret && ret != -EINVAL &&
 				     ret != -EOPNOTSUPP))
 				ret = -EIO;
@@ -798,7 +799,7 @@
 
 	/*
 	 * We use punch hole to reclaim the free space used by the
-	 * image a.k.a. discard. However we do support discard if
+	 * image a.k.a. discard. However we do not support discard if
 	 * encryption is enabled, because it may give an attacker
 	 * useful information.
 	 */
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 7bc363f..eb59b12 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c
@@ -915,7 +915,7 @@
 
 	/* disk reset */
 	if (prv_data->dev_attr == MG_STORAGE_DEV) {
-		/* If POR seq. not yet finised, wait */
+		/* If POR seq. not yet finished, wait */
 		err = mg_wait_rstout(host->rstout, MG_TMAX_RSTOUT);
 		if (err)
 			goto probe_err_3b;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 050c712..51602695 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -41,10 +41,31 @@
 #include "mtip32xx.h"
 
 #define HW_CMD_SLOT_SZ		(MTIP_MAX_COMMAND_SLOTS * 32)
-#define HW_CMD_TBL_SZ		(AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16))
-#define HW_CMD_TBL_AR_SZ	(HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS)
-#define HW_PORT_PRIV_DMA_SZ \
-		(HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
+
+/* DMA region containing RX Fis, Identify, RLE10, and SMART buffers */
+#define AHCI_RX_FIS_SZ          0x100
+#define AHCI_RX_FIS_OFFSET      0x0
+#define AHCI_IDFY_SZ            ATA_SECT_SIZE
+#define AHCI_IDFY_OFFSET        0x400
+#define AHCI_SECTBUF_SZ         ATA_SECT_SIZE
+#define AHCI_SECTBUF_OFFSET     0x800
+#define AHCI_SMARTBUF_SZ        ATA_SECT_SIZE
+#define AHCI_SMARTBUF_OFFSET    0xC00
+/* 0x100 + 0x200 + 0x200 + 0x200 is smaller than 4k but we pad it out */
+#define BLOCK_DMA_ALLOC_SZ      4096
+
+/* DMA region containing command table (should be 8192 bytes) */
+#define AHCI_CMD_SLOT_SZ        sizeof(struct mtip_cmd_hdr)
+#define AHCI_CMD_TBL_SZ         (MTIP_MAX_COMMAND_SLOTS * AHCI_CMD_SLOT_SZ)
+#define AHCI_CMD_TBL_OFFSET     0x0
+
+/* DMA region per command (contains header and SGL) */
+#define AHCI_CMD_TBL_HDR_SZ     0x80
+#define AHCI_CMD_TBL_HDR_OFFSET 0x0
+#define AHCI_CMD_TBL_SGL_SZ     (MTIP_MAX_SG * sizeof(struct mtip_cmd_sg))
+#define AHCI_CMD_TBL_SGL_OFFSET AHCI_CMD_TBL_HDR_SZ
+#define CMD_DMA_ALLOC_SZ        (AHCI_CMD_TBL_SGL_SZ + AHCI_CMD_TBL_HDR_SZ)
+
 
 #define HOST_CAP_NZDMA		(1 << 19)
 #define HOST_HSORG		0xFC
@@ -899,8 +920,9 @@
 			fail_reason = "thermal shutdown";
 		}
 		if (buf[288] == 0xBF) {
+			set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag);
 			dev_info(&dd->pdev->dev,
-				"Drive indicates rebuild has failed.\n");
+				"Drive indicates rebuild has failed. Secure erase required.\n");
 			fail_all_ncq_cmds = 1;
 			fail_reason = "rebuild failed";
 		}
@@ -1566,6 +1588,12 @@
 	}
 #endif
 
+	/* Check security locked state */
+	if (port->identify[128] & 0x4)
+		set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
+	else
+		clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
+
 #ifdef MTIP_TRIM /* Disabling TRIM support temporarily */
 	/* Demux ID.DRAT & ID.RZAT to determine trim support */
 	if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
@@ -1887,6 +1915,10 @@
 	strlcpy(cbuf, (char *)(port->identify+27), 41);
 	dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
 
+	dev_info(&port->dd->pdev->dev, "Security: %04x %s\n",
+		port->identify[128],
+		port->identify[128] & 0x4 ? "(LOCKED)" : "");
+
 	if (mtip_hw_get_capacity(port->dd, &sectors))
 		dev_info(&port->dd->pdev->dev,
 			"Capacity: %llu sectors (%llu MB)\n",
@@ -3313,6 +3345,118 @@
 }
 
 /*
+ * DMA region teardown
+ *
+ * @dd Pointer to driver_data structure
+ *
+ * return value
+ *      None
+ */
+static void mtip_dma_free(struct driver_data *dd)
+{
+	int i;
+	struct mtip_port *port = dd->port;
+
+	if (port->block1)
+		dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
+					port->block1, port->block1_dma);
+
+	if (port->command_list) {
+		dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
+				port->command_list, port->command_list_dma);
+	}
+
+	for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
+		if (port->commands[i].command)
+			dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
+				port->commands[i].command,
+				port->commands[i].command_dma);
+	}
+}
+
+/*
+ * DMA region setup
+ *
+ * @dd Pointer to driver_data structure
+ *
+ * return value
+ *      -ENOMEM Not enough free DMA region space to initialize driver
+ */
+static int mtip_dma_alloc(struct driver_data *dd)
+{
+	struct mtip_port *port = dd->port;
+	int i, rv = 0;
+	u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64;
+
+	/* Allocate dma memory for RX Fis, Identify, and Sector Buffer */
+	port->block1 =
+		dmam_alloc_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
+					&port->block1_dma, GFP_KERNEL);
+	if (!port->block1)
+		return -ENOMEM;
+	memset(port->block1, 0, BLOCK_DMA_ALLOC_SZ);
+
+	/* Allocate dma memory for command list */
+	port->command_list =
+		dmam_alloc_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ,
+					&port->command_list_dma, GFP_KERNEL);
+	if (!port->command_list) {
+		dmam_free_coherent(&dd->pdev->dev, BLOCK_DMA_ALLOC_SZ,
+					port->block1, port->block1_dma);
+		port->block1 = NULL;
+		port->block1_dma = 0;
+		return -ENOMEM;
+	}
+	memset(port->command_list, 0, AHCI_CMD_TBL_SZ);
+
+	/* Setup all pointers into first DMA region */
+	port->rxfis         = port->block1 + AHCI_RX_FIS_OFFSET;
+	port->rxfis_dma     = port->block1_dma + AHCI_RX_FIS_OFFSET;
+	port->identify      = port->block1 + AHCI_IDFY_OFFSET;
+	port->identify_dma  = port->block1_dma + AHCI_IDFY_OFFSET;
+	port->log_buf       = port->block1 + AHCI_SECTBUF_OFFSET;
+	port->log_buf_dma   = port->block1_dma + AHCI_SECTBUF_OFFSET;
+	port->smart_buf     = port->block1 + AHCI_SMARTBUF_OFFSET;
+	port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET;
+
+	/* Setup per command SGL DMA region */
+
+	/* Point the command headers at the command tables */
+	for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) {
+		port->commands[i].command =
+			dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ,
+				&port->commands[i].command_dma, GFP_KERNEL);
+		if (!port->commands[i].command) {
+			rv = -ENOMEM;
+			mtip_dma_free(dd);
+			return rv;
+		}
+		memset(port->commands[i].command, 0, CMD_DMA_ALLOC_SZ);
+
+		port->commands[i].command_header = port->command_list +
+					(sizeof(struct mtip_cmd_hdr) * i);
+		port->commands[i].command_header_dma =
+					dd->port->command_list_dma +
+					(sizeof(struct mtip_cmd_hdr) * i);
+
+		if (host_cap_64)
+			port->commands[i].command_header->ctbau =
+				__force_bit2int cpu_to_le32(
+				(port->commands[i].command_dma >> 16) >> 16);
+
+		port->commands[i].command_header->ctba =
+				__force_bit2int cpu_to_le32(
+				port->commands[i].command_dma & 0xFFFFFFFF);
+
+		sg_init_table(port->commands[i].sg, MTIP_MAX_SG);
+
+		/* Mark command as currently inactive */
+		atomic_set(&dd->port->commands[i].active, 0);
+	}
+	return 0;
+}
+
+/*
  * Called once for each card.
  *
  * @dd Pointer to the driver data structure.
@@ -3370,83 +3514,10 @@
 	dd->port->mmio	= dd->mmio + PORT_OFFSET;
 	dd->port->dd	= dd;
 
-	/* Allocate memory for the command list. */
-	dd->port->command_list =
-		dmam_alloc_coherent(&dd->pdev->dev,
-			HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
-			&dd->port->command_list_dma,
-			GFP_KERNEL);
-	if (!dd->port->command_list) {
-		dev_err(&dd->pdev->dev,
-			"Memory allocation: command list\n");
-		rv = -ENOMEM;
+	/* DMA allocations */
+	rv = mtip_dma_alloc(dd);
+	if (rv < 0)
 		goto out1;
-	}
-
-	/* Clear the memory we have allocated. */
-	memset(dd->port->command_list,
-		0,
-		HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
-
-	/* Setup the addresse of the RX FIS. */
-	dd->port->rxfis	    = dd->port->command_list + HW_CMD_SLOT_SZ;
-	dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
-
-	/* Setup the address of the command tables. */
-	dd->port->command_table	  = dd->port->rxfis + AHCI_RX_FIS_SZ;
-	dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
-
-	/* Setup the address of the identify data. */
-	dd->port->identify     = dd->port->command_table +
-					HW_CMD_TBL_AR_SZ;
-	dd->port->identify_dma = dd->port->command_tbl_dma +
-					HW_CMD_TBL_AR_SZ;
-
-	/* Setup the address of the sector buffer - for some non-ncq cmds */
-	dd->port->sector_buffer	= (void *) dd->port->identify + ATA_SECT_SIZE;
-	dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
-
-	/* Setup the address of the log buf - for read log command */
-	dd->port->log_buf = (void *)dd->port->sector_buffer  + ATA_SECT_SIZE;
-	dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
-
-	/* Setup the address of the smart buf - for smart read data command */
-	dd->port->smart_buf = (void *)dd->port->log_buf  + ATA_SECT_SIZE;
-	dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
-
-
-	/* Point the command headers at the command tables. */
-	for (i = 0; i < num_command_slots; i++) {
-		dd->port->commands[i].command_header =
-					dd->port->command_list +
-					(sizeof(struct mtip_cmd_hdr) * i);
-		dd->port->commands[i].command_header_dma =
-					dd->port->command_list_dma +
-					(sizeof(struct mtip_cmd_hdr) * i);
-
-		dd->port->commands[i].command =
-			dd->port->command_table + (HW_CMD_TBL_SZ * i);
-		dd->port->commands[i].command_dma =
-			dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
-
-		if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
-			dd->port->commands[i].command_header->ctbau =
-			__force_bit2int cpu_to_le32(
-			(dd->port->commands[i].command_dma >> 16) >> 16);
-		dd->port->commands[i].command_header->ctba =
-			__force_bit2int cpu_to_le32(
-			dd->port->commands[i].command_dma & 0xFFFFFFFF);
-
-		/*
-		 * If this is not done, a bug is reported by the stock
-		 * FC11 i386. Due to the fact that it has lots of kernel
-		 * debugging enabled.
-		 */
-		sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
-
-		/* Mark all commands as currently inactive.*/
-		atomic_set(&dd->port->commands[i].active, 0);
-	}
 
 	/* Setup the pointers to the extended s_active and CI registers. */
 	for (i = 0; i < dd->slot_groups; i++) {
@@ -3594,12 +3665,8 @@
 
 out2:
 	mtip_deinit_port(dd->port);
+	mtip_dma_free(dd);
 
-	/* Free the command/command header memory. */
-	dmam_free_coherent(&dd->pdev->dev,
-				HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
-				dd->port->command_list,
-				dd->port->command_list_dma);
 out1:
 	/* Free the memory allocated for the port structure. */
 	kfree(dd->port);
@@ -3622,7 +3689,8 @@
 	 * saves its state.
 	 */
 	if (!dd->sr) {
-		if (!test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag))
+		if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) &&
+		    !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))
 			if (mtip_standby_immediate(dd->port))
 				dev_warn(&dd->pdev->dev,
 					"STANDBY IMMEDIATE failed\n");
@@ -3641,11 +3709,9 @@
 	irq_set_affinity_hint(dd->pdev->irq, NULL);
 	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
 
-	/* Free the command/command header memory. */
-	dmam_free_coherent(&dd->pdev->dev,
-			HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
-			dd->port->command_list,
-			dd->port->command_list_dma);
+	/* Free dma regions */
+	mtip_dma_free(dd);
+
 	/* Free the memory allocated for the port structure. */
 	kfree(dd->port);
 	dd->port = NULL;
@@ -3962,8 +4028,9 @@
 {
 	struct driver_data *dd = queue->queuedata;
 	struct scatterlist *sg;
-	struct bio_vec *bvec;
-	int i, nents = 0;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
+	int nents = 0;
 	int tag = 0, unaligned = 0;
 
 	if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
@@ -3993,7 +4060,7 @@
 	}
 
 	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
-		bio_endio(bio, mtip_send_trim(dd, bio->bi_sector,
+		bio_endio(bio, mtip_send_trim(dd, bio->bi_iter.bi_sector,
 						bio_sectors(bio)));
 		return;
 	}
@@ -4006,7 +4073,8 @@
 
 	if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 &&
 							dd->unal_qdepth) {
-		if (bio->bi_sector % 8 != 0) /* Unaligned on 4k boundaries */
+		if (bio->bi_iter.bi_sector % 8 != 0)
+			/* Unaligned on 4k boundaries */
 			unaligned = 1;
 		else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */
 			unaligned = 1;
@@ -4025,17 +4093,17 @@
 		}
 
 		/* Create the scatter list for this bio. */
-		bio_for_each_segment(bvec, bio, i) {
+		bio_for_each_segment(bvec, bio, iter) {
 			sg_set_page(&sg[nents],
-					bvec->bv_page,
-					bvec->bv_len,
-					bvec->bv_offset);
+					bvec.bv_page,
+					bvec.bv_len,
+					bvec.bv_offset);
 			nents++;
 		}
 
 		/* Issue the read/write. */
 		mtip_hw_submit_io(dd,
-				bio->bi_sector,
+				bio->bi_iter.bi_sector,
 				bio_sectors(bio),
 				nents,
 				tag,
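
One mtip_dma_alloc() detail worth a note: the high half of each 64-bit
command-table address is extracted with a double 16-bit shift. A sketch of the
idiom (the helper name is illustrative, not from the driver):

/* Split a dma_addr_t across the AHCI ctba/ctbau register pair; the
 * double ">> 16" avoids an undefined shift by 32 when dma_addr_t is
 * only 32 bits wide. */
static inline void set_cmd_table_addr(__le32 *ctba, __le32 *ctbau,
				      dma_addr_t dma)
{
	*ctba  = cpu_to_le32(dma & 0xFFFFFFFF);
	*ctbau = cpu_to_le32((dma >> 16) >> 16);
}
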
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 9be7a15..b52e9a6 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -69,7 +69,7 @@
  * Maximum number of scatter gather entries
  * a single command may have.
  */
-#define MTIP_MAX_SG		128
+#define MTIP_MAX_SG		504
 
 /*
  * Maximum number of slot groups (Command Issue & s_active registers)
@@ -92,7 +92,7 @@
 
 /* Driver name and version strings */
 #define MTIP_DRV_NAME		"mtip32xx"
-#define MTIP_DRV_VERSION	"1.2.6os3"
+#define MTIP_DRV_VERSION	"1.3.0"
 
 /* Maximum number of minor device numbers per device. */
 #define MTIP_MAX_MINORS		16
@@ -391,15 +391,13 @@
 	 */
 	dma_addr_t rxfis_dma;
 	/*
-	 * Pointer to the beginning of the command table memory as used
-	 * by the driver.
+	 * Pointer to the DMA region for RX Fis, Identify, RLE10, and SMART
 	 */
-	void *command_table;
+	void *block1;
 	/*
-	 * Pointer to the beginning of the command table memory as used
-	 * by the DMA.
+	 * DMA address of region for RX Fis, Identify, RLE10, and SMART
 	 */
-	dma_addr_t command_tbl_dma;
+	dma_addr_t block1_dma;
 	/*
 	 * Pointer to the beginning of the identify data memory as used
 	 * by the driver.
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 2dc3b51..55298db 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -271,18 +271,18 @@
 
 	if (nbd_cmd(req) == NBD_CMD_WRITE) {
 		struct req_iterator iter;
-		struct bio_vec *bvec;
+		struct bio_vec bvec;
 		/*
 		 * we are really probing at internals to determine
 		 * whether to set MSG_MORE or not...
 		 */
 		rq_for_each_segment(bvec, req, iter) {
 			flags = 0;
-			if (!rq_iter_last(req, iter))
+			if (!rq_iter_last(bvec, iter))
 				flags = MSG_MORE;
 			dprintk(DBG_TX, "%s: request %p: sending %d bytes data\n",
-					nbd->disk->disk_name, req, bvec->bv_len);
-			result = sock_send_bvec(nbd, bvec, flags);
+					nbd->disk->disk_name, req, bvec.bv_len);
+			result = sock_send_bvec(nbd, &bvec, flags);
 			if (result <= 0) {
 				dev_err(disk_to_dev(nbd->disk),
 					"Send data failed (result %d)\n",
@@ -378,10 +378,10 @@
 			nbd->disk->disk_name, req);
 	if (nbd_cmd(req) == NBD_CMD_READ) {
 		struct req_iterator iter;
-		struct bio_vec *bvec;
+		struct bio_vec bvec;
 
 		rq_for_each_segment(bvec, req, iter) {
-			result = sock_recv_bvec(nbd, bvec);
+			result = sock_recv_bvec(nbd, &bvec);
 			if (result <= 0) {
 				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
 					result);
@@ -389,7 +389,7 @@
 				return req;
 			}
 			dprintk(DBG_RX, "%s: request %p: got %d bytes data\n",
-				nbd->disk->disk_name, req, bvec->bv_len);
+				nbd->disk->disk_name, req, bvec.bv_len);
 		}
 	}
 	return req;
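
nbd also picks up the new rq_iter_last(), which takes the bio_vec copy and the
iterator rather than the request. A sketch of the request-level idiom
(function name illustrative):

static unsigned int rq_payload_bytes(struct request *req)
{
	struct req_iterator iter;
	struct bio_vec bvec;
	unsigned int bytes = 0;

	rq_for_each_segment(bvec, req, iter) {
		/* rq_iter_last(bvec, iter) is true only on the final
		 * segment of the final bio, e.g. to drop MSG_MORE */
		bytes += bvec.bv_len;
	}
	return bytes;
}
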
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 83a598e..3107282 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -616,6 +616,11 @@
 		irqmode = NULL_IRQ_NONE;
 	}
 #endif
+	if (bs > PAGE_SIZE) {
+		pr_warn("null_blk: invalid block size\n");
+		pr_warn("null_blk: defaulting block size to %lu\n", PAGE_SIZE);
+		bs = PAGE_SIZE;
+	}
 
 	if (queue_mode == NULL_Q_MQ && use_per_node_hctx) {
 		if (submit_queues < nr_online_nodes) {
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 26d03fa..1f14ac4 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -441,104 +441,19 @@
 	return total_len;
 }
 
-struct nvme_bio_pair {
-	struct bio b1, b2, *parent;
-	struct bio_vec *bv1, *bv2;
-	int err;
-	atomic_t cnt;
-};
-
-static void nvme_bio_pair_endio(struct bio *bio, int err)
-{
-	struct nvme_bio_pair *bp = bio->bi_private;
-
-	if (err)
-		bp->err = err;
-
-	if (atomic_dec_and_test(&bp->cnt)) {
-		bio_endio(bp->parent, bp->err);
-		kfree(bp->bv1);
-		kfree(bp->bv2);
-		kfree(bp);
-	}
-}
-
-static struct nvme_bio_pair *nvme_bio_split(struct bio *bio, int idx,
-							int len, int offset)
-{
-	struct nvme_bio_pair *bp;
-
-	BUG_ON(len > bio->bi_size);
-	BUG_ON(idx > bio->bi_vcnt);
-
-	bp = kmalloc(sizeof(*bp), GFP_ATOMIC);
-	if (!bp)
-		return NULL;
-	bp->err = 0;
-
-	bp->b1 = *bio;
-	bp->b2 = *bio;
-
-	bp->b1.bi_size = len;
-	bp->b2.bi_size -= len;
-	bp->b1.bi_vcnt = idx;
-	bp->b2.bi_idx = idx;
-	bp->b2.bi_sector += len >> 9;
-
-	if (offset) {
-		bp->bv1 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
-								GFP_ATOMIC);
-		if (!bp->bv1)
-			goto split_fail_1;
-
-		bp->bv2 = kmalloc(bio->bi_max_vecs * sizeof(struct bio_vec),
-								GFP_ATOMIC);
-		if (!bp->bv2)
-			goto split_fail_2;
-
-		memcpy(bp->bv1, bio->bi_io_vec,
-			bio->bi_max_vecs * sizeof(struct bio_vec));
-		memcpy(bp->bv2, bio->bi_io_vec,
-			bio->bi_max_vecs * sizeof(struct bio_vec));
-
-		bp->b1.bi_io_vec = bp->bv1;
-		bp->b2.bi_io_vec = bp->bv2;
-		bp->b2.bi_io_vec[idx].bv_offset += offset;
-		bp->b2.bi_io_vec[idx].bv_len -= offset;
-		bp->b1.bi_io_vec[idx].bv_len = offset;
-		bp->b1.bi_vcnt++;
-	} else
-		bp->bv1 = bp->bv2 = NULL;
-
-	bp->b1.bi_private = bp;
-	bp->b2.bi_private = bp;
-
-	bp->b1.bi_end_io = nvme_bio_pair_endio;
-	bp->b2.bi_end_io = nvme_bio_pair_endio;
-
-	bp->parent = bio;
-	atomic_set(&bp->cnt, 2);
-
-	return bp;
-
- split_fail_2:
-	kfree(bp->bv1);
- split_fail_1:
-	kfree(bp);
-	return NULL;
-}
-
 static int nvme_split_and_submit(struct bio *bio, struct nvme_queue *nvmeq,
-						int idx, int len, int offset)
+				 int len)
 {
-	struct nvme_bio_pair *bp = nvme_bio_split(bio, idx, len, offset);
-	if (!bp)
+	struct bio *split = bio_split(bio, len >> 9, GFP_ATOMIC, NULL);
+	if (!split)
 		return -ENOMEM;
 
+	bio_chain(split, bio);
+
 	if (bio_list_empty(&nvmeq->sq_cong))
 		add_wait_queue(&nvmeq->sq_full, &nvmeq->sq_cong_wait);
-	bio_list_add(&nvmeq->sq_cong, &bp->b1);
-	bio_list_add(&nvmeq->sq_cong, &bp->b2);
+	bio_list_add(&nvmeq->sq_cong, split);
+	bio_list_add(&nvmeq->sq_cong, bio);
 
 	return 0;
 }
@@ -550,41 +465,44 @@
 static int nvme_map_bio(struct nvme_queue *nvmeq, struct nvme_iod *iod,
 		struct bio *bio, enum dma_data_direction dma_dir, int psegs)
 {
-	struct bio_vec *bvec, *bvprv = NULL;
+	struct bio_vec bvec, bvprv;
+	struct bvec_iter iter;
 	struct scatterlist *sg = NULL;
-	int i, length = 0, nsegs = 0, split_len = bio->bi_size;
+	int length = 0, nsegs = 0, split_len = bio->bi_iter.bi_size;
+	int first = 1;
 
 	if (nvmeq->dev->stripe_size)
 		split_len = nvmeq->dev->stripe_size -
-			((bio->bi_sector << 9) & (nvmeq->dev->stripe_size - 1));
+			((bio->bi_iter.bi_sector << 9) &
+			 (nvmeq->dev->stripe_size - 1));
 
 	sg_init_table(iod->sg, psegs);
-	bio_for_each_segment(bvec, bio, i) {
-		if (bvprv && BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) {
-			sg->length += bvec->bv_len;
+	bio_for_each_segment(bvec, bio, iter) {
+		if (!first && BIOVEC_PHYS_MERGEABLE(&bvprv, &bvec)) {
+			sg->length += bvec.bv_len;
 		} else {
-			if (bvprv && BIOVEC_NOT_VIRT_MERGEABLE(bvprv, bvec))
-				return nvme_split_and_submit(bio, nvmeq, i,
-								length, 0);
+			if (!first && BIOVEC_NOT_VIRT_MERGEABLE(&bvprv, &bvec))
+				return nvme_split_and_submit(bio, nvmeq,
+							     length);
 
 			sg = sg ? sg + 1 : iod->sg;
-			sg_set_page(sg, bvec->bv_page, bvec->bv_len,
-							bvec->bv_offset);
+			sg_set_page(sg, bvec.bv_page,
+				    bvec.bv_len, bvec.bv_offset);
 			nsegs++;
 		}
 
-		if (split_len - length < bvec->bv_len)
-			return nvme_split_and_submit(bio, nvmeq, i, split_len,
-							split_len - length);
-		length += bvec->bv_len;
+		if (split_len - length < bvec.bv_len)
+			return nvme_split_and_submit(bio, nvmeq, split_len);
+		length += bvec.bv_len;
 		bvprv = bvec;
+		first = 0;
 	}
 	iod->nents = nsegs;
 	sg_mark_end(sg);
 	if (dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir) == 0)
 		return -ENOMEM;
 
-	BUG_ON(length != bio->bi_size);
+	BUG_ON(length != bio->bi_iter.bi_size);
 	return length;
 }
 
@@ -608,8 +526,8 @@
 	iod->npages = 0;
 
 	range->cattr = cpu_to_le32(0);
-	range->nlb = cpu_to_le32(bio->bi_size >> ns->lba_shift);
-	range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector));
+	range->nlb = cpu_to_le32(bio->bi_iter.bi_size >> ns->lba_shift);
+	range->slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
 
 	memset(cmnd, 0, sizeof(*cmnd));
 	cmnd->dsm.opcode = nvme_cmd_dsm;
@@ -674,7 +592,7 @@
 	}
 
 	result = -ENOMEM;
-	iod = nvme_alloc_iod(psegs, bio->bi_size, GFP_ATOMIC);
+	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
 	if (!iod)
 		goto nomem;
 	iod->private = bio;
@@ -723,7 +641,7 @@
 	cmnd->rw.nsid = cpu_to_le32(ns->ns_id);
 	length = nvme_setup_prps(nvmeq->dev, &cmnd->common, iod, length,
 								GFP_ATOMIC);
-	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_sector));
+	cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, bio->bi_iter.bi_sector));
 	cmnd->rw.length = cpu_to_le16((length >> ns->lba_shift) - 1);
 	cmnd->rw.control = cpu_to_le16(control);
 	cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt);
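
The hand-rolled nvme_bio_pair machinery deleted above collapses into
bio_split() plus bio_chain(). A sketch of the generic pattern; passing NULL as
the bio_set (as the nvme hunk does) falls back to a plain allocation:

/* Peel 'sectors' off the front of 'bio'; the parent's completion is
 * deferred until the child completes too, so no pair/refcount
 * bookkeeping is needed. */
static struct bio *split_front(struct bio *bio, int sectors, gfp_t gfp)
{
	struct bio *split = bio_split(bio, sectors, gfp, NULL);

	if (split)
		bio_chain(split, bio);	/* child holds up parent end_io */
	return split;
}

Both halves can then simply be requeued, as nvme_split_and_submit() does by
adding them to sq_cong.
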
diff --git a/drivers/block/paride/pg.c b/drivers/block/paride/pg.c
index 4a27b1d..2ce3dfd 100644
--- a/drivers/block/paride/pg.c
+++ b/drivers/block/paride/pg.c
@@ -581,7 +581,7 @@
 
 	if (hdr.magic != PG_MAGIC)
 		return -EINVAL;
-	if (hdr.dlen > PG_MAX_DATA)
+	if (hdr.dlen < 0 || hdr.dlen > PG_MAX_DATA)
 		return -EINVAL;
 	if ((count - hs) > PG_MAX_DATA)
 		return -EINVAL;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ff8668c..a2af73d 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -651,7 +651,7 @@
 
 	for (;;) {
 		tmp = rb_entry(n, struct pkt_rb_node, rb_node);
-		if (s <= tmp->bio->bi_sector)
+		if (s <= tmp->bio->bi_iter.bi_sector)
 			next = n->rb_left;
 		else
 			next = n->rb_right;
@@ -660,12 +660,12 @@
 		n = next;
 	}
 
-	if (s > tmp->bio->bi_sector) {
+	if (s > tmp->bio->bi_iter.bi_sector) {
 		tmp = pkt_rbtree_next(tmp);
 		if (!tmp)
 			return NULL;
 	}
-	BUG_ON(s > tmp->bio->bi_sector);
+	BUG_ON(s > tmp->bio->bi_iter.bi_sector);
 	return tmp;
 }
 
@@ -676,13 +676,13 @@
 {
 	struct rb_node **p = &pd->bio_queue.rb_node;
 	struct rb_node *parent = NULL;
-	sector_t s = node->bio->bi_sector;
+	sector_t s = node->bio->bi_iter.bi_sector;
 	struct pkt_rb_node *tmp;
 
 	while (*p) {
 		parent = *p;
 		tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
-		if (s < tmp->bio->bi_sector)
+		if (s < tmp->bio->bi_iter.bi_sector)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
@@ -706,7 +706,9 @@
 			     WRITE : READ, __GFP_WAIT);
 
 	if (cgc->buflen) {
-		if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT))
+		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
+				      __GFP_WAIT);
+		if (ret)
 			goto out;
 	}
 
@@ -857,7 +859,8 @@
 			spin_lock(&pd->iosched.lock);
 			bio = bio_list_peek(&pd->iosched.write_queue);
 			spin_unlock(&pd->iosched.lock);
-			if (bio && (bio->bi_sector == pd->iosched.last_write))
+			if (bio && (bio->bi_iter.bi_sector ==
+				    pd->iosched.last_write))
 				need_write_seek = 0;
 			if (need_write_seek && reads_queued) {
 				if (atomic_read(&pd->cdrw.pending_bios) > 0) {
@@ -888,7 +891,8 @@
 			continue;
 
 		if (bio_data_dir(bio) == READ)
-			pd->iosched.successive_reads += bio->bi_size >> 10;
+			pd->iosched.successive_reads +=
+				bio->bi_iter.bi_size >> 10;
 		else {
 			pd->iosched.successive_reads = 0;
 			pd->iosched.last_write = bio_end_sector(bio);
@@ -978,7 +982,7 @@
 
 	pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
 		bio, (unsigned long long)pkt->sector,
-		(unsigned long long)bio->bi_sector, err);
+		(unsigned long long)bio->bi_iter.bi_sector, err);
 
 	if (err)
 		atomic_inc(&pkt->io_errors);
@@ -1026,8 +1030,9 @@
 	memset(written, 0, sizeof(written));
 	spin_lock(&pkt->lock);
 	bio_list_for_each(bio, &pkt->orig_bios) {
-		int first_frame = (bio->bi_sector - pkt->sector) / (CD_FRAMESIZE >> 9);
-		int num_frames = bio->bi_size / CD_FRAMESIZE;
+		int first_frame = (bio->bi_iter.bi_sector - pkt->sector) /
+			(CD_FRAMESIZE >> 9);
+		int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE;
 		pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
 		BUG_ON(first_frame < 0);
 		BUG_ON(first_frame + num_frames > pkt->frames);
@@ -1053,7 +1058,7 @@
 
 		bio = pkt->r_bios[f];
 		bio_reset(bio);
-		bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
+		bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
 		bio->bi_bdev = pd->bdev;
 		bio->bi_end_io = pkt_end_io_read;
 		bio->bi_private = pkt;
@@ -1150,8 +1155,8 @@
 	bio_reset(pkt->bio);
 	pkt->bio->bi_bdev = pd->bdev;
 	pkt->bio->bi_rw = REQ_WRITE;
-	pkt->bio->bi_sector = new_sector;
-	pkt->bio->bi_size = pkt->frames * CD_FRAMESIZE;
+	pkt->bio->bi_iter.bi_sector = new_sector;
+	pkt->bio->bi_iter.bi_size = pkt->frames * CD_FRAMESIZE;
 	pkt->bio->bi_vcnt = pkt->frames;
 
 	pkt->bio->bi_end_io = pkt_end_io_packet_write;
@@ -1213,7 +1218,7 @@
 	node = first_node;
 	while (node) {
 		bio = node->bio;
-		zone = get_zone(bio->bi_sector, pd);
+		zone = get_zone(bio->bi_iter.bi_sector, pd);
 		list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
 			if (p->sector == zone) {
 				bio = NULL;
@@ -1252,14 +1257,14 @@
 	pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
 	while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
 		bio = node->bio;
-		pkt_dbg(2, pd, "found zone=%llx\n",
-			(unsigned long long)get_zone(bio->bi_sector, pd));
-		if (get_zone(bio->bi_sector, pd) != zone)
+		pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long)
+			get_zone(bio->bi_iter.bi_sector, pd));
+		if (get_zone(bio->bi_iter.bi_sector, pd) != zone)
 			break;
 		pkt_rbtree_erase(pd, node);
 		spin_lock(&pkt->lock);
 		bio_list_add(&pkt->orig_bios, bio);
-		pkt->write_size += bio->bi_size / CD_FRAMESIZE;
+		pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE;
 		spin_unlock(&pkt->lock);
 	}
 	/* check write congestion marks, and if bio_queue_size is
@@ -1293,7 +1298,7 @@
 	struct bio_vec *bvec = pkt->w_bio->bi_io_vec;
 
 	bio_reset(pkt->w_bio);
-	pkt->w_bio->bi_sector = pkt->sector;
+	pkt->w_bio->bi_iter.bi_sector = pkt->sector;
 	pkt->w_bio->bi_bdev = pd->bdev;
 	pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
 	pkt->w_bio->bi_private = pkt;
@@ -2335,75 +2340,29 @@
 	pkt_bio_finished(pd);
 }
 
-static void pkt_make_request(struct request_queue *q, struct bio *bio)
+static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
 {
-	struct pktcdvd_device *pd;
-	char b[BDEVNAME_SIZE];
+	struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
+	struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
+
+	psd->pd = pd;
+	psd->bio = bio;
+	cloned_bio->bi_bdev = pd->bdev;
+	cloned_bio->bi_private = psd;
+	cloned_bio->bi_end_io = pkt_end_io_read_cloned;
+	pd->stats.secs_r += bio_sectors(bio);
+	pkt_queue_bio(pd, cloned_bio);
+}
+
+static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
+{
+	struct pktcdvd_device *pd = q->queuedata;
 	sector_t zone;
 	struct packet_data *pkt;
 	int was_empty, blocked_bio;
 	struct pkt_rb_node *node;
 
-	pd = q->queuedata;
-	if (!pd) {
-		pr_err("%s incorrect request queue\n",
-		       bdevname(bio->bi_bdev, b));
-		goto end_io;
-	}
-
-	/*
-	 * Clone READ bios so we can have our own bi_end_io callback.
-	 */
-	if (bio_data_dir(bio) == READ) {
-		struct bio *cloned_bio = bio_clone(bio, GFP_NOIO);
-		struct packet_stacked_data *psd = mempool_alloc(psd_pool, GFP_NOIO);
-
-		psd->pd = pd;
-		psd->bio = bio;
-		cloned_bio->bi_bdev = pd->bdev;
-		cloned_bio->bi_private = psd;
-		cloned_bio->bi_end_io = pkt_end_io_read_cloned;
-		pd->stats.secs_r += bio_sectors(bio);
-		pkt_queue_bio(pd, cloned_bio);
-		return;
-	}
-
-	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
-		pkt_notice(pd, "WRITE for ro device (%llu)\n",
-			   (unsigned long long)bio->bi_sector);
-		goto end_io;
-	}
-
-	if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) {
-		pkt_err(pd, "wrong bio size\n");
-		goto end_io;
-	}
-
-	blk_queue_bounce(q, &bio);
-
-	zone = get_zone(bio->bi_sector, pd);
-	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
-		(unsigned long long)bio->bi_sector,
-		(unsigned long long)bio_end_sector(bio));
-
-	/* Check if we have to split the bio */
-	{
-		struct bio_pair *bp;
-		sector_t last_zone;
-		int first_sectors;
-
-		last_zone = get_zone(bio_end_sector(bio) - 1, pd);
-		if (last_zone != zone) {
-			BUG_ON(last_zone != zone + pd->settings.size);
-			first_sectors = last_zone - bio->bi_sector;
-			bp = bio_split(bio, first_sectors);
-			BUG_ON(!bp);
-			pkt_make_request(q, &bp->bio1);
-			pkt_make_request(q, &bp->bio2);
-			bio_pair_release(bp);
-			return;
-		}
-	}
+	zone = get_zone(bio->bi_iter.bi_sector, pd);
 
 	/*
 	 * If we find a matching packet in state WAITING or READ_WAIT, we can
@@ -2417,7 +2376,8 @@
 			if ((pkt->state == PACKET_WAITING_STATE) ||
 			    (pkt->state == PACKET_READ_WAIT_STATE)) {
 				bio_list_add(&pkt->orig_bios, bio);
-				pkt->write_size += bio->bi_size / CD_FRAMESIZE;
+				pkt->write_size +=
+					bio->bi_iter.bi_size / CD_FRAMESIZE;
 				if ((pkt->write_size >= pkt->frames) &&
 				    (pkt->state == PACKET_WAITING_STATE)) {
 					atomic_inc(&pkt->run_sm);
@@ -2476,6 +2436,64 @@
 		 */
 		wake_up(&pd->wqueue);
 	}
+}
+
+static void pkt_make_request(struct request_queue *q, struct bio *bio)
+{
+	struct pktcdvd_device *pd;
+	char b[BDEVNAME_SIZE];
+	struct bio *split;
+
+	pd = q->queuedata;
+	if (!pd) {
+		pr_err("%s incorrect request queue\n",
+		       bdevname(bio->bi_bdev, b));
+		goto end_io;
+	}
+
+	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
+		(unsigned long long)bio->bi_iter.bi_sector,
+		(unsigned long long)bio_end_sector(bio));
+
+	/*
+	 * Clone READ bios so we can have our own bi_end_io callback.
+	 */
+	if (bio_data_dir(bio) == READ) {
+		pkt_make_request_read(pd, bio);
+		return;
+	}
+
+	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
+		pkt_notice(pd, "WRITE for ro device (%llu)\n",
+			   (unsigned long long)bio->bi_iter.bi_sector);
+		goto end_io;
+	}
+
+	if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
+		pkt_err(pd, "wrong bio size\n");
+		goto end_io;
+	}
+
+	blk_queue_bounce(q, &bio);
+
+	do {
+		sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
+		sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
+
+		if (last_zone != zone) {
+			BUG_ON(last_zone != zone + pd->settings.size);
+
+			split = bio_split(bio, last_zone -
+					  bio->bi_iter.bi_sector,
+					  GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
+
+		pkt_make_request_write(q, split);
+	} while (split != bio);
+
 	return;
 end_io:
 	bio_io_error(bio);
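
The do/while loop in pkt_make_request() is the standard shape for splitting at
fixed boundaries with the chained API: peel zone-sized pieces off the front
until the remainder fits, then handle the remainder as the original bio. A
hedged sketch; zone_sectors_left() and handle_piece() are hypothetical
stand-ins for the zone arithmetic and for pkt_make_request_write():

static void submit_by_zone(struct request_queue *q, struct bio *bio)
{
	struct bio *split;

	do {
		int n = zone_sectors_left(bio);	/* hypothetical helper */

		if (bio_sectors(bio) > n) {
			split = bio_split(bio, n, GFP_NOIO, fs_bio_set);
			bio_chain(split, bio);
		} else {
			split = bio;
		}
		handle_piece(q, split);		/* hypothetical handler */
	} while (split != bio);
}

Each bio_split() shrinks the remainder, so the loop terminates with the last
piece being 'bio' itself.
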
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index d754a88..c120d70 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -94,26 +94,25 @@
 {
 	unsigned int offset = 0;
 	struct req_iterator iter;
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
 	unsigned int i = 0;
 	size_t size;
 	void *buf;
 
 	rq_for_each_segment(bvec, req, iter) {
 		unsigned long flags;
-		dev_dbg(&dev->sbd.core,
-			"%s:%u: bio %u: %u segs %u sectors from %lu\n",
-			__func__, __LINE__, i, bio_segments(iter.bio),
-			bio_sectors(iter.bio), iter.bio->bi_sector);
+		dev_dbg(&dev->sbd.core, "%s:%u: bio %u: %u sectors from %lu\n",
+			__func__, __LINE__, i, bio_sectors(iter.bio),
+			iter.bio->bi_iter.bi_sector);
 
-		size = bvec->bv_len;
-		buf = bvec_kmap_irq(bvec, &flags);
+		size = bvec.bv_len;
+		buf = bvec_kmap_irq(&bvec, &flags);
 		if (gather)
 			memcpy(dev->bounce_buf+offset, buf, size);
 		else
 			memcpy(buf, dev->bounce_buf+offset, size);
 		offset += size;
-		flush_kernel_dcache_page(bvec->bv_page);
+		flush_kernel_dcache_page(bvec.bv_page);
 		bvec_kunmap_irq(buf, &flags);
 		i++;
 	}
@@ -130,7 +129,7 @@
 
 #ifdef DEBUG
 	unsigned int n = 0;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	struct req_iterator iter;
 
 	rq_for_each_segment(bv, req, iter)
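
Since the iterator hands out bio_vec copies, helpers such as bvec_kmap_irq()
now receive the address of the local copy (&bvec) instead of a pointer into
the biovec. A sketch of ps3disk's bounce-copy step in isolation (function name
illustrative):

static void copy_segment_to_bounce(struct bio_vec *bv, void *bounce)
{
	unsigned long flags;
	char *buf = bvec_kmap_irq(bv, &flags);	/* atomic kmap + irq save */

	memcpy(bounce, buf, bv->bv_len);
	flush_kernel_dcache_page(bv->bv_page);
	bvec_kunmap_irq(buf, &flags);
}
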
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 06a2e53..ef45cfb 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -553,16 +553,16 @@
 	struct ps3vram_priv *priv = ps3_system_bus_get_drvdata(dev);
 	int write = bio_data_dir(bio) == WRITE;
 	const char *op = write ? "write" : "read";
-	loff_t offset = bio->bi_sector << 9;
+	loff_t offset = bio->bi_iter.bi_sector << 9;
 	int error = 0;
-	struct bio_vec *bvec;
-	unsigned int i;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	struct bio *next;
 
-	bio_for_each_segment(bvec, bio, i) {
+	bio_for_each_segment(bvec, bio, iter) {
 		/* PS3 is ppc64, so we don't handle highmem */
-		char *ptr = page_address(bvec->bv_page) + bvec->bv_offset;
-		size_t len = bvec->bv_len, retlen;
+		char *ptr = page_address(bvec.bv_page) + bvec.bv_offset;
+		size_t len = bvec.bv_len, retlen;
 
 		dev_dbg(&dev->core, "    %s %zu bytes at offset %llu\n", op,
 			len, offset);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 16cab66..b365e0d 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1156,23 +1156,23 @@
  */
 static void zero_bio_chain(struct bio *chain, int start_ofs)
 {
-	struct bio_vec *bv;
+	struct bio_vec bv;
+	struct bvec_iter iter;
 	unsigned long flags;
 	void *buf;
-	int i;
 	int pos = 0;
 
 	while (chain) {
-		bio_for_each_segment(bv, chain, i) {
-			if (pos + bv->bv_len > start_ofs) {
+		bio_for_each_segment(bv, chain, iter) {
+			if (pos + bv.bv_len > start_ofs) {
 				int remainder = max(start_ofs - pos, 0);
-				buf = bvec_kmap_irq(bv, &flags);
+				buf = bvec_kmap_irq(&bv, &flags);
 				memset(buf + remainder, 0,
-				       bv->bv_len - remainder);
-				flush_dcache_page(bv->bv_page);
+				       bv.bv_len - remainder);
+				flush_dcache_page(bv.bv_page);
 				bvec_kunmap_irq(buf, &flags);
 			}
-			pos += bv->bv_len;
+			pos += bv.bv_len;
 		}
 
 		chain = chain->bi_next;
@@ -1220,74 +1220,14 @@
 					unsigned int len,
 					gfp_t gfpmask)
 {
-	struct bio_vec *bv;
-	unsigned int resid;
-	unsigned short idx;
-	unsigned int voff;
-	unsigned short end_idx;
-	unsigned short vcnt;
 	struct bio *bio;
 
-	/* Handle the easy case for the caller */
-
-	if (!offset && len == bio_src->bi_size)
-		return bio_clone(bio_src, gfpmask);
-
-	if (WARN_ON_ONCE(!len))
-		return NULL;
-	if (WARN_ON_ONCE(len > bio_src->bi_size))
-		return NULL;
-	if (WARN_ON_ONCE(offset > bio_src->bi_size - len))
-		return NULL;
-
-	/* Find first affected segment... */
-
-	resid = offset;
-	bio_for_each_segment(bv, bio_src, idx) {
-		if (resid < bv->bv_len)
-			break;
-		resid -= bv->bv_len;
-	}
-	voff = resid;
-
-	/* ...and the last affected segment */
-
-	resid += len;
-	__bio_for_each_segment(bv, bio_src, end_idx, idx) {
-		if (resid <= bv->bv_len)
-			break;
-		resid -= bv->bv_len;
-	}
-	vcnt = end_idx - idx + 1;
-
-	/* Build the clone */
-
-	bio = bio_alloc(gfpmask, (unsigned int) vcnt);
+	bio = bio_clone(bio_src, gfpmask);
 	if (!bio)
 		return NULL;	/* ENOMEM */
 
-	bio->bi_bdev = bio_src->bi_bdev;
-	bio->bi_sector = bio_src->bi_sector + (offset >> SECTOR_SHIFT);
-	bio->bi_rw = bio_src->bi_rw;
-	bio->bi_flags |= 1 << BIO_CLONED;
-
-	/*
-	 * Copy over our part of the bio_vec, then update the first
-	 * and last (or only) entries.
-	 */
-	memcpy(&bio->bi_io_vec[0], &bio_src->bi_io_vec[idx],
-			vcnt * sizeof (struct bio_vec));
-	bio->bi_io_vec[0].bv_offset += voff;
-	if (vcnt > 1) {
-		bio->bi_io_vec[0].bv_len -= voff;
-		bio->bi_io_vec[vcnt - 1].bv_len = resid;
-	} else {
-		bio->bi_io_vec[0].bv_len = len;
-	}
-
-	bio->bi_vcnt = vcnt;
-	bio->bi_size = len;
-	bio->bi_idx = 0;
+	bio_advance(bio, offset);
+	bio->bi_iter.bi_size = len;
 
 	return bio;
 }
@@ -1318,7 +1258,7 @@
 
 	/* Build up a chain of clone bios up to the limit */
 
-	if (!bi || off >= bi->bi_size || !len)
+	if (!bi || off >= bi->bi_iter.bi_size || !len)
 		return NULL;		/* Nothing to clone */
 
 	end = &chain;
@@ -1330,7 +1270,7 @@
 			rbd_warn(NULL, "bio_chain exhausted with %u left", len);
 			goto out_err;	/* EINVAL; ran out of bio's */
 		}
-		bi_size = min_t(unsigned int, bi->bi_size - off, len);
+		bi_size = min_t(unsigned int, bi->bi_iter.bi_size - off, len);
 		bio = bio_clone_range(bi, off, bi_size, gfpmask);
 		if (!bio)
 			goto out_err;	/* ENOMEM */
@@ -1339,7 +1279,7 @@
 		end = &bio->bi_next;
 
 		off += bi_size;
-		if (off == bi->bi_size) {
+		if (off == bi->bi_iter.bi_size) {
 			bi = bi->bi_next;
 			off = 0;
 		}
@@ -2227,7 +2167,8 @@
 
 	if (type == OBJ_REQUEST_BIO) {
 		bio_list = data_desc;
-		rbd_assert(img_offset == bio_list->bi_sector << SECTOR_SHIFT);
+		rbd_assert(img_offset ==
+			   bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
 	} else {
 		rbd_assert(type == OBJ_REQUEST_PAGES);
 		pages = data_desc;
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 2284f5d..2839d37 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -174,7 +174,7 @@
 	if (!card)
 		goto req_err;
 
-	if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk))
+	if (bio_end_sector(bio) > get_capacity(card->gendisk))
 		goto req_err;
 
 	if (unlikely(card->halt)) {
@@ -187,7 +187,7 @@
 		goto req_err;
 	}
 
-	if (bio->bi_size == 0) {
+	if (bio->bi_iter.bi_size == 0) {
 		dev_err(CARD_TO_DEV(card), "size zero BIO!\n");
 		goto req_err;
 	}
@@ -208,7 +208,7 @@
 
 	dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
 		 bio_data_dir(bio) ? 'W' : 'R', bio_meta,
-		 (u64)bio->bi_sector << 9, bio->bi_size);
+		 (u64)bio->bi_iter.bi_sector << 9, bio->bi_iter.bi_size);
 
 	st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas,
 				    bio_dma_done_cb, bio_meta);
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index fc88ba3..cf8cd29 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -684,7 +684,8 @@
 			   void *cb_data)
 {
 	struct list_head dma_list[RSXX_MAX_TARGETS];
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	unsigned long long addr8;
 	unsigned int laddr;
 	unsigned int bv_len;
@@ -696,7 +697,7 @@
 	int st;
 	int i;
 
-	addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */
+	addr8 = bio->bi_iter.bi_sector << 9; /* sectors are 512 bytes */
 	atomic_set(n_dmas, 0);
 
 	for (i = 0; i < card->n_targets; i++) {
@@ -705,7 +706,7 @@
 	}
 
 	if (bio->bi_rw & REQ_DISCARD) {
-		bv_len = bio->bi_size;
+		bv_len = bio->bi_iter.bi_size;
 
 		while (bv_len > 0) {
 			tgt   = rsxx_get_dma_tgt(card, addr8);
@@ -722,9 +723,9 @@
 			bv_len -= RSXX_HW_BLK_SIZE;
 		}
 	} else {
-		bio_for_each_segment(bvec, bio, i) {
-			bv_len = bvec->bv_len;
-			bv_off = bvec->bv_offset;
+		bio_for_each_segment(bvec, bio, iter) {
+			bv_len = bvec.bv_len;
+			bv_off = bvec.bv_offset;
 
 			while (bv_len > 0) {
 				tgt   = rsxx_get_dma_tgt(card, addr8);
@@ -736,7 +737,7 @@
 				st = rsxx_queue_dma(card, &dma_list[tgt],
 							bio_data_dir(bio),
 							dma_off, dma_len,
-							laddr, bvec->bv_page,
+							laddr, bvec.bv_page,
 							bv_off, cb, cb_data);
 				if (st)
 					goto bvec_err;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 3fb6ab4..d5e2d12 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1744,20 +1744,6 @@
 	kfree(host);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
-	pci_set_drvdata(pdev, NULL);
 }
 
-static int __init carm_init(void)
-{
-	return pci_register_driver(&carm_driver);
-}
-
-static void __exit carm_exit(void)
-{
-	pci_unregister_driver(&carm_driver);
-}
-
-module_init(carm_init);
-module_exit(carm_exit);
-
-
+module_pci_driver(carm_driver);
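
The sx8 change is an unrelated cleanup: module_pci_driver() expands to the
same init/exit boilerplate it replaces. From include/linux/pci.h, roughly:

#define module_pci_driver(__pci_driver) \
	module_driver(__pci_driver, pci_register_driver, \
		      pci_unregister_driver)
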
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index ad70868..4cf81b5 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -108,8 +108,7 @@
 				    * have been written
 				    */
 	struct bio	*bio, *currentbio, **biotail;
-	int		current_idx;
-	sector_t	current_sector;
+	struct bvec_iter current_iter;
 
 	struct request_queue *queue;
 
@@ -118,7 +117,7 @@
 		struct mm_dma_desc	*desc;
 		int	 		cnt, headcnt;
 		struct bio		*bio, **biotail;
-		int			idx;
+		struct bvec_iter	iter;
 	} mm_pages[2];
 #define DESC_PER_PAGE ((PAGE_SIZE*2)/sizeof(struct mm_dma_desc))
 
@@ -344,16 +343,13 @@
 	dma_addr_t dma_handle;
 	int offset;
 	struct bio *bio;
-	struct bio_vec *vec;
-	int idx;
+	struct bio_vec vec;
 	int rw;
-	int len;
 
 	bio = card->currentbio;
 	if (!bio && card->bio) {
 		card->currentbio = card->bio;
-		card->current_idx = card->bio->bi_idx;
-		card->current_sector = card->bio->bi_sector;
+		card->current_iter = card->bio->bi_iter;
 		card->bio = card->bio->bi_next;
 		if (card->bio == NULL)
 			card->biotail = &card->bio;
@@ -362,18 +358,17 @@
 	}
 	if (!bio)
 		return 0;
-	idx = card->current_idx;
 
 	rw = bio_rw(bio);
 	if (card->mm_pages[card->Ready].cnt >= DESC_PER_PAGE)
 		return 0;
 
-	vec = bio_iovec_idx(bio, idx);
-	len = vec->bv_len;
+	vec = bio_iter_iovec(bio, card->current_iter);
+
 	dma_handle = pci_map_page(card->dev,
-				  vec->bv_page,
-				  vec->bv_offset,
-				  len,
+				  vec.bv_page,
+				  vec.bv_offset,
+				  vec.bv_len,
 				  (rw == READ) ?
 				  PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
 
@@ -381,7 +376,7 @@
 	desc = &p->desc[p->cnt];
 	p->cnt++;
 	if (p->bio == NULL)
-		p->idx = idx;
+		p->iter = card->current_iter;
 	if ((p->biotail) != &bio->bi_next) {
 		*(p->biotail) = bio;
 		p->biotail = &(bio->bi_next);
@@ -391,8 +386,8 @@
 	desc->data_dma_handle = dma_handle;
 
 	desc->pci_addr = cpu_to_le64((u64)desc->data_dma_handle);
-	desc->local_addr = cpu_to_le64(card->current_sector << 9);
-	desc->transfer_size = cpu_to_le32(len);
+	desc->local_addr = cpu_to_le64(card->current_iter.bi_sector << 9);
+	desc->transfer_size = cpu_to_le32(vec.bv_len);
 	offset = (((char *)&desc->sem_control_bits) - ((char *)p->desc));
 	desc->sem_addr = cpu_to_le64((u64)(p->page_dma+offset));
 	desc->zero1 = desc->zero2 = 0;
@@ -407,10 +402,9 @@
 		desc->control_bits |= cpu_to_le32(DMASCR_TRANSFER_READ);
 	desc->sem_control_bits = desc->control_bits;
 
-	card->current_sector += (len >> 9);
-	idx++;
-	card->current_idx = idx;
-	if (idx >= bio->bi_vcnt)
+
+	bio_advance_iter(bio, &card->current_iter, vec.bv_len);
+	if (!card->current_iter.bi_size)
 		card->currentbio = NULL;
 
 	return 1;
@@ -439,23 +433,25 @@
 		struct mm_dma_desc *desc = &page->desc[page->headcnt];
 		int control = le32_to_cpu(desc->sem_control_bits);
 		int last = 0;
-		int idx;
+		struct bio_vec vec;
 
 		if (!(control & DMASCR_DMA_COMPLETE)) {
 			control = dma_status;
 			last = 1;
 		}
+
 		page->headcnt++;
-		idx = page->idx;
-		page->idx++;
-		if (page->idx >= bio->bi_vcnt) {
+		vec = bio_iter_iovec(bio, page->iter);
+		bio_advance_iter(bio, &page->iter, vec.bv_len);
+
+		if (!page->iter.bi_size) {
 			page->bio = bio->bi_next;
 			if (page->bio)
-				page->idx = page->bio->bi_idx;
+				page->iter = page->bio->bi_iter;
 		}
 
 		pci_unmap_page(card->dev, desc->data_dma_handle,
-			       bio_iovec_idx(bio, idx)->bv_len,
+			       vec.bv_len,
 				 (control & DMASCR_TRANSFER_READ) ?
 				PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE);
 		if (control & DMASCR_HARD_ERROR) {
@@ -532,7 +528,8 @@
 {
 	struct cardinfo *card = q->queuedata;
 	pr_debug("mm_make_request %llu %u\n",
-		 (unsigned long long)bio->bi_sector, bio->bi_size);
+		 (unsigned long long)bio->bi_iter.bi_sector,
+		 bio->bi_iter.bi_size);
 
 	spin_lock_irq(&card->lock);
 	*card->biotail = bio;
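
umem steps through the bio by hand rather than with a for-each macro, which is
the pattern for drivers that suspend and resume iteration across interrupts:
keep a private struct bvec_iter, read the current segment with
bio_iter_iovec(), and consume it with bio_advance_iter(). A compact sketch
(function name illustrative):

static void walk_bio(struct bio *bio)
{
	struct bvec_iter iter = bio->bi_iter;	/* private cursor */

	while (iter.bi_size) {
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		/* program one segment: bv.bv_page, bv.bv_offset, bv.bv_len */
		bio_advance_iter(bio, &iter, bv.bv_len);
	}
}

Keeping the cursor in struct cardinfo (current_iter) rather than in the bio is
what lets the completion path resume exactly where the submit path stopped.
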
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 6620b73..4b97b86 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1257,7 +1257,7 @@
 			bio->bi_bdev    = preq.bdev;
 			bio->bi_private = pending_req;
 			bio->bi_end_io  = end_block_io_op;
-			bio->bi_sector  = preq.sector_number;
+			bio->bi_iter.bi_sector  = preq.sector_number;
 		}
 
 		preq.sector_number += seg[i].nsec;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index f9c43f9..8dcfb54 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1547,7 +1547,7 @@
 			for (i = 0; i < pending; i++) {
 				offset = (i * segs * PAGE_SIZE) >> 9;
 				size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
-					   (unsigned int)(bio->bi_size >> 9) - offset);
+					   (unsigned int)bio_sectors(bio) - offset);
 				cloned_bio = bio_clone(bio, GFP_NOIO);
 				BUG_ON(cloned_bio == NULL);
 				bio_trim(cloned_bio, offset, size);
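
bio_trim() is the helper xen-blkfront now uses to restrict each clone to its
slice; both arguments are in 512-byte sectors. A sketch of the clone-and-trim
pattern (function name illustrative):

static struct bio *clone_slice(struct bio *bio, int offset, int size)
{
	struct bio *clone = bio_clone(bio, GFP_NOIO);

	if (clone)
		bio_trim(clone, offset, size);	/* sectors, not bytes */
	return clone;
}

Internally bio_trim() is bio_advance() plus a clamp of bi_iter.bi_size, the
same sequence rbd's bio_clone_range() above performs explicitly.
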
diff --git a/drivers/staging/zram/Kconfig b/drivers/block/zram/Kconfig
similarity index 92%
rename from drivers/staging/zram/Kconfig
rename to drivers/block/zram/Kconfig
index 983314c..3450be8 100644
--- a/drivers/staging/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -14,7 +14,6 @@
 	  disks and maybe many more.
 
 	  See zram.txt for more information.
-	  Project home: <https://compcache.googlecode.com/>
 
 config ZRAM_DEBUG
 	bool "Compressed RAM block device debug support"
diff --git a/drivers/staging/zram/Makefile b/drivers/block/zram/Makefile
similarity index 100%
rename from drivers/staging/zram/Makefile
rename to drivers/block/zram/Makefile
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
similarity index 87%
rename from drivers/staging/zram/zram_drv.c
rename to drivers/block/zram/zram_drv.c
index 3277d98..011e55d 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -2,6 +2,7 @@
  * Compressed RAM block device
  *
  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
+ *               2012, 2013 Minchan Kim
  *
  * This code is released using a dual license strategy: BSD/GPL
  * You can choose the licence that better fits your requirements.
@@ -9,7 +10,6 @@
  * Released under the terms of 3-clause BSD License
  * Released under the terms of GNU General Public License Version 2.0
  *
- * Project home: http://compcache.googlecode.com
  */
 
 #define KMSG_COMPONENT "zram"
@@ -104,7 +104,7 @@
 {
 	struct zram *zram = dev_to_zram(dev);
 
-	return sprintf(buf, "%u\n", zram->stats.pages_zero);
+	return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero));
 }
 
 static ssize_t orig_data_size_show(struct device *dev,
@@ -113,7 +113,7 @@
 	struct zram *zram = dev_to_zram(dev);
 
 	return sprintf(buf, "%llu\n",
-		(u64)(zram->stats.pages_stored) << PAGE_SHIFT);
+		(u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
 }
 
 static ssize_t compr_data_size_show(struct device *dev,
@@ -140,6 +140,7 @@
 	return sprintf(buf, "%llu\n", val);
 }
 
+/* flag operations need meta->tb_lock */
 static int zram_test_flag(struct zram_meta *meta, u32 index,
 			enum zram_pageflags flag)
 {
@@ -171,13 +172,14 @@
 	u64 start, end, bound;
 
 	/* unaligned request */
-	if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
+	if (unlikely(bio->bi_iter.bi_sector &
+		     (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
 		return 0;
-	if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
+	if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
 		return 0;
 
-	start = bio->bi_sector;
-	end = start + (bio->bi_size >> SECTOR_SHIFT);
+	start = bio->bi_iter.bi_sector;
+	end = start + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
 	bound = zram->disksize >> SECTOR_SHIFT;
 	/* out of range */
 	if (unlikely(start >= bound || end > bound || start > end))
@@ -227,6 +229,8 @@
 		goto free_table;
 	}
 
+	rwlock_init(&meta->tb_lock);
+	mutex_init(&meta->buffer_lock);
 	return meta;
 
 free_table:
@@ -279,6 +283,7 @@
 	flush_dcache_page(page);
 }
 
+/* NOTE: caller should hold meta->tb_lock with write-side */
 static void zram_free_page(struct zram *zram, size_t index)
 {
 	struct zram_meta *meta = zram->meta;
@@ -292,21 +297,21 @@
 		 */
 		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
 			zram_clear_flag(meta, index, ZRAM_ZERO);
-			zram->stats.pages_zero--;
+			atomic_dec(&zram->stats.pages_zero);
 		}
 		return;
 	}
 
 	if (unlikely(size > max_zpage_size))
-		zram->stats.bad_compress--;
+		atomic_dec(&zram->stats.bad_compress);
 
 	zs_free(meta->mem_pool, handle);
 
 	if (size <= PAGE_SIZE / 2)
-		zram->stats.good_compress--;
+		atomic_dec(&zram->stats.good_compress);
 
 	atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
-	zram->stats.pages_stored--;
+	atomic_dec(&zram->stats.pages_stored);
 
 	meta->table[index].handle = 0;
 	meta->table[index].size = 0;
@@ -318,20 +323,26 @@
 	size_t clen = PAGE_SIZE;
 	unsigned char *cmem;
 	struct zram_meta *meta = zram->meta;
-	unsigned long handle = meta->table[index].handle;
+	unsigned long handle;
+	u16 size;
+
+	read_lock(&meta->tb_lock);
+	handle = meta->table[index].handle;
+	size = meta->table[index].size;
 
 	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
+		read_unlock(&meta->tb_lock);
 		clear_page(mem);
 		return 0;
 	}
 
 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
-	if (meta->table[index].size == PAGE_SIZE)
+	if (size == PAGE_SIZE)
 		copy_page(mem, cmem);
 	else
-		ret = lzo1x_decompress_safe(cmem, meta->table[index].size,
-						mem, &clen);
+		ret = lzo1x_decompress_safe(cmem, size, mem, &clen);
 	zs_unmap_object(meta->mem_pool, handle);
+	read_unlock(&meta->tb_lock);
 
 	/* Should NEVER happen. Return bio error if it does. */
 	if (unlikely(ret != LZO_E_OK)) {
@@ -352,11 +363,14 @@
 	struct zram_meta *meta = zram->meta;
 	page = bvec->bv_page;
 
+	read_lock(&meta->tb_lock);
 	if (unlikely(!meta->table[index].handle) ||
 			zram_test_flag(meta, index, ZRAM_ZERO)) {
+		read_unlock(&meta->tb_lock);
 		handle_zero_page(bvec);
 		return 0;
 	}
+	read_unlock(&meta->tb_lock);
 
 	if (is_partial_io(bvec))
 		/* Use  a temporary buffer to decompress the page */
@@ -399,6 +413,7 @@
 	struct page *page;
 	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
 	struct zram_meta *meta = zram->meta;
+	bool locked = false;
 
 	page = bvec->bv_page;
 	src = meta->compress_buffer;
@@ -418,6 +433,8 @@
 			goto out;
 	}
 
+	mutex_lock(&meta->buffer_lock);
+	locked = true;
 	user_mem = kmap_atomic(page);
 
 	if (is_partial_io(bvec)) {
@@ -432,25 +449,18 @@
 	if (page_zero_filled(uncmem)) {
 		kunmap_atomic(user_mem);
 		/* Free memory associated with this sector now. */
+		write_lock(&zram->meta->tb_lock);
 		zram_free_page(zram, index);
-
-		zram->stats.pages_zero++;
 		zram_set_flag(meta, index, ZRAM_ZERO);
+		write_unlock(&zram->meta->tb_lock);
+
+		atomic_inc(&zram->stats.pages_zero);
 		ret = 0;
 		goto out;
 	}
 
-	/*
-	 * zram_slot_free_notify could miss free so that let's
-	 * double check.
-	 */
-	if (unlikely(meta->table[index].handle ||
-			zram_test_flag(meta, index, ZRAM_ZERO)))
-		zram_free_page(zram, index);
-
 	ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
 			       meta->compress_workmem);
-
 	if (!is_partial_io(bvec)) {
 		kunmap_atomic(user_mem);
 		user_mem = NULL;
@@ -463,7 +473,7 @@
 	}
 
 	if (unlikely(clen > max_zpage_size)) {
-		zram->stats.bad_compress++;
+		atomic_inc(&zram->stats.bad_compress);
 		clen = PAGE_SIZE;
 		src = NULL;
 		if (is_partial_io(bvec))
@@ -493,18 +503,22 @@
 	 * Free memory associated with this sector
 	 * before overwriting unused sectors.
 	 */
+	write_lock(&zram->meta->tb_lock);
 	zram_free_page(zram, index);
 
 	meta->table[index].handle = handle;
 	meta->table[index].size = clen;
+	write_unlock(&zram->meta->tb_lock);
 
 	/* Update stats */
 	atomic64_add(clen, &zram->stats.compr_size);
-	zram->stats.pages_stored++;
+	atomic_inc(&zram->stats.pages_stored);
 	if (clen <= PAGE_SIZE / 2)
-		zram->stats.good_compress++;
+		atomic_inc(&zram->stats.good_compress);
 
 out:
+	if (locked)
+		mutex_unlock(&meta->buffer_lock);
 	if (is_partial_io(bvec))
 		kfree(uncmem);
 
@@ -513,36 +527,15 @@
 	return ret;
 }
 
-static void handle_pending_slot_free(struct zram *zram)
-{
-	struct zram_slot_free *free_rq;
-
-	spin_lock(&zram->slot_free_lock);
-	while (zram->slot_free_rq) {
-		free_rq = zram->slot_free_rq;
-		zram->slot_free_rq = free_rq->next;
-		zram_free_page(zram, free_rq->index);
-		kfree(free_rq);
-	}
-	spin_unlock(&zram->slot_free_lock);
-}
-
 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 			int offset, struct bio *bio, int rw)
 {
 	int ret;
 
-	if (rw == READ) {
-		down_read(&zram->lock);
-		handle_pending_slot_free(zram);
+	if (rw == READ)
 		ret = zram_bvec_read(zram, bvec, index, offset, bio);
-		up_read(&zram->lock);
-	} else {
-		down_write(&zram->lock);
-		handle_pending_slot_free(zram);
+	else
 		ret = zram_bvec_write(zram, bvec, index, offset);
-		up_write(&zram->lock);
-	}
 
 	return ret;
 }
@@ -552,8 +545,6 @@
 	size_t index;
 	struct zram_meta *meta;
 
-	flush_work(&zram->free_work);
-
 	down_write(&zram->init_lock);
 	if (!zram->init_done) {
 		up_write(&zram->init_lock);
@@ -680,9 +671,10 @@
 
 static void __zram_make_request(struct zram *zram, struct bio *bio, int rw)
 {
-	int i, offset;
+	int offset;
 	u32 index;
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 
 	switch (rw) {
 	case READ:
@@ -693,36 +685,37 @@
 		break;
 	}
 
-	index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
-	offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
+	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
+	offset = (bio->bi_iter.bi_sector &
+		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
 
-	bio_for_each_segment(bvec, bio, i) {
+	bio_for_each_segment(bvec, bio, iter) {
 		int max_transfer_size = PAGE_SIZE - offset;
 
-		if (bvec->bv_len > max_transfer_size) {
+		if (bvec.bv_len > max_transfer_size) {
 			/*
 			 * zram_bvec_rw() can only operate on a single
 			 * zram page. Split the bio vector.
 			 */
 			struct bio_vec bv;
 
-			bv.bv_page = bvec->bv_page;
+			bv.bv_page = bvec.bv_page;
 			bv.bv_len = max_transfer_size;
-			bv.bv_offset = bvec->bv_offset;
+			bv.bv_offset = bvec.bv_offset;
 
 			if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0)
 				goto out;
 
-			bv.bv_len = bvec->bv_len - max_transfer_size;
+			bv.bv_len = bvec.bv_len - max_transfer_size;
 			bv.bv_offset += max_transfer_size;
 			if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0)
 				goto out;
 		} else
-			if (zram_bvec_rw(zram, bvec, index, offset, bio, rw)
+			if (zram_bvec_rw(zram, &bvec, index, offset, bio, rw)
 			    < 0)
 				goto out;
 
-		update_position(&index, &offset, bvec);
+		update_position(&index, &offset, &bvec);
 	}
 
 	set_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -759,40 +752,19 @@
 	bio_io_error(bio);
 }
 
-static void zram_slot_free(struct work_struct *work)
-{
-	struct zram *zram;
-
-	zram = container_of(work, struct zram, free_work);
-	down_write(&zram->lock);
-	handle_pending_slot_free(zram);
-	up_write(&zram->lock);
-}
-
-static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq)
-{
-	spin_lock(&zram->slot_free_lock);
-	free_rq->next = zram->slot_free_rq;
-	zram->slot_free_rq = free_rq;
-	spin_unlock(&zram->slot_free_lock);
-}
-
 static void zram_slot_free_notify(struct block_device *bdev,
 				unsigned long index)
 {
 	struct zram *zram;
-	struct zram_slot_free *free_rq;
+	struct zram_meta *meta;
 
 	zram = bdev->bd_disk->private_data;
+	meta = zram->meta;
+
+	write_lock(&meta->tb_lock);
+	zram_free_page(zram, index);
+	write_unlock(&meta->tb_lock);
 	atomic64_inc(&zram->stats.notify_free);
-
-	free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC);
-	if (!free_rq)
-		return;
-
-	free_rq->index = index;
-	add_slot_free(zram, free_rq);
-	schedule_work(&zram->free_work);
 }
 
 static const struct block_device_operations zram_devops = {
@@ -836,13 +808,8 @@
 {
 	int ret = -ENOMEM;
 
-	init_rwsem(&zram->lock);
 	init_rwsem(&zram->init_lock);
 
-	INIT_WORK(&zram->free_work, zram_slot_free);
-	spin_lock_init(&zram->slot_free_lock);
-	zram->slot_free_rq = NULL;
-
 	zram->queue = blk_alloc_queue(GFP_KERNEL);
 	if (!zram->queue) {
 		pr_err("Error allocating disk queue for device %d\n",
diff --git a/drivers/staging/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
similarity index 74%
rename from drivers/staging/zram/zram_drv.h
rename to drivers/block/zram/zram_drv.h
index 97a3acf..ad8aa35 100644
--- a/drivers/staging/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -2,6 +2,7 @@
  * Compressed RAM block device
  *
  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
+ *               2012, 2013 Minchan Kim
  *
  * This code is released using a dual license strategy: BSD/GPL
  * You can choose the licence that better fits your requirements.
@@ -9,7 +10,6 @@
  * Released under the terms of 3-clause BSD License
  * Released under the terms of GNU General Public License Version 2.0
  *
- * Project home: http://compcache.googlecode.com
  */
 
 #ifndef _ZRAM_DRV_H_
@@ -17,8 +17,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
-
-#include "../zsmalloc/zsmalloc.h"
+#include <linux/zsmalloc.h>
 
 /*
  * Some arbitrary value. This is just to catch
@@ -69,10 +68,6 @@
 	u8 flags;
 } __aligned(4);
 
-/*
- * All 64bit fields should only be manipulated by 64bit atomic accessors.
- * All modifications to 32bit counter should be protected by zram->lock.
- */
 struct zram_stats {
 	atomic64_t compr_size;	/* compressed size of pages stored */
 	atomic64_t num_reads;	/* failed + successful */
@@ -81,33 +76,23 @@
 	atomic64_t failed_writes;	/* can happen when memory is too low */
 	atomic64_t invalid_io;	/* non-page-aligned I/O requests */
 	atomic64_t notify_free;	/* no. of swap slot free notifications */
-	u32 pages_zero;		/* no. of zero filled pages */
-	u32 pages_stored;	/* no. of pages currently stored */
-	u32 good_compress;	/* % of pages with compression ratio<=50% */
-	u32 bad_compress;	/* % of pages with compression ratio>=75% */
+	atomic_t pages_zero;		/* no. of zero filled pages */
+	atomic_t pages_stored;	/* no. of pages currently stored */
+	atomic_t good_compress;	/* % of pages with compression ratio<=50% */
+	atomic_t bad_compress;	/* % of pages with compression ratio>=75% */
 };
 
 struct zram_meta {
+	rwlock_t tb_lock;	/* protect table */
 	void *compress_workmem;
 	void *compress_buffer;
 	struct table *table;
 	struct zs_pool *mem_pool;
-};
-
-struct zram_slot_free {
-	unsigned long index;
-	struct zram_slot_free *next;
+	struct mutex buffer_lock; /* protect compress buffers */
 };
 
 struct zram {
 	struct zram_meta *meta;
-	struct rw_semaphore lock; /* protect compression buffers, table,
-				   * 32bit stat counters against concurrent
-				   * notifications, reads and writes */
-
-	struct work_struct free_work;  /* handle pending free request */
-	struct zram_slot_free *slot_free_rq; /* list head of free request */
-
 	struct request_queue *queue;
 	struct gendisk *disk;
 	int init_done;
@@ -118,7 +103,6 @@
 	 * we can store in a disk.
 	 */
 	u64 disksize;	/* bytes */
-	spinlock_t slot_free_lock;
 
 	struct zram_stats stats;
 };
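[Note: The zram_drv.h hunk above replaces the coarse zram->lock rwsem and the deferred slot-free machinery with two finer-grained locks: meta->tb_lock, an rwlock protecting meta->table[] and the per-entry flags, and meta->buffer_lock, a mutex serializing the shared compression buffers on the write path. A hedged sketch of the resulting protocol, condensed from the zram_drv.c hunks above; zram, meta, index, handle, size and clen are assumed from the surrounding functions.]

    /* Reader: snapshot the table entry under the read lock (as in zram_bvec_read) */
    read_lock(&meta->tb_lock);
    handle = meta->table[index].handle;
    size = meta->table[index].size;
    read_unlock(&meta->tb_lock);

    /*
     * Writer: zram_free_page() and table updates require the write lock,
     * and the whole compress-and-store path holds meta->buffer_lock so the
     * shared compress_buffer/compress_workmem are used by one writer at a time.
     */
    mutex_lock(&meta->buffer_lock);
    write_lock(&meta->tb_lock);
    zram_free_page(zram, index);
    meta->table[index].handle = handle;
    meta->table[index].size = clen;
    write_unlock(&meta->tb_lock);
    mutex_unlock(&meta->buffer_lock);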
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 5980cb9..51e75ad 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c
@@ -561,11 +561,11 @@
 	int err;
 
 	err = request_irq(HW_EVENT_GDROM_CMD, gdrom_command_interrupt,
-		IRQF_DISABLED, "gdrom_command", &gd);
+		0, "gdrom_command", &gd);
 	if (err)
 		return err;
 	err = request_irq(HW_EVENT_GDROM_DMA, gdrom_dma_interrupt,
-		IRQF_DISABLED, "gdrom_dma", &gd);
+		0, "gdrom_dma", &gd);
 	if (err)
 		free_irq(HW_EVENT_GDROM_CMD, &gd);
 	return err;
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 671c385..03f4189 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2724,6 +2724,7 @@
 static int ipmi_parisc_probe(struct parisc_device *dev)
 {
 	struct smi_info *info;
+	int rv;
 
 	info = smi_info_alloc();
 
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index b3fb81d..f04e25f 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -35,6 +35,11 @@
 source "drivers/cpuidle/Kconfig.arm"
 endmenu
 
+menu "POWERPC CPU Idle Drivers"
+depends on PPC
+source "drivers/cpuidle/Kconfig.powerpc"
+endmenu
+
 endif
 
 config ARCH_NEEDS_CPU_IDLE_COUPLED
diff --git a/drivers/cpuidle/Kconfig.powerpc b/drivers/cpuidle/Kconfig.powerpc
new file mode 100644
index 0000000..66c3a09
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.powerpc
@@ -0,0 +1,20 @@
+#
+# POWERPC CPU Idle Drivers
+#
+config PSERIES_CPUIDLE
+	bool "Cpuidle driver for pSeries platforms"
+	depends on CPU_IDLE
+	depends on PPC_PSERIES
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through the cpuidle subsystem.
+
+config POWERNV_CPUIDLE
+	bool "Cpuidle driver for powernv platforms"
+	depends on CPU_IDLE
+	depends on PPC_POWERNV
+	default y
+	help
+	  Select this option to enable processor idle state management
+	  through the cpuidle subsystem.
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 527be28..f71ae1b 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -13,3 +13,8 @@
 obj-$(CONFIG_ARM_ZYNQ_CPUIDLE)		+= cpuidle-zynq.o
 obj-$(CONFIG_ARM_U8500_CPUIDLE)         += cpuidle-ux500.o
 obj-$(CONFIG_ARM_AT91_CPUIDLE)          += cpuidle-at91.o
+
+###############################################################################
+# POWERPC drivers
+obj-$(CONFIG_PSERIES_CPUIDLE)		+= cpuidle-pseries.o
+obj-$(CONFIG_POWERNV_CPUIDLE)		+= cpuidle-powernv.o
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
new file mode 100644
index 0000000..78fd174
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -0,0 +1,169 @@
+/*
+ *  cpuidle-powernv - idle state cpuidle driver.
+ *  Adapted from drivers/cpuidle/cpuidle-pseries
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+
+struct cpuidle_driver powernv_idle_driver = {
+	.name             = "powernv_idle",
+	.owner            = THIS_MODULE,
+};
+
+static int max_idle_state;
+static struct cpuidle_state *cpuidle_state_table;
+
+static int snooze_loop(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
+			int index)
+{
+	local_irq_enable();
+	set_thread_flag(TIF_POLLING_NRFLAG);
+
+	while (!need_resched()) {
+		HMT_low();
+		HMT_very_low();
+	}
+
+	HMT_medium();
+	clear_thread_flag(TIF_POLLING_NRFLAG);
+	smp_mb();
+	return index;
+}
+
+static int nap_loop(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv,
+			int index)
+{
+	power7_idle();
+	return index;
+}
+
+/*
+ * States for dedicated partition case.
+ */
+static struct cpuidle_state powernv_states[] = {
+	{ /* Snooze */
+		.name = "snooze",
+		.desc = "snooze",
+		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 0,
+		.target_residency = 0,
+		.enter = &snooze_loop },
+	{ /* NAP */
+		.name = "NAP",
+		.desc = "NAP",
+		.flags = CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 100,
+		.enter = &nap_loop },
+};
+
+static int powernv_cpuidle_add_cpu_notifier(struct notifier_block *n,
+			unsigned long action, void *hcpu)
+{
+	int hotcpu = (unsigned long)hcpu;
+	struct cpuidle_device *dev =
+				per_cpu(cpuidle_devices, hotcpu);
+
+	if (dev && cpuidle_get_driver()) {
+		switch (action) {
+		case CPU_ONLINE:
+		case CPU_ONLINE_FROZEN:
+			cpuidle_pause_and_lock();
+			cpuidle_enable_device(dev);
+			cpuidle_resume_and_unlock();
+			break;
+
+		case CPU_DEAD:
+		case CPU_DEAD_FROZEN:
+			cpuidle_pause_and_lock();
+			cpuidle_disable_device(dev);
+			cpuidle_resume_and_unlock();
+			break;
+
+		default:
+			return NOTIFY_DONE;
+		}
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block setup_hotplug_notifier = {
+	.notifier_call = powernv_cpuidle_add_cpu_notifier,
+};
+
+/*
+ * powernv_cpuidle_driver_init()
+ */
+static int powernv_cpuidle_driver_init(void)
+{
+	int idle_state;
+	struct cpuidle_driver *drv = &powernv_idle_driver;
+
+	drv->state_count = 0;
+
+	for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
+		/* Is the state not enabled? */
+		if (cpuidle_state_table[idle_state].enter == NULL)
+			continue;
+
+		drv->states[drv->state_count] =	/* structure copy */
+			cpuidle_state_table[idle_state];
+
+		drv->state_count += 1;
+	}
+
+	return 0;
+}
+
+/*
+ * powernv_idle_probe()
+ * Choose state table for shared versus dedicated partition
+ */
+static int powernv_idle_probe(void)
+{
+
+	if (cpuidle_disable != IDLE_NO_OVERRIDE)
+		return -ENODEV;
+
+	if (firmware_has_feature(FW_FEATURE_OPALv3)) {
+		cpuidle_state_table = powernv_states;
+		max_idle_state = ARRAY_SIZE(powernv_states);
+	} else
+		return -ENODEV;
+
+	return 0;
+}
+
+static int __init powernv_processor_idle_init(void)
+{
+	int retval;
+
+	retval = powernv_idle_probe();
+	if (retval)
+		return retval;
+
+	powernv_cpuidle_driver_init();
+	retval = cpuidle_register(&powernv_idle_driver, NULL);
+	if (retval) {
+		printk(KERN_DEBUG "Registration of powernv driver failed.\n");
+		return retval;
+	}
+
+	register_cpu_notifier(&setup_hotplug_notifier);
+	printk(KERN_DEBUG "powernv_idle_driver registered\n");
+	return 0;
+}
+
+device_initcall(powernv_processor_idle_init);
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/drivers/cpuidle/cpuidle-pseries.c
similarity index 75%
rename from arch/powerpc/platforms/pseries/processor_idle.c
rename to drivers/cpuidle/cpuidle-pseries.c
index 002d5b4..7ab564a 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -1,5 +1,5 @@
 /*
- *  processor_idle - idle state cpuidle driver.
+ *  cpuidle-pseries - idle state cpuidle driver.
  *  Adapted from drivers/idle/intel_idle.c and
  *  drivers/acpi/processor_idle.c
  *
@@ -24,9 +24,7 @@
 	.owner            = THIS_MODULE,
 };
 
-#define MAX_IDLE_STATE_COUNT	2
-
-static int max_idle_state = MAX_IDLE_STATE_COUNT - 1;
+static int max_idle_state;
 static struct cpuidle_state *cpuidle_state_table;
 
 static inline void idle_loop_prolog(unsigned long *in_purr)
@@ -54,13 +52,12 @@
 			int index)
 {
 	unsigned long in_purr;
-	int cpu = dev->cpu;
 
 	idle_loop_prolog(&in_purr);
 	local_irq_enable();
 	set_thread_flag(TIF_POLLING_NRFLAG);
 
-	while ((!need_resched()) && cpu_online(cpu)) {
+	while (!need_resched()) {
 		HMT_low();
 		HMT_very_low();
 	}
@@ -135,7 +132,7 @@
 /*
  * States for dedicated partition case.
  */
-static struct cpuidle_state dedicated_states[MAX_IDLE_STATE_COUNT] = {
+static struct cpuidle_state dedicated_states[] = {
 	{ /* Snooze */
 		.name = "snooze",
 		.desc = "snooze",
@@ -155,7 +152,7 @@
 /*
  * States for shared partition case.
  */
-static struct cpuidle_state shared_states[MAX_IDLE_STATE_COUNT] = {
+static struct cpuidle_state shared_states[] = {
 	{ /* Shared Cede */
 		.name = "Shared Cede",
 		.desc = "Shared Cede",
@@ -165,29 +162,12 @@
 		.enter = &shared_cede_loop },
 };
 
-void update_smt_snooze_delay(int cpu, int residency)
-{
-	struct cpuidle_driver *drv = cpuidle_get_driver();
-	struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
-
-	if (cpuidle_state_table != dedicated_states)
-		return;
-
-	if (residency < 0) {
-		/* Disable the Nap state on that cpu */
-		if (dev)
-			dev->states_usage[1].disable = 1;
-	} else
-		if (drv)
-			drv->states[1].target_residency = residency;
-}
-
 static int pseries_cpuidle_add_cpu_notifier(struct notifier_block *n,
 			unsigned long action, void *hcpu)
 {
 	int hotcpu = (unsigned long)hcpu;
 	struct cpuidle_device *dev =
-			per_cpu_ptr(cpuidle_devices, hotcpu);
+				per_cpu(cpuidle_devices, hotcpu);
 
 	if (dev && cpuidle_get_driver()) {
 		switch (action) {
@@ -226,12 +206,8 @@
 
 	drv->state_count = 0;
 
-	for (idle_state = 0; idle_state < MAX_IDLE_STATE_COUNT; ++idle_state) {
-
-		if (idle_state > max_idle_state)
-			break;
-
-		/* is the state not enabled? */
+	for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
+		/* Is the state not enabled? */
 		if (cpuidle_state_table[idle_state].enter == NULL)
 			continue;
 
@@ -251,21 +227,19 @@
 static int pseries_idle_probe(void)
 {
 
-	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
-		return -ENODEV;
-
 	if (cpuidle_disable != IDLE_NO_OVERRIDE)
 		return -ENODEV;
 
-	if (max_idle_state == 0) {
-		printk(KERN_DEBUG "pseries processor idle disabled.\n");
-		return -EPERM;
-	}
-
-	if (lppaca_shared_proc(get_lppaca()))
-		cpuidle_state_table = shared_states;
-	else
-		cpuidle_state_table = dedicated_states;
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		if (lppaca_shared_proc(get_lppaca())) {
+			cpuidle_state_table = shared_states;
+			max_idle_state = ARRAY_SIZE(shared_states);
+		} else {
+			cpuidle_state_table = dedicated_states;
+			max_idle_state = ARRAY_SIZE(dedicated_states);
+		}
+	} else
+		return -ENODEV;
 
 	return 0;
 }
@@ -287,22 +261,7 @@
 
 	register_cpu_notifier(&setup_hotplug_notifier);
 	printk(KERN_DEBUG "pseries_idle_driver registered\n");
-
 	return 0;
 }
 
-static void __exit pseries_processor_idle_exit(void)
-{
-
-	unregister_cpu_notifier(&setup_hotplug_notifier);
-	cpuidle_unregister(&pseries_idle_driver);
-
-	return;
-}
-
-module_init(pseries_processor_idle_init);
-module_exit(pseries_processor_idle_exit);
-
-MODULE_AUTHOR("Deepthi Dharwar <deepthi@linux.vnet.ibm.com>");
-MODULE_DESCRIPTION("Cpuidle driver for POWER");
-MODULE_LICENSE("GPL");
+device_initcall(pseries_processor_idle_init);
diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile
index 0e9c825..c488b84 100644
--- a/drivers/md/bcache/Makefile
+++ b/drivers/md/bcache/Makefile
@@ -1,7 +1,8 @@
 
 obj-$(CONFIG_BCACHE)	+= bcache.o
 
-bcache-y		:= alloc.o btree.o bset.o io.o journal.o writeback.o\
-	movinggc.o request.o super.o sysfs.o debug.o util.o trace.o stats.o closure.o
+bcache-y		:= alloc.o bset.o btree.o closure.o debug.o extents.o\
+	io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\
+	util.o writeback.o
 
 CFLAGS_request.o	+= -Iblock
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 4c9852d..c0d37d0 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -132,10 +132,16 @@
 {
 	BUG_ON(GC_MARK(b) || GC_SECTORS_USED(b));
 
-	if (fifo_used(&ca->free) > ca->watermark[WATERMARK_MOVINGGC] &&
-	    CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO)
-		return false;
+	if (CACHE_REPLACEMENT(&ca->sb) == CACHE_REPLACEMENT_FIFO) {
+		unsigned i;
 
+		for (i = 0; i < RESERVE_NONE; i++)
+			if (!fifo_full(&ca->free[i]))
+				goto add;
+
+		return false;
+	}
+add:
 	b->prio = 0;
 
 	if (can_inc_bucket_gen(b) &&
@@ -162,8 +168,21 @@
 	fifo_push(&ca->free_inc, b - ca->buckets);
 }
 
-#define bucket_prio(b)				\
-	(((unsigned) (b->prio - ca->set->min_prio)) * GC_SECTORS_USED(b))
+/*
+ * Determines what order we're going to reuse buckets, smallest bucket_prio()
+ * first: we also take into account the number of sectors of live data in that
+ * bucket, and in order for that multiply to make sense we have to scale
+ * bucket priorities.
+ *
+ * Thus, we scale the bucket priorities so that the bucket with the smallest
+ * prio is worth 1/8th of what INITIAL_PRIO is worth.
+ */
+
+#define bucket_prio(b)							\
+({									\
+	unsigned min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;	\
+									\
+	(b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);	\
+})
 
 #define bucket_max_cmp(l, r)	(bucket_prio(l) < bucket_prio(r))
 #define bucket_min_cmp(l, r)	(bucket_prio(l) > bucket_prio(r))
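[Note: A worked example of the rewritten bucket_prio(), under assumed numbers (ca->set->min_prio == 0, INITIAL_PRIO == 32768U as defined later in this series). The point of the offset is that the coldest bucket now scores a nonzero weight per live sector, so the live-data multiplier always matters.]

    /* min_prio term: (32768U - 0) / 8 == 4096 */

    /*
     * Bucket A: prio == 0 (coldest), 8 live sectors:
     *	(0 - 0 + 4096) * 8 == 32768
     * Bucket B: prio == 4096, 2 live sectors:
     *	(4096 - 0 + 4096) * 2 == 16384
     *
     * Buckets are reused smallest bucket_prio() first, so B goes before A:
     * its two live sectors cost less to copy than A's eight, despite A
     * being colder. Under the old macro A would have scored 0 * 8 == 0
     * and always won, no matter how much live data it held.
     */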
@@ -304,6 +323,21 @@
 	__set_current_state(TASK_RUNNING);				\
 } while (0)
 
+static int bch_allocator_push(struct cache *ca, long bucket)
+{
+	unsigned i;
+
+	/* Prios/gens are actually the most important reserve */
+	if (fifo_push(&ca->free[RESERVE_PRIO], bucket))
+		return true;
+
+	for (i = 0; i < RESERVE_NR; i++)
+		if (fifo_push(&ca->free[i], bucket))
+			return true;
+
+	return false;
+}
+
 static int bch_allocator_thread(void *arg)
 {
 	struct cache *ca = arg;
@@ -336,9 +370,7 @@
 				mutex_lock(&ca->set->bucket_lock);
 			}
 
-			allocator_wait(ca, !fifo_full(&ca->free));
-
-			fifo_push(&ca->free, bucket);
+			allocator_wait(ca, bch_allocator_push(ca, bucket));
 			wake_up(&ca->set->bucket_wait);
 		}
 
@@ -365,34 +397,29 @@
 	}
 }
 
-long bch_bucket_alloc(struct cache *ca, unsigned watermark, bool wait)
+long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 {
 	DEFINE_WAIT(w);
 	struct bucket *b;
 	long r;
 
 	/* fastpath */
-	if (fifo_used(&ca->free) > ca->watermark[watermark]) {
-		fifo_pop(&ca->free, r);
+	if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
+	    fifo_pop(&ca->free[reserve], r))
 		goto out;
-	}
 
 	if (!wait)
 		return -1;
 
-	while (1) {
-		if (fifo_used(&ca->free) > ca->watermark[watermark]) {
-			fifo_pop(&ca->free, r);
-			break;
-		}
-
+	do {
 		prepare_to_wait(&ca->set->bucket_wait, &w,
 				TASK_UNINTERRUPTIBLE);
 
 		mutex_unlock(&ca->set->bucket_lock);
 		schedule();
 		mutex_lock(&ca->set->bucket_lock);
-	}
+	} while (!fifo_pop(&ca->free[RESERVE_NONE], r) &&
+		 !fifo_pop(&ca->free[reserve], r));
 
 	finish_wait(&ca->set->bucket_wait, &w);
 out:
@@ -401,12 +428,14 @@
 	if (expensive_debug_checks(ca->set)) {
 		size_t iter;
 		long i;
+		unsigned j;
 
 		for (iter = 0; iter < prio_buckets(ca) * 2; iter++)
 			BUG_ON(ca->prio_buckets[iter] == (uint64_t) r);
 
-		fifo_for_each(i, &ca->free, iter)
-			BUG_ON(i == r);
+		for (j = 0; j < RESERVE_NR; j++)
+			fifo_for_each(i, &ca->free[j], iter)
+				BUG_ON(i == r);
 		fifo_for_each(i, &ca->free_inc, iter)
 			BUG_ON(i == r);
 		fifo_for_each(i, &ca->unused, iter)
@@ -419,7 +448,7 @@
 
 	SET_GC_SECTORS_USED(b, ca->sb.bucket_size);
 
-	if (watermark <= WATERMARK_METADATA) {
+	if (reserve <= RESERVE_PRIO) {
 		SET_GC_MARK(b, GC_MARK_METADATA);
 		SET_GC_MOVE(b, 0);
 		b->prio = BTREE_PRIO;
@@ -445,7 +474,7 @@
 	}
 }
 
-int __bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
+int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
 			   struct bkey *k, int n, bool wait)
 {
 	int i;
@@ -459,7 +488,7 @@
 
 	for (i = 0; i < n; i++) {
 		struct cache *ca = c->cache_by_alloc[i];
-		long b = bch_bucket_alloc(ca, watermark, wait);
+		long b = bch_bucket_alloc(ca, reserve, wait);
 
 		if (b == -1)
 			goto err;
@@ -478,12 +507,12 @@
 	return -1;
 }
 
-int bch_bucket_alloc_set(struct cache_set *c, unsigned watermark,
+int bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
 			 struct bkey *k, int n, bool wait)
 {
 	int ret;
 	mutex_lock(&c->bucket_lock);
-	ret = __bch_bucket_alloc_set(c, watermark, k, n, wait);
+	ret = __bch_bucket_alloc_set(c, reserve, k, n, wait);
 	mutex_unlock(&c->bucket_lock);
 	return ret;
 }
@@ -573,8 +602,8 @@
 
 	while (!(b = pick_data_bucket(c, k, write_point, &alloc.key))) {
 		unsigned watermark = write_prio
-			? WATERMARK_MOVINGGC
-			: WATERMARK_NONE;
+			? RESERVE_MOVINGGC
+			: RESERVE_NONE;
 
 		spin_unlock(&c->data_bucket_lock);
 
@@ -689,7 +718,7 @@
 	 * Then 8 for btree allocations
 	 * Then half for the moving garbage collector
 	 */
-
+#if 0
 	ca->watermark[WATERMARK_PRIO] = 0;
 
 	ca->watermark[WATERMARK_METADATA] = prio_buckets(ca);
@@ -699,6 +728,6 @@
 
 	ca->watermark[WATERMARK_NONE] = ca->free.size / 2 +
 		ca->watermark[WATERMARK_MOVINGGC];
-
+#endif
 	return 0;
 }
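[Note: With the watermarks gone (the #if 0 block above keeps the old setup only as a reference), each cache now owns an array of free-bucket FIFOs indexed by enum alloc_reserve. The discipline, roughly: drain the shared RESERVE_NONE pool first and fall back to the caller's dedicated reserve only when the pool is empty, so reserves stay full for the paths that genuinely need them. A hedged caller-side sketch of the fastpath in bch_bucket_alloc(), assuming the fifo_pop() semantics used above; try_alloc() is an illustrative name, not a drop-in copy.]

    static long try_alloc(struct cache *ca, enum alloc_reserve reserve)
    {
    	long r;

    	/* shared pool first, dedicated reserve second */
    	if (fifo_pop(&ca->free[RESERVE_NONE], r) ||
    	    fifo_pop(&ca->free[reserve], r))
    		return r;

    	return -1;	/* caller may sleep on bucket_wait and retry */
    }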
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 754f4317..0c707e4 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -187,6 +187,7 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+#include "bset.h"
 #include "util.h"
 #include "closure.h"
 
@@ -280,7 +281,6 @@
 	unsigned long		sectors_dirty_last;
 	long			sectors_dirty_derivative;
 
-	mempool_t		*unaligned_bvec;
 	struct bio_set		*bio_split;
 
 	unsigned		data_csum:1;
@@ -310,7 +310,8 @@
 	struct cache_sb		sb;
 	struct bio		sb_bio;
 	struct bio_vec		sb_bv[1];
-	struct closure_with_waitlist sb_write;
+	struct closure		sb_write;
+	struct semaphore	sb_write_mutex;
 
 	/* Refcount on the cache set. Always nonzero when we're caching. */
 	atomic_t		count;
@@ -383,12 +384,12 @@
 	unsigned		writeback_rate_p_term_inverse;
 };
 
-enum alloc_watermarks {
-	WATERMARK_PRIO,
-	WATERMARK_METADATA,
-	WATERMARK_MOVINGGC,
-	WATERMARK_NONE,
-	WATERMARK_MAX
+enum alloc_reserve {
+	RESERVE_BTREE,
+	RESERVE_PRIO,
+	RESERVE_MOVINGGC,
+	RESERVE_NONE,
+	RESERVE_NR,
 };
 
 struct cache {
@@ -400,8 +401,6 @@
 	struct kobject		kobj;
 	struct block_device	*bdev;
 
-	unsigned		watermark[WATERMARK_MAX];
-
 	struct task_struct	*alloc_thread;
 
 	struct closure		prio;
@@ -430,7 +429,7 @@
 	 * because all the data they contained was overwritten), so we only
 	 * need to discard them before they can be moved to the free list.
 	 */
-	DECLARE_FIFO(long, free);
+	DECLARE_FIFO(long, free)[RESERVE_NR];
 	DECLARE_FIFO(long, free_inc);
 	DECLARE_FIFO(long, unused);
 
@@ -515,7 +514,8 @@
 	uint64_t		cached_dev_sectors;
 	struct closure		caching;
 
-	struct closure_with_waitlist sb_write;
+	struct closure		sb_write;
+	struct semaphore	sb_write_mutex;
 
 	mempool_t		*search;
 	mempool_t		*bio_meta;
@@ -630,13 +630,15 @@
 
 #ifdef CONFIG_BCACHE_DEBUG
 	struct btree		*verify_data;
+	struct bset		*verify_ondisk;
 	struct mutex		verify_lock;
 #endif
 
 	unsigned		nr_uuids;
 	struct uuid_entry	*uuids;
 	BKEY_PADDED(uuid_bucket);
-	struct closure_with_waitlist uuid_write;
+	struct closure		uuid_write;
+	struct semaphore	uuid_write_mutex;
 
 	/*
 	 * A btree node on disk could have too many bsets for an iterator to fit
@@ -644,13 +646,7 @@
 	 */
 	mempool_t		*fill_iter;
 
-	/*
-	 * btree_sort() is a merge sort and requires temporary space - single
-	 * element mempool
-	 */
-	struct mutex		sort_lock;
-	struct bset		*sort;
-	unsigned		sort_crit_factor;
+	struct bset_sort_state	sort;
 
 	/* List of buckets we're currently writing data to */
 	struct list_head	data_buckets;
@@ -666,7 +662,6 @@
 	unsigned		congested_read_threshold_us;
 	unsigned		congested_write_threshold_us;
 
-	struct time_stats	sort_time;
 	struct time_stats	btree_gc_time;
 	struct time_stats	btree_split_time;
 	struct time_stats	btree_read_time;
@@ -684,9 +679,9 @@
 	unsigned		error_decay;
 
 	unsigned short		journal_delay_ms;
+	bool			expensive_debug_checks;
 	unsigned		verify:1;
 	unsigned		key_merging_disabled:1;
-	unsigned		expensive_debug_checks:1;
 	unsigned		gc_always_rewrite:1;
 	unsigned		shrinker_disabled:1;
 	unsigned		copy_gc_enabled:1;
@@ -708,13 +703,8 @@
 	struct bio		bio;
 };
 
-static inline unsigned local_clock_us(void)
-{
-	return local_clock() >> 10;
-}
-
 #define BTREE_PRIO		USHRT_MAX
-#define INITIAL_PRIO		32768
+#define INITIAL_PRIO		32768U
 
 #define btree_bytes(c)		((c)->btree_pages * PAGE_SIZE)
 #define btree_blocks(b)							\
@@ -727,21 +717,6 @@
 #define bucket_bytes(c)		((c)->sb.bucket_size << 9)
 #define block_bytes(c)		((c)->sb.block_size << 9)
 
-#define __set_bytes(i, k)	(sizeof(*(i)) + (k) * sizeof(uint64_t))
-#define set_bytes(i)		__set_bytes(i, i->keys)
-
-#define __set_blocks(i, k, c)	DIV_ROUND_UP(__set_bytes(i, k), block_bytes(c))
-#define set_blocks(i, c)	__set_blocks(i, (i)->keys, c)
-
-#define node(i, j)		((struct bkey *) ((i)->d + (j)))
-#define end(i)			node(i, (i)->keys)
-
-#define index(i, b)							\
-	((size_t) (((void *) i - (void *) (b)->sets[0].data) /		\
-		   block_bytes(b->c)))
-
-#define btree_data_space(b)	(PAGE_SIZE << (b)->page_order)
-
 #define prios_per_bucket(c)				\
 	((bucket_bytes(c) - sizeof(struct prio_set)) /	\
 	 sizeof(struct bucket_disk))
@@ -784,20 +759,34 @@
 	return PTR_CACHE(c, k, ptr)->buckets + PTR_BUCKET_NR(c, k, ptr);
 }
 
-/* Btree key macros */
-
-static inline void bkey_init(struct bkey *k)
+static inline uint8_t gen_after(uint8_t a, uint8_t b)
 {
-	*k = ZERO_KEY;
+	uint8_t r = a - b;
+	return r > 128U ? 0 : r;
 }
 
+static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
+				unsigned i)
+{
+	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
+}
+
+static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
+				 unsigned i)
+{
+	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
+}
+
+/* Btree key macros */
+
 /*
  * This is used for various on disk data structures - cache_sb, prio_set, bset,
  * jset: The checksum is _always_ the first 8 bytes of these structs
  */
 #define csum_set(i)							\
 	bch_crc64(((void *) (i)) + sizeof(uint64_t),			\
-	      ((void *) end(i)) - (((void *) (i)) + sizeof(uint64_t)))
+		  ((void *) bset_bkey_last(i)) -			\
+		  (((void *) (i)) + sizeof(uint64_t)))
 
 /* Error handling macros */
 
@@ -902,7 +891,6 @@
 void bch_bbio_free(struct bio *, struct cache_set *);
 struct bio *bch_bbio_alloc(struct cache_set *);
 
-struct bio *bch_bio_split(struct bio *, int, gfp_t, struct bio_set *);
 void bch_generic_make_request(struct bio *, struct bio_split_pool *);
 void __bch_submit_bbio(struct bio *, struct cache_set *);
 void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned);
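[Note: gen_after() and ptr_stale(), moved into bcache.h above, compare 8-bit generation numbers under wraparound: the subtraction is mod 256, and differences above 128 are treated as "b is actually ahead" and clamped to 0. A small standalone worked example (userspace C, gen_after copied from the hunk above):]

    #include <stdint.h>
    #include <stdio.h>

    static inline uint8_t gen_after(uint8_t a, uint8_t b)
    {
    	uint8_t r = a - b;		/* wraps mod 256 */
    	return r > 128U ? 0 : r;
    }

    int main(void)
    {
    	/* bucket gen has moved 3 past the key's copy: stale by 3 */
    	printf("%u\n", gen_after(10, 7));	/* 3 */
    	/* wraparound: 2 - 250 == 8 (mod 256), still "after" */
    	printf("%u\n", gen_after(2, 250));	/* 8 */
    	/* key gen ahead of bucket gen: not stale */
    	printf("%u\n", gen_after(7, 10));	/* -3 wraps to 253 > 128 -> 0 */
    	return 0;
    }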
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 7d388b8..4f6b594 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -5,30 +5,134 @@
  * Copyright 2012 Google, Inc.
  */
 
-#include "bcache.h"
-#include "btree.h"
-#include "debug.h"
+#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
 
+#include "util.h"
+#include "bset.h"
+
+#include <linux/console.h>
 #include <linux/random.h>
 #include <linux/prefetch.h>
 
+#ifdef CONFIG_BCACHE_DEBUG
+
+void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set)
+{
+	struct bkey *k, *next;
+
+	for (k = i->start; k < bset_bkey_last(i); k = next) {
+		next = bkey_next(k);
+
+		printk(KERN_ERR "block %u key %zi/%u: ", set,
+		       (uint64_t *) k - i->d, i->keys);
+
+		if (b->ops->key_dump)
+			b->ops->key_dump(b, k);
+		else
+			printk("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
+
+		if (next < bset_bkey_last(i) &&
+		    bkey_cmp(k, b->ops->is_extents ?
+			     &START_KEY(next) : next) > 0)
+			printk(KERN_ERR "Key skipped backwards\n");
+	}
+}
+
+void bch_dump_bucket(struct btree_keys *b)
+{
+	unsigned i;
+
+	console_lock();
+	for (i = 0; i <= b->nsets; i++)
+		bch_dump_bset(b, b->set[i].data,
+			      bset_sector_offset(b, b->set[i].data));
+	console_unlock();
+}
+
+int __bch_count_data(struct btree_keys *b)
+{
+	unsigned ret = 0;
+	struct btree_iter iter;
+	struct bkey *k;
+
+	if (b->ops->is_extents)
+		for_each_key(b, k, &iter)
+			ret += KEY_SIZE(k);
+	return ret;
+}
+
+void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
+{
+	va_list args;
+	struct bkey *k, *p = NULL;
+	struct btree_iter iter;
+	const char *err;
+
+	for_each_key(b, k, &iter) {
+		if (b->ops->is_extents) {
+			err = "Keys out of order";
+			if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
+				goto bug;
+
+			if (bch_ptr_invalid(b, k))
+				continue;
+
+			err =  "Overlapping keys";
+			if (p && bkey_cmp(p, &START_KEY(k)) > 0)
+				goto bug;
+		} else {
+			if (bch_ptr_bad(b, k))
+				continue;
+
+			err = "Duplicate keys";
+			if (p && !bkey_cmp(p, k))
+				goto bug;
+		}
+		p = k;
+	}
+#if 0
+	err = "Key larger than btree node key";
+	if (p && bkey_cmp(p, &b->key) > 0)
+		goto bug;
+#endif
+	return;
+bug:
+	bch_dump_bucket(b);
+
+	va_start(args, fmt);
+	vprintk(fmt, args);
+	va_end(args);
+
+	panic("bch_check_keys error:  %s:\n", err);
+}
+
+static void bch_btree_iter_next_check(struct btree_iter *iter)
+{
+	struct bkey *k = iter->data->k, *next = bkey_next(k);
+
+	if (next < iter->data->end &&
+	    bkey_cmp(k, iter->b->ops->is_extents ?
+		     &START_KEY(next) : next) > 0) {
+		bch_dump_bucket(iter->b);
+		panic("Key skipped backwards\n");
+	}
+}
+
+#else
+
+static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
+
+#endif
+
 /* Keylists */
 
-int bch_keylist_realloc(struct keylist *l, int nptrs, struct cache_set *c)
+int __bch_keylist_realloc(struct keylist *l, unsigned u64s)
 {
 	size_t oldsize = bch_keylist_nkeys(l);
-	size_t newsize = oldsize + 2 + nptrs;
+	size_t newsize = oldsize + u64s;
 	uint64_t *old_keys = l->keys_p == l->inline_keys ? NULL : l->keys_p;
 	uint64_t *new_keys;
 
-	/* The journalling code doesn't handle the case where the keys to insert
-	 * is bigger than an empty write: If we just return -ENOMEM here,
-	 * bio_insert() and bio_invalidate() will insert the keys created so far
-	 * and finish the rest when the keylist is empty.
-	 */
-	if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
-		return -ENOMEM;
-
 	newsize = roundup_pow_of_two(newsize);
 
 	if (newsize <= KEYLIST_INLINE ||
@@ -71,136 +175,6 @@
 		bch_keylist_bytes(l));
 }
 
-/* Pointer validation */
-
-static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
-{
-	unsigned i;
-
-	for (i = 0; i < KEY_PTRS(k); i++)
-		if (ptr_available(c, k, i)) {
-			struct cache *ca = PTR_CACHE(c, k, i);
-			size_t bucket = PTR_BUCKET_NR(c, k, i);
-			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
-
-			if (KEY_SIZE(k) + r > c->sb.bucket_size ||
-			    bucket <  ca->sb.first_bucket ||
-			    bucket >= ca->sb.nbuckets)
-				return true;
-		}
-
-	return false;
-}
-
-bool bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
-{
-	char buf[80];
-
-	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
-		goto bad;
-
-	if (__ptr_invalid(c, k))
-		goto bad;
-
-	return false;
-bad:
-	bch_bkey_to_text(buf, sizeof(buf), k);
-	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
-	return true;
-}
-
-bool bch_extent_ptr_invalid(struct cache_set *c, const struct bkey *k)
-{
-	char buf[80];
-
-	if (!KEY_SIZE(k))
-		return true;
-
-	if (KEY_SIZE(k) > KEY_OFFSET(k))
-		goto bad;
-
-	if (__ptr_invalid(c, k))
-		goto bad;
-
-	return false;
-bad:
-	bch_bkey_to_text(buf, sizeof(buf), k);
-	cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
-	return true;
-}
-
-static bool ptr_bad_expensive_checks(struct btree *b, const struct bkey *k,
-				     unsigned ptr)
-{
-	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
-	char buf[80];
-
-	if (mutex_trylock(&b->c->bucket_lock)) {
-		if (b->level) {
-			if (KEY_DIRTY(k) ||
-			    g->prio != BTREE_PRIO ||
-			    (b->c->gc_mark_valid &&
-			     GC_MARK(g) != GC_MARK_METADATA))
-				goto err;
-
-		} else {
-			if (g->prio == BTREE_PRIO)
-				goto err;
-
-			if (KEY_DIRTY(k) &&
-			    b->c->gc_mark_valid &&
-			    GC_MARK(g) != GC_MARK_DIRTY)
-				goto err;
-		}
-		mutex_unlock(&b->c->bucket_lock);
-	}
-
-	return false;
-err:
-	mutex_unlock(&b->c->bucket_lock);
-	bch_bkey_to_text(buf, sizeof(buf), k);
-	btree_bug(b,
-"inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
-		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
-		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
-	return true;
-}
-
-bool bch_ptr_bad(struct btree *b, const struct bkey *k)
-{
-	struct bucket *g;
-	unsigned i, stale;
-
-	if (!bkey_cmp(k, &ZERO_KEY) ||
-	    !KEY_PTRS(k) ||
-	    bch_ptr_invalid(b, k))
-		return true;
-
-	for (i = 0; i < KEY_PTRS(k); i++) {
-		if (!ptr_available(b->c, k, i))
-			return true;
-
-		g = PTR_BUCKET(b->c, k, i);
-		stale = ptr_stale(b->c, k, i);
-
-		btree_bug_on(stale > 96, b,
-			     "key too stale: %i, need_gc %u",
-			     stale, b->c->need_gc);
-
-		btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
-			     b, "stale dirty pointer");
-
-		if (stale)
-			return true;
-
-		if (expensive_debug_checks(b->c) &&
-		    ptr_bad_expensive_checks(b, k, i))
-			return true;
-	}
-
-	return false;
-}
-
 /* Key/pointer manipulation */
 
 void bch_bkey_copy_single_ptr(struct bkey *dest, const struct bkey *src,
@@ -255,56 +229,138 @@
 	return true;
 }
 
-static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
+/* Auxiliary search trees */
+
+/* 32 bits total: */
+#define BKEY_MID_BITS		3
+#define BKEY_EXPONENT_BITS	7
+#define BKEY_MANTISSA_BITS	(32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
+#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
+
+struct bkey_float {
+	unsigned	exponent:BKEY_EXPONENT_BITS;
+	unsigned	m:BKEY_MID_BITS;
+	unsigned	mantissa:BKEY_MANTISSA_BITS;
+} __packed;
+
+/*
+ * BSET_CACHELINE was originally intended to match the hardware cacheline size -
+ * it used to be 64, but I realized the lookup code would touch slightly less
+ * memory if it was 128.
+ *
+ * It defines the number of bytes (in struct bset) per struct bkey_float in
+ * the auxiliary search tree - when we're done searching the bset_float tree we
+ * have this many bytes left that we do a linear search over.
+ *
+ * Since (after level 5) every level of the bset_tree is on a new cacheline,
+ * we're touching one fewer cacheline in the bset tree in exchange for one more
+ * cacheline in the linear search - but the linear search might stop before it
+ * gets to the second cacheline.
+ */
+
+#define BSET_CACHELINE		128
+
+/* Space required for the btree node keys */
+static inline size_t btree_keys_bytes(struct btree_keys *b)
 {
-	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
-		~((uint64_t)1 << 63);
+	return PAGE_SIZE << b->page_order;
 }
 
-/* Tries to merge l and r: l should be lower than r
- * Returns true if we were able to merge. If we did merge, l will be the merged
- * key, r will be untouched.
- */
-bool bch_bkey_try_merge(struct btree *b, struct bkey *l, struct bkey *r)
+static inline size_t btree_keys_cachelines(struct btree_keys *b)
+{
+	return btree_keys_bytes(b) / BSET_CACHELINE;
+}
+
+/* Space required for the auxiliary search trees */
+static inline size_t bset_tree_bytes(struct btree_keys *b)
+{
+	return btree_keys_cachelines(b) * sizeof(struct bkey_float);
+}
+
+/* Space required for the prev pointers */
+static inline size_t bset_prev_bytes(struct btree_keys *b)
+{
+	return btree_keys_cachelines(b) * sizeof(uint8_t);
+}
+
+/* Memory allocation */
+
+void bch_btree_keys_free(struct btree_keys *b)
+{
+	struct bset_tree *t = b->set;
+
+	if (bset_prev_bytes(b) < PAGE_SIZE)
+		kfree(t->prev);
+	else
+		free_pages((unsigned long) t->prev,
+			   get_order(bset_prev_bytes(b)));
+
+	if (bset_tree_bytes(b) < PAGE_SIZE)
+		kfree(t->tree);
+	else
+		free_pages((unsigned long) t->tree,
+			   get_order(bset_tree_bytes(b)));
+
+	free_pages((unsigned long) t->data, b->page_order);
+
+	t->prev = NULL;
+	t->tree = NULL;
+	t->data = NULL;
+}
+EXPORT_SYMBOL(bch_btree_keys_free);
+
+int bch_btree_keys_alloc(struct btree_keys *b, unsigned page_order, gfp_t gfp)
+{
+	struct bset_tree *t = b->set;
+
+	BUG_ON(t->data);
+
+	b->page_order = page_order;
+
+	t->data = (void *) __get_free_pages(gfp, b->page_order);
+	if (!t->data)
+		goto err;
+
+	t->tree = bset_tree_bytes(b) < PAGE_SIZE
+		? kmalloc(bset_tree_bytes(b), gfp)
+		: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
+	if (!t->tree)
+		goto err;
+
+	t->prev = bset_prev_bytes(b) < PAGE_SIZE
+		? kmalloc(bset_prev_bytes(b), gfp)
+		: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
+	if (!t->prev)
+		goto err;
+
+	return 0;
+err:
+	bch_btree_keys_free(b);
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(bch_btree_keys_alloc);
+
+void bch_btree_keys_init(struct btree_keys *b, const struct btree_keys_ops *ops,
+			 bool *expensive_debug_checks)
 {
 	unsigned i;
 
-	if (key_merging_disabled(b->c))
-		return false;
+	b->ops = ops;
+	b->expensive_debug_checks = expensive_debug_checks;
+	b->nsets = 0;
+	b->last_set_unwritten = 0;
 
-	if (KEY_PTRS(l) != KEY_PTRS(r) ||
-	    KEY_DIRTY(l) != KEY_DIRTY(r) ||
-	    bkey_cmp(l, &START_KEY(r)))
-		return false;
-
-	for (i = 0; i < KEY_PTRS(l); i++)
-		if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
-		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
-			return false;
-
-	/* Keys with no pointers aren't restricted to one bucket and could
-	 * overflow KEY_SIZE
+	/* XXX: shouldn't be needed */
+	for (i = 0; i < MAX_BSETS; i++)
+		b->set[i].size = 0;
+	/*
+	 * Second loop starts at 1 because b->set[0].data is the memory we
+	 * allocated
 	 */
-	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
-		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
-		SET_KEY_SIZE(l, USHRT_MAX);
-
-		bch_cut_front(l, r);
-		return false;
-	}
-
-	if (KEY_CSUM(l)) {
-		if (KEY_CSUM(r))
-			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
-		else
-			SET_KEY_CSUM(l, 0);
-	}
-
-	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
-	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
-
-	return true;
+	for (i = 1; i < MAX_BSETS; i++)
+		b->set[i].data = NULL;
 }
+EXPORT_SYMBOL(bch_btree_keys_init);
 
 /* Binary tree stuff for auxiliary search trees */
 
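[Note: As a sanity check on the bkey_float comment above: exponent (7) + mid (3) + mantissa (22) bits fill exactly one 32-bit word, and BSET_CACHELINE == 128 bytes means each bkey_float covers sixteen u64s of keys for the trailing linear search. A standalone compile-time check of that arithmetic (userspace C11; macro names copied from the hunk above):]

    #include <assert.h>
    #include <stdint.h>

    #define BKEY_MID_BITS		3
    #define BKEY_EXPONENT_BITS	7
    #define BKEY_MANTISSA_BITS	(32 - BKEY_MID_BITS - BKEY_EXPONENT_BITS)
    #define BSET_CACHELINE		128

    /* the three bitfields must exactly fill one 32-bit word */
    static_assert(BKEY_MID_BITS + BKEY_EXPONENT_BITS + BKEY_MANTISSA_BITS == 32,
    	      "bkey_float packs into one u32");

    /* each bkey_float covers BSET_CACHELINE bytes of keys: 16 u64s to scan */
    static_assert(BSET_CACHELINE / sizeof(uint64_t) == 16,
    	      "linear search tail is 16 u64s");

    int main(void) { return 0; }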
@@ -455,9 +511,11 @@
 	return ((void *) k - (void *) t->data) / BSET_CACHELINE;
 }
 
-static unsigned bkey_to_cacheline_offset(struct bkey *k)
+static unsigned bkey_to_cacheline_offset(struct bset_tree *t,
+					 unsigned cacheline,
+					 struct bkey *k)
 {
-	return ((size_t) k & (BSET_CACHELINE - 1)) / sizeof(uint64_t);
+	return (u64 *) k - (u64 *) cacheline_to_bkey(t, cacheline, 0);
 }
 
 static struct bkey *tree_to_bkey(struct bset_tree *t, unsigned j)
@@ -504,7 +562,7 @@
 		: tree_to_prev_bkey(t, j >> ffs(j));
 
 	struct bkey *r = is_power_of_2(j + 1)
-		? node(t->data, t->data->keys - bkey_u64s(&t->end))
+		? bset_bkey_idx(t->data, t->data->keys - bkey_u64s(&t->end))
 		: tree_to_bkey(t, j >> (ffz(j) + 1));
 
 	BUG_ON(m < l || m > r);
@@ -528,9 +586,9 @@
 		f->exponent = 127;
 }
 
-static void bset_alloc_tree(struct btree *b, struct bset_tree *t)
+static void bset_alloc_tree(struct btree_keys *b, struct bset_tree *t)
 {
-	if (t != b->sets) {
+	if (t != b->set) {
 		unsigned j = roundup(t[-1].size,
 				     64 / sizeof(struct bkey_float));
 
@@ -538,33 +596,54 @@
 		t->prev = t[-1].prev + j;
 	}
 
-	while (t < b->sets + MAX_BSETS)
+	while (t < b->set + MAX_BSETS)
 		t++->size = 0;
 }
 
-static void bset_build_unwritten_tree(struct btree *b)
+static void bch_bset_build_unwritten_tree(struct btree_keys *b)
 {
-	struct bset_tree *t = b->sets + b->nsets;
+	struct bset_tree *t = bset_tree_last(b);
+
+	BUG_ON(b->last_set_unwritten);
+	b->last_set_unwritten = 1;
 
 	bset_alloc_tree(b, t);
 
-	if (t->tree != b->sets->tree + bset_tree_space(b)) {
-		t->prev[0] = bkey_to_cacheline_offset(t->data->start);
+	if (t->tree != b->set->tree + btree_keys_cachelines(b)) {
+		t->prev[0] = bkey_to_cacheline_offset(t, 0, t->data->start);
 		t->size = 1;
 	}
 }
 
-static void bset_build_written_tree(struct btree *b)
+void bch_bset_init_next(struct btree_keys *b, struct bset *i, uint64_t magic)
 {
-	struct bset_tree *t = b->sets + b->nsets;
-	struct bkey *k = t->data->start;
+	if (i != b->set->data) {
+		b->set[++b->nsets].data = i;
+		i->seq = b->set->data->seq;
+	} else
+		get_random_bytes(&i->seq, sizeof(uint64_t));
+
+	i->magic	= magic;
+	i->version	= 0;
+	i->keys		= 0;
+
+	bch_bset_build_unwritten_tree(b);
+}
+EXPORT_SYMBOL(bch_bset_init_next);
+
+void bch_bset_build_written_tree(struct btree_keys *b)
+{
+	struct bset_tree *t = bset_tree_last(b);
+	struct bkey *prev = NULL, *k = t->data->start;
 	unsigned j, cacheline = 1;
 
+	b->last_set_unwritten = 0;
+
 	bset_alloc_tree(b, t);
 
 	t->size = min_t(unsigned,
-			bkey_to_cacheline(t, end(t->data)),
-			b->sets->tree + bset_tree_space(b) - t->tree);
+			bkey_to_cacheline(t, bset_bkey_last(t->data)),
+			b->set->tree + btree_keys_cachelines(b) - t->tree);
 
 	if (t->size < 2) {
 		t->size = 0;
@@ -577,16 +656,14 @@
 	for (j = inorder_next(0, t->size);
 	     j;
 	     j = inorder_next(j, t->size)) {
-		while (bkey_to_cacheline(t, k) != cacheline)
-			k = bkey_next(k);
+		while (bkey_to_cacheline(t, k) < cacheline)
+			prev = k, k = bkey_next(k);
 
-		t->prev[j] = bkey_u64s(k);
-		k = bkey_next(k);
-		cacheline++;
-		t->tree[j].m = bkey_to_cacheline_offset(k);
+		t->prev[j] = bkey_u64s(prev);
+		t->tree[j].m = bkey_to_cacheline_offset(t, cacheline++, k);
 	}
 
-	while (bkey_next(k) != end(t->data))
+	while (bkey_next(k) != bset_bkey_last(t->data))
 		k = bkey_next(k);
 
 	t->end = *k;
@@ -597,14 +674,17 @@
 	     j = inorder_next(j, t->size))
 		make_bfloat(t, j);
 }
+EXPORT_SYMBOL(bch_bset_build_written_tree);
 
-void bch_bset_fix_invalidated_key(struct btree *b, struct bkey *k)
+/* Insert */
+
+void bch_bset_fix_invalidated_key(struct btree_keys *b, struct bkey *k)
 {
 	struct bset_tree *t;
 	unsigned inorder, j = 1;
 
-	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
-		if (k < end(t->data))
+	for (t = b->set; t <= bset_tree_last(b); t++)
+		if (k < bset_bkey_last(t->data))
 			goto found_set;
 
 	BUG();
@@ -617,7 +697,7 @@
 	if (k == t->data->start)
 		goto fix_left;
 
-	if (bkey_next(k) == end(t->data)) {
+	if (bkey_next(k) == bset_bkey_last(t->data)) {
 		t->end = *k;
 		goto fix_right;
 	}
@@ -642,10 +722,12 @@
 			j = j * 2 + 1;
 		} while (j < t->size);
 }
+EXPORT_SYMBOL(bch_bset_fix_invalidated_key);
 
-void bch_bset_fix_lookup_table(struct btree *b, struct bkey *k)
+static void bch_bset_fix_lookup_table(struct btree_keys *b,
+				      struct bset_tree *t,
+				      struct bkey *k)
 {
-	struct bset_tree *t = &b->sets[b->nsets];
 	unsigned shift = bkey_u64s(k);
 	unsigned j = bkey_to_cacheline(t, k);
 
@@ -657,8 +739,8 @@
 	 * lookup table for the first key that is strictly greater than k:
 	 * it's either k's cacheline or the next one
 	 */
-	if (j < t->size &&
-	    table_to_bkey(t, j) <= k)
+	while (j < t->size &&
+	       table_to_bkey(t, j) <= k)
 		j++;
 
 	/* Adjust all the lookup table entries, and find a new key for any that
@@ -673,54 +755,124 @@
 			while (k < cacheline_to_bkey(t, j, 0))
 				k = bkey_next(k);
 
-			t->prev[j] = bkey_to_cacheline_offset(k);
+			t->prev[j] = bkey_to_cacheline_offset(t, j, k);
 		}
 	}
 
-	if (t->size == b->sets->tree + bset_tree_space(b) - t->tree)
+	if (t->size == b->set->tree + btree_keys_cachelines(b) - t->tree)
 		return;
 
 	/* Possibly add a new entry to the end of the lookup table */
 
 	for (k = table_to_bkey(t, t->size - 1);
-	     k != end(t->data);
+	     k != bset_bkey_last(t->data);
 	     k = bkey_next(k))
 		if (t->size == bkey_to_cacheline(t, k)) {
-			t->prev[t->size] = bkey_to_cacheline_offset(k);
+			t->prev[t->size] = bkey_to_cacheline_offset(t, t->size, k);
 			t->size++;
 		}
 }
 
-void bch_bset_init_next(struct btree *b)
+/*
+ * Tries to merge l and r: l should be lower than r
+ * Returns true if we were able to merge. If we did merge, l will be the merged
+ * key, r will be untouched.
+ */
+bool bch_bkey_try_merge(struct btree_keys *b, struct bkey *l, struct bkey *r)
 {
-	struct bset *i = write_block(b);
+	if (!b->ops->key_merge)
+		return false;
 
-	if (i != b->sets[0].data) {
-		b->sets[++b->nsets].data = i;
-		i->seq = b->sets[0].data->seq;
-	} else
-		get_random_bytes(&i->seq, sizeof(uint64_t));
+	/*
+	 * Generic header checks
+	 * Assumes left and right are in order
+	 * Left and right must be exactly aligned
+	 */
+	if (!bch_bkey_equal_header(l, r) ||
+	     bkey_cmp(l, &START_KEY(r)))
+		return false;
 
-	i->magic	= bset_magic(&b->c->sb);
-	i->version	= 0;
-	i->keys		= 0;
-
-	bset_build_unwritten_tree(b);
+	return b->ops->key_merge(b, l, r);
 }
+EXPORT_SYMBOL(bch_bkey_try_merge);
+
+void bch_bset_insert(struct btree_keys *b, struct bkey *where,
+		     struct bkey *insert)
+{
+	struct bset_tree *t = bset_tree_last(b);
+
+	BUG_ON(!b->last_set_unwritten);
+	BUG_ON(bset_byte_offset(b, t->data) +
+	       __set_bytes(t->data, t->data->keys + bkey_u64s(insert)) >
+	       PAGE_SIZE << b->page_order);
+
+	memmove((uint64_t *) where + bkey_u64s(insert),
+		where,
+		(void *) bset_bkey_last(t->data) - (void *) where);
+
+	t->data->keys += bkey_u64s(insert);
+	bkey_copy(where, insert);
+	bch_bset_fix_lookup_table(b, t, where);
+}
+EXPORT_SYMBOL(bch_bset_insert);
+
+unsigned bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
+			      struct bkey *replace_key)
+{
+	unsigned status = BTREE_INSERT_STATUS_NO_INSERT;
+	struct bset *i = bset_tree_last(b)->data;
+	struct bkey *m, *prev = NULL;
+	struct btree_iter iter;
+
+	BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
+
+	m = bch_btree_iter_init(b, &iter, b->ops->is_extents
+				? PRECEDING_KEY(&START_KEY(k))
+				: PRECEDING_KEY(k));
+
+	if (b->ops->insert_fixup(b, k, &iter, replace_key))
+		return status;
+
+	status = BTREE_INSERT_STATUS_INSERT;
+
+	while (m != bset_bkey_last(i) &&
+	       bkey_cmp(k, b->ops->is_extents ? &START_KEY(m) : m) > 0)
+		prev = m, m = bkey_next(m);
+
+	/* prev is in the tree, if we merge we're done */
+	status = BTREE_INSERT_STATUS_BACK_MERGE;
+	if (prev &&
+	    bch_bkey_try_merge(b, prev, k))
+		goto merged;
+#if 0
+	status = BTREE_INSERT_STATUS_OVERWROTE;
+	if (m != bset_bkey_last(i) &&
+	    KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
+		goto copy;
+#endif
+	status = BTREE_INSERT_STATUS_FRONT_MERGE;
+	if (m != bset_bkey_last(i) &&
+	    bch_bkey_try_merge(b, k, m))
+		goto copy;
+
+	bch_bset_insert(b, m, k);
+copy:	bkey_copy(m, k);
+merged:
+	return status;
+}
+EXPORT_SYMBOL(bch_btree_insert_key);
+
+/* Lookup */
 
 struct bset_search_iter {
 	struct bkey *l, *r;
 };
 
-static struct bset_search_iter bset_search_write_set(struct btree *b,
-						     struct bset_tree *t,
+static struct bset_search_iter bset_search_write_set(struct bset_tree *t,
 						     const struct bkey *search)
 {
 	unsigned li = 0, ri = t->size;
 
-	BUG_ON(!b->nsets &&
-	       t->size < bkey_to_cacheline(t, end(t->data)));
-
 	while (li + 1 != ri) {
 		unsigned m = (li + ri) >> 1;
 
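[Note: bch_btree_insert_key() above now owns the whole ordered-insert sequence: seed an iterator at the preceding key, let ops->insert_fixup handle extent overlap, try a back merge into the previous key, then a front merge, and finally memmove the key in via bch_bset_insert(). A hedged caller-side sketch; the assumption that the enclosing struct btree embeds its keys as b->keys follows this patch series but is not shown in this hunk.]

    /* Sketch: insert one key k into the last (unwritten) bset of a node */
    unsigned status = bch_btree_insert_key(&b->keys, k, NULL);

    if (status != BTREE_INSERT_STATUS_NO_INSERT) {
    	/*
    	 * Either the bset grew or a neighbouring key absorbed k; the
    	 * btree layer would mark the node dirty and schedule a write.
    	 */
    }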
@@ -732,12 +884,11 @@
 
 	return (struct bset_search_iter) {
 		table_to_bkey(t, li),
-		ri < t->size ? table_to_bkey(t, ri) : end(t->data)
+		ri < t->size ? table_to_bkey(t, ri) : bset_bkey_last(t->data)
 	};
 }
 
-static struct bset_search_iter bset_search_tree(struct btree *b,
-						struct bset_tree *t,
+static struct bset_search_iter bset_search_tree(struct bset_tree *t,
 						const struct bkey *search)
 {
 	struct bkey *l, *r;
@@ -784,7 +935,7 @@
 			f = &t->tree[inorder_next(j, t->size)];
 			r = cacheline_to_bkey(t, inorder, f->m);
 		} else
-			r = end(t->data);
+			r = bset_bkey_last(t->data);
 	} else {
 		r = cacheline_to_bkey(t, inorder, f->m);
 
@@ -798,7 +949,7 @@
 	return (struct bset_search_iter) {l, r};
 }
 
-struct bkey *__bch_bset_search(struct btree *b, struct bset_tree *t,
+struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
 			       const struct bkey *search)
 {
 	struct bset_search_iter i;
@@ -820,7 +971,7 @@
 
 	if (unlikely(!t->size)) {
 		i.l = t->data->start;
-		i.r = end(t->data);
+		i.r = bset_bkey_last(t->data);
 	} else if (bset_written(b, t)) {
 		/*
 		 * Each node in the auxiliary search tree covers a certain range
@@ -830,23 +981,27 @@
 		 */
 
 		if (unlikely(bkey_cmp(search, &t->end) >= 0))
-			return end(t->data);
+			return bset_bkey_last(t->data);
 
 		if (unlikely(bkey_cmp(search, t->data->start) < 0))
 			return t->data->start;
 
-		i = bset_search_tree(b, t, search);
-	} else
-		i = bset_search_write_set(b, t, search);
+		i = bset_search_tree(t, search);
+	} else {
+		BUG_ON(!b->nsets &&
+		       t->size < bkey_to_cacheline(t, bset_bkey_last(t->data)));
 
-	if (expensive_debug_checks(b->c)) {
+		i = bset_search_write_set(t, search);
+	}
+
+	if (btree_keys_expensive_checks(b)) {
 		BUG_ON(bset_written(b, t) &&
 		       i.l != t->data->start &&
 		       bkey_cmp(tree_to_prev_bkey(t,
 			  inorder_to_tree(bkey_to_cacheline(t, i.l), t)),
 				search) > 0);
 
-		BUG_ON(i.r != end(t->data) &&
+		BUG_ON(i.r != bset_bkey_last(t->data) &&
 		       bkey_cmp(i.r, search) <= 0);
 	}
 
@@ -856,22 +1011,17 @@
 
 	return i.l;
 }
+EXPORT_SYMBOL(__bch_bset_search);
 
 /* Btree iterator */
 
-/*
- * Returns true if l > r - unless l == r, in which case returns true if l is
- * older than r.
- *
- * Necessary for btree_sort_fixup() - if there are multiple keys that compare
- * equal in different sets, we have to process them newest to oldest.
- */
+typedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
+				 struct btree_iter_set);
+
 static inline bool btree_iter_cmp(struct btree_iter_set l,
 				  struct btree_iter_set r)
 {
-	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
-
-	return c ? c > 0 : l.k < r.k;
+	return bkey_cmp(l.k, r.k) > 0;
 }
 
 static inline bool btree_iter_end(struct btree_iter *iter)
@@ -888,8 +1038,10 @@
 				 btree_iter_cmp));
 }
 
-struct bkey *__bch_btree_iter_init(struct btree *b, struct btree_iter *iter,
-				   struct bkey *search, struct bset_tree *start)
+static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
+					  struct btree_iter *iter,
+					  struct bkey *search,
+					  struct bset_tree *start)
 {
 	struct bkey *ret = NULL;
 	iter->size = ARRAY_SIZE(iter->data);
@@ -899,15 +1051,24 @@
 	iter->b = b;
 #endif
 
-	for (; start <= &b->sets[b->nsets]; start++) {
+	for (; start <= bset_tree_last(b); start++) {
 		ret = bch_bset_search(b, start, search);
-		bch_btree_iter_push(iter, ret, end(start->data));
+		bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
 	}
 
 	return ret;
 }
 
-struct bkey *bch_btree_iter_next(struct btree_iter *iter)
+struct bkey *bch_btree_iter_init(struct btree_keys *b,
+				 struct btree_iter *iter,
+				 struct bkey *search)
+{
+	return __bch_btree_iter_init(b, iter, search, b->set);
+}
+EXPORT_SYMBOL(bch_btree_iter_init);
+
+static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
+						 btree_iter_cmp_fn *cmp)
 {
 	struct btree_iter_set unused;
 	struct bkey *ret = NULL;
@@ -924,16 +1085,23 @@
 		}
 
 		if (iter->data->k == iter->data->end)
-			heap_pop(iter, unused, btree_iter_cmp);
+			heap_pop(iter, unused, cmp);
 		else
-			heap_sift(iter, 0, btree_iter_cmp);
+			heap_sift(iter, 0, cmp);
 	}
 
 	return ret;
 }
 
+struct bkey *bch_btree_iter_next(struct btree_iter *iter)
+{
+	return __bch_btree_iter_next(iter, btree_iter_cmp);
+}
+EXPORT_SYMBOL(bch_btree_iter_next);
+
 struct bkey *bch_btree_iter_next_filter(struct btree_iter *iter,
-					struct btree *b, ptr_filter_fn fn)
+					struct btree_keys *b, ptr_filter_fn fn)
 {
 	struct bkey *ret;
 
@@ -946,70 +1114,58 @@
 
 /* Mergesort */
 
-static void sort_key_next(struct btree_iter *iter,
-			  struct btree_iter_set *i)
+void bch_bset_sort_state_free(struct bset_sort_state *state)
 {
-	i->k = bkey_next(i->k);
-
-	if (i->k == i->end)
-		*i = iter->data[--iter->used];
+	if (state->pool)
+		mempool_destroy(state->pool);
 }
 
-static void btree_sort_fixup(struct btree_iter *iter)
+int bch_bset_sort_state_init(struct bset_sort_state *state, unsigned page_order)
 {
-	while (iter->used > 1) {
-		struct btree_iter_set *top = iter->data, *i = top + 1;
+	spin_lock_init(&state->time.lock);
 
-		if (iter->used > 2 &&
-		    btree_iter_cmp(i[0], i[1]))
-			i++;
+	state->page_order = page_order;
+	state->crit_factor = int_sqrt(1 << page_order);
 
-		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
-			break;
+	state->pool = mempool_create_page_pool(1, page_order);
+	if (!state->pool)
+		return -ENOMEM;
 
-		if (!KEY_SIZE(i->k)) {
-			sort_key_next(iter, i);
-			heap_sift(iter, i - top, btree_iter_cmp);
-			continue;
-		}
-
-		if (top->k > i->k) {
-			if (bkey_cmp(top->k, i->k) >= 0)
-				sort_key_next(iter, i);
-			else
-				bch_cut_front(top->k, i->k);
-
-			heap_sift(iter, i - top, btree_iter_cmp);
-		} else {
-			/* can't happen because of comparison func */
-			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
-			bch_cut_back(&START_KEY(i->k), top->k);
-		}
-	}
+	return 0;
 }
+EXPORT_SYMBOL(bch_bset_sort_state_init);
 
-static void btree_mergesort(struct btree *b, struct bset *out,
+static void btree_mergesort(struct btree_keys *b, struct bset *out,
 			    struct btree_iter *iter,
 			    bool fixup, bool remove_stale)
 {
+	int i;
 	struct bkey *k, *last = NULL;
-	bool (*bad)(struct btree *, const struct bkey *) = remove_stale
+	BKEY_PADDED(k) tmp;
+	bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
 		? bch_ptr_bad
 		: bch_ptr_invalid;
 
-	while (!btree_iter_end(iter)) {
-		if (fixup && !b->level)
-			btree_sort_fixup(iter);
+	/* Heapify the iterator, using our comparison function */
+	for (i = iter->used / 2 - 1; i >= 0; --i)
+		heap_sift(iter, i, b->ops->sort_cmp);
 
-		k = bch_btree_iter_next(iter);
+	while (!btree_iter_end(iter)) {
+		if (b->ops->sort_fixup && fixup)
+			k = b->ops->sort_fixup(iter, &tmp.k);
+		else
+			k = NULL;
+
+		if (!k)
+			k = __bch_btree_iter_next(iter, b->ops->sort_cmp);
+
 		if (bad(b, k))
 			continue;
 
 		if (!last) {
 			last = out->start;
 			bkey_copy(last, k);
-		} else if (b->level ||
-			   !bch_bkey_try_merge(b, last, k)) {
+		} else if (!bch_bkey_try_merge(b, last, k)) {
 			last = bkey_next(last);
 			bkey_copy(last, k);
 		}
@@ -1020,27 +1176,27 @@
 	pr_debug("sorted %i keys", out->keys);
 }
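
btree_mergesort() now heapifies the iterator itself (the loop from
iter->used / 2 - 1 down to 0 above) because the per-format sort_cmp may
differ from btree_iter_cmp. A minimal standalone sketch of the sift-down
step a heap_sift()-style helper performs, on a plain int heap with
hypothetical names (shown as a max-heap; the cmp callback decides the
real order in bcache):

	/* Array-backed binary heap: children of i live at 2i+1 and 2i+2.
	 * Starting heapify at used/2 - 1 works because everything past
	 * that index is a leaf, i.e. already a trivial one-element heap. */
	static void sift_down(int *heap, size_t used, size_t i)
	{
		size_t child;
		int tmp;

		while (2 * i + 1 < used) {
			child = 2 * i + 1;
			if (child + 1 < used && heap[child + 1] > heap[child])
				child++;		/* larger child wins */
			if (heap[i] >= heap[child])
				break;			/* heap property holds */

			tmp = heap[i];
			heap[i] = heap[child];
			heap[child] = tmp;
			i = child;
		}
	}
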
 
-static void __btree_sort(struct btree *b, struct btree_iter *iter,
-			 unsigned start, unsigned order, bool fixup)
+static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
+			 unsigned start, unsigned order, bool fixup,
+			 struct bset_sort_state *state)
 {
 	uint64_t start_time;
-	bool remove_stale = !b->written;
+	bool used_mempool = false;
 	struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
 						     order);
 	if (!out) {
-		mutex_lock(&b->c->sort_lock);
-		out = b->c->sort;
-		order = ilog2(bucket_pages(b->c));
+		BUG_ON(order > state->page_order);
+
+		out = page_address(mempool_alloc(state->pool, GFP_NOIO));
+		used_mempool = true;
+		order = state->page_order;
 	}
 
 	start_time = local_clock();
 
-	btree_mergesort(b, out, iter, fixup, remove_stale);
+	btree_mergesort(b, out, iter, fixup, false);
 	b->nsets = start;
 
-	if (!fixup && !start && b->written)
-		bch_btree_verify(b, out);
-
 	if (!start && order == b->page_order) {
 		/*
 		 * Our temporary buffer is the same size as the btree node's
@@ -1048,84 +1204,76 @@
 		 * memcpy()
 		 */
 
-		out->magic	= bset_magic(&b->c->sb);
-		out->seq	= b->sets[0].data->seq;
-		out->version	= b->sets[0].data->version;
-		swap(out, b->sets[0].data);
-
-		if (b->c->sort == b->sets[0].data)
-			b->c->sort = out;
+		out->magic	= b->set->data->magic;
+		out->seq	= b->set->data->seq;
+		out->version	= b->set->data->version;
+		swap(out, b->set->data);
 	} else {
-		b->sets[start].data->keys = out->keys;
-		memcpy(b->sets[start].data->start, out->start,
-		       (void *) end(out) - (void *) out->start);
+		b->set[start].data->keys = out->keys;
+		memcpy(b->set[start].data->start, out->start,
+		       (void *) bset_bkey_last(out) - (void *) out->start);
 	}
 
-	if (out == b->c->sort)
-		mutex_unlock(&b->c->sort_lock);
+	if (used_mempool)
+		mempool_free(virt_to_page(out), state->pool);
 	else
 		free_pages((unsigned long) out, order);
 
-	if (b->written)
-		bset_build_written_tree(b);
+	bch_bset_build_written_tree(b);
 
 	if (!start)
-		bch_time_stats_update(&b->c->sort_time, start_time);
+		bch_time_stats_update(&state->time, start_time);
 }
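
The buffer handling in __btree_sort() above is the standard
opportunistic-alloc-with-mempool-fallback pattern; condensed, with the
matching free side:

	struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO,
						     order);
	bool used_mempool = false;

	if (!out) {
		/* mempool_alloc() with GFP_NOIO never returns NULL - it
		 * waits for the reserved page. The reserve may be larger
		 * than requested, so order is bumped to match for freeing. */
		out = page_address(mempool_alloc(state->pool, GFP_NOIO));
		used_mempool = true;
		order = state->page_order;
	}

	/* ... merge sort into out ... */

	if (used_mempool)
		mempool_free(virt_to_page(out), state->pool);
	else
		free_pages((unsigned long) out, order);

A single-element page pool (mempool_create_page_pool(1, page_order), as
in bch_bset_sort_state_init() above) is enough to guarantee forward
progress under memory pressure, at the cost of serializing concurrent
fallback sorts on that one reservation.
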
 
-void bch_btree_sort_partial(struct btree *b, unsigned start)
+void bch_btree_sort_partial(struct btree_keys *b, unsigned start,
+			    struct bset_sort_state *state)
 {
 	size_t order = b->page_order, keys = 0;
 	struct btree_iter iter;
 	int oldsize = bch_count_data(b);
 
-	__bch_btree_iter_init(b, &iter, NULL, &b->sets[start]);
-
-	BUG_ON(b->sets[b->nsets].data == write_block(b) &&
-	       (b->sets[b->nsets].size || b->nsets));
-
+	__bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
 
 	if (start) {
 		unsigned i;
 
 		for (i = start; i <= b->nsets; i++)
-			keys += b->sets[i].data->keys;
+			keys += b->set[i].data->keys;
 
-		order = roundup_pow_of_two(__set_bytes(b->sets->data,
-						       keys)) / PAGE_SIZE;
-		if (order)
-			order = ilog2(order);
+		order = get_order(__set_bytes(b->set->data, keys));
 	}
 
-	__btree_sort(b, &iter, start, order, false);
+	__btree_sort(b, &iter, start, order, false, state);
 
-	EBUG_ON(b->written && oldsize >= 0 && bch_count_data(b) != oldsize);
+	EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
 }
+EXPORT_SYMBOL(bch_btree_sort_partial);
 
-void bch_btree_sort_and_fix_extents(struct btree *b, struct btree_iter *iter)
+void bch_btree_sort_and_fix_extents(struct btree_keys *b,
+				    struct btree_iter *iter,
+				    struct bset_sort_state *state)
 {
-	BUG_ON(!b->written);
-	__btree_sort(b, iter, 0, b->page_order, true);
+	__btree_sort(b, iter, 0, b->page_order, true, state);
 }
 
-void bch_btree_sort_into(struct btree *b, struct btree *new)
+void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
+			 struct bset_sort_state *state)
 {
 	uint64_t start_time = local_clock();
 
 	struct btree_iter iter;
 	bch_btree_iter_init(b, &iter, NULL);
 
-	btree_mergesort(b, new->sets->data, &iter, false, true);
+	btree_mergesort(b, new->set->data, &iter, false, true);
 
-	bch_time_stats_update(&b->c->sort_time, start_time);
+	bch_time_stats_update(&state->time, start_time);
 
-	bkey_copy_key(&new->key, &b->key);
-	new->sets->size = 0;
+	new->set->size = 0;	/* XXX: why? */
 }
 
 #define SORT_CRIT	(4096 / sizeof(uint64_t))
 
-void bch_btree_sort_lazy(struct btree *b)
+void bch_btree_sort_lazy(struct btree_keys *b, struct bset_sort_state *state)
 {
 	unsigned crit = SORT_CRIT;
 	int i;
@@ -1134,50 +1282,32 @@
 	if (!b->nsets)
 		goto out;
 
-	/* If not a leaf node, always sort */
-	if (b->level) {
-		bch_btree_sort(b);
-		return;
-	}
-
 	for (i = b->nsets - 1; i >= 0; --i) {
-		crit *= b->c->sort_crit_factor;
+		crit *= state->crit_factor;
 
-		if (b->sets[i].data->keys < crit) {
-			bch_btree_sort_partial(b, i);
+		if (b->set[i].data->keys < crit) {
+			bch_btree_sort_partial(b, i, state);
 			return;
 		}
 	}
 
 	/* Sort if we'd overflow */
 	if (b->nsets + 1 == MAX_BSETS) {
-		bch_btree_sort(b);
+		bch_btree_sort(b, state);
 		return;
 	}
 
 out:
-	bset_build_written_tree(b);
+	bch_bset_build_written_tree(b);
 }
+EXPORT_SYMBOL(bch_btree_sort_lazy);
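
A worked example of the cutoff above, assuming a hypothetical
page_order of 6, so crit_factor = int_sqrt(1 << 6) = 8:

	/*
	 * SORT_CRIT = 4096 / sizeof(uint64_t) = 512 u64s of keys
	 *
	 * Scanning sets newest to oldest, crit *= 8 per step:
	 *   set b->nsets - 1: merge from here if it holds <  4096 u64s
	 *   set b->nsets - 2: merge from here if it holds < 32768 u64s
	 *   ...
	 *
	 * Small, recently written sets get folded together eagerly; big
	 * old sets are left alone until MAX_BSETS would otherwise overflow.
	 */
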
 
-/* Sysfs stuff */
-
-struct bset_stats {
-	struct btree_op op;
-	size_t nodes;
-	size_t sets_written, sets_unwritten;
-	size_t bytes_written, bytes_unwritten;
-	size_t floats, failed;
-};
-
-static int btree_bset_stats(struct btree_op *op, struct btree *b)
+void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *stats)
 {
-	struct bset_stats *stats = container_of(op, struct bset_stats, op);
 	unsigned i;
 
-	stats->nodes++;
-
 	for (i = 0; i <= b->nsets; i++) {
-		struct bset_tree *t = &b->sets[i];
+		struct bset_tree *t = &b->set[i];
 		size_t bytes = t->data->keys * sizeof(uint64_t);
 		size_t j;
 
@@ -1195,32 +1325,4 @@
 			stats->bytes_unwritten += bytes;
 		}
 	}
-
-	return MAP_CONTINUE;
-}
-
-int bch_bset_print_stats(struct cache_set *c, char *buf)
-{
-	struct bset_stats t;
-	int ret;
-
-	memset(&t, 0, sizeof(struct bset_stats));
-	bch_btree_op_init(&t.op, -1);
-
-	ret = bch_btree_map_nodes(&t.op, c, &ZERO_KEY, btree_bset_stats);
-	if (ret < 0)
-		return ret;
-
-	return snprintf(buf, PAGE_SIZE,
-			"btree nodes:		%zu\n"
-			"written sets:		%zu\n"
-			"unwritten sets:		%zu\n"
-			"written key bytes:	%zu\n"
-			"unwritten key bytes:	%zu\n"
-			"floats:			%zu\n"
-			"failed:			%zu\n",
-			t.nodes,
-			t.sets_written, t.sets_unwritten,
-			t.bytes_written, t.bytes_unwritten,
-			t.floats, t.failed);
 }
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 1d3c24f..003260f 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -1,7 +1,11 @@
 #ifndef _BCACHE_BSET_H
 #define _BCACHE_BSET_H
 
-#include <linux/slab.h>
+#include <linux/bcache.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "util.h" /* for time_stats */
 
 /*
  * BKEYS:
@@ -142,20 +146,13 @@
  * first key in that range of bytes again.
  */
 
-/* Btree key comparison/iteration */
+struct btree_keys;
+struct btree_iter;
+struct btree_iter_set;
+struct bkey_float;
 
 #define MAX_BSETS		4U
 
-struct btree_iter {
-	size_t size, used;
-#ifdef CONFIG_BCACHE_DEBUG
-	struct btree *b;
-#endif
-	struct btree_iter_set {
-		struct bkey *k, *end;
-	} data[MAX_BSETS];
-};
-
 struct bset_tree {
 	/*
 	 * We construct a binary tree in an array as if the array
@@ -165,14 +162,14 @@
 	 */
 
 	/* size of the binary tree and prev array */
-	unsigned	size;
+	unsigned		size;
 
 	/* function of size - precalculated for to_inorder() */
-	unsigned	extra;
+	unsigned		extra;
 
 	/* copy of the last key in the set */
-	struct bkey	end;
-	struct bkey_float *tree;
+	struct bkey		end;
+	struct bkey_float	*tree;
 
 	/*
 	 * The nodes in the bset tree point to specific keys - this
@@ -182,12 +179,219 @@
 	 * to keep bkey_float to 4 bytes and prev isn't used in the fast
 	 * path.
 	 */
-	uint8_t		*prev;
+	uint8_t			*prev;
 
 	/* The actual btree node, with pointers to each sorted set */
-	struct bset	*data;
+	struct bset		*data;
 };
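
The tree and prev arrays encode a binary tree positionally, root at
index 1, so navigation needs no pointers. A generic sketch of the
arithmetic (illustrative helpers, not the bcache ones, which also
convert between tree order and inorder):

	static inline unsigned tree_left(unsigned j)	{ return j * 2; }
	static inline unsigned tree_right(unsigned j)	{ return j * 2 + 1; }
	static inline unsigned tree_parent(unsigned j)	{ return j / 2; }
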
 
+struct btree_keys_ops {
+	bool		(*sort_cmp)(struct btree_iter_set,
+				    struct btree_iter_set);
+	struct bkey	*(*sort_fixup)(struct btree_iter *, struct bkey *);
+	bool		(*insert_fixup)(struct btree_keys *, struct bkey *,
+					struct btree_iter *, struct bkey *);
+	bool		(*key_invalid)(struct btree_keys *,
+				       const struct bkey *);
+	bool		(*key_bad)(struct btree_keys *, const struct bkey *);
+	bool		(*key_merge)(struct btree_keys *,
+				     struct bkey *, struct bkey *);
+	void		(*key_to_text)(char *, size_t, const struct bkey *);
+	void		(*key_dump)(struct btree_keys *, const struct bkey *);
+
+	/*
+	 * Only used for deciding whether to use START_KEY(k) or just the key
+	 * itself in a couple places
+	 */
+	bool		is_extents;
+};
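
The choice of ops table (and with it is_extents) is wired up per node
level - leaves hold extents, interior nodes hold plain btree pointers.
From the mca_bucket_alloc() changes later in this patch:

	if (!b->level)
		bch_btree_keys_init(&b->keys, &bch_extent_keys_ops,
				    &b->c->expensive_debug_checks);
	else
		bch_btree_keys_init(&b->keys, &bch_btree_keys_ops,
				    &b->c->expensive_debug_checks);
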
+
+struct btree_keys {
+	const struct btree_keys_ops	*ops;
+	uint8_t			page_order;
+	uint8_t			nsets;
+	unsigned		last_set_unwritten:1;
+	bool			*expensive_debug_checks;
+
+	/*
+	 * Sets of sorted keys - the real btree node - plus a binary search tree
+	 *
+	 * set[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
+	 * to the memory we have allocated for this btree node. Additionally,
+	 * set[0]->data points to the entire btree node as it exists on disk.
+	 */
+	struct bset_tree	set[MAX_BSETS];
+};
+
+static inline struct bset_tree *bset_tree_last(struct btree_keys *b)
+{
+	return b->set + b->nsets;
+}
+
+static inline bool bset_written(struct btree_keys *b, struct bset_tree *t)
+{
+	return t <= b->set + b->nsets - b->last_set_unwritten;
+}
+
+static inline bool bkey_written(struct btree_keys *b, struct bkey *k)
+{
+	return !b->last_set_unwritten || k < b->set[b->nsets].data->start;
+}
+
+static inline unsigned bset_byte_offset(struct btree_keys *b, struct bset *i)
+{
+	return ((size_t) i) - ((size_t) b->set->data);
+}
+
+static inline unsigned bset_sector_offset(struct btree_keys *b, struct bset *i)
+{
+	return bset_byte_offset(b, i) >> 9;
+}
+
+#define __set_bytes(i, k)	(sizeof(*(i)) + (k) * sizeof(uint64_t))
+#define set_bytes(i)		__set_bytes(i, i->keys)
+
+#define __set_blocks(i, k, block_bytes)				\
+	DIV_ROUND_UP(__set_bytes(i, k), block_bytes)
+#define set_blocks(i, block_bytes)				\
+	__set_blocks(i, (i)->keys, block_bytes)
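
A quick worked example of the block arithmetic, assuming 4096-byte
blocks and a 32-byte struct bset header (sizes illustrative):

	/*
	 * __set_bytes(i, 1000)        = 32 + 1000 * 8           = 8032 bytes
	 * __set_blocks(i, 1000, 4096) = DIV_ROUND_UP(8032, 4096) = 2 blocks
	 */
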
+
+static inline size_t bch_btree_keys_u64s_remaining(struct btree_keys *b)
+{
+	struct bset_tree *t = bset_tree_last(b);
+
+	BUG_ON((PAGE_SIZE << b->page_order) <
+	       (bset_byte_offset(b, t->data) + set_bytes(t->data)));
+
+	if (!b->last_set_unwritten)
+		return 0;
+
+	return ((PAGE_SIZE << b->page_order) -
+		(bset_byte_offset(b, t->data) + set_bytes(t->data))) /
+		sizeof(u64);
+}
+
+static inline struct bset *bset_next_set(struct btree_keys *b,
+					 unsigned block_bytes)
+{
+	struct bset *i = bset_tree_last(b)->data;
+
+	return ((void *) i) + roundup(set_bytes(i), block_bytes);
+}
+
+void bch_btree_keys_free(struct btree_keys *);
+int bch_btree_keys_alloc(struct btree_keys *, unsigned, gfp_t);
+void bch_btree_keys_init(struct btree_keys *, const struct btree_keys_ops *,
+			 bool *);
+
+void bch_bset_init_next(struct btree_keys *, struct bset *, uint64_t);
+void bch_bset_build_written_tree(struct btree_keys *);
+void bch_bset_fix_invalidated_key(struct btree_keys *, struct bkey *);
+bool bch_bkey_try_merge(struct btree_keys *, struct bkey *, struct bkey *);
+void bch_bset_insert(struct btree_keys *, struct bkey *, struct bkey *);
+unsigned bch_btree_insert_key(struct btree_keys *, struct bkey *,
+			      struct bkey *);
+
+enum {
+	BTREE_INSERT_STATUS_NO_INSERT = 0,
+	BTREE_INSERT_STATUS_INSERT,
+	BTREE_INSERT_STATUS_BACK_MERGE,
+	BTREE_INSERT_STATUS_OVERWROTE,
+	BTREE_INSERT_STATUS_FRONT_MERGE,
+};
+
+/* Btree key iteration */
+
+struct btree_iter {
+	size_t size, used;
+#ifdef CONFIG_BCACHE_DEBUG
+	struct btree_keys *b;
+#endif
+	struct btree_iter_set {
+		struct bkey *k, *end;
+	} data[MAX_BSETS];
+};
+
+typedef bool (*ptr_filter_fn)(struct btree_keys *, const struct bkey *);
+
+struct bkey *bch_btree_iter_next(struct btree_iter *);
+struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
+					struct btree_keys *, ptr_filter_fn);
+
+void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
+struct bkey *bch_btree_iter_init(struct btree_keys *, struct btree_iter *,
+				 struct bkey *);
+
+struct bkey *__bch_bset_search(struct btree_keys *, struct bset_tree *,
+			       const struct bkey *);
+
+/*
+ * Returns the first key that is strictly greater than search
+ */
+static inline struct bkey *bch_bset_search(struct btree_keys *b,
+					   struct bset_tree *t,
+					   const struct bkey *search)
+{
+	return search ? __bch_bset_search(b, t, search) : t->data->start;
+}
+
+#define for_each_key_filter(b, k, iter, filter)				\
+	for (bch_btree_iter_init((b), (iter), NULL);			\
+	     ((k) = bch_btree_iter_next_filter((iter), (b), filter));)
+
+#define for_each_key(b, k, iter)					\
+	for (bch_btree_iter_init((b), (iter), NULL);			\
+	     ((k) = bch_btree_iter_next(iter));)
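
Typical usage (this is btree_gc_count_keys() from the btree.c changes
below): the iterator lives on the stack, and the filter variant skips
keys the ops table flags as bad:

	struct btree_iter iter;
	struct bkey *k;
	unsigned ret = 0;

	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
		ret += bkey_u64s(k);	/* count only live keys */
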
+
+/* Sorting */
+
+struct bset_sort_state {
+	mempool_t		*pool;
+
+	unsigned		page_order;
+	unsigned		crit_factor;
+
+	struct time_stats	time;
+};
+
+void bch_bset_sort_state_free(struct bset_sort_state *);
+int bch_bset_sort_state_init(struct bset_sort_state *, unsigned);
+void bch_btree_sort_lazy(struct btree_keys *, struct bset_sort_state *);
+void bch_btree_sort_into(struct btree_keys *, struct btree_keys *,
+			 struct bset_sort_state *);
+void bch_btree_sort_and_fix_extents(struct btree_keys *, struct btree_iter *,
+				    struct bset_sort_state *);
+void bch_btree_sort_partial(struct btree_keys *, unsigned,
+			    struct bset_sort_state *);
+
+static inline void bch_btree_sort(struct btree_keys *b,
+				  struct bset_sort_state *state)
+{
+	bch_btree_sort_partial(b, 0, state);
+}
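
One bset_sort_state belongs to the cache set (it appears as b->c->sort
throughout the btree.c changes below). Lifecycle in sketch form - the
page_order argument is assumed here to be bucket-sized, matching the
old ilog2(bucket_pages(c)) fallback this replaces:

	if (bch_bset_sort_state_init(&c->sort, ilog2(bucket_pages(c))))
		return -ENOMEM;

	/* ... bch_btree_sort_lazy(&b->keys, &c->sort) after writes ... */

	bch_bset_sort_state_free(&c->sort);
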
+
+struct bset_stats {
+	size_t sets_written, sets_unwritten;
+	size_t bytes_written, bytes_unwritten;
+	size_t floats, failed;
+};
+
+void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *);
+
+/* Bkey utility code */
+
+#define bset_bkey_last(i)	bkey_idx((struct bkey *) (i)->d, (i)->keys)
+
+static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx)
+{
+	return bkey_idx(i->start, idx);
+}
+
+static inline void bkey_init(struct bkey *k)
+{
+	*k = ZERO_KEY;
+}
+
 static __always_inline int64_t bkey_cmp(const struct bkey *l,
 					const struct bkey *r)
 {
@@ -196,6 +400,62 @@
 		: (int64_t) KEY_OFFSET(l) - (int64_t) KEY_OFFSET(r);
 }
 
+void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
+			      unsigned);
+bool __bch_cut_front(const struct bkey *, struct bkey *);
+bool __bch_cut_back(const struct bkey *, struct bkey *);
+
+static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
+{
+	BUG_ON(bkey_cmp(where, k) > 0);
+	return __bch_cut_front(where, k);
+}
+
+static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
+{
+	BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
+	return __bch_cut_back(where, k);
+}
+
+#define PRECEDING_KEY(_k)					\
+({								\
+	struct bkey *_ret = NULL;				\
+								\
+	if (KEY_INODE(_k) || KEY_OFFSET(_k)) {			\
+		_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0);	\
+								\
+		if (!_ret->low)					\
+			_ret->high--;				\
+		_ret->low--;					\
+	}							\
+								\
+	_ret;							\
+})
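
The two u64s are decremented as one 128-bit value: borrowing from a
zero offset carries into the high word. Worked examples (field packing
per the KEY() macro; only inode and offset matter here):

	/*
	 * PRECEDING_KEY(&KEY(1, 100, 0)): low 100 -> 99, high untouched;
	 *	result sorts just before (inode 1, offset 100).
	 *
	 * PRECEDING_KEY(&KEY(1, 0, 0)):   low 0 -> ~0ULL with a borrow,
	 *	high-- takes inode 1 -> 0; result is (inode 0, offset ~0),
	 *	the greatest key sorting before (inode 1, offset 0).
	 *
	 * PRECEDING_KEY(&ZERO_KEY) is NULL - nothing precedes it.
	 */
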
+
+static inline bool bch_ptr_invalid(struct btree_keys *b, const struct bkey *k)
+{
+	return b->ops->key_invalid(b, k);
+}
+
+static inline bool bch_ptr_bad(struct btree_keys *b, const struct bkey *k)
+{
+	return b->ops->key_bad(b, k);
+}
+
+static inline void bch_bkey_to_text(struct btree_keys *b, char *buf,
+				    size_t size, const struct bkey *k)
+{
+	return b->ops->key_to_text(buf, size, k);
+}
+
+static inline bool bch_bkey_equal_header(const struct bkey *l,
+					 const struct bkey *r)
+{
+	return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
+		KEY_PTRS(l) == KEY_PTRS(r) &&
+		KEY_CSUM(l) == KEY_CSUM(r));
+}
+
 /* Keylists */
 
 struct keylist {
@@ -257,136 +517,44 @@
 
 struct bkey *bch_keylist_pop(struct keylist *);
 void bch_keylist_pop_front(struct keylist *);
-int bch_keylist_realloc(struct keylist *, int, struct cache_set *);
+int __bch_keylist_realloc(struct keylist *, unsigned);
 
-void bch_bkey_copy_single_ptr(struct bkey *, const struct bkey *,
-			      unsigned);
-bool __bch_cut_front(const struct bkey *, struct bkey *);
-bool __bch_cut_back(const struct bkey *, struct bkey *);
+/* Debug stuff */
 
-static inline bool bch_cut_front(const struct bkey *where, struct bkey *k)
+#ifdef CONFIG_BCACHE_DEBUG
+
+int __bch_count_data(struct btree_keys *);
+void __bch_check_keys(struct btree_keys *, const char *, ...);
+void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
+void bch_dump_bucket(struct btree_keys *);
+
+#else
+
+static inline int __bch_count_data(struct btree_keys *b) { return -1; }
+static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
+static inline void bch_dump_bucket(struct btree_keys *b) {}
+void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
+
+#endif
+
+static inline bool btree_keys_expensive_checks(struct btree_keys *b)
 {
-	BUG_ON(bkey_cmp(where, k) > 0);
-	return __bch_cut_front(where, k);
+#ifdef CONFIG_BCACHE_DEBUG
+	return *b->expensive_debug_checks;
+#else
+	return false;
+#endif
 }
 
-static inline bool bch_cut_back(const struct bkey *where, struct bkey *k)
+static inline int bch_count_data(struct btree_keys *b)
 {
-	BUG_ON(bkey_cmp(where, &START_KEY(k)) < 0);
-	return __bch_cut_back(where, k);
+	return btree_keys_expensive_checks(b) ? __bch_count_data(b) : -1;
 }
 
-const char *bch_ptr_status(struct cache_set *, const struct bkey *);
-bool bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
-bool bch_extent_ptr_invalid(struct cache_set *, const struct bkey *);
-
-bool bch_ptr_bad(struct btree *, const struct bkey *);
-
-static inline uint8_t gen_after(uint8_t a, uint8_t b)
-{
-	uint8_t r = a - b;
-	return r > 128U ? 0 : r;
-}
-
-static inline uint8_t ptr_stale(struct cache_set *c, const struct bkey *k,
-				unsigned i)
-{
-	return gen_after(PTR_BUCKET(c, k, i)->gen, PTR_GEN(k, i));
-}
-
-static inline bool ptr_available(struct cache_set *c, const struct bkey *k,
-				 unsigned i)
-{
-	return (PTR_DEV(k, i) < MAX_CACHES_PER_SET) && PTR_CACHE(c, k, i);
-}
-
-
-typedef bool (*ptr_filter_fn)(struct btree *, const struct bkey *);
-
-struct bkey *bch_btree_iter_next(struct btree_iter *);
-struct bkey *bch_btree_iter_next_filter(struct btree_iter *,
-					struct btree *, ptr_filter_fn);
-
-void bch_btree_iter_push(struct btree_iter *, struct bkey *, struct bkey *);
-struct bkey *__bch_btree_iter_init(struct btree *, struct btree_iter *,
-				   struct bkey *, struct bset_tree *);
-
-/* 32 bits total: */
-#define BKEY_MID_BITS		3
-#define BKEY_EXPONENT_BITS	7
-#define BKEY_MANTISSA_BITS	22
-#define BKEY_MANTISSA_MASK	((1 << BKEY_MANTISSA_BITS) - 1)
-
-struct bkey_float {
-	unsigned	exponent:BKEY_EXPONENT_BITS;
-	unsigned	m:BKEY_MID_BITS;
-	unsigned	mantissa:BKEY_MANTISSA_BITS;
-} __packed;
-
-/*
- * BSET_CACHELINE was originally intended to match the hardware cacheline size -
- * it used to be 64, but I realized the lookup code would touch slightly less
- * memory if it was 128.
- *
- * It defines the number of bytes (in struct bset) per struct bkey_float in
- * the auxiliary search tree - when we're done searching the bset_float tree we
- * have this many bytes left that we do a linear search over.
- *
- * Since (after level 5) every level of the bset_tree is on a new cacheline,
- * we're touching one fewer cacheline in the bset tree in exchange for one more
- * cacheline in the linear search - but the linear search might stop before it
- * gets to the second cacheline.
- */
-
-#define BSET_CACHELINE		128
-#define bset_tree_space(b)	(btree_data_space(b) / BSET_CACHELINE)
-
-#define bset_tree_bytes(b)	(bset_tree_space(b) * sizeof(struct bkey_float))
-#define bset_prev_bytes(b)	(bset_tree_space(b) * sizeof(uint8_t))
-
-void bch_bset_init_next(struct btree *);
-
-void bch_bset_fix_invalidated_key(struct btree *, struct bkey *);
-void bch_bset_fix_lookup_table(struct btree *, struct bkey *);
-
-struct bkey *__bch_bset_search(struct btree *, struct bset_tree *,
-			   const struct bkey *);
-
-/*
- * Returns the first key that is strictly greater than search
- */
-static inline struct bkey *bch_bset_search(struct btree *b, struct bset_tree *t,
-					   const struct bkey *search)
-{
-	return search ? __bch_bset_search(b, t, search) : t->data->start;
-}
-
-#define PRECEDING_KEY(_k)					\
-({								\
-	struct bkey *_ret = NULL;				\
-								\
-	if (KEY_INODE(_k) || KEY_OFFSET(_k)) {			\
-		_ret = &KEY(KEY_INODE(_k), KEY_OFFSET(_k), 0);	\
-								\
-		if (!_ret->low)					\
-			_ret->high--;				\
-		_ret->low--;					\
-	}							\
-								\
-	_ret;							\
-})
-
-bool bch_bkey_try_merge(struct btree *, struct bkey *, struct bkey *);
-void bch_btree_sort_lazy(struct btree *);
-void bch_btree_sort_into(struct btree *, struct btree *);
-void bch_btree_sort_and_fix_extents(struct btree *, struct btree_iter *);
-void bch_btree_sort_partial(struct btree *, unsigned);
-
-static inline void bch_btree_sort(struct btree *b)
-{
-	bch_btree_sort_partial(b, 0);
-}
-
-int bch_bset_print_stats(struct cache_set *, char *);
+#define bch_check_keys(b, ...)						\
+do {									\
+	if (btree_keys_expensive_checks(b))				\
+		__bch_check_keys(b, __VA_ARGS__);			\
+} while (0)
 
 #endif
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 31bb53f..98cc0a8 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -23,7 +23,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
-#include "writeback.h"
+#include "extents.h"
 
 #include <linux/slab.h>
 #include <linux/bitops.h>
@@ -89,13 +89,6 @@
  * Test module load/unload
  */
 
-enum {
-	BTREE_INSERT_STATUS_INSERT,
-	BTREE_INSERT_STATUS_BACK_MERGE,
-	BTREE_INSERT_STATUS_OVERWROTE,
-	BTREE_INSERT_STATUS_FRONT_MERGE,
-};
-
 #define MAX_NEED_GC		64
 #define MAX_SAVE_PRIO		72
 
@@ -106,14 +99,6 @@
 
 static struct workqueue_struct *btree_io_wq;
 
-static inline bool should_split(struct btree *b)
-{
-	struct bset *i = write_block(b);
-	return b->written >= btree_blocks(b) ||
-		(b->written + __set_blocks(i, i->keys + 15, b->c)
-		 > btree_blocks(b));
-}
-
 #define insert_lock(s, b)	((b)->level <= (s)->lock)
 
 /*
@@ -167,6 +152,8 @@
 			_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__);	\
 		}							\
 		rw_unlock(_w, _b);					\
+		if (_r == -EINTR)					\
+			schedule();					\
 		bch_cannibalize_unlock(c);				\
 		if (_r == -ENOSPC) {					\
 			wait_event((c)->try_wait,			\
@@ -175,9 +162,15 @@
 		}							\
 	} while (_r == -EINTR);						\
 									\
+	finish_wait(&(c)->bucket_wait, &(op)->wait);			\
 	_r;								\
 })
 
+static inline struct bset *write_block(struct btree *b)
+{
+	return ((void *) btree_bset_first(b)) + b->written * block_bytes(b->c);
+}
+
 /* Btree key manipulation */
 
 void bkey_put(struct cache_set *c, struct bkey *k)
@@ -194,16 +187,16 @@
 static uint64_t btree_csum_set(struct btree *b, struct bset *i)
 {
 	uint64_t crc = b->key.ptr[0];
-	void *data = (void *) i + 8, *end = end(i);
+	void *data = (void *) i + 8, *end = bset_bkey_last(i);
 
 	crc = bch_crc64_update(crc, data, end - data);
 	return crc ^ 0xffffffffffffffffULL;
 }
 
-static void bch_btree_node_read_done(struct btree *b)
+void bch_btree_node_read_done(struct btree *b)
 {
 	const char *err = "bad btree header";
-	struct bset *i = b->sets[0].data;
+	struct bset *i = btree_bset_first(b);
 	struct btree_iter *iter;
 
 	iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
@@ -211,21 +204,22 @@
 	iter->used = 0;
 
 #ifdef CONFIG_BCACHE_DEBUG
-	iter->b = b;
+	iter->b = &b->keys;
 #endif
 
 	if (!i->seq)
 		goto err;
 
 	for (;
-	     b->written < btree_blocks(b) && i->seq == b->sets[0].data->seq;
+	     b->written < btree_blocks(b) && i->seq == b->keys.set[0].data->seq;
 	     i = write_block(b)) {
 		err = "unsupported bset version";
 		if (i->version > BCACHE_BSET_VERSION)
 			goto err;
 
 		err = "bad btree header";
-		if (b->written + set_blocks(i, b->c) > btree_blocks(b))
+		if (b->written + set_blocks(i, block_bytes(b->c)) >
+		    btree_blocks(b))
 			goto err;
 
 		err = "bad magic";
@@ -245,39 +239,40 @@
 		}
 
 		err = "empty set";
-		if (i != b->sets[0].data && !i->keys)
+		if (i != b->keys.set[0].data && !i->keys)
 			goto err;
 
-		bch_btree_iter_push(iter, i->start, end(i));
+		bch_btree_iter_push(iter, i->start, bset_bkey_last(i));
 
-		b->written += set_blocks(i, b->c);
+		b->written += set_blocks(i, block_bytes(b->c));
 	}
 
 	err = "corrupted btree";
 	for (i = write_block(b);
-	     index(i, b) < btree_blocks(b);
+	     bset_sector_offset(&b->keys, i) < KEY_SIZE(&b->key);
 	     i = ((void *) i) + block_bytes(b->c))
-		if (i->seq == b->sets[0].data->seq)
+		if (i->seq == b->keys.set[0].data->seq)
 			goto err;
 
-	bch_btree_sort_and_fix_extents(b, iter);
+	bch_btree_sort_and_fix_extents(&b->keys, iter, &b->c->sort);
 
-	i = b->sets[0].data;
+	i = b->keys.set[0].data;
 	err = "short btree key";
-	if (b->sets[0].size &&
-	    bkey_cmp(&b->key, &b->sets[0].end) < 0)
+	if (b->keys.set[0].size &&
+	    bkey_cmp(&b->key, &b->keys.set[0].end) < 0)
 		goto err;
 
 	if (b->written < btree_blocks(b))
-		bch_bset_init_next(b);
+		bch_bset_init_next(&b->keys, write_block(b),
+				   bset_magic(&b->c->sb));
 out:
 	mempool_free(iter, b->c->fill_iter);
 	return;
 err:
 	set_btree_node_io_error(b);
-	bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys",
+	bch_cache_set_error(b->c, "%s at bucket %zu, block %u, %u keys",
 			    err, PTR_BUCKET_NR(b->c, &b->key, 0),
-			    index(i, b), i->keys);
+			    bset_block_offset(b, i), i->keys);
 	goto out;
 }
 
@@ -287,7 +282,7 @@
 	closure_put(cl);
 }
 
-void bch_btree_node_read(struct btree *b)
+static void bch_btree_node_read(struct btree *b)
 {
 	uint64_t start_time = local_clock();
 	struct closure cl;
@@ -299,11 +294,11 @@
 
 	bio = bch_bbio_alloc(b->c);
 	bio->bi_rw	= REQ_META|READ_SYNC;
-	bio->bi_size	= KEY_SIZE(&b->key) << 9;
+	bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9;
 	bio->bi_end_io	= btree_node_read_endio;
 	bio->bi_private	= &cl;
 
-	bch_bio_map(bio, b->sets[0].data);
+	bch_bio_map(bio, b->keys.set[0].data);
 
 	bch_submit_bbio(bio, b->c, &b->key, 0);
 	closure_sync(&cl);
@@ -340,9 +335,16 @@
 	w->journal	= NULL;
 }
 
+static void btree_node_write_unlock(struct closure *cl)
+{
+	struct btree *b = container_of(cl, struct btree, io);
+
+	up(&b->io_mutex);
+}
+
 static void __btree_node_write_done(struct closure *cl)
 {
-	struct btree *b = container_of(cl, struct btree, io.cl);
+	struct btree *b = container_of(cl, struct btree, io);
 	struct btree_write *w = btree_prev_write(b);
 
 	bch_bbio_free(b->bio, b->c);
@@ -353,16 +355,16 @@
 		queue_delayed_work(btree_io_wq, &b->work,
 				   msecs_to_jiffies(30000));
 
-	closure_return(cl);
+	closure_return_with_destructor(cl, btree_node_write_unlock);
 }
 
 static void btree_node_write_done(struct closure *cl)
 {
-	struct btree *b = container_of(cl, struct btree, io.cl);
+	struct btree *b = container_of(cl, struct btree, io);
 	struct bio_vec *bv;
 	int n;
 
-	__bio_for_each_segment(bv, b->bio, n, 0)
+	bio_for_each_segment_all(bv, b->bio, n)
 		__free_page(bv->bv_page);
 
 	__btree_node_write_done(cl);
@@ -371,7 +373,7 @@
 static void btree_node_write_endio(struct bio *bio, int error)
 {
 	struct closure *cl = bio->bi_private;
-	struct btree *b = container_of(cl, struct btree, io.cl);
+	struct btree *b = container_of(cl, struct btree, io);
 
 	if (error)
 		set_btree_node_io_error(b);
@@ -382,8 +384,8 @@
 
 static void do_btree_node_write(struct btree *b)
 {
-	struct closure *cl = &b->io.cl;
-	struct bset *i = b->sets[b->nsets].data;
+	struct closure *cl = &b->io;
+	struct bset *i = btree_bset_last(b);
 	BKEY_PADDED(key) k;
 
 	i->version	= BCACHE_BSET_VERSION;
@@ -395,7 +397,7 @@
 	b->bio->bi_end_io	= btree_node_write_endio;
 	b->bio->bi_private	= cl;
 	b->bio->bi_rw		= REQ_META|WRITE_SYNC|REQ_FUA;
-	b->bio->bi_size		= set_blocks(i, b->c) * block_bytes(b->c);
+	b->bio->bi_iter.bi_size	= roundup(set_bytes(i), block_bytes(b->c));
 	bch_bio_map(b->bio, i);
 
 	/*
@@ -414,14 +416,15 @@
 	 */
 
 	bkey_copy(&k.key, &b->key);
-	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i));
+	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
+		       bset_sector_offset(&b->keys, i));
 
 	if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
 		int j;
 		struct bio_vec *bv;
 		void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
 
-		bio_for_each_segment(bv, b->bio, j)
+		bio_for_each_segment_all(bv, b->bio, j)
 			memcpy(page_address(bv->bv_page),
 			       base + j * PAGE_SIZE, PAGE_SIZE);
 
@@ -435,40 +438,54 @@
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		closure_sync(cl);
-		__btree_node_write_done(cl);
+		continue_at_nobarrier(cl, __btree_node_write_done, NULL);
 	}
 }
 
 void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
-	struct bset *i = b->sets[b->nsets].data;
+	struct bset *i = btree_bset_last(b);
 
 	trace_bcache_btree_write(b);
 
 	BUG_ON(current->bio_list);
 	BUG_ON(b->written >= btree_blocks(b));
 	BUG_ON(b->written && !i->keys);
-	BUG_ON(b->sets->data->seq != i->seq);
-	bch_check_keys(b, "writing");
+	BUG_ON(btree_bset_first(b)->seq != i->seq);
+	bch_check_keys(&b->keys, "writing");
 
 	cancel_delayed_work(&b->work);
 
 	/* If caller isn't waiting for write, parent refcount is cache set */
-	closure_lock(&b->io, parent ?: &b->c->cl);
+	down(&b->io_mutex);
+	closure_init(&b->io, parent ?: &b->c->cl);
 
 	clear_bit(BTREE_NODE_dirty,	 &b->flags);
 	change_bit(BTREE_NODE_write_idx, &b->flags);
 
 	do_btree_node_write(b);
 
-	b->written += set_blocks(i, b->c);
-	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
+	atomic_long_add(set_blocks(i, block_bytes(b->c)) * b->c->sb.block_size,
 			&PTR_CACHE(b->c, &b->key, 0)->btree_sectors_written);
 
-	bch_btree_sort_lazy(b);
+	b->written += set_blocks(i, block_bytes(b->c));
+
+	/* If not a leaf node, always sort */
+	if (b->level && b->keys.nsets)
+		bch_btree_sort(&b->keys, &b->c->sort);
+	else
+		bch_btree_sort_lazy(&b->keys, &b->c->sort);
+
+	/*
+	 * Only verify if there was more than one set initially (i.e. we did a
+	 * sort) and we sorted down to a single set:
+	 */
+	if (i != b->keys.set->data && !b->keys.nsets)
+		bch_btree_verify(b);
 
 	if (b->written < btree_blocks(b))
-		bch_bset_init_next(b);
+		bch_bset_init_next(&b->keys, write_block(b),
+				   bset_magic(&b->c->sb));
 }
 
 static void bch_btree_node_write_sync(struct btree *b)
@@ -493,7 +510,7 @@
 
 static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref)
 {
-	struct bset *i = b->sets[b->nsets].data;
+	struct bset *i = btree_bset_last(b);
 	struct btree_write *w = btree_current_write(b);
 
 	BUG_ON(!b->written);
@@ -528,24 +545,6 @@
  * mca -> memory cache
  */
 
-static void mca_reinit(struct btree *b)
-{
-	unsigned i;
-
-	b->flags	= 0;
-	b->written	= 0;
-	b->nsets	= 0;
-
-	for (i = 0; i < MAX_BSETS; i++)
-		b->sets[i].size = 0;
-	/*
-	 * Second loop starts at 1 because b->sets[0]->data is the memory we
-	 * allocated
-	 */
-	for (i = 1; i < MAX_BSETS; i++)
-		b->sets[i].data = NULL;
-}
-
 #define mca_reserve(c)	(((c->root && c->root->level)		\
 			  ? c->root->level : 1) * 8 + 16)
 #define mca_can_free(c)						\
@@ -553,28 +552,12 @@
 
 static void mca_data_free(struct btree *b)
 {
-	struct bset_tree *t = b->sets;
-	BUG_ON(!closure_is_unlocked(&b->io.cl));
+	BUG_ON(b->io_mutex.count != 1);
 
-	if (bset_prev_bytes(b) < PAGE_SIZE)
-		kfree(t->prev);
-	else
-		free_pages((unsigned long) t->prev,
-			   get_order(bset_prev_bytes(b)));
+	bch_btree_keys_free(&b->keys);
 
-	if (bset_tree_bytes(b) < PAGE_SIZE)
-		kfree(t->tree);
-	else
-		free_pages((unsigned long) t->tree,
-			   get_order(bset_tree_bytes(b)));
-
-	free_pages((unsigned long) t->data, b->page_order);
-
-	t->prev = NULL;
-	t->tree = NULL;
-	t->data = NULL;
-	list_move(&b->list, &b->c->btree_cache_freed);
 	b->c->bucket_cache_used--;
+	list_move(&b->list, &b->c->btree_cache_freed);
 }
 
 static void mca_bucket_free(struct btree *b)
@@ -593,34 +576,16 @@
 
 static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
 {
-	struct bset_tree *t = b->sets;
-	BUG_ON(t->data);
-
-	b->page_order = max_t(unsigned,
-			      ilog2(b->c->btree_pages),
-			      btree_order(k));
-
-	t->data = (void *) __get_free_pages(gfp, b->page_order);
-	if (!t->data)
-		goto err;
-
-	t->tree = bset_tree_bytes(b) < PAGE_SIZE
-		? kmalloc(bset_tree_bytes(b), gfp)
-		: (void *) __get_free_pages(gfp, get_order(bset_tree_bytes(b)));
-	if (!t->tree)
-		goto err;
-
-	t->prev = bset_prev_bytes(b) < PAGE_SIZE
-		? kmalloc(bset_prev_bytes(b), gfp)
-		: (void *) __get_free_pages(gfp, get_order(bset_prev_bytes(b)));
-	if (!t->prev)
-		goto err;
-
-	list_move(&b->list, &b->c->btree_cache);
-	b->c->bucket_cache_used++;
-	return;
-err:
-	mca_data_free(b);
+	if (!bch_btree_keys_alloc(&b->keys,
+				  max_t(unsigned,
+					ilog2(b->c->btree_pages),
+					btree_order(k)),
+				  gfp)) {
+		b->c->bucket_cache_used++;
+		list_move(&b->list, &b->c->btree_cache);
+	} else {
+		list_move(&b->list, &b->c->btree_cache_freed);
+	}
 }
 
 static struct btree *mca_bucket_alloc(struct cache_set *c,
@@ -635,7 +600,7 @@
 	INIT_LIST_HEAD(&b->list);
 	INIT_DELAYED_WORK(&b->work, btree_node_write_work);
 	b->c = c;
-	closure_init_unlocked(&b->io);
+	sema_init(&b->io_mutex, 1);
 
 	mca_data_alloc(b, k, gfp);
 	return b;
@@ -651,24 +616,31 @@
 	if (!down_write_trylock(&b->lock))
 		return -ENOMEM;
 
-	BUG_ON(btree_node_dirty(b) && !b->sets[0].data);
+	BUG_ON(btree_node_dirty(b) && !b->keys.set[0].data);
 
-	if (b->page_order < min_order ||
-	    (!flush &&
-	     (btree_node_dirty(b) ||
-	      atomic_read(&b->io.cl.remaining) != -1))) {
-		rw_unlock(true, b);
-		return -ENOMEM;
+	if (b->keys.page_order < min_order)
+		goto out_unlock;
+
+	if (!flush) {
+		if (btree_node_dirty(b))
+			goto out_unlock;
+
+		if (down_trylock(&b->io_mutex))
+			goto out_unlock;
+		up(&b->io_mutex);
 	}
 
 	if (btree_node_dirty(b))
 		bch_btree_node_write_sync(b);
 
 	/* wait for any in flight btree write */
-	closure_wait_event(&b->io.wait, &cl,
-			   atomic_read(&b->io.cl.remaining) == -1);
+	down(&b->io_mutex);
+	up(&b->io_mutex);
 
 	return 0;
+out_unlock:
+	rw_unlock(true, b);
+	return -ENOMEM;
 }
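
With closure_with_waitlist gone, the plain semaphore covers all three
uses, each visible in this patch - hold across a write, trylock to
probe, and down/up back-to-back to wait:

	sema_init(&b->io_mutex, 1);	/* at node allocation */

	/* bch_btree_node_write(): hold across the I/O */
	down(&b->io_mutex);
	/* ... submit; btree_node_write_unlock() does up() on completion */

	/* mca_reap(): non-blocking probe for an in-flight write */
	if (down_trylock(&b->io_mutex))
		goto out_unlock;	/* write in flight, node is busy */
	up(&b->io_mutex);

	/* wait for any in-flight write to finish */
	down(&b->io_mutex);
	up(&b->io_mutex);
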
 
 static unsigned long bch_mca_scan(struct shrinker *shrink,
@@ -714,14 +686,10 @@
 		}
 	}
 
-	/*
-	 * Can happen right when we first start up, before we've read in any
-	 * btree nodes
-	 */
-	if (list_empty(&c->btree_cache))
-		goto out;
-
 	for (i = 0; (nr--) && i < c->bucket_cache_used; i++) {
+		if (list_empty(&c->btree_cache))
+			goto out;
+
 		b = list_first_entry(&c->btree_cache, struct btree, list);
 		list_rotate_left(&c->btree_cache);
 
@@ -767,6 +735,8 @@
 #ifdef CONFIG_BCACHE_DEBUG
 	if (c->verify_data)
 		list_move(&c->verify_data->list, &c->btree_cache);
+
+	free_pages((unsigned long) c->verify_ondisk, ilog2(bucket_pages(c)));
 #endif
 
 	list_splice(&c->btree_cache_freeable,
@@ -807,10 +777,13 @@
 #ifdef CONFIG_BCACHE_DEBUG
 	mutex_init(&c->verify_lock);
 
+	c->verify_ondisk = (void *)
+		__get_free_pages(GFP_KERNEL, ilog2(bucket_pages(c)));
+
 	c->verify_data = mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL);
 
 	if (c->verify_data &&
-	    c->verify_data->sets[0].data)
+	    c->verify_data->keys.set->data)
 		list_del_init(&c->verify_data->list);
 	else
 		c->verify_data = NULL;
@@ -908,7 +881,7 @@
 	list_for_each_entry(b, &c->btree_cache_freed, list)
 		if (!mca_reap(b, 0, false)) {
 			mca_data_alloc(b, k, __GFP_NOWARN|GFP_NOIO);
-			if (!b->sets[0].data)
+			if (!b->keys.set[0].data)
 				goto err;
 			else
 				goto out;
@@ -919,10 +892,10 @@
 		goto err;
 
 	BUG_ON(!down_write_trylock(&b->lock));
-	if (!b->sets->data)
+	if (!b->keys.set->data)
 		goto err;
 out:
-	BUG_ON(!closure_is_unlocked(&b->io.cl));
+	BUG_ON(b->io_mutex.count != 1);
 
 	bkey_copy(&b->key, k);
 	list_move(&b->list, &c->btree_cache);
@@ -930,10 +903,17 @@
 	hlist_add_head_rcu(&b->hash, mca_hash(c, k));
 
 	lock_set_subclass(&b->lock.dep_map, level + 1, _THIS_IP_);
-	b->level	= level;
 	b->parent	= (void *) ~0UL;
+	b->flags	= 0;
+	b->written	= 0;
+	b->level	= level;
 
-	mca_reinit(b);
+	if (!b->level)
+		bch_btree_keys_init(&b->keys, &bch_extent_keys_ops,
+				    &b->c->expensive_debug_checks);
+	else
+		bch_btree_keys_init(&b->keys, &bch_btree_keys_ops,
+				    &b->c->expensive_debug_checks);
 
 	return b;
 err:
@@ -994,13 +974,13 @@
 
 	b->accessed = 1;
 
-	for (; i <= b->nsets && b->sets[i].size; i++) {
-		prefetch(b->sets[i].tree);
-		prefetch(b->sets[i].data);
+	for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
+		prefetch(b->keys.set[i].tree);
+		prefetch(b->keys.set[i].data);
 	}
 
-	for (; i <= b->nsets; i++)
-		prefetch(b->sets[i].data);
+	for (; i <= b->keys.nsets; i++)
+		prefetch(b->keys.set[i].data);
 
 	if (btree_node_io_error(b)) {
 		rw_unlock(write, b);
@@ -1063,7 +1043,7 @@
 
 	mutex_lock(&c->bucket_lock);
 retry:
-	if (__bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, wait))
+	if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
 		goto err;
 
 	bkey_put(c, &k.key);
@@ -1080,7 +1060,7 @@
 	}
 
 	b->accessed = 1;
-	bch_bset_init_next(b);
+	bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
 
 	mutex_unlock(&c->bucket_lock);
 
@@ -1098,8 +1078,10 @@
 static struct btree *btree_node_alloc_replacement(struct btree *b, bool wait)
 {
 	struct btree *n = bch_btree_node_alloc(b->c, b->level, wait);
-	if (!IS_ERR_OR_NULL(n))
-		bch_btree_sort_into(b, n);
+	if (!IS_ERR_OR_NULL(n)) {
+		bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
+		bkey_copy_key(&n->key, &b->key);
+	}
 
 	return n;
 }
@@ -1120,6 +1102,28 @@
 	atomic_inc(&b->c->prio_blocked);
 }
 
+static int btree_check_reserve(struct btree *b, struct btree_op *op)
+{
+	struct cache_set *c = b->c;
+	struct cache *ca;
+	unsigned i, reserve = c->root->level * 2 + 1;
+	int ret = 0;
+
+	mutex_lock(&c->bucket_lock);
+
+	for_each_cache(ca, c, i)
+		if (fifo_used(&ca->free[RESERVE_BTREE]) < reserve) {
+			if (op)
+				prepare_to_wait(&c->bucket_wait, &op->wait,
+						TASK_UNINTERRUPTIBLE);
+			ret = -EINTR;
+			break;
+		}
+
+	mutex_unlock(&c->bucket_lock);
+	return ret;
+}
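
The -EINTR return cooperates with the btree() macro above:
btree_check_reserve() queues the caller on c->bucket_wait via
prepare_to_wait() before returning, the macro drops the node lock,
schedules, and retries, and finish_wait() cleans up at the end.
Condensed:

	/* inside btree_check_reserve(), under bucket_lock: */
	prepare_to_wait(&c->bucket_wait, &op->wait, TASK_UNINTERRUPTIBLE);
	ret = -EINTR;

	/* in the btree() retry loop, after rw_unlock(): */
	if (_r == -EINTR)
		schedule();	/* sleep until the allocator wakes us */

	/* once the operation finally succeeds or fails for real: */
	finish_wait(&c->bucket_wait, &op->wait);
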
+
 /* Garbage collection */
 
 uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k)
@@ -1183,11 +1187,11 @@
 
 	gc->nodes++;
 
-	for_each_key_filter(b, k, &iter, bch_ptr_invalid) {
+	for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
 		stale = max(stale, btree_mark_key(b, k));
 		keys++;
 
-		if (bch_ptr_bad(b, k))
+		if (bch_ptr_bad(&b->keys, k))
 			continue;
 
 		gc->key_bytes += bkey_u64s(k);
@@ -1197,9 +1201,9 @@
 		gc->data += KEY_SIZE(k);
 	}
 
-	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
+	for (t = b->keys.set; t <= &b->keys.set[b->keys.nsets]; t++)
 		btree_bug_on(t->size &&
-			     bset_written(b, t) &&
+			     bset_written(&b->keys, t) &&
 			     bkey_cmp(&b->key, &t->end) < 0,
 			     b, "found short btree key in gc");
 
@@ -1243,7 +1247,8 @@
 	blocks = btree_default_blocks(b->c) * 2 / 3;
 
 	if (nodes < 2 ||
-	    __set_blocks(b->sets[0].data, keys, b->c) > blocks * (nodes - 1))
+	    __set_blocks(b->keys.set[0].data, keys,
+			 block_bytes(b->c)) > blocks * (nodes - 1))
 		return 0;
 
 	for (i = 0; i < nodes; i++) {
@@ -1253,18 +1258,19 @@
 	}
 
 	for (i = nodes - 1; i > 0; --i) {
-		struct bset *n1 = new_nodes[i]->sets->data;
-		struct bset *n2 = new_nodes[i - 1]->sets->data;
+		struct bset *n1 = btree_bset_first(new_nodes[i]);
+		struct bset *n2 = btree_bset_first(new_nodes[i - 1]);
 		struct bkey *k, *last = NULL;
 
 		keys = 0;
 
 		if (i > 1) {
 			for (k = n2->start;
-			     k < end(n2);
+			     k < bset_bkey_last(n2);
 			     k = bkey_next(k)) {
 				if (__set_blocks(n1, n1->keys + keys +
-						 bkey_u64s(k), b->c) > blocks)
+						 bkey_u64s(k),
+						 block_bytes(b->c)) > blocks)
 					break;
 
 				last = k;
@@ -1280,7 +1286,8 @@
 			 * though)
 			 */
 			if (__set_blocks(n1, n1->keys + n2->keys,
-					 b->c) > btree_blocks(new_nodes[i]))
+					 block_bytes(b->c)) >
+			    btree_blocks(new_nodes[i]))
 				goto out_nocoalesce;
 
 			keys = n2->keys;
@@ -1288,27 +1295,28 @@
 			last = &r->b->key;
 		}
 
-		BUG_ON(__set_blocks(n1, n1->keys + keys,
-				    b->c) > btree_blocks(new_nodes[i]));
+		BUG_ON(__set_blocks(n1, n1->keys + keys, block_bytes(b->c)) >
+		       btree_blocks(new_nodes[i]));
 
 		if (last)
 			bkey_copy_key(&new_nodes[i]->key, last);
 
-		memcpy(end(n1),
+		memcpy(bset_bkey_last(n1),
 		       n2->start,
-		       (void *) node(n2, keys) - (void *) n2->start);
+		       (void *) bset_bkey_idx(n2, keys) - (void *) n2->start);
 
 		n1->keys += keys;
 		r[i].keys = n1->keys;
 
 		memmove(n2->start,
-			node(n2, keys),
-			(void *) end(n2) - (void *) node(n2, keys));
+			bset_bkey_idx(n2, keys),
+			(void *) bset_bkey_last(n2) -
+			(void *) bset_bkey_idx(n2, keys));
 
 		n2->keys -= keys;
 
-		if (bch_keylist_realloc(keylist,
-					KEY_PTRS(&new_nodes[i]->key), b->c))
+		if (__bch_keylist_realloc(keylist,
+					  bkey_u64s(&new_nodes[i]->key)))
 			goto out_nocoalesce;
 
 		bch_btree_node_write(new_nodes[i], &cl);
@@ -1316,7 +1324,7 @@
 	}
 
 	for (i = 0; i < nodes; i++) {
-		if (bch_keylist_realloc(keylist, KEY_PTRS(&r[i].b->key), b->c))
+		if (__bch_keylist_realloc(keylist, bkey_u64s(&r[i].b->key)))
 			goto out_nocoalesce;
 
 		make_btree_freeing_key(r[i].b, keylist->top);
@@ -1324,7 +1332,7 @@
 	}
 
 	/* We emptied out this node */
-	BUG_ON(new_nodes[0]->sets->data->keys);
+	BUG_ON(btree_bset_first(new_nodes[0])->keys);
 	btree_node_free(new_nodes[0]);
 	rw_unlock(true, new_nodes[0]);
 
@@ -1370,7 +1378,7 @@
 	struct btree_iter iter;
 	unsigned ret = 0;
 
-	for_each_key_filter(b, k, &iter, bch_ptr_bad)
+	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
 		ret += bkey_u64s(k);
 
 	return ret;
@@ -1390,13 +1398,13 @@
 	struct gc_merge_info *last = r + GC_MERGE_NODES - 1;
 
 	bch_keylist_init(&keys);
-	bch_btree_iter_init(b, &iter, &b->c->gc_done);
+	bch_btree_iter_init(&b->keys, &iter, &b->c->gc_done);
 
 	for (i = 0; i < GC_MERGE_NODES; i++)
 		r[i].b = ERR_PTR(-EINTR);
 
 	while (1) {
-		k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
+		k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
 		if (k) {
 			r->b = bch_btree_node_get(b->c, k, b->level - 1, true);
 			if (IS_ERR(r->b)) {
@@ -1416,7 +1424,8 @@
 
 		if (!IS_ERR(last->b)) {
 			should_rewrite = btree_gc_mark_node(last->b, gc);
-			if (should_rewrite) {
+			if (should_rewrite &&
+			    !btree_check_reserve(b, NULL)) {
 				n = btree_node_alloc_replacement(last->b,
 								 false);
 
@@ -1705,7 +1714,7 @@
 	struct bucket *g;
 	struct btree_iter iter;
 
-	for_each_key_filter(b, k, &iter, bch_ptr_invalid) {
+	for_each_key_filter(&b->keys, k, &iter, bch_ptr_invalid) {
 		for (i = 0; i < KEY_PTRS(k); i++) {
 			if (!ptr_available(b->c, k, i))
 				continue;
@@ -1728,10 +1737,11 @@
 	}
 
 	if (b->level) {
-		bch_btree_iter_init(b, &iter, NULL);
+		bch_btree_iter_init(&b->keys, &iter, NULL);
 
 		do {
-			k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
+			k = bch_btree_iter_next_filter(&iter, &b->keys,
+						       bch_ptr_bad);
 			if (k)
 				btree_node_prefetch(b->c, k, b->level - 1);
 
@@ -1774,235 +1784,36 @@
 
 /* Btree insertion */
 
-static void shift_keys(struct btree *b, struct bkey *where, struct bkey *insert)
+static bool btree_insert_key(struct btree *b, struct bkey *k,
+			     struct bkey *replace_key)
 {
-	struct bset *i = b->sets[b->nsets].data;
-
-	memmove((uint64_t *) where + bkey_u64s(insert),
-		where,
-		(void *) end(i) - (void *) where);
-
-	i->keys += bkey_u64s(insert);
-	bkey_copy(where, insert);
-	bch_bset_fix_lookup_table(b, where);
-}
-
-static bool fix_overlapping_extents(struct btree *b, struct bkey *insert,
-				    struct btree_iter *iter,
-				    struct bkey *replace_key)
-{
-	void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
-	{
-		if (KEY_DIRTY(k))
-			bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
-						     offset, -sectors);
-	}
-
-	uint64_t old_offset;
-	unsigned old_size, sectors_found = 0;
-
-	while (1) {
-		struct bkey *k = bch_btree_iter_next(iter);
-		if (!k ||
-		    bkey_cmp(&START_KEY(k), insert) >= 0)
-			break;
-
-		if (bkey_cmp(k, &START_KEY(insert)) <= 0)
-			continue;
-
-		old_offset = KEY_START(k);
-		old_size = KEY_SIZE(k);
-
-		/*
-		 * We might overlap with 0 size extents; we can't skip these
-		 * because if they're in the set we're inserting to we have to
-		 * adjust them so they don't overlap with the key we're
-		 * inserting. But we don't want to check them for replace
-		 * operations.
-		 */
-
-		if (replace_key && KEY_SIZE(k)) {
-			/*
-			 * k might have been split since we inserted/found the
-			 * key we're replacing
-			 */
-			unsigned i;
-			uint64_t offset = KEY_START(k) -
-				KEY_START(replace_key);
-
-			/* But it must be a subset of the replace key */
-			if (KEY_START(k) < KEY_START(replace_key) ||
-			    KEY_OFFSET(k) > KEY_OFFSET(replace_key))
-				goto check_failed;
-
-			/* We didn't find a key that we were supposed to */
-			if (KEY_START(k) > KEY_START(insert) + sectors_found)
-				goto check_failed;
-
-			if (KEY_PTRS(k) != KEY_PTRS(replace_key) ||
-			    KEY_DIRTY(k) != KEY_DIRTY(replace_key))
-				goto check_failed;
-
-			/* skip past gen */
-			offset <<= 8;
-
-			BUG_ON(!KEY_PTRS(replace_key));
-
-			for (i = 0; i < KEY_PTRS(replace_key); i++)
-				if (k->ptr[i] != replace_key->ptr[i] + offset)
-					goto check_failed;
-
-			sectors_found = KEY_OFFSET(k) - KEY_START(insert);
-		}
-
-		if (bkey_cmp(insert, k) < 0 &&
-		    bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) {
-			/*
-			 * We overlapped in the middle of an existing key: that
-			 * means we have to split the old key. But we have to do
-			 * slightly different things depending on whether the
-			 * old key has been written out yet.
-			 */
-
-			struct bkey *top;
-
-			subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
-
-			if (bkey_written(b, k)) {
-				/*
-				 * We insert a new key to cover the top of the
-				 * old key, and the old key is modified in place
-				 * to represent the bottom split.
-				 *
-				 * It's completely arbitrary whether the new key
-				 * is the top or the bottom, but it has to match
-				 * up with what btree_sort_fixup() does - it
-				 * doesn't check for this kind of overlap, it
-				 * depends on us inserting a new key for the top
-				 * here.
-				 */
-				top = bch_bset_search(b, &b->sets[b->nsets],
-						      insert);
-				shift_keys(b, top, k);
-			} else {
-				BKEY_PADDED(key) temp;
-				bkey_copy(&temp.key, k);
-				shift_keys(b, k, &temp.key);
-				top = bkey_next(k);
-			}
-
-			bch_cut_front(insert, top);
-			bch_cut_back(&START_KEY(insert), k);
-			bch_bset_fix_invalidated_key(b, k);
-			return false;
-		}
-
-		if (bkey_cmp(insert, k) < 0) {
-			bch_cut_front(insert, k);
-		} else {
-			if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0)
-				old_offset = KEY_START(insert);
-
-			if (bkey_written(b, k) &&
-			    bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) {
-				/*
-				 * Completely overwrote, so we don't have to
-				 * invalidate the binary search tree
-				 */
-				bch_cut_front(k, k);
-			} else {
-				__bch_cut_back(&START_KEY(insert), k);
-				bch_bset_fix_invalidated_key(b, k);
-			}
-		}
-
-		subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
-	}
-
-check_failed:
-	if (replace_key) {
-		if (!sectors_found) {
-			return true;
-		} else if (sectors_found < KEY_SIZE(insert)) {
-			SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
-				       (KEY_SIZE(insert) - sectors_found));
-			SET_KEY_SIZE(insert, sectors_found);
-		}
-	}
-
-	return false;
-}
-
-static bool btree_insert_key(struct btree *b, struct btree_op *op,
-			     struct bkey *k, struct bkey *replace_key)
-{
-	struct bset *i = b->sets[b->nsets].data;
-	struct bkey *m, *prev;
-	unsigned status = BTREE_INSERT_STATUS_INSERT;
+	unsigned status;
 
 	BUG_ON(bkey_cmp(k, &b->key) > 0);
-	BUG_ON(b->level && !KEY_PTRS(k));
-	BUG_ON(!b->level && !KEY_OFFSET(k));
 
-	if (!b->level) {
-		struct btree_iter iter;
+	status = bch_btree_insert_key(&b->keys, k, replace_key);
+	if (status != BTREE_INSERT_STATUS_NO_INSERT) {
+		bch_check_keys(&b->keys, "%u for %s", status,
+			       replace_key ? "replace" : "insert");
 
-		/*
-		 * bset_search() returns the first key that is strictly greater
-		 * than the search key - but for back merging, we want to find
-		 * the previous key.
-		 */
-		prev = NULL;
-		m = bch_btree_iter_init(b, &iter, PRECEDING_KEY(&START_KEY(k)));
+		trace_bcache_btree_insert_key(b, k, replace_key != NULL,
+					      status);
+		return true;
+	} else
+		return false;
+}
 
-		if (fix_overlapping_extents(b, k, &iter, replace_key)) {
-			op->insert_collision = true;
-			return false;
-		}
+static size_t insert_u64s_remaining(struct btree *b)
+{
+	ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys);
 
-		if (KEY_DIRTY(k))
-			bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
-						     KEY_START(k), KEY_SIZE(k));
+	/*
+	 * Might land in the middle of an existing extent and have to split it
+	 */
+	if (b->keys.ops->is_extents)
+		ret -= KEY_MAX_U64S;
 
-		while (m != end(i) &&
-		       bkey_cmp(k, &START_KEY(m)) > 0)
-			prev = m, m = bkey_next(m);
-
-		if (key_merging_disabled(b->c))
-			goto insert;
-
-		/* prev is in the tree, if we merge we're done */
-		status = BTREE_INSERT_STATUS_BACK_MERGE;
-		if (prev &&
-		    bch_bkey_try_merge(b, prev, k))
-			goto merged;
-
-		status = BTREE_INSERT_STATUS_OVERWROTE;
-		if (m != end(i) &&
-		    KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
-			goto copy;
-
-		status = BTREE_INSERT_STATUS_FRONT_MERGE;
-		if (m != end(i) &&
-		    bch_bkey_try_merge(b, k, m))
-			goto copy;
-	} else {
-		BUG_ON(replace_key);
-		m = bch_bset_search(b, &b->sets[b->nsets], k);
-	}
-
-insert:	shift_keys(b, m, k);
-copy:	bkey_copy(m, k);
-merged:
-	bch_check_keys(b, "%u for %s", status,
-		       replace_key ? "replace" : "insert");
-
-	if (b->level && !KEY_OFFSET(k))
-		btree_current_write(b)->prio_blocked++;
-
-	trace_bcache_btree_insert_key(b, k, replace_key != NULL, status);
-
-	return true;
+	return max(ret, 0L);
 }
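
The KEY_MAX_U64S head-room matters because an insert landing in the
middle of an existing extent splits it in two, so the set can grow by
one full-size key beyond what is being inserted:

	/*
	 * existing extent:   |--------- [0, 100) ---------|
	 * inserted extent:            |- [40, 60) -|
	 * after the insert:  |[0,40)| |- [40, 60) -| |[60,100)|
	 *
	 * One overlapped key became two, hence the extra KEY_MAX_U64S.
	 */
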
 
 static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op,
@@ -2010,21 +1821,19 @@
 				  struct bkey *replace_key)
 {
 	bool ret = false;
-	int oldsize = bch_count_data(b);
+	int oldsize = bch_count_data(&b->keys);
 
 	while (!bch_keylist_empty(insert_keys)) {
-		struct bset *i = write_block(b);
 		struct bkey *k = insert_keys->keys;
 
-		if (b->written + __set_blocks(i, i->keys + bkey_u64s(k), b->c)
-		    > btree_blocks(b))
+		if (bkey_u64s(k) > insert_u64s_remaining(b))
 			break;
 
 		if (bkey_cmp(k, &b->key) <= 0) {
 			if (!b->level)
 				bkey_put(b->c, k);
 
-			ret |= btree_insert_key(b, op, k, replace_key);
+			ret |= btree_insert_key(b, k, replace_key);
 			bch_keylist_pop_front(insert_keys);
 		} else if (bkey_cmp(&START_KEY(k), &b->key) < 0) {
 			BKEY_PADDED(key) temp;
@@ -2033,16 +1842,19 @@
 			bch_cut_back(&b->key, &temp.key);
 			bch_cut_front(&b->key, insert_keys->keys);
 
-			ret |= btree_insert_key(b, op, &temp.key, replace_key);
+			ret |= btree_insert_key(b, &temp.key, replace_key);
 			break;
 		} else {
 			break;
 		}
 	}
 
+	if (!ret)
+		op->insert_collision = true;
+
 	BUG_ON(!bch_keylist_empty(insert_keys) && b->level);
 
-	BUG_ON(bch_count_data(b) < oldsize);
+	BUG_ON(bch_count_data(&b->keys) < oldsize);
 	return ret;
 }
 
@@ -2059,16 +1871,21 @@
 	closure_init_stack(&cl);
 	bch_keylist_init(&parent_keys);
 
+	if (!b->level &&
+	    btree_check_reserve(b, op))
+		return -EINTR;
+
 	n1 = btree_node_alloc_replacement(b, true);
 	if (IS_ERR(n1))
 		goto err;
 
-	split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
+	split = set_blocks(btree_bset_first(n1),
+			   block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
 
 	if (split) {
 		unsigned keys = 0;
 
-		trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
+		trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
 
 		n2 = bch_btree_node_alloc(b->c, b->level, true);
 		if (IS_ERR(n2))
@@ -2087,18 +1904,20 @@
 		 * search tree yet
 		 */
 
-		while (keys < (n1->sets[0].data->keys * 3) / 5)
-			keys += bkey_u64s(node(n1->sets[0].data, keys));
+		while (keys < (btree_bset_first(n1)->keys * 3) / 5)
+			keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1),
+							keys));
 
-		bkey_copy_key(&n1->key, node(n1->sets[0].data, keys));
-		keys += bkey_u64s(node(n1->sets[0].data, keys));
+		bkey_copy_key(&n1->key,
+			      bset_bkey_idx(btree_bset_first(n1), keys));
+		keys += bkey_u64s(bset_bkey_idx(btree_bset_first(n1), keys));
 
-		n2->sets[0].data->keys = n1->sets[0].data->keys - keys;
-		n1->sets[0].data->keys = keys;
+		btree_bset_first(n2)->keys = btree_bset_first(n1)->keys - keys;
+		btree_bset_first(n1)->keys = keys;
 
-		memcpy(n2->sets[0].data->start,
-		       end(n1->sets[0].data),
-		       n2->sets[0].data->keys * sizeof(uint64_t));
+		memcpy(btree_bset_first(n2)->start,
+		       bset_bkey_last(btree_bset_first(n1)),
+		       btree_bset_first(n2)->keys * sizeof(uint64_t));
 
 		bkey_copy_key(&n2->key, &b->key);
 
@@ -2106,7 +1925,7 @@
 		bch_btree_node_write(n2, &cl);
 		rw_unlock(true, n2);
 	} else {
-		trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
+		trace_bcache_btree_node_compact(b, btree_bset_first(n1)->keys);
 
 		bch_btree_insert_keys(n1, op, insert_keys, replace_key);
 	}
@@ -2149,18 +1968,21 @@
 
 	return 0;
 err_free2:
+	bkey_put(b->c, &n2->key);
 	btree_node_free(n2);
 	rw_unlock(true, n2);
 err_free1:
+	bkey_put(b->c, &n1->key);
 	btree_node_free(n1);
 	rw_unlock(true, n1);
 err:
+	WARN(1, "bcache: btree split failed");
+
 	if (n3 == ERR_PTR(-EAGAIN) ||
 	    n2 == ERR_PTR(-EAGAIN) ||
 	    n1 == ERR_PTR(-EAGAIN))
 		return -EAGAIN;
 
-	pr_warn("couldn't split");
 	return -ENOMEM;
 }
 
@@ -2171,7 +1993,7 @@
 {
 	BUG_ON(b->level && replace_key);
 
-	if (should_split(b)) {
+	if (bch_keylist_nkeys(insert_keys) > insert_u64s_remaining(b)) {
 		if (current->bio_list) {
 			op->lock = b->c->root->level + 1;
 			return -EAGAIN;
@@ -2180,11 +2002,13 @@
 			return -EINTR;
 		} else {
 			/* Invalidated all iterators */
-			return btree_split(b, op, insert_keys, replace_key) ?:
-				-EINTR;
+			int ret = btree_split(b, op, insert_keys, replace_key);
+
+			return bch_keylist_empty(insert_keys) ?
+				0 : ret ?: -EINTR;
 		}
 	} else {
-		BUG_ON(write_block(b) != b->sets[b->nsets].data);
+		BUG_ON(write_block(b) != btree_bset_last(b));
 
 		if (bch_btree_insert_keys(b, op, insert_keys, replace_key)) {
 			if (!b->level)
@@ -2323,9 +2147,9 @@
 		struct bkey *k;
 		struct btree_iter iter;
 
-		bch_btree_iter_init(b, &iter, from);
+		bch_btree_iter_init(&b->keys, &iter, from);
 
-		while ((k = bch_btree_iter_next_filter(&iter, b,
+		while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
 						       bch_ptr_bad))) {
 			ret = btree(map_nodes_recurse, k, b,
 				    op, from, fn, flags);
@@ -2356,9 +2180,9 @@
 	struct bkey *k;
 	struct btree_iter iter;
 
-	bch_btree_iter_init(b, &iter, from);
+	bch_btree_iter_init(&b->keys, &iter, from);
 
-	while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad))) {
+	while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
 		ret = !b->level
 			? fn(op, b, k)
 			: btree(map_keys_recurse, k, b, op, from, fn, flags);
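
The straddling-key case in bch_btree_insert_keys() above is easy to misread: a key that crosses the node's end key b->key is split in two with bch_cut_back()/bch_cut_front(), the half inside the node is inserted now, and the remainder stays on the keylist for the caller to route to the next node. A minimal userspace sketch of that arithmetic, with extents reduced to plain [start, end) ranges (all names here are illustrative, not bcache's):

#include <stdio.h>

struct ext { unsigned long long start, end; };

int main(void)
{
	struct ext key = { .start = 90, .end = 130 };	/* front of insert_keys */
	unsigned long long node_end = 100;		/* b->key's offset */

	if (key.end <= node_end) {
		printf("fits entirely: [%llu,%llu)\n", key.start, key.end);
	} else if (key.start < node_end) {
		struct ext temp = key;		/* BKEY_PADDED(key) temp; bkey_copy() */

		temp.end = node_end;		/* bch_cut_back(&b->key, &temp.key) */
		key.start = node_end;		/* bch_cut_front(&b->key, ...) */

		printf("insert [%llu,%llu) here, keep [%llu,%llu) for the next node\n",
		       temp.start, temp.end, key.start, key.end);
	} else {
		printf("entirely past this node\n");	/* the loop breaks */
	}
	return 0;
}
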
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 767e755..af065e9 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -130,20 +130,12 @@
 	unsigned long		flags;
 	uint16_t		written;	/* would be nice to kill */
 	uint8_t			level;
-	uint8_t			nsets;
-	uint8_t			page_order;
 
-	/*
-	 * Set of sorted keys - the real btree node - plus a binary search tree
-	 *
-	 * sets[0] is special; set[0]->tree, set[0]->prev and set[0]->data point
-	 * to the memory we have allocated for this btree node. Additionally,
-	 * set[0]->data points to the entire btree node as it exists on disk.
-	 */
-	struct bset_tree	sets[MAX_BSETS];
+	struct btree_keys	keys;
 
 	/* For outstanding btree writes, used as a lock - protects write_idx */
-	struct closure_with_waitlist	io;
+	struct closure		io;
+	struct semaphore	io_mutex;
 
 	struct list_head	list;
 	struct delayed_work	work;
@@ -179,24 +171,19 @@
 	return b->writes + (btree_node_write_idx(b) ^ 1);
 }
 
-static inline unsigned bset_offset(struct btree *b, struct bset *i)
+static inline struct bset *btree_bset_first(struct btree *b)
 {
-	return (((size_t) i) - ((size_t) b->sets->data)) >> 9;
+	return b->keys.set->data;
 }
 
-static inline struct bset *write_block(struct btree *b)
+static inline struct bset *btree_bset_last(struct btree *b)
 {
-	return ((void *) b->sets[0].data) + b->written * block_bytes(b->c);
+	return bset_tree_last(&b->keys)->data;
 }
 
-static inline bool bset_written(struct btree *b, struct bset_tree *t)
+static inline unsigned bset_block_offset(struct btree *b, struct bset *i)
 {
-	return t->data < write_block(b);
-}
-
-static inline bool bkey_written(struct btree *b, struct bkey *k)
-{
-	return k < write_block(b)->start;
+	return bset_sector_offset(&b->keys, i) >> b->c->block_bits;
 }
 
 static inline void set_gc_sectors(struct cache_set *c)
@@ -204,21 +191,6 @@
 	atomic_set(&c->sectors_to_gc, c->sb.bucket_size * c->nbuckets / 16);
 }
 
-static inline struct bkey *bch_btree_iter_init(struct btree *b,
-					       struct btree_iter *iter,
-					       struct bkey *search)
-{
-	return __bch_btree_iter_init(b, iter, search, b->sets);
-}
-
-static inline bool bch_ptr_invalid(struct btree *b, const struct bkey *k)
-{
-	if (b->level)
-		return bch_btree_ptr_invalid(b->c, k);
-	else
-		return bch_extent_ptr_invalid(b->c, k);
-}
-
 void bkey_put(struct cache_set *c, struct bkey *k);
 
 /* Looping macros */
@@ -229,17 +201,12 @@
 	     iter++)							\
 		hlist_for_each_entry_rcu((b), (c)->bucket_hash + iter, hash)
 
-#define for_each_key_filter(b, k, iter, filter)				\
-	for (bch_btree_iter_init((b), (iter), NULL);			\
-	     ((k) = bch_btree_iter_next_filter((iter), b, filter));)
-
-#define for_each_key(b, k, iter)					\
-	for (bch_btree_iter_init((b), (iter), NULL);			\
-	     ((k) = bch_btree_iter_next(iter));)
-
 /* Recursing down the btree */
 
 struct btree_op {
+	/* for waiting on btree reserve in btree_split() */
+	wait_queue_t		wait;
+
 	/* Btree level at which we start taking write locks */
 	short			lock;
 
@@ -249,6 +216,7 @@
 static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
 {
 	memset(op, 0, sizeof(struct btree_op));
+	init_wait(&op->wait);
 	op->lock = write_lock_level;
 }
 
@@ -267,7 +235,7 @@
 	(w ? up_write : up_read)(&b->lock);
 }
 
-void bch_btree_node_read(struct btree *);
+void bch_btree_node_read_done(struct btree *);
 void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_btree_set_root(struct btree *);
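
With struct btree now embedding struct btree_keys rather than carrying the sets[] array itself, code that only sees the embedded member (the ops callbacks in extents.c below, for instance) gets back to the enclosing node with container_of(). A self-contained userspace sketch of that recovery, with both structs pared down to a couple of fields (illustrative only):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *) ((char *) (ptr) - offsetof(type, member)))

struct btree_keys { int nsets; };

struct btree {
	int level;
	struct btree_keys keys;		/* embedded, as in the new struct btree */
};

int main(void)
{
	struct btree b = { .level = 1 };
	struct btree_keys *bk = &b.keys;	/* what a callback receives */

	/* recover the enclosing btree from the embedded member */
	struct btree *back = container_of(bk, struct btree, keys);
	printf("level = %d\n", back->level);	/* prints: level = 1 */
	return 0;
}
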
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index dfff241..7a228de 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -11,19 +11,6 @@
 
 #include "closure.h"
 
-#define CL_FIELD(type, field)					\
-	case TYPE_ ## type:					\
-	return &container_of(cl, struct type, cl)->field
-
-static struct closure_waitlist *closure_waitlist(struct closure *cl)
-{
-	switch (cl->type) {
-		CL_FIELD(closure_with_waitlist, wait);
-	default:
-		return NULL;
-	}
-}
-
 static inline void closure_put_after_sub(struct closure *cl, int flags)
 {
 	int r = flags & CLOSURE_REMAINING_MASK;
@@ -42,17 +29,10 @@
 			closure_queue(cl);
 		} else {
 			struct closure *parent = cl->parent;
-			struct closure_waitlist *wait = closure_waitlist(cl);
 			closure_fn *destructor = cl->fn;
 
 			closure_debug_destroy(cl);
 
-			smp_mb();
-			atomic_set(&cl->remaining, -1);
-
-			if (wait)
-				closure_wake_up(wait);
-
 			if (destructor)
 				destructor(cl);
 
@@ -69,19 +49,18 @@
 }
 EXPORT_SYMBOL(closure_sub);
 
+/**
+ * closure_put - decrement a closure's refcount
+ */
 void closure_put(struct closure *cl)
 {
 	closure_put_after_sub(cl, atomic_dec_return(&cl->remaining));
 }
 EXPORT_SYMBOL(closure_put);
 
-static void set_waiting(struct closure *cl, unsigned long f)
-{
-#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-	cl->waiting_on = f;
-#endif
-}
-
+/**
+ * __closure_wake_up - wake up all closures on a wait list, without memory barrier
+ */
 void __closure_wake_up(struct closure_waitlist *wait_list)
 {
 	struct llist_node *list;
@@ -106,27 +85,34 @@
 		cl = container_of(reverse, struct closure, list);
 		reverse = llist_next(reverse);
 
-		set_waiting(cl, 0);
+		closure_set_waiting(cl, 0);
 		closure_sub(cl, CLOSURE_WAITING + 1);
 	}
 }
 EXPORT_SYMBOL(__closure_wake_up);
 
-bool closure_wait(struct closure_waitlist *list, struct closure *cl)
+/**
+ * closure_wait - add a closure to a waitlist
+ *
+ * @waitlist will own a ref on @cl, which will be released when
+ * closure_wake_up() is called on @waitlist.
+ *
+ */
+bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
 {
 	if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
 		return false;
 
-	set_waiting(cl, _RET_IP_);
+	closure_set_waiting(cl, _RET_IP_);
 	atomic_add(CLOSURE_WAITING + 1, &cl->remaining);
-	llist_add(&cl->list, &list->list);
+	llist_add(&cl->list, &waitlist->list);
 
 	return true;
 }
 EXPORT_SYMBOL(closure_wait);
 
 /**
- * closure_sync() - sleep until a closure a closure has nothing left to wait on
+ * closure_sync - sleep until a closure has nothing left to wait on
  *
  * Sleeps until the refcount hits 1 - the thread that's running the closure owns
  * the last refcount.
@@ -148,46 +134,6 @@
 }
 EXPORT_SYMBOL(closure_sync);
 
-/**
- * closure_trylock() - try to acquire the closure, without waiting
- * @cl:		closure to lock
- *
- * Returns true if the closure was succesfully locked.
- */
-bool closure_trylock(struct closure *cl, struct closure *parent)
-{
-	if (atomic_cmpxchg(&cl->remaining, -1,
-			   CLOSURE_REMAINING_INITIALIZER) != -1)
-		return false;
-
-	smp_mb();
-
-	cl->parent = parent;
-	if (parent)
-		closure_get(parent);
-
-	closure_set_ret_ip(cl);
-	closure_debug_create(cl);
-	return true;
-}
-EXPORT_SYMBOL(closure_trylock);
-
-void __closure_lock(struct closure *cl, struct closure *parent,
-		    struct closure_waitlist *wait_list)
-{
-	struct closure wait;
-	closure_init_stack(&wait);
-
-	while (1) {
-		if (closure_trylock(cl, parent))
-			return;
-
-		closure_wait_event(wait_list, &wait,
-				   atomic_read(&cl->remaining) == -1);
-	}
-}
-EXPORT_SYMBOL(__closure_lock);
-
 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 
 static LIST_HEAD(closure_list);
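
The wake-up path above is worth a second look: closure_wait() pushes onto a lock-free llist, which yields LIFO order, so __closure_wake_up() grabs the entire list in one atomic operation and reverses it before waking anything, giving FIFO semantics. A compilable userspace sketch of that take-all-and-reverse pattern using C11 atomics (node and waitlist are stand-ins, not the kernel types):

#include <stdatomic.h>
#include <stdio.h>

struct node {
	struct node *next;
	int id;
};

static _Atomic(struct node *) waitlist;

static void wait_add(struct node *n)	/* like closure_wait() adding to the llist */
{
	n->next = atomic_load(&waitlist);
	while (!atomic_compare_exchange_weak(&waitlist, &n->next, n))
		;			/* retry; expected was refreshed */
}

static void wake_all(void)		/* like __closure_wake_up() */
{
	/* take the entire list in one atomic exchange */
	struct node *list = atomic_exchange(&waitlist, NULL);
	struct node *reverse = NULL;

	while (list) {			/* reverse: pushes were LIFO */
		struct node *next = list->next;
		list->next = reverse;
		reverse = list;
		list = next;
	}

	for (; reverse; reverse = reverse->next)
		printf("waking %d\n", reverse->id);
}

int main(void)
{
	struct node a = { .id = 1 }, b = { .id = 2 };

	wait_add(&a);
	wait_add(&b);
	wake_all();			/* prints 1 then 2: FIFO wakeup */
	return 0;
}
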
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 9762f1b..7ef7461 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -72,30 +72,6 @@
  * closure - _always_ use continue_at(). Doing so consistently will help
  * eliminate an entire class of particularly pernicious races.
  *
- * For a closure to wait on an arbitrary event, we need to introduce waitlists:
- *
- * struct closure_waitlist list;
- * closure_wait_event(list, cl, condition);
- * closure_wake_up(wait_list);
- *
- * These work analagously to wait_event() and wake_up() - except that instead of
- * operating on the current thread (for wait_event()) and lists of threads, they
- * operate on an explicit closure and lists of closures.
- *
- * Because it's a closure we can now wait either synchronously or
- * asynchronously. closure_wait_event() returns the current value of the
- * condition, and if it returned false continue_at() or closure_sync() can be
- * used to wait for it to become true.
- *
- * It's useful for waiting on things when you can't sleep in the context in
- * which you must check the condition (perhaps a spinlock held, or you might be
- * beneath generic_make_request() - in which case you can't sleep on IO).
- *
- * closure_wait_event() will wait either synchronously or asynchronously,
- * depending on whether the closure is in blocking mode or not. You can pick a
- * mode explicitly with closure_wait_event_sync() and
- * closure_wait_event_async(), which do just what you might expect.
- *
  * Lastly, you might have a wait list dedicated to a specific event, and have no
  * need for specifying the condition - you just want to wait until someone runs
  * closure_wake_up() on the appropriate wait list. In that case, just use
@@ -121,40 +97,6 @@
  * All this implies that a closure should typically be embedded in a particular
  * struct (which its refcount will normally control the lifetime of), and that
  * struct can very much be thought of as a stack frame.
- *
- * Locking:
- *
- * Closures are based on work items but they can be thought of as more like
- * threads - in that like threads and unlike work items they have a well
- * defined lifetime; they are created (with closure_init()) and eventually
- * complete after a continue_at(cl, NULL, NULL).
- *
- * Suppose you've got some larger structure with a closure embedded in it that's
- * used for periodically doing garbage collection. You only want one garbage
- * collection happening at a time, so the natural thing to do is protect it with
- * a lock. However, it's difficult to use a lock protecting a closure correctly
- * because the unlock should come after the last continue_to() (additionally, if
- * you're using the closure asynchronously a mutex won't work since a mutex has
- * to be unlocked by the same process that locked it).
- *
- * So to make it less error prone and more efficient, we also have the ability
- * to use closures as locks:
- *
- * closure_init_unlocked();
- * closure_trylock();
- *
- * That's all we need for trylock() - the last closure_put() implicitly unlocks
- * it for you.  But for closure_lock(), we also need a wait list:
- *
- * struct closure_with_waitlist frobnicator_cl;
- *
- * closure_init_unlocked(&frobnicator_cl);
- * closure_lock(&frobnicator_cl);
- *
- * A closure_with_waitlist embeds a closure and a wait list - much like struct
- * delayed_work embeds a work item and a timer_list. The important thing is, use
- * it exactly like you would a regular closure and closure_put() will magically
- * handle everything for you.
  */
 
 struct closure;
@@ -164,12 +106,6 @@
 	struct llist_head	list;
 };
 
-enum closure_type {
-	TYPE_closure				= 0,
-	TYPE_closure_with_waitlist		= 1,
-	MAX_CLOSURE_TYPE			= 1,
-};
-
 enum closure_state {
 	/*
 	 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
@@ -224,8 +160,6 @@
 
 	atomic_t		remaining;
 
-	enum closure_type	type;
-
 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 #define CLOSURE_MAGIC_DEAD	0xc054dead
 #define CLOSURE_MAGIC_ALIVE	0xc054a11e
@@ -237,34 +171,12 @@
 #endif
 };
 
-struct closure_with_waitlist {
-	struct closure		cl;
-	struct closure_waitlist	wait;
-};
-
-extern unsigned invalid_closure_type(void);
-
-#define __CLOSURE_TYPE(cl, _t)						\
-	  __builtin_types_compatible_p(typeof(cl), struct _t)		\
-		? TYPE_ ## _t :						\
-
-#define __closure_type(cl)						\
-(									\
-	__CLOSURE_TYPE(cl, closure)					\
-	__CLOSURE_TYPE(cl, closure_with_waitlist)			\
-	invalid_closure_type()						\
-)
-
 void closure_sub(struct closure *cl, int v);
 void closure_put(struct closure *cl);
 void __closure_wake_up(struct closure_waitlist *list);
 bool closure_wait(struct closure_waitlist *list, struct closure *cl);
 void closure_sync(struct closure *cl);
 
-bool closure_trylock(struct closure *cl, struct closure *parent);
-void __closure_lock(struct closure *cl, struct closure *parent,
-		    struct closure_waitlist *wait_list);
-
 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
 
 void closure_debug_init(void);
@@ -293,114 +205,13 @@
 #endif
 }
 
-static inline void closure_get(struct closure *cl)
+static inline void closure_set_waiting(struct closure *cl, unsigned long f)
 {
 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-	BUG_ON((atomic_inc_return(&cl->remaining) &
-		CLOSURE_REMAINING_MASK) <= 1);
-#else
-	atomic_inc(&cl->remaining);
+	cl->waiting_on = f;
 #endif
 }
 
-static inline void closure_set_stopped(struct closure *cl)
-{
-	atomic_sub(CLOSURE_RUNNING, &cl->remaining);
-}
-
-static inline bool closure_is_unlocked(struct closure *cl)
-{
-	return atomic_read(&cl->remaining) == -1;
-}
-
-static inline void do_closure_init(struct closure *cl, struct closure *parent,
-				   bool running)
-{
-	cl->parent = parent;
-	if (parent)
-		closure_get(parent);
-
-	if (running) {
-		closure_debug_create(cl);
-		atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
-	} else
-		atomic_set(&cl->remaining, -1);
-
-	closure_set_ip(cl);
-}
-
-/*
- * Hack to get at the embedded closure if there is one, by doing an unsafe cast:
- * the result of __closure_type() is thrown away, it's used merely for type
- * checking.
- */
-#define __to_internal_closure(cl)				\
-({								\
-	BUILD_BUG_ON(__closure_type(*cl) > MAX_CLOSURE_TYPE);	\
-	(struct closure *) cl;					\
-})
-
-#define closure_init_type(cl, parent, running)			\
-do {								\
-	struct closure *_cl = __to_internal_closure(cl);	\
-	_cl->type = __closure_type(*(cl));			\
-	do_closure_init(_cl, parent, running);			\
-} while (0)
-
-/**
- * __closure_init() - Initialize a closure, skipping the memset()
- *
- * May be used instead of closure_init() when memory has already been zeroed.
- */
-#define __closure_init(cl, parent)				\
-	closure_init_type(cl, parent, true)
-
-/**
- * closure_init() - Initialize a closure, setting the refcount to 1
- * @cl:		closure to initialize
- * @parent:	parent of the new closure. cl will take a refcount on it for its
- *		lifetime; may be NULL.
- */
-#define closure_init(cl, parent)				\
-do {								\
-	memset((cl), 0, sizeof(*(cl)));				\
-	__closure_init(cl, parent);				\
-} while (0)
-
-static inline void closure_init_stack(struct closure *cl)
-{
-	memset(cl, 0, sizeof(struct closure));
-	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK);
-}
-
-/**
- * closure_init_unlocked() - Initialize a closure but leave it unlocked.
- * @cl:		closure to initialize
- *
- * For when the closure will be used as a lock. The closure may not be used
- * until after a closure_lock() or closure_trylock().
- */
-#define closure_init_unlocked(cl)				\
-do {								\
-	memset((cl), 0, sizeof(*(cl)));				\
-	closure_init_type(cl, NULL, false);			\
-} while (0)
-
-/**
- * closure_lock() - lock and initialize a closure.
- * @cl:		the closure to lock
- * @parent:	the new parent for this closure
- *
- * The closure must be of one of the types that has a waitlist (otherwise we
- * wouldn't be able to sleep on contention).
- *
- * @parent has exactly the same meaning as in closure_init(); if non null, the
- * closure will take a reference on @parent which will be released when it is
- * unlocked.
- */
-#define closure_lock(cl, parent)				\
-	__closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)
-
 static inline void __closure_end_sleep(struct closure *cl)
 {
 	__set_current_state(TASK_RUNNING);
@@ -419,65 +230,9 @@
 		atomic_add(CLOSURE_SLEEPING, &cl->remaining);
 }
 
-/**
- * closure_wake_up() - wake up all closures on a wait list.
- */
-static inline void closure_wake_up(struct closure_waitlist *list)
+static inline void closure_set_stopped(struct closure *cl)
 {
-	smp_mb();
-	__closure_wake_up(list);
-}
-
-/*
- * Wait on an event, synchronously or asynchronously - analogous to wait_event()
- * but for closures.
- *
- * The loop is oddly structured so as to avoid a race; we must check the
- * condition again after we've added ourself to the waitlist. We know if we were
- * already on the waitlist because closure_wait() returns false; thus, we only
- * schedule or break if closure_wait() returns false. If it returns true, we
- * just loop again - rechecking the condition.
- *
- * The __closure_wake_up() is necessary because we may race with the event
- * becoming true; i.e. we see event false -> wait -> recheck condition, but the
- * thread that made the event true may have called closure_wake_up() before we
- * added ourself to the wait list.
- *
- * We have to call closure_sync() at the end instead of just
- * __closure_end_sleep() because a different thread might've called
- * closure_wake_up() before us and gotten preempted before they dropped the
- * refcount on our closure. If this was a stack allocated closure, that would be
- * bad.
- */
-#define closure_wait_event(list, cl, condition)				\
-({									\
-	typeof(condition) ret;						\
-									\
-	while (1) {							\
-		ret = (condition);					\
-		if (ret) {						\
-			__closure_wake_up(list);			\
-			closure_sync(cl);				\
-			break;						\
-		}							\
-									\
-		__closure_start_sleep(cl);				\
-									\
-		if (!closure_wait(list, cl))				\
-			schedule();					\
-	}								\
-									\
-	ret;								\
-})
-
-static inline void closure_queue(struct closure *cl)
-{
-	struct workqueue_struct *wq = cl->wq;
-	if (wq) {
-		INIT_WORK(&cl->work, cl->work.func);
-		BUG_ON(!queue_work(wq, &cl->work));
-	} else
-		cl->fn(cl);
+	atomic_sub(CLOSURE_RUNNING, &cl->remaining);
 }
 
 static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
@@ -491,6 +246,76 @@
 	smp_mb__before_atomic_dec();
 }
 
+static inline void closure_queue(struct closure *cl)
+{
+	struct workqueue_struct *wq = cl->wq;
+	if (wq) {
+		INIT_WORK(&cl->work, cl->work.func);
+		BUG_ON(!queue_work(wq, &cl->work));
+	} else
+		cl->fn(cl);
+}
+
+/**
+ * closure_get - increment a closure's refcount
+ */
+static inline void closure_get(struct closure *cl)
+{
+#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
+	BUG_ON((atomic_inc_return(&cl->remaining) &
+		CLOSURE_REMAINING_MASK) <= 1);
+#else
+	atomic_inc(&cl->remaining);
+#endif
+}
+
+/**
+ * closure_init - Initialize a closure, setting the refcount to 1
+ * @cl:		closure to initialize
+ * @parent:	parent of the new closure. cl will take a refcount on it for its
+ *		lifetime; may be NULL.
+ */
+static inline void closure_init(struct closure *cl, struct closure *parent)
+{
+	memset(cl, 0, sizeof(struct closure));
+	cl->parent = parent;
+	if (parent)
+		closure_get(parent);
+
+	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
+
+	closure_debug_create(cl);
+	closure_set_ip(cl);
+}
+
+static inline void closure_init_stack(struct closure *cl)
+{
+	memset(cl, 0, sizeof(struct closure));
+	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK);
+}
+
+/**
+ * closure_wake_up - wake up all closures on a wait list.
+ */
+static inline void closure_wake_up(struct closure_waitlist *list)
+{
+	smp_mb();
+	__closure_wake_up(list);
+}
+
+/**
+ * continue_at - jump to another function with barrier
+ *
+ * After @cl is no longer waiting on anything (i.e. all outstanding refs have
+ * been dropped with closure_put()), it will resume execution at @fn running out
+ * of @wq (or, if @wq is NULL, @fn will be called by closure_put() directly).
+ *
+ * NOTE: This macro expands to a return in the calling function!
+ *
+ * This is because after calling continue_at() you no longer have a ref on @cl,
+ * and whatever @cl owns may be freed out from under you - a running closure fn
+ * has a ref on its own closure which continue_at() drops.
+ */
 #define continue_at(_cl, _fn, _wq)					\
 do {									\
 	set_closure_fn(_cl, _fn, _wq);					\
@@ -498,8 +323,28 @@
 	return;								\
 } while (0)
 
+/**
+ * closure_return - finish execution of a closure
+ *
+ * This is used to indicate that @cl is finished: when all outstanding refs on
+ * @cl have been dropped @cl's ref on its parent closure (as passed to
+ * closure_init()) will be dropped, if one was specified - thus this can be
+ * thought of as returning to the parent closure.
+ */
 #define closure_return(_cl)	continue_at((_cl), NULL, NULL)
 
+/**
+ * continue_at_nobarrier - jump to another function without barrier
+ *
+ * Causes @fn to be executed out of @cl, in @wq context (or called directly if
+ * @wq is NULL).
+ *
+ * NOTE: like continue_at(), this macro expands to a return in the caller!
+ *
+ * The ref the caller of continue_at_nobarrier() had on @cl is now owned by @fn,
+ * thus it's not safe to touch anything protected by @cl after a
+ * continue_at_nobarrier().
+ */
 #define continue_at_nobarrier(_cl, _fn, _wq)				\
 do {									\
 	set_closure_fn(_cl, _fn, _wq);					\
@@ -507,6 +352,15 @@
 	return;								\
 } while (0)
 
+/**
+ * closure_return_with_destructor - finish execution of a closure, with destructor
+ *
+ * Works like closure_return(), except @destructor will be called when all
+ * outstanding refs on @cl have been dropped; @destructor may be used to safely
+ * free the memory occupied by @cl, and it is called with the ref on the parent
+ * closure still held - so @destructor could safely return an item to a
+ * freelist protected by @cl's parent.
+ */
 #define closure_return_with_destructor(_cl, _destructor)		\
 do {									\
 	set_closure_fn(_cl, _destructor, NULL);				\
@@ -514,6 +368,13 @@
 	return;								\
 } while (0)
 
+/**
+ * closure_call - execute @fn out of a new, uninitialized closure
+ *
+ * Typically used when running out of one closure, and we want to run @fn
+ * asynchronously out of a new closure - @parent will then wait for @cl to
+ * finish.
+ */
 static inline void closure_call(struct closure *cl, closure_fn fn,
 				struct workqueue_struct *wq,
 				struct closure *parent)
@@ -522,12 +383,4 @@
 	continue_at_nobarrier(cl, fn, wq);
 }
 
-static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
-					struct workqueue_struct *wq,
-					struct closure *parent)
-{
-	if (closure_trylock(cl, parent))
-		continue_at_nobarrier(cl, fn, wq);
-}
-
 #endif /* _LINUX_CLOSURE_H */
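
Putting the newly documented pieces together: a typical consumer embeds a closure in its own struct, kicks things off with closure_call(), chains stages with continue_at(), and ends with closure_return(), which hands the ref back to the parent. A sketch of that shape, assuming kernel context (my_op and the stage functions are hypothetical, not bcache code):

#include "closure.h"

struct my_op {
	struct closure	cl;	/* the embedded closure is the stack frame */
	int		result;
};

static void my_op_finish(struct closure *cl)
{
	struct my_op *op = container_of(cl, struct my_op, cl);

	op->result = 0;
	closure_return(cl);	/* drops cl's ref on the parent */
}

static void my_op_start(struct closure *cl)
{
	/* ... kick off async work; each piece takes a ref with
	 * closure_get() and drops it with closure_put() on completion ... */

	/* expands to a return - cl must not be touched after this */
	continue_at(cl, my_op_finish, system_wq);
}

static void my_op_run(struct my_op *op, struct closure *parent)
{
	/* closure_call(): init to refcount 1, then jump into my_op_start */
	closure_call(&op->cl, my_op_start, NULL, parent);
}
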
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 264fcfb..8b1f1d5 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -8,6 +8,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "extents.h"
 
 #include <linux/console.h>
 #include <linux/debugfs.h>
@@ -17,163 +18,96 @@
 
 static struct dentry *debug;
 
-const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
-{
-	unsigned i;
-
-	for (i = 0; i < KEY_PTRS(k); i++)
-		if (ptr_available(c, k, i)) {
-			struct cache *ca = PTR_CACHE(c, k, i);
-			size_t bucket = PTR_BUCKET_NR(c, k, i);
-			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
-
-			if (KEY_SIZE(k) + r > c->sb.bucket_size)
-				return "bad, length too big";
-			if (bucket <  ca->sb.first_bucket)
-				return "bad, short offset";
-			if (bucket >= ca->sb.nbuckets)
-				return "bad, offset past end of device";
-			if (ptr_stale(c, k, i))
-				return "stale";
-		}
-
-	if (!bkey_cmp(k, &ZERO_KEY))
-		return "bad, null key";
-	if (!KEY_PTRS(k))
-		return "bad, no pointers";
-	if (!KEY_SIZE(k))
-		return "zeroed key";
-	return "";
-}
-
-int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
-{
-	unsigned i = 0;
-	char *out = buf, *end = buf + size;
-
-#define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))
-
-	p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_OFFSET(k), KEY_SIZE(k));
-
-	if (KEY_PTRS(k))
-		while (1) {
-			p("%llu:%llu gen %llu",
-			  PTR_DEV(k, i), PTR_OFFSET(k, i), PTR_GEN(k, i));
-
-			if (++i == KEY_PTRS(k))
-				break;
-
-			p(", ");
-		}
-
-	p("]");
-
-	if (KEY_DIRTY(k))
-		p(" dirty");
-	if (KEY_CSUM(k))
-		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
-#undef p
-	return out - buf;
-}
-
 #ifdef CONFIG_BCACHE_DEBUG
 
-static void dump_bset(struct btree *b, struct bset *i)
-{
-	struct bkey *k, *next;
-	unsigned j;
-	char buf[80];
+#define for_each_written_bset(b, start, i)				\
+	for (i = (start);						\
+	     (void *) i < (void *) (start) + (KEY_SIZE(&b->key) << 9) &&\
+	     i->seq == (start)->seq;					\
+	     i = (void *) i + set_blocks(i, block_bytes(b->c)) *	\
+		 block_bytes(b->c))
 
-	for (k = i->start; k < end(i); k = next) {
-		next = bkey_next(k);
-
-		bch_bkey_to_text(buf, sizeof(buf), k);
-		printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
-		       (uint64_t *) k - i->d, i->keys, buf);
-
-		for (j = 0; j < KEY_PTRS(k); j++) {
-			size_t n = PTR_BUCKET_NR(b->c, k, j);
-			printk(" bucket %zu", n);
-
-			if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
-				printk(" prio %i",
-				       PTR_BUCKET(b->c, k, j)->prio);
-		}
-
-		printk(" %s\n", bch_ptr_status(b->c, k));
-
-		if (next < end(i) &&
-		    bkey_cmp(k, !b->level ? &START_KEY(next) : next) > 0)
-			printk(KERN_ERR "Key skipped backwards\n");
-	}
-}
-
-static void bch_dump_bucket(struct btree *b)
-{
-	unsigned i;
-
-	console_lock();
-	for (i = 0; i <= b->nsets; i++)
-		dump_bset(b, b->sets[i].data);
-	console_unlock();
-}
-
-void bch_btree_verify(struct btree *b, struct bset *new)
+void bch_btree_verify(struct btree *b)
 {
 	struct btree *v = b->c->verify_data;
-	struct closure cl;
-	closure_init_stack(&cl);
+	struct bset *ondisk, *sorted, *inmemory;
+	struct bio *bio;
 
-	if (!b->c->verify)
+	if (!b->c->verify || !b->c->verify_ondisk)
 		return;
 
-	closure_wait_event(&b->io.wait, &cl,
-			   atomic_read(&b->io.cl.remaining) == -1);
-
+	down(&b->io_mutex);
 	mutex_lock(&b->c->verify_lock);
 
+	ondisk = b->c->verify_ondisk;
+	sorted = b->c->verify_data->keys.set->data;
+	inmemory = b->keys.set->data;
+
 	bkey_copy(&v->key, &b->key);
 	v->written = 0;
 	v->level = b->level;
+	v->keys.ops = b->keys.ops;
 
-	bch_btree_node_read(v);
-	closure_wait_event(&v->io.wait, &cl,
-			   atomic_read(&b->io.cl.remaining) == -1);
+	bio = bch_bbio_alloc(b->c);
+	bio->bi_bdev		= PTR_CACHE(b->c, &b->key, 0)->bdev;
+	bio->bi_iter.bi_sector	= PTR_OFFSET(&b->key, 0);
+	bio->bi_iter.bi_size	= KEY_SIZE(&v->key) << 9;
+	bch_bio_map(bio, sorted);
 
-	if (new->keys != v->sets[0].data->keys ||
-	    memcmp(new->start,
-		   v->sets[0].data->start,
-		   (void *) end(new) - (void *) new->start)) {
-		unsigned i, j;
+	submit_bio_wait(REQ_META|READ_SYNC, bio);
+	bch_bbio_free(bio, b->c);
+
+	memcpy(ondisk, sorted, KEY_SIZE(&v->key) << 9);
+
+	bch_btree_node_read_done(v);
+	sorted = v->keys.set->data;
+
+	if (inmemory->keys != sorted->keys ||
+	    memcmp(inmemory->start,
+		   sorted->start,
+		   (void *) bset_bkey_last(inmemory) - (void *) inmemory->start)) {
+		struct bset *i;
+		unsigned j;
 
 		console_lock();
 
-		printk(KERN_ERR "*** original memory node:\n");
-		for (i = 0; i <= b->nsets; i++)
-			dump_bset(b, b->sets[i].data);
+		printk(KERN_ERR "*** in memory:\n");
+		bch_dump_bset(&b->keys, inmemory, 0);
 
-		printk(KERN_ERR "*** sorted memory node:\n");
-		dump_bset(b, new);
+		printk(KERN_ERR "*** read back in:\n");
+		bch_dump_bset(&v->keys, sorted, 0);
 
-		printk(KERN_ERR "*** on disk node:\n");
-		dump_bset(v, v->sets[0].data);
+		for_each_written_bset(b, ondisk, i) {
+			unsigned block = ((void *) i - (void *) ondisk) /
+				block_bytes(b->c);
 
-		for (j = 0; j < new->keys; j++)
-			if (new->d[j] != v->sets[0].data->d[j])
+			printk(KERN_ERR "*** on disk block %u:\n", block);
+			bch_dump_bset(&b->keys, i, block);
+		}
+
+		printk(KERN_ERR "*** block %zu not written\n",
+		       ((void *) i - (void *) ondisk) / block_bytes(b->c));
+
+		for (j = 0; j < inmemory->keys; j++)
+			if (inmemory->d[j] != sorted->d[j])
 				break;
 
+		printk(KERN_ERR "b->written %u\n", b->written);
+
 		console_unlock();
 		panic("verify failed at %u\n", j);
 	}
 
 	mutex_unlock(&b->c->verify_lock);
+	up(&b->io_mutex);
 }
 
 void bch_data_verify(struct cached_dev *dc, struct bio *bio)
 {
 	char name[BDEVNAME_SIZE];
 	struct bio *check;
-	struct bio_vec *bv;
+	struct bio_vec bv, *bv2;
+	struct bvec_iter iter;
 	int i;
 
 	check = bio_clone(bio, GFP_NOIO);
@@ -185,95 +119,27 @@
 
 	submit_bio_wait(READ_SYNC, check);
 
-	bio_for_each_segment(bv, bio, i) {
-		void *p1 = kmap_atomic(bv->bv_page);
-		void *p2 = page_address(check->bi_io_vec[i].bv_page);
+	bio_for_each_segment(bv, bio, iter) {
+		void *p1 = kmap_atomic(bv.bv_page);
+		void *p2 = page_address(check->bi_io_vec[iter.bi_idx].bv_page);
 
-		cache_set_err_on(memcmp(p1 + bv->bv_offset,
-					p2 + bv->bv_offset,
-					bv->bv_len),
+		cache_set_err_on(memcmp(p1 + bv.bv_offset,
+					p2 + bv.bv_offset,
+					bv.bv_len),
 				 dc->disk.c,
 				 "verify failed at dev %s sector %llu",
 				 bdevname(dc->bdev, name),
-				 (uint64_t) bio->bi_sector);
+				 (uint64_t) bio->bi_iter.bi_sector);
 
 		kunmap_atomic(p1);
 	}
 
-	bio_for_each_segment_all(bv, check, i)
-		__free_page(bv->bv_page);
+	bio_for_each_segment_all(bv2, check, i)
+		__free_page(bv2->bv_page);
 out_put:
 	bio_put(check);
 }
 
-int __bch_count_data(struct btree *b)
-{
-	unsigned ret = 0;
-	struct btree_iter iter;
-	struct bkey *k;
-
-	if (!b->level)
-		for_each_key(b, k, &iter)
-			ret += KEY_SIZE(k);
-	return ret;
-}
-
-void __bch_check_keys(struct btree *b, const char *fmt, ...)
-{
-	va_list args;
-	struct bkey *k, *p = NULL;
-	struct btree_iter iter;
-	const char *err;
-
-	for_each_key(b, k, &iter) {
-		if (!b->level) {
-			err = "Keys out of order";
-			if (p && bkey_cmp(&START_KEY(p), &START_KEY(k)) > 0)
-				goto bug;
-
-			if (bch_ptr_invalid(b, k))
-				continue;
-
-			err =  "Overlapping keys";
-			if (p && bkey_cmp(p, &START_KEY(k)) > 0)
-				goto bug;
-		} else {
-			if (bch_ptr_bad(b, k))
-				continue;
-
-			err = "Duplicate keys";
-			if (p && !bkey_cmp(p, k))
-				goto bug;
-		}
-		p = k;
-	}
-
-	err = "Key larger than btree node key";
-	if (p && bkey_cmp(p, &b->key) > 0)
-		goto bug;
-
-	return;
-bug:
-	bch_dump_bucket(b);
-
-	va_start(args, fmt);
-	vprintk(fmt, args);
-	va_end(args);
-
-	panic("bcache error: %s:\n", err);
-}
-
-void bch_btree_iter_next_check(struct btree_iter *iter)
-{
-	struct bkey *k = iter->data->k, *next = bkey_next(k);
-
-	if (next < iter->data->end &&
-	    bkey_cmp(k, iter->b->level ? next : &START_KEY(next)) > 0) {
-		bch_dump_bucket(iter->b);
-		panic("Key skipped backwards\n");
-	}
-}
-
 #endif
 
 #ifdef CONFIG_DEBUG_FS
@@ -320,7 +186,7 @@
 		if (!w)
 			break;
 
-		bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key);
+		bch_extent_to_text(kbuf, sizeof(kbuf), &w->key);
 		i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf);
 		bch_keybuf_del(&i->keys, w);
 	}
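
The for_each_written_bset() walk introduced above relies on two on-disk invariants: written bsets are laid out back to back in whole blocks, and every bset written in the same pass carries the seq of the first one, so iteration stops at the first block whose seq differs. A runnable userspace sketch of the same walk over a fake buffer (BLOCK_BYTES and the trimmed struct bset are stand-ins):

#include <stdint.h>
#include <stdio.h>

#define BLOCK_BYTES 64

struct bset {			/* trimmed: just enough for the walk */
	uint64_t seq;
	uint32_t keys;		/* 8-byte key words following the header */
};

static size_t set_blocks(const struct bset *i)
{
	size_t bytes = sizeof(struct bset) + (size_t) i->keys * 8;
	return (bytes + BLOCK_BYTES - 1) / BLOCK_BYTES;	/* round up */
}

int main(void)
{
	_Alignas(uint64_t) unsigned char buf[4 * BLOCK_BYTES] = { 0 };
	struct bset *i;

	/* two bsets written in one pass (same seq), then a stale block */
	((struct bset *) (buf + 0 * BLOCK_BYTES))->seq = 42;
	((struct bset *) (buf + 1 * BLOCK_BYTES))->seq = 42;
	((struct bset *) (buf + 2 * BLOCK_BYTES))->seq = 7;

	for (i = (struct bset *) buf;
	     (unsigned char *) i < buf + sizeof(buf) &&
	     i->seq == ((struct bset *) buf)->seq;
	     i = (struct bset *) ((unsigned char *) i +
				  set_blocks(i) * BLOCK_BYTES))
		printf("written bset at block %zu\n",
		       (size_t) (((unsigned char *) i - buf) / BLOCK_BYTES));

	return 0;
}
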
diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h
index 2ede60e..1f63c19 100644
--- a/drivers/md/bcache/debug.h
+++ b/drivers/md/bcache/debug.h
@@ -1,47 +1,30 @@
 #ifndef _BCACHE_DEBUG_H
 #define _BCACHE_DEBUG_H
 
-/* Btree/bkey debug printing */
-
-int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k);
+struct bio;
+struct cached_dev;
+struct cache_set;
 
 #ifdef CONFIG_BCACHE_DEBUG
 
-void bch_btree_verify(struct btree *, struct bset *);
+void bch_btree_verify(struct btree *);
 void bch_data_verify(struct cached_dev *, struct bio *);
-int __bch_count_data(struct btree *);
-void __bch_check_keys(struct btree *, const char *, ...);
-void bch_btree_iter_next_check(struct btree_iter *);
 
-#define EBUG_ON(cond)			BUG_ON(cond)
 #define expensive_debug_checks(c)	((c)->expensive_debug_checks)
 #define key_merging_disabled(c)		((c)->key_merging_disabled)
 #define bypass_torture_test(d)		((d)->bypass_torture_test)
 
 #else /* DEBUG */
 
-static inline void bch_btree_verify(struct btree *b, struct bset *i) {}
+static inline void bch_btree_verify(struct btree *b) {}
 static inline void bch_data_verify(struct cached_dev *dc, struct bio *bio) {}
-static inline int __bch_count_data(struct btree *b) { return -1; }
-static inline void __bch_check_keys(struct btree *b, const char *fmt, ...) {}
-static inline void bch_btree_iter_next_check(struct btree_iter *iter) {}
 
-#define EBUG_ON(cond)			do { if (cond); } while (0)
 #define expensive_debug_checks(c)	0
 #define key_merging_disabled(c)		0
 #define bypass_torture_test(d)		0
 
 #endif
 
-#define bch_count_data(b)						\
-	(expensive_debug_checks((b)->c) ? __bch_count_data(b) : -1)
-
-#define bch_check_keys(b, ...)						\
-do {									\
-	if (expensive_debug_checks((b)->c))				\
-		__bch_check_keys(b, __VA_ARGS__);			\
-} while (0)
-
 #ifdef CONFIG_DEBUG_FS
 void bch_debug_init_cache_set(struct cache_set *);
 #else
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
new file mode 100644
index 0000000..c3ead58
--- /dev/null
+++ b/drivers/md/bcache/extents.c
@@ -0,0 +1,616 @@
+/*
+ * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com>
+ *
+ * Uses a block device as cache for other block devices; optimized for SSDs.
+ * All allocation is done in buckets, which should match the erase block size
+ * of the device.
+ *
+ * Buckets containing cached data are kept on a heap sorted by priority;
+ * bucket priority is increased on cache hit, and periodically all the buckets
+ * on the heap have their priority scaled down. This currently is just used as
+ * an LRU but in the future should allow for more intelligent heuristics.
+ *
+ * Buckets have an 8 bit counter; freeing is accomplished by incrementing the
+ * counter. Garbage collection is used to remove stale pointers.
+ *
+ * Indexing is done via a btree; nodes are not necessarily fully sorted, rather
+ * as keys are inserted we only sort the pages that have not yet been written.
+ * When garbage collection is run, we resort the entire node.
+ *
+ * All configuration is done via sysfs; see Documentation/bcache.txt.
+ */
+
+#include "bcache.h"
+#include "btree.h"
+#include "debug.h"
+#include "extents.h"
+#include "writeback.h"
+
+static void sort_key_next(struct btree_iter *iter,
+			  struct btree_iter_set *i)
+{
+	i->k = bkey_next(i->k);
+
+	if (i->k == i->end)
+		*i = iter->data[--iter->used];
+}
+
+static bool bch_key_sort_cmp(struct btree_iter_set l,
+			     struct btree_iter_set r)
+{
+	int64_t c = bkey_cmp(l.k, r.k);
+
+	return c ? c > 0 : l.k < r.k;
+}
+
+static bool __ptr_invalid(struct cache_set *c, const struct bkey *k)
+{
+	unsigned i;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (ptr_available(c, k, i)) {
+			struct cache *ca = PTR_CACHE(c, k, i);
+			size_t bucket = PTR_BUCKET_NR(c, k, i);
+			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
+
+			if (KEY_SIZE(k) + r > c->sb.bucket_size ||
+			    bucket <  ca->sb.first_bucket ||
+			    bucket >= ca->sb.nbuckets)
+				return true;
+		}
+
+	return false;
+}
+
+/* Common among btree and extent ptrs */
+
+static const char *bch_ptr_status(struct cache_set *c, const struct bkey *k)
+{
+	unsigned i;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (ptr_available(c, k, i)) {
+			struct cache *ca = PTR_CACHE(c, k, i);
+			size_t bucket = PTR_BUCKET_NR(c, k, i);
+			size_t r = bucket_remainder(c, PTR_OFFSET(k, i));
+
+			if (KEY_SIZE(k) + r > c->sb.bucket_size)
+				return "bad, length too big";
+			if (bucket <  ca->sb.first_bucket)
+				return "bad, short offset";
+			if (bucket >= ca->sb.nbuckets)
+				return "bad, offset past end of device";
+			if (ptr_stale(c, k, i))
+				return "stale";
+		}
+
+	if (!bkey_cmp(k, &ZERO_KEY))
+		return "bad, null key";
+	if (!KEY_PTRS(k))
+		return "bad, no pointers";
+	if (!KEY_SIZE(k))
+		return "zeroed key";
+	return "";
+}
+
+void bch_extent_to_text(char *buf, size_t size, const struct bkey *k)
+{
+	unsigned i = 0;
+	char *out = buf, *end = buf + size;
+
+#define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))
+
+	p("%llu:%llu len %llu -> [", KEY_INODE(k), KEY_START(k), KEY_SIZE(k));
+
+	for (i = 0; i < KEY_PTRS(k); i++) {
+		if (i)
+			p(", ");
+
+		if (PTR_DEV(k, i) == PTR_CHECK_DEV)
+			p("check dev");
+		else
+			p("%llu:%llu gen %llu", PTR_DEV(k, i),
+			  PTR_OFFSET(k, i), PTR_GEN(k, i));
+	}
+
+	p("]");
+
+	if (KEY_DIRTY(k))
+		p(" dirty");
+	if (KEY_CSUM(k))
+		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
+#undef p
+}
+
+static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
+{
+	struct btree *b = container_of(keys, struct btree, keys);
+	unsigned j;
+	char buf[80];
+
+	bch_extent_to_text(buf, sizeof(buf), k);
+	printk(" %s", buf);
+
+	for (j = 0; j < KEY_PTRS(k); j++) {
+		size_t n = PTR_BUCKET_NR(b->c, k, j);
+		printk(" bucket %zu", n);
+
+		if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
+			printk(" prio %i",
+			       PTR_BUCKET(b->c, k, j)->prio);
+	}
+
+	printk(" %s\n", bch_ptr_status(b->c, k));
+}
+
+/* Btree ptrs */
+
+bool __bch_btree_ptr_invalid(struct cache_set *c, const struct bkey *k)
+{
+	char buf[80];
+
+	if (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k))
+		goto bad;
+
+	if (__ptr_invalid(c, k))
+		goto bad;
+
+	return false;
+bad:
+	bch_extent_to_text(buf, sizeof(buf), k);
+	cache_bug(c, "spotted btree ptr %s: %s", buf, bch_ptr_status(c, k));
+	return true;
+}
+
+static bool bch_btree_ptr_invalid(struct btree_keys *bk, const struct bkey *k)
+{
+	struct btree *b = container_of(bk, struct btree, keys);
+	return __bch_btree_ptr_invalid(b->c, k);
+}
+
+static bool btree_ptr_bad_expensive(struct btree *b, const struct bkey *k)
+{
+	unsigned i;
+	char buf[80];
+	struct bucket *g;
+
+	if (mutex_trylock(&b->c->bucket_lock)) {
+		for (i = 0; i < KEY_PTRS(k); i++)
+			if (ptr_available(b->c, k, i)) {
+				g = PTR_BUCKET(b->c, k, i);
+
+				if (KEY_DIRTY(k) ||
+				    g->prio != BTREE_PRIO ||
+				    (b->c->gc_mark_valid &&
+				     GC_MARK(g) != GC_MARK_METADATA))
+					goto err;
+			}
+
+		mutex_unlock(&b->c->bucket_lock);
+	}
+
+	return false;
+err:
+	mutex_unlock(&b->c->bucket_lock);
+	bch_extent_to_text(buf, sizeof(buf), k);
+	btree_bug(b,
+"inconsistent btree pointer %s: bucket %li pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
+		  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
+		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+	return true;
+}
+
+static bool bch_btree_ptr_bad(struct btree_keys *bk, const struct bkey *k)
+{
+	struct btree *b = container_of(bk, struct btree, keys);
+	unsigned i;
+
+	if (!bkey_cmp(k, &ZERO_KEY) ||
+	    !KEY_PTRS(k) ||
+	    bch_ptr_invalid(bk, k))
+		return true;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (!ptr_available(b->c, k, i) ||
+		    ptr_stale(b->c, k, i))
+			return true;
+
+	if (expensive_debug_checks(b->c) &&
+	    btree_ptr_bad_expensive(b, k))
+		return true;
+
+	return false;
+}
+
+static bool bch_btree_ptr_insert_fixup(struct btree_keys *bk,
+				       struct bkey *insert,
+				       struct btree_iter *iter,
+				       struct bkey *replace_key)
+{
+	struct btree *b = container_of(bk, struct btree, keys);
+
+	if (!KEY_OFFSET(insert))
+		btree_current_write(b)->prio_blocked++;
+
+	return false;
+}
+
+const struct btree_keys_ops bch_btree_keys_ops = {
+	.sort_cmp	= bch_key_sort_cmp,
+	.insert_fixup	= bch_btree_ptr_insert_fixup,
+	.key_invalid	= bch_btree_ptr_invalid,
+	.key_bad	= bch_btree_ptr_bad,
+	.key_to_text	= bch_extent_to_text,
+	.key_dump	= bch_bkey_dump,
+};
+
+/* Extents */
+
+/*
+ * Returns true if l > r - unless l == r, in which case returns true if l is
+ * older than r.
+ *
+ * Necessary for btree_sort_fixup() - if there are multiple keys that compare
+ * equal in different sets, we have to process them newest to oldest.
+ */
+static bool bch_extent_sort_cmp(struct btree_iter_set l,
+				struct btree_iter_set r)
+{
+	int64_t c = bkey_cmp(&START_KEY(l.k), &START_KEY(r.k));
+
+	return c ? c > 0 : l.k < r.k;
+}
+
+static struct bkey *bch_extent_sort_fixup(struct btree_iter *iter,
+					  struct bkey *tmp)
+{
+	while (iter->used > 1) {
+		struct btree_iter_set *top = iter->data, *i = top + 1;
+
+		if (iter->used > 2 &&
+		    bch_extent_sort_cmp(i[0], i[1]))
+			i++;
+
+		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
+			break;
+
+		if (!KEY_SIZE(i->k)) {
+			sort_key_next(iter, i);
+			heap_sift(iter, i - top, bch_extent_sort_cmp);
+			continue;
+		}
+
+		if (top->k > i->k) {
+			if (bkey_cmp(top->k, i->k) >= 0)
+				sort_key_next(iter, i);
+			else
+				bch_cut_front(top->k, i->k);
+
+			heap_sift(iter, i - top, bch_extent_sort_cmp);
+		} else {
+			/* can't happen because of comparison func */
+			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
+
+			if (bkey_cmp(i->k, top->k) < 0) {
+				bkey_copy(tmp, top->k);
+
+				bch_cut_back(&START_KEY(i->k), tmp);
+				bch_cut_front(i->k, top->k);
+				heap_sift(iter, 0, bch_extent_sort_cmp);
+
+				return tmp;
+			} else {
+				bch_cut_back(&START_KEY(i->k), top->k);
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static bool bch_extent_insert_fixup(struct btree_keys *b,
+				    struct bkey *insert,
+				    struct btree_iter *iter,
+				    struct bkey *replace_key)
+{
+	struct cache_set *c = container_of(b, struct btree, keys)->c;
+
+	void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
+	{
+		if (KEY_DIRTY(k))
+			bcache_dev_sectors_dirty_add(c, KEY_INODE(k),
+						     offset, -sectors);
+	}
+
+	uint64_t old_offset;
+	unsigned old_size, sectors_found = 0;
+
+	BUG_ON(!KEY_OFFSET(insert));
+	BUG_ON(!KEY_SIZE(insert));
+
+	while (1) {
+		struct bkey *k = bch_btree_iter_next(iter);
+		if (!k)
+			break;
+
+		if (bkey_cmp(&START_KEY(k), insert) >= 0) {
+			if (KEY_SIZE(k))
+				break;
+			else
+				continue;
+		}
+
+		if (bkey_cmp(k, &START_KEY(insert)) <= 0)
+			continue;
+
+		old_offset = KEY_START(k);
+		old_size = KEY_SIZE(k);
+
+		/*
+		 * We might overlap with 0 size extents; we can't skip these
+		 * because if they're in the set we're inserting to we have to
+		 * adjust them so they don't overlap with the key we're
+		 * inserting. But we don't want to check them for replace
+		 * operations.
+		 */
+
+		if (replace_key && KEY_SIZE(k)) {
+			/*
+			 * k might have been split since we inserted/found the
+			 * key we're replacing
+			 */
+			unsigned i;
+			uint64_t offset = KEY_START(k) -
+				KEY_START(replace_key);
+
+			/* But it must be a subset of the replace key */
+			if (KEY_START(k) < KEY_START(replace_key) ||
+			    KEY_OFFSET(k) > KEY_OFFSET(replace_key))
+				goto check_failed;
+
+			/* We didn't find a key that we were supposed to */
+			if (KEY_START(k) > KEY_START(insert) + sectors_found)
+				goto check_failed;
+
+			if (!bch_bkey_equal_header(k, replace_key))
+				goto check_failed;
+
+			/* skip past gen */
+			offset <<= 8;
+
+			BUG_ON(!KEY_PTRS(replace_key));
+
+			for (i = 0; i < KEY_PTRS(replace_key); i++)
+				if (k->ptr[i] != replace_key->ptr[i] + offset)
+					goto check_failed;
+
+			sectors_found = KEY_OFFSET(k) - KEY_START(insert);
+		}
+
+		if (bkey_cmp(insert, k) < 0 &&
+		    bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0) {
+			/*
+			 * We overlapped in the middle of an existing key: that
+			 * means we have to split the old key. But we have to do
+			 * slightly different things depending on whether the
+			 * old key has been written out yet.
+			 */
+
+			struct bkey *top;
+
+			subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
+
+			if (bkey_written(b, k)) {
+				/*
+				 * We insert a new key to cover the top of the
+				 * old key, and the old key is modified in place
+				 * to represent the bottom split.
+				 *
+				 * It's completely arbitrary whether the new key
+				 * is the top or the bottom, but it has to match
+				 * up with what btree_sort_fixup() does - it
+				 * doesn't check for this kind of overlap, it
+				 * depends on us inserting a new key for the top
+				 * here.
+				 */
+				top = bch_bset_search(b, bset_tree_last(b),
+						      insert);
+				bch_bset_insert(b, top, k);
+			} else {
+				BKEY_PADDED(key) temp;
+				bkey_copy(&temp.key, k);
+				bch_bset_insert(b, k, &temp.key);
+				top = bkey_next(k);
+			}
+
+			bch_cut_front(insert, top);
+			bch_cut_back(&START_KEY(insert), k);
+			bch_bset_fix_invalidated_key(b, k);
+			goto out;
+		}
+
+		if (bkey_cmp(insert, k) < 0) {
+			bch_cut_front(insert, k);
+		} else {
+			if (bkey_cmp(&START_KEY(insert), &START_KEY(k)) > 0)
+				old_offset = KEY_START(insert);
+
+			if (bkey_written(b, k) &&
+			    bkey_cmp(&START_KEY(insert), &START_KEY(k)) <= 0) {
+				/*
+				 * Completely overwrote, so we don't have to
+				 * invalidate the binary search tree
+				 */
+				bch_cut_front(k, k);
+			} else {
+				__bch_cut_back(&START_KEY(insert), k);
+				bch_bset_fix_invalidated_key(b, k);
+			}
+		}
+
+		subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
+	}
+
+check_failed:
+	if (replace_key) {
+		if (!sectors_found) {
+			return true;
+		} else if (sectors_found < KEY_SIZE(insert)) {
+			SET_KEY_OFFSET(insert, KEY_OFFSET(insert) -
+				       (KEY_SIZE(insert) - sectors_found));
+			SET_KEY_SIZE(insert, sectors_found);
+		}
+	}
+out:
+	if (KEY_DIRTY(insert))
+		bcache_dev_sectors_dirty_add(c, KEY_INODE(insert),
+					     KEY_START(insert),
+					     KEY_SIZE(insert));
+
+	return false;
+}
+
+static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
+{
+	struct btree *b = container_of(bk, struct btree, keys);
+	char buf[80];
+
+	if (!KEY_SIZE(k))
+		return true;
+
+	if (KEY_SIZE(k) > KEY_OFFSET(k))
+		goto bad;
+
+	if (__ptr_invalid(b->c, k))
+		goto bad;
+
+	return false;
+bad:
+	bch_extent_to_text(buf, sizeof(buf), k);
+	cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k));
+	return true;
+}
+
+static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
+				     unsigned ptr)
+{
+	struct bucket *g = PTR_BUCKET(b->c, k, ptr);
+	char buf[80];
+
+	if (mutex_trylock(&b->c->bucket_lock)) {
+		if (b->c->gc_mark_valid &&
+		    ((GC_MARK(g) != GC_MARK_DIRTY &&
+		      KEY_DIRTY(k)) ||
+		     GC_MARK(g) == GC_MARK_METADATA))
+			goto err;
+
+		if (g->prio == BTREE_PRIO)
+			goto err;
+
+		mutex_unlock(&b->c->bucket_lock);
+	}
+
+	return false;
+err:
+	mutex_unlock(&b->c->bucket_lock);
+	bch_extent_to_text(buf, sizeof(buf), k);
+	btree_bug(b,
+"inconsistent extent pointer %s:\nbucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
+		  buf, PTR_BUCKET_NR(b->c, k, ptr), atomic_read(&g->pin),
+		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+	return true;
+}
+
+static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
+{
+	struct btree *b = container_of(bk, struct btree, keys);
+	struct bucket *g;
+	unsigned i, stale;
+
+	if (!KEY_PTRS(k) ||
+	    bch_extent_invalid(bk, k))
+		return true;
+
+	for (i = 0; i < KEY_PTRS(k); i++)
+		if (!ptr_available(b->c, k, i))
+			return true;
+
+	if (!expensive_debug_checks(b->c) && KEY_DIRTY(k))
+		return false;
+
+	for (i = 0; i < KEY_PTRS(k); i++) {
+		g = PTR_BUCKET(b->c, k, i);
+		stale = ptr_stale(b->c, k, i);
+
+		btree_bug_on(stale > 96, b,
+			     "key too stale: %i, need_gc %u",
+			     stale, b->c->need_gc);
+
+		btree_bug_on(stale && KEY_DIRTY(k) && KEY_SIZE(k),
+			     b, "stale dirty pointer");
+
+		if (stale)
+			return true;
+
+		if (expensive_debug_checks(b->c) &&
+		    bch_extent_bad_expensive(b, k, i))
+			return true;
+	}
+
+	return false;
+}
+
+static uint64_t merge_chksums(struct bkey *l, struct bkey *r)
+{
+	return (l->ptr[KEY_PTRS(l)] + r->ptr[KEY_PTRS(r)]) &
+		~((uint64_t)1 << 63);
+}
+
+static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey *r)
+{
+	struct btree *b = container_of(bk, struct btree, keys);
+	unsigned i;
+
+	if (key_merging_disabled(b->c))
+		return false;
+
+	for (i = 0; i < KEY_PTRS(l); i++)
+		if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
+		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
+			return false;
+
+	/* Keys with no pointers aren't restricted to one bucket and could
+	 * overflow KEY_SIZE
+	 */
+	if (KEY_SIZE(l) + KEY_SIZE(r) > USHRT_MAX) {
+		SET_KEY_OFFSET(l, KEY_OFFSET(l) + USHRT_MAX - KEY_SIZE(l));
+		SET_KEY_SIZE(l, USHRT_MAX);
+
+		bch_cut_front(l, r);
+		return false;
+	}
+
+	if (KEY_CSUM(l)) {
+		if (KEY_CSUM(r))
+			l->ptr[KEY_PTRS(l)] = merge_chksums(l, r);
+		else
+			SET_KEY_CSUM(l, 0);
+	}
+
+	SET_KEY_OFFSET(l, KEY_OFFSET(l) + KEY_SIZE(r));
+	SET_KEY_SIZE(l, KEY_SIZE(l) + KEY_SIZE(r));
+
+	return true;
+}
+
+const struct btree_keys_ops bch_extent_keys_ops = {
+	.sort_cmp	= bch_extent_sort_cmp,
+	.sort_fixup	= bch_extent_sort_fixup,
+	.insert_fixup	= bch_extent_insert_fixup,
+	.key_invalid	= bch_extent_invalid,
+	.key_bad	= bch_extent_bad,
+	.key_merge	= bch_extent_merge,
+	.key_to_text	= bch_extent_to_text,
+	.key_dump	= bch_bkey_dump,
+	.is_extents	= true,
+};
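
One subtlety in bch_extent_merge() above: KEY_SIZE is a 16-bit field, so two mergeable extents whose combined size exceeds USHRT_MAX cannot simply be concatenated - the left key is grown to exactly USHRT_MAX sectors and the absorbed sectors are cut off the front of the right key, which survives as a smaller extent. A runnable userspace sketch of just that arithmetic (struct ext is a stand-in; like bcache keys it stores the end offset plus a size):

#include <limits.h>
#include <stdio.h>

struct ext {
	unsigned long long	offset;	/* END of the extent, bcache-style */
	unsigned		size;	/* sectors; 16-bit in real bkeys */
};

static void merge_clamped(struct ext *l, struct ext *r)
{
	if (l->size + r->size > USHRT_MAX) {
		unsigned grow = USHRT_MAX - l->size;

		l->offset += grow;	/* l now covers USHRT_MAX sectors */
		l->size = USHRT_MAX;
		r->size -= grow;	/* bch_cut_front(): trim r's front */
		return;
	}
	l->offset += r->size;		/* full merge: l absorbs r */
	l->size += r->size;
	r->size = 0;			/* r consumed (caller drops it) */
}

int main(void)
{
	struct ext l = { .offset = 60000, .size = 60000 };
	struct ext r = { .offset = 70000, .size = 10000 };

	merge_clamped(&l, &r);
	printf("l: end %llu size %u, r: end %llu size %u\n",
	       l.offset, l.size, r.offset, r.size);
	/* prints: l: end 65535 size 65535, r: end 70000 size 4465 */
	return 0;
}
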
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
new file mode 100644
index 0000000..e4e2340
--- /dev/null
+++ b/drivers/md/bcache/extents.h
@@ -0,0 +1,13 @@
+#ifndef _BCACHE_EXTENTS_H
+#define _BCACHE_EXTENTS_H
+
+extern const struct btree_keys_ops bch_btree_keys_ops;
+extern const struct btree_keys_ops bch_extent_keys_ops;
+
+struct bkey;
+struct cache_set;
+
+void bch_extent_to_text(char *, size_t, const struct bkey *);
+bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
+
+#endif /* _BCACHE_EXTENTS_H */
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 9056632..fa028fa 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -11,178 +11,40 @@
 
 #include <linux/blkdev.h>
 
-static void bch_bi_idx_hack_endio(struct bio *bio, int error)
-{
-	struct bio *p = bio->bi_private;
-
-	bio_endio(p, error);
-	bio_put(bio);
-}
-
-static void bch_generic_make_request_hack(struct bio *bio)
-{
-	if (bio->bi_idx) {
-		struct bio *clone = bio_alloc(GFP_NOIO, bio_segments(bio));
-
-		memcpy(clone->bi_io_vec,
-		       bio_iovec(bio),
-		       bio_segments(bio) * sizeof(struct bio_vec));
-
-		clone->bi_sector	= bio->bi_sector;
-		clone->bi_bdev		= bio->bi_bdev;
-		clone->bi_rw		= bio->bi_rw;
-		clone->bi_vcnt		= bio_segments(bio);
-		clone->bi_size		= bio->bi_size;
-
-		clone->bi_private	= bio;
-		clone->bi_end_io	= bch_bi_idx_hack_endio;
-
-		bio = clone;
-	}
-
-	/*
-	 * Hack, since drivers that clone bios clone up to bi_max_vecs, but our
-	 * bios might have had more than that (before we split them per device
-	 * limitations).
-	 *
-	 * To be taken out once immutable bvec stuff is in.
-	 */
-	bio->bi_max_vecs = bio->bi_vcnt;
-
-	generic_make_request(bio);
-}
-
-/**
- * bch_bio_split - split a bio
- * @bio:	bio to split
- * @sectors:	number of sectors to split from the front of @bio
- * @gfp:	gfp mask
- * @bs:		bio set to allocate from
- *
- * Allocates and returns a new bio which represents @sectors from the start of
- * @bio, and updates @bio to represent the remaining sectors.
- *
- * If bio_sectors(@bio) was less than or equal to @sectors, returns @bio
- * unchanged.
- *
- * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
- * bvec boundry; it is the caller's responsibility to ensure that @bio is not
- * freed before the split.
- */
-struct bio *bch_bio_split(struct bio *bio, int sectors,
-			  gfp_t gfp, struct bio_set *bs)
-{
-	unsigned idx = bio->bi_idx, vcnt = 0, nbytes = sectors << 9;
-	struct bio_vec *bv;
-	struct bio *ret = NULL;
-
-	BUG_ON(sectors <= 0);
-
-	if (sectors >= bio_sectors(bio))
-		return bio;
-
-	if (bio->bi_rw & REQ_DISCARD) {
-		ret = bio_alloc_bioset(gfp, 1, bs);
-		if (!ret)
-			return NULL;
-		idx = 0;
-		goto out;
-	}
-
-	bio_for_each_segment(bv, bio, idx) {
-		vcnt = idx - bio->bi_idx;
-
-		if (!nbytes) {
-			ret = bio_alloc_bioset(gfp, vcnt, bs);
-			if (!ret)
-				return NULL;
-
-			memcpy(ret->bi_io_vec, bio_iovec(bio),
-			       sizeof(struct bio_vec) * vcnt);
-
-			break;
-		} else if (nbytes < bv->bv_len) {
-			ret = bio_alloc_bioset(gfp, ++vcnt, bs);
-			if (!ret)
-				return NULL;
-
-			memcpy(ret->bi_io_vec, bio_iovec(bio),
-			       sizeof(struct bio_vec) * vcnt);
-
-			ret->bi_io_vec[vcnt - 1].bv_len = nbytes;
-			bv->bv_offset	+= nbytes;
-			bv->bv_len	-= nbytes;
-			break;
-		}
-
-		nbytes -= bv->bv_len;
-	}
-out:
-	ret->bi_bdev	= bio->bi_bdev;
-	ret->bi_sector	= bio->bi_sector;
-	ret->bi_size	= sectors << 9;
-	ret->bi_rw	= bio->bi_rw;
-	ret->bi_vcnt	= vcnt;
-	ret->bi_max_vecs = vcnt;
-
-	bio->bi_sector	+= sectors;
-	bio->bi_size	-= sectors << 9;
-	bio->bi_idx	 = idx;
-
-	if (bio_integrity(bio)) {
-		if (bio_integrity_clone(ret, bio, gfp)) {
-			bio_put(ret);
-			return NULL;
-		}
-
-		bio_integrity_trim(ret, 0, bio_sectors(ret));
-		bio_integrity_trim(bio, bio_sectors(ret), bio_sectors(bio));
-	}
-
-	return ret;
-}
-
 static unsigned bch_bio_max_sectors(struct bio *bio)
 {
-	unsigned ret = bio_sectors(bio);
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-	unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES,
-				      queue_max_segments(q));
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	unsigned ret = 0, seg = 0;
 
 	if (bio->bi_rw & REQ_DISCARD)
-		return min(ret, q->limits.max_discard_sectors);
+		return min(bio_sectors(bio), q->limits.max_discard_sectors);
 
-	if (bio_segments(bio) > max_segments ||
-	    q->merge_bvec_fn) {
-		struct bio_vec *bv;
-		int i, seg = 0;
+	bio_for_each_segment(bv, bio, iter) {
+		struct bvec_merge_data bvm = {
+			.bi_bdev	= bio->bi_bdev,
+			.bi_sector	= bio->bi_iter.bi_sector,
+			.bi_size	= ret << 9,
+			.bi_rw		= bio->bi_rw,
+		};
 
-		ret = 0;
+		if (seg == min_t(unsigned, BIO_MAX_PAGES,
+				 queue_max_segments(q)))
+			break;
 
-		bio_for_each_segment(bv, bio, i) {
-			struct bvec_merge_data bvm = {
-				.bi_bdev	= bio->bi_bdev,
-				.bi_sector	= bio->bi_sector,
-				.bi_size	= ret << 9,
-				.bi_rw		= bio->bi_rw,
-			};
+		if (q->merge_bvec_fn &&
+		    q->merge_bvec_fn(q, &bvm, &bv) < (int) bv.bv_len)
+			break;
 
-			if (seg == max_segments)
-				break;
-
-			if (q->merge_bvec_fn &&
-			    q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
-				break;
-
-			seg++;
-			ret += bv->bv_len >> 9;
-		}
+		seg++;
+		ret += bv.bv_len >> 9;
 	}
 
 	ret = min(ret, queue_max_sectors(q));
 
 	WARN_ON(!ret);
-	ret = max_t(int, ret, bio_iovec(bio)->bv_len >> 9);
+	ret = max_t(int, ret, bio_iovec(bio).bv_len >> 9);
 
 	return ret;
 }
@@ -193,7 +55,7 @@
 
 	s->bio->bi_end_io = s->bi_end_io;
 	s->bio->bi_private = s->bi_private;
-	bio_endio(s->bio, 0);
+	bio_endio_nodec(s->bio, 0);
 
 	closure_debug_destroy(&s->cl);
 	mempool_free(s, s->p->bio_split_hook);
@@ -232,19 +94,19 @@
 	bio_get(bio);
 
 	do {
-		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
-				  GFP_NOIO, s->p->bio_split);
+		n = bio_next_split(bio, bch_bio_max_sectors(bio),
+				   GFP_NOIO, s->p->bio_split);
 
 		n->bi_end_io	= bch_bio_submit_split_endio;
 		n->bi_private	= &s->cl;
 
 		closure_get(&s->cl);
-		bch_generic_make_request_hack(n);
+		generic_make_request(n);
 	} while (n != bio);
 
 	continue_at(&s->cl, bch_bio_submit_split_done, NULL);
 submit:
-	bch_generic_make_request_hack(bio);
+	generic_make_request(bio);
 }
 
 /* Bios with headers */
@@ -272,8 +134,8 @@
 {
 	struct bbio *b = container_of(bio, struct bbio, bio);
 
-	bio->bi_sector	= PTR_OFFSET(&b->key, 0);
-	bio->bi_bdev	= PTR_CACHE(c, &b->key, 0)->bdev;
+	bio->bi_iter.bi_sector	= PTR_OFFSET(&b->key, 0);
+	bio->bi_bdev		= PTR_CACHE(c, &b->key, 0)->bdev;
 
 	b->submit_time_us = local_clock_us();
 	closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0));
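
bio_next_split(), which replaces bch_bio_split() above, returns a freshly allocated bio covering the first sectors of its argument and advances the original past them - unless the whole bio already fits, in which case it returns the original bio itself. That contract is what terminates the do/while loop in the submit path above. A sketch of the calling convention (submit_one() is hypothetical):

	struct bio *n;

	do {
		n = bio_next_split(bio, max_sectors, GFP_NOIO, split);
		submit_one(n);		/* hypothetical per-piece consumer */
	} while (n != bio);		/* the last piece is bio itself */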
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index ecdaa67..18039af 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -44,17 +44,17 @@
 
 	closure_init_stack(&cl);
 
-	pr_debug("reading %llu", (uint64_t) bucket);
+	pr_debug("reading %u", bucket_index);
 
 	while (offset < ca->sb.bucket_size) {
 reread:		left = ca->sb.bucket_size - offset;
-		len = min_t(unsigned, left, PAGE_SECTORS * 8);
+		len = min_t(unsigned, left, PAGE_SECTORS << JSET_BITS);
 
 		bio_reset(bio);
-		bio->bi_sector	= bucket + offset;
+		bio->bi_iter.bi_sector	= bucket + offset;
 		bio->bi_bdev	= ca->bdev;
 		bio->bi_rw	= READ;
-		bio->bi_size	= len << 9;
+		bio->bi_iter.bi_size	= len << 9;
 
 		bio->bi_end_io	= journal_read_endio;
 		bio->bi_private = &cl;
@@ -74,19 +74,28 @@
 			struct list_head *where;
 			size_t blocks, bytes = set_bytes(j);
 
-			if (j->magic != jset_magic(&ca->sb))
+			if (j->magic != jset_magic(&ca->sb)) {
+				pr_debug("%u: bad magic", bucket_index);
 				return ret;
+			}
 
-			if (bytes > left << 9)
+			if (bytes > left << 9 ||
+			    bytes > PAGE_SIZE << JSET_BITS) {
+				pr_info("%u: too big, %zu bytes, offset %u",
+					bucket_index, bytes, offset);
 				return ret;
+			}
 
 			if (bytes > len << 9)
 				goto reread;
 
-			if (j->csum != csum_set(j))
+			if (j->csum != csum_set(j)) {
+				pr_info("%u: bad csum, %zu bytes, offset %u",
+					bucket_index, bytes, offset);
 				return ret;
+			}
 
-			blocks = set_blocks(j, ca->set);
+			blocks = set_blocks(j, block_bytes(ca->set));
 
 			while (!list_empty(list)) {
 				i = list_first_entry(list,
@@ -275,7 +284,7 @@
 		}
 
 		for (k = i->j.start;
-		     k < end(&i->j);
+		     k < bset_bkey_last(&i->j);
 		     k = bkey_next(k)) {
 			unsigned j;
 
@@ -313,7 +322,7 @@
 				 n, i->j.seq - 1, start, end);
 
 		for (k = i->j.start;
-		     k < end(&i->j);
+		     k < bset_bkey_last(&i->j);
 		     k = bkey_next(k)) {
 			trace_bcache_journal_replay_key(k);
 
@@ -437,13 +446,13 @@
 		atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
 
 		bio_init(bio);
-		bio->bi_sector		= bucket_to_sector(ca->set,
+		bio->bi_iter.bi_sector	= bucket_to_sector(ca->set,
 						ca->sb.d[ja->discard_idx]);
 		bio->bi_bdev		= ca->bdev;
 		bio->bi_rw		= REQ_WRITE|REQ_DISCARD;
 		bio->bi_max_vecs	= 1;
 		bio->bi_io_vec		= bio->bi_inline_vecs;
-		bio->bi_size		= bucket_bytes(ca);
+		bio->bi_iter.bi_size	= bucket_bytes(ca);
 		bio->bi_end_io		= journal_discard_endio;
 
 		closure_get(&ca->set->cl);
@@ -555,6 +564,14 @@
 	continue_at_nobarrier(cl, journal_write, system_wq);
 }
 
+static void journal_write_unlock(struct closure *cl)
+{
+	struct cache_set *c = container_of(cl, struct cache_set, journal.io);
+
+	c->journal.io_in_flight = 0;
+	spin_unlock(&c->journal.lock);
+}
+
 static void journal_write_unlocked(struct closure *cl)
 	__releases(c->journal.lock)
 {
@@ -562,22 +579,15 @@
 	struct cache *ca;
 	struct journal_write *w = c->journal.cur;
 	struct bkey *k = &c->journal.key;
-	unsigned i, sectors = set_blocks(w->data, c) * c->sb.block_size;
+	unsigned i, sectors = set_blocks(w->data, block_bytes(c)) *
+		c->sb.block_size;
 
 	struct bio *bio;
 	struct bio_list list;
 	bio_list_init(&list);
 
 	if (!w->need_write) {
-		/*
-		 * XXX: have to unlock closure before we unlock journal lock,
-		 * else we race with bch_journal(). But this way we race
-		 * against cache set unregister. Doh.
-		 */
-		set_closure_fn(cl, NULL, NULL);
-		closure_sub(cl, CLOSURE_RUNNING + 1);
-		spin_unlock(&c->journal.lock);
-		return;
+		closure_return_with_destructor(cl, journal_write_unlock);
 	} else if (journal_full(&c->journal)) {
 		journal_reclaim(c);
 		spin_unlock(&c->journal.lock);
@@ -586,7 +596,7 @@
 		continue_at(cl, journal_write, system_wq);
 	}
 
-	c->journal.blocks_free -= set_blocks(w->data, c);
+	c->journal.blocks_free -= set_blocks(w->data, block_bytes(c));
 
 	w->data->btree_level = c->root->level;
 
@@ -608,10 +618,10 @@
 		atomic_long_add(sectors, &ca->meta_sectors_written);
 
 		bio_reset(bio);
-		bio->bi_sector	= PTR_OFFSET(k, i);
+		bio->bi_iter.bi_sector	= PTR_OFFSET(k, i);
 		bio->bi_bdev	= ca->bdev;
 		bio->bi_rw	= REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA;
-		bio->bi_size	= sectors << 9;
+		bio->bi_iter.bi_size = sectors << 9;
 
 		bio->bi_end_io	= journal_write_endio;
 		bio->bi_private = w;
@@ -653,10 +663,12 @@
 
 	w->need_write = true;
 
-	if (closure_trylock(cl, &c->cl))
-		journal_write_unlocked(cl);
-	else
+	if (!c->journal.io_in_flight) {
+		c->journal.io_in_flight = 1;
+		closure_call(cl, journal_write_unlocked, NULL, &c->cl);
+	} else {
 		spin_unlock(&c->journal.lock);
+	}
 }
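
journal_try_write() now gates writes on a plain io_in_flight flag, tested and set under journal.lock, instead of closure_trylock(); journal_write_unlock(), added earlier in this file, is installed as the closure destructor so that clearing the flag and dropping the lock happen only after the closure's last reference is gone - exactly the race the deleted XXX comment complained about. The destructor pattern in miniature (struct my_dev and my_write_done() are hypothetical):

	static void my_write_done(struct closure *cl)
	{
		struct my_dev *d = container_of(cl, struct my_dev, io);

		d->io_in_flight = 0;	/* safe: no references remain */
		spin_unlock(&d->lock);
	}

	/* in the closure function, instead of open-coding the unlock: */
	closure_return_with_destructor(cl, my_write_done);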
 
 static struct journal_write *journal_wait_for_write(struct cache_set *c,
@@ -664,6 +676,7 @@
 {
 	size_t sectors;
 	struct closure cl;
+	bool wait = false;
 
 	closure_init_stack(&cl);
 
@@ -673,16 +686,19 @@
 		struct journal_write *w = c->journal.cur;
 
 		sectors = __set_blocks(w->data, w->data->keys + nkeys,
-				       c) * c->sb.block_size;
+				       block_bytes(c)) * c->sb.block_size;
 
 		if (sectors <= min_t(size_t,
 				     c->journal.blocks_free * c->sb.block_size,
 				     PAGE_SECTORS << JSET_BITS))
 			return w;
 
-		/* XXX: tracepoint */
+		if (wait)
+			closure_wait(&c->journal.wait, &cl);
+
 		if (!journal_full(&c->journal)) {
-			trace_bcache_journal_entry_full(c);
+			if (wait)
+				trace_bcache_journal_entry_full(c);
 
 			/*
 			 * XXX: If we were inserting so many keys that they
@@ -692,12 +708,11 @@
 			 */
 			BUG_ON(!w->data->keys);
 
-			closure_wait(&w->wait, &cl);
 			journal_try_write(c); /* unlocks */
 		} else {
-			trace_bcache_journal_full(c);
+			if (wait)
+				trace_bcache_journal_full(c);
 
-			closure_wait(&c->journal.wait, &cl);
 			journal_reclaim(c);
 			spin_unlock(&c->journal.lock);
 
@@ -706,6 +721,7 @@
 
 		closure_sync(&cl);
 		spin_lock(&c->journal.lock);
+		wait = true;
 	}
 }
 
@@ -736,7 +752,7 @@
 
 	w = journal_wait_for_write(c, bch_keylist_nkeys(keys));
 
-	memcpy(end(w->data), keys->keys, bch_keylist_bytes(keys));
+	memcpy(bset_bkey_last(w->data), keys->keys, bch_keylist_bytes(keys));
 	w->data->keys += bch_keylist_nkeys(keys);
 
 	ret = &fifo_back(&c->journal.pin);
@@ -780,7 +796,6 @@
 {
 	struct journal *j = &c->journal;
 
-	closure_init_unlocked(&j->io);
 	spin_lock_init(&j->lock);
 	INIT_DELAYED_WORK(&j->work, journal_write_work);
 
diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
index a6472fd..9180c44 100644
--- a/drivers/md/bcache/journal.h
+++ b/drivers/md/bcache/journal.h
@@ -104,6 +104,7 @@
 	/* used when waiting because the journal was full */
 	struct closure_waitlist	wait;
 	struct closure		io;
+	int			io_in_flight;
 	struct delayed_work	work;
 
 	/* Number of blocks free in the bucket(s) we're currently writing to */
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index f2f0998..9eb60d1 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -86,7 +86,7 @@
 	bio_get(bio);
 	bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 
-	bio->bi_size		= KEY_SIZE(&io->w->key) << 9;
+	bio->bi_iter.bi_size	= KEY_SIZE(&io->w->key) << 9;
 	bio->bi_max_vecs	= DIV_ROUND_UP(KEY_SIZE(&io->w->key),
 					       PAGE_SECTORS);
 	bio->bi_private		= &io->cl;
@@ -102,7 +102,7 @@
 	if (!op->error) {
 		moving_init(io);
 
-		io->bio.bio.bi_sector = KEY_START(&io->w->key);
+		io->bio.bio.bi_iter.bi_sector = KEY_START(&io->w->key);
 		op->write_prio		= 1;
 		op->bio			= &io->bio.bio;
 
@@ -211,7 +211,7 @@
 	for_each_cache(ca, c, i) {
 		unsigned sectors_to_move = 0;
 		unsigned reserve_sectors = ca->sb.bucket_size *
-			min(fifo_used(&ca->free), ca->free.size / 2);
+			fifo_used(&ca->free[RESERVE_MOVINGGC]);
 
 		ca->heap.used = 0;
 
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 61bcfc2..72cd213 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -197,14 +197,14 @@
 
 static void bio_csum(struct bio *bio, struct bkey *k)
 {
-	struct bio_vec *bv;
+	struct bio_vec bv;
+	struct bvec_iter iter;
 	uint64_t csum = 0;
-	int i;
 
-	bio_for_each_segment(bv, bio, i) {
-		void *d = kmap(bv->bv_page) + bv->bv_offset;
-		csum = bch_crc64_update(csum, d, bv->bv_len);
-		kunmap(bv->bv_page);
+	bio_for_each_segment(bv, bio, iter) {
+		void *d = kmap(bv.bv_page) + bv.bv_offset;
+		csum = bch_crc64_update(csum, d, bv.bv_len);
+		kunmap(bv.bv_page);
 	}
 
 	k->ptr[KEY_PTRS(k)] = csum & (~0ULL >> 1);
@@ -254,26 +254,44 @@
 	closure_return(cl);
 }
 
+static int bch_keylist_realloc(struct keylist *l, unsigned u64s,
+			       struct cache_set *c)
+{
+	size_t oldsize = bch_keylist_nkeys(l);
+	size_t newsize = oldsize + u64s;
+
+	/*
+	 * The journalling code doesn't handle the case where the keys to insert
+	 * are bigger than an empty write: if we just return -ENOMEM here,
+	 * bch_data_insert_start() and bch_data_invalidate() will insert the
+	 * keys created so far and finish the rest when the keylist is empty.
+	 */
+	if (newsize * sizeof(uint64_t) > block_bytes(c) - sizeof(struct jset))
+		return -ENOMEM;
+
+	return __bch_keylist_realloc(l, u64s);
+}
+
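The new wrapper caps a keylist at what a single journal write can hold, so callers fall back to flushing the keys built so far rather than growing the list without bound. The capacity it enforces, as a sketch using the helpers visible in the check above:

	/* u64s of bkeys that fit in one journal block: */
	size_t max_u64s = (block_bytes(c) - sizeof(struct jset)) /
			  sizeof(uint64_t);
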
 static void bch_data_invalidate(struct closure *cl)
 {
 	struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
 	struct bio *bio = op->bio;
 
 	pr_debug("invalidating %i sectors from %llu",
-		 bio_sectors(bio), (uint64_t) bio->bi_sector);
+		 bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
 
 	while (bio_sectors(bio)) {
 		unsigned sectors = min(bio_sectors(bio),
 				       1U << (KEY_SIZE_BITS - 1));
 
-		if (bch_keylist_realloc(&op->insert_keys, 0, op->c))
+		if (bch_keylist_realloc(&op->insert_keys, 2, op->c))
 			goto out;
 
-		bio->bi_sector	+= sectors;
-		bio->bi_size	-= sectors << 9;
+		bio->bi_iter.bi_sector	+= sectors;
+		bio->bi_iter.bi_size	-= sectors << 9;
 
 		bch_keylist_add(&op->insert_keys,
-				&KEY(op->inode, bio->bi_sector, sectors));
+				&KEY(op->inode, bio->bi_iter.bi_sector, sectors));
 	}
 
 	op->insert_data_done = true;
@@ -356,21 +374,21 @@
 
 		/* 1 for the device pointer and 1 for the chksum */
 		if (bch_keylist_realloc(&op->insert_keys,
-					1 + (op->csum ? 1 : 0),
+					3 + (op->csum ? 1 : 0),
 					op->c))
 			continue_at(cl, bch_data_insert_keys, bcache_wq);
 
 		k = op->insert_keys.top;
 		bkey_init(k);
 		SET_KEY_INODE(k, op->inode);
-		SET_KEY_OFFSET(k, bio->bi_sector);
+		SET_KEY_OFFSET(k, bio->bi_iter.bi_sector);
 
 		if (!bch_alloc_sectors(op->c, k, bio_sectors(bio),
 				       op->write_point, op->write_prio,
 				       op->writeback))
 			goto err;
 
-		n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
+		n = bio_next_split(bio, KEY_SIZE(k), GFP_NOIO, split);
 
 		n->bi_end_io	= bch_data_insert_endio;
 		n->bi_private	= cl;
@@ -521,7 +539,7 @@
 	     (bio->bi_rw & REQ_WRITE)))
 		goto skip;
 
-	if (bio->bi_sector & (c->sb.block_size - 1) ||
+	if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
 	    bio_sectors(bio) & (c->sb.block_size - 1)) {
 		pr_debug("skipping unaligned io");
 		goto skip;
@@ -545,8 +563,8 @@
 
 	spin_lock(&dc->io_lock);
 
-	hlist_for_each_entry(i, iohash(dc, bio->bi_sector), hash)
-		if (i->last == bio->bi_sector &&
+	hlist_for_each_entry(i, iohash(dc, bio->bi_iter.bi_sector), hash)
+		if (i->last == bio->bi_iter.bi_sector &&
 		    time_before(jiffies, i->jiffies))
 			goto found;
 
@@ -555,8 +573,8 @@
 	add_sequential(task);
 	i->sequential = 0;
 found:
-	if (i->sequential + bio->bi_size > i->sequential)
-		i->sequential	+= bio->bi_size;
+	if (i->sequential + bio->bi_iter.bi_size > i->sequential)
+		i->sequential	+= bio->bi_iter.bi_size;
 
 	i->last			 = bio_end_sector(bio);
 	i->jiffies		 = jiffies + msecs_to_jiffies(5000);
@@ -596,16 +614,13 @@
 	/* Stack frame for bio_complete */
 	struct closure		cl;
 
-	struct bcache_device	*d;
-
 	struct bbio		bio;
 	struct bio		*orig_bio;
 	struct bio		*cache_miss;
+	struct bcache_device	*d;
 
 	unsigned		insert_bio_sectors;
-
 	unsigned		recoverable:1;
-	unsigned		unaligned_bvec:1;
 	unsigned		write:1;
 	unsigned		read_dirty_data:1;
 
@@ -630,7 +645,8 @@
 
 	if (error)
 		s->iop.error = error;
-	else if (ptr_stale(s->iop.c, &b->key, 0)) {
+	else if (!KEY_DIRTY(&b->key) &&
+		 ptr_stale(s->iop.c, &b->key, 0)) {
 		atomic_long_inc(&s->iop.c->cache_read_races);
 		s->iop.error = -EINTR;
 	}
@@ -649,15 +665,15 @@
 	struct bkey *bio_key;
 	unsigned ptr;
 
-	if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_sector, 0)) <= 0)
+	if (bkey_cmp(k, &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0)) <= 0)
 		return MAP_CONTINUE;
 
 	if (KEY_INODE(k) != s->iop.inode ||
-	    KEY_START(k) > bio->bi_sector) {
+	    KEY_START(k) > bio->bi_iter.bi_sector) {
 		unsigned bio_sectors = bio_sectors(bio);
 		unsigned sectors = KEY_INODE(k) == s->iop.inode
 			? min_t(uint64_t, INT_MAX,
-				KEY_START(k) - bio->bi_sector)
+				KEY_START(k) - bio->bi_iter.bi_sector)
 			: INT_MAX;
 
 		int ret = s->d->cache_miss(b, s, bio, sectors);
@@ -679,14 +695,14 @@
 	if (KEY_DIRTY(k))
 		s->read_dirty_data = true;
 
-	n = bch_bio_split(bio, min_t(uint64_t, INT_MAX,
-				     KEY_OFFSET(k) - bio->bi_sector),
-			  GFP_NOIO, s->d->bio_split);
+	n = bio_next_split(bio, min_t(uint64_t, INT_MAX,
+				      KEY_OFFSET(k) - bio->bi_iter.bi_sector),
+			   GFP_NOIO, s->d->bio_split);
 
 	bio_key = &container_of(n, struct bbio, bio)->key;
 	bch_bkey_copy_single_ptr(bio_key, k, ptr);
 
-	bch_cut_front(&KEY(s->iop.inode, n->bi_sector, 0), bio_key);
+	bch_cut_front(&KEY(s->iop.inode, n->bi_iter.bi_sector, 0), bio_key);
 	bch_cut_back(&KEY(s->iop.inode, bio_end_sector(n), 0), bio_key);
 
 	n->bi_end_io	= bch_cache_read_endio;
@@ -711,10 +727,13 @@
 {
 	struct search *s = container_of(cl, struct search, iop.cl);
 	struct bio *bio = &s->bio.bio;
+	int ret;
 
-	int ret = bch_btree_map_keys(&s->op, s->iop.c,
-				     &KEY(s->iop.inode, bio->bi_sector, 0),
-				     cache_lookup_fn, MAP_END_KEY);
+	bch_btree_op_init(&s->op, -1);
+
+	ret = bch_btree_map_keys(&s->op, s->iop.c,
+				 &KEY(s->iop.inode, bio->bi_iter.bi_sector, 0),
+				 cache_lookup_fn, MAP_END_KEY);
 	if (ret == -EAGAIN)
 		continue_at(cl, cache_lookup, bcache_wq);
 
@@ -755,13 +774,15 @@
 	}
 }
 
-static void do_bio_hook(struct search *s)
+static void do_bio_hook(struct search *s, struct bio *orig_bio)
 {
 	struct bio *bio = &s->bio.bio;
-	memcpy(bio, s->orig_bio, sizeof(struct bio));
 
+	bio_init(bio);
+	__bio_clone_fast(bio, orig_bio);
 	bio->bi_end_io		= request_endio;
 	bio->bi_private		= &s->cl;
+
 	atomic_set(&bio->bi_cnt, 3);
 }
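
do_bio_hook() is where immutable biovecs pay off for the stacked bio: __bio_clone_fast() shares the parent's biovec outright and copies only the iterator, so the old full-struct memcpy and the unaligned_bvec fixups below can go away. Roughly what the fast clone copies (a sketch of its effect, not the real body):

	clone->bi_bdev   = orig->bi_bdev;
	clone->bi_rw     = orig->bi_rw;
	clone->bi_iter   = orig->bi_iter;	/* private cursor */
	clone->bi_io_vec = orig->bi_io_vec;	/* shared; never modified after submit */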
 
@@ -773,43 +794,36 @@
 	if (s->iop.bio)
 		bio_put(s->iop.bio);
 
-	if (s->unaligned_bvec)
-		mempool_free(s->bio.bio.bi_io_vec, s->d->unaligned_bvec);
-
 	closure_debug_destroy(cl);
 	mempool_free(s, s->d->c->search);
 }
 
-static struct search *search_alloc(struct bio *bio, struct bcache_device *d)
+static inline struct search *search_alloc(struct bio *bio,
+					  struct bcache_device *d)
 {
 	struct search *s;
-	struct bio_vec *bv;
 
 	s = mempool_alloc(d->c->search, GFP_NOIO);
-	memset(s, 0, offsetof(struct search, iop.insert_keys));
 
-	__closure_init(&s->cl, NULL);
+	closure_init(&s->cl, NULL);
+	do_bio_hook(s, bio);
 
-	s->iop.inode		= d->id;
-	s->iop.c		= d->c;
-	s->d			= d;
-	s->op.lock		= -1;
-	s->iop.write_point	= hash_long((unsigned long) current, 16);
 	s->orig_bio		= bio;
-	s->write		= (bio->bi_rw & REQ_WRITE) != 0;
-	s->iop.flush_journal	= (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
+	s->cache_miss		= NULL;
+	s->d			= d;
 	s->recoverable		= 1;
+	s->write		= (bio->bi_rw & REQ_WRITE) != 0;
+	s->read_dirty_data	= 0;
 	s->start_time		= jiffies;
-	do_bio_hook(s);
 
-	if (bio->bi_size != bio_segments(bio) * PAGE_SIZE) {
-		bv = mempool_alloc(d->unaligned_bvec, GFP_NOIO);
-		memcpy(bv, bio_iovec(bio),
-		       sizeof(struct bio_vec) * bio_segments(bio));
-
-		s->bio.bio.bi_io_vec	= bv;
-		s->unaligned_bvec	= 1;
-	}
+	s->iop.c		= d->c;
+	s->iop.bio		= NULL;
+	s->iop.inode		= d->id;
+	s->iop.write_point	= hash_long((unsigned long) current, 16);
+	s->iop.write_prio	= 0;
+	s->iop.error		= 0;
+	s->iop.flags		= 0;
+	s->iop.flush_journal	= (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
 
 	return s;
 }
@@ -849,26 +863,13 @@
 {
 	struct search *s = container_of(cl, struct search, cl);
 	struct bio *bio = &s->bio.bio;
-	struct bio_vec *bv;
-	int i;
 
 	if (s->recoverable) {
 		/* Retry from the backing device: */
 		trace_bcache_read_retry(s->orig_bio);
 
 		s->iop.error = 0;
-		bv = s->bio.bio.bi_io_vec;
-		do_bio_hook(s);
-		s->bio.bio.bi_io_vec = bv;
-
-		if (!s->unaligned_bvec)
-			bio_for_each_segment(bv, s->orig_bio, i)
-				bv->bv_offset = 0, bv->bv_len = PAGE_SIZE;
-		else
-			memcpy(s->bio.bio.bi_io_vec,
-			       bio_iovec(s->orig_bio),
-			       sizeof(struct bio_vec) *
-			       bio_segments(s->orig_bio));
+		do_bio_hook(s, s->orig_bio);
 
 		/* XXX: invalidate cache */
 
@@ -893,9 +894,9 @@
 
 	if (s->iop.bio) {
 		bio_reset(s->iop.bio);
-		s->iop.bio->bi_sector = s->cache_miss->bi_sector;
+		s->iop.bio->bi_iter.bi_sector = s->cache_miss->bi_iter.bi_sector;
 		s->iop.bio->bi_bdev = s->cache_miss->bi_bdev;
-		s->iop.bio->bi_size = s->insert_bio_sectors << 9;
+		s->iop.bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
 		bch_bio_map(s->iop.bio, NULL);
 
 		bio_copy_data(s->cache_miss, s->iop.bio);
@@ -904,8 +905,7 @@
 		s->cache_miss = NULL;
 	}
 
-	if (verify(dc, &s->bio.bio) && s->recoverable &&
-	    !s->unaligned_bvec && !s->read_dirty_data)
+	if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data)
 		bch_data_verify(dc, s->orig_bio);
 
 	bio_complete(s);
@@ -945,7 +945,7 @@
 	struct bio *miss, *cache_bio;
 
 	if (s->cache_miss || s->iop.bypass) {
-		miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+		miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
 		ret = miss == bio ? MAP_DONE : MAP_CONTINUE;
 		goto out_submit;
 	}
@@ -959,7 +959,7 @@
 	s->insert_bio_sectors = min(sectors, bio_sectors(bio) + reada);
 
 	s->iop.replace_key = KEY(s->iop.inode,
-				 bio->bi_sector + s->insert_bio_sectors,
+				 bio->bi_iter.bi_sector + s->insert_bio_sectors,
 				 s->insert_bio_sectors);
 
 	ret = bch_btree_insert_check_key(b, &s->op, &s->iop.replace_key);
@@ -968,7 +968,7 @@
 
 	s->iop.replace = true;
 
-	miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
+	miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split);
 
 	/* btree_search_recurse()'s btree iterator is no good anymore */
 	ret = miss == bio ? MAP_DONE : -EINTR;
@@ -979,9 +979,9 @@
 	if (!cache_bio)
 		goto out_submit;
 
-	cache_bio->bi_sector	= miss->bi_sector;
-	cache_bio->bi_bdev	= miss->bi_bdev;
-	cache_bio->bi_size	= s->insert_bio_sectors << 9;
+	cache_bio->bi_iter.bi_sector	= miss->bi_iter.bi_sector;
+	cache_bio->bi_bdev		= miss->bi_bdev;
+	cache_bio->bi_iter.bi_size	= s->insert_bio_sectors << 9;
 
 	cache_bio->bi_end_io	= request_endio;
 	cache_bio->bi_private	= &s->cl;
@@ -1031,7 +1031,7 @@
 {
 	struct closure *cl = &s->cl;
 	struct bio *bio = &s->bio.bio;
-	struct bkey start = KEY(dc->disk.id, bio->bi_sector, 0);
+	struct bkey start = KEY(dc->disk.id, bio->bi_iter.bi_sector, 0);
 	struct bkey end = KEY(dc->disk.id, bio_end_sector(bio), 0);
 
 	bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys, &start, &end);
@@ -1087,8 +1087,7 @@
 			closure_bio_submit(flush, cl, s->d);
 		}
 	} else {
-		s->iop.bio = bio_clone_bioset(bio, GFP_NOIO,
-					      dc->disk.bio_split);
+		s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
 
 		closure_bio_submit(bio, cl, s->d);
 	}
@@ -1126,13 +1125,13 @@
 	part_stat_unlock();
 
 	bio->bi_bdev = dc->bdev;
-	bio->bi_sector += dc->sb.data_offset;
+	bio->bi_iter.bi_sector += dc->sb.data_offset;
 
 	if (cached_dev_get(dc)) {
 		s = search_alloc(bio, d);
 		trace_bcache_request_start(s->d, bio);
 
-		if (!bio->bi_size) {
+		if (!bio->bi_iter.bi_size) {
 			/*
 			 * can't call bch_journal_meta from under
 			 * generic_make_request
@@ -1204,24 +1203,24 @@
 static int flash_dev_cache_miss(struct btree *b, struct search *s,
 				struct bio *bio, unsigned sectors)
 {
-	struct bio_vec *bv;
-	int i;
+	struct bio_vec bv;
+	struct bvec_iter iter;
 
 	/* Zero fill bio */
 
-	bio_for_each_segment(bv, bio, i) {
-		unsigned j = min(bv->bv_len >> 9, sectors);
+	bio_for_each_segment(bv, bio, iter) {
+		unsigned j = min(bv.bv_len >> 9, sectors);
 
-		void *p = kmap(bv->bv_page);
-		memset(p + bv->bv_offset, 0, j << 9);
-		kunmap(bv->bv_page);
+		void *p = kmap(bv.bv_page);
+		memset(p + bv.bv_offset, 0, j << 9);
+		kunmap(bv.bv_page);
 
 		sectors	-= j;
 	}
 
-	bio_advance(bio, min(sectors << 9, bio->bi_size));
+	bio_advance(bio, min(sectors << 9, bio->bi_iter.bi_size));
 
-	if (!bio->bi_size)
+	if (!bio->bi_iter.bi_size)
 		return MAP_DONE;
 
 	return MAP_CONTINUE;
@@ -1255,7 +1254,7 @@
 
 	trace_bcache_request_start(s->d, bio);
 
-	if (!bio->bi_size) {
+	if (!bio->bi_iter.bi_size) {
 		/*
 		 * can't call bch_journal_meta from under
 		 * generic_make_request
@@ -1265,7 +1264,7 @@
 				      bcache_wq);
 	} else if (rw) {
 		bch_keybuf_check_overlapping(&s->iop.c->moving_gc_keys,
-					&KEY(d->id, bio->bi_sector, 0),
+					&KEY(d->id, bio->bi_iter.bi_sector, 0),
 					&KEY(d->id, bio_end_sector(bio), 0));
 
 		s->iop.bypass		= (bio->bi_rw & REQ_DISCARD) != 0;
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 2cd65bf..39f21db 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -13,17 +13,22 @@
 	uint16_t		write_prio;
 	short			error;
 
-	unsigned		bypass:1;
-	unsigned		writeback:1;
-	unsigned		flush_journal:1;
-	unsigned		csum:1;
+	union {
+		uint16_t	flags;
 
-	unsigned		replace:1;
-	unsigned		replace_collision:1;
+		struct {
+			unsigned	bypass:1;
+			unsigned	writeback:1;
+			unsigned	flush_journal:1;
+			unsigned	csum:1;
 
-	unsigned		insert_data_done:1;
+			unsigned	replace:1;
+			unsigned	replace_collision:1;
 
-	/* Anything past this point won't get zeroed in search_alloc() */
+			unsigned	insert_data_done:1;
+		};
+	};
+
 	struct keylist		insert_keys;
 	BKEY_PADDED(replace_key);
 };
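
The anonymous union overlays a uint16_t on the bitfields so that search_alloc() (in the request.c hunk above) can clear every flag with a single store, replacing the old trick of only memset()ing the struct up to insert_keys. In use, sketched:

	op->flags = 0;		/* clears bypass, writeback, flush_journal, csum,
				 * replace, replace_collision, insert_data_done */
	op->bypass = 1;		/* individual bits still set as before */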
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index c57bfa0..24a3a15 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -9,6 +9,7 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "extents.h"
 #include "request.h"
 #include "writeback.h"
 
@@ -225,7 +226,7 @@
 	struct cached_dev *dc = bio->bi_private;
 	/* XXX: error checking */
 
-	closure_put(&dc->sb_write.cl);
+	closure_put(&dc->sb_write);
 }
 
 static void __write_super(struct cache_sb *sb, struct bio *bio)
@@ -233,9 +234,9 @@
 	struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page);
 	unsigned i;
 
-	bio->bi_sector	= SB_SECTOR;
-	bio->bi_rw	= REQ_SYNC|REQ_META;
-	bio->bi_size	= SB_SIZE;
+	bio->bi_iter.bi_sector	= SB_SECTOR;
+	bio->bi_rw		= REQ_SYNC|REQ_META;
+	bio->bi_iter.bi_size	= SB_SIZE;
 	bch_bio_map(bio, NULL);
 
 	out->offset		= cpu_to_le64(sb->offset);
@@ -263,12 +264,20 @@
 	submit_bio(REQ_WRITE, bio);
 }
 
+static void bch_write_bdev_super_unlock(struct closure *cl)
+{
+	struct cached_dev *dc = container_of(cl, struct cached_dev, sb_write);
+
+	up(&dc->sb_write_mutex);
+}
+
 void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
 {
-	struct closure *cl = &dc->sb_write.cl;
+	struct closure *cl = &dc->sb_write;
 	struct bio *bio = &dc->sb_bio;
 
-	closure_lock(&dc->sb_write, parent);
+	down(&dc->sb_write_mutex);
+	closure_init(cl, parent);
 
 	bio_reset(bio);
 	bio->bi_bdev	= dc->bdev;
@@ -278,7 +287,7 @@
 	closure_get(cl);
 	__write_super(&dc->sb, bio);
 
-	closure_return(cl);
+	closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
 }
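
This is the recurring conversion in super.c: closure_lock() is gone, so exclusion comes from a plain semaphore taken before closure_init() and released by a destructor that runs once the closure's last reference is dropped. The shape of it, sketched with a hypothetical struct foo:

	static void foo_write_unlock(struct closure *cl)
	{
		struct foo *f = container_of(cl, struct foo, sb_write);

		up(&f->sb_write_mutex);	/* after the final closure_put() */
	}

	static void foo_write(struct foo *f, struct closure *parent)
	{
		struct closure *cl = &f->sb_write;

		down(&f->sb_write_mutex);	/* serialize writers */
		closure_init(cl, parent);

		/* ... issue async I/O, taking refs with closure_get() ... */

		closure_return_with_destructor(cl, foo_write_unlock);
	}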
 
 static void write_super_endio(struct bio *bio, int error)
@@ -286,16 +295,24 @@
 	struct cache *ca = bio->bi_private;
 
 	bch_count_io_errors(ca, error, "writing superblock");
-	closure_put(&ca->set->sb_write.cl);
+	closure_put(&ca->set->sb_write);
+}
+
+static void bcache_write_super_unlock(struct closure *cl)
+{
+	struct cache_set *c = container_of(cl, struct cache_set, sb_write);
+
+	up(&c->sb_write_mutex);
 }
 
 void bcache_write_super(struct cache_set *c)
 {
-	struct closure *cl = &c->sb_write.cl;
+	struct closure *cl = &c->sb_write;
 	struct cache *ca;
 	unsigned i;
 
-	closure_lock(&c->sb_write, &c->cl);
+	down(&c->sb_write_mutex);
+	closure_init(cl, &c->cl);
 
 	c->sb.seq++;
 
@@ -317,7 +334,7 @@
 		__write_super(&ca->sb, bio);
 	}
 
-	closure_return(cl);
+	closure_return_with_destructor(cl, bcache_write_super_unlock);
 }
 
 /* UUID io */
@@ -325,29 +342,37 @@
 static void uuid_endio(struct bio *bio, int error)
 {
 	struct closure *cl = bio->bi_private;
-	struct cache_set *c = container_of(cl, struct cache_set, uuid_write.cl);
+	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
 
 	cache_set_err_on(error, c, "accessing uuids");
 	bch_bbio_free(bio, c);
 	closure_put(cl);
 }
 
+static void uuid_io_unlock(struct closure *cl)
+{
+	struct cache_set *c = container_of(cl, struct cache_set, uuid_write);
+
+	up(&c->uuid_write_mutex);
+}
+
 static void uuid_io(struct cache_set *c, unsigned long rw,
 		    struct bkey *k, struct closure *parent)
 {
-	struct closure *cl = &c->uuid_write.cl;
+	struct closure *cl = &c->uuid_write;
 	struct uuid_entry *u;
 	unsigned i;
 	char buf[80];
 
 	BUG_ON(!parent);
-	closure_lock(&c->uuid_write, parent);
+	down(&c->uuid_write_mutex);
+	closure_init(cl, parent);
 
 	for (i = 0; i < KEY_PTRS(k); i++) {
 		struct bio *bio = bch_bbio_alloc(c);
 
 		bio->bi_rw	= REQ_SYNC|REQ_META|rw;
-		bio->bi_size	= KEY_SIZE(k) << 9;
+		bio->bi_iter.bi_size = KEY_SIZE(k) << 9;
 
 		bio->bi_end_io	= uuid_endio;
 		bio->bi_private = cl;
@@ -359,7 +384,7 @@
 			break;
 	}
 
-	bch_bkey_to_text(buf, sizeof(buf), k);
+	bch_extent_to_text(buf, sizeof(buf), k);
 	pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf);
 
 	for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
@@ -368,14 +393,14 @@
 				 u - c->uuids, u->uuid, u->label,
 				 u->first_reg, u->last_reg, u->invalidated);
 
-	closure_return(cl);
+	closure_return_with_destructor(cl, uuid_io_unlock);
 }
 
 static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
 {
 	struct bkey *k = &j->uuid_bucket;
 
-	if (bch_btree_ptr_invalid(c, k))
+	if (__bch_btree_ptr_invalid(c, k))
 		return "bad uuid pointer";
 
 	bkey_copy(&c->uuid_bucket, k);
@@ -420,7 +445,7 @@
 
 	lockdep_assert_held(&bch_register_lock);
 
-	if (bch_bucket_alloc_set(c, WATERMARK_METADATA, &k.key, 1, true))
+	if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true))
 		return 1;
 
 	SET_KEY_SIZE(&k.key, c->sb.bucket_size);
@@ -503,10 +528,10 @@
 
 	closure_init_stack(cl);
 
-	bio->bi_sector	= bucket * ca->sb.bucket_size;
-	bio->bi_bdev	= ca->bdev;
-	bio->bi_rw	= REQ_SYNC|REQ_META|rw;
-	bio->bi_size	= bucket_bytes(ca);
+	bio->bi_iter.bi_sector	= bucket * ca->sb.bucket_size;
+	bio->bi_bdev		= ca->bdev;
+	bio->bi_rw		= REQ_SYNC|REQ_META|rw;
+	bio->bi_iter.bi_size	= bucket_bytes(ca);
 
 	bio->bi_end_io	= prio_endio;
 	bio->bi_private = ca;
@@ -538,8 +563,8 @@
 	atomic_long_add(ca->sb.bucket_size * prio_buckets(ca),
 			&ca->meta_sectors_written);
 
-	pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
-		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+	/* pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
+	 *	    fifo_used(&ca->free_inc), fifo_used(&ca->unused)); */
 
 	for (i = prio_buckets(ca) - 1; i >= 0; --i) {
 		long bucket;
@@ -558,7 +583,7 @@
 		p->magic	= pset_magic(&ca->sb);
 		p->csum		= bch_crc64(&p->magic, bucket_bytes(ca) - 8);
 
-		bucket = bch_bucket_alloc(ca, WATERMARK_PRIO, true);
+		bucket = bch_bucket_alloc(ca, RESERVE_PRIO, true);
 		BUG_ON(bucket == -1);
 
 		mutex_unlock(&ca->set->bucket_lock);
@@ -739,8 +764,6 @@
 	}
 
 	bio_split_pool_free(&d->bio_split_hook);
-	if (d->unaligned_bvec)
-		mempool_destroy(d->unaligned_bvec);
 	if (d->bio_split)
 		bioset_free(d->bio_split);
 	if (is_vmalloc_addr(d->full_dirty_stripes))
@@ -793,8 +816,6 @@
 		return minor;
 
 	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
-	    !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
-				sizeof(struct bio_vec) * BIO_MAX_PAGES)) ||
 	    bio_split_pool_init(&d->bio_split_hook) ||
 	    !(d->disk = alloc_disk(1))) {
 		ida_simple_remove(&bcache_minor, minor);
@@ -1102,7 +1123,7 @@
 	set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
 	kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
 	INIT_WORK(&dc->detach, cached_dev_detach_finish);
-	closure_init_unlocked(&dc->sb_write);
+	sema_init(&dc->sb_write_mutex, 1);
 	INIT_LIST_HEAD(&dc->io_lru);
 	spin_lock_init(&dc->io_lock);
 	bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
@@ -1114,6 +1135,12 @@
 		hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
 	}
 
+	dc->disk.stripe_size = q->limits.io_opt >> 9;
+
+	if (dc->disk.stripe_size)
+		dc->partial_stripes_expensive =
+			q->limits.raid_partial_stripes_expensive;
+
 	ret = bcache_device_init(&dc->disk, block_size,
 			 dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
 	if (ret)
@@ -1325,8 +1352,8 @@
 		if (ca)
 			kobject_put(&ca->kobj);
 
+	bch_bset_sort_state_free(&c->sort);
 	free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
-	free_pages((unsigned long) c->sort, ilog2(bucket_pages(c)));
 
 	if (c->bio_split)
 		bioset_free(c->bio_split);
@@ -1451,21 +1478,17 @@
 	c->block_bits		= ilog2(sb->block_size);
 	c->nr_uuids		= bucket_bytes(c) / sizeof(struct uuid_entry);
 
-	c->btree_pages		= c->sb.bucket_size / PAGE_SECTORS;
+	c->btree_pages		= bucket_pages(c);
 	if (c->btree_pages > BTREE_MAX_PAGES)
 		c->btree_pages = max_t(int, c->btree_pages / 4,
 				       BTREE_MAX_PAGES);
 
-	c->sort_crit_factor = int_sqrt(c->btree_pages);
-
-	closure_init_unlocked(&c->sb_write);
+	sema_init(&c->sb_write_mutex, 1);
 	mutex_init(&c->bucket_lock);
 	init_waitqueue_head(&c->try_wait);
 	init_waitqueue_head(&c->bucket_wait);
-	closure_init_unlocked(&c->uuid_write);
-	mutex_init(&c->sort_lock);
+	sema_init(&c->uuid_write_mutex, 1);
 
-	spin_lock_init(&c->sort_time.lock);
 	spin_lock_init(&c->btree_gc_time.lock);
 	spin_lock_init(&c->btree_split_time.lock);
 	spin_lock_init(&c->btree_read_time.lock);
@@ -1493,11 +1516,11 @@
 				bucket_pages(c))) ||
 	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
 	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
-	    !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    bch_journal_alloc(c) ||
 	    bch_btree_cache_alloc(c) ||
-	    bch_open_buckets_alloc(c))
+	    bch_open_buckets_alloc(c) ||
+	    bch_bset_sort_state_init(&c->sort, ilog2(c->btree_pages)))
 		goto err;
 
 	c->congested_read_threshold_us	= 2000;
@@ -1553,7 +1576,7 @@
 		k = &j->btree_root;
 
 		err = "bad btree root";
-		if (bch_btree_ptr_invalid(c, k))
+		if (__bch_btree_ptr_invalid(c, k))
 			goto err;
 
 		err = "error reading btree root";
@@ -1747,6 +1770,7 @@
 void bch_cache_release(struct kobject *kobj)
 {
 	struct cache *ca = container_of(kobj, struct cache, kobj);
+	unsigned i;
 
 	if (ca->set)
 		ca->set->cache[ca->sb.nr_this_dev] = NULL;
@@ -1760,7 +1784,9 @@
 	free_heap(&ca->heap);
 	free_fifo(&ca->unused);
 	free_fifo(&ca->free_inc);
-	free_fifo(&ca->free);
+
+	for (i = 0; i < RESERVE_NR; i++)
+		free_fifo(&ca->free[i]);
 
 	if (ca->sb_bio.bi_inline_vecs[0].bv_page)
 		put_page(ca->sb_bio.bi_io_vec[0].bv_page);
@@ -1786,10 +1812,12 @@
 	ca->journal.bio.bi_max_vecs = 8;
 	ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs;
 
-	free = roundup_pow_of_two(ca->sb.nbuckets) >> 9;
-	free = max_t(size_t, free, (prio_buckets(ca) + 8) * 2);
+	free = roundup_pow_of_two(ca->sb.nbuckets) >> 10;
 
-	if (!init_fifo(&ca->free,	free, GFP_KERNEL) ||
+	if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) ||
+	    !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) ||
+	    !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) ||
+	    !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) ||
 	    !init_fifo(&ca->free_inc,	free << 2, GFP_KERNEL) ||
 	    !init_fifo(&ca->unused,	free << 2, GFP_KERNEL) ||
 	    !init_heap(&ca->heap,	free << 3, GFP_KERNEL) ||
@@ -2034,7 +2062,8 @@
 		kobject_put(bcache_kobj);
 	if (bcache_wq)
 		destroy_workqueue(bcache_wq);
-	unregister_blkdev(bcache_major, "bcache");
+	if (bcache_major)
+		unregister_blkdev(bcache_major, "bcache");
 	unregister_reboot_notifier(&reboot);
 }
 
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index a1f8561..c6ab693 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -102,7 +102,6 @@
 rw_attribute(key_merging_disabled);
 rw_attribute(gc_always_rewrite);
 rw_attribute(expensive_debug_checks);
-rw_attribute(freelist_percent);
 rw_attribute(cache_replacement_policy);
 rw_attribute(btree_shrinker_disabled);
 rw_attribute(copy_gc_enabled);
@@ -401,6 +400,48 @@
 };
 KTYPE(bch_flash_dev);
 
+struct bset_stats_op {
+	struct btree_op op;
+	size_t nodes;
+	struct bset_stats stats;
+};
+
+static int btree_bset_stats(struct btree_op *b_op, struct btree *b)
+{
+	struct bset_stats_op *op = container_of(b_op, struct bset_stats_op, op);
+
+	op->nodes++;
+	bch_btree_keys_stats(&b->keys, &op->stats);
+
+	return MAP_CONTINUE;
+}
+
+int bch_bset_print_stats(struct cache_set *c, char *buf)
+{
+	struct bset_stats_op op;
+	int ret;
+
+	memset(&op, 0, sizeof(op));
+	bch_btree_op_init(&op.op, -1);
+
+	ret = bch_btree_map_nodes(&op.op, c, &ZERO_KEY, btree_bset_stats);
+	if (ret < 0)
+		return ret;
+
+	return snprintf(buf, PAGE_SIZE,
+			"btree nodes:		%zu\n"
+			"written sets:		%zu\n"
+			"unwritten sets:		%zu\n"
+			"written key bytes:	%zu\n"
+			"unwritten key bytes:	%zu\n"
+			"floats:			%zu\n"
+			"failed:			%zu\n",
+			op.nodes,
+			op.stats.sets_written, op.stats.sets_unwritten,
+			op.stats.bytes_written, op.stats.bytes_unwritten,
+			op.stats.floats, op.stats.failed);
+}
+
 SHOW(__bch_cache_set)
 {
 	unsigned root_usage(struct cache_set *c)
@@ -419,7 +460,7 @@
 			rw_lock(false, b, b->level);
 		} while (b != c->root);
 
-		for_each_key_filter(b, k, &iter, bch_ptr_bad)
+		for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
 			bytes += bkey_bytes(k);
 
 		rw_unlock(false, b);
@@ -434,7 +475,7 @@
 
 		mutex_lock(&c->bucket_lock);
 		list_for_each_entry(b, &c->btree_cache, list)
-			ret += 1 << (b->page_order + PAGE_SHIFT);
+			ret += 1 << (b->keys.page_order + PAGE_SHIFT);
 
 		mutex_unlock(&c->bucket_lock);
 		return ret;
@@ -491,7 +532,7 @@
 
 	sysfs_print_time_stats(&c->btree_gc_time,	btree_gc, sec, ms);
 	sysfs_print_time_stats(&c->btree_split_time,	btree_split, sec, us);
-	sysfs_print_time_stats(&c->sort_time,		btree_sort, ms, us);
+	sysfs_print_time_stats(&c->sort.time,		btree_sort, ms, us);
 	sysfs_print_time_stats(&c->btree_read_time,	btree_read, ms, us);
 	sysfs_print_time_stats(&c->try_harder_time,	try_harder, ms, us);
 
@@ -711,9 +752,6 @@
 	sysfs_print(io_errors,
 		    atomic_read(&ca->io_errors) >> IO_ERROR_SHIFT);
 
-	sysfs_print(freelist_percent, ca->free.size * 100 /
-		    ((size_t) ca->sb.nbuckets));
-
 	if (attr == &sysfs_cache_replacement_policy)
 		return bch_snprint_string_list(buf, PAGE_SIZE,
 					       cache_replacement_policies,
@@ -820,32 +858,6 @@
 		}
 	}
 
-	if (attr == &sysfs_freelist_percent) {
-		DECLARE_FIFO(long, free);
-		long i;
-		size_t p = strtoul_or_return(buf);
-
-		p = clamp_t(size_t,
-			    ((size_t) ca->sb.nbuckets * p) / 100,
-			    roundup_pow_of_two(ca->sb.nbuckets) >> 9,
-			    ca->sb.nbuckets / 2);
-
-		if (!init_fifo_exact(&free, p, GFP_KERNEL))
-			return -ENOMEM;
-
-		mutex_lock(&ca->set->bucket_lock);
-
-		fifo_move(&free, &ca->free);
-		fifo_swap(&free, &ca->free);
-
-		mutex_unlock(&ca->set->bucket_lock);
-
-		while (fifo_pop(&free, i))
-			atomic_dec(&ca->buckets[i].pin);
-
-		free_fifo(&free);
-	}
-
 	if (attr == &sysfs_clear_stats) {
 		atomic_long_set(&ca->sectors_written, 0);
 		atomic_long_set(&ca->btree_sectors_written, 0);
@@ -869,7 +881,6 @@
 	&sysfs_metadata_written,
 	&sysfs_io_errors,
 	&sysfs_clear_stats,
-	&sysfs_freelist_percent,
 	&sysfs_cache_replacement_policy,
 	NULL
 };
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index bb37618..db3ae4c 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -224,10 +224,10 @@
 
 void bch_bio_map(struct bio *bio, void *base)
 {
-	size_t size = bio->bi_size;
+	size_t size = bio->bi_iter.bi_size;
 	struct bio_vec *bv = bio->bi_io_vec;
 
-	BUG_ON(!bio->bi_size);
+	BUG_ON(!bio->bi_iter.bi_size);
 	BUG_ON(bio->bi_vcnt);
 
 	bv->bv_offset = base ? ((unsigned long) base) % PAGE_SIZE : 0;
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 1030c60..ac7d0d1 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -2,6 +2,7 @@
 #ifndef _BCACHE_UTIL_H
 #define _BCACHE_UTIL_H
 
+#include <linux/blkdev.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/llist.h>
@@ -17,11 +18,13 @@
 
 #ifdef CONFIG_BCACHE_DEBUG
 
+#define EBUG_ON(cond)			BUG_ON(cond)
 #define atomic_dec_bug(v)	BUG_ON(atomic_dec_return(v) < 0)
 #define atomic_inc_bug(v, i)	BUG_ON(atomic_inc_return(v) <= i)
 
 #else /* DEBUG */
 
+#define EBUG_ON(cond)			do { if (cond); } while (0)
 #define atomic_dec_bug(v)	atomic_dec(v)
 #define atomic_inc_bug(v, i)	atomic_inc(v)
 
@@ -391,6 +394,11 @@
 
 void bch_time_stats_update(struct time_stats *stats, uint64_t time);
 
+static inline unsigned local_clock_us(void)
+{
+	return local_clock() >> 10;
+}
+
 #define NSEC_PER_ns			1L
 #define NSEC_PER_us			NSEC_PER_USEC
 #define NSEC_PER_ms			NSEC_PER_MSEC
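
A note on local_clock_us() above: shifting nanoseconds right by 10 divides by 1024 rather than 1000, so the result undershoots real microseconds by 24/1024, about 2.3% - close enough for the submit_time_us bookkeeping it feeds, and it avoids a 64-bit division on 32-bit machines.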
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 6c44fe0..f4300e4 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -111,7 +111,7 @@
 	if (!io->dc->writeback_percent)
 		bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
 
-	bio->bi_size		= KEY_SIZE(&w->key) << 9;
+	bio->bi_iter.bi_size	= KEY_SIZE(&w->key) << 9;
 	bio->bi_max_vecs	= DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS);
 	bio->bi_private		= w;
 	bio->bi_io_vec		= bio->bi_inline_vecs;
@@ -184,7 +184,7 @@
 
 	dirty_init(w);
 	io->bio.bi_rw		= WRITE;
-	io->bio.bi_sector	= KEY_START(&w->key);
+	io->bio.bi_iter.bi_sector = KEY_START(&w->key);
 	io->bio.bi_bdev		= io->dc->bdev;
 	io->bio.bi_end_io	= dirty_endio;
 
@@ -253,7 +253,7 @@
 		io->dc		= dc;
 
 		dirty_init(w);
-		io->bio.bi_sector	= PTR_OFFSET(&w->key, 0);
+		io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
 		io->bio.bi_bdev		= PTR_CACHE(dc->disk.c,
 						    &w->key, 0)->bdev;
 		io->bio.bi_rw		= READ;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index c9ddcf4..e2f8598 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -50,7 +50,7 @@
 		return false;
 
 	if (dc->partial_stripes_expensive &&
-	    bcache_dev_stripe_dirty(dc, bio->bi_sector,
+	    bcache_dev_stripe_dirty(dc, bio->bi_iter.bi_sector,
 				    bio_sectors(bio)))
 		return true;
 
diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h
index 3a8cfa2..dd36461 100644
--- a/drivers/md/dm-bio-record.h
+++ b/drivers/md/dm-bio-record.h
@@ -17,55 +17,24 @@
  * original bio state.
  */
 
-struct dm_bio_vec_details {
-#if PAGE_SIZE < 65536
-	__u16 bv_len;
-	__u16 bv_offset;
-#else
-	unsigned bv_len;
-	unsigned bv_offset;
-#endif
-};
-
 struct dm_bio_details {
-	sector_t bi_sector;
 	struct block_device *bi_bdev;
-	unsigned int bi_size;
-	unsigned short bi_idx;
 	unsigned long bi_flags;
-	struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
+	struct bvec_iter bi_iter;
 };
 
 static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
 {
-	unsigned i;
-
-	bd->bi_sector = bio->bi_sector;
 	bd->bi_bdev = bio->bi_bdev;
-	bd->bi_size = bio->bi_size;
-	bd->bi_idx = bio->bi_idx;
 	bd->bi_flags = bio->bi_flags;
-
-	for (i = 0; i < bio->bi_vcnt; i++) {
-		bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
-		bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
-	}
+	bd->bi_iter = bio->bi_iter;
 }
 
 static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
 {
-	unsigned i;
-
-	bio->bi_sector = bd->bi_sector;
 	bio->bi_bdev = bd->bi_bdev;
-	bio->bi_size = bd->bi_size;
-	bio->bi_idx = bd->bi_idx;
 	bio->bi_flags = bd->bi_flags;
-
-	for (i = 0; i < bio->bi_vcnt; i++) {
-		bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
-		bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
-	}
+	bio->bi_iter = bd->bi_iter;
 }
 
 #endif
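
With bi_sector, bi_size and bi_idx folded into bi_iter, recording and restoring a bio around a remap collapses to two struct assignments. Typical use, sketched (remap(), failed and requeue() are hypothetical):

	struct dm_bio_details bd;

	dm_bio_record(&bd, bio);	/* snapshot bdev, flags, bi_iter */
	remap(bio);			/* hypothetical: may need undoing */
	if (failed) {
		dm_bio_restore(&bd, bio);	/* bio is exactly as first seen */
		requeue(bio);
	}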
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 9ed4212..66c5d13 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -540,7 +540,7 @@
 	bio_init(&b->bio);
 	b->bio.bi_io_vec = b->bio_vec;
 	b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
-	b->bio.bi_sector = block << b->c->sectors_per_block_bits;
+	b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits;
 	b->bio.bi_bdev = b->c->bdev;
 	b->bio.bi_end_io = end_io;
 
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index 930e8c3..1e018e9 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -72,7 +72,7 @@
 
 static void iot_update_stats(struct io_tracker *t, struct bio *bio)
 {
-	if (bio->bi_sector == from_oblock(t->last_end_oblock) + 1)
+	if (bio->bi_iter.bi_sector == from_oblock(t->last_end_oblock) + 1)
 		t->nr_seq_samples++;
 	else {
 		/*
@@ -87,7 +87,7 @@
 		t->nr_rand_samples++;
 	}
 
-	t->last_end_oblock = to_oblock(bio->bi_sector + bio_sectors(bio) - 1);
+	t->last_end_oblock = to_oblock(bio_end_sector(bio) - 1);
 }
 
 static void iot_check_for_pattern_switch(struct io_tracker *t)
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 09334c2..ffd472e 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -85,6 +85,12 @@
 {
 	bio->bi_end_io = h->bi_end_io;
 	bio->bi_private = h->bi_private;
+
+	/*
+	 * Must bump bi_remaining to allow bio to complete with
+	 * restored bi_end_io.
+	 */
+	atomic_inc(&bio->bi_remaining);
 }
 
 /*----------------------------------------------------------------*/
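
The atomic_inc() above pairs with bio_endio(): completion decrements bi_remaining and only invokes bi_end_io once it reaches zero, so an endio hook that restores the saved completion and re-enters bio_endio() must bump the count back up first. The pattern in isolation (struct hook_ctx is hypothetical):

	static void hook_endio(struct bio *bio, int error)
	{
		struct hook_ctx *h = bio->bi_private;

		bio->bi_end_io	= h->bi_end_io;		/* restore saved completion */
		bio->bi_private	= h->bi_private;

		atomic_inc(&bio->bi_remaining);		/* undo bio_endio()'s decrement */
		bio_endio(bio, error);			/* runs the restored endio */
	}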
@@ -664,15 +670,17 @@
 static void remap_to_cache(struct cache *cache, struct bio *bio,
 			   dm_cblock_t cblock)
 {
-	sector_t bi_sector = bio->bi_sector;
+	sector_t bi_sector = bio->bi_iter.bi_sector;
 
 	bio->bi_bdev = cache->cache_dev->bdev;
 	if (!block_size_is_power_of_two(cache))
-		bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) +
-				sector_div(bi_sector, cache->sectors_per_block);
+		bio->bi_iter.bi_sector =
+			(from_cblock(cblock) * cache->sectors_per_block) +
+			sector_div(bi_sector, cache->sectors_per_block);
 	else
-		bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) |
-				(bi_sector & (cache->sectors_per_block - 1));
+		bio->bi_iter.bi_sector =
+			(from_cblock(cblock) << cache->sectors_per_block_shift) |
+			(bi_sector & (cache->sectors_per_block - 1));
 }
 
 static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
@@ -712,7 +720,7 @@
 
 static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 {
-	sector_t block_nr = bio->bi_sector;
+	sector_t block_nr = bio->bi_iter.bi_sector;
 
 	if (!block_size_is_power_of_two(cache))
 		(void) sector_div(block_nr, cache->sectors_per_block);
@@ -1027,7 +1035,7 @@
 static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
 {
 	return (bio_data_dir(bio) == WRITE) &&
-		(bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
+		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
 }
 
 static void avoid_copy(struct dm_cache_migration *mg)
@@ -1252,7 +1260,7 @@
 	size_t pb_data_size = get_per_bio_data_size(cache);
 	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
 
-	BUG_ON(bio->bi_size);
+	BUG_ON(bio->bi_iter.bi_size);
 	if (!pb->req_nr)
 		remap_to_origin(cache, bio);
 	else
@@ -1275,9 +1283,9 @@
  */
 static void process_discard_bio(struct cache *cache, struct bio *bio)
 {
-	dm_block_t start_block = dm_sector_div_up(bio->bi_sector,
+	dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector,
 						  cache->discard_block_size);
-	dm_block_t end_block = bio->bi_sector + bio_sectors(bio);
+	dm_block_t end_block = bio_end_sector(bio);
 	dm_block_t b;
 
 	end_block = block_div(end_block, cache->discard_block_size);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 81b0fa6..784695d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -39,10 +39,8 @@
 	struct completion restart;
 	struct bio *bio_in;
 	struct bio *bio_out;
-	unsigned int offset_in;
-	unsigned int offset_out;
-	unsigned int idx_in;
-	unsigned int idx_out;
+	struct bvec_iter iter_in;
+	struct bvec_iter iter_out;
 	sector_t cc_sector;
 	atomic_t cc_pending;
 };
@@ -826,10 +824,10 @@
 {
 	ctx->bio_in = bio_in;
 	ctx->bio_out = bio_out;
-	ctx->offset_in = 0;
-	ctx->offset_out = 0;
-	ctx->idx_in = bio_in ? bio_in->bi_idx : 0;
-	ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
+	if (bio_in)
+		ctx->iter_in = bio_in->bi_iter;
+	if (bio_out)
+		ctx->iter_out = bio_out->bi_iter;
 	ctx->cc_sector = sector + cc->iv_offset;
 	init_completion(&ctx->restart);
 }
@@ -857,8 +855,8 @@
 			       struct convert_context *ctx,
 			       struct ablkcipher_request *req)
 {
-	struct bio_vec *bv_in = bio_iovec_idx(ctx->bio_in, ctx->idx_in);
-	struct bio_vec *bv_out = bio_iovec_idx(ctx->bio_out, ctx->idx_out);
+	struct bio_vec bv_in = bio_iter_iovec(ctx->bio_in, ctx->iter_in);
+	struct bio_vec bv_out = bio_iter_iovec(ctx->bio_out, ctx->iter_out);
 	struct dm_crypt_request *dmreq;
 	u8 *iv;
 	int r;
@@ -869,24 +867,15 @@
 	dmreq->iv_sector = ctx->cc_sector;
 	dmreq->ctx = ctx;
 	sg_init_table(&dmreq->sg_in, 1);
-	sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT,
-		    bv_in->bv_offset + ctx->offset_in);
+	sg_set_page(&dmreq->sg_in, bv_in.bv_page, 1 << SECTOR_SHIFT,
+		    bv_in.bv_offset);
 
 	sg_init_table(&dmreq->sg_out, 1);
-	sg_set_page(&dmreq->sg_out, bv_out->bv_page, 1 << SECTOR_SHIFT,
-		    bv_out->bv_offset + ctx->offset_out);
+	sg_set_page(&dmreq->sg_out, bv_out.bv_page, 1 << SECTOR_SHIFT,
+		    bv_out.bv_offset);
 
-	ctx->offset_in += 1 << SECTOR_SHIFT;
-	if (ctx->offset_in >= bv_in->bv_len) {
-		ctx->offset_in = 0;
-		ctx->idx_in++;
-	}
-
-	ctx->offset_out += 1 << SECTOR_SHIFT;
-	if (ctx->offset_out >= bv_out->bv_len) {
-		ctx->offset_out = 0;
-		ctx->idx_out++;
-	}
+	bio_advance_iter(ctx->bio_in, &ctx->iter_in, 1 << SECTOR_SHIFT);
+	bio_advance_iter(ctx->bio_out, &ctx->iter_out, 1 << SECTOR_SHIFT);
 
 	if (cc->iv_gen_ops) {
 		r = cc->iv_gen_ops->generator(cc, iv, dmreq);
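
crypt_convert_block() now advances two bvec_iters in lockstep instead of juggling four offset/index counters; bio_advance_iter() moves a cursor forward by an arbitrary byte count and steps across biovec boundaries on its own. The consuming loop, reduced to a sketch (process_sector() is hypothetical):

	while (iter_in.bi_size && iter_out.bi_size) {
		struct bio_vec in  = bio_iter_iovec(bio_in, iter_in);
		struct bio_vec out = bio_iter_iovec(bio_out, iter_out);

		process_sector(&in, &out);	/* hypothetical: one 512-byte unit */

		bio_advance_iter(bio_in,  &iter_in,  1 << SECTOR_SHIFT);
		bio_advance_iter(bio_out, &iter_out, 1 << SECTOR_SHIFT);
	}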
@@ -937,8 +926,7 @@
 
 	atomic_set(&ctx->cc_pending, 1);
 
-	while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
-	      ctx->idx_out < ctx->bio_out->bi_vcnt) {
+	while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
 
 		crypt_alloc_req(cc, ctx);
 
@@ -1021,7 +1009,7 @@
 		size -= len;
 	}
 
-	if (!clone->bi_size) {
+	if (!clone->bi_iter.bi_size) {
 		bio_put(clone);
 		return NULL;
 	}
@@ -1161,7 +1149,7 @@
 	crypt_inc_pending(io);
 
 	clone_init(io, clone);
-	clone->bi_sector = cc->start + io->sector;
+	clone->bi_iter.bi_sector = cc->start + io->sector;
 
 	generic_make_request(clone);
 	return 0;
@@ -1207,9 +1195,9 @@
 	}
 
 	/* crypt_convert should have filled the clone bio */
-	BUG_ON(io->ctx.idx_out < clone->bi_vcnt);
+	BUG_ON(io->ctx.iter_out.bi_size);
 
-	clone->bi_sector = cc->start + io->sector;
+	clone->bi_iter.bi_sector = cc->start + io->sector;
 
 	if (async)
 		kcryptd_queue_io(io);
@@ -1224,7 +1212,7 @@
 	struct dm_crypt_io *new_io;
 	int crypt_finished;
 	unsigned out_of_pages = 0;
-	unsigned remaining = io->base_bio->bi_size;
+	unsigned remaining = io->base_bio->bi_iter.bi_size;
 	sector_t sector = io->sector;
 	int r;
 
@@ -1246,9 +1234,9 @@
 		}
 
 		io->ctx.bio_out = clone;
-		io->ctx.idx_out = 0;
+		io->ctx.iter_out = clone->bi_iter;
 
-		remaining -= clone->bi_size;
+		remaining -= clone->bi_iter.bi_size;
 		sector += bio_sectors(clone);
 
 		crypt_inc_pending(io);
@@ -1290,8 +1278,7 @@
 			crypt_inc_pending(new_io);
 			crypt_convert_init(cc, &new_io->ctx, NULL,
 					   io->base_bio, sector);
-			new_io->ctx.idx_in = io->ctx.idx_in;
-			new_io->ctx.offset_in = io->ctx.offset_in;
+			new_io->ctx.iter_in = io->ctx.iter_in;
 
 			/*
 			 * Fragments after the first use the base_io
@@ -1869,11 +1856,12 @@
 	if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) {
 		bio->bi_bdev = cc->dev->bdev;
 		if (bio_sectors(bio))
-			bio->bi_sector = cc->start + dm_target_offset(ti, bio->bi_sector);
+			bio->bi_iter.bi_sector = cc->start +
+				dm_target_offset(ti, bio->bi_iter.bi_sector);
 		return DM_MAPIO_REMAPPED;
 	}
 
-	io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_sector));
+	io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
 
 	if (bio_data_dir(io->base_bio) == READ) {
 		if (kcryptd_io_read(io, GFP_NOWAIT))
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index a8a511c..42c3a27 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -277,14 +277,15 @@
 	if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
 		bio->bi_bdev = dc->dev_write->bdev;
 		if (bio_sectors(bio))
-			bio->bi_sector = dc->start_write +
-					 dm_target_offset(ti, bio->bi_sector);
+			bio->bi_iter.bi_sector = dc->start_write +
+				dm_target_offset(ti, bio->bi_iter.bi_sector);
 
 		return delay_bio(dc, dc->write_delay, bio);
 	}
 
 	bio->bi_bdev = dc->dev_read->bdev;
-	bio->bi_sector = dc->start_read + dm_target_offset(ti, bio->bi_sector);
+	bio->bi_iter.bi_sector = dc->start_read +
+		dm_target_offset(ti, bio->bi_iter.bi_sector);
 
 	return delay_bio(dc, dc->read_delay, bio);
 }
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index c80a0ec..b257e46 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -248,7 +248,8 @@
 
 	bio->bi_bdev = fc->dev->bdev;
 	if (bio_sectors(bio))
-		bio->bi_sector = flakey_map_sector(ti, bio->bi_sector);
+		bio->bi_iter.bi_sector =
+			flakey_map_sector(ti, bio->bi_iter.bi_sector);
 }
 
 static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
@@ -265,8 +266,8 @@
 		DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
 			"(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n",
 			bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
-			(bio_data_dir(bio) == WRITE) ? 'w' : 'r',
-			bio->bi_rw, (unsigned long long)bio->bi_sector, bio_bytes);
+			(bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_rw,
+			(unsigned long long)bio->bi_iter.bi_sector, bio_bytes);
 	}
 }
 
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 2a20986..b2b8a10 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -201,26 +201,29 @@
 /*
  * Functions for getting the pages from a bvec.
  */
-static void bvec_get_page(struct dpages *dp,
+static void bio_get_page(struct dpages *dp,
 		  struct page **p, unsigned long *len, unsigned *offset)
 {
-	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
-	*p = bvec->bv_page;
-	*len = bvec->bv_len;
-	*offset = bvec->bv_offset;
+	struct bio *bio = dp->context_ptr;
+	struct bio_vec bvec = bio_iovec(bio);
+	*p = bvec.bv_page;
+	*len = bvec.bv_len;
+	*offset = bvec.bv_offset;
 }
 
-static void bvec_next_page(struct dpages *dp)
+static void bio_next_page(struct dpages *dp)
 {
-	struct bio_vec *bvec = (struct bio_vec *) dp->context_ptr;
-	dp->context_ptr = bvec + 1;
+	struct bio *bio = dp->context_ptr;
+	struct bio_vec bvec = bio_iovec(bio);
+
+	bio_advance(bio, bvec.bv_len);
 }
 
-static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
+static void bio_dp_init(struct dpages *dp, struct bio *bio)
 {
-	dp->get_page = bvec_get_page;
-	dp->next_page = bvec_next_page;
-	dp->context_ptr = bvec;
+	dp->get_page = bio_get_page;
+	dp->next_page = bio_next_page;
+	dp->context_ptr = bio;
 }
 
 /*
@@ -304,14 +307,14 @@
 					  dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
 
 		bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
-		bio->bi_sector = where->sector + (where->count - remaining);
+		bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
 		bio->bi_bdev = where->bdev;
 		bio->bi_end_io = endio;
 		store_io_and_region_in_bio(bio, io, region);
 
 		if (rw & REQ_DISCARD) {
 			num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
-			bio->bi_size = num_sectors << SECTOR_SHIFT;
+			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
 			remaining -= num_sectors;
 		} else if (rw & REQ_WRITE_SAME) {
 			/*
@@ -320,7 +323,7 @@
 			dp->get_page(dp, &page, &len, &offset);
 			bio_add_page(bio, page, logical_block_size, offset);
 			num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
-			bio->bi_size = num_sectors << SECTOR_SHIFT;
+			bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
 
 			offset = 0;
 			remaining -= num_sectors;
@@ -457,8 +460,8 @@
 		list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
 		break;
 
-	case DM_IO_BVEC:
-		bvec_dp_init(dp, io_req->mem.ptr.bvec);
+	case DM_IO_BIO:
+		bio_dp_init(dp, io_req->mem.ptr.bio);
 		break;
 
 	case DM_IO_VMA:
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 4f99d26..53e848c 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -85,7 +85,8 @@
 
 	bio->bi_bdev = lc->dev->bdev;
 	if (bio_sectors(bio))
-		bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
+		bio->bi_iter.bi_sector =
+			linear_map_sector(ti, bio->bi_iter.bi_sector);
 }
 
 static int linear_map(struct dm_target *ti, struct bio *bio)
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9584443..f284e0b 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -432,7 +432,7 @@
 	region_t region = dm_rh_bio_to_region(ms->rh, bio);
 
 	if (log->type->in_sync(log, region, 0))
-		return choose_mirror(ms,  bio->bi_sector) ? 1 : 0;
+		return choose_mirror(ms,  bio->bi_iter.bi_sector) ? 1 : 0;
 
 	return 0;
 }
@@ -442,15 +442,15 @@
  */
 static sector_t map_sector(struct mirror *m, struct bio *bio)
 {
-	if (unlikely(!bio->bi_size))
+	if (unlikely(!bio->bi_iter.bi_size))
 		return 0;
-	return m->offset + dm_target_offset(m->ms->ti, bio->bi_sector);
+	return m->offset + dm_target_offset(m->ms->ti, bio->bi_iter.bi_sector);
 }
 
 static void map_bio(struct mirror *m, struct bio *bio)
 {
 	bio->bi_bdev = m->dev->bdev;
-	bio->bi_sector = map_sector(m, bio);
+	bio->bi_iter.bi_sector = map_sector(m, bio);
 }
 
 static void map_region(struct dm_io_region *io, struct mirror *m,
@@ -526,8 +526,8 @@
 	struct dm_io_region io;
 	struct dm_io_request io_req = {
 		.bi_rw = READ,
-		.mem.type = DM_IO_BVEC,
-		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
+		.mem.type = DM_IO_BIO,
+		.mem.ptr.bio = bio,
 		.notify.fn = read_callback,
 		.notify.context = bio,
 		.client = m->ms->io_client,
@@ -559,7 +559,7 @@
 		 * We can only read balance if the region is in sync.
 		 */
 		if (likely(region_in_sync(ms, region, 1)))
-			m = choose_mirror(ms, bio->bi_sector);
+			m = choose_mirror(ms, bio->bi_iter.bi_sector);
 		else if (m && atomic_read(&m->error_count))
 			m = NULL;
 
@@ -629,8 +629,8 @@
 	struct mirror *m;
 	struct dm_io_request io_req = {
 		.bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
-		.mem.type = DM_IO_BVEC,
-		.mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
+		.mem.type = DM_IO_BIO,
+		.mem.ptr.bio = bio,
 		.notify.fn = write_callback,
 		.notify.context = bio,
 		.client = ms->io_client,
@@ -1181,7 +1181,7 @@
 	 * The region is in-sync and we can perform reads directly.
 	 * Store enough information so we can retry if it fails.
 	 */
-	m = choose_mirror(ms, bio->bi_sector);
+	m = choose_mirror(ms, bio->bi_iter.bi_sector);
 	if (unlikely(!m))
 		return -EIO;
 
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 69732e0..b929fd5 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -126,7 +126,8 @@
 
 region_t dm_rh_bio_to_region(struct dm_region_hash *rh, struct bio *bio)
 {
-	return dm_rh_sector_to_region(rh, bio->bi_sector - rh->target_begin);
+	return dm_rh_sector_to_region(rh, bio->bi_iter.bi_sector -
+				      rh->target_begin);
 }
 EXPORT_SYMBOL_GPL(dm_rh_bio_to_region);
 
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 7177185..ebddef5 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1438,6 +1438,7 @@
 	if (full_bio) {
 		full_bio->bi_end_io = pe->full_bio_end_io;
 		full_bio->bi_private = pe->full_bio_private;
+		atomic_inc(&full_bio->bi_remaining);
 	}
 	free_pending_exception(pe);
 
@@ -1619,11 +1620,10 @@
 			    struct bio *bio, chunk_t chunk)
 {
 	bio->bi_bdev = s->cow->bdev;
-	bio->bi_sector = chunk_to_sector(s->store,
-					 dm_chunk_number(e->new_chunk) +
-					 (chunk - e->old_chunk)) +
-					 (bio->bi_sector &
-					  s->store->chunk_mask);
+	bio->bi_iter.bi_sector =
+		chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
+				(chunk - e->old_chunk)) +
+		(bio->bi_iter.bi_sector & s->store->chunk_mask);
 }
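
remap_exception() substitutes the chunk number while preserving the bio's
offset within its chunk, which is exactly the (bi_sector & chunk_mask) term.
Worked through with illustrative numbers:

	/* 8-sector chunks => chunk_mask == 7.  A bio at sector 21 sits at
	 * offset 21 & 7 == 5 within its chunk; remapping to COW chunk 5
	 * gives chunk_to_sector(store, 5) + 5 == 40 + 5 == sector 45. */
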
 
 static int snapshot_map(struct dm_target *ti, struct bio *bio)
@@ -1641,7 +1641,7 @@
 		return DM_MAPIO_REMAPPED;
 	}
 
-	chunk = sector_to_chunk(s->store, bio->bi_sector);
+	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
 
 	/* Full snapshots are not usable */
 	/* To get here the table must be live so s->active is always set. */
@@ -1702,7 +1702,8 @@
 		r = DM_MAPIO_SUBMITTED;
 
 		if (!pe->started &&
-		    bio->bi_size == (s->store->chunk_size << SECTOR_SHIFT)) {
+		    bio->bi_iter.bi_size ==
+		    (s->store->chunk_size << SECTOR_SHIFT)) {
 			pe->started = 1;
 			up_write(&s->lock);
 			start_full_bio(pe, bio);
@@ -1758,7 +1759,7 @@
 		return DM_MAPIO_REMAPPED;
 	}
 
-	chunk = sector_to_chunk(s->store, bio->bi_sector);
+	chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
 
 	down_write(&s->lock);
 
@@ -2095,7 +2096,7 @@
 	down_read(&_origins_lock);
 	o = __lookup_origin(origin->bdev);
 	if (o)
-		r = __origin_write(&o->snapshots, bio->bi_sector, bio);
+		r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
 	up_read(&_origins_lock);
 
 	return r;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 73c1712..d1600d2 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -259,13 +259,15 @@
 {
 	sector_t begin, end;
 
-	stripe_map_range_sector(sc, bio->bi_sector, target_stripe, &begin);
+	stripe_map_range_sector(sc, bio->bi_iter.bi_sector,
+				target_stripe, &begin);
 	stripe_map_range_sector(sc, bio_end_sector(bio),
 				target_stripe, &end);
 	if (begin < end) {
 		bio->bi_bdev = sc->stripe[target_stripe].dev->bdev;
-		bio->bi_sector = begin + sc->stripe[target_stripe].physical_start;
-		bio->bi_size = to_bytes(end - begin);
+		bio->bi_iter.bi_sector = begin +
+			sc->stripe[target_stripe].physical_start;
+		bio->bi_iter.bi_size = to_bytes(end - begin);
 		return DM_MAPIO_REMAPPED;
 	} else {
 		/* The range doesn't map to the target stripe */
@@ -293,9 +295,10 @@
 		return stripe_map_range(sc, bio, target_bio_nr);
 	}
 
-	stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
+	stripe_map_sector(sc, bio->bi_iter.bi_sector,
+			  &stripe, &bio->bi_iter.bi_sector);
 
-	bio->bi_sector += sc->stripe[stripe].physical_start;
+	bio->bi_iter.bi_sector += sc->stripe[stripe].physical_start;
 	bio->bi_bdev = sc->stripe[stripe].dev->bdev;
 
 	return DM_MAPIO_REMAPPED;
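
stripe_map_range() shows how cheap trimming has become: restricting the bio
to [begin, end) is just a rewrite of bi_iter.bi_sector and bi_iter.bi_size,
with bi_io_vec left untouched.  The helpers used above are themselves pure
iterator arithmetic:

	/* bio_sectors(bio)    == bio->bi_iter.bi_size >> 9
	 * bio_end_sector(bio) == bio->bi_iter.bi_sector + bio_sectors(bio) */
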
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index ff9ac4b..09a688b 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -311,11 +311,11 @@
 static int switch_map(struct dm_target *ti, struct bio *bio)
 {
 	struct switch_ctx *sctx = ti->private;
-	sector_t offset = dm_target_offset(ti, bio->bi_sector);
+	sector_t offset = dm_target_offset(ti, bio->bi_iter.bi_sector);
 	unsigned path_nr = switch_get_path_nr(sctx, offset);
 
 	bio->bi_bdev = sctx->path_list[path_nr].dmdev->bdev;
-	bio->bi_sector = sctx->path_list[path_nr].start + offset;
+	bio->bi_iter.bi_sector = sctx->path_list[path_nr].start + offset;
 
 	return DM_MAPIO_REMAPPED;
 }
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 726228b..faaf944 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -414,7 +414,7 @@
 static dm_block_t get_bio_block(struct thin_c *tc, struct bio *bio)
 {
 	struct pool *pool = tc->pool;
-	sector_t block_nr = bio->bi_sector;
+	sector_t block_nr = bio->bi_iter.bi_sector;
 
 	if (block_size_is_power_of_two(pool))
 		block_nr >>= pool->sectors_per_block_shift;
@@ -427,14 +427,15 @@
 static void remap(struct thin_c *tc, struct bio *bio, dm_block_t block)
 {
 	struct pool *pool = tc->pool;
-	sector_t bi_sector = bio->bi_sector;
+	sector_t bi_sector = bio->bi_iter.bi_sector;
 
 	bio->bi_bdev = tc->pool_dev->bdev;
 	if (block_size_is_power_of_two(pool))
-		bio->bi_sector = (block << pool->sectors_per_block_shift) |
-				(bi_sector & (pool->sectors_per_block - 1));
+		bio->bi_iter.bi_sector =
+			(block << pool->sectors_per_block_shift) |
+			(bi_sector & (pool->sectors_per_block - 1));
 	else
-		bio->bi_sector = (block * pool->sectors_per_block) +
+		bio->bi_iter.bi_sector = (block * pool->sectors_per_block) +
 				 sector_div(bi_sector, pool->sectors_per_block);
 }
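
remap() keeps the bio's offset within its block and substitutes the
looked-up pool block, using shift/mask when the block size is a power of
two and sector_div() otherwise.  A worked example of the fast path:

	/* sectors_per_block == 128 (shift 7): a thin-device bio at sector
	 * 300 has in-block offset 300 & 127 == 44.  If its block maps to
	 * pool block 9, the pool sector is (9 << 7) | 44 == 1196. */
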
 
@@ -612,8 +613,10 @@
 
 static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m)
 {
-	if (m->bio)
+	if (m->bio) {
 		m->bio->bi_end_io = m->saved_bi_end_io;
+		atomic_inc(&m->bio->bi_remaining);
+	}
 	cell_error(m->tc->pool, m->cell);
 	list_del(&m->list);
 	mempool_free(m, m->tc->pool->mapping_pool);
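
The atomic_inc(&bio->bi_remaining) calls this series adds in dm-snap and
dm-thin compensate for a behaviour change that arrived with bio chaining:
bio_endio() now pairs every call with an atomic_dec_and_test() of
bi_remaining and only completes the bio when the count reaches zero.  A
driver that stashes the submitter's bi_end_io, sees its private completion
fire once, and then restores the original handler so bio_endio() runs again
must take an extra reference first.  The pairing, sketched
(saved_bi_end_io stands for the stashed handler):

	atomic_inc(&bio->bi_remaining);	/* bio_endio() will run once more */
	bio->bi_end_io = saved_bi_end_io;
	...
	bio_endio(bio, 0);		/* drops the reference taken above */
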
@@ -627,8 +630,10 @@
 	int r;
 
 	bio = m->bio;
-	if (bio)
+	if (bio) {
 		bio->bi_end_io = m->saved_bi_end_io;
+		atomic_inc(&bio->bi_remaining);
+	}
 
 	if (m->err) {
 		cell_error(pool, m->cell);
@@ -731,7 +736,8 @@
  */
 static int io_overlaps_block(struct pool *pool, struct bio *bio)
 {
-	return bio->bi_size == (pool->sectors_per_block << SECTOR_SHIFT);
+	return bio->bi_iter.bi_size ==
+		(pool->sectors_per_block << SECTOR_SHIFT);
 }
 
 static int io_overwrites_block(struct pool *pool, struct bio *bio)
@@ -1136,7 +1142,7 @@
 	if (bio_detain(pool, &key, bio, &cell))
 		return;
 
-	if (bio_data_dir(bio) == WRITE && bio->bi_size)
+	if (bio_data_dir(bio) == WRITE && bio->bi_iter.bi_size)
 		break_sharing(tc, bio, block, &key, lookup_result, cell);
 	else {
 		struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -1159,7 +1165,7 @@
 	/*
 	 * Remap empty bios (flushes) immediately, without provisioning.
 	 */
-	if (!bio->bi_size) {
+	if (!bio->bi_iter.bi_size) {
 		inc_all_io_entry(pool, bio);
 		cell_defer_no_holder(tc, cell);
 
@@ -1258,7 +1264,7 @@
 	r = dm_thin_find_block(tc->td, block, 1, &lookup_result);
 	switch (r) {
 	case 0:
-		if (lookup_result.shared && (rw == WRITE) && bio->bi_size)
+		if (lookup_result.shared && (rw == WRITE) && bio->bi_iter.bi_size)
 			handle_unserviceable_bio(tc->pool, bio);
 		else {
 			inc_all_io_entry(tc->pool, bio);
@@ -2939,7 +2945,7 @@
 
 static int thin_map(struct dm_target *ti, struct bio *bio)
 {
-	bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
+	bio->bi_iter.bi_sector = dm_target_offset(ti, bio->bi_iter.bi_sector);
 
 	return thin_bio_map(ti, bio);
 }
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 4b7941d..796007a 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -73,15 +73,10 @@
 	sector_t block;
 	unsigned n_blocks;
 
-	/* saved bio vector */
-	struct bio_vec *io_vec;
-	unsigned io_vec_size;
+	struct bvec_iter iter;
 
 	struct work_struct work;
 
-	/* A space for short vectors; longer vectors are allocated separately. */
-	struct bio_vec io_vec_inline[DM_VERITY_IO_VEC_INLINE];
-
 	/*
 	 * Three variably-size fields follow this struct:
 	 *
@@ -284,9 +279,10 @@
 static int verity_verify_io(struct dm_verity_io *io)
 {
 	struct dm_verity *v = io->v;
+	struct bio *bio = dm_bio_from_per_bio_data(io,
+						   v->ti->per_bio_data_size);
 	unsigned b;
 	int i;
-	unsigned vector = 0, offset = 0;
 
 	for (b = 0; b < io->n_blocks; b++) {
 		struct shash_desc *desc;
@@ -336,31 +332,25 @@
 		}
 
 		todo = 1 << v->data_dev_block_bits;
-		do {
-			struct bio_vec *bv;
+		while (todo) {
 			u8 *page;
-			unsigned len;
+			struct bio_vec bv = bio_iter_iovec(bio, io->iter);
+			unsigned len = bv.bv_len;
 
-			BUG_ON(vector >= io->io_vec_size);
-			bv = &io->io_vec[vector];
-			page = kmap_atomic(bv->bv_page);
-			len = bv->bv_len - offset;
-			if (likely(len >= todo))
-				len = todo;
-			r = crypto_shash_update(desc,
-					page + bv->bv_offset + offset, len);
+			if (likely(len >= todo))
+				len = todo;
+			page = kmap_atomic(bv.bv_page);
+			r = crypto_shash_update(desc, page + bv.bv_offset, len);
 			kunmap_atomic(page);
+
 			if (r < 0) {
 				DMERR("crypto_shash_update failed: %d", r);
 				return r;
 			}
-			offset += len;
-			if (likely(offset == bv->bv_len)) {
-				offset = 0;
-				vector++;
-			}
-			todo -= len;
-		} while (todo);
+
+			bio_advance_iter(bio, &io->iter, len);
+			todo -= len;
+		}
 
 		if (!v->version) {
 			r = crypto_shash_update(desc, v->salt, v->salt_size);
@@ -383,8 +370,6 @@
 			return -EIO;
 		}
 	}
-	BUG_ON(vector != io->io_vec_size);
-	BUG_ON(offset);
 
 	return 0;
 }
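
The rewritten loop is the canonical way to walk data with a bvec_iter:
bio_iter_iovec() materializes the bio_vec at the iterator's current position
(clamped to what remains, so partially consumed vectors work), and
bio_advance_iter() moves the position forward.  A minimal sketch of the same
walk in code that must leave the bio's own iterator untouched:

	struct bvec_iter iter = bio->bi_iter;	/* private copy */

	while (iter.bi_size) {
		struct bio_vec bv = bio_iter_iovec(bio, iter);

		/* consume bv.bv_len bytes at bv.bv_page + bv.bv_offset */
		bio_advance_iter(bio, &iter, bv.bv_len);
	}

dm-verity instead advances io->iter in place, since each hashed block must
resume exactly where the previous one stopped.
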
@@ -400,10 +385,7 @@
 	bio->bi_end_io = io->orig_bi_end_io;
 	bio->bi_private = io->orig_bi_private;
 
-	if (io->io_vec != io->io_vec_inline)
-		mempool_free(io->io_vec, v->vec_mempool);
-
-	bio_endio(bio, error);
+	bio_endio_nodec(bio, error);
 }
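
bio_endio_nodec() is the completion variant that skips the bi_remaining
decrement.  It exists for precisely this save-and-restore pattern: verity
hijacked bi_end_io at submission, so the accounting for the original
submitter's completion has in effect already happened, and a plain
bio_endio() here would underflow the count.  dm-snap and dm-thin solve the
same problem the other way around, with the compensating atomic_inc() seen
earlier.
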
 
 static void verity_work(struct work_struct *w)
@@ -493,9 +475,9 @@
 	struct dm_verity_io *io;
 
 	bio->bi_bdev = v->data_dev->bdev;
-	bio->bi_sector = verity_map_sector(v, bio->bi_sector);
+	bio->bi_iter.bi_sector = verity_map_sector(v, bio->bi_iter.bi_sector);
 
-	if (((unsigned)bio->bi_sector | bio_sectors(bio)) &
+	if (((unsigned)bio->bi_iter.bi_sector | bio_sectors(bio)) &
 	    ((1 << (v->data_dev_block_bits - SECTOR_SHIFT)) - 1)) {
 		DMERR_LIMIT("unaligned io");
 		return -EIO;
@@ -514,18 +496,12 @@
 	io->v = v;
 	io->orig_bi_end_io = bio->bi_end_io;
 	io->orig_bi_private = bio->bi_private;
-	io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
-	io->n_blocks = bio->bi_size >> v->data_dev_block_bits;
+	io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT);
+	io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits;
 
 	bio->bi_end_io = verity_end_io;
 	bio->bi_private = io;
-	io->io_vec_size = bio_segments(bio);
-	if (io->io_vec_size < DM_VERITY_IO_VEC_INLINE)
-		io->io_vec = io->io_vec_inline;
-	else
-		io->io_vec = mempool_alloc(v->vec_mempool, GFP_NOIO);
-	memcpy(io->io_vec, bio_iovec(bio),
-	       io->io_vec_size * sizeof(struct bio_vec));
+	io->iter = bio->bi_iter;
 
 	verity_submit_prefetch(v, io);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b49c762..8c53b09 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -575,7 +575,7 @@
 		atomic_inc_return(&md->pending[rw]));
 
 	if (unlikely(dm_stats_used(&md->stats)))
-		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_sector,
+		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
 				    bio_sectors(bio), false, 0, &io->stats_aux);
 }
 
@@ -593,7 +593,7 @@
 	part_stat_unlock();
 
 	if (unlikely(dm_stats_used(&md->stats)))
-		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_sector,
+		dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
 				    bio_sectors(bio), true, duration, &io->stats_aux);
 
 	/*
@@ -742,7 +742,7 @@
 		if (io_error == DM_ENDIO_REQUEUE)
 			return;
 
-		if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
+		if ((bio->bi_rw & REQ_FLUSH) && bio->bi_iter.bi_size) {
 			/*
 			 * Preflush done for flush with data, reissue
 			 * without REQ_FLUSH.
@@ -797,7 +797,7 @@
 	struct dm_rq_clone_bio_info *info = clone->bi_private;
 	struct dm_rq_target_io *tio = info->tio;
 	struct bio *bio = info->orig;
-	unsigned int nr_bytes = info->orig->bi_size;
+	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
 
 	bio_put(clone);
 
@@ -1128,7 +1128,7 @@
 	 * this io.
 	 */
 	atomic_inc(&tio->io->io_count);
-	sector = clone->bi_sector;
+	sector = clone->bi_iter.bi_sector;
 	r = ti->type->map(ti, clone);
 	if (r == DM_MAPIO_REMAPPED) {
 		/* the bio has been remapped so dispatch it */
@@ -1155,76 +1155,32 @@
 	struct dm_io *io;
 	sector_t sector;
 	sector_t sector_count;
-	unsigned short idx;
 };
 
 static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len)
 {
-	bio->bi_sector = sector;
-	bio->bi_size = to_bytes(len);
-}
-
-static void bio_setup_bv(struct bio *bio, unsigned short idx, unsigned short bv_count)
-{
-	bio->bi_idx = idx;
-	bio->bi_vcnt = idx + bv_count;
-	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
-}
-
-static void clone_bio_integrity(struct bio *bio, struct bio *clone,
-				unsigned short idx, unsigned len, unsigned offset,
-				unsigned trim)
-{
-	if (!bio_integrity(bio))
-		return;
-
-	bio_integrity_clone(clone, bio, GFP_NOIO);
-
-	if (trim)
-		bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len);
-}
-
-/*
- * Creates a little bio that just does part of a bvec.
- */
-static void clone_split_bio(struct dm_target_io *tio, struct bio *bio,
-			    sector_t sector, unsigned short idx,
-			    unsigned offset, unsigned len)
-{
-	struct bio *clone = &tio->clone;
-	struct bio_vec *bv = bio->bi_io_vec + idx;
-
-	*clone->bi_io_vec = *bv;
-
-	bio_setup_sector(clone, sector, len);
-
-	clone->bi_bdev = bio->bi_bdev;
-	clone->bi_rw = bio->bi_rw;
-	clone->bi_vcnt = 1;
-	clone->bi_io_vec->bv_offset = offset;
-	clone->bi_io_vec->bv_len = clone->bi_size;
-	clone->bi_flags |= 1 << BIO_CLONED;
-
-	clone_bio_integrity(bio, clone, idx, len, offset, 1);
+	bio->bi_iter.bi_sector = sector;
+	bio->bi_iter.bi_size = to_bytes(len);
 }
 
 /*
  * Creates a bio that consists of range of complete bvecs.
  */
 static void clone_bio(struct dm_target_io *tio, struct bio *bio,
-		      sector_t sector, unsigned short idx,
-		      unsigned short bv_count, unsigned len)
+		      sector_t sector, unsigned len)
 {
 	struct bio *clone = &tio->clone;
-	unsigned trim = 0;
 
-	__bio_clone(clone, bio);
-	bio_setup_sector(clone, sector, len);
-	bio_setup_bv(clone, idx, bv_count);
+	__bio_clone_fast(clone, bio);
 
-	if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
-		trim = 1;
-	clone_bio_integrity(bio, clone, idx, len, 0, trim);
+	if (bio_integrity(bio))
+		bio_integrity_clone(clone, bio, GFP_NOIO);
+
+	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
+	clone->bi_iter.bi_size = to_bytes(len);
+
+	if (bio_integrity(bio))
+		bio_integrity_trim(clone, 0, len);
 }
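
clone_bio() can now carve out an arbitrary sector range with iterator
arithmetic alone: __bio_clone_fast() shares the parent's bvec array,
bio_advance() walks the clone's iterator up to the requested start, and
shrinking bi_iter.bi_size trims the tail.  Worked through with illustrative
numbers:

	/* parent: bi_sector == 100, bi_size == 32 KiB (64 sectors).
	 * To clone sectors 116..131:
	 *   bio_advance(clone, to_bytes(16));       bi_sector becomes 116,
	 *                                           bi_size drops to 24 KiB;
	 *   clone->bi_iter.bi_size = to_bytes(16);  caps it at 8 KiB.
	 * No bio_vec is copied or modified at any point. */
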
 
 static struct dm_target_io *alloc_tio(struct clone_info *ci,
@@ -1257,7 +1213,7 @@
 	 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
 	 * and discard, so no need for concern about wasted bvec allocations.
 	 */
-	 __bio_clone(clone, ci->bio);
+	 __bio_clone_fast(clone, ci->bio);
 	if (len)
 		bio_setup_sector(clone, ci->sector, len);
 
@@ -1286,10 +1242,7 @@
 }
 
 static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
-				     sector_t sector, int nr_iovecs,
-				     unsigned short idx, unsigned short bv_count,
-				     unsigned offset, unsigned len,
-				     unsigned split_bvec)
+				     sector_t sector, unsigned len)
 {
 	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
@@ -1303,11 +1256,8 @@
 		num_target_bios = ti->num_write_bios(ti, bio);
 
 	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
-		tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr);
-		if (split_bvec)
-			clone_split_bio(tio, bio, sector, idx, offset, len);
-		else
-			clone_bio(tio, bio, sector, idx, bv_count, len);
+		tio = alloc_tio(ci, ti, 0, target_bio_nr);
+		clone_bio(tio, bio, sector, len);
 		__map_bio(tio);
 	}
 }
@@ -1379,68 +1329,13 @@
 }
 
 /*
- * Find maximum number of sectors / bvecs we can process with a single bio.
- */
-static sector_t __len_within_target(struct clone_info *ci, sector_t max, int *idx)
-{
-	struct bio *bio = ci->bio;
-	sector_t bv_len, total_len = 0;
-
-	for (*idx = ci->idx; max && (*idx < bio->bi_vcnt); (*idx)++) {
-		bv_len = to_sector(bio->bi_io_vec[*idx].bv_len);
-
-		if (bv_len > max)
-			break;
-
-		max -= bv_len;
-		total_len += bv_len;
-	}
-
-	return total_len;
-}
-
-static int __split_bvec_across_targets(struct clone_info *ci,
-				       struct dm_target *ti, sector_t max)
-{
-	struct bio *bio = ci->bio;
-	struct bio_vec *bv = bio->bi_io_vec + ci->idx;
-	sector_t remaining = to_sector(bv->bv_len);
-	unsigned offset = 0;
-	sector_t len;
-
-	do {
-		if (offset) {
-			ti = dm_table_find_target(ci->map, ci->sector);
-			if (!dm_target_is_valid(ti))
-				return -EIO;
-
-			max = max_io_len(ci->sector, ti);
-		}
-
-		len = min(remaining, max);
-
-		__clone_and_map_data_bio(ci, ti, ci->sector, 1, ci->idx, 0,
-					 bv->bv_offset + offset, len, 1);
-
-		ci->sector += len;
-		ci->sector_count -= len;
-		offset += to_bytes(len);
-	} while (remaining -= len);
-
-	ci->idx++;
-
-	return 0;
-}
-
-/*
  * Select the correct strategy for processing a non-flush bio.
  */
 static int __split_and_process_non_flush(struct clone_info *ci)
 {
 	struct bio *bio = ci->bio;
 	struct dm_target *ti;
-	sector_t len, max;
-	int idx;
+	unsigned len;
 
 	if (unlikely(bio->bi_rw & REQ_DISCARD))
 		return __send_discard(ci);
@@ -1451,41 +1346,14 @@
 	if (!dm_target_is_valid(ti))
 		return -EIO;
 
-	max = max_io_len(ci->sector, ti);
+	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
 
-	/*
-	 * Optimise for the simple case where we can do all of
-	 * the remaining io with a single clone.
-	 */
-	if (ci->sector_count <= max) {
-		__clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs,
-					 ci->idx, bio->bi_vcnt - ci->idx, 0,
-					 ci->sector_count, 0);
-		ci->sector_count = 0;
-		return 0;
-	}
+	__clone_and_map_data_bio(ci, ti, ci->sector, len);
 
-	/*
-	 * There are some bvecs that don't span targets.
-	 * Do as many of these as possible.
-	 */
-	if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) {
-		len = __len_within_target(ci, max, &idx);
+	ci->sector += len;
+	ci->sector_count -= len;
 
-		__clone_and_map_data_bio(ci, ti, ci->sector, bio->bi_max_vecs,
-					 ci->idx, idx - ci->idx, 0, len, 0);
-
-		ci->sector += len;
-		ci->sector_count -= len;
-		ci->idx = idx;
-
-		return 0;
-	}
-
-	/*
-	 * Handle a bvec that must be split between two or more targets.
-	 */
-	return __split_bvec_across_targets(ci, ti, max);
+	return 0;
 }
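
With clone_bio() able to start and stop anywhere, the old three-way case
analysis (whole remainder fits / whole bvecs fit / a bvec straddles targets)
collapses into one step per target: clamp the length at the target boundary,
clone, advance.  The caller simply loops until the bio is consumed:

	while (ci.sector_count && !error)
		error = __split_and_process_non_flush(&ci);

each call advancing ci.sector and shrinking ci.sector_count by however much
it mapped.
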
 
 /*
@@ -1510,8 +1378,7 @@
 	ci.io->bio = bio;
 	ci.io->md = md;
 	spin_lock_init(&ci.io->endio_lock);
-	ci.sector = bio->bi_sector;
-	ci.idx = bio->bi_idx;
+	ci.sector = bio->bi_iter.bi_sector;
 
 	start_io_acct(ci.io);
 
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 3193aef..e8b4574 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -74,8 +74,8 @@
 {
 	struct bio *b = bio->bi_private;
 
-	b->bi_size = bio->bi_size;
-	b->bi_sector = bio->bi_sector;
+	b->bi_iter.bi_size = bio->bi_iter.bi_size;
+	b->bi_iter.bi_sector = bio->bi_iter.bi_sector;
 
 	bio_put(bio);
 
@@ -185,26 +185,31 @@
 			return;
 		}
 
-		if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), WRITE))
+		if (check_sector(conf, bio->bi_iter.bi_sector,
+				 bio_end_sector(bio), WRITE))
 			failit = 1;
 		if (check_mode(conf, WritePersistent)) {
-			add_sector(conf, bio->bi_sector, WritePersistent);
+			add_sector(conf, bio->bi_iter.bi_sector,
+				   WritePersistent);
 			failit = 1;
 		}
 		if (check_mode(conf, WriteTransient))
 			failit = 1;
 	} else {
 		/* read request */
-		if (check_sector(conf, bio->bi_sector, bio_end_sector(bio), READ))
+		if (check_sector(conf, bio->bi_iter.bi_sector,
+				 bio_end_sector(bio), READ))
 			failit = 1;
 		if (check_mode(conf, ReadTransient))
 			failit = 1;
 		if (check_mode(conf, ReadPersistent)) {
-			add_sector(conf, bio->bi_sector, ReadPersistent);
+			add_sector(conf, bio->bi_iter.bi_sector,
+				   ReadPersistent);
 			failit = 1;
 		}
 		if (check_mode(conf, ReadFixable)) {
-			add_sector(conf, bio->bi_sector, ReadFixable);
+			add_sector(conf, bio->bi_iter.bi_sector,
+				   ReadFixable);
 			failit = 1;
 		}
 	}
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index f03fabd..56f534b 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -288,65 +288,65 @@
 
 static void linear_make_request(struct mddev *mddev, struct bio *bio)
 {
+	char b[BDEVNAME_SIZE];
 	struct dev_info *tmp_dev;
-	sector_t start_sector;
+	struct bio *split;
+	sector_t start_sector, end_sector, data_offset;
 
 	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bio);
 		return;
 	}
 
-	rcu_read_lock();
-	tmp_dev = which_dev(mddev, bio->bi_sector);
-	start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
+	do {
+		rcu_read_lock();
 
-
-	if (unlikely(bio->bi_sector >= (tmp_dev->end_sector)
-		     || (bio->bi_sector < start_sector))) {
-		char b[BDEVNAME_SIZE];
-
-		printk(KERN_ERR
-		       "md/linear:%s: make_request: Sector %llu out of bounds on "
-		       "dev %s: %llu sectors, offset %llu\n",
-		       mdname(mddev),
-		       (unsigned long long)bio->bi_sector,
-		       bdevname(tmp_dev->rdev->bdev, b),
-		       (unsigned long long)tmp_dev->rdev->sectors,
-		       (unsigned long long)start_sector);
-		rcu_read_unlock();
-		bio_io_error(bio);
-		return;
-	}
-	if (unlikely(bio_end_sector(bio) > tmp_dev->end_sector)) {
-		/* This bio crosses a device boundary, so we have to
-		 * split it.
-		 */
-		struct bio_pair *bp;
-		sector_t end_sector = tmp_dev->end_sector;
+		tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
+		start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
+		end_sector = tmp_dev->end_sector;
+		data_offset = tmp_dev->rdev->data_offset;
+		bio->bi_bdev = tmp_dev->rdev->bdev;
 
 		rcu_read_unlock();
 
-		bp = bio_split(bio, end_sector - bio->bi_sector);
+		if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
+			     bio->bi_iter.bi_sector < start_sector))
+			goto out_of_bounds;
 
-		linear_make_request(mddev, &bp->bio1);
-		linear_make_request(mddev, &bp->bio2);
-		bio_pair_release(bp);
-		return;
-	}
-		    
-	bio->bi_bdev = tmp_dev->rdev->bdev;
-	bio->bi_sector = bio->bi_sector - start_sector
-		+ tmp_dev->rdev->data_offset;
-	rcu_read_unlock();
+		if (unlikely(bio_end_sector(bio) > end_sector)) {
+			/* This bio crosses a device boundary, so we have to
+			 * split it.
+			 */
+			split = bio_split(bio, end_sector -
+					  bio->bi_iter.bi_sector,
+					  GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
 
-	if (unlikely((bio->bi_rw & REQ_DISCARD) &&
-		     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
-		/* Just ignore it */
-		bio_endio(bio, 0);
-		return;
-	}
+		split->bi_iter.bi_sector = split->bi_iter.bi_sector -
+			start_sector + data_offset;
 
-	generic_make_request(bio);
+		if (unlikely((split->bi_rw & REQ_DISCARD) &&
+			 !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
+			/* Just ignore it */
+			bio_endio(split, 0);
+		} else
+			generic_make_request(split);
+	} while (split != bio);
+	return;
+
+out_of_bounds:
+	printk(KERN_ERR
+	       "md/linear:%s: make_request: Sector %llu out of bounds on "
+	       "dev %s: %llu sectors, offset %llu\n",
+	       mdname(mddev),
+	       (unsigned long long)bio->bi_iter.bi_sector,
+	       bdevname(tmp_dev->rdev->bdev, b),
+	       (unsigned long long)tmp_dev->rdev->sectors,
+	       (unsigned long long)start_sector);
+	bio_io_error(bio);
 }
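
This is the splitting idiom that replaces bio_pair throughout this series:
bio_split() clones the first N sectors into a fresh bio (sharing the
immutable bvec array) and advances the parent past them; bio_chain() then
bumps the parent's bi_remaining so its completion waits on the child.  The
loop, reduced to its essentials (max_sectors stands for whatever limit the
driver computed):

	do {
		if (max_sectors < bio_sectors(bio)) {
			split = bio_split(bio, max_sectors,
					  GFP_NOIO, fs_bio_set);
			bio_chain(split, bio);	/* parent waits for child */
		} else
			split = bio;		/* last (or only) piece */

		generic_make_request(split);
	} while (split != bio);

generic_make_request() defers recursively submitted bios onto
current->bio_list, so the pieces are processed iteratively rather than by
the recursive linear_make_request() calls the old code needed.
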
 
 static void linear_status (struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 40c5313..4ad5cc4 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -393,7 +393,7 @@
 	struct mddev *mddev = container_of(ws, struct mddev, flush_work);
 	struct bio *bio = mddev->flush_bio;
 
-	if (bio->bi_size == 0)
+	if (bio->bi_iter.bi_size == 0)
 		/* an empty barrier - all done */
 		bio_endio(bio, 0);
 	else {
@@ -754,7 +754,7 @@
 	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
 
 	bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	bio_add_page(bio, page, size, 0);
 	bio->bi_private = rdev;
 	bio->bi_end_io = super_written;
@@ -782,18 +782,16 @@
 	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
 	int ret;
 
-	rw |= REQ_SYNC;
-
 	bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
 		rdev->meta_bdev : rdev->bdev;
 	if (metadata_op)
-		bio->bi_sector = sector + rdev->sb_start;
+		bio->bi_iter.bi_sector = sector + rdev->sb_start;
 	else if (rdev->mddev->reshape_position != MaxSector &&
 		 (rdev->mddev->reshape_backwards ==
 		  (sector >= rdev->mddev->reshape_position)))
-		bio->bi_sector = sector + rdev->new_data_offset;
+		bio->bi_iter.bi_sector = sector + rdev->new_data_offset;
 	else
-		bio->bi_sector = sector + rdev->data_offset;
+		bio->bi_iter.bi_sector = sector + rdev->data_offset;
 	bio_add_page(bio, page, size, 0);
 	submit_bio_wait(rw, bio);
 
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 1642eae..849ad39 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -100,7 +100,7 @@
 		md_error (mp_bh->mddev, rdev);
 		printk(KERN_ERR "multipath: %s: rescheduling sector %llu\n", 
 		       bdevname(rdev->bdev,b), 
-		       (unsigned long long)bio->bi_sector);
+		       (unsigned long long)bio->bi_iter.bi_sector);
 		multipath_reschedule_retry(mp_bh);
 	} else
 		multipath_end_bh_io(mp_bh, error);
@@ -132,7 +132,7 @@
 	multipath = conf->multipaths + mp_bh->path;
 
 	mp_bh->bio = *bio;
-	mp_bh->bio.bi_sector += multipath->rdev->data_offset;
+	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
 	mp_bh->bio.bi_bdev = multipath->rdev->bdev;
 	mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;
 	mp_bh->bio.bi_end_io = multipath_end_request;
@@ -355,21 +355,22 @@
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
 		bio = &mp_bh->bio;
-		bio->bi_sector = mp_bh->master_bio->bi_sector;
+		bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
 		
 		if ((mp_bh->path = multipath_map (conf))<0) {
 			printk(KERN_ALERT "multipath: %s: unrecoverable IO read"
 				" error for block %llu\n",
 				bdevname(bio->bi_bdev,b),
-				(unsigned long long)bio->bi_sector);
+				(unsigned long long)bio->bi_iter.bi_sector);
 			multipath_end_bh_io(mp_bh, -EIO);
 		} else {
 			printk(KERN_ERR "multipath: %s: redirecting sector %llu"
 				" to another IO path\n",
 				bdevname(bio->bi_bdev,b),
-				(unsigned long long)bio->bi_sector);
+				(unsigned long long)bio->bi_iter.bi_sector);
 			*bio = *(mp_bh->master_bio);
-			bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset;
+			bio->bi_iter.bi_sector +=
+				conf->multipaths[mp_bh->path].rdev->data_offset;
 			bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev;
 			bio->bi_rw |= REQ_FAILFAST_TRANSPORT;
 			bio->bi_end_io = multipath_end_request;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index c4d420b..407a99e 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -501,10 +501,11 @@
 			unsigned int chunk_sects, struct bio *bio)
 {
 	if (likely(is_power_of_2(chunk_sects))) {
-		return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
+		return chunk_sects >=
+			((bio->bi_iter.bi_sector & (chunk_sects-1))
 					+ bio_sectors(bio));
 	} else{
-		sector_t sector = bio->bi_sector;
+		sector_t sector = bio->bi_iter.bi_sector;
 		return chunk_sects >= (sector_div(sector, chunk_sects)
 						+ bio_sectors(bio));
 	}
@@ -512,64 +513,44 @@
 
 static void raid0_make_request(struct mddev *mddev, struct bio *bio)
 {
-	unsigned int chunk_sects;
-	sector_t sector_offset;
 	struct strip_zone *zone;
 	struct md_rdev *tmp_dev;
+	struct bio *split;
 
 	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bio);
 		return;
 	}
 
-	chunk_sects = mddev->chunk_sectors;
-	if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) {
-		sector_t sector = bio->bi_sector;
-		struct bio_pair *bp;
-		/* Sanity check -- queue functions should prevent this happening */
-		if (bio_segments(bio) > 1)
-			goto bad_map;
-		/* This is a one page bio that upper layers
-		 * refuse to split for us, so we need to split it.
-		 */
-		if (likely(is_power_of_2(chunk_sects)))
-			bp = bio_split(bio, chunk_sects - (sector &
-							   (chunk_sects-1)));
-		else
-			bp = bio_split(bio, chunk_sects -
-				       sector_div(sector, chunk_sects));
-		raid0_make_request(mddev, &bp->bio1);
-		raid0_make_request(mddev, &bp->bio2);
-		bio_pair_release(bp);
-		return;
-	}
+	do {
+		sector_t sector = bio->bi_iter.bi_sector;
+		unsigned chunk_sects = mddev->chunk_sectors;
 
-	sector_offset = bio->bi_sector;
-	zone = find_zone(mddev->private, &sector_offset);
-	tmp_dev = map_sector(mddev, zone, bio->bi_sector,
-			     &sector_offset);
-	bio->bi_bdev = tmp_dev->bdev;
-	bio->bi_sector = sector_offset + zone->dev_start +
-		tmp_dev->data_offset;
+		unsigned sectors = chunk_sects -
+			(likely(is_power_of_2(chunk_sects))
+			 ? (sector & (chunk_sects-1))
+			 : sector_div(sector, chunk_sects));
 
-	if (unlikely((bio->bi_rw & REQ_DISCARD) &&
-		     !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) {
-		/* Just ignore it */
-		bio_endio(bio, 0);
-		return;
-	}
+		if (sectors < bio_sectors(bio)) {
+			split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
 
-	generic_make_request(bio);
-	return;
+		zone = find_zone(mddev->private, &sector);
+		tmp_dev = map_sector(mddev, zone, sector, &sector);
+		split->bi_bdev = tmp_dev->bdev;
+		split->bi_iter.bi_sector = sector + zone->dev_start +
+			tmp_dev->data_offset;
 
-bad_map:
-	printk("md/raid0:%s: make_request bug: can't convert block across chunks"
-	       " or bigger than %dk %llu %d\n",
-	       mdname(mddev), chunk_sects / 2,
-	       (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
-
-	bio_io_error(bio);
-	return;
+		if (unlikely((split->bi_rw & REQ_DISCARD) &&
+			 !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
+			/* Just ignore it */
+			bio_endio(split, 0);
+		} else
+			generic_make_request(split);
+	} while (split != bio);
 }
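
The per-iteration length is the distance from the bio's current sector to
the end of its chunk, after which bio_split()/bio_chain() peel that much off
and the loop repeats.  With a power-of-two chunk, worked through:

	/* chunk_sects == 128: a bio at sector 200 is 200 & 127 == 72
	 * sectors into its chunk, so at most 128 - 72 == 56 sectors fit
	 * before the boundary. */

The sector_div() branch computes the same remainder for chunk sizes that
are not powers of two.
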
 
 static void raid0_status(struct seq_file *seq, struct mddev *mddev)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a49cfcc..fd3a2a1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -229,7 +229,7 @@
 	int done;
 	struct r1conf *conf = r1_bio->mddev->private;
 	sector_t start_next_window = r1_bio->start_next_window;
-	sector_t bi_sector = bio->bi_sector;
+	sector_t bi_sector = bio->bi_iter.bi_sector;
 
 	if (bio->bi_phys_segments) {
 		unsigned long flags;
@@ -265,9 +265,8 @@
 	if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
 		pr_debug("raid1: sync end %s on sectors %llu-%llu\n",
 			 (bio_data_dir(bio) == WRITE) ? "write" : "read",
-			 (unsigned long long) bio->bi_sector,
-			 (unsigned long long) bio->bi_sector +
-			 bio_sectors(bio) - 1);
+			 (unsigned long long) bio->bi_iter.bi_sector,
+			 (unsigned long long) bio_end_sector(bio) - 1);
 
 		call_bio_endio(r1_bio);
 	}
@@ -466,9 +465,8 @@
 				struct bio *mbio = r1_bio->master_bio;
 				pr_debug("raid1: behind end write sectors"
 					 " %llu-%llu\n",
-					 (unsigned long long) mbio->bi_sector,
-					 (unsigned long long) mbio->bi_sector +
-					 bio_sectors(mbio) - 1);
+					 (unsigned long long) mbio->bi_iter.bi_sector,
+					 (unsigned long long) bio_end_sector(mbio) - 1);
 				call_bio_endio(r1_bio);
 			}
 		}
@@ -875,7 +873,7 @@
 		else if ((conf->next_resync - RESYNC_WINDOW_SECTORS
 				>= bio_end_sector(bio)) ||
 			 (conf->next_resync + NEXT_NORMALIO_DISTANCE
-				<= bio->bi_sector))
+				<= bio->bi_iter.bi_sector))
 			wait = false;
 		else
 			wait = true;
@@ -913,14 +911,14 @@
 
 	if (bio && bio_data_dir(bio) == WRITE) {
 		if (conf->next_resync + NEXT_NORMALIO_DISTANCE
-		    <= bio->bi_sector) {
+		    <= bio->bi_iter.bi_sector) {
 			if (conf->start_next_window == MaxSector)
 				conf->start_next_window =
 					conf->next_resync +
 					NEXT_NORMALIO_DISTANCE;
 
 			if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE)
-			    <= bio->bi_sector)
+			    <= bio->bi_iter.bi_sector)
 				conf->next_window_requests++;
 			else
 				conf->current_window_requests++;
@@ -1027,7 +1025,8 @@
 		if (bvecs[i].bv_page)
 			put_page(bvecs[i].bv_page);
 	kfree(bvecs);
-	pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
+	pr_debug("%dB behind alloc failed, doing sync I/O\n",
+		 bio->bi_iter.bi_size);
 }
 
 struct raid1_plug_cb {
@@ -1107,7 +1106,7 @@
 
 	if (bio_data_dir(bio) == WRITE &&
 	    bio_end_sector(bio) > mddev->suspend_lo &&
-	    bio->bi_sector < mddev->suspend_hi) {
+	    bio->bi_iter.bi_sector < mddev->suspend_hi) {
 		/* As the suspend_* range is controlled by
 		 * userspace, we want an interruptible
 		 * wait.
@@ -1118,7 +1117,7 @@
 			prepare_to_wait(&conf->wait_barrier,
 					&w, TASK_INTERRUPTIBLE);
 			if (bio_end_sector(bio) <= mddev->suspend_lo ||
-			    bio->bi_sector >= mddev->suspend_hi)
+			    bio->bi_iter.bi_sector >= mddev->suspend_hi)
 				break;
 			schedule();
 		}
@@ -1140,7 +1139,7 @@
 	r1_bio->sectors = bio_sectors(bio);
 	r1_bio->state = 0;
 	r1_bio->mddev = mddev;
-	r1_bio->sector = bio->bi_sector;
+	r1_bio->sector = bio->bi_iter.bi_sector;
 
 	/* We might need to issue multiple reads to different
 	 * devices if there are bad blocks around, so we keep
@@ -1180,12 +1179,13 @@
 		r1_bio->read_disk = rdisk;
 
 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r1_bio->sector - bio->bi_sector,
+		bio_trim(read_bio, r1_bio->sector - bio->bi_iter.bi_sector,
 			 max_sectors);
 
 		r1_bio->bios[rdisk] = read_bio;
 
-		read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
+		read_bio->bi_iter.bi_sector = r1_bio->sector +
+			mirror->rdev->data_offset;
 		read_bio->bi_bdev = mirror->rdev->bdev;
 		read_bio->bi_end_io = raid1_end_read_request;
 		read_bio->bi_rw = READ | do_sync;
@@ -1197,7 +1197,7 @@
 			 */
 
 			sectors_handled = (r1_bio->sector + max_sectors
-					   - bio->bi_sector);
+					   - bio->bi_iter.bi_sector);
 			r1_bio->sectors = max_sectors;
 			spin_lock_irq(&conf->device_lock);
 			if (bio->bi_phys_segments == 0)
@@ -1218,7 +1218,8 @@
 			r1_bio->sectors = bio_sectors(bio) - sectors_handled;
 			r1_bio->state = 0;
 			r1_bio->mddev = mddev;
-			r1_bio->sector = bio->bi_sector + sectors_handled;
+			r1_bio->sector = bio->bi_iter.bi_sector +
+				sectors_handled;
 			goto read_again;
 		} else
 			generic_make_request(read_bio);
@@ -1321,7 +1322,7 @@
 			if (r1_bio->bios[j])
 				rdev_dec_pending(conf->mirrors[j].rdev, mddev);
 		r1_bio->state = 0;
-		allow_barrier(conf, start_next_window, bio->bi_sector);
+		allow_barrier(conf, start_next_window, bio->bi_iter.bi_sector);
 		md_wait_for_blocked_rdev(blocked_rdev, mddev);
 		start_next_window = wait_barrier(conf, bio);
 		/*
@@ -1348,7 +1349,7 @@
 			bio->bi_phys_segments++;
 		spin_unlock_irq(&conf->device_lock);
 	}
-	sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector;
+	sectors_handled = r1_bio->sector + max_sectors - bio->bi_iter.bi_sector;
 
 	atomic_set(&r1_bio->remaining, 1);
 	atomic_set(&r1_bio->behind_remaining, 0);
@@ -1360,7 +1361,7 @@
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(mbio, r1_bio->sector - bio->bi_sector, max_sectors);
+		bio_trim(mbio, r1_bio->sector - bio->bi_iter.bi_sector, max_sectors);
 
 		if (first_clone) {
 			/* do behind I/O ?
@@ -1394,7 +1395,7 @@
 
 		r1_bio->bios[i] = mbio;
 
-		mbio->bi_sector	= (r1_bio->sector +
+		mbio->bi_iter.bi_sector	= (r1_bio->sector +
 				   conf->mirrors[i].rdev->data_offset);
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_end_io	= raid1_end_write_request;
@@ -1434,7 +1435,7 @@
 		r1_bio->sectors = bio_sectors(bio) - sectors_handled;
 		r1_bio->state = 0;
 		r1_bio->mddev = mddev;
-		r1_bio->sector = bio->bi_sector + sectors_handled;
+		r1_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
 		goto retry_write;
 	}
 
@@ -1958,14 +1959,14 @@
 		/* fixup the bio for reuse */
 		bio_reset(b);
 		b->bi_vcnt = vcnt;
-		b->bi_size = r1_bio->sectors << 9;
-		b->bi_sector = r1_bio->sector +
+		b->bi_iter.bi_size = r1_bio->sectors << 9;
+		b->bi_iter.bi_sector = r1_bio->sector +
 			conf->mirrors[i].rdev->data_offset;
 		b->bi_bdev = conf->mirrors[i].rdev->bdev;
 		b->bi_end_io = end_sync_read;
 		b->bi_private = r1_bio;
 
-		size = b->bi_size;
+		size = b->bi_iter.bi_size;
 		for (j = 0; j < vcnt ; j++) {
 			struct bio_vec *bi;
 			bi = &b->bi_io_vec[j];
@@ -2220,11 +2221,11 @@
 		}
 
 		wbio->bi_rw = WRITE;
-		wbio->bi_sector = r1_bio->sector;
-		wbio->bi_size = r1_bio->sectors << 9;
+		wbio->bi_iter.bi_sector = r1_bio->sector;
+		wbio->bi_iter.bi_size = r1_bio->sectors << 9;
 
 		bio_trim(wbio, sector - r1_bio->sector, sectors);
-		wbio->bi_sector += rdev->data_offset;
+		wbio->bi_iter.bi_sector += rdev->data_offset;
 		wbio->bi_bdev = rdev->bdev;
 		if (submit_bio_wait(WRITE, wbio) == 0)
 			/* failure! */
@@ -2338,7 +2339,8 @@
 		}
 		r1_bio->read_disk = disk;
 		bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev);
-		bio_trim(bio, r1_bio->sector - bio->bi_sector, max_sectors);
+		bio_trim(bio, r1_bio->sector - bio->bi_iter.bi_sector,
+			 max_sectors);
 		r1_bio->bios[r1_bio->read_disk] = bio;
 		rdev = conf->mirrors[disk].rdev;
 		printk_ratelimited(KERN_ERR
@@ -2347,7 +2349,7 @@
 				   mdname(mddev),
 				   (unsigned long long)r1_bio->sector,
 				   bdevname(rdev->bdev, b));
-		bio->bi_sector = r1_bio->sector + rdev->data_offset;
+		bio->bi_iter.bi_sector = r1_bio->sector + rdev->data_offset;
 		bio->bi_bdev = rdev->bdev;
 		bio->bi_end_io = raid1_end_read_request;
 		bio->bi_rw = READ | do_sync;
@@ -2356,7 +2358,7 @@
 			/* Drat - have to split this up more */
 			struct bio *mbio = r1_bio->master_bio;
 			int sectors_handled = (r1_bio->sector + max_sectors
-					       - mbio->bi_sector);
+					       - mbio->bi_iter.bi_sector);
 			r1_bio->sectors = max_sectors;
 			spin_lock_irq(&conf->device_lock);
 			if (mbio->bi_phys_segments == 0)
@@ -2374,7 +2376,8 @@
 			r1_bio->state = 0;
 			set_bit(R1BIO_ReadError, &r1_bio->state);
 			r1_bio->mddev = mddev;
-			r1_bio->sector = mbio->bi_sector + sectors_handled;
+			r1_bio->sector = mbio->bi_iter.bi_sector +
+				sectors_handled;
 
 			goto read_more;
 		} else
@@ -2598,7 +2601,7 @@
 		}
 		if (bio->bi_end_io) {
 			atomic_inc(&rdev->nr_pending);
-			bio->bi_sector = sector_nr + rdev->data_offset;
+			bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
 			bio->bi_bdev = rdev->bdev;
 			bio->bi_private = r1_bio;
 		}
@@ -2698,7 +2701,7 @@
 							continue;
 						/* remove last page from this bio */
 						bio->bi_vcnt--;
-						bio->bi_size -= len;
+						bio->bi_iter.bi_size -= len;
 						bio->bi_flags &= ~(1<< BIO_SEG_VALID);
 					}
 					goto bio_full;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8d39d63..33fc408 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1152,14 +1152,12 @@
 	kfree(plug);
 }
 
-static void make_request(struct mddev *mddev, struct bio * bio)
+static void __make_request(struct mddev *mddev, struct bio *bio)
 {
 	struct r10conf *conf = mddev->private;
 	struct r10bio *r10_bio;
 	struct bio *read_bio;
 	int i;
-	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
-	int chunk_sects = chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
@@ -1174,88 +1172,27 @@
 	int max_sectors;
 	int sectors;
 
-	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
-		md_flush_request(mddev, bio);
-		return;
-	}
-
-	/* If this request crosses a chunk boundary, we need to
-	 * split it.  This will only happen for 1 PAGE (or less) requests.
-	 */
-	if (unlikely((bio->bi_sector & chunk_mask) + bio_sectors(bio)
-		     > chunk_sects
-		     && (conf->geo.near_copies < conf->geo.raid_disks
-			 || conf->prev.near_copies < conf->prev.raid_disks))) {
-		struct bio_pair *bp;
-		/* Sanity check -- queue functions should prevent this happening */
-		if (bio_segments(bio) > 1)
-			goto bad_map;
-		/* This is a one page bio that upper layers
-		 * refuse to split for us, so we need to split it.
-		 */
-		bp = bio_split(bio,
-			       chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
-
-		/* Each of these 'make_request' calls will call 'wait_barrier'.
-		 * If the first succeeds but the second blocks due to the resync
-		 * thread raising the barrier, we will deadlock because the
-		 * IO to the underlying device will be queued in generic_make_request
-		 * and will never complete, so will never reduce nr_pending.
-		 * So increment nr_waiting here so no new raise_barriers will
-		 * succeed, and so the second wait_barrier cannot block.
-		 */
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_waiting++;
-		spin_unlock_irq(&conf->resync_lock);
-
-		make_request(mddev, &bp->bio1);
-		make_request(mddev, &bp->bio2);
-
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_waiting--;
-		wake_up(&conf->wait_barrier);
-		spin_unlock_irq(&conf->resync_lock);
-
-		bio_pair_release(bp);
-		return;
-	bad_map:
-		printk("md/raid10:%s: make_request bug: can't convert block across chunks"
-		       " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2,
-		       (unsigned long long)bio->bi_sector, bio_sectors(bio) / 2);
-
-		bio_io_error(bio);
-		return;
-	}
-
-	md_write_start(mddev, bio);
-
-	/*
-	 * Register the new request and wait if the reconstruction
-	 * thread has put up a bar for new requests.
-	 * Continue immediately if no resync is active currently.
-	 */
-	wait_barrier(conf);
-
 	sectors = bio_sectors(bio);
 	while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
-	    bio->bi_sector < conf->reshape_progress &&
-	    bio->bi_sector + sectors > conf->reshape_progress) {
+	    bio->bi_iter.bi_sector < conf->reshape_progress &&
+	    bio->bi_iter.bi_sector + sectors > conf->reshape_progress) {
 		/* IO spans the reshape position.  Need to wait for
 		 * reshape to pass
 		 */
 		allow_barrier(conf);
 		wait_event(conf->wait_barrier,
-			   conf->reshape_progress <= bio->bi_sector ||
-			   conf->reshape_progress >= bio->bi_sector + sectors);
+			   conf->reshape_progress <= bio->bi_iter.bi_sector ||
+			   conf->reshape_progress >= bio->bi_iter.bi_sector +
+			   sectors);
 		wait_barrier(conf);
 	}
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
 	    bio_data_dir(bio) == WRITE &&
 	    (mddev->reshape_backwards
-	     ? (bio->bi_sector < conf->reshape_safe &&
-		bio->bi_sector + sectors > conf->reshape_progress)
-	     : (bio->bi_sector + sectors > conf->reshape_safe &&
-		bio->bi_sector < conf->reshape_progress))) {
+	     ? (bio->bi_iter.bi_sector < conf->reshape_safe &&
+		bio->bi_iter.bi_sector + sectors > conf->reshape_progress)
+	     : (bio->bi_iter.bi_sector + sectors > conf->reshape_safe &&
+		bio->bi_iter.bi_sector < conf->reshape_progress))) {
 		/* Need to update reshape_position in metadata */
 		mddev->reshape_position = conf->reshape_progress;
 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1273,7 +1210,7 @@
 	r10_bio->sectors = sectors;
 
 	r10_bio->mddev = mddev;
-	r10_bio->sector = bio->bi_sector;
+	r10_bio->sector = bio->bi_iter.bi_sector;
 	r10_bio->state = 0;
 
 	/* We might need to issue multiple reads to different
@@ -1302,13 +1239,13 @@
 		slot = r10_bio->read_slot;
 
 		read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(read_bio, r10_bio->sector - bio->bi_sector,
+		bio_trim(read_bio, r10_bio->sector - bio->bi_iter.bi_sector,
 			 max_sectors);
 
 		r10_bio->devs[slot].bio = read_bio;
 		r10_bio->devs[slot].rdev = rdev;
 
-		read_bio->bi_sector = r10_bio->devs[slot].addr +
+		read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
 			choose_data_offset(r10_bio, rdev);
 		read_bio->bi_bdev = rdev->bdev;
 		read_bio->bi_end_io = raid10_end_read_request;
@@ -1320,7 +1257,7 @@
 			 * need another r10_bio.
 			 */
 			sectors_handled = (r10_bio->sector + max_sectors
-					   - bio->bi_sector);
+					   - bio->bi_iter.bi_sector);
 			r10_bio->sectors = max_sectors;
 			spin_lock_irq(&conf->device_lock);
 			if (bio->bi_phys_segments == 0)
@@ -1341,7 +1278,8 @@
 			r10_bio->sectors = bio_sectors(bio) - sectors_handled;
 			r10_bio->state = 0;
 			r10_bio->mddev = mddev;
-			r10_bio->sector = bio->bi_sector + sectors_handled;
+			r10_bio->sector = bio->bi_iter.bi_sector +
+				sectors_handled;
 			goto read_again;
 		} else
 			generic_make_request(read_bio);
@@ -1499,7 +1437,8 @@
 			bio->bi_phys_segments++;
 		spin_unlock_irq(&conf->device_lock);
 	}
-	sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
+	sectors_handled = r10_bio->sector + max_sectors -
+		bio->bi_iter.bi_sector;
 
 	atomic_set(&r10_bio->remaining, 1);
 	bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
@@ -1510,11 +1449,11 @@
 		if (r10_bio->devs[i].bio) {
 			struct md_rdev *rdev = conf->mirrors[d].rdev;
 			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-			bio_trim(mbio, r10_bio->sector - bio->bi_sector,
+			bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
 				 max_sectors);
 			r10_bio->devs[i].bio = mbio;
 
-			mbio->bi_sector	= (r10_bio->devs[i].addr+
+			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
 					   choose_data_offset(r10_bio,
 							      rdev));
 			mbio->bi_bdev = rdev->bdev;
@@ -1553,11 +1492,11 @@
 				rdev = conf->mirrors[d].rdev;
 			}
 			mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-			bio_trim(mbio, r10_bio->sector - bio->bi_sector,
+			bio_trim(mbio, r10_bio->sector - bio->bi_iter.bi_sector,
 				 max_sectors);
 			r10_bio->devs[i].repl_bio = mbio;
 
-			mbio->bi_sector	= (r10_bio->devs[i].addr +
+			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
 					   choose_data_offset(
 						   r10_bio, rdev));
 			mbio->bi_bdev = rdev->bdev;
@@ -1591,11 +1530,57 @@
 		r10_bio->sectors = bio_sectors(bio) - sectors_handled;
 
 		r10_bio->mddev = mddev;
-		r10_bio->sector = bio->bi_sector + sectors_handled;
+		r10_bio->sector = bio->bi_iter.bi_sector + sectors_handled;
 		r10_bio->state = 0;
 		goto retry_write;
 	}
 	one_write_done(r10_bio);
+}
+
+static void make_request(struct mddev *mddev, struct bio *bio)
+{
+	struct r10conf *conf = mddev->private;
+	sector_t chunk_mask = (conf->geo.chunk_mask & conf->prev.chunk_mask);
+	int chunk_sects = chunk_mask + 1;
+
+	struct bio *split;
+
+	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
+		md_flush_request(mddev, bio);
+		return;
+	}
+
+	md_write_start(mddev, bio);
+
+	/*
+	 * Register the new request and wait if the reconstruction
+	 * thread has put up a bar for new requests.
+	 * Continue immediately if no resync is active currently.
+	 */
+	wait_barrier(conf);
+
+	do {
+
+		/*
+		 * If this request crosses a chunk boundary, we need to split
+		 * it.
+		 */
+		if (unlikely((bio->bi_iter.bi_sector & chunk_mask) +
+			     bio_sectors(bio) > chunk_sects
+			     && (conf->geo.near_copies < conf->geo.raid_disks
+				 || conf->prev.near_copies <
+				 conf->prev.raid_disks))) {
+			split = bio_split(bio, chunk_sects -
+					  (bio->bi_iter.bi_sector &
+					   (chunk_sects - 1)),
+					  GFP_NOIO, fs_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
+
+		__make_request(mddev, split);
+	} while (split != bio);
 
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
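
Splitting this way also retires the nr_waiting++/-- workaround the old code
carried: the resync barrier is taken once for the whole bio before the loop,
and the pieces go through generic_make_request() (which defers recursive
submissions via current->bio_list) instead of recursive make_request()
calls, so a barrier raised between two halves of a split can no longer
deadlock the second half.
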
@@ -2124,10 +2109,10 @@
 		bio_reset(tbio);
 
 		tbio->bi_vcnt = vcnt;
-		tbio->bi_size = r10_bio->sectors << 9;
+		tbio->bi_iter.bi_size = r10_bio->sectors << 9;
 		tbio->bi_rw = WRITE;
 		tbio->bi_private = r10_bio;
-		tbio->bi_sector = r10_bio->devs[i].addr;
+		tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
 
 		for (j=0; j < vcnt ; j++) {
 			tbio->bi_io_vec[j].bv_offset = 0;
@@ -2144,7 +2129,7 @@
 		atomic_inc(&r10_bio->remaining);
 		md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(tbio));
 
-		tbio->bi_sector += conf->mirrors[d].rdev->data_offset;
+		tbio->bi_iter.bi_sector += conf->mirrors[d].rdev->data_offset;
 		tbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		generic_make_request(tbio);
 	}
@@ -2614,8 +2599,8 @@
 			sectors = sect_to_write;
 		/* Write at 'sector' for 'sectors' */
 		wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
-		bio_trim(wbio, sector - bio->bi_sector, sectors);
-		wbio->bi_sector = (r10_bio->devs[i].addr+
+		bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
+		wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+
 				   choose_data_offset(r10_bio, rdev) +
 				   (sector - r10_bio->sector));
 		wbio->bi_bdev = rdev->bdev;
@@ -2687,10 +2672,10 @@
 		(unsigned long long)r10_bio->sector);
 	bio = bio_clone_mddev(r10_bio->master_bio,
 			      GFP_NOIO, mddev);
-	bio_trim(bio, r10_bio->sector - bio->bi_sector, max_sectors);
+	bio_trim(bio, r10_bio->sector - bio->bi_iter.bi_sector, max_sectors);
 	r10_bio->devs[slot].bio = bio;
 	r10_bio->devs[slot].rdev = rdev;
-	bio->bi_sector = r10_bio->devs[slot].addr
+	bio->bi_iter.bi_sector = r10_bio->devs[slot].addr
 		+ choose_data_offset(r10_bio, rdev);
 	bio->bi_bdev = rdev->bdev;
 	bio->bi_rw = READ | do_sync;
@@ -2701,7 +2686,7 @@
 		struct bio *mbio = r10_bio->master_bio;
 		int sectors_handled =
 			r10_bio->sector + max_sectors
-			- mbio->bi_sector;
+			- mbio->bi_iter.bi_sector;
 		r10_bio->sectors = max_sectors;
 		spin_lock_irq(&conf->device_lock);
 		if (mbio->bi_phys_segments == 0)
@@ -2719,7 +2704,7 @@
 		set_bit(R10BIO_ReadError,
 			&r10_bio->state);
 		r10_bio->mddev = mddev;
-		r10_bio->sector = mbio->bi_sector
+		r10_bio->sector = mbio->bi_iter.bi_sector
 			+ sectors_handled;
 
 		goto read_more;
@@ -3157,7 +3142,8 @@
 				bio->bi_end_io = end_sync_read;
 				bio->bi_rw = READ;
 				from_addr = r10_bio->devs[j].addr;
-				bio->bi_sector = from_addr + rdev->data_offset;
+				bio->bi_iter.bi_sector = from_addr +
+					rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
 				atomic_inc(&rdev->nr_pending);
 				/* and we write to 'i' (if not in_sync) */
@@ -3181,7 +3167,7 @@
 					bio->bi_private = r10_bio;
 					bio->bi_end_io = end_sync_write;
 					bio->bi_rw = WRITE;
-					bio->bi_sector = to_addr
+					bio->bi_iter.bi_sector = to_addr
 						+ rdev->data_offset;
 					bio->bi_bdev = rdev->bdev;
 					atomic_inc(&r10_bio->remaining);
@@ -3210,7 +3196,8 @@
 				bio->bi_private = r10_bio;
 				bio->bi_end_io = end_sync_write;
 				bio->bi_rw = WRITE;
-				bio->bi_sector = to_addr + rdev->data_offset;
+				bio->bi_iter.bi_sector = to_addr +
+					rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
 				atomic_inc(&r10_bio->remaining);
 				break;
@@ -3328,7 +3315,7 @@
 			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_read;
 			bio->bi_rw = READ;
-			bio->bi_sector = sector +
+			bio->bi_iter.bi_sector = sector +
 				conf->mirrors[d].rdev->data_offset;
 			bio->bi_bdev = conf->mirrors[d].rdev->bdev;
 			count++;
@@ -3350,7 +3337,7 @@
 			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_write;
 			bio->bi_rw = WRITE;
-			bio->bi_sector = sector +
+			bio->bi_iter.bi_sector = sector +
 				conf->mirrors[d].replacement->data_offset;
 			bio->bi_bdev = conf->mirrors[d].replacement->bdev;
 			count++;
@@ -3397,7 +3384,7 @@
 			     bio2 = bio2->bi_next) {
 				/* remove last page from this bio */
 				bio2->bi_vcnt--;
-				bio2->bi_size -= len;
+				bio2->bi_iter.bi_size -= len;
 				bio2->bi_flags &= ~(1<< BIO_SEG_VALID);
 			}
 			goto bio_full;
@@ -4418,7 +4405,7 @@
 	read_bio = bio_alloc_mddev(GFP_KERNEL, RESYNC_PAGES, mddev);
 
 	read_bio->bi_bdev = rdev->bdev;
-	read_bio->bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
+	read_bio->bi_iter.bi_sector = (r10_bio->devs[r10_bio->read_slot].addr
 			       + rdev->data_offset);
 	read_bio->bi_private = r10_bio;
 	read_bio->bi_end_io = end_sync_read;
@@ -4426,7 +4413,7 @@
 	read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
 	read_bio->bi_flags |= 1 << BIO_UPTODATE;
 	read_bio->bi_vcnt = 0;
-	read_bio->bi_size = 0;
+	read_bio->bi_iter.bi_size = 0;
 	r10_bio->master_bio = read_bio;
 	r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
 
@@ -4452,7 +4439,8 @@
 
 		bio_reset(b);
 		b->bi_bdev = rdev2->bdev;
-		b->bi_sector = r10_bio->devs[s/2].addr + rdev2->new_data_offset;
+		b->bi_iter.bi_sector = r10_bio->devs[s/2].addr +
+			rdev2->new_data_offset;
 		b->bi_private = r10_bio;
 		b->bi_end_io = end_reshape_write;
 		b->bi_rw = WRITE;
@@ -4479,7 +4467,7 @@
 			     bio2 = bio2->bi_next) {
 				/* Remove last page from this bio */
 				bio2->bi_vcnt--;
-				bio2->bi_size -= len;
+				bio2->bi_iter.bi_size -= len;
 				bio2->bi_flags &= ~(1<<BIO_SEG_VALID);
 			}
 			goto bio_full;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 03f82ab..f1feade 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -133,7 +133,7 @@
 static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
 {
 	int sectors = bio_sectors(bio);
-	if (bio->bi_sector + sectors < sector + STRIPE_SECTORS)
+	if (bio->bi_iter.bi_sector + sectors < sector + STRIPE_SECTORS)
 		return bio->bi_next;
 	else
 		return NULL;
@@ -225,7 +225,7 @@
 
 		return_bi = bi->bi_next;
 		bi->bi_next = NULL;
-		bi->bi_size = 0;
+		bi->bi_iter.bi_size = 0;
 		trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
 					 bi, 0);
 		bio_endio(bi, 0);
@@ -852,10 +852,10 @@
 				bi->bi_rw, i);
 			atomic_inc(&sh->count);
 			if (use_new_offset(conf, sh))
-				bi->bi_sector = (sh->sector
+				bi->bi_iter.bi_sector = (sh->sector
 						 + rdev->new_data_offset);
 			else
-				bi->bi_sector = (sh->sector
+				bi->bi_iter.bi_sector = (sh->sector
 						 + rdev->data_offset);
 			if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
 				bi->bi_rw |= REQ_NOMERGE;
@@ -863,7 +863,7 @@
 			bi->bi_vcnt = 1;
 			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			bi->bi_io_vec[0].bv_offset = 0;
-			bi->bi_size = STRIPE_SIZE;
+			bi->bi_iter.bi_size = STRIPE_SIZE;
 			/*
 			 * If this is discard request, set bi_vcnt 0. We don't
 			 * want to confuse SCSI because SCSI will replace payload
@@ -899,15 +899,15 @@
 				rbi->bi_rw, i);
 			atomic_inc(&sh->count);
 			if (use_new_offset(conf, sh))
-				rbi->bi_sector = (sh->sector
+				rbi->bi_iter.bi_sector = (sh->sector
 						  + rrdev->new_data_offset);
 			else
-				rbi->bi_sector = (sh->sector
+				rbi->bi_iter.bi_sector = (sh->sector
 						  + rrdev->data_offset);
 			rbi->bi_vcnt = 1;
 			rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
 			rbi->bi_io_vec[0].bv_offset = 0;
-			rbi->bi_size = STRIPE_SIZE;
+			rbi->bi_iter.bi_size = STRIPE_SIZE;
 			/*
 			 * If this is discard request, set bi_vcnt 0. We don't
 			 * want to confuse SCSI because SCSI will replace payload
@@ -935,24 +935,24 @@
 async_copy_data(int frombio, struct bio *bio, struct page *page,
 	sector_t sector, struct dma_async_tx_descriptor *tx)
 {
-	struct bio_vec *bvl;
+	struct bio_vec bvl;
+	struct bvec_iter iter;
 	struct page *bio_page;
-	int i;
 	int page_offset;
 	struct async_submit_ctl submit;
 	enum async_tx_flags flags = 0;
 
-	if (bio->bi_sector >= sector)
-		page_offset = (signed)(bio->bi_sector - sector) * 512;
+	if (bio->bi_iter.bi_sector >= sector)
+		page_offset = (signed)(bio->bi_iter.bi_sector - sector) * 512;
 	else
-		page_offset = (signed)(sector - bio->bi_sector) * -512;
+		page_offset = (signed)(sector - bio->bi_iter.bi_sector) * -512;
 
 	if (frombio)
 		flags |= ASYNC_TX_FENCE;
 	init_async_submit(&submit, flags, tx, NULL, NULL, NULL);
 
-	bio_for_each_segment(bvl, bio, i) {
-		int len = bvl->bv_len;
+	bio_for_each_segment(bvl, bio, iter) {
+		int len = bvl.bv_len;
 		int clen;
 		int b_offset = 0;
 
@@ -968,8 +968,8 @@
 			clen = len;
 
 		if (clen > 0) {
-			b_offset += bvl->bv_offset;
-			bio_page = bvl->bv_page;
+			b_offset += bvl.bv_offset;
+			bio_page = bvl.bv_page;
 			if (frombio)
 				tx = async_memcpy(page, bio_page, page_offset,
 						  b_offset, clen, &submit);
@@ -1012,7 +1012,7 @@
 			BUG_ON(!dev->read);
 			rbi = dev->read;
 			dev->read = NULL;
-			while (rbi && rbi->bi_sector <
+			while (rbi && rbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				rbi2 = r5_next_bio(rbi, dev->sector);
 				if (!raid5_dec_bi_active_stripes(rbi)) {
@@ -1048,7 +1048,7 @@
 			dev->read = rbi = dev->toread;
 			dev->toread = NULL;
 			spin_unlock_irq(&sh->stripe_lock);
-			while (rbi && rbi->bi_sector <
+			while (rbi && rbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				tx = async_copy_data(0, rbi, dev->page,
 					dev->sector, tx);
@@ -1390,7 +1390,7 @@
 			wbi = dev->written = chosen;
 			spin_unlock_irq(&sh->stripe_lock);
 
-			while (wbi && wbi->bi_sector <
+			while (wbi && wbi->bi_iter.bi_sector <
 				dev->sector + STRIPE_SECTORS) {
 				if (wbi->bi_rw & REQ_FUA)
 					set_bit(R5_WantFUA, &dev->flags);
@@ -2615,7 +2615,7 @@
 	int firstwrite=0;
 
 	pr_debug("adding bi b#%llu to stripe s#%llu\n",
-		(unsigned long long)bi->bi_sector,
+		(unsigned long long)bi->bi_iter.bi_sector,
 		(unsigned long long)sh->sector);
 
 	/*
@@ -2633,12 +2633,12 @@
 			firstwrite = 1;
 	} else
 		bip = &sh->dev[dd_idx].toread;
-	while (*bip && (*bip)->bi_sector < bi->bi_sector) {
-		if (bio_end_sector(*bip) > bi->bi_sector)
+	while (*bip && (*bip)->bi_iter.bi_sector < bi->bi_iter.bi_sector) {
+		if (bio_end_sector(*bip) > bi->bi_iter.bi_sector)
 			goto overlap;
 		bip = & (*bip)->bi_next;
 	}
-	if (*bip && (*bip)->bi_sector < bio_end_sector(bi))
+	if (*bip && (*bip)->bi_iter.bi_sector < bio_end_sector(bi))
 		goto overlap;
 
 	BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
@@ -2652,7 +2652,7 @@
 		sector_t sector = sh->dev[dd_idx].sector;
 		for (bi=sh->dev[dd_idx].towrite;
 		     sector < sh->dev[dd_idx].sector + STRIPE_SECTORS &&
-			     bi && bi->bi_sector <= sector;
+			     bi && bi->bi_iter.bi_sector <= sector;
 		     bi = r5_next_bio(bi, sh->dev[dd_idx].sector)) {
 			if (bio_end_sector(bi) >= sector)
 				sector = bio_end_sector(bi);
@@ -2662,7 +2662,7 @@
 	}
 
 	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
-		(unsigned long long)(*bip)->bi_sector,
+		(unsigned long long)(*bip)->bi_iter.bi_sector,
 		(unsigned long long)sh->sector, dd_idx);
 	spin_unlock_irq(&sh->stripe_lock);
 
@@ -2737,7 +2737,7 @@
 		if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 			wake_up(&conf->wait_for_overlap);
 
-		while (bi && bi->bi_sector <
+		while (bi && bi->bi_iter.bi_sector <
 			sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2756,7 +2756,7 @@
 		bi = sh->dev[i].written;
 		sh->dev[i].written = NULL;
 		if (bi) bitmap_end = 1;
-		while (bi && bi->bi_sector <
+		while (bi && bi->bi_iter.bi_sector <
 		       sh->dev[i].sector + STRIPE_SECTORS) {
 			struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
 			clear_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -2780,7 +2780,7 @@
 			spin_unlock_irq(&sh->stripe_lock);
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 				wake_up(&conf->wait_for_overlap);
-			while (bi && bi->bi_sector <
+			while (bi && bi->bi_iter.bi_sector <
 			       sh->dev[i].sector + STRIPE_SECTORS) {
 				struct bio *nextbi =
 					r5_next_bio(bi, sh->dev[i].sector);
@@ -3004,7 +3004,7 @@
 					clear_bit(R5_UPTODATE, &dev->flags);
 				wbi = dev->written;
 				dev->written = NULL;
-				while (wbi && wbi->bi_sector <
+				while (wbi && wbi->bi_iter.bi_sector <
 					dev->sector + STRIPE_SECTORS) {
 					wbi2 = r5_next_bio(wbi, dev->sector);
 					if (!raid5_dec_bi_active_stripes(wbi)) {
@@ -4096,7 +4096,7 @@
 
 static int in_chunk_boundary(struct mddev *mddev, struct bio *bio)
 {
-	sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev);
+	sector_t sector = bio->bi_iter.bi_sector + get_start_sect(bio->bi_bdev);
 	unsigned int chunk_sectors = mddev->chunk_sectors;
 	unsigned int bio_sectors = bio_sectors(bio);
 
@@ -4233,9 +4233,9 @@
 	/*
 	 *	compute position
 	 */
-	align_bi->bi_sector =  raid5_compute_sector(conf, raid_bio->bi_sector,
-						    0,
-						    &dd_idx, NULL);
+	align_bi->bi_iter.bi_sector =
+		raid5_compute_sector(conf, raid_bio->bi_iter.bi_sector,
+				     0, &dd_idx, NULL);
 
 	end_sector = bio_end_sector(align_bi);
 	rcu_read_lock();
@@ -4260,7 +4260,8 @@
 		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
 
 		if (!bio_fits_rdev(align_bi) ||
-		    is_badblock(rdev, align_bi->bi_sector, bio_sectors(align_bi),
+		    is_badblock(rdev, align_bi->bi_iter.bi_sector,
+				bio_sectors(align_bi),
 				&first_bad, &bad_sectors)) {
 			/* too big in some way, or has a known bad block */
 			bio_put(align_bi);
@@ -4269,7 +4270,7 @@
 		}
 
 		/* No reshape active, so we can trust rdev->data_offset */
-		align_bi->bi_sector += rdev->data_offset;
+		align_bi->bi_iter.bi_sector += rdev->data_offset;
 
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
@@ -4281,7 +4282,7 @@
 		if (mddev->gendisk)
 			trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
 					      align_bi, disk_devt(mddev->gendisk),
-					      raid_bio->bi_sector);
+					      raid_bio->bi_iter.bi_sector);
 		generic_make_request(align_bi);
 		return 1;
 	} else {
@@ -4464,8 +4465,8 @@
 		/* Skip discard while reshape is happening */
 		return;
 
-	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
-	last_sector = bi->bi_sector + (bi->bi_size>>9);
+	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	last_sector = bi->bi_iter.bi_sector + (bi->bi_iter.bi_size>>9);
 
 	bi->bi_next = NULL;
 	bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
@@ -4569,7 +4570,7 @@
 		return;
 	}
 
-	logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	logical_sector = bi->bi_iter.bi_sector & ~((sector_t)STRIPE_SECTORS-1);
 	last_sector = bio_end_sector(bi);
 	bi->bi_next = NULL;
 	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
@@ -5053,7 +5054,8 @@
 	int remaining;
 	int handled = 0;
 
-	logical_sector = raid_bio->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
+	logical_sector = raid_bio->bi_iter.bi_sector &
+		~((sector_t)STRIPE_SECTORS-1);
 	sector = raid5_compute_sector(conf, logical_sector,
 				      0, &dd_idx, NULL);
 	last_sector = bio_end_sector(raid_bio);
@@ -6101,6 +6103,7 @@
 		blk_queue_io_min(mddev->queue, chunk_size);
 		blk_queue_io_opt(mddev->queue, chunk_size *
 				 (conf->raid_disks - conf->max_degraded));
+		mddev->queue->limits.raid_partial_stripes_expensive = 1;
 		/*
 		 * We can only discard a whole stripe. It doesn't make sense to
 		 * discard the data disk but write the parity disk
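
The raid5 hunks above show the two halves of the conversion pattern that
repeats throughout this series: every bi_sector/bi_size access moves under
bio->bi_iter, and bio_for_each_segment() now fills in a struct bio_vec by
value, driven by a struct bvec_iter, instead of handing out a pointer
indexed by bi_idx. A minimal sketch of the converted idiom, where
process_segment() stands in for a hypothetical consumer:

	struct bio_vec bvec;
	struct bvec_iter iter;

	bio_for_each_segment(bvec, bio, iter) {
		/* bvec is a stack copy, not a pointer into bi_io_vec[] */
		void *buf = page_address(bvec.bv_page) + bvec.bv_offset;

		process_segment(buf, bvec.bv_len);
	}
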
diff --git a/drivers/media/platform/fsl-viu.c b/drivers/media/platform/fsl-viu.c
index 6a23223..dbf0ce3 100644
--- a/drivers/media/platform/fsl-viu.c
+++ b/drivers/media/platform/fsl-viu.c
@@ -1580,7 +1580,7 @@
 	}
 
 	/* enable VIU clock */
-	clk = devm_clk_get(&op->dev, "viu_clk");
+	clk = devm_clk_get(&op->dev, "ipg");
 	if (IS_ERR(clk)) {
 		dev_err(&op->dev, "failed to lookup the clock!\n");
 		ret = PTR_ERR(clk);
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index dd239bd..00d339c 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -2235,10 +2235,10 @@
 	}
 
 	/* do we need to support multiple segments? */
-	if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
-		printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u %u, rsp %u %u\n",
-		    ioc->name, __func__, bio_segments(req->bio), blk_rq_bytes(req),
-		    bio_segments(rsp->bio), blk_rq_bytes(rsp));
+	if (bio_multiple_segments(req->bio) ||
+	    bio_multiple_segments(rsp->bio)) {
+		printk(MYIOC_s_ERR_FMT "%s: multiple segments req %u, rsp %u\n",
+		    ioc->name, __func__, blk_rq_bytes(req), blk_rq_bytes(rsp));
 		return -EINVAL;
 	}
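
The mptsas check above (and the matching libsas and mpt2sas/mpt3sas hunks
further down) replaces the open-coded "bio_segments(bio) > 1" test: with
iterators, counting segments means walking the whole bio, so a cheap
predicate is used instead. bio_multiple_segments() is roughly the
following one-liner:

	static inline bool bio_multiple_segments(const struct bio *bio)
	{
		/* more than one segment iff the bytes remaining exceed
		 * what the current segment alone covers
		 */
		return bio->bi_iter.bi_size != bio_iovec(bio).bv_len;
	}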
 
diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index 61e2abc..31ee7cf 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c
@@ -729,7 +729,7 @@
 	of_node_put(rootnode);
 
 	/* Enable NFC clock */
-	clk = devm_clk_get(dev, "nfc_clk");
+	clk = devm_clk_get(dev, "ipg");
 	if (IS_ERR(clk)) {
 		dev_err(dev, "Unable to acquire NFC clock!\n");
 		retval = PTR_ERR(clk);
diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c
index 035e235..4472529 100644
--- a/drivers/net/can/mscan/mpc5xxx_can.c
+++ b/drivers/net/can/mscan/mpc5xxx_can.c
@@ -108,135 +108,170 @@
 #endif /* CONFIG_PPC_MPC52xx */
 
 #ifdef CONFIG_PPC_MPC512x
-struct mpc512x_clockctl {
-	u32 spmr;		/* System PLL Mode Reg */
-	u32 sccr[2];		/* System Clk Ctrl Reg 1 & 2 */
-	u32 scfr1;		/* System Clk Freq Reg 1 */
-	u32 scfr2;		/* System Clk Freq Reg 2 */
-	u32 reserved;
-	u32 bcr;		/* Bread Crumb Reg */
-	u32 pccr[12];		/* PSC Clk Ctrl Reg 0-11 */
-	u32 spccr;		/* SPDIF Clk Ctrl Reg */
-	u32 cccr;		/* CFM Clk Ctrl Reg */
-	u32 dccr;		/* DIU Clk Cnfg Reg */
-	u32 mccr[4];		/* MSCAN Clk Ctrl Reg 1-3 */
-};
-
-static struct of_device_id mpc512x_clock_ids[] = {
-	{ .compatible = "fsl,mpc5121-clock", },
-	{}
-};
-
 static u32 mpc512x_can_get_clock(struct platform_device *ofdev,
-				 const char *clock_name, int *mscan_clksrc)
+				 const char *clock_source, int *mscan_clksrc)
 {
-	struct mpc512x_clockctl __iomem *clockctl;
-	struct device_node *np_clock;
-	struct clk *sys_clk, *ref_clk;
-	int plen, clockidx, clocksrc = -1;
-	u32 sys_freq, val, clockdiv = 1, freq = 0;
-	const u32 *pval;
+	struct device_node *np;
+	u32 clockdiv;
+	enum {
+		CLK_FROM_AUTO,
+		CLK_FROM_IPS,
+		CLK_FROM_SYS,
+		CLK_FROM_REF,
+	} clk_from;
+	struct clk *clk_in, *clk_can;
+	unsigned long freq_calc;
+	struct mscan_priv *priv;
+	struct clk *clk_ipg;
 
-	np_clock = of_find_matching_node(NULL, mpc512x_clock_ids);
-	if (!np_clock) {
-		dev_err(&ofdev->dev, "couldn't find clock node\n");
-		return 0;
+	/* the caller passed in the clock source spec that was read from
+	 * the device tree; get the optional clock divider as well
+	 */
+	np = ofdev->dev.of_node;
+	clockdiv = 1;
+	of_property_read_u32(np, "fsl,mscan-clock-divider", &clockdiv);
+	dev_dbg(&ofdev->dev, "device tree specs: clk src[%s] div[%d]\n",
+		clock_source ? clock_source : "<NULL>", clockdiv);
+
+	/* when clock-source is 'ip', the CANCTL1[CLKSRC] bit needs to
+	 * get set, and the 'ips' clock is the input to the MSCAN
+	 * component
+	 *
+	 * for clock-source values of 'ref' or 'sys' the CANCTL1[CLKSRC]
+	 * bit needs to get cleared, an optional clock-divider may have
+	 * been specified (the default value is 1), the appropriate
+	 * MSCAN related MCLK is the input to the MSCAN component
+	 *
+	 * in the absence of a clock-source spec, an optimal clock is
+	 * first derived from the 'sys' clock; if that fails, the
+	 * 'ref' clock is used
+	 */
+	clk_from = CLK_FROM_AUTO;
+	if (clock_source) {
+		/* interpret the device tree's spec for the clock source */
+		if (!strcmp(clock_source, "ip"))
+			clk_from = CLK_FROM_IPS;
+		else if (!strcmp(clock_source, "sys"))
+			clk_from = CLK_FROM_SYS;
+		else if (!strcmp(clock_source, "ref"))
+			clk_from = CLK_FROM_REF;
+		else
+			goto err_invalid;
+		dev_dbg(&ofdev->dev, "got a clk source spec[%d]\n", clk_from);
 	}
-	clockctl = of_iomap(np_clock, 0);
-	if (!clockctl) {
-		dev_err(&ofdev->dev, "couldn't map clock registers\n");
-		goto exit_put;
+	if (clk_from == CLK_FROM_AUTO) {
+		/* no spec so far, try the 'sys' clock; round to the
+		 * nearest MHz and see if we can get a multiple of 16 MHz
+		 */
+		dev_dbg(&ofdev->dev, "no clk source spec, trying SYS\n");
+		clk_in = devm_clk_get(&ofdev->dev, "sys");
+		if (IS_ERR(clk_in))
+			goto err_notavail;
+		freq_calc = clk_get_rate(clk_in);
+		freq_calc += 499999;
+		freq_calc /= 1000000;
+		freq_calc *= 1000000;
+		if ((freq_calc % 16000000) == 0) {
+			clk_from = CLK_FROM_SYS;
+			clockdiv = freq_calc / 16000000;
+			dev_dbg(&ofdev->dev,
+				"clk fit, sys[%lu] div[%d] freq[%lu]\n",
+				freq_calc, clockdiv, freq_calc / clockdiv);
+		}
+	}
+	if (clk_from == CLK_FROM_AUTO) {
+		/* no spec so far, use the 'ref' clock */
+		dev_dbg(&ofdev->dev, "no clk source spec, trying REF\n");
+		clk_in = devm_clk_get(&ofdev->dev, "ref");
+		if (IS_ERR(clk_in))
+			goto err_notavail;
+		clk_from = CLK_FROM_REF;
+		freq_calc = clk_get_rate(clk_in);
+		dev_dbg(&ofdev->dev,
+			"clk fit, ref[%lu] (no div) freq[%lu]\n",
+			freq_calc, freq_calc);
 	}
 
-	/* Determine the MSCAN device index from the peripheral's
-	 * physical address. Register address offsets against the
-	 * IMMR base are:  0x1300, 0x1380, 0x2300, 0x2380
+	/* select IPS or MCLK as the MSCAN input (returned to the caller),
+	 * set up the MCLK mux source and rate if applicable, apply the
+	 * divider that was specified or derived above, and determine
+	 * the actual resulting clock rate to return to the caller
 	 */
-	pval = of_get_property(ofdev->dev.of_node, "reg", &plen);
-	BUG_ON(!pval || plen < sizeof(*pval));
-	clockidx = (*pval & 0x80) ? 1 : 0;
-	if (*pval & 0x2000)
-		clockidx += 2;
-
-	/*
-	 * Clock source and divider selection: 3 different clock sources
-	 * can be selected: "ip", "ref" or "sys". For the latter two, a
-	 * clock divider can be defined as well. If the clock source is
-	 * not specified by the device tree, we first try to find an
-	 * optimal CAN source clock based on the system clock. If that
-	 * is not posslible, the reference clock will be used.
-	 */
-	if (clock_name && !strcmp(clock_name, "ip")) {
+	switch (clk_from) {
+	case CLK_FROM_IPS:
+		clk_can = devm_clk_get(&ofdev->dev, "ips");
+		if (IS_ERR(clk_can))
+			goto err_notavail;
+		priv = netdev_priv(dev_get_drvdata(&ofdev->dev));
+		priv->clk_can = clk_can;
+		freq_calc = clk_get_rate(clk_can);
 		*mscan_clksrc = MSCAN_CLKSRC_IPS;
-		freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node);
-	} else {
+		dev_dbg(&ofdev->dev, "clk from IPS, clksrc[%d] freq[%lu]\n",
+			*mscan_clksrc, freq_calc);
+		break;
+	case CLK_FROM_SYS:
+	case CLK_FROM_REF:
+		clk_can = devm_clk_get(&ofdev->dev, "mclk");
+		if (IS_ERR(clk_can))
+			goto err_notavail;
+		priv = netdev_priv(dev_get_drvdata(&ofdev->dev));
+		priv->clk_can = clk_can;
+		if (clk_from == CLK_FROM_SYS)
+			clk_in = devm_clk_get(&ofdev->dev, "sys");
+		if (clk_from == CLK_FROM_REF)
+			clk_in = devm_clk_get(&ofdev->dev, "ref");
+		if (IS_ERR(clk_in))
+			goto err_notavail;
+		clk_set_parent(clk_can, clk_in);
+		freq_calc = clk_get_rate(clk_in);
+		freq_calc /= clockdiv;
+		clk_set_rate(clk_can, freq_calc);
+		freq_calc = clk_get_rate(clk_can);
 		*mscan_clksrc = MSCAN_CLKSRC_BUS;
-
-		pval = of_get_property(ofdev->dev.of_node,
-				       "fsl,mscan-clock-divider", &plen);
-		if (pval && plen == sizeof(*pval))
-			clockdiv = *pval;
-		if (!clockdiv)
-			clockdiv = 1;
-
-		if (!clock_name || !strcmp(clock_name, "sys")) {
-			sys_clk = devm_clk_get(&ofdev->dev, "sys_clk");
-			if (IS_ERR(sys_clk)) {
-				dev_err(&ofdev->dev, "couldn't get sys_clk\n");
-				goto exit_unmap;
-			}
-			/* Get and round up/down sys clock rate */
-			sys_freq = 1000000 *
-				((clk_get_rate(sys_clk) + 499999) / 1000000);
-
-			if (!clock_name) {
-				/* A multiple of 16 MHz would be optimal */
-				if ((sys_freq % 16000000) == 0) {
-					clocksrc = 0;
-					clockdiv = sys_freq / 16000000;
-					freq = sys_freq / clockdiv;
-				}
-			} else {
-				clocksrc = 0;
-				freq = sys_freq / clockdiv;
-			}
-		}
-
-		if (clocksrc < 0) {
-			ref_clk = devm_clk_get(&ofdev->dev, "ref_clk");
-			if (IS_ERR(ref_clk)) {
-				dev_err(&ofdev->dev, "couldn't get ref_clk\n");
-				goto exit_unmap;
-			}
-			clocksrc = 1;
-			freq = clk_get_rate(ref_clk) / clockdiv;
-		}
+		dev_dbg(&ofdev->dev, "clk from MCLK, clksrc[%d] freq[%lu]\n",
+			*mscan_clksrc, freq_calc);
+		break;
+	default:
+		goto err_invalid;
 	}
 
-	/* Disable clock */
-	out_be32(&clockctl->mccr[clockidx], 0x0);
-	if (clocksrc >= 0) {
-		/* Set source and divider */
-		val = (clocksrc << 14) | ((clockdiv - 1) << 17);
-		out_be32(&clockctl->mccr[clockidx], val);
-		/* Enable clock */
-		out_be32(&clockctl->mccr[clockidx], val | 0x10000);
-	}
+	/* the clk_can item above is used for the bitrate; access to
+	 * the peripheral's register set needs the clk_ipg item
+	 */
+	clk_ipg = devm_clk_get(&ofdev->dev, "ipg");
+	if (IS_ERR(clk_ipg))
+		goto err_notavail_ipg;
+	if (clk_prepare_enable(clk_ipg))
+		goto err_notavail_ipg;
+	priv = netdev_priv(dev_get_drvdata(&ofdev->dev));
+	priv->clk_ipg = clk_ipg;
 
-	/* Enable MSCAN clock domain */
-	val = in_be32(&clockctl->sccr[1]);
-	if (!(val & (1 << 25)))
-		out_be32(&clockctl->sccr[1], val | (1 << 25));
+	/* return the determined clock source rate */
+	return freq_calc;
 
-	dev_dbg(&ofdev->dev, "using '%s' with frequency divider %d\n",
-		*mscan_clksrc == MSCAN_CLKSRC_IPS ? "ips_clk" :
-		clocksrc == 1 ? "ref_clk" : "sys_clk", clockdiv);
+err_invalid:
+	dev_err(&ofdev->dev, "invalid clock source specification\n");
+	/* clock source rate could not get determined */
+	return 0;
 
-exit_unmap:
-	iounmap(clockctl);
-exit_put:
-	of_node_put(np_clock);
-	return freq;
+err_notavail:
+	dev_err(&ofdev->dev, "cannot acquire or setup bitrate clock source\n");
+	/* clock source rate could not get determined */
+	return 0;
+
+err_notavail_ipg:
+	dev_err(&ofdev->dev, "cannot acquire or setup register clock\n");
+	/* clock source rate could not get determined */
+	return 0;
+}
+
+static void mpc512x_can_put_clock(struct platform_device *ofdev)
+{
+	struct mscan_priv *priv;
+
+	priv = netdev_priv(dev_get_drvdata(&ofdev->dev));
+	if (priv->clk_ipg)
+		clk_disable_unprepare(priv->clk_ipg);
 }
 #else /* !CONFIG_PPC_MPC512x */
 static u32 mpc512x_can_get_clock(struct platform_device *ofdev,
@@ -244,6 +279,7 @@
 {
 	return 0;
 }
+#define mpc512x_can_put_clock NULL
 #endif /* CONFIG_PPC_MPC512x */
 
 static const struct of_device_id mpc5xxx_can_table[];
@@ -385,11 +421,13 @@
 static const struct mpc5xxx_can_data mpc5200_can_data = {
 	.type = MSCAN_TYPE_MPC5200,
 	.get_clock = mpc52xx_can_get_clock,
+	/* .put_clock not applicable */
 };
 
 static const struct mpc5xxx_can_data mpc5121_can_data = {
 	.type = MSCAN_TYPE_MPC5121,
 	.get_clock = mpc512x_can_get_clock,
+	.put_clock = mpc512x_can_put_clock,
 };
 
 static const struct of_device_id mpc5xxx_can_table[] = {
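
The CLK_FROM_AUTO path above rounds the measured 'sys' rate to the nearest
MHz and accepts it only when the rounded figure divides evenly into a
16 MHz CAN clock. A worked example, assuming clk_get_rate() reports
255999000 Hz:

	unsigned long freq_calc = 255999000;	/* reported 'sys' rate */
	u32 clockdiv;

	freq_calc += 499999;	/* 256498999: bias for nearest-MHz rounding */
	freq_calc /= 1000000;	/* 256 */
	freq_calc *= 1000000;	/* 256000000 */

	if ((freq_calc % 16000000) == 0)
		clockdiv = freq_calc / 16000000;	/* 16, for a 16 MHz CAN clock */
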
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 930694d..71e4900 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -150,6 +150,7 @@
 	err = device_register(&bus->dev);
 	if (err) {
 		pr_err("mii_bus %s failed to register\n", bus->id);
+		put_device(&bus->dev);
 		return -EINVAL;
 	}
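
The one-line fix above follows the driver-core reference rule: once
device_register() has been called, the embedded kobject holds a reference
even when the call fails, so the error path must drop it with put_device()
rather than free the memory directly. The general shape of the rule, as a
sketch:

	err = device_register(&bus->dev);
	if (err) {
		/* the kobject already holds a reference, even on failure;
		 * put_device() drops it so ->release() does the freeing,
		 * never kfree() the device here
		 */
		put_device(&bus->dev);
		return err;
	}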
 
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index 92bd22c..9cbc567 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -504,7 +504,7 @@
 	struct dasd_diag_req *dreq;
 	struct dasd_diag_bio *dbio;
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	char *dst;
 	unsigned int count, datasize;
 	sector_t recid, first_rec, last_rec;
@@ -525,10 +525,10 @@
 	/* Check struct bio and count the number of blocks for the request. */
 	count = 0;
 	rq_for_each_segment(bv, req, iter) {
-		if (bv->bv_len & (blksize - 1))
+		if (bv.bv_len & (blksize - 1))
 			/* Fba can only do full blocks. */
 			return ERR_PTR(-EINVAL);
-		count += bv->bv_len >> (block->s2b_shift + 9);
+		count += bv.bv_len >> (block->s2b_shift + 9);
 	}
 	/* Paranoia. */
 	if (count != last_rec - first_rec + 1)
@@ -545,8 +545,8 @@
 	dbio = dreq->bio;
 	recid = first_rec;
 	rq_for_each_segment(bv, req, iter) {
-		dst = page_address(bv->bv_page) + bv->bv_offset;
-		for (off = 0; off < bv->bv_len; off += blksize) {
+		dst = page_address(bv.bv_page) + bv.bv_offset;
+		for (off = 0; off < bv.bv_len; off += blksize) {
 			memset(dbio, 0, sizeof (struct dasd_diag_bio));
 			dbio->type = rw_cmd;
 			dbio->block_number = recid + 1;
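
The s390 DASD conversions here and below are mechanical: rq_for_each_segment()
keeps its struct req_iterator, but the bio_vec it yields is now a value on
the caller's stack, so every "bv->" member access becomes "bv.". The
post-conversion shape, as a sketch:

	struct req_iterator iter;
	struct bio_vec bv;

	rq_for_each_segment(bv, req, iter) {
		char *dst = page_address(bv.bv_page) + bv.bv_offset;

		/* bv.bv_len bytes of this segment start at dst */
	}
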
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 95e4578..2e8e075 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -2551,7 +2551,7 @@
 	struct dasd_ccw_req *cqr;
 	struct ccw1 *ccw;
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	char *dst;
 	unsigned int off;
 	int count, cidaw, cplength, datasize;
@@ -2573,13 +2573,13 @@
 	count = 0;
 	cidaw = 0;
 	rq_for_each_segment(bv, req, iter) {
-		if (bv->bv_len & (blksize - 1))
+		if (bv.bv_len & (blksize - 1))
 			/* Eckd can only do full blocks. */
 			return ERR_PTR(-EINVAL);
-		count += bv->bv_len >> (block->s2b_shift + 9);
+		count += bv.bv_len >> (block->s2b_shift + 9);
 #if defined(CONFIG_64BIT)
-		if (idal_is_needed (page_address(bv->bv_page), bv->bv_len))
-			cidaw += bv->bv_len >> (block->s2b_shift + 9);
+		if (idal_is_needed (page_address(bv.bv_page), bv.bv_len))
+			cidaw += bv.bv_len >> (block->s2b_shift + 9);
 #endif
 	}
 	/* Paranoia. */
@@ -2650,16 +2650,16 @@
 			      last_rec - recid + 1, cmd, basedev, blksize);
 	}
 	rq_for_each_segment(bv, req, iter) {
-		dst = page_address(bv->bv_page) + bv->bv_offset;
+		dst = page_address(bv.bv_page) + bv.bv_offset;
 		if (dasd_page_cache) {
 			char *copy = kmem_cache_alloc(dasd_page_cache,
 						      GFP_DMA | __GFP_NOWARN);
 			if (copy && rq_data_dir(req) == WRITE)
-				memcpy(copy + bv->bv_offset, dst, bv->bv_len);
+				memcpy(copy + bv.bv_offset, dst, bv.bv_len);
 			if (copy)
-				dst = copy + bv->bv_offset;
+				dst = copy + bv.bv_offset;
 		}
-		for (off = 0; off < bv->bv_len; off += blksize) {
+		for (off = 0; off < bv.bv_len; off += blksize) {
 			sector_t trkid = recid;
 			unsigned int recoffs = sector_div(trkid, blk_per_trk);
 			rcmd = cmd;
@@ -2735,7 +2735,7 @@
 	struct dasd_ccw_req *cqr;
 	struct ccw1 *ccw;
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	char *dst, *idaw_dst;
 	unsigned int cidaw, cplength, datasize;
 	unsigned int tlf;
@@ -2813,8 +2813,8 @@
 	idaw_dst = NULL;
 	idaw_len = 0;
 	rq_for_each_segment(bv, req, iter) {
-		dst = page_address(bv->bv_page) + bv->bv_offset;
-		seg_len = bv->bv_len;
+		dst = page_address(bv.bv_page) + bv.bv_offset;
+		seg_len = bv.bv_len;
 		while (seg_len) {
 			if (new_track) {
 				trkid = recid;
@@ -3039,7 +3039,7 @@
 {
 	struct dasd_ccw_req *cqr;
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	char *dst;
 	unsigned int trkcount, ctidaw;
 	unsigned char cmd;
@@ -3125,8 +3125,8 @@
 		new_track = 1;
 		recid = first_rec;
 		rq_for_each_segment(bv, req, iter) {
-			dst = page_address(bv->bv_page) + bv->bv_offset;
-			seg_len = bv->bv_len;
+			dst = page_address(bv.bv_page) + bv.bv_offset;
+			seg_len = bv.bv_len;
 			while (seg_len) {
 				if (new_track) {
 					trkid = recid;
@@ -3158,9 +3158,9 @@
 		}
 	} else {
 		rq_for_each_segment(bv, req, iter) {
-			dst = page_address(bv->bv_page) + bv->bv_offset;
+			dst = page_address(bv.bv_page) + bv.bv_offset;
 			last_tidaw = itcw_add_tidaw(itcw, 0x00,
-						    dst, bv->bv_len);
+						    dst, bv.bv_len);
 			if (IS_ERR(last_tidaw)) {
 				ret = -EINVAL;
 				goto out_error;
@@ -3278,7 +3278,7 @@
 	struct dasd_ccw_req *cqr;
 	struct ccw1 *ccw;
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	char *dst;
 	unsigned char cmd;
 	unsigned int trkcount;
@@ -3378,8 +3378,8 @@
 			idaws = idal_create_words(idaws, rawpadpage, PAGE_SIZE);
 	}
 	rq_for_each_segment(bv, req, iter) {
-		dst = page_address(bv->bv_page) + bv->bv_offset;
-		seg_len = bv->bv_len;
+		dst = page_address(bv.bv_page) + bv.bv_offset;
+		seg_len = bv.bv_len;
 		if (cmd == DASD_ECKD_CCW_READ_TRACK)
 			memset(dst, 0, seg_len);
 		if (!len_to_track_end) {
@@ -3424,7 +3424,7 @@
 	struct dasd_eckd_private *private;
 	struct ccw1 *ccw;
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	char *dst, *cda;
 	unsigned int blksize, blk_per_trk, off;
 	sector_t recid;
@@ -3442,8 +3442,8 @@
 	if (private->uses_cdl == 0 || recid > 2*blk_per_trk)
 		ccw++;
 	rq_for_each_segment(bv, req, iter) {
-		dst = page_address(bv->bv_page) + bv->bv_offset;
-		for (off = 0; off < bv->bv_len; off += blksize) {
+		dst = page_address(bv.bv_page) + bv.bv_offset;
+		for (off = 0; off < bv.bv_len; off += blksize) {
 			/* Skip locate record. */
 			if (private->uses_cdl && recid <= 2*blk_per_trk)
 				ccw++;
@@ -3454,7 +3454,7 @@
 					cda = (char *)((addr_t) ccw->cda);
 				if (dst != cda) {
 					if (rq_data_dir(req) == READ)
-						memcpy(dst, cda, bv->bv_len);
+						memcpy(dst, cda, bv.bv_len);
 					kmem_cache_free(dasd_page_cache,
 					    (void *)((addr_t)cda & PAGE_MASK));
 				}
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 9cbc8c3..2c8e68b 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -260,7 +260,7 @@
 	struct dasd_ccw_req *cqr;
 	struct ccw1 *ccw;
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	char *dst;
 	int count, cidaw, cplength, datasize;
 	sector_t recid, first_rec, last_rec;
@@ -283,13 +283,13 @@
 	count = 0;
 	cidaw = 0;
 	rq_for_each_segment(bv, req, iter) {
-		if (bv->bv_len & (blksize - 1))
+		if (bv.bv_len & (blksize - 1))
 			/* Fba can only do full blocks. */
 			return ERR_PTR(-EINVAL);
-		count += bv->bv_len >> (block->s2b_shift + 9);
+		count += bv.bv_len >> (block->s2b_shift + 9);
 #if defined(CONFIG_64BIT)
-		if (idal_is_needed (page_address(bv->bv_page), bv->bv_len))
-			cidaw += bv->bv_len / blksize;
+		if (idal_is_needed (page_address(bv.bv_page), bv.bv_len))
+			cidaw += bv.bv_len / blksize;
 #endif
 	}
 	/* Paranoia. */
@@ -326,16 +326,16 @@
 	}
 	recid = first_rec;
 	rq_for_each_segment(bv, req, iter) {
-		dst = page_address(bv->bv_page) + bv->bv_offset;
+		dst = page_address(bv.bv_page) + bv.bv_offset;
 		if (dasd_page_cache) {
 			char *copy = kmem_cache_alloc(dasd_page_cache,
 						      GFP_DMA | __GFP_NOWARN);
 			if (copy && rq_data_dir(req) == WRITE)
-				memcpy(copy + bv->bv_offset, dst, bv->bv_len);
+				memcpy(copy + bv.bv_offset, dst, bv.bv_len);
 			if (copy)
-				dst = copy + bv->bv_offset;
+				dst = copy + bv.bv_offset;
 		}
-		for (off = 0; off < bv->bv_len; off += blksize) {
+		for (off = 0; off < bv.bv_len; off += blksize) {
 			/* Locate record for stupid devices. */
 			if (private->rdc_data.mode.bits.data_chain == 0) {
 				ccw[-1].flags |= CCW_FLAG_CC;
@@ -384,7 +384,7 @@
 	struct dasd_fba_private *private;
 	struct ccw1 *ccw;
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	char *dst, *cda;
 	unsigned int blksize, off;
 	int status;
@@ -399,8 +399,8 @@
 	if (private->rdc_data.mode.bits.data_chain != 0)
 		ccw++;
 	rq_for_each_segment(bv, req, iter) {
-		dst = page_address(bv->bv_page) + bv->bv_offset;
-		for (off = 0; off < bv->bv_len; off += blksize) {
+		dst = page_address(bv.bv_page) + bv.bv_offset;
+		for (off = 0; off < bv.bv_len; off += blksize) {
 			/* Skip locate record. */
 			if (private->rdc_data.mode.bits.data_chain == 0)
 				ccw++;
@@ -411,7 +411,7 @@
 					cda = (char *)((addr_t) ccw->cda);
 				if (dst != cda) {
 					if (rq_data_dir(req) == READ)
-						memcpy(dst, cda, bv->bv_len);
+						memcpy(dst, cda, bv.bv_len);
 					kmem_cache_free(dasd_page_cache,
 					    (void *)((addr_t)cda & PAGE_MASK));
 				}
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 6eca019..ebf41e2 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -808,18 +808,19 @@
 dcssblk_make_request(struct request_queue *q, struct bio *bio)
 {
 	struct dcssblk_dev_info *dev_info;
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	unsigned long index;
 	unsigned long page_addr;
 	unsigned long source_addr;
 	unsigned long bytes_done;
-	int i;
 
 	bytes_done = 0;
 	dev_info = bio->bi_bdev->bd_disk->private_data;
 	if (dev_info == NULL)
 		goto fail;
-	if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0)
+	if ((bio->bi_iter.bi_sector & 7) != 0 ||
+	    (bio->bi_iter.bi_size & 4095) != 0)
 		/* Request is not page-aligned. */
 		goto fail;
 	if (bio_end_sector(bio) > get_capacity(bio->bi_bdev->bd_disk)) {
@@ -842,22 +843,22 @@
 		}
 	}
 
-	index = (bio->bi_sector >> 3);
-	bio_for_each_segment(bvec, bio, i) {
+	index = (bio->bi_iter.bi_sector >> 3);
+	bio_for_each_segment(bvec, bio, iter) {
 		page_addr = (unsigned long)
-			page_address(bvec->bv_page) + bvec->bv_offset;
+			page_address(bvec.bv_page) + bvec.bv_offset;
 		source_addr = dev_info->start + (index<<12) + bytes_done;
-		if (unlikely((page_addr & 4095) != 0) || (bvec->bv_len & 4095) != 0)
+		if (unlikely((page_addr & 4095) != 0) || (bvec.bv_len & 4095) != 0)
 			// More paranoia.
 			goto fail;
 		if (bio_data_dir(bio) == READ) {
 			memcpy((void*)page_addr, (void*)source_addr,
-				bvec->bv_len);
+				bvec.bv_len);
 		} else {
 			memcpy((void*)source_addr, (void*)page_addr,
-				bvec->bv_len);
+				bvec.bv_len);
 		}
-		bytes_done += bvec->bv_len;
+		bytes_done += bvec.bv_len;
 	}
 	bio_endio(bio, 0);
 	return;
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index d0ab501..76bed17 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -130,7 +130,7 @@
 	struct aidaw *aidaw = scmrq->aidaw;
 	struct msb *msb = &scmrq->aob->msb[0];
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 
 	msb->bs = MSB_BS_4K;
 	scmrq->aob->request.msb_count = 1;
@@ -142,9 +142,9 @@
 	msb->data_addr = (u64) aidaw;
 
 	rq_for_each_segment(bv, scmrq->request, iter) {
-		WARN_ON(bv->bv_offset);
-		msb->blk_count += bv->bv_len >> 12;
-		aidaw->data_addr = (u64) page_address(bv->bv_page);
+		WARN_ON(bv.bv_offset);
+		msb->blk_count += bv.bv_len >> 12;
+		aidaw->data_addr = (u64) page_address(bv.bv_page);
 		aidaw++;
 	}
 }
diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c
index 27f930c..9aae909 100644
--- a/drivers/s390/block/scm_blk_cluster.c
+++ b/drivers/s390/block/scm_blk_cluster.c
@@ -122,7 +122,7 @@
 	struct aidaw *aidaw = scmrq->aidaw;
 	struct msb *msb = &scmrq->aob->msb[0];
 	struct req_iterator iter;
-	struct bio_vec *bv;
+	struct bio_vec bv;
 	int i = 0;
 	u64 addr;
 
@@ -163,7 +163,7 @@
 			i++;
 		}
 		rq_for_each_segment(bv, req, iter) {
-			aidaw->data_addr = (u64) page_address(bv->bv_page);
+			aidaw->data_addr = (u64) page_address(bv.bv_page);
 			aidaw++;
 			i++;
 		}
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index 58141f0..6969d39 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -184,25 +184,26 @@
 static void xpram_make_request(struct request_queue *q, struct bio *bio)
 {
 	xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 	unsigned int index;
 	unsigned long page_addr;
 	unsigned long bytes;
-	int i;
 
-	if ((bio->bi_sector & 7) != 0 || (bio->bi_size & 4095) != 0)
+	if ((bio->bi_iter.bi_sector & 7) != 0 ||
+	    (bio->bi_iter.bi_size & 4095) != 0)
 		/* Request is not page-aligned. */
 		goto fail;
-	if ((bio->bi_size >> 12) > xdev->size)
+	if ((bio->bi_iter.bi_size >> 12) > xdev->size)
 		/* Request size exceeds the device size. */
 		goto fail;
-	if ((bio->bi_sector >> 3) > 0xffffffffU - xdev->offset)
+	if ((bio->bi_iter.bi_sector >> 3) > 0xffffffffU - xdev->offset)
 		goto fail;
-	index = (bio->bi_sector >> 3) + xdev->offset;
-	bio_for_each_segment(bvec, bio, i) {
+	index = (bio->bi_iter.bi_sector >> 3) + xdev->offset;
+	bio_for_each_segment(bvec, bio, iter) {
 		page_addr = (unsigned long)
-			kmap(bvec->bv_page) + bvec->bv_offset;
-		bytes = bvec->bv_len;
+			kmap(bvec.bv_page) + bvec.bv_offset;
+		bytes = bvec.bv_len;
 		if ((page_addr & 4095) != 0 || (bytes & 4095) != 0)
 			/* More paranoia. */
 			goto fail;
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 446b851..0cac7d8 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2163,10 +2163,10 @@
 	}
 
 	/* do we need to support multiple segments? */
-	if (bio_segments(req->bio) > 1 || bio_segments(rsp->bio) > 1) {
-		printk("%s: multiple segments req %u %u, rsp %u %u\n",
-		       __func__, bio_segments(req->bio), blk_rq_bytes(req),
-		       bio_segments(rsp->bio), blk_rq_bytes(rsp));
+	if (bio_multiple_segments(req->bio) ||
+	    bio_multiple_segments(rsp->bio)) {
+		printk("%s: multiple segments req %u, rsp %u\n",
+		       __func__, blk_rq_bytes(req), blk_rq_bytes(rsp));
 		return -EINVAL;
 	}
 
diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index 9d26637..410f4a3 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c
@@ -1901,7 +1901,7 @@
 	struct MPT2SAS_ADAPTER *ioc = shost_priv(shost);
 	Mpi2SmpPassthroughRequest_t *mpi_request;
 	Mpi2SmpPassthroughReply_t *mpi_reply;
-	int rc, i;
+	int rc;
 	u16 smid;
 	u32 ioc_state;
 	unsigned long timeleft;
@@ -1916,7 +1916,8 @@
 	void *pci_addr_out = NULL;
 	u16 wait_state_count;
 	struct request *rsp = req->next_rq;
-	struct bio_vec *bvec = NULL;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 
 	if (!rsp) {
 		printk(MPT2SAS_ERR_FMT "%s: the smp response space is "
@@ -1942,7 +1943,7 @@
 	ioc->transport_cmds.status = MPT2_CMD_PENDING;
 
 	/* Check if the request is split across multiple segments */
-	if (bio_segments(req->bio) > 1) {
+	if (bio_multiple_segments(req->bio)) {
 		u32 offset = 0;
 
 		/* Allocate memory and copy the request */
@@ -1955,11 +1956,11 @@
 			goto out;
 		}
 
-		bio_for_each_segment(bvec, req->bio, i) {
+		bio_for_each_segment(bvec, req->bio, iter) {
 			memcpy(pci_addr_out + offset,
-			    page_address(bvec->bv_page) + bvec->bv_offset,
-			    bvec->bv_len);
-			offset += bvec->bv_len;
+			    page_address(bvec.bv_page) + bvec.bv_offset,
+			    bvec.bv_len);
+			offset += bvec.bv_len;
 		}
 	} else {
 		dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio),
@@ -1974,7 +1975,7 @@
 
 	/* Check if the response needs to be populated across
 	 * multiple segments */
-	if (bio_segments(rsp->bio) > 1) {
+	if (bio_multiple_segments(rsp->bio)) {
 		pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp),
 		    &pci_dma_in);
 		if (!pci_addr_in) {
@@ -2041,7 +2042,7 @@
 	sgl_flags = (MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
 	    MPI2_SGE_FLAGS_END_OF_BUFFER | MPI2_SGE_FLAGS_HOST_TO_IOC);
 	sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
-	if (bio_segments(req->bio) > 1) {
+	if (bio_multiple_segments(req->bio)) {
 		ioc->base_add_sg_single(psge, sgl_flags |
 		    (blk_rq_bytes(req) - 4), pci_dma_out);
 	} else {
@@ -2057,7 +2058,7 @@
 	    MPI2_SGE_FLAGS_LAST_ELEMENT | MPI2_SGE_FLAGS_END_OF_BUFFER |
 	    MPI2_SGE_FLAGS_END_OF_LIST);
 	sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT;
-	if (bio_segments(rsp->bio) > 1) {
+	if (bio_multiple_segments(rsp->bio)) {
 		ioc->base_add_sg_single(psge, sgl_flags |
 		    (blk_rq_bytes(rsp) + 4), pci_dma_in);
 	} else {
@@ -2102,23 +2103,23 @@
 		    le16_to_cpu(mpi_reply->ResponseDataLength);
 		/* check if the resp needs to be copied from the allocated
 		 * pci mem */
-		if (bio_segments(rsp->bio) > 1) {
+		if (bio_multiple_segments(rsp->bio)) {
 			u32 offset = 0;
 			u32 bytes_to_copy =
 			    le16_to_cpu(mpi_reply->ResponseDataLength);
-			bio_for_each_segment(bvec, rsp->bio, i) {
-				if (bytes_to_copy <= bvec->bv_len) {
-					memcpy(page_address(bvec->bv_page) +
-					    bvec->bv_offset, pci_addr_in +
+			bio_for_each_segment(bvec, rsp->bio, iter) {
+				if (bytes_to_copy <= bvec.bv_len) {
+					memcpy(page_address(bvec.bv_page) +
+					    bvec.bv_offset, pci_addr_in +
 					    offset, bytes_to_copy);
 					break;
 				} else {
-					memcpy(page_address(bvec->bv_page) +
-					    bvec->bv_offset, pci_addr_in +
-					    offset, bvec->bv_len);
-					bytes_to_copy -= bvec->bv_len;
+					memcpy(page_address(bvec.bv_page) +
+					    bvec.bv_offset, pci_addr_in +
+					    offset, bvec.bv_len);
+					bytes_to_copy -= bvec.bv_len;
 				}
-				offset += bvec->bv_len;
+				offset += bvec.bv_len;
 			}
 		}
 	} else {
diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index e771a88..65170cb 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -1884,7 +1884,7 @@
 	struct MPT3SAS_ADAPTER *ioc = shost_priv(shost);
 	Mpi2SmpPassthroughRequest_t *mpi_request;
 	Mpi2SmpPassthroughReply_t *mpi_reply;
-	int rc, i;
+	int rc;
 	u16 smid;
 	u32 ioc_state;
 	unsigned long timeleft;
@@ -1898,7 +1898,8 @@
 	void *pci_addr_out = NULL;
 	u16 wait_state_count;
 	struct request *rsp = req->next_rq;
-	struct bio_vec *bvec = NULL;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 
 	if (!rsp) {
 		pr_err(MPT3SAS_FMT "%s: the smp response space is missing\n",
@@ -1925,7 +1926,7 @@
 	ioc->transport_cmds.status = MPT3_CMD_PENDING;
 
 	/* Check if the request is split across multiple segments */
-	if (req->bio->bi_vcnt > 1) {
+	if (bio_multiple_segments(req->bio)) {
 		u32 offset = 0;
 
 		/* Allocate memory and copy the request */
@@ -1938,11 +1939,11 @@
 			goto out;
 		}
 
-		bio_for_each_segment(bvec, req->bio, i) {
+		bio_for_each_segment(bvec, req->bio, iter) {
 			memcpy(pci_addr_out + offset,
-			    page_address(bvec->bv_page) + bvec->bv_offset,
-			    bvec->bv_len);
-			offset += bvec->bv_len;
+			    page_address(bvec.bv_page) + bvec.bv_offset,
+			    bvec.bv_len);
+			offset += bvec.bv_len;
 		}
 	} else {
 		dma_addr_out = pci_map_single(ioc->pdev, bio_data(req->bio),
@@ -1957,7 +1958,7 @@
 
 	/* Check if the response needs to be populated across
 	 * multiple segments */
-	if (rsp->bio->bi_vcnt > 1) {
+	if (bio_multiple_segments(rsp->bio)) {
 		pci_addr_in = pci_alloc_consistent(ioc->pdev, blk_rq_bytes(rsp),
 		    &pci_dma_in);
 		if (!pci_addr_in) {
@@ -2018,7 +2019,7 @@
 	mpi_request->RequestDataLength = cpu_to_le16(blk_rq_bytes(req) - 4);
 	psge = &mpi_request->SGL;
 
-	if (req->bio->bi_vcnt > 1)
+	if (bio_multiple_segments(req->bio))
 		ioc->build_sg(ioc, psge, pci_dma_out, (blk_rq_bytes(req) - 4),
 		    pci_dma_in, (blk_rq_bytes(rsp) + 4));
 	else
@@ -2063,23 +2064,23 @@
 
 		/* check if the resp needs to be copied from the allocated
 		 * pci mem */
-		if (rsp->bio->bi_vcnt > 1) {
+		if (bio_multiple_segments(rsp->bio)) {
 			u32 offset = 0;
 			u32 bytes_to_copy =
 			    le16_to_cpu(mpi_reply->ResponseDataLength);
-			bio_for_each_segment(bvec, rsp->bio, i) {
-				if (bytes_to_copy <= bvec->bv_len) {
-					memcpy(page_address(bvec->bv_page) +
-					    bvec->bv_offset, pci_addr_in +
+			bio_for_each_segment(bvec, rsp->bio, iter) {
+				if (bytes_to_copy <= bvec.bv_len) {
+					memcpy(page_address(bvec.bv_page) +
+					    bvec.bv_offset, pci_addr_in +
 					    offset, bytes_to_copy);
 					break;
 				} else {
-					memcpy(page_address(bvec->bv_page) +
-					    bvec->bv_offset, pci_addr_in +
-					    offset, bvec->bv_len);
-					bytes_to_copy -= bvec->bv_len;
+					memcpy(page_address(bvec.bv_page) +
+					    bvec.bv_offset, pci_addr_in +
+					    offset, bvec.bv_len);
+					bytes_to_copy -= bvec.bv_len;
 				}
-				offset += bvec->bv_len;
+				offset += bvec.bv_len;
 			}
 		}
 	} else {
diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index aa66361..bac04c2 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c
@@ -731,7 +731,7 @@
 
 	bio->bi_rw &= ~REQ_WRITE;
 	or->in.bio = bio;
-	or->in.total_bytes = bio->bi_size;
+	or->in.total_bytes = bio->bi_iter.bi_size;
 	return 0;
 }
 
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 9846c6a..470954a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -801,7 +801,7 @@
 	if (sdkp->device->no_write_same)
 		return BLKPREP_KILL;
 
-	BUG_ON(bio_offset(bio) || bio_iovec(bio)->bv_len != sdp->sector_size);
+	BUG_ON(bio_offset(bio) || bio_iovec(bio).bv_len != sdp->sector_size);
 
 	sector >>= ilog2(sdp->sector_size) - 9;
 	nr_sectors >>= ilog2(sdp->sector_size) - 9;
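
The sd.c change reflects that bio_iovec() now returns a struct bio_vec by
value, computed from the iterator, rather than a pointer into the biovec
array, so members are reached with '.' instead of '->'. A small usage
sketch:

	struct bio_vec first = bio_iovec(bio);

	/* the leading unprocessed segment, adjusted for bi_iter */
	pr_debug("first segment: %u bytes at page offset %u\n",
		 first.bv_len, first.bv_offset);
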
diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c
index 6174ca4..a7a691d 100644
--- a/drivers/scsi/sd_dif.c
+++ b/drivers/scsi/sd_dif.c
@@ -365,7 +365,6 @@
 	struct bio *bio;
 	struct scsi_disk *sdkp;
 	struct sd_dif_tuple *sdt;
-	unsigned int i, j;
 	u32 phys, virt;
 
 	sdkp = rq->bio->bi_bdev->bd_disk->private_data;
@@ -376,19 +375,21 @@
 	phys = hw_sector & 0xffffffff;
 
 	__rq_for_each_bio(bio, rq) {
-		struct bio_vec *iv;
+		struct bio_vec iv;
+		struct bvec_iter iter;
+		unsigned int j;
 
 		/* Already remapped? */
 		if (bio_flagged(bio, BIO_MAPPED_INTEGRITY))
 			break;
 
-		virt = bio->bi_integrity->bip_sector & 0xffffffff;
+		virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff;
 
-		bip_for_each_vec(iv, bio->bi_integrity, i) {
-			sdt = kmap_atomic(iv->bv_page)
-				+ iv->bv_offset;
+		bip_for_each_vec(iv, bio->bi_integrity, iter) {
+			sdt = kmap_atomic(iv.bv_page)
+				+ iv.bv_offset;
 
-			for (j = 0 ; j < iv->bv_len ; j += tuple_sz, sdt++) {
+			for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) {
 
 				if (be32_to_cpu(sdt->ref_tag) == virt)
 					sdt->ref_tag = cpu_to_be32(phys);
@@ -414,7 +415,7 @@
 	struct scsi_disk *sdkp;
 	struct bio *bio;
 	struct sd_dif_tuple *sdt;
-	unsigned int i, j, sectors, sector_sz;
+	unsigned int j, sectors, sector_sz;
 	u32 phys, virt;
 
 	sdkp = scsi_disk(scmd->request->rq_disk);
@@ -430,15 +431,16 @@
 		phys >>= 3;
 
 	__rq_for_each_bio(bio, scmd->request) {
-		struct bio_vec *iv;
+		struct bio_vec iv;
+		struct bvec_iter iter;
 
-		virt = bio->bi_integrity->bip_sector & 0xffffffff;
+		virt = bio->bi_integrity->bip_iter.bi_sector & 0xffffffff;
 
-		bip_for_each_vec(iv, bio->bi_integrity, i) {
-			sdt = kmap_atomic(iv->bv_page)
-				+ iv->bv_offset;
+		bip_for_each_vec(iv, bio->bi_integrity, iter) {
+			sdt = kmap_atomic(iv.bv_page)
+				+ iv.bv_offset;
 
-			for (j = 0 ; j < iv->bv_len ; j += tuple_sz, sdt++) {
+			for (j = 0; j < iv.bv_len; j += tuple_sz, sdt++) {
 
 				if (sectors == 0) {
 					kunmap_atomic(sdt);
diff --git a/drivers/spi/spi-mpc512x-psc.c b/drivers/spi/spi-mpc512x-psc.c
index 46d2313..5032141 100644
--- a/drivers/spi/spi-mpc512x-psc.c
+++ b/drivers/spi/spi-mpc512x-psc.c
@@ -40,6 +40,7 @@
 	unsigned int irq;
 	u8 bits_per_word;
 	struct clk *clk_mclk;
+	struct clk *clk_ipg;
 	u32 mclk_rate;
 
 	struct completion txisrdone;
@@ -475,8 +476,6 @@
 	struct spi_master *master;
 	int ret;
 	void *tempp;
-	int psc_num;
-	char clk_name[16];
 	struct clk *clk;
 
 	master = spi_alloc_master(dev, sizeof *mps);
@@ -519,9 +518,7 @@
 		goto free_master;
 	init_completion(&mps->txisrdone);
 
-	psc_num = master->bus_num;
-	snprintf(clk_name, sizeof(clk_name), "psc%d_mclk", psc_num);
-	clk = devm_clk_get(dev, clk_name);
+	clk = devm_clk_get(dev, "mclk");
 	if (IS_ERR(clk)) {
 		ret = PTR_ERR(clk);
 		goto free_master;
@@ -532,17 +529,29 @@
 	mps->clk_mclk = clk;
 	mps->mclk_rate = clk_get_rate(clk);
 
+	clk = devm_clk_get(dev, "ipg");
+	if (IS_ERR(clk)) {
+		ret = PTR_ERR(clk);
+		goto free_mclk_clock;
+	}
+	ret = clk_prepare_enable(clk);
+	if (ret)
+		goto free_mclk_clock;
+	mps->clk_ipg = clk;
+
 	ret = mpc512x_psc_spi_port_config(master, mps);
 	if (ret < 0)
-		goto free_clock;
+		goto free_ipg_clock;
 
 	ret = devm_spi_register_master(dev, master);
 	if (ret < 0)
-		goto free_clock;
+		goto free_ipg_clock;
 
 	return ret;
 
-free_clock:
+free_ipg_clock:
+	clk_disable_unprepare(mps->clk_ipg);
+free_mclk_clock:
 	clk_disable_unprepare(mps->clk_mclk);
 free_master:
 	spi_master_put(master);
@@ -556,6 +565,7 @@
 	struct mpc512x_psc_spi *mps = spi_master_get_devdata(master);
 
 	clk_disable_unprepare(mps->clk_mclk);
+	clk_disable_unprepare(mps->clk_ipg);
 
 	return 0;
 }
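
The probe unwind above keeps the usual last-acquired, first-released
discipline; with devm-managed clock lookups, only the prepare/enable state
needs explicit rollback. A sketch of the shape, with register_things() as
a hypothetical final step:

	ret = clk_prepare_enable(mclk);
	if (ret)
		return ret;		/* nothing to roll back yet */
	ret = clk_prepare_enable(ipg);
	if (ret)
		goto err_mclk;
	ret = register_things();
	if (ret)
		goto err_ipg;
	return 0;

err_ipg:
	clk_disable_unprepare(ipg);
err_mclk:
	clk_disable_unprepare(mclk);
	return ret;
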
diff --git a/drivers/ssb/Kconfig b/drivers/ssb/Kconfig
index 2cd9b0e..75b3603 100644
--- a/drivers/ssb/Kconfig
+++ b/drivers/ssb/Kconfig
@@ -168,6 +168,7 @@
 config SSB_DRIVER_GPIO
 	bool "SSB GPIO driver"
 	depends on SSB && GPIOLIB
+	select IRQ_DOMAIN if SSB_EMBEDDED
 	help
 	  Driver to provide access to the GPIO pins on the bus.
 
diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
index dc109de..ba350d2 100644
--- a/drivers/ssb/driver_gpio.c
+++ b/drivers/ssb/driver_gpio.c
@@ -9,16 +9,40 @@
  */
 
 #include <linux/gpio.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/export.h>
 #include <linux/ssb/ssb.h>
 
 #include "ssb_private.h"
 
+
+/**************************************************
+ * Shared
+ **************************************************/
+
 static struct ssb_bus *ssb_gpio_get_bus(struct gpio_chip *chip)
 {
 	return container_of(chip, struct ssb_bus, gpio);
 }
 
+#if IS_ENABLED(CONFIG_SSB_EMBEDDED)
+static int ssb_gpio_to_irq(struct gpio_chip *chip, unsigned gpio)
+{
+	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+
+	if (bus->bustype == SSB_BUSTYPE_SSB)
+		return irq_find_mapping(bus->irq_domain, gpio);
+	else
+		return -EINVAL;
+}
+#endif
+
+/**************************************************
+ * ChipCommon
+ **************************************************/
+
 static int ssb_gpio_chipco_get_value(struct gpio_chip *chip, unsigned gpio)
 {
 	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
@@ -74,19 +98,129 @@
 	ssb_chipco_gpio_pullup(&bus->chipco, 1 << gpio, 0);
 }
 
-static int ssb_gpio_chipco_to_irq(struct gpio_chip *chip, unsigned gpio)
+#if IS_ENABLED(CONFIG_SSB_EMBEDDED)
+static void ssb_gpio_irq_chipco_mask(struct irq_data *d)
 {
-	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+	struct ssb_bus *bus = irq_data_get_irq_chip_data(d);
+	int gpio = irqd_to_hwirq(d);
 
-	if (bus->bustype == SSB_BUSTYPE_SSB)
-		return ssb_mips_irq(bus->chipco.dev) + 2;
-	else
-		return -EINVAL;
+	ssb_chipco_gpio_intmask(&bus->chipco, BIT(gpio), 0);
 }
 
+static void ssb_gpio_irq_chipco_unmask(struct irq_data *d)
+{
+	struct ssb_bus *bus = irq_data_get_irq_chip_data(d);
+	int gpio = irqd_to_hwirq(d);
+	u32 val = ssb_chipco_gpio_in(&bus->chipco, BIT(gpio));
+
+	ssb_chipco_gpio_polarity(&bus->chipco, BIT(gpio), val);
+	ssb_chipco_gpio_intmask(&bus->chipco, BIT(gpio), BIT(gpio));
+}
+
+static struct irq_chip ssb_gpio_irq_chipco_chip = {
+	.name		= "SSB-GPIO-CC",
+	.irq_mask	= ssb_gpio_irq_chipco_mask,
+	.irq_unmask	= ssb_gpio_irq_chipco_unmask,
+};
+
+static irqreturn_t ssb_gpio_irq_chipco_handler(int irq, void *dev_id)
+{
+	struct ssb_bus *bus = dev_id;
+	struct ssb_chipcommon *chipco = &bus->chipco;
+	u32 val = chipco_read32(chipco, SSB_CHIPCO_GPIOIN);
+	u32 mask = chipco_read32(chipco, SSB_CHIPCO_GPIOIRQ);
+	u32 pol = chipco_read32(chipco, SSB_CHIPCO_GPIOPOL);
+	unsigned long irqs = (val ^ pol) & mask;
+	int gpio;
+
+	if (!irqs)
+		return IRQ_NONE;
+
+	for_each_set_bit(gpio, &irqs, bus->gpio.ngpio)
+		generic_handle_irq(ssb_gpio_to_irq(&bus->gpio, gpio));
+	ssb_chipco_gpio_polarity(chipco, irqs, val & irqs);
+
+	return IRQ_HANDLED;
+}
+
+static int ssb_gpio_irq_chipco_domain_init(struct ssb_bus *bus)
+{
+	struct ssb_chipcommon *chipco = &bus->chipco;
+	struct gpio_chip *chip = &bus->gpio;
+	int gpio, hwirq, err;
+
+	if (bus->bustype != SSB_BUSTYPE_SSB)
+		return 0;
+
+	bus->irq_domain = irq_domain_add_linear(NULL, chip->ngpio,
+						&irq_domain_simple_ops, chipco);
+	if (!bus->irq_domain) {
+		err = -ENODEV;
+		goto err_irq_domain;
+	}
+	for (gpio = 0; gpio < chip->ngpio; gpio++) {
+		int irq = irq_create_mapping(bus->irq_domain, gpio);
+
+		irq_set_chip_data(irq, bus);
+		irq_set_chip_and_handler(irq, &ssb_gpio_irq_chipco_chip,
+					 handle_simple_irq);
+	}
+
+	hwirq = ssb_mips_irq(bus->chipco.dev) + 2;
+	err = request_irq(hwirq, ssb_gpio_irq_chipco_handler, IRQF_SHARED,
+			  "gpio", bus);
+	if (err)
+		goto err_req_irq;
+
+	ssb_chipco_gpio_intmask(&bus->chipco, ~0, 0);
+	chipco_set32(chipco, SSB_CHIPCO_IRQMASK, SSB_CHIPCO_IRQ_GPIO);
+
+	return 0;
+
+err_req_irq:
+	for (gpio = 0; gpio < chip->ngpio; gpio++) {
+		int irq = irq_find_mapping(bus->irq_domain, gpio);
+
+		irq_dispose_mapping(irq);
+	}
+	irq_domain_remove(bus->irq_domain);
+err_irq_domain:
+	return err;
+}
+
+static void ssb_gpio_irq_chipco_domain_exit(struct ssb_bus *bus)
+{
+	struct ssb_chipcommon *chipco = &bus->chipco;
+	struct gpio_chip *chip = &bus->gpio;
+	int gpio;
+
+	if (bus->bustype != SSB_BUSTYPE_SSB)
+		return;
+
+	chipco_mask32(chipco, SSB_CHIPCO_IRQMASK, ~SSB_CHIPCO_IRQ_GPIO);
+	free_irq(ssb_mips_irq(bus->chipco.dev) + 2, bus);
+	for (gpio = 0; gpio < chip->ngpio; gpio++) {
+		int irq = irq_find_mapping(bus->irq_domain, gpio);
+
+		irq_dispose_mapping(irq);
+	}
+	irq_domain_remove(bus->irq_domain);
+}
+#else
+static int ssb_gpio_irq_chipco_domain_init(struct ssb_bus *bus)
+{
+	return 0;
+}
+
+static void ssb_gpio_irq_chipco_domain_exit(struct ssb_bus *bus)
+{
+}
+#endif
+
 static int ssb_gpio_chipco_init(struct ssb_bus *bus)
 {
 	struct gpio_chip *chip = &bus->gpio;
+	int err;
 
 	chip->label		= "ssb_chipco_gpio";
 	chip->owner		= THIS_MODULE;
@@ -96,7 +230,9 @@
 	chip->set		= ssb_gpio_chipco_set_value;
 	chip->direction_input	= ssb_gpio_chipco_direction_input;
 	chip->direction_output	= ssb_gpio_chipco_direction_output;
-	chip->to_irq		= ssb_gpio_chipco_to_irq;
+#if IS_ENABLED(CONFIG_SSB_EMBEDDED)
+	chip->to_irq		= ssb_gpio_to_irq;
+#endif
 	chip->ngpio		= 16;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
@@ -106,9 +242,23 @@
 	else
 		chip->base		= -1;
 
-	return gpiochip_add(chip);
+	err = ssb_gpio_irq_chipco_domain_init(bus);
+	if (err)
+		return err;
+
+	err = gpiochip_add(chip);
+	if (err) {
+		ssb_gpio_irq_chipco_domain_exit(bus);
+		return err;
+	}
+
+	return 0;
 }
 
+/**************************************************
+ * EXTIF
+ **************************************************/
+
 #ifdef CONFIG_SSB_DRIVER_EXTIF
 
 static int ssb_gpio_extif_get_value(struct gpio_chip *chip, unsigned gpio)
@@ -145,19 +295,127 @@
 	return 0;
 }
 
-static int ssb_gpio_extif_to_irq(struct gpio_chip *chip, unsigned gpio)
+#if IS_ENABLED(CONFIG_SSB_EMBEDDED)
+static void ssb_gpio_irq_extif_mask(struct irq_data *d)
 {
-	struct ssb_bus *bus = ssb_gpio_get_bus(chip);
+	struct ssb_bus *bus = irq_data_get_irq_chip_data(d);
+	int gpio = irqd_to_hwirq(d);
 
-	if (bus->bustype == SSB_BUSTYPE_SSB)
-		return ssb_mips_irq(bus->extif.dev) + 2;
-	else
-		return -EINVAL;
+	ssb_extif_gpio_intmask(&bus->extif, BIT(gpio), 0);
 }
 
+static void ssb_gpio_irq_extif_unmask(struct irq_data *d)
+{
+	struct ssb_bus *bus = irq_data_get_irq_chip_data(d);
+	int gpio = irqd_to_hwirq(d);
+	u32 val = ssb_extif_gpio_in(&bus->extif, BIT(gpio));
+
+	ssb_extif_gpio_polarity(&bus->extif, BIT(gpio), val);
+	ssb_extif_gpio_intmask(&bus->extif, BIT(gpio), BIT(gpio));
+}
+
+static struct irq_chip ssb_gpio_irq_extif_chip = {
+	.name		= "SSB-GPIO-EXTIF",
+	.irq_mask	= ssb_gpio_irq_extif_mask,
+	.irq_unmask	= ssb_gpio_irq_extif_unmask,
+};
+
+static irqreturn_t ssb_gpio_irq_extif_handler(int irq, void *dev_id)
+{
+	struct ssb_bus *bus = dev_id;
+	struct ssb_extif *extif = &bus->extif;
+	u32 val = ssb_read32(extif->dev, SSB_EXTIF_GPIO_IN);
+	u32 mask = ssb_read32(extif->dev, SSB_EXTIF_GPIO_INTMASK);
+	u32 pol = ssb_read32(extif->dev, SSB_EXTIF_GPIO_INTPOL);
+	unsigned long irqs = (val ^ pol) & mask;
+	int gpio;
+
+	if (!irqs)
+		return IRQ_NONE;
+
+	for_each_set_bit(gpio, &irqs, bus->gpio.ngpio)
+		generic_handle_irq(ssb_gpio_to_irq(&bus->gpio, gpio));
+	ssb_extif_gpio_polarity(extif, irqs, val & irqs);
+
+	return IRQ_HANDLED;
+}
+
+static int ssb_gpio_irq_extif_domain_init(struct ssb_bus *bus)
+{
+	struct ssb_extif *extif = &bus->extif;
+	struct gpio_chip *chip = &bus->gpio;
+	int gpio, hwirq, err;
+
+	if (bus->bustype != SSB_BUSTYPE_SSB)
+		return 0;
+
+	bus->irq_domain = irq_domain_add_linear(NULL, chip->ngpio,
+						&irq_domain_simple_ops, extif);
+	if (!bus->irq_domain) {
+		err = -ENODEV;
+		goto err_irq_domain;
+	}
+	for (gpio = 0; gpio < chip->ngpio; gpio++) {
+		int irq = irq_create_mapping(bus->irq_domain, gpio);
+
+		irq_set_chip_data(irq, bus);
+		irq_set_chip_and_handler(irq, &ssb_gpio_irq_extif_chip,
+					 handle_simple_irq);
+	}
+
+	hwirq = ssb_mips_irq(bus->extif.dev) + 2;
+	err = request_irq(hwirq, ssb_gpio_irq_extif_handler, IRQF_SHARED,
+			  "gpio", bus);
+	if (err)
+		goto err_req_irq;
+
+	ssb_extif_gpio_intmask(&bus->extif, ~0, 0);
+
+	return 0;
+
+err_req_irq:
+	for (gpio = 0; gpio < chip->ngpio; gpio++) {
+		int irq = irq_find_mapping(bus->irq_domain, gpio);
+
+		irq_dispose_mapping(irq);
+	}
+	irq_domain_remove(bus->irq_domain);
+err_irq_domain:
+	return err;
+}
+
+static void ssb_gpio_irq_extif_domain_exit(struct ssb_bus *bus)
+{
+	struct ssb_extif *extif = &bus->extif;
+	struct gpio_chip *chip = &bus->gpio;
+	int gpio;
+
+	if (bus->bustype != SSB_BUSTYPE_SSB)
+		return;
+
+	free_irq(ssb_mips_irq(bus->extif.dev) + 2, bus);
+	for (gpio = 0; gpio < chip->ngpio; gpio++) {
+		int irq = irq_find_mapping(bus->irq_domain, gpio);
+
+		irq_dispose_mapping(irq);
+	}
+	irq_domain_remove(bus->irq_domain);
+}
+#else
+static int ssb_gpio_irq_extif_domain_init(struct ssb_bus *bus)
+{
+	return 0;
+}
+
+static void ssb_gpio_irq_extif_domain_exit(struct ssb_bus *bus)
+{
+}
+#endif
+
 static int ssb_gpio_extif_init(struct ssb_bus *bus)
 {
 	struct gpio_chip *chip = &bus->gpio;
+	int err;
 
 	chip->label		= "ssb_extif_gpio";
 	chip->owner		= THIS_MODULE;
@@ -165,7 +423,9 @@
 	chip->set		= ssb_gpio_extif_set_value;
 	chip->direction_input	= ssb_gpio_extif_direction_input;
 	chip->direction_output	= ssb_gpio_extif_direction_output;
-	chip->to_irq		= ssb_gpio_extif_to_irq;
+#if IS_ENABLED(CONFIG_SSB_EMBEDDED)
+	chip->to_irq		= ssb_gpio_to_irq;
+#endif
 	chip->ngpio		= 5;
 	/* There is just one SoC in one device and its GPIO addresses should be
 	 * deterministic to address them more easily. The other buses could get
@@ -175,7 +435,17 @@
 	else
 		chip->base		= -1;
 
-	return gpiochip_add(chip);
+	err = ssb_gpio_irq_extif_domain_init(bus);
+	if (err)
+		return err;
+
+	err = gpiochip_add(chip);
+	if (err) {
+		ssb_gpio_irq_extif_domain_exit(bus);
+		return err;
+	}
+
+	return 0;
 }
 
 #else
@@ -185,6 +455,10 @@
 }
 #endif
 
+/**************************************************
+ * Init
+ **************************************************/
+
 int ssb_gpio_init(struct ssb_bus *bus)
 {
 	if (ssb_chipco_available(&bus->chipco))
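
With the IRQ domain in place, gpiolib clients get a real per-pin interrupt
through the shared to_irq callback instead of the bare MIPS IRQ number the
old per-bus callbacks returned for every pin. A hypothetical consumer, with
button_isr and priv standing in for driver-specific pieces:

	/* button wired to SSB GPIO 3 */
	int irq = gpio_to_irq(bus->gpio.base + 3);

	if (irq < 0)
		return irq;
	err = request_irq(irq, button_isr, 0, "button", priv);
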
diff --git a/drivers/ssb/main.c b/drivers/ssb/main.c
index 32a811d..2fead38 100644
--- a/drivers/ssb/main.c
+++ b/drivers/ssb/main.c
@@ -593,6 +593,13 @@
 		ssb_pcicore_init(&bus->pcicore);
 		if (bus->bustype == SSB_BUSTYPE_SSB)
 			ssb_watchdog_register(bus);
+
+		err = ssb_gpio_init(bus);
+		if (err == -ENOTSUPP)
+			ssb_dbg("GPIO driver not activated\n");
+		else if (err)
+			ssb_dbg("Error registering GPIO driver: %i\n", err);
+
 		ssb_bus_may_powerdown(bus);
 
 		err = ssb_devices_register(bus);
@@ -830,11 +837,6 @@
 	ssb_chipcommon_init(&bus->chipco);
 	ssb_extif_init(&bus->extif);
 	ssb_mipscore_init(&bus->mipscore);
-	err = ssb_gpio_init(bus);
-	if (err == -ENOTSUPP)
-		ssb_dbg("GPIO driver not activated\n");
-	else if (err)
-		ssb_dbg("Error registering GPIO driver: %i\n", err);
 	err = ssb_fetch_invariants(bus, get_invariants);
 	if (err) {
 		ssb_bus_may_powerdown(bus);
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 4bb6b11..040a515 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -76,10 +76,6 @@
 
 source "drivers/staging/iio/Kconfig"
 
-source "drivers/staging/zsmalloc/Kconfig"
-
-source "drivers/staging/zram/Kconfig"
-
 source "drivers/staging/wlags49_h2/Kconfig"
 
 source "drivers/staging/wlags49_h25/Kconfig"
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index 9f07e5e..dea056b 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -32,8 +32,6 @@
 obj-$(CONFIG_VME_BUS)		+= vme/
 obj-$(CONFIG_DX_SEP)            += sep/
 obj-$(CONFIG_IIO)		+= iio/
-obj-$(CONFIG_ZRAM)		+= zram/
-obj-$(CONFIG_ZSMALLOC)		+= zsmalloc/
 obj-$(CONFIG_WLAGS49_H2)	+= wlags49_h2/
 obj-$(CONFIG_WLAGS49_H25)	+= wlags49_h25/
 obj-$(CONFIG_FB_SM7XX)		+= sm7xxfb/
diff --git a/drivers/staging/lustre/lustre/llite/lloop.c b/drivers/staging/lustre/lustre/llite/lloop.c
index 5338e8d..0718905 100644
--- a/drivers/staging/lustre/lustre/llite/lloop.c
+++ b/drivers/staging/lustre/lustre/llite/lloop.c
@@ -194,10 +194,10 @@
 	struct cl_object     *obj = ll_i2info(inode)->lli_clob;
 	pgoff_t	       offset;
 	int		   ret;
-	int		   i;
 	int		   rw;
 	obd_count	     page_count = 0;
-	struct bio_vec       *bvec;
+	struct bio_vec       bvec;
+	struct bvec_iter     iter;
 	struct bio	   *bio;
 	ssize_t	       bytes;
 
@@ -220,15 +220,15 @@
 	for (bio = head; bio != NULL; bio = bio->bi_next) {
 		LASSERT(rw == bio->bi_rw);
 
-		offset = (pgoff_t)(bio->bi_sector << 9) + lo->lo_offset;
-		bio_for_each_segment(bvec, bio, i) {
-			BUG_ON(bvec->bv_offset != 0);
-			BUG_ON(bvec->bv_len != PAGE_CACHE_SIZE);
+		offset = (pgoff_t)(bio->bi_iter.bi_sector << 9) + lo->lo_offset;
+		bio_for_each_segment(bvec, bio, iter) {
+			BUG_ON(bvec.bv_offset != 0);
+			BUG_ON(bvec.bv_len != PAGE_CACHE_SIZE);
 
-			pages[page_count] = bvec->bv_page;
+			pages[page_count] = bvec.bv_page;
 			offsets[page_count] = offset;
 			page_count++;
-			offset += bvec->bv_len;
+			offset += bvec.bv_len;
 		}
 		LASSERT(page_count <= LLOOP_MAX_SEGMENTS);
 	}
@@ -313,7 +313,8 @@
 	bio = &lo->lo_bio;
 	while (*bio && (*bio)->bi_rw == rw) {
 		CDEBUG(D_INFO, "bio sector %llu size %u count %u vcnt%u \n",
-		       (unsigned long long)(*bio)->bi_sector, (*bio)->bi_size,
+		       (unsigned long long)(*bio)->bi_iter.bi_sector,
+		       (*bio)->bi_iter.bi_size,
 		       page_count, (*bio)->bi_vcnt);
 		if (page_count + (*bio)->bi_vcnt > LLOOP_MAX_SEGMENTS)
 			break;
@@ -347,7 +348,8 @@
 		goto err;
 
 	CDEBUG(D_INFO, "submit bio sector %llu size %u\n",
-	       (unsigned long long)old_bio->bi_sector, old_bio->bi_size);
+	       (unsigned long long)old_bio->bi_iter.bi_sector,
+	       old_bio->bi_iter.bi_size);
 
 	spin_lock_irq(&lo->lo_lock);
 	inactive = (lo->lo_state != LLOOP_BOUND);
@@ -367,7 +369,7 @@
 	loop_add_bio(lo, old_bio);
 	return;
 err:
-	cfs_bio_io_error(old_bio, old_bio->bi_size);
+	cfs_bio_io_error(old_bio, old_bio->bi_iter.bi_size);
 }
 
 
@@ -378,7 +380,7 @@
 	while (bio) {
 		struct bio *tmp = bio->bi_next;
 		bio->bi_next = NULL;
-		cfs_bio_endio(bio, bio->bi_size, ret);
+		cfs_bio_endio(bio, bio->bi_iter.bi_size, ret);
 		bio = tmp;
 	}
 }
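
The lloop conversion above is the canonical immutable-biovec migration: bio_for_each_segment() now takes a struct bio_vec by value plus a caller-owned struct bvec_iter, instead of a bio_vec pointer and an integer index. A minimal sketch of the new idiom (walk_bio_pages() and handle_page() are hypothetical names, not part of the patch):

  /* New-style segment walk: bvec is a value copy, iter tracks position,
   * and the bio's biovec array is never written. */
  static void walk_bio_pages(struct bio *bio)
  {
      struct bio_vec bvec;
      struct bvec_iter iter;

      bio_for_each_segment(bvec, bio, iter)
          handle_page(bvec.bv_page, bvec.bv_offset, bvec.bv_len);
  }

Member access accordingly switches from bvec->bv_page to bvec.bv_page, which is what most of the mechanical churn in these hunks consists of.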
diff --git a/drivers/staging/zsmalloc/Kconfig b/drivers/staging/zsmalloc/Kconfig
deleted file mode 100644
index 9d1f2a2..0000000
--- a/drivers/staging/zsmalloc/Kconfig
+++ /dev/null
@@ -1,24 +0,0 @@
-config ZSMALLOC
-	bool "Memory allocator for compressed pages"
-	depends on MMU
-	default n
-	help
-	  zsmalloc is a slab-based memory allocator designed to store
-	  compressed RAM pages.  zsmalloc uses virtual memory mapping
-	  in order to reduce fragmentation.  However, this results in a
-	  non-standard allocator interface where a handle, not a pointer, is
-	  returned by an alloc().  This handle must be mapped in order to
-	  access the allocated space.
-
-config PGTABLE_MAPPING
-	bool "Use page table mapping to access object in zsmalloc"
-	depends on ZSMALLOC
-	help
-	  By default, zsmalloc uses a copy-based object mapping method to
-	  access allocations that span two pages. However, if a particular
-	  architecture (ex, ARM) performs VM mapping faster than copying,
-	  then you should select this. This causes zsmalloc to use page table
-	  mapping rather than copying for object mapping.
-
-	  You can check speed with zsmalloc benchmark[1].
-	  [1] https://github.com/spartacus06/zsmalloc
diff --git a/drivers/staging/zsmalloc/Makefile b/drivers/staging/zsmalloc/Makefile
deleted file mode 100644
index b134848..0000000
--- a/drivers/staging/zsmalloc/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-zsmalloc-y 		:= zsmalloc-main.o
-
-obj-$(CONFIG_ZSMALLOC)	+= zsmalloc.o
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index c87959f..2d29356 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -319,7 +319,7 @@
 	bio->bi_bdev = ib_dev->ibd_bd;
 	bio->bi_private = cmd;
 	bio->bi_end_io = &iblock_bio_done;
-	bio->bi_sector = lba;
+	bio->bi_iter.bi_sector = lba;
 
 	return bio;
 }
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index 649d512..78e82b0 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -29,10 +29,7 @@
 #include <linux/sysrq.h>
 #include <linux/serial.h>
 #include <linux/serial_core.h>
-
-#include <bcm63xx_irq.h>
-#include <bcm63xx_regs.h>
-#include <bcm63xx_io.h>
+#include <linux/serial_bcm63xx.h>
 
 #define BCM63XX_NR_UARTS	2
 
@@ -81,13 +78,13 @@
 static inline unsigned int bcm_uart_readl(struct uart_port *port,
 					 unsigned int offset)
 {
-	return bcm_readl(port->membase + offset);
+	return __raw_readl(port->membase + offset);
 }
 
 static inline void bcm_uart_writel(struct uart_port *port,
 				  unsigned int value, unsigned int offset)
 {
-	bcm_writel(value, port->membase + offset);
+	__raw_writel(value, port->membase + offset);
 }
 
 /*
diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c
index ec06505..97888f4 100644
--- a/drivers/tty/serial/mpc52xx_uart.c
+++ b/drivers/tty/serial/mpc52xx_uart.c
@@ -421,6 +421,7 @@
 
 static struct psc_fifoc __iomem *psc_fifoc;
 static unsigned int psc_fifoc_irq;
+static struct clk *psc_fifoc_clk;
 
 static void mpc512x_psc_fifo_init(struct uart_port *port)
 {
@@ -568,36 +569,73 @@
 /* Init PSC FIFO Controller */
 static int __init mpc512x_psc_fifoc_init(void)
 {
+	int err;
 	struct device_node *np;
+	struct clk *clk;
+
+	/* default error code, potentially overwritten by clock calls */
+	err = -ENODEV;
 
 	np = of_find_compatible_node(NULL, NULL,
 				     "fsl,mpc5121-psc-fifo");
 	if (!np) {
 		pr_err("%s: Can't find FIFOC node\n", __func__);
-		return -ENODEV;
+		goto out_err;
 	}
 
+	clk = of_clk_get(np, 0);
+	if (IS_ERR(clk)) {
+		/* backwards compat with device trees that lack clock specs */
+		clk = clk_get_sys(np->name, "ipg");
+	}
+	if (IS_ERR(clk)) {
+		pr_err("%s: Can't lookup FIFO clock\n", __func__);
+		err = PTR_ERR(clk);
+		goto out_ofnode_put;
+	}
+	if (clk_prepare_enable(clk)) {
+		pr_err("%s: Can't enable FIFO clock\n", __func__);
+		clk_put(clk);
+		goto out_ofnode_put;
+	}
+	psc_fifoc_clk = clk;
+
 	psc_fifoc = of_iomap(np, 0);
 	if (!psc_fifoc) {
 		pr_err("%s: Can't map FIFOC\n", __func__);
-		of_node_put(np);
-		return -ENODEV;
+		goto out_clk_disable;
 	}
 
 	psc_fifoc_irq = irq_of_parse_and_map(np, 0);
-	of_node_put(np);
 	if (psc_fifoc_irq == 0) {
 		pr_err("%s: Can't get FIFOC irq\n", __func__);
-		iounmap(psc_fifoc);
-		return -ENODEV;
+		goto out_unmap;
 	}
 
+	of_node_put(np);
 	return 0;
+
+out_unmap:
+	iounmap(psc_fifoc);
+out_clk_disable:
+	clk_disable_unprepare(psc_fifoc_clk);
+	clk_put(psc_fifoc_clk);
+out_ofnode_put:
+	of_node_put(np);
+out_err:
+	return err;
 }
 
 static void __exit mpc512x_psc_fifoc_uninit(void)
 {
 	iounmap(psc_fifoc);
+
+	/* disable the clock, errors are not fatal */
+	if (psc_fifoc_clk) {
+		clk_disable_unprepare(psc_fifoc_clk);
+		clk_put(psc_fifoc_clk);
+		psc_fifoc_clk = NULL;
+	}
 }
 
 /* 512x specific interrupt handler. The caller holds the port lock */
@@ -619,29 +657,55 @@
 }
 
 static struct clk *psc_mclk_clk[MPC52xx_PSC_MAXNUM];
+static struct clk *psc_ipg_clk[MPC52xx_PSC_MAXNUM];
 
 /* called from within the .request_port() callback (allocation) */
 static int mpc512x_psc_alloc_clock(struct uart_port *port)
 {
 	int psc_num;
-	char clk_name[16];
 	struct clk *clk;
 	int err;
 
 	psc_num = (port->mapbase & 0xf00) >> 8;
-	snprintf(clk_name, sizeof(clk_name), "psc%d_mclk", psc_num);
-	clk = devm_clk_get(port->dev, clk_name);
+
+	clk = devm_clk_get(port->dev, "mclk");
 	if (IS_ERR(clk)) {
 		dev_err(port->dev, "Failed to get MCLK!\n");
-		return PTR_ERR(clk);
+		err = PTR_ERR(clk);
+		goto out_err;
 	}
 	err = clk_prepare_enable(clk);
 	if (err) {
 		dev_err(port->dev, "Failed to enable MCLK!\n");
-		return err;
+		goto out_err;
 	}
 	psc_mclk_clk[psc_num] = clk;
+
+	clk = devm_clk_get(port->dev, "ipg");
+	if (IS_ERR(clk)) {
+		dev_err(port->dev, "Failed to get IPG clock!\n");
+		err = PTR_ERR(clk);
+		goto out_err;
+	}
+	err = clk_prepare_enable(clk);
+	if (err) {
+		dev_err(port->dev, "Failed to enable IPG clock!\n");
+		goto out_err;
+	}
+	psc_ipg_clk[psc_num] = clk;
+
 	return 0;
+
+out_err:
+	if (psc_mclk_clk[psc_num]) {
+		clk_disable_unprepare(psc_mclk_clk[psc_num]);
+		psc_mclk_clk[psc_num] = NULL;
+	}
+	if (psc_ipg_clk[psc_num]) {
+		clk_disable_unprepare(psc_ipg_clk[psc_num]);
+		psc_ipg_clk[psc_num] = NULL;
+	}
+	return err;
 }
 
 /* called from within the .release_port() callback (release) */
@@ -656,6 +720,10 @@
 		clk_disable_unprepare(clk);
 		psc_mclk_clk[psc_num] = NULL;
 	}
+	if (psc_ipg_clk[psc_num]) {
+		clk_disable_unprepare(psc_ipg_clk[psc_num]);
+		psc_ipg_clk[psc_num] = NULL;
+	}
 }
 
 /* implementation of the .clock() callback (enable/disable) */
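
The FIFO-controller and per-port changes above share one shape: look up clocks by connection id ("mclk", "ipg") rather than a computed global name, enable them, and unwind in reverse order on failure. A condensed sketch of that acquire/unwind pattern, assuming a hypothetical helper with two output parameters:

  #include <linux/clk.h>
  #include <linux/device.h>
  #include <linux/err.h>

  /* Enable "mclk" then "ipg"; on any failure, disable whatever was
   * already enabled. devm_clk_get() ties the references to @dev. */
  static int enable_psc_clocks(struct device *dev,
                               struct clk **mclk, struct clk **ipg)
  {
      int err;

      *mclk = devm_clk_get(dev, "mclk");
      if (IS_ERR(*mclk))
          return PTR_ERR(*mclk);
      err = clk_prepare_enable(*mclk);
      if (err)
          return err;

      *ipg = devm_clk_get(dev, "ipg");
      if (IS_ERR(*ipg)) {
          err = PTR_ERR(*ipg);
          goto out_mclk;
      }
      err = clk_prepare_enable(*ipg);
      if (!err)
          return 0;

  out_mclk:
      clk_disable_unprepare(*mclk);
      return err;
  }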
diff --git a/drivers/usb/host/fsl-mph-dr-of.c b/drivers/usb/host/fsl-mph-dr-of.c
index abd5050..9162d1b 100644
--- a/drivers/usb/host/fsl-mph-dr-of.c
+++ b/drivers/usb/host/fsl-mph-dr-of.c
@@ -261,19 +261,8 @@
 	struct fsl_usb2_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct clk *clk;
 	int err;
-	char clk_name[10];
-	int base, clk_num;
 
-	base = pdev->resource->start & 0xf000;
-	if (base == 0x3000)
-		clk_num = 1;
-	else if (base == 0x4000)
-		clk_num = 2;
-	else
-		return -ENODEV;
-
-	snprintf(clk_name, sizeof(clk_name), "usb%d_clk", clk_num);
-	clk = devm_clk_get(pdev->dev.parent, clk_name);
+	clk = devm_clk_get(pdev->dev.parent, "ipg");
 	if (IS_ERR(clk)) {
 		dev_err(&pdev->dev, "failed to get clk\n");
 		return PTR_ERR(clk);
diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c
index 93cf15e..7de847d 100644
--- a/drivers/video/backlight/lcd.c
+++ b/drivers/video/backlight/lcd.c
@@ -228,7 +228,7 @@
 
 	rc = device_register(&new_ld->dev);
 	if (rc) {
-		kfree(new_ld);
+		put_device(&new_ld->dev);
 		return ERR_PTR(rc);
 	}
 
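The lcd change above applies a general driver-core rule: once device_register() has been called, the embedded kobject may already hold references, so a failed registration must be undone with put_device() - which runs the release() callback - never kfree(), which would leak those references and skip release(). A hedged sketch of the pattern around a hypothetical container structure:

  #include <linux/device.h>
  #include <linux/err.h>
  #include <linux/slab.h>

  struct my_dev {                         /* hypothetical container */
      struct device dev;
  };

  static void my_dev_release(struct device *dev)
  {
      kfree(container_of(dev, struct my_dev, dev));
  }

  static struct my_dev *my_dev_create(void)
  {
      struct my_dev *d = kzalloc(sizeof(*d), GFP_KERNEL);
      int rc;

      if (!d)
          return ERR_PTR(-ENOMEM);
      d->dev.release = my_dev_release;
      dev_set_name(&d->dev, "mydev0");

      rc = device_register(&d->dev);
      if (rc) {
          put_device(&d->dev);    /* not kfree(): drops the kobject ref */
          return ERR_PTR(rc);
      }
      return d;
  }
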
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index fc60b31..0bad24d 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -134,8 +134,7 @@
 		return 0;
 	}
 
-	iv = bip_vec_idx(bip, bip->bip_vcnt);
-	BUG_ON(iv == NULL);
+	iv = bip->bip_vec + bip->bip_vcnt;
 
 	iv->bv_page = page;
 	iv->bv_len = len;
@@ -203,6 +202,12 @@
 	return sectors;
 }
 
+static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi,
+					       unsigned int sectors)
+{
+	return bio_integrity_hw_sectors(bi, sectors) * bi->tuple_size;
+}
+
 /**
  * bio_integrity_tag_size - Retrieve integrity tag space
  * @bio:	bio to inspect
@@ -215,9 +220,9 @@
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 
-	BUG_ON(bio->bi_size == 0);
+	BUG_ON(bio->bi_iter.bi_size == 0);
 
-	return bi->tag_size * (bio->bi_size / bi->sector_size);
+	return bi->tag_size * (bio->bi_iter.bi_size / bi->sector_size);
 }
 EXPORT_SYMBOL(bio_integrity_tag_size);
 
@@ -235,9 +240,9 @@
 	nr_sectors = bio_integrity_hw_sectors(bi,
 					DIV_ROUND_UP(len, bi->tag_size));
 
-	if (nr_sectors * bi->tuple_size > bip->bip_size) {
-		printk(KERN_ERR "%s: tag too big for bio: %u > %u\n",
-		       __func__, nr_sectors * bi->tuple_size, bip->bip_size);
+	if (nr_sectors * bi->tuple_size > bip->bip_iter.bi_size) {
+		printk(KERN_ERR "%s: tag too big for bio: %u > %u\n", __func__,
+		       nr_sectors * bi->tuple_size, bip->bip_iter.bi_size);
 		return -1;
 	}
 
@@ -299,29 +304,30 @@
 {
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	struct blk_integrity_exchg bix;
-	struct bio_vec *bv;
-	sector_t sector = bio->bi_sector;
-	unsigned int i, sectors, total;
+	struct bio_vec bv;
+	struct bvec_iter iter;
+	sector_t sector = bio->bi_iter.bi_sector;
+	unsigned int sectors, total;
 	void *prot_buf = bio->bi_integrity->bip_buf;
 
 	total = 0;
 	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	bix.sector_size = bi->sector_size;
 
-	bio_for_each_segment(bv, bio, i) {
-		void *kaddr = kmap_atomic(bv->bv_page);
-		bix.data_buf = kaddr + bv->bv_offset;
-		bix.data_size = bv->bv_len;
+	bio_for_each_segment(bv, bio, iter) {
+		void *kaddr = kmap_atomic(bv.bv_page);
+		bix.data_buf = kaddr + bv.bv_offset;
+		bix.data_size = bv.bv_len;
 		bix.prot_buf = prot_buf;
 		bix.sector = sector;
 
 		bi->generate_fn(&bix);
 
-		sectors = bv->bv_len / bi->sector_size;
+		sectors = bv.bv_len / bi->sector_size;
 		sector += sectors;
 		prot_buf += sectors * bi->tuple_size;
 		total += sectors * bi->tuple_size;
-		BUG_ON(total > bio->bi_integrity->bip_size);
+		BUG_ON(total > bio->bi_integrity->bip_iter.bi_size);
 
 		kunmap_atomic(kaddr);
 	}
@@ -386,8 +392,8 @@
 
 	bip->bip_owns_buf = 1;
 	bip->bip_buf = buf;
-	bip->bip_size = len;
-	bip->bip_sector = bio->bi_sector;
+	bip->bip_iter.bi_size = len;
+	bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
 
 	/* Map it */
 	offset = offset_in_page(buf);
@@ -442,16 +448,18 @@
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
 	struct blk_integrity_exchg bix;
 	struct bio_vec *bv;
-	sector_t sector = bio->bi_integrity->bip_sector;
-	unsigned int i, sectors, total, ret;
+	sector_t sector = bio->bi_integrity->bip_iter.bi_sector;
+	unsigned int sectors, total, ret;
 	void *prot_buf = bio->bi_integrity->bip_buf;
+	int i;
 
 	ret = total = 0;
 	bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
 	bix.sector_size = bi->sector_size;
 
-	bio_for_each_segment(bv, bio, i) {
+	bio_for_each_segment_all(bv, bio, i) {
 		void *kaddr = kmap_atomic(bv->bv_page);
+
 		bix.data_buf = kaddr + bv->bv_offset;
 		bix.data_size = bv->bv_len;
 		bix.prot_buf = prot_buf;
@@ -468,7 +476,7 @@
 		sector += sectors;
 		prot_buf += sectors * bi->tuple_size;
 		total += sectors * bi->tuple_size;
-		BUG_ON(total > bio->bi_integrity->bip_size);
+		BUG_ON(total > bio->bi_integrity->bip_iter.bi_size);
 
 		kunmap_atomic(kaddr);
 	}
@@ -495,7 +503,7 @@
 
 	/* Restore original bio completion handler */
 	bio->bi_end_io = bip->bip_end_io;
-	bio_endio(bio, error);
+	bio_endio_nodec(bio, error);
 }
 
 /**
@@ -533,56 +541,6 @@
 EXPORT_SYMBOL(bio_integrity_endio);
 
 /**
- * bio_integrity_mark_head - Advance bip_vec skip bytes
- * @bip:	Integrity vector to advance
- * @skip:	Number of bytes to advance it
- */
-void bio_integrity_mark_head(struct bio_integrity_payload *bip,
-			     unsigned int skip)
-{
-	struct bio_vec *iv;
-	unsigned int i;
-
-	bip_for_each_vec(iv, bip, i) {
-		if (skip == 0) {
-			bip->bip_idx = i;
-			return;
-		} else if (skip >= iv->bv_len) {
-			skip -= iv->bv_len;
-		} else { /* skip < iv->bv_len) */
-			iv->bv_offset += skip;
-			iv->bv_len -= skip;
-			bip->bip_idx = i;
-			return;
-		}
-	}
-}
-
-/**
- * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long
- * @bip:	Integrity vector to truncate
- * @len:	New length of integrity vector
- */
-void bio_integrity_mark_tail(struct bio_integrity_payload *bip,
-			     unsigned int len)
-{
-	struct bio_vec *iv;
-	unsigned int i;
-
-	bip_for_each_vec(iv, bip, i) {
-		if (len == 0) {
-			bip->bip_vcnt = i;
-			return;
-		} else if (len >= iv->bv_len) {
-			len -= iv->bv_len;
-		} else { /* len < iv->bv_len) */
-			iv->bv_len = len;
-			len = 0;
-		}
-	}
-}
-
-/**
  * bio_integrity_advance - Advance integrity vector
  * @bio:	bio whose integrity vector to update
  * @bytes_done:	number of data bytes that have been completed
@@ -595,13 +553,9 @@
 {
 	struct bio_integrity_payload *bip = bio->bi_integrity;
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-	unsigned int nr_sectors;
+	unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9);
 
-	BUG_ON(bip == NULL);
-	BUG_ON(bi == NULL);
-
-	nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9);
-	bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size);
+	bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes);
 }
 EXPORT_SYMBOL(bio_integrity_advance);
 
@@ -621,64 +575,13 @@
 {
 	struct bio_integrity_payload *bip = bio->bi_integrity;
 	struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
-	unsigned int nr_sectors;
 
-	BUG_ON(bip == NULL);
-	BUG_ON(bi == NULL);
-	BUG_ON(!bio_flagged(bio, BIO_CLONED));
-
-	nr_sectors = bio_integrity_hw_sectors(bi, sectors);
-	bip->bip_sector = bip->bip_sector + offset;
-	bio_integrity_mark_head(bip, offset * bi->tuple_size);
-	bio_integrity_mark_tail(bip, sectors * bi->tuple_size);
+	bio_integrity_advance(bio, offset << 9);
+	bip->bip_iter.bi_size = bio_integrity_bytes(bi, sectors);
 }
 EXPORT_SYMBOL(bio_integrity_trim);
 
 /**
- * bio_integrity_split - Split integrity metadata
- * @bio:	Protected bio
- * @bp:		Resulting bio_pair
- * @sectors:	Offset
- *
- * Description: Splits an integrity page into a bio_pair.
- */
-void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
-{
-	struct blk_integrity *bi;
-	struct bio_integrity_payload *bip = bio->bi_integrity;
-	unsigned int nr_sectors;
-
-	if (bio_integrity(bio) == 0)
-		return;
-
-	bi = bdev_get_integrity(bio->bi_bdev);
-	BUG_ON(bi == NULL);
-	BUG_ON(bip->bip_vcnt != 1);
-
-	nr_sectors = bio_integrity_hw_sectors(bi, sectors);
-
-	bp->bio1.bi_integrity = &bp->bip1;
-	bp->bio2.bi_integrity = &bp->bip2;
-
-	bp->iv1 = bip->bip_vec[bip->bip_idx];
-	bp->iv2 = bip->bip_vec[bip->bip_idx];
-
-	bp->bip1.bip_vec = &bp->iv1;
-	bp->bip2.bip_vec = &bp->iv2;
-
-	bp->iv1.bv_len = sectors * bi->tuple_size;
-	bp->iv2.bv_offset += sectors * bi->tuple_size;
-	bp->iv2.bv_len -= sectors * bi->tuple_size;
-
-	bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
-	bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
-
-	bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
-	bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
-}
-EXPORT_SYMBOL(bio_integrity_split);
-
-/**
  * bio_integrity_clone - Callback for cloning bios with integrity metadata
  * @bio:	New bio
  * @bio_src:	Original bio
@@ -702,9 +605,8 @@
 	memcpy(bip->bip_vec, bip_src->bip_vec,
 	       bip_src->bip_vcnt * sizeof(struct bio_vec));
 
-	bip->bip_sector = bip_src->bip_sector;
 	bip->bip_vcnt = bip_src->bip_vcnt;
-	bip->bip_idx = bip_src->bip_idx;
+	bip->bip_iter = bip_src->bip_iter;
 
 	return 0;
 }
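
The new bio_integrity_bytes() helper above is what makes the shrunken advance/trim paths possible: it converts a count of completed data sectors into bytes of protection information. A worked example under common T10 DIF assumptions (512-byte hardware sectors, 8-byte tuples - illustrative numbers, not mandated by the code):

  /*
   * With bi->sector_size == 512 and bi->tuple_size == 8, completing 4KB
   * of data is 8 sectors, so:
   *
   *     bio_integrity_bytes(bi, 8) == bio_integrity_hw_sectors(bi, 8) * 8
   *                                == 8 * 8
   *                                == 64 bytes of protection information
   *
   * and bio_integrity_advance(bio, 4096) therefore moves bip->bip_iter
   * forward by 64 bytes via bvec_iter_advance(), with no writes to the
   * integrity biovec itself.
   */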
diff --git a/fs/bio.c b/fs/bio.c
index 33d79a4..75c49a3 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -38,8 +38,6 @@
  */
 #define BIO_INLINE_VECS		4
 
-static mempool_t *bio_split_pool __read_mostly;
-
 /*
  * if you change this list, also change bvec_alloc or things will
  * break badly! cannot be bigger than what you can fit into an
@@ -273,6 +271,7 @@
 {
 	memset(bio, 0, sizeof(*bio));
 	bio->bi_flags = 1 << BIO_UPTODATE;
+	atomic_set(&bio->bi_remaining, 1);
 	atomic_set(&bio->bi_cnt, 1);
 }
 EXPORT_SYMBOL(bio_init);
@@ -295,9 +294,35 @@
 
 	memset(bio, 0, BIO_RESET_BYTES);
 	bio->bi_flags = flags|(1 << BIO_UPTODATE);
+	atomic_set(&bio->bi_remaining, 1);
 }
 EXPORT_SYMBOL(bio_reset);
 
+static void bio_chain_endio(struct bio *bio, int error)
+{
+	bio_endio(bio->bi_private, error);
+	bio_put(bio);
+}
+
+/**
+ * bio_chain - chain bio completions
+ * @bio: the target bio
+ * @parent: the @bio's parent bio
+ *
+ * The caller won't have a bi_end_io called when @bio completes - instead,
+ * @parent's bi_end_io won't be called until both @parent and @bio have
+ * completed; the chained bio will also be freed when it completes.
+ *
+ * The caller must not set bi_private or bi_end_io in @bio.
+ */
+void bio_chain(struct bio *bio, struct bio *parent)
+{
+	BUG_ON(bio->bi_private || bio->bi_end_io);
+
+	bio->bi_private = parent;
+	bio->bi_end_io	= bio_chain_endio;
+	atomic_inc(&parent->bi_remaining);
+}
+EXPORT_SYMBOL(bio_chain);
+
 static void bio_alloc_rescue(struct work_struct *work)
 {
 	struct bio_set *bs = container_of(work, struct bio_set, rescue_work);
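
bio_chain() gives submitters a way to fan work out across extra bios with no completion bookkeeping of their own: the parent's bi_end_io runs only once the parent and every chained child have completed, and the child is freed automatically. A minimal usage sketch (the side-write scenario and function name are hypothetical):

  /* Issue @extra alongside @parent; @parent's completion waits for both.
   * @extra must have bi_end_io and bi_private unset, per the BUG_ON above. */
  static void submit_with_side_write(struct bio *parent, struct bio *extra)
  {
      bio_chain(extra, parent);        /* bumps parent->bi_remaining */
      submit_bio(WRITE, extra);
      submit_bio(parent->bi_rw, parent);
  }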
@@ -473,13 +498,13 @@
 void zero_fill_bio(struct bio *bio)
 {
 	unsigned long flags;
-	struct bio_vec *bv;
-	int i;
+	struct bio_vec bv;
+	struct bvec_iter iter;
 
-	bio_for_each_segment(bv, bio, i) {
-		char *data = bvec_kmap_irq(bv, &flags);
-		memset(data, 0, bv->bv_len);
-		flush_dcache_page(bv->bv_page);
+	bio_for_each_segment(bv, bio, iter) {
+		char *data = bvec_kmap_irq(&bv, &flags);
+		memset(data, 0, bv.bv_len);
+		flush_dcache_page(bv.bv_page);
 		bvec_kunmap_irq(data, &flags);
 	}
 }
@@ -515,51 +540,49 @@
 EXPORT_SYMBOL(bio_phys_segments);
 
 /**
- * 	__bio_clone	-	clone a bio
+ * 	__bio_clone_fast - clone a bio that shares the original bio's biovec
  * 	@bio: destination bio
  * 	@bio_src: bio to clone
  *
  *	Clone a &bio. Caller will own the returned bio, but not
  *	the actual data it points to. Reference count of returned
  * 	bio will be one.
+ *
+ * 	Caller must ensure that @bio_src is not freed before @bio.
  */
-void __bio_clone(struct bio *bio, struct bio *bio_src)
+void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 {
-	memcpy(bio->bi_io_vec, bio_src->bi_io_vec,
-		bio_src->bi_max_vecs * sizeof(struct bio_vec));
+	BUG_ON(bio->bi_pool && BIO_POOL_IDX(bio) != BIO_POOL_NONE);
 
 	/*
 	 * most users will be overriding ->bi_bdev with a new target,
 	 * so we don't set nor calculate new physical/hw segment counts here
 	 */
-	bio->bi_sector = bio_src->bi_sector;
 	bio->bi_bdev = bio_src->bi_bdev;
 	bio->bi_flags |= 1 << BIO_CLONED;
 	bio->bi_rw = bio_src->bi_rw;
-	bio->bi_vcnt = bio_src->bi_vcnt;
-	bio->bi_size = bio_src->bi_size;
-	bio->bi_idx = bio_src->bi_idx;
+	bio->bi_iter = bio_src->bi_iter;
+	bio->bi_io_vec = bio_src->bi_io_vec;
 }
-EXPORT_SYMBOL(__bio_clone);
+EXPORT_SYMBOL(__bio_clone_fast);
 
 /**
- *	bio_clone_bioset -	clone a bio
+ *	bio_clone_fast - clone a bio that shares the original bio's biovec
  *	@bio: bio to clone
  *	@gfp_mask: allocation priority
  *	@bs: bio_set to allocate from
  *
- * 	Like __bio_clone, only also allocates the returned bio
+ * 	Like __bio_clone_fast, only also allocates the returned bio
  */
-struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask,
-			     struct bio_set *bs)
+struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs)
 {
 	struct bio *b;
 
-	b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs);
+	b = bio_alloc_bioset(gfp_mask, 0, bs);
 	if (!b)
 		return NULL;
 
-	__bio_clone(b, bio);
+	__bio_clone_fast(b, bio);
 
 	if (bio_integrity(bio)) {
 		int ret;
@@ -574,6 +597,74 @@
 
 	return b;
 }
+EXPORT_SYMBOL(bio_clone_fast);
+
+/**
+ * 	bio_clone_bioset - clone a bio
+ * 	@bio_src: bio to clone
+ *	@gfp_mask: allocation priority
+ *	@bs: bio_set to allocate from
+ *
+ *	Clone bio. Caller will own the returned bio, but not the actual data it
+ *	points to. Reference count of returned bio will be one.
+ */
+struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
+			     struct bio_set *bs)
+{
+	unsigned nr_iovecs = 0;
+	struct bvec_iter iter;
+	struct bio_vec bv;
+	struct bio *bio;
+
+	/*
+	 * Pre immutable biovecs, __bio_clone() used to just do a memcpy from
+	 * bio_src->bi_io_vec to bio->bi_io_vec.
+	 *
+	 * We can't do that anymore, because:
+	 *
+	 *  - The point of cloning the biovec is to produce a bio with a biovec
+	 *    the caller can modify: bi_idx and bi_bvec_done should be 0.
+	 *
+	 *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
+	 *    we tried to clone the whole thing, bio_alloc_bioset() would fail.
+	 *    But the clone should succeed as long as the number of biovecs we
+	 *    actually need to allocate is fewer than BIO_MAX_PAGES.
+	 *
+	 *  - Lastly, bi_vcnt should not be looked at or relied upon by code
+	 *    that does not own the bio - reason being drivers don't use it for
+	 *    iterating over the biovec anymore, so expecting it to be kept up
+	 *    to date (i.e. for clones that share the parent biovec) is just
+	 *    asking for trouble and would force extra work on
+	 *    __bio_clone_fast() anyway.
+	 */
+
+	bio_for_each_segment(bv, bio_src, iter)
+		nr_iovecs++;
+
+	bio = bio_alloc_bioset(gfp_mask, nr_iovecs, bs);
+	if (!bio)
+		return NULL;
+
+	bio->bi_bdev		= bio_src->bi_bdev;
+	bio->bi_rw		= bio_src->bi_rw;
+	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
+	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
+
+	bio_for_each_segment(bv, bio_src, iter)
+		bio->bi_io_vec[bio->bi_vcnt++] = bv;
+
+	if (bio_integrity(bio_src)) {
+		int ret;
+
+		ret = bio_integrity_clone(bio, bio_src, gfp_mask);
+		if (ret < 0) {
+			bio_put(bio);
+			return NULL;
+		}
+	}
+
+	return bio;
+}
 EXPORT_SYMBOL(bio_clone_bioset);
 
 /**
@@ -612,7 +703,7 @@
 	if (unlikely(bio_flagged(bio, BIO_CLONED)))
 		return 0;
 
-	if (((bio->bi_size + len) >> 9) > max_sectors)
+	if (((bio->bi_iter.bi_size + len) >> 9) > max_sectors)
 		return 0;
 
 	/*
@@ -635,8 +726,9 @@
 					   simulate merging updated prev_bvec
 					   as new bvec. */
 					.bi_bdev = bio->bi_bdev,
-					.bi_sector = bio->bi_sector,
-					.bi_size = bio->bi_size - prev_bv_len,
+					.bi_sector = bio->bi_iter.bi_sector,
+					.bi_size = bio->bi_iter.bi_size -
+						prev_bv_len,
 					.bi_rw = bio->bi_rw,
 				};
 
@@ -684,8 +776,8 @@
 	if (q->merge_bvec_fn) {
 		struct bvec_merge_data bvm = {
 			.bi_bdev = bio->bi_bdev,
-			.bi_sector = bio->bi_sector,
-			.bi_size = bio->bi_size,
+			.bi_sector = bio->bi_iter.bi_sector,
+			.bi_size = bio->bi_iter.bi_size,
 			.bi_rw = bio->bi_rw,
 		};
 
@@ -708,7 +800,7 @@
 	bio->bi_vcnt++;
 	bio->bi_phys_segments++;
  done:
-	bio->bi_size += len;
+	bio->bi_iter.bi_size += len;
 	return len;
 }
 
@@ -807,28 +899,7 @@
 	if (bio_integrity(bio))
 		bio_integrity_advance(bio, bytes);
 
-	bio->bi_sector += bytes >> 9;
-	bio->bi_size -= bytes;
-
-	if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
-		return;
-
-	while (bytes) {
-		if (unlikely(bio->bi_idx >= bio->bi_vcnt)) {
-			WARN_ONCE(1, "bio idx %d >= vcnt %d\n",
-				  bio->bi_idx, bio->bi_vcnt);
-			break;
-		}
-
-		if (bytes >= bio_iovec(bio)->bv_len) {
-			bytes -= bio_iovec(bio)->bv_len;
-			bio->bi_idx++;
-		} else {
-			bio_iovec(bio)->bv_len -= bytes;
-			bio_iovec(bio)->bv_offset += bytes;
-			bytes = 0;
-		}
-	}
+	bio_advance_iter(bio, &bio->bi_iter, bytes);
 }
 EXPORT_SYMBOL(bio_advance);
 
@@ -874,117 +945,80 @@
  */
 void bio_copy_data(struct bio *dst, struct bio *src)
 {
-	struct bio_vec *src_bv, *dst_bv;
-	unsigned src_offset, dst_offset, bytes;
+	struct bvec_iter src_iter, dst_iter;
+	struct bio_vec src_bv, dst_bv;
 	void *src_p, *dst_p;
+	unsigned bytes;
 
-	src_bv = bio_iovec(src);
-	dst_bv = bio_iovec(dst);
-
-	src_offset = src_bv->bv_offset;
-	dst_offset = dst_bv->bv_offset;
+	src_iter = src->bi_iter;
+	dst_iter = dst->bi_iter;
 
 	while (1) {
-		if (src_offset == src_bv->bv_offset + src_bv->bv_len) {
-			src_bv++;
-			if (src_bv == bio_iovec_idx(src, src->bi_vcnt)) {
-				src = src->bi_next;
-				if (!src)
-					break;
+		if (!src_iter.bi_size) {
+			src = src->bi_next;
+			if (!src)
+				break;
 
-				src_bv = bio_iovec(src);
-			}
-
-			src_offset = src_bv->bv_offset;
+			src_iter = src->bi_iter;
 		}
 
-		if (dst_offset == dst_bv->bv_offset + dst_bv->bv_len) {
-			dst_bv++;
-			if (dst_bv == bio_iovec_idx(dst, dst->bi_vcnt)) {
-				dst = dst->bi_next;
-				if (!dst)
-					break;
+		if (!dst_iter.bi_size) {
+			dst = dst->bi_next;
+			if (!dst)
+				break;
 
-				dst_bv = bio_iovec(dst);
-			}
-
-			dst_offset = dst_bv->bv_offset;
+			dst_iter = dst->bi_iter;
 		}
 
-		bytes = min(dst_bv->bv_offset + dst_bv->bv_len - dst_offset,
-			    src_bv->bv_offset + src_bv->bv_len - src_offset);
+		src_bv = bio_iter_iovec(src, src_iter);
+		dst_bv = bio_iter_iovec(dst, dst_iter);
 
-		src_p = kmap_atomic(src_bv->bv_page);
-		dst_p = kmap_atomic(dst_bv->bv_page);
+		bytes = min(src_bv.bv_len, dst_bv.bv_len);
 
-		memcpy(dst_p + dst_offset,
-		       src_p + src_offset,
+		src_p = kmap_atomic(src_bv.bv_page);
+		dst_p = kmap_atomic(dst_bv.bv_page);
+
+		memcpy(dst_p + dst_bv.bv_offset,
+		       src_p + src_bv.bv_offset,
 		       bytes);
 
 		kunmap_atomic(dst_p);
 		kunmap_atomic(src_p);
 
-		src_offset += bytes;
-		dst_offset += bytes;
+		bio_advance_iter(src, &src_iter, bytes);
+		bio_advance_iter(dst, &dst_iter, bytes);
 	}
 }
 EXPORT_SYMBOL(bio_copy_data);
 
 struct bio_map_data {
-	struct bio_vec *iovecs;
-	struct sg_iovec *sgvecs;
 	int nr_sgvecs;
 	int is_our_pages;
+	struct sg_iovec sgvecs[];
 };
 
 static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
 			     struct sg_iovec *iov, int iov_count,
 			     int is_our_pages)
 {
-	memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt);
 	memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count);
 	bmd->nr_sgvecs = iov_count;
 	bmd->is_our_pages = is_our_pages;
 	bio->bi_private = bmd;
 }
 
-static void bio_free_map_data(struct bio_map_data *bmd)
-{
-	kfree(bmd->iovecs);
-	kfree(bmd->sgvecs);
-	kfree(bmd);
-}
-
 static struct bio_map_data *bio_alloc_map_data(int nr_segs,
 					       unsigned int iov_count,
 					       gfp_t gfp_mask)
 {
-	struct bio_map_data *bmd;
-
 	if (iov_count > UIO_MAXIOV)
 		return NULL;
 
-	bmd = kmalloc(sizeof(*bmd), gfp_mask);
-	if (!bmd)
-		return NULL;
-
-	bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask);
-	if (!bmd->iovecs) {
-		kfree(bmd);
-		return NULL;
-	}
-
-	bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask);
-	if (bmd->sgvecs)
-		return bmd;
-
-	kfree(bmd->iovecs);
-	kfree(bmd);
-	return NULL;
+	return kmalloc(sizeof(struct bio_map_data) +
+		       sizeof(struct sg_iovec) * iov_count, gfp_mask);
 }
 
-static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
-			  struct sg_iovec *iov, int iov_count,
+static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count,
 			  int to_user, int from_user, int do_free_page)
 {
 	int ret = 0, i;
@@ -994,7 +1028,7 @@
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		char *bv_addr = page_address(bvec->bv_page);
-		unsigned int bv_len = iovecs[i].bv_len;
+		unsigned int bv_len = bvec->bv_len;
 
 		while (bv_len && iov_idx < iov_count) {
 			unsigned int bytes;
@@ -1054,14 +1088,14 @@
 		 * don't copy into a random user address space, just free.
 		 */
 		if (current->mm)
-			ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
-					     bmd->nr_sgvecs, bio_data_dir(bio) == READ,
+			ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs,
+					     bio_data_dir(bio) == READ,
 					     0, bmd->is_our_pages);
 		else if (bmd->is_our_pages)
 			bio_for_each_segment_all(bvec, bio, i)
 				__free_page(bvec->bv_page);
 	}
-	bio_free_map_data(bmd);
+	kfree(bmd);
 	bio_put(bio);
 	return ret;
 }
@@ -1175,7 +1209,7 @@
 	 */
 	if ((!write_to_vm && (!map_data || !map_data->null_mapped)) ||
 	    (map_data && map_data->from_user)) {
-		ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0);
+		ret = __bio_copy_iov(bio, iov, iov_count, 0, 1, 0);
 		if (ret)
 			goto cleanup;
 	}
@@ -1189,7 +1223,7 @@
 
 	bio_put(bio);
 out_bmd:
-	bio_free_map_data(bmd);
+	kfree(bmd);
 	return ERR_PTR(ret);
 }
 
@@ -1485,7 +1519,7 @@
 	if (IS_ERR(bio))
 		return bio;
 
-	if (bio->bi_size == len)
+	if (bio->bi_iter.bi_size == len)
 		return bio;
 
 	/*
@@ -1506,16 +1540,15 @@
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		char *addr = page_address(bvec->bv_page);
-		int len = bmd->iovecs[i].bv_len;
 
 		if (read)
-			memcpy(p, addr, len);
+			memcpy(p, addr, bvec->bv_len);
 
 		__free_page(bvec->bv_page);
-		p += len;
+		p += bvec->bv_len;
 	}
 
-	bio_free_map_data(bmd);
+	kfree(bmd);
 	bio_put(bio);
 }
 
@@ -1686,11 +1719,11 @@
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 void bio_flush_dcache_pages(struct bio *bi)
 {
-	int i;
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
+	struct bvec_iter iter;
 
-	bio_for_each_segment(bvec, bi, i)
-		flush_dcache_page(bvec->bv_page);
+	bio_for_each_segment(bvec, bi, iter)
+		flush_dcache_page(bvec.bv_page);
 }
 EXPORT_SYMBOL(bio_flush_dcache_pages);
 #endif
@@ -1711,96 +1744,86 @@
  **/
 void bio_endio(struct bio *bio, int error)
 {
-	if (error)
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-		error = -EIO;
+	while (bio) {
+		BUG_ON(atomic_read(&bio->bi_remaining) <= 0);
 
-	if (bio->bi_end_io)
-		bio->bi_end_io(bio, error);
+		if (error)
+			clear_bit(BIO_UPTODATE, &bio->bi_flags);
+		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+			error = -EIO;
+
+		if (!atomic_dec_and_test(&bio->bi_remaining))
+			return;
+
+		/*
+		 * Need to have a real endio function for chained bios,
+		 * otherwise various corner cases will break (like stacking
+		 * block devices that save/restore bi_end_io) - however, we want
+		 * to avoid unbounded recursion and blowing the stack. Tail call
+		 * optimization would handle this, but compiling with frame
+		 * pointers also disables gcc's sibling call optimization.
+		 */
+		if (bio->bi_end_io == bio_chain_endio) {
+			struct bio *parent = bio->bi_private;
+			bio_put(bio);
+			bio = parent;
+		} else {
+			if (bio->bi_end_io)
+				bio->bi_end_io(bio, error);
+			bio = NULL;
+		}
+	}
 }
 EXPORT_SYMBOL(bio_endio);
 
-void bio_pair_release(struct bio_pair *bp)
+/**
+ * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining
+ * @bio:	bio
+ * @error:	error, if any
+ *
+ * For code that has saved and restored bi_end_io; think hard before using this
+ * function - you probably should've cloned the entire bio instead.
+ **/
+void bio_endio_nodec(struct bio *bio, int error)
 {
-	if (atomic_dec_and_test(&bp->cnt)) {
-		struct bio *master = bp->bio1.bi_private;
-
-		bio_endio(master, bp->error);
-		mempool_free(bp, bp->bio2.bi_private);
-	}
+	atomic_inc(&bio->bi_remaining);
+	bio_endio(bio, error);
 }
-EXPORT_SYMBOL(bio_pair_release);
+EXPORT_SYMBOL(bio_endio_nodec);
 
-static void bio_pair_end_1(struct bio *bi, int err)
-{
-	struct bio_pair *bp = container_of(bi, struct bio_pair, bio1);
-
-	if (err)
-		bp->error = err;
-
-	bio_pair_release(bp);
-}
-
-static void bio_pair_end_2(struct bio *bi, int err)
-{
-	struct bio_pair *bp = container_of(bi, struct bio_pair, bio2);
-
-	if (err)
-		bp->error = err;
-
-	bio_pair_release(bp);
-}
-
-/*
- * split a bio - only worry about a bio with a single page in its iovec
+/**
+ * bio_split - split a bio
+ * @bio:	bio to split
+ * @sectors:	number of sectors to split from the front of @bio
+ * @gfp:	gfp mask
+ * @bs:		bio set to allocate from
+ *
+ * Allocates and returns a new bio which represents @sectors from the start of
+ * @bio, and updates @bio to represent the remaining sectors.
+ *
+ * The newly allocated bio will point to @bio's bi_io_vec; it is the caller's
+ * responsibility to ensure that @bio is not freed before the split.
  */
-struct bio_pair *bio_split(struct bio *bi, int first_sectors)
+struct bio *bio_split(struct bio *bio, int sectors,
+		      gfp_t gfp, struct bio_set *bs)
 {
-	struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO);
+	struct bio *split = NULL;
 
-	if (!bp)
-		return bp;
+	BUG_ON(sectors <= 0);
+	BUG_ON(sectors >= bio_sectors(bio));
 
-	trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
-				bi->bi_sector + first_sectors);
+	split = bio_clone_fast(bio, gfp, bs);
+	if (!split)
+		return NULL;
 
-	BUG_ON(bio_segments(bi) > 1);
-	atomic_set(&bp->cnt, 3);
-	bp->error = 0;
-	bp->bio1 = *bi;
-	bp->bio2 = *bi;
-	bp->bio2.bi_sector += first_sectors;
-	bp->bio2.bi_size -= first_sectors << 9;
-	bp->bio1.bi_size = first_sectors << 9;
+	split->bi_iter.bi_size = sectors << 9;
 
-	if (bi->bi_vcnt != 0) {
-		bp->bv1 = *bio_iovec(bi);
-		bp->bv2 = *bio_iovec(bi);
+	if (bio_integrity(split))
+		bio_integrity_trim(split, 0, sectors);
 
-		if (bio_is_rw(bi)) {
-			bp->bv2.bv_offset += first_sectors << 9;
-			bp->bv2.bv_len -= first_sectors << 9;
-			bp->bv1.bv_len = first_sectors << 9;
-		}
+	bio_advance(bio, split->bi_iter.bi_size);
 
-		bp->bio1.bi_io_vec = &bp->bv1;
-		bp->bio2.bi_io_vec = &bp->bv2;
-
-		bp->bio1.bi_max_vecs = 1;
-		bp->bio2.bi_max_vecs = 1;
-	}
-
-	bp->bio1.bi_end_io = bio_pair_end_1;
-	bp->bio2.bi_end_io = bio_pair_end_2;
-
-	bp->bio1.bi_private = bi;
-	bp->bio2.bi_private = bio_split_pool;
-
-	if (bio_integrity(bi))
-		bio_integrity_split(bi, bp, first_sectors);
-
-	return bp;
+	return split;
 }
 EXPORT_SYMBOL(bio_split);
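
The rewritten bio_split() pairs naturally with bio_chain(): because the split shares the parent's biovec, chaining it to the parent both orders the completions and keeps the shared biovec alive until the split finishes. A sketch of the chunking loop this enables (submit_in_chunks() is hypothetical; chunk size and bio_set are caller-supplied assumptions, and allocation-failure handling is elided):

  /* Carve @bio into chunk-sized pieces; @bio's end_io runs only after
   * every piece has completed. */
  static void submit_in_chunks(struct bio *bio, unsigned int chunk_sectors,
                               struct bio_set *bs)
  {
      while (bio_sectors(bio) > chunk_sectors) {
          struct bio *split = bio_split(bio, chunk_sectors,
                                        GFP_NOIO, bs);

          bio_chain(split, bio);        /* order + keep biovec alive */
          generic_make_request(split);
      }
      generic_make_request(bio);        /* the remainder */
  }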
 
@@ -1814,80 +1837,20 @@
 {
 	/* 'bio' is a cloned bio which we need to trim to match
 	 * the given offset and size.
-	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
 	 */
-	int i;
-	struct bio_vec *bvec;
-	int sofar = 0;
 
 	size <<= 9;
-	if (offset == 0 && size == bio->bi_size)
+	if (offset == 0 && size == bio->bi_iter.bi_size)
 		return;
 
 	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
 
 	bio_advance(bio, offset << 9);
 
-	bio->bi_size = size;
-
-	/* avoid any complications with bi_idx being non-zero*/
-	if (bio->bi_idx) {
-		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
-			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
-		bio->bi_vcnt -= bio->bi_idx;
-		bio->bi_idx = 0;
-	}
-	/* Make sure vcnt and last bv are not too big */
-	bio_for_each_segment(bvec, bio, i) {
-		if (sofar + bvec->bv_len > size)
-			bvec->bv_len = size - sofar;
-		if (bvec->bv_len == 0) {
-			bio->bi_vcnt = i;
-			break;
-		}
-		sofar += bvec->bv_len;
-	}
+	bio->bi_iter.bi_size = size;
 }
 EXPORT_SYMBOL_GPL(bio_trim);
 
-/**
- *      bio_sector_offset - Find hardware sector offset in bio
- *      @bio:           bio to inspect
- *      @index:         bio_vec index
- *      @offset:        offset in bv_page
- *
- *      Return the number of hardware sectors between beginning of bio
- *      and an end point indicated by a bio_vec index and an offset
- *      within that vector's page.
- */
-sector_t bio_sector_offset(struct bio *bio, unsigned short index,
-			   unsigned int offset)
-{
-	unsigned int sector_sz;
-	struct bio_vec *bv;
-	sector_t sectors;
-	int i;
-
-	sector_sz = queue_logical_block_size(bio->bi_bdev->bd_disk->queue);
-	sectors = 0;
-
-	if (index >= bio->bi_idx)
-		index = bio->bi_vcnt - 1;
-
-	bio_for_each_segment_all(bv, bio, i) {
-		if (i == index) {
-			if (offset > bv->bv_offset)
-				sectors += (offset - bv->bv_offset) / sector_sz;
-			break;
-		}
-
-		sectors += bv->bv_len / sector_sz;
-	}
-
-	return sectors;
-}
-EXPORT_SYMBOL(bio_sector_offset);
-
 /*
  * create memory pools for biovec's in a bio_set.
  * use the global biovec slabs created for general use.
@@ -2065,11 +2028,6 @@
 	if (bioset_integrity_create(fs_bio_set, BIO_POOL_SIZE))
 		panic("bio: can't create integrity pool\n");
 
-	bio_split_pool = mempool_create_kmalloc_pool(BIO_SPLIT_ENTRIES,
-						     sizeof(struct bio_pair));
-	if (!bio_split_pool)
-		panic("bio: can't create split pool\n");
-
 	return 0;
 }
 subsys_initcall(init_bio);
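
One smaller cleanup in this file is worth calling out: struct bio_map_data now ends in a flexible array member, so bio_alloc_map_data() is a single kmalloc() instead of three allocations with three failure paths. A generic sketch of the pattern with a hypothetical structure:

  #include <linux/slab.h>
  #include <linux/uio.h>

  struct iov_holder {
      int nr;
      struct sg_iovec vecs[];        /* flexible array member, sized below */
  };

  static struct iov_holder *alloc_iov_holder(int count, gfp_t gfp)
  {
      struct iov_holder *h;

      h = kmalloc(sizeof(*h) + sizeof(h->vecs[0]) * count, gfp);
      if (h)
          h->nr = count;
      return h;        /* freed with a single kfree() */
  }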
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 131d828..cb05e1c 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -1695,7 +1695,7 @@
 			return -1;
 		}
 		bio->bi_bdev = block_ctx->dev->bdev;
-		bio->bi_sector = dev_bytenr >> 9;
+		bio->bi_iter.bi_sector = dev_bytenr >> 9;
 
 		for (j = i; j < num_pages; j++) {
 			ret = bio_add_page(bio, block_ctx->pagev[j],
@@ -3013,7 +3013,7 @@
 		int bio_is_patched;
 		char **mapped_datav;
 
-		dev_bytenr = 512 * bio->bi_sector;
+		dev_bytenr = 512 * bio->bi_iter.bi_sector;
 		bio_is_patched = 0;
 		if (dev_state->state->print_mask &
 		    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
@@ -3021,8 +3021,8 @@
 			       "submit_bio(rw=0x%x, bi_vcnt=%u,"
 			       " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
 			       rw, bio->bi_vcnt,
-			       (unsigned long long)bio->bi_sector, dev_bytenr,
-			       bio->bi_bdev);
+			       (unsigned long long)bio->bi_iter.bi_sector,
+			       dev_bytenr, bio->bi_bdev);
 
 		mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
 				       GFP_NOFS);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 1499b27..f5cdeb4 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -172,7 +172,8 @@
 		goto out;
 
 	inode = cb->inode;
-	ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
+	ret = check_compressed_csum(inode, cb,
+				    (u64)bio->bi_iter.bi_sector << 9);
 	if (ret)
 		goto csum_failed;
 
@@ -201,18 +202,16 @@
 	if (cb->errors) {
 		bio_io_error(cb->orig_bio);
 	} else {
-		int bio_index = 0;
-		struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
+		int i;
+		struct bio_vec *bvec;
 
 		/*
 		 * we have verified the checksum already, set page
 		 * checked so the end_io handlers know about it
 		 */
-		while (bio_index < cb->orig_bio->bi_vcnt) {
+		bio_for_each_segment_all(bvec, cb->orig_bio, i)
 			SetPageChecked(bvec->bv_page);
-			bvec++;
-			bio_index++;
-		}
+
 		bio_endio(cb->orig_bio, 0);
 	}
 
@@ -372,7 +371,7 @@
 	for (pg_index = 0; pg_index < cb->nr_pages; pg_index++) {
 		page = compressed_pages[pg_index];
 		page->mapping = inode->i_mapping;
-		if (bio->bi_size)
+		if (bio->bi_iter.bi_size)
 			ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
 							   PAGE_CACHE_SIZE,
 							   bio, 0);
@@ -506,7 +505,7 @@
 
 		if (!em || last_offset < em->start ||
 		    (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
-		    (em->block_start >> 9) != cb->orig_bio->bi_sector) {
+		    (em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
 			free_extent_map(em);
 			unlock_extent(tree, last_offset, end);
 			unlock_page(page);
@@ -552,7 +551,7 @@
  * in it.  We don't actually do IO on those pages but allocate new ones
  * to hold the compressed pages on disk.
  *
- * bio->bi_sector points to the compressed extent on disk
+ * bio->bi_iter.bi_sector points to the compressed extent on disk
  * bio->bi_io_vec points to all of the inode pages
  * bio->bi_vcnt is a count of pages
  *
@@ -573,7 +572,7 @@
 	struct page *page;
 	struct block_device *bdev;
 	struct bio *comp_bio;
-	u64 cur_disk_byte = (u64)bio->bi_sector << 9;
+	u64 cur_disk_byte = (u64)bio->bi_iter.bi_sector << 9;
 	u64 em_len;
 	u64 em_start;
 	struct extent_map *em;
@@ -659,7 +658,7 @@
 		page->mapping = inode->i_mapping;
 		page->index = em_start >> PAGE_CACHE_SHIFT;
 
-		if (comp_bio->bi_size)
+		if (comp_bio->bi_iter.bi_size)
 			ret = tree->ops->merge_bio_hook(READ, page, 0,
 							PAGE_CACHE_SIZE,
 							comp_bio, 0);
@@ -687,8 +686,8 @@
 							comp_bio, sums);
 				BUG_ON(ret); /* -ENOMEM */
 			}
-			sums += (comp_bio->bi_size + root->sectorsize - 1) /
-				root->sectorsize;
+			sums += (comp_bio->bi_iter.bi_size +
+				 root->sectorsize - 1) / root->sectorsize;
 
 			ret = btrfs_map_bio(root, READ, comp_bio,
 					    mirror_num, 0);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8072cfa..e71039e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -842,20 +842,17 @@
 
 static int btree_csum_one_bio(struct bio *bio)
 {
-	struct bio_vec *bvec = bio->bi_io_vec;
-	int bio_index = 0;
+	struct bio_vec *bvec;
 	struct btrfs_root *root;
-	int ret = 0;
+	int i, ret = 0;
 
-	WARN_ON(bio->bi_vcnt <= 0);
-	while (bio_index < bio->bi_vcnt) {
+	bio_for_each_segment_all(bvec, bio, i) {
 		root = BTRFS_I(bvec->bv_page->mapping->host)->root;
 		ret = csum_dirty_buffer(root, bvec->bv_page);
 		if (ret)
 			break;
-		bio_index++;
-		bvec++;
 	}
+
 	return ret;
 }
 
@@ -1695,7 +1692,7 @@
 	bio->bi_private = end_io_wq->private;
 	bio->bi_end_io = end_io_wq->end_io;
 	kfree(end_io_wq);
-	bio_endio(bio, error);
+	bio_endio_nodec(bio, error);
 }
 
 static int cleaner_kthread(void *arg)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index ff43802..bcb6f1b 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1984,7 +1984,7 @@
 	bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
 	if (!bio)
 		return -EIO;
-	bio->bi_size = 0;
+	bio->bi_iter.bi_size = 0;
 	map_length = length;
 
 	ret = btrfs_map_block(fs_info, WRITE, logical,
@@ -1995,7 +1995,7 @@
 	}
 	BUG_ON(mirror_num != bbio->mirror_num);
 	sector = bbio->stripes[mirror_num-1].physical >> 9;
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	dev = bbio->stripes[mirror_num-1].dev;
 	kfree(bbio);
 	if (!dev || !dev->bdev || !dev->writeable) {
@@ -2268,9 +2268,9 @@
 		return -EIO;
 	}
 	bio->bi_end_io = failed_bio->bi_end_io;
-	bio->bi_sector = failrec->logical >> 9;
+	bio->bi_iter.bi_sector = failrec->logical >> 9;
 	bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
-	bio->bi_size = 0;
+	bio->bi_iter.bi_size = 0;
 
 	btrfs_failed_bio = btrfs_io_bio(failed_bio);
 	if (btrfs_failed_bio->csum) {
@@ -2332,12 +2332,13 @@
  */
 static void end_bio_extent_writepage(struct bio *bio, int err)
 {
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct bio_vec *bvec;
 	struct extent_io_tree *tree;
 	u64 start;
 	u64 end;
+	int i;
 
-	do {
+	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -2355,14 +2356,11 @@
 		start = page_offset(page);
 		end = start + bvec->bv_offset + bvec->bv_len - 1;
 
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
-
 		if (end_extent_writepage(page, err, start, end))
 			continue;
 
 		end_page_writeback(page);
-	} while (bvec >= bio->bi_io_vec);
+	}
 
 	bio_put(bio);
 }
@@ -2392,9 +2390,8 @@
  */
 static void end_bio_extent_readpage(struct bio *bio, int err)
 {
+	struct bio_vec *bvec;
 	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct bio_vec *bvec = bio->bi_io_vec;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
 	struct extent_io_tree *tree;
 	u64 offset = 0;
@@ -2405,16 +2402,17 @@
 	u64 extent_len = 0;
 	int mirror;
 	int ret;
+	int i;
 
 	if (err)
 		uptodate = 0;
 
-	do {
+	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 		struct inode *inode = page->mapping->host;
 
 		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
-			 "mirror=%lu\n", (u64)bio->bi_sector, err,
+			 "mirror=%lu\n", (u64)bio->bi_iter.bi_sector, err,
 			 io_bio->mirror_num);
 		tree = &BTRFS_I(inode)->io_tree;
 
@@ -2433,9 +2431,6 @@
 		end = start + bvec->bv_offset + bvec->bv_len - 1;
 		len = bvec->bv_len;
 
-		if (++bvec <= bvec_end)
-			prefetchw(&bvec->bv_page->flags);
-
 		mirror = io_bio->mirror_num;
 		if (likely(uptodate && tree->ops &&
 			   tree->ops->readpage_end_io_hook)) {
@@ -2516,7 +2511,7 @@
 			extent_start = start;
 			extent_len = end + 1 - start;
 		}
-	} while (bvec <= bvec_end);
+	}
 
 	if (extent_len)
 		endio_readpage_release_extent(tree, extent_start, extent_len,
@@ -2547,9 +2542,8 @@
 	}
 
 	if (bio) {
-		bio->bi_size = 0;
 		bio->bi_bdev = bdev;
-		bio->bi_sector = first_sector;
+		bio->bi_iter.bi_sector = first_sector;
 		btrfs_bio = btrfs_io_bio(bio);
 		btrfs_bio->csum = NULL;
 		btrfs_bio->csum_allocated = NULL;
@@ -2643,7 +2637,7 @@
 	if (bio_ret && *bio_ret) {
 		bio = *bio_ret;
 		if (old_compressed)
-			contig = bio->bi_sector == sector;
+			contig = bio->bi_iter.bi_sector == sector;
 		else
 			contig = bio_end_sector(bio) == sector;
 
@@ -3410,20 +3404,18 @@
 
 static void end_bio_extent_buffer_writepage(struct bio *bio, int err)
 {
-	int uptodate = err == 0;
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct bio_vec *bvec;
 	struct extent_buffer *eb;
-	int done;
+	int i, done;
 
-	do {
+	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 
-		bvec--;
 		eb = (struct extent_buffer *)page->private;
 		BUG_ON(!eb);
 		done = atomic_dec_and_test(&eb->io_pages);
 
-		if (!uptodate || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+		if (err || test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
 			set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
 			ClearPageUptodate(page);
 			SetPageError(page);
@@ -3435,10 +3427,9 @@
 			continue;
 
 		end_extent_buffer_writeback(eb);
-	} while (bvec >= bio->bi_io_vec);
+	}
 
 	bio_put(bio);
-
 }
 
 static int write_one_eb(struct extent_buffer *eb,
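
The btrfs endio conversions above all move from open-coded biovec walks to bio_for_each_segment_all(), which is distinct from bio_for_each_segment(): it visits every entry of bi_io_vec by pointer, ignoring bi_iter, and is therefore only legal for code that owns the bio (completion handlers qualify). A minimal sketch of the owner-side idiom, with a hypothetical per-page action:

  /* Completion-side walk: bvec points into bio->bi_io_vec directly. */
  static void my_endio(struct bio *bio, int err)
  {
      struct bio_vec *bvec;
      int i;

      bio_for_each_segment_all(bvec, bio, i)
          SetPageUptodate(bvec->bv_page);    /* hypothetical action */

      bio_put(bio);
  }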
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 6f38488..84a46a4 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -182,7 +182,7 @@
 	if (!path)
 		return -ENOMEM;
 
-	nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits;
+	nblocks = bio->bi_iter.bi_size >> inode->i_sb->s_blocksize_bits;
 	if (!dst) {
 		if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
 			btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
@@ -201,7 +201,7 @@
 		csum = (u8 *)dst;
 	}
 
-	if (bio->bi_size > PAGE_CACHE_SIZE * 8)
+	if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
 		path->reada = 2;
 
 	WARN_ON(bio->bi_vcnt <= 0);
@@ -217,7 +217,7 @@
 		path->skip_locking = 1;
 	}
 
-	disk_bytenr = (u64)bio->bi_sector << 9;
+	disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
 	if (dio)
 		offset = logical_offset;
 	while (bio_index < bio->bi_vcnt) {
@@ -302,7 +302,7 @@
 			      struct btrfs_dio_private *dip, struct bio *bio,
 			      u64 offset)
 {
-	int len = (bio->bi_sector << 9) - dip->disk_bytenr;
+	int len = (bio->bi_iter.bi_sector << 9) - dip->disk_bytenr;
 	u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
 	int ret;
 
@@ -447,11 +447,12 @@
 	u64 offset;
 
 	WARN_ON(bio->bi_vcnt <= 0);
-	sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
+	sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_iter.bi_size),
+		       GFP_NOFS);
 	if (!sums)
 		return -ENOMEM;
 
-	sums->len = bio->bi_size;
+	sums->len = bio->bi_iter.bi_size;
 	INIT_LIST_HEAD(&sums->list);
 
 	if (contig)
@@ -461,7 +462,7 @@
 
 	ordered = btrfs_lookup_ordered_extent(inode, offset);
 	BUG_ON(!ordered); /* Logic error */
-	sums->bytenr = (u64)bio->bi_sector << 9;
+	sums->bytenr = (u64)bio->bi_iter.bi_sector << 9;
 	index = 0;
 
 	while (bio_index < bio->bi_vcnt) {
@@ -476,7 +477,7 @@
 			btrfs_add_ordered_sum(inode, ordered, sums);
 			btrfs_put_ordered_extent(ordered);
 
-			bytes_left = bio->bi_size - total_bytes;
+			bytes_left = bio->bi_iter.bi_size - total_bytes;
 
 			sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
 				       GFP_NOFS);
@@ -484,7 +485,7 @@
 			sums->len = bytes_left;
 			ordered = btrfs_lookup_ordered_extent(inode, offset);
 			BUG_ON(!ordered); /* Logic error */
-			sums->bytenr = ((u64)bio->bi_sector << 9) +
+			sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) +
 				       total_bytes;
 			index = 0;
 		}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 514b291..d546d8c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1577,7 +1577,7 @@
 			 unsigned long bio_flags)
 {
 	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
-	u64 logical = (u64)bio->bi_sector << 9;
+	u64 logical = (u64)bio->bi_iter.bi_sector << 9;
 	u64 length = 0;
 	u64 map_length;
 	int ret;
@@ -1585,7 +1585,7 @@
 	if (bio_flags & EXTENT_BIO_COMPRESSED)
 		return 0;
 
-	length = bio->bi_size;
+	length = bio->bi_iter.bi_size;
 	map_length = length;
 	ret = btrfs_map_block(root->fs_info, rw, logical,
 			      &map_length, NULL, 0);
@@ -6783,17 +6783,16 @@
 static void btrfs_endio_direct_read(struct bio *bio, int err)
 {
 	struct btrfs_dio_private *dip = bio->bi_private;
-	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct bio_vec *bvec = bio->bi_io_vec;
+	struct bio_vec *bvec;
 	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct bio *dio_bio;
 	u32 *csums = (u32 *)dip->csum;
-	int index = 0;
 	u64 start;
+	int i;
 
 	start = dip->logical_offset;
-	do {
+	bio_for_each_segment_all(bvec, bio, i) {
 		if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 			struct page *page = bvec->bv_page;
 			char *kaddr;
@@ -6809,18 +6808,16 @@
 			local_irq_restore(flags);
 
 			flush_dcache_page(bvec->bv_page);
-			if (csum != csums[index]) {
+			if (csum != csums[i]) {
 				btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
 					  btrfs_ino(inode), start, csum,
-					  csums[index]);
+					  csums[i]);
 				err = -EIO;
 			}
 		}
 
 		start += bvec->bv_len;
-		bvec++;
-		index++;
-	} while (bvec <= bvec_end);
+	}
 
 	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
 		      dip->logical_offset + dip->bytes - 1);
@@ -6901,7 +6898,8 @@
 		printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
 		      "sector %#Lx len %u err no %d\n",
 		      btrfs_ino(dip->inode), bio->bi_rw,
-		      (unsigned long long)bio->bi_sector, bio->bi_size, err);
+		      (unsigned long long)bio->bi_iter.bi_sector,
+		      bio->bi_iter.bi_size, err);
 		dip->errors = 1;
 
 		/*
@@ -6992,7 +6990,7 @@
 	struct bio *bio;
 	struct bio *orig_bio = dip->orig_bio;
 	struct bio_vec *bvec = orig_bio->bi_io_vec;
-	u64 start_sector = orig_bio->bi_sector;
+	u64 start_sector = orig_bio->bi_iter.bi_sector;
 	u64 file_offset = dip->logical_offset;
 	u64 submit_len = 0;
 	u64 map_length;
@@ -7000,7 +6998,7 @@
 	int ret = 0;
 	int async_submit = 0;
 
-	map_length = orig_bio->bi_size;
+	map_length = orig_bio->bi_iter.bi_size;
 	ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
 			      &map_length, NULL, 0);
 	if (ret) {
@@ -7008,7 +7006,7 @@
 		return -EIO;
 	}
 
-	if (map_length >= orig_bio->bi_size) {
+	if (map_length >= orig_bio->bi_iter.bi_size) {
 		bio = orig_bio;
 		goto submit;
 	}
@@ -7060,7 +7058,7 @@
 			bio->bi_private = dip;
 			bio->bi_end_io = btrfs_end_dio_bio;
 
-			map_length = orig_bio->bi_size;
+			map_length = orig_bio->bi_iter.bi_size;
 			ret = btrfs_map_block(root->fs_info, rw,
 					      start_sector << 9,
 					      &map_length, NULL, 0);
@@ -7118,7 +7116,8 @@
 
 	if (!skip_sum && !write) {
 		csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
-		sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
+		sum_len = dio_bio->bi_iter.bi_size >>
+			inode->i_sb->s_blocksize_bits;
 		sum_len *= csum_size;
 	} else {
 		sum_len = 0;
@@ -7133,8 +7132,8 @@
 	dip->private = dio_bio->bi_private;
 	dip->inode = inode;
 	dip->logical_offset = file_offset;
-	dip->bytes = dio_bio->bi_size;
-	dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
+	dip->bytes = dio_bio->bi_iter.bi_size;
+	dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;
 	io_bio->bi_private = dip;
 	dip->errors = 0;
 	dip->orig_bio = io_bio;
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 24ac218..9af0b25 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -1032,8 +1032,8 @@
 
 	/* see if we can add this page onto our existing bio */
 	if (last) {
-		last_end = (u64)last->bi_sector << 9;
-		last_end += last->bi_size;
+		last_end = (u64)last->bi_iter.bi_sector << 9;
+		last_end += last->bi_iter.bi_size;
 
 		/*
 		 * we can't merge these if they are from different
@@ -1053,9 +1053,9 @@
 	if (!bio)
 		return -ENOMEM;
 
-	bio->bi_size = 0;
+	bio->bi_iter.bi_size = 0;
 	bio->bi_bdev = stripe->dev->bdev;
-	bio->bi_sector = disk_start >> 9;
+	bio->bi_iter.bi_sector = disk_start >> 9;
 	set_bit(BIO_UPTODATE, &bio->bi_flags);
 
 	bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
@@ -1111,7 +1111,7 @@
 
 	spin_lock_irq(&rbio->bio_list_lock);
 	bio_list_for_each(bio, &rbio->bio_list) {
-		start = (u64)bio->bi_sector << 9;
+		start = (u64)bio->bi_iter.bi_sector << 9;
 		stripe_offset = start - rbio->raid_map[0];
 		page_index = stripe_offset >> PAGE_CACHE_SHIFT;
 
@@ -1272,7 +1272,7 @@
 static int find_bio_stripe(struct btrfs_raid_bio *rbio,
 			   struct bio *bio)
 {
-	u64 physical = bio->bi_sector;
+	u64 physical = bio->bi_iter.bi_sector;
 	u64 stripe_start;
 	int i;
 	struct btrfs_bio_stripe *stripe;
@@ -1298,7 +1298,7 @@
 static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
 				   struct bio *bio)
 {
-	u64 logical = bio->bi_sector;
+	u64 logical = bio->bi_iter.bi_sector;
 	u64 stripe_start;
 	int i;
 
@@ -1602,8 +1602,8 @@
 						 plug_list);
 	struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
 						 plug_list);
-	u64 a_sector = ra->bio_list.head->bi_sector;
-	u64 b_sector = rb->bio_list.head->bi_sector;
+	u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
+	u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
 
 	if (a_sector < b_sector)
 		return -1;
@@ -1691,7 +1691,7 @@
 	if (IS_ERR(rbio))
 		return PTR_ERR(rbio);
 	bio_list_add(&rbio->bio_list, bio);
-	rbio->bio_list_bytes = bio->bi_size;
+	rbio->bio_list_bytes = bio->bi_iter.bi_size;
 
 	/*
 	 * don't plug on full rbios, just get them out the door
@@ -2044,7 +2044,7 @@
 
 	rbio->read_rebuild = 1;
 	bio_list_add(&rbio->bio_list, bio);
-	rbio->bio_list_bytes = bio->bi_size;
+	rbio->bio_list_bytes = bio->bi_iter.bi_size;
 
 	rbio->faila = find_logical_bio_stripe(rbio, bio);
 	if (rbio->faila == -1) {
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 1fd3f33..bb9a928 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1308,7 +1308,7 @@
 			continue;
 		}
 		bio->bi_bdev = page->dev->bdev;
-		bio->bi_sector = page->physical >> 9;
+		bio->bi_iter.bi_sector = page->physical >> 9;
 
 		bio_add_page(bio, page->page, PAGE_SIZE, 0);
 		if (btrfsic_submit_bio_wait(READ, bio))
@@ -1427,7 +1427,7 @@
 		if (!bio)
 			return -EIO;
 		bio->bi_bdev = page_bad->dev->bdev;
-		bio->bi_sector = page_bad->physical >> 9;
+		bio->bi_iter.bi_sector = page_bad->physical >> 9;
 
 		ret = bio_add_page(bio, page_good->page, PAGE_SIZE, 0);
 		if (PAGE_SIZE != ret) {
@@ -1520,7 +1520,7 @@
 		bio->bi_private = sbio;
 		bio->bi_end_io = scrub_wr_bio_end_io;
 		bio->bi_bdev = sbio->dev->bdev;
-		bio->bi_sector = sbio->physical >> 9;
+		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		sbio->err = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical_for_dev_replace ||
@@ -1926,7 +1926,7 @@
 		bio->bi_private = sbio;
 		bio->bi_end_io = scrub_bio_end_io;
 		bio->bi_bdev = sbio->dev->bdev;
-		bio->bi_sector = sbio->physical >> 9;
+		bio->bi_iter.bi_sector = sbio->physical >> 9;
 		sbio->err = 0;
 	} else if (sbio->physical + sbio->page_count * PAGE_SIZE !=
 		   spage->physical ||
@@ -3371,8 +3371,8 @@
 		spin_unlock(&sctx->stat_lock);
 		return -ENOMEM;
 	}
-	bio->bi_size = 0;
-	bio->bi_sector = physical_for_dev_replace >> 9;
+	bio->bi_iter.bi_size = 0;
+	bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
 	bio->bi_bdev = dev->bdev;
 	ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
 	if (ret != PAGE_CACHE_SIZE) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 92303f4..54d2685 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5298,6 +5298,13 @@
 			bio_put(bio);
 			bio = bbio->orig_bio;
 		}
+
+		/*
+		 * We have the original bio now, so increment bi_remaining to
+		 * account for it in endio.
+		 */
+		atomic_inc(&bio->bi_remaining);
+
 		bio->bi_private = bbio->private;
 		bio->bi_end_io = bbio->end_io;
 		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
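
The open-coded atomic_inc() above is needed because btrfs saved and restored the bio's end_io: that save/restore cycle consumed the bi_remaining reference that bio_endio() expects to drop. Given the helper introduced earlier in this pull, the increment-then-complete sequence is equivalent to the sketch below (finish_restored_bio() is a hypothetical name):

  /* Equivalent to the increment above followed by a plain completion:
   * bio_endio_nodec() re-adds the bi_remaining reference before ending I/O. */
  static void finish_restored_bio(struct bio *bio, int err)
  {
      bio_endio_nodec(bio, err);
  }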
@@ -5411,7 +5418,7 @@
 	if (!q->merge_bvec_fn)
 		return 1;
 
-	bvm.bi_size = bio->bi_size - prev->bv_len;
+	bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len;
 	if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
 		return 0;
 	return 1;
@@ -5426,7 +5433,7 @@
 	bio->bi_private = bbio;
 	btrfs_io_bio(bio)->stripe_index = dev_nr;
 	bio->bi_end_io = btrfs_end_bio;
-	bio->bi_sector = physical >> 9;
+	bio->bi_iter.bi_sector = physical >> 9;
 #ifdef DEBUG
 	{
 		struct rcu_string *name;
@@ -5464,7 +5471,7 @@
 	while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
 		if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
 				 bvec->bv_offset) < bvec->bv_len) {
-			u64 len = bio->bi_size;
+			u64 len = bio->bi_iter.bi_size;
 
 			atomic_inc(&bbio->stripes_pending);
 			submit_stripe_bio(root, bbio, bio, physical, dev_nr,
@@ -5486,7 +5493,7 @@
 		bio->bi_private = bbio->private;
 		bio->bi_end_io = bbio->end_io;
 		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
-		bio->bi_sector = logical >> 9;
+		bio->bi_iter.bi_sector = logical >> 9;
 		kfree(bbio);
 		bio_endio(bio, -EIO);
 	}
@@ -5497,7 +5504,7 @@
 {
 	struct btrfs_device *dev;
 	struct bio *first_bio = bio;
-	u64 logical = (u64)bio->bi_sector << 9;
+	u64 logical = (u64)bio->bi_iter.bi_sector << 9;
 	u64 length = 0;
 	u64 map_length;
 	u64 *raid_map = NULL;
@@ -5506,7 +5513,7 @@
 	int total_devs = 1;
 	struct btrfs_bio *bbio = NULL;
 
-	length = bio->bi_size;
+	length = bio->bi_iter.bi_size;
 	map_length = length;
 
 	ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
diff --git a/fs/buffer.c b/fs/buffer.c
index 6024877..651dba1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1312,7 +1312,7 @@
 		}
 		while (out < BH_LRU_SIZE)
 			bhs[out++] = NULL;
-		memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
+		memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
 	}
 	bh_lru_unlock();
 
@@ -2982,11 +2982,11 @@
 	 * let it through, and the IO layer will turn it into
 	 * an EIO.
 	 */
-	if (unlikely(bio->bi_sector >= maxsector))
+	if (unlikely(bio->bi_iter.bi_sector >= maxsector))
 		return;
 
-	maxsector -= bio->bi_sector;
-	bytes = bio->bi_size;
+	maxsector -= bio->bi_iter.bi_sector;
+	bytes = bio->bi_iter.bi_size;
 	if (likely((bytes >> 9) <= maxsector))
 		return;
 
@@ -2994,7 +2994,7 @@
 	bytes = maxsector << 9;
 
 	/* Truncate the bio.. */
-	bio->bi_size = bytes;
+	bio->bi_iter.bi_size = bytes;
 	bio->bi_io_vec[0].bv_len = bytes;
 
 	/* ..and clear the end of the buffer for reads */
@@ -3029,14 +3029,14 @@
 	 */
 	bio = bio_alloc(GFP_NOIO, 1);
 
-	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
 	bio->bi_io_vec[0].bv_page = bh->b_page;
 	bio->bi_io_vec[0].bv_len = bh->b_size;
 	bio->bi_io_vec[0].bv_offset = bh_offset(bh);
 
 	bio->bi_vcnt = 1;
-	bio->bi_size = bh->b_size;
+	bio->bi_iter.bi_size = bh->b_size;
 
 	bio->bi_end_io = end_bio_bh_io_sync;
 	bio->bi_private = bh;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0e04142..160a548 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -375,7 +375,7 @@
 	bio = bio_alloc(GFP_KERNEL, nr_vecs);
 
 	bio->bi_bdev = bdev;
-	bio->bi_sector = first_sector;
+	bio->bi_iter.bi_sector = first_sector;
 	if (dio->is_async)
 		bio->bi_end_io = dio_bio_end_aio;
 	else
@@ -719,7 +719,7 @@
 	if (sdio->bio) {
 		loff_t cur_offset = sdio->cur_page_fs_offset;
 		loff_t bio_next_offset = sdio->logical_offset_in_bio +
-			sdio->bio->bi_size;
+			sdio->bio->bi_iter.bi_size;
 
 		/*
 		 * See whether this new request is contiguous with the old.
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index d488f80..ab95508 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -65,9 +65,9 @@
 {
 	int i;
 	int error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
+	struct bio_vec *bvec;
 
-	for (i = 0; i < bio->bi_vcnt; i++) {
-		struct bio_vec *bvec = &bio->bi_io_vec[i];
+	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 		struct buffer_head *bh, *head;
 		unsigned bio_start = bvec->bv_offset;
@@ -298,7 +298,7 @@
 static void ext4_end_bio(struct bio *bio, int error)
 {
 	ext4_io_end_t *io_end = bio->bi_private;
-	sector_t bi_sector = bio->bi_sector;
+	sector_t bi_sector = bio->bi_iter.bi_sector;
 
 	BUG_ON(!io_end);
 	bio->bi_end_io = NULL;
@@ -366,7 +366,7 @@
 	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
 	if (!bio)
 		return -ENOMEM;
-	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
 	bio->bi_end_io = ext4_end_bio;
 	bio->bi_private = ext4_get_io_end(io->io_end);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 0ae5587..2261ccd 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -26,40 +26,33 @@
 
 static void f2fs_read_end_io(struct bio *bio, int err)
 {
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct bio_vec *bvec;
+	int i;
 
-	do {
+	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
-
-		if (unlikely(!uptodate)) {
+		if (!err) {
+			SetPageUptodate(page);
+		} else {
 			ClearPageUptodate(page);
 			SetPageError(page);
-		} else {
-			SetPageUptodate(page);
 		}
 		unlock_page(page);
-	} while (bvec >= bio->bi_io_vec);
-
+	}
 	bio_put(bio);
 }
 
 static void f2fs_write_end_io(struct bio *bio, int err)
 {
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-	struct f2fs_sb_info *sbi = F2FS_SB(bvec->bv_page->mapping->host->i_sb);
+	struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb);
+	struct bio_vec *bvec;
+	int i;
 
-	do {
+	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
 
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
-
-		if (unlikely(!uptodate)) {
+		if (unlikely(err)) {
 			SetPageError(page);
 			set_bit(AS_EIO, &page->mapping->flags);
 			set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
@@ -67,7 +60,7 @@
 		}
 		end_page_writeback(page);
 		dec_page_count(sbi, F2FS_WRITEBACK);
-	} while (bvec >= bio->bi_io_vec);
+	}
 
 	if (bio->bi_private)
 		complete(bio->bi_private);
@@ -91,7 +84,7 @@
 	bio = bio_alloc(GFP_NOIO, npages);
 
 	bio->bi_bdev = sbi->sb->s_bdev;
-	bio->bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
+	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
 	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
 
 	return bio;
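
The endio conversions above replace the old reverse walk over
bi_io_vec with bio_for_each_segment_all(), which visits every biovec
the bio was submitted with and is therefore only safe in the
completion path of the bio's owner (the bio must not have been split;
see the comment on the macro in include/linux/bio.h further down).
A minimal sketch of the resulting shape, with assumed names:

	static void example_read_end_io(struct bio *bio, int err)
	{
		struct bio_vec *bvec;
		int i;

		bio_for_each_segment_all(bvec, bio, i) {
			struct page *page = bvec->bv_page;

			if (!err) {
				SetPageUptodate(page);
			} else {
				ClearPageUptodate(page);
				SetPageError(page);
			}
			unlock_page(page);
		}
		bio_put(bio);
	}
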
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 58f0640..7669379 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -273,7 +273,7 @@
 		nrvecs = max(nrvecs/2, 1U);
 	}
 
-	bio->bi_sector = blkno * (sb->s_blocksize >> 9);
+	bio->bi_iter.bi_sector = blkno * (sb->s_blocksize >> 9);
 	bio->bi_bdev = sb->s_bdev;
 	bio->bi_end_io = gfs2_end_log_write;
 	bio->bi_private = sdp;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 1e712b5..c6872d0 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -238,7 +238,7 @@
 	lock_page(page);
 
 	bio = bio_alloc(GFP_NOFS, 1);
-	bio->bi_sector = sector * (sb->s_blocksize >> 9);
+	bio->bi_iter.bi_sector = sector * (sb->s_blocksize >> 9);
 	bio->bi_bdev = sb->s_bdev;
 	bio_add_page(bio, page, PAGE_SIZE, 0);
 
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index e9a97a0..3f99964 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -63,7 +63,7 @@
 	sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
 
 	bio = bio_alloc(GFP_NOIO, 1);
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	bio->bi_bdev = sb->s_bdev;
 
 	if (!(rw & WRITE) && data)
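
The sector masking above uses the usual power-of-two align-down idiom:
for a power-of-two span n, x & ~(n - 1) rounds x down to a multiple of
n (e.g. 13 & ~7 == 8). A sketch, assuming the span really is a power
of two:

	static inline sector_t align_down_pow2(sector_t x, sector_t n)
	{
		return x & ~(n - 1);	/* valid only for power-of-two n */
	}
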
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index 360d27c..8d811e0 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1998,20 +1998,20 @@
 
 	bio = bio_alloc(GFP_NOFS, 1);
 
-	bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
+	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
 	bio->bi_bdev = log->bdev;
 	bio->bi_io_vec[0].bv_page = bp->l_page;
 	bio->bi_io_vec[0].bv_len = LOGPSIZE;
 	bio->bi_io_vec[0].bv_offset = bp->l_offset;
 
 	bio->bi_vcnt = 1;
-	bio->bi_size = LOGPSIZE;
+	bio->bi_iter.bi_size = LOGPSIZE;
 
 	bio->bi_end_io = lbmIODone;
 	bio->bi_private = bp;
 	/*check if journaling to disk has been disabled*/
 	if (log->no_integrity) {
-		bio->bi_size = 0;
+		bio->bi_iter.bi_size = 0;
 		lbmIODone(bio, 0);
 	} else {
 		submit_bio(READ_SYNC, bio);
@@ -2144,21 +2144,21 @@
 	jfs_info("lbmStartIO\n");
 
 	bio = bio_alloc(GFP_NOFS, 1);
-	bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
+	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
 	bio->bi_bdev = log->bdev;
 	bio->bi_io_vec[0].bv_page = bp->l_page;
 	bio->bi_io_vec[0].bv_len = LOGPSIZE;
 	bio->bi_io_vec[0].bv_offset = bp->l_offset;
 
 	bio->bi_vcnt = 1;
-	bio->bi_size = LOGPSIZE;
+	bio->bi_iter.bi_size = LOGPSIZE;
 
 	bio->bi_end_io = lbmIODone;
 	bio->bi_private = bp;
 
 	/* check if journaling to disk has been disabled */
 	if (log->no_integrity) {
-		bio->bi_size = 0;
+		bio->bi_iter.bi_size = 0;
 		lbmIODone(bio, 0);
 	} else {
 		submit_bio(WRITE_SYNC, bio);
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index d165cde..49ba7ff 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -416,7 +416,7 @@
 			 * count from hitting zero before we're through
 			 */
 			inc_io(page);
-			if (!bio->bi_size)
+			if (!bio->bi_iter.bi_size)
 				goto dump_bio;
 			submit_bio(WRITE, bio);
 			nr_underway++;
@@ -438,7 +438,7 @@
 
 		bio = bio_alloc(GFP_NOFS, 1);
 		bio->bi_bdev = inode->i_sb->s_bdev;
-		bio->bi_sector = pblock << (inode->i_blkbits - 9);
+		bio->bi_iter.bi_sector = pblock << (inode->i_blkbits - 9);
 		bio->bi_end_io = metapage_write_end_io;
 		bio->bi_private = page;
 
@@ -452,7 +452,7 @@
 	if (bio) {
 		if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
 				goto add_failed;
-		if (!bio->bi_size)
+		if (!bio->bi_iter.bi_size)
 			goto dump_bio;
 
 		submit_bio(WRITE, bio);
@@ -517,7 +517,8 @@
 
 			bio = bio_alloc(GFP_NOFS, 1);
 			bio->bi_bdev = inode->i_sb->s_bdev;
-			bio->bi_sector = pblock << (inode->i_blkbits - 9);
+			bio->bi_iter.bi_sector =
+				pblock << (inode->i_blkbits - 9);
 			bio->bi_end_io = metapage_read_end_io;
 			bio->bi_private = page;
 			len = xlen << inode->i_blkbits;
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 0f95f0d..76279e1 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -26,9 +26,9 @@
 	bio_vec.bv_len = PAGE_SIZE;
 	bio_vec.bv_offset = 0;
 	bio.bi_vcnt = 1;
-	bio.bi_size = PAGE_SIZE;
 	bio.bi_bdev = bdev;
-	bio.bi_sector = page->index * (PAGE_SIZE >> 9);
+	bio.bi_iter.bi_sector = page->index * (PAGE_SIZE >> 9);
+	bio.bi_iter.bi_size = PAGE_SIZE;
 
 	return submit_bio_wait(rw, &bio);
 }
@@ -56,22 +56,18 @@
 static void writeseg_end_io(struct bio *bio, int err)
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct bio_vec *bvec;
+	int i;
 	struct super_block *sb = bio->bi_private;
 	struct logfs_super *super = logfs_super(sb);
-	struct page *page;
 
 	BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */
 	BUG_ON(err);
-	BUG_ON(bio->bi_vcnt == 0);
-	do {
-		page = bvec->bv_page;
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
 
-		end_page_writeback(page);
-		page_cache_release(page);
-	} while (bvec >= bio->bi_io_vec);
+	bio_for_each_segment_all(bvec, bio, i) {
+		end_page_writeback(bvec->bv_page);
+		page_cache_release(bvec->bv_page);
+	}
 	bio_put(bio);
 	if (atomic_dec_and_test(&super->s_pending_writes))
 		wake_up(&wq);
@@ -96,9 +92,9 @@
 		if (i >= max_pages) {
 			/* Block layer cannot split bios :( */
 			bio->bi_vcnt = i;
-			bio->bi_size = i * PAGE_SIZE;
+			bio->bi_iter.bi_size = i * PAGE_SIZE;
 			bio->bi_bdev = super->s_bdev;
-			bio->bi_sector = ofs >> 9;
+			bio->bi_iter.bi_sector = ofs >> 9;
 			bio->bi_private = sb;
 			bio->bi_end_io = writeseg_end_io;
 			atomic_inc(&super->s_pending_writes);
@@ -123,9 +119,9 @@
 		unlock_page(page);
 	}
 	bio->bi_vcnt = nr_pages;
-	bio->bi_size = nr_pages * PAGE_SIZE;
+	bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
 	bio->bi_bdev = super->s_bdev;
-	bio->bi_sector = ofs >> 9;
+	bio->bi_iter.bi_sector = ofs >> 9;
 	bio->bi_private = sb;
 	bio->bi_end_io = writeseg_end_io;
 	atomic_inc(&super->s_pending_writes);
@@ -188,9 +184,9 @@
 		if (i >= max_pages) {
 			/* Block layer cannot split bios :( */
 			bio->bi_vcnt = i;
-			bio->bi_size = i * PAGE_SIZE;
+			bio->bi_iter.bi_size = i * PAGE_SIZE;
 			bio->bi_bdev = super->s_bdev;
-			bio->bi_sector = ofs >> 9;
+			bio->bi_iter.bi_sector = ofs >> 9;
 			bio->bi_private = sb;
 			bio->bi_end_io = erase_end_io;
 			atomic_inc(&super->s_pending_writes);
@@ -209,9 +205,9 @@
 		bio->bi_io_vec[i].bv_offset = 0;
 	}
 	bio->bi_vcnt = nr_pages;
-	bio->bi_size = nr_pages * PAGE_SIZE;
+	bio->bi_iter.bi_size = nr_pages * PAGE_SIZE;
 	bio->bi_bdev = super->s_bdev;
-	bio->bi_sector = ofs >> 9;
+	bio->bi_iter.bi_sector = ofs >> 9;
 	bio->bi_private = sb;
 	bio->bi_end_io = erase_end_io;
 	atomic_inc(&super->s_pending_writes);
diff --git a/fs/mpage.c b/fs/mpage.c
index 0face1c..4979ffa 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -43,16 +43,14 @@
  */
 static void mpage_end_io(struct bio *bio, int err)
 {
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct bio_vec *bv;
+	int i;
 
-	do {
-		struct page *page = bvec->bv_page;
+	bio_for_each_segment_all(bv, bio, i) {
+		struct page *page = bv->bv_page;
 
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
 		if (bio_data_dir(bio) == READ) {
-			if (uptodate) {
+			if (!err) {
 				SetPageUptodate(page);
 			} else {
 				ClearPageUptodate(page);
@@ -60,14 +58,15 @@
 			}
 			unlock_page(page);
 		} else { /* bio_data_dir(bio) == WRITE */
-			if (!uptodate) {
+			if (err) {
 				SetPageError(page);
 				if (page->mapping)
 					set_bit(AS_EIO, &page->mapping->flags);
 			}
 			end_page_writeback(page);
 		}
-	} while (bvec >= bio->bi_io_vec);
+	}
+
 	bio_put(bio);
 }
 
@@ -94,7 +93,7 @@
 
 	if (bio) {
 		bio->bi_bdev = bdev;
-		bio->bi_sector = first_sector;
+		bio->bi_iter.bi_sector = first_sector;
 	}
 	return bio;
 }
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index e242bbf..56ff823 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -134,8 +134,8 @@
 	if (bio) {
 		get_parallel(bio->bi_private);
 		dprintk("%s submitting %s bio %u@%llu\n", __func__,
-			rw == READ ? "read" : "write",
-			bio->bi_size, (unsigned long long)bio->bi_sector);
+			rw == READ ? "read" : "write", bio->bi_iter.bi_size,
+			(unsigned long long)bio->bi_iter.bi_sector);
 		submit_bio(rw, bio);
 	}
 	return NULL;
@@ -156,7 +156,8 @@
 	}
 
 	if (bio) {
-		bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+		bio->bi_iter.bi_sector = isect - be->be_f_offset +
+			be->be_v_offset;
 		bio->bi_bdev = be->be_mdev;
 		bio->bi_end_io = end_io;
 		bio->bi_private = par;
@@ -201,18 +202,14 @@
 static void bl_end_io_read(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct bio_vec *bvec;
+	int i;
 
-	do {
-		struct page *page = bvec->bv_page;
+	if (!err)
+		bio_for_each_segment_all(bvec, bio, i)
+			SetPageUptodate(bvec->bv_page);
 
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
-		if (uptodate)
-			SetPageUptodate(page);
-	} while (bvec >= bio->bi_io_vec);
-	if (!uptodate) {
+	if (err) {
 		struct nfs_read_data *rdata = par->data;
 		struct nfs_pgio_header *header = rdata->header;
 
@@ -383,20 +380,16 @@
 static void bl_end_io_write_zero(struct bio *bio, int err)
 {
 	struct parallel_io *par = bio->bi_private;
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+	struct bio_vec *bvec;
+	int i;
 
-	do {
-		struct page *page = bvec->bv_page;
-
-		if (--bvec >= bio->bi_io_vec)
-			prefetchw(&bvec->bv_page->flags);
+	bio_for_each_segment_all(bvec, bio, i) {
 		/* This is the zeroing page we added */
-		end_page_writeback(page);
-		page_cache_release(page);
-	} while (bvec >= bio->bi_io_vec);
+		end_page_writeback(bvec->bv_page);
+		page_cache_release(bvec->bv_page);
+	}
 
-	if (unlikely(!uptodate)) {
+	if (unlikely(err)) {
 		struct nfs_write_data *data = par->data;
 		struct nfs_pgio_header *header = data->header;
 
@@ -519,7 +512,7 @@
 	isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
 		(offset / SECTOR_SIZE);
 
-	bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+	bio->bi_iter.bi_sector = isect - be->be_f_offset + be->be_v_offset;
 	bio->bi_bdev = be->be_mdev;
 	bio->bi_end_io = bl_read_single_end_io;
 
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d2255d7..aa9bc97 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -924,11 +924,11 @@
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+#ifdef CONFIG_NFS_V3_ACL
 	.listxattr	= generic_listxattr,
 	.getxattr	= generic_getxattr,
 	.setxattr	= generic_setxattr,
 	.removexattr	= generic_removexattr,
-#ifdef CONFIG_NFS_V3_ACL
 	.get_acl	= nfs3_get_acl,
 	.set_acl	= nfs3_set_acl,
 #endif
@@ -938,11 +938,11 @@
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
+#ifdef CONFIG_NFS_V3_ACL
 	.listxattr	= generic_listxattr,
 	.getxattr	= generic_getxattr,
 	.setxattr	= generic_setxattr,
 	.removexattr	= generic_removexattr,
-#ifdef CONFIG_NFS_V3_ACL
 	.get_acl	= nfs3_get_acl,
 	.set_acl	= nfs3_set_acl,
 #endif
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index 8b68218..a812fd1 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -45,7 +45,7 @@
 
 struct nfs4_acl *nfs4_acl_new(int);
 int nfs4_acl_get_whotype(char *, u32);
-int nfs4_acl_write_who(int who, char *p);
+__be32 nfs4_acl_write_who(int who, __be32 **p, int *len);
 
 int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
 		struct nfs4_acl **acl);
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index d5c5b3e..b582f9a 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -84,12 +84,4 @@
 void	nfsd_cache_update(struct svc_rqst *, int, __be32 *);
 int	nfsd_reply_cache_stats_open(struct inode *, struct file *);
 
-#ifdef CONFIG_NFSD_V4
-void	nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
-#else  /* CONFIG_NFSD_V4 */
-static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
-{
-}
-#endif /* CONFIG_NFSD_V4 */
-
 #endif /* NFSCACHE_H */
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
index bf95f6b..66e58db 100644
--- a/fs/nfsd/idmap.h
+++ b/fs/nfsd/idmap.h
@@ -56,7 +56,7 @@
 
 __be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);
 __be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *);
-int nfsd_map_uid_to_name(struct svc_rqst *, kuid_t, char *);
-int nfsd_map_gid_to_name(struct svc_rqst *, kgid_t, char *);
+__be32 nfsd4_encode_user(struct svc_rqst *, kuid_t, __be32 **, int *);
+__be32 nfsd4_encode_group(struct svc_rqst *, kgid_t, __be32 **, int *);
 
 #endif /* LINUX_NFSD_IDMAP_H */
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 849a7c3..d32b3aa 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -95,6 +95,7 @@
 	time_t nfsd4_grace;
 
 	bool nfsd_net_up;
+	bool lockd_up;
 
 	/*
 	 * Time of server startup
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 14d9ecb..de6e39e 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -168,7 +168,7 @@
 	      struct kstat *stat)
 {
 	*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
-	*p++ = htonl((u32) stat->mode);
+	*p++ = htonl((u32) (stat->mode & S_IALLUGO));
 	*p++ = htonl((u32) stat->nlink);
 	*p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
 	*p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
@@ -842,21 +842,21 @@
 
 static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
 {
-	struct svc_fh	fh;
+	struct svc_fh	*fh = &cd->scratch;
 	__be32 err;
 
-	fh_init(&fh, NFS3_FHSIZE);
-	err = compose_entry_fh(cd, &fh, name, namlen);
+	fh_init(fh, NFS3_FHSIZE);
+	err = compose_entry_fh(cd, fh, name, namlen);
 	if (err) {
 		*p++ = 0;
 		*p++ = 0;
 		goto out;
 	}
-	p = encode_post_op_attr(cd->rqstp, p, &fh);
+	p = encode_post_op_attr(cd->rqstp, p, fh);
 	*p++ = xdr_one;			/* yes, a file handle follows */
-	p = encode_fh(p, &fh);
+	p = encode_fh(p, fh);
 out:
-	fh_put(&fh);
+	fh_put(fh);
 	return p;
 }
 
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 649ad7c..d3a5871 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -38,6 +38,7 @@
 #include <linux/nfs_fs.h>
 #include <linux/export.h>
 #include "nfsfh.h"
+#include "nfsd.h"
 #include "acl.h"
 #include "vfs.h"
 
@@ -916,17 +917,22 @@
 	return NFS4_ACL_WHO_NAMED;
 }
 
-int
-nfs4_acl_write_who(int who, char *p)
+__be32 nfs4_acl_write_who(int who, __be32 **p, int *len)
 {
 	int i;
+	int bytes;
 
 	for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
-		if (s2t_map[i].type == who) {
-			memcpy(p, s2t_map[i].string, s2t_map[i].stringlen);
-			return s2t_map[i].stringlen;
-		}
+		if (s2t_map[i].type != who)
+			continue;
+		bytes = 4 + (XDR_QUADLEN(s2t_map[i].stringlen) << 2);
+		if (bytes > *len)
+			return nfserr_resource;
+		*p = xdr_encode_opaque(*p, s2t_map[i].string,
+					s2t_map[i].stringlen);
+		*len -= bytes;
+		return 0;
 	}
-	BUG();
+	WARN_ON_ONCE(1);
 	return -1;
 }
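
The rewritten nfs4_acl_write_who() above, and the idmap encoders
converted below, all account for an XDR opaque the same way: a 4-byte
length word plus the payload rounded up to a whole number of 4-byte
words, which is what XDR_QUADLEN() (from linux/sunrpc/xdr.h) counts.
A sketch of the shared pattern (the function name is illustrative):

	static __be32 encode_opaque_checked(__be32 **p, int *buflen,
					    const char *data, int len)
	{
		/* length word + data padded to a 4-byte multiple */
		int bytes = 4 + (XDR_QUADLEN(len) << 2);

		if (bytes > *buflen)
			return nfserr_resource;
		*p = xdr_encode_opaque(*p, data, len);
		*buflen -= bytes;
		return 0;
	}
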
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4832fd8..c0dfde6 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -551,27 +551,46 @@
 	return 0;
 }
 
-static int
-idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name)
+static __be32 encode_ascii_id(u32 id, __be32 **p, int *buflen)
+{
+	char buf[11];
+	int len;
+	int bytes;
+
+	len = sprintf(buf, "%u", id);
+	bytes = 4 + (XDR_QUADLEN(len) << 2);
+	if (bytes > *buflen)
+		return nfserr_resource;
+	*p = xdr_encode_opaque(*p, buf, len);
+	*buflen -= bytes;
+	return 0;
+}
+
+static __be32 idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
 {
 	struct ent *item, key = {
 		.id = id,
 		.type = type,
 	};
 	int ret;
+	int bytes;
 	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
 	strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
 	ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
 	if (ret == -ENOENT)
-		return sprintf(name, "%u", id);
+		return encode_ascii_id(id, p, buflen);
 	if (ret)
-		return ret;
+		return nfserrno(ret);
 	ret = strlen(item->name);
-	BUG_ON(ret > IDMAP_NAMESZ);
-	memcpy(name, item->name, ret);
+	WARN_ON_ONCE(ret > IDMAP_NAMESZ);
+	bytes = 4 + (XDR_QUADLEN(ret) << 2);
+	if (bytes > *buflen)
+		return nfserr_resource;
+	*p = xdr_encode_opaque(*p, item->name, ret);
+	*buflen -= bytes;
 	cache_put(&item->h, nn->idtoname_cache);
-	return ret;
+	return 0;
 }
 
 static bool
@@ -603,12 +622,11 @@
 	return idmap_name_to_id(rqstp, type, name, namelen, id);
 }
 
-static int
-do_id_to_name(struct svc_rqst *rqstp, int type, u32 id, char *name)
+static __be32 encode_name_from_id(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
 {
 	if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
-		return sprintf(name, "%u", id);
-	return idmap_id_to_name(rqstp, type, id, name);
+		return encode_ascii_id(id, p, buflen);
+	return idmap_id_to_name(rqstp, type, id, p, buflen);
 }
 
 __be32
@@ -637,16 +655,14 @@
 	return status;
 }
 
-int
-nfsd_map_uid_to_name(struct svc_rqst *rqstp, kuid_t uid, char *name)
+__be32 nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t uid,  __be32 **p, int *buflen)
 {
 	u32 id = from_kuid(&init_user_ns, uid);
-	return do_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
+	return encode_name_from_id(rqstp, IDMAP_TYPE_USER, id, p, buflen);
 }
 
-int
-nfsd_map_gid_to_name(struct svc_rqst *rqstp, kgid_t gid, char *name)
+__be32 nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t gid, __be32 **p, int *buflen)
 {
 	u32 id = from_kgid(&init_user_ns, gid);
-	return do_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
+	return encode_name_from_id(rqstp, IDMAP_TYPE_GROUP, id, p, buflen);
 }
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 825b8a9..82189b2 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -231,17 +231,16 @@
 }
 
 static __be32
-do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
+do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh **resfh)
 {
 	struct svc_fh *current_fh = &cstate->current_fh;
-	struct svc_fh *resfh;
 	int accmode;
 	__be32 status;
 
-	resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
-	if (!resfh)
+	*resfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
+	if (!*resfh)
 		return nfserr_jukebox;
-	fh_init(resfh, NFS4_FHSIZE);
+	fh_init(*resfh, NFS4_FHSIZE);
 	open->op_truncate = 0;
 
 	if (open->op_create) {
@@ -266,12 +265,12 @@
 		 */
 		status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
 					open->op_fname.len, &open->op_iattr,
-					resfh, open->op_createmode,
+					*resfh, open->op_createmode,
 					(u32 *)open->op_verf.data,
 					&open->op_truncate, &open->op_created);
 
 		if (!status && open->op_label.len)
-			nfsd4_security_inode_setsecctx(resfh, &open->op_label, open->op_bmval);
+			nfsd4_security_inode_setsecctx(*resfh, &open->op_label, open->op_bmval);
 
 		/*
 		 * Following rfc 3530 14.2.16, use the returned bitmask
@@ -281,31 +280,32 @@
 		if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0)
 			open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS |
 							FATTR4_WORD1_TIME_MODIFY);
-	} else {
+	} else
+		/*
+		 * Note this may exit with the parent still locked.
+		 * We will hold the lock until nfsd4_open's final
+		 * lookup, to prevent renames or unlinks until we've had
+		 * a chance to acquire a delegation if appropriate.
+		 */
 		status = nfsd_lookup(rqstp, current_fh,
-				     open->op_fname.data, open->op_fname.len, resfh);
-		fh_unlock(current_fh);
-	}
+				     open->op_fname.data, open->op_fname.len, *resfh);
 	if (status)
 		goto out;
-	status = nfsd_check_obj_isreg(resfh);
+	status = nfsd_check_obj_isreg(*resfh);
 	if (status)
 		goto out;
 
 	if (is_create_with_attrs(open) && open->op_acl != NULL)
-		do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval);
+		do_set_nfs4_acl(rqstp, *resfh, open->op_acl, open->op_bmval);
 
-	nfsd4_set_open_owner_reply_cache(cstate, open, resfh);
+	nfsd4_set_open_owner_reply_cache(cstate, open, *resfh);
 	accmode = NFSD_MAY_NOP;
 	if (open->op_created ||
 			open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
 		accmode |= NFSD_MAY_OWNER_OVERRIDE;
-	status = do_open_permission(rqstp, resfh, open, accmode);
+	status = do_open_permission(rqstp, *resfh, open, accmode);
 	set_change_info(&open->op_cinfo, current_fh);
-	fh_dup2(current_fh, resfh);
 out:
-	fh_put(resfh);
-	kfree(resfh);
 	return status;
 }
 
@@ -358,6 +358,7 @@
 	   struct nfsd4_open *open)
 {
 	__be32 status;
+	struct svc_fh *resfh = NULL;
 	struct nfsd4_compoundres *resp;
 	struct net *net = SVC_NET(rqstp);
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -424,7 +425,7 @@
 	switch (open->op_claim_type) {
 		case NFS4_OPEN_CLAIM_DELEGATE_CUR:
 		case NFS4_OPEN_CLAIM_NULL:
-			status = do_open_lookup(rqstp, cstate, open);
+			status = do_open_lookup(rqstp, cstate, open, &resfh);
 			if (status)
 				goto out;
 			break;
@@ -440,6 +441,7 @@
 			status = do_open_fhandle(rqstp, cstate, open);
 			if (status)
 				goto out;
+			resfh = &cstate->current_fh;
 			break;
 		case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
              	case NFS4_OPEN_CLAIM_DELEGATE_PREV:
@@ -459,9 +461,14 @@
 	 * successful, it (1) truncates the file if open->op_truncate was
 	 * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
 	 */
-	status = nfsd4_process_open2(rqstp, &cstate->current_fh, open);
+	status = nfsd4_process_open2(rqstp, resfh, open);
 	WARN_ON(status && open->op_created);
 out:
+	if (resfh && resfh != &cstate->current_fh) {
+		fh_dup2(&cstate->current_fh, resfh);
+		fh_put(resfh);
+		kfree(resfh);
+	}
 	nfsd4_cleanup_open_state(open, status);
 	if (open->op_openowner && !nfsd4_has_session(cstate))
 		cstate->replay_owner = &open->op_openowner->oo_owner;
@@ -1070,8 +1077,10 @@
 				    cstate->current_fh.fh_dentry, &p,
 				    count, verify->ve_bmval,
 				    rqstp, 0);
-
-	/* this means that nfsd4_encode_fattr() ran out of space */
+	/*
+	 * If nfsd4_encode_fattr() ran out of space, assume that's because
+	 * the attributes are longer (hence different) than those given:
+	 */
 	if (status == nfserr_resource)
 		status = nfserr_not_same;
 	if (status)
@@ -1525,7 +1534,8 @@
 static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
-		1 + 1 + 2 + /* eir_flags, spr_how, spo_must_enforce & _allow */\
+		1 + 1 + /* eir_flags, spr_how */\
+		4 + /* spo_must_enforce & _allow with bitmap */\
 		2 + /*eir_server_owner.so_minor_id */\
 		/* eir_server_owner.so_major_id<> */\
 		XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
@@ -1882,6 +1892,7 @@
 		.vs_proc	= nfsd_procedures4,
 		.vs_dispatch	= nfsd_dispatch,
 		.vs_xdrsize	= NFS4_SVC_XDRSIZE,
+		.vs_rpcb_optnl	= 1,
 };
 
 /*
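
The reply-size change in nfsd4_exchange_id_rsize() above matches the
SP4_MACH_CRED encoding reworked in nfs4xdr.c later in this series: the
spo_must_enforce bitmap costs one count word plus two bitmap words,
and the empty spo_must_allow bitmap costs one count word, so
1 + 2 + 1 = 4 XDR words (16 bytes). That is the "4 +" in the size
estimate and the RESERVE_SPACE(16) in nfsd4_encode_exchange_id().
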
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 105d6fa..d5d070f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -832,10 +832,11 @@
 	spin_unlock(&nfsd_drc_lock);
 }
 
-static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *attrs)
+static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
+					   struct nfsd4_channel_attrs *battrs)
 {
-	int numslots = attrs->maxreqs;
-	int slotsize = slot_bytes(attrs);
+	int numslots = fattrs->maxreqs;
+	int slotsize = slot_bytes(fattrs);
 	struct nfsd4_session *new;
 	int mem, i;
 
@@ -852,6 +853,10 @@
 		if (!new->se_slots[i])
 			goto out_free;
 	}
+
+	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));
+
 	return new;
 out_free:
 	while (i--)
@@ -997,8 +1002,7 @@
 	list_add(&new->se_perclnt, &clp->cl_sessions);
 	spin_unlock(&clp->cl_lock);
 	spin_unlock(&nn->client_lock);
-	memcpy(&new->se_fchannel, &cses->fore_channel,
-			sizeof(struct nfsd4_channel_attrs));
+
 	if (cses->flags & SESSION4_BACK_CHAN) {
 		struct sockaddr *sa = svc_addr(rqstp);
 		/*
@@ -1851,6 +1855,11 @@
 	return nfs_ok;
 }
 
+#define NFSD_CB_MAX_REQ_SZ	((NFS4_enc_cb_recall_sz + \
+				 RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32))
+#define NFSD_CB_MAX_RESP_SZ	((NFS4_dec_cb_recall_sz + \
+				 RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32))
+
 static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
 {
 	ca->headerpadsz = 0;
@@ -1861,9 +1870,9 @@
 	 * less than 1k.  Tighten up this estimate in the unlikely event
 	 * it turns out to be a problem for some client:
 	 */
-	if (ca->maxreq_sz < NFS4_enc_cb_recall_sz + RPC_MAX_HEADER_WITH_AUTH)
+	if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
 		return nfserr_toosmall;
-	if (ca->maxresp_sz < NFS4_dec_cb_recall_sz + RPC_MAX_REPHEADER_WITH_AUTH)
+	if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
 		return nfserr_toosmall;
 	ca->maxresp_cached = 0;
 	if (ca->maxops < 2)
@@ -1913,9 +1922,9 @@
 		return status;
 	status = check_backchannel_attrs(&cr_ses->back_channel);
 	if (status)
-		return status;
+		goto out_release_drc_mem;
 	status = nfserr_jukebox;
-	new = alloc_session(&cr_ses->fore_channel);
+	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
 	if (!new)
 		goto out_release_drc_mem;
 	conn = alloc_conn_from_crses(rqstp, cr_ses);
@@ -3034,18 +3043,18 @@
 	if (!fl)
 		return -ENOMEM;
 	fl->fl_file = find_readable_file(fp);
-	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
 	status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
-	if (status) {
-		list_del_init(&dp->dl_perclnt);
-		locks_free_lock(fl);
-		return status;
-	}
+	if (status)
+		goto out_free;
+	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
 	fp->fi_lease = fl;
 	fp->fi_deleg_file = get_file(fl->fl_file);
 	atomic_set(&fp->fi_delegees, 1);
 	list_add(&dp->dl_perfile, &fp->fi_delegations);
 	return 0;
+out_free:
+	locks_free_lock(fl);
+	return status;
 }
 
 static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp)
@@ -3125,6 +3134,7 @@
 				goto out_no_deleg;
 			break;
 		case NFS4_OPEN_CLAIM_NULL:
+		case NFS4_OPEN_CLAIM_FH:
 			/*
 			 * Let's not give out any delegations till everyone's
 			 * had the chance to reclaim theirs....
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index ee7237f..63f2395 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -103,11 +103,6 @@
 	(x) = (u64)ntohl(*p++) << 32;		\
 	(x) |= ntohl(*p++);			\
 } while (0)
-#define READTIME(x)       do {			\
-	p++;					\
-	(x) = ntohl(*p++);			\
-	p++;					\
-} while (0)
 #define READMEM(x,nbytes) do {			\
 	x = (char *)p;				\
 	p += XDR_QUADLEN(nbytes);		\
@@ -190,6 +185,15 @@
 	return (clid->cl_boot == 0) && (clid->cl_id == 0);
 }
 
+/**
+ * defer_free - mark an allocation as deferred freed
+ * @argp: NFSv4 compound argument structure to be freed with
+ * @release: release callback to free @p, typically kfree()
+ * @p: pointer to be freed
+ *
+ * Marks @p to be freed when processing the compound operation
+ * described in @argp finishes.
+ */
 static int
 defer_free(struct nfsd4_compoundargs *argp,
 		void (*release)(const void *), void *p)
@@ -206,6 +210,16 @@
 	return 0;
 }
 
+/**
+ * savemem - duplicate a chunk of memory for later processing
+ * @argp: NFSv4 compound argument structure the copy is freed with
+ * @p: pointer to be duplicated
+ * @nbytes: length to be duplicated
+ *
+ * Returns a pointer to a copy of @nbytes bytes of memory at @p
+ * that are preserved until processing of the NFSv4 compound
+ * operation described by @argp finishes.
+ */
 static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
 {
 	if (p == argp->tmp) {
@@ -257,7 +271,6 @@
 	int expected_len, len = 0;
 	u32 dummy32;
 	char *buf;
-	int host_err;
 
 	DECODE_HEAD;
 	iattr->ia_valid = 0;
@@ -284,10 +297,9 @@
 			return nfserr_resource;
 
 		*acl = nfs4_acl_new(nace);
-		if (*acl == NULL) {
-			host_err = -ENOMEM;
-			goto out_nfserr;
-		}
+		if (*acl == NULL)
+			return nfserr_jukebox;
+
 		defer_free(argp, kfree, *acl);
 
 		(*acl)->naces = nace;
@@ -425,10 +437,6 @@
 		goto xdr_error;
 
 	DECODE_TAIL;
-
-out_nfserr:
-	status = nfserrno(host_err);
-	goto out;
 }
 
 static __be32
@@ -1957,56 +1965,16 @@
 	};
 }
 
-static __be32
-nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, kuid_t uid, kgid_t gid,
-			__be32 **p, int *buflen)
-{
-	int status;
-
-	if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4)
-		return nfserr_resource;
-	if (whotype != NFS4_ACL_WHO_NAMED)
-		status = nfs4_acl_write_who(whotype, (u8 *)(*p + 1));
-	else if (gid_valid(gid))
-		status = nfsd_map_gid_to_name(rqstp, gid, (u8 *)(*p + 1));
-	else
-		status = nfsd_map_uid_to_name(rqstp, uid, (u8 *)(*p + 1));
-	if (status < 0)
-		return nfserrno(status);
-	*p = xdr_encode_opaque(*p, NULL, status);
-	*buflen -= (XDR_QUADLEN(status) << 2) + 4;
-	BUG_ON(*buflen < 0);
-	return 0;
-}
-
-static inline __be32
-nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t user, __be32 **p, int *buflen)
-{
-	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, user, INVALID_GID,
-				 p, buflen);
-}
-
-static inline __be32
-nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t group, __be32 **p, int *buflen)
-{
-	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, INVALID_UID, group,
-				 p, buflen);
-}
-
 static inline __be32
 nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
 		__be32 **p, int *buflen)
 {
-	kuid_t uid = INVALID_UID;
-	kgid_t gid = INVALID_GID;
-
-	if (ace->whotype == NFS4_ACL_WHO_NAMED) {
-		if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
-			gid = ace->who_gid;
-		else
-			uid = ace->who_uid;
-	}
-	return nfsd4_encode_name(rqstp, ace->whotype, uid, gid, p, buflen);
+	if (ace->whotype != NFS4_ACL_WHO_NAMED)
+		return nfs4_acl_write_who(ace->whotype, p, buflen);
+	else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
+		return nfsd4_encode_group(rqstp, ace->who_gid, p, buflen);
+	else
+		return nfsd4_encode_user(rqstp, ace->who_uid, p, buflen);
 }
 
 #define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
@@ -2090,7 +2058,7 @@
 	u32 bmval1 = bmval[1];
 	u32 bmval2 = bmval[2];
 	struct kstat stat;
-	struct svc_fh tempfh;
+	struct svc_fh *tempfh = NULL;
 	struct kstatfs statfs;
 	int buflen = count << 2;
 	__be32 *attrlenp;
@@ -2137,11 +2105,15 @@
 			goto out_nfserr;
 	}
 	if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
-		fh_init(&tempfh, NFS4_FHSIZE);
-		status = fh_compose(&tempfh, exp, dentry, NULL);
+		tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
+		status = nfserr_jukebox;
+		if (!tempfh)
+			goto out;
+		fh_init(tempfh, NFS4_FHSIZE);
+		status = fh_compose(tempfh, exp, dentry, NULL);
 		if (status)
 			goto out;
-		fhp = &tempfh;
+		fhp = tempfh;
 	}
 	if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
 			| FATTR4_WORD0_SUPPORTED_ATTRS)) {
@@ -2222,8 +2194,10 @@
 		if ((buflen -= 4) < 0)
 			goto out_resource;
 		dummy = nfs4_file_type(stat.mode);
-		if (dummy == NF4BAD)
-			goto out_serverfault;
+		if (dummy == NF4BAD) {
+			status = nfserr_serverfault;
+			goto out;
+		}
 		WRITE32(dummy);
 	}
 	if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
@@ -2317,8 +2291,6 @@
 			WRITE32(ace->flag);
 			WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
 			status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen);
-			if (status == nfserr_resource)
-				goto out_resource;
 			if (status)
 				goto out;
 		}
@@ -2379,8 +2351,6 @@
 	}
 	if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
 		status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
-		if (status == nfserr_resource)
-			goto out_resource;
 		if (status)
 			goto out;
 	}
@@ -2431,15 +2401,11 @@
 	}
 	if (bmval1 & FATTR4_WORD1_OWNER) {
 		status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
-		if (status == nfserr_resource)
-			goto out_resource;
 		if (status)
 			goto out;
 	}
 	if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
 		status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
-		if (status == nfserr_resource)
-			goto out_resource;
 		if (status)
 			goto out;
 	}
@@ -2533,8 +2499,8 @@
 		security_release_secctx(context, contextlen);
 #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
 	kfree(acl);
-	if (fhp == &tempfh)
-		fh_put(&tempfh);
+	if (tempfh)
+		fh_put(tempfh);
 	return status;
 out_nfserr:
 	status = nfserrno(err);
@@ -2542,9 +2508,6 @@
 out_resource:
 	status = nfserr_resource;
 	goto out;
-out_serverfault:
-	status = nfserr_serverfault;
-	goto out;
 }
 
 static inline int attributes_need_mount(u32 *bmval)
@@ -2621,17 +2584,14 @@
 static __be32 *
 nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
 {
-	__be32 *attrlenp;
-
 	if (buflen < 6)
 		return NULL;
 	*p++ = htonl(2);
 	*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
 	*p++ = htonl(0);			 /* bmval1 */
 
-	attrlenp = p++;
+	*p++ = htonl(4);     /* attribute length */
 	*p++ = nfserr;       /* no htonl */
-	*attrlenp = htonl((char *)p - (char *)attrlenp - 4);
 	return p;
 }
 
@@ -3244,7 +3204,7 @@
 
 		if (rpcauth_get_gssinfo(pf, &info) == 0) {
 			supported++;
-			RESERVE_SPACE(4 + 4 + info.oid.len + 4 + 4);
+			RESERVE_SPACE(4 + 4 + XDR_LEN(info.oid.len) + 4 + 4);
 			WRITE32(RPC_AUTH_GSS);
 			WRITE32(info.oid.len);
 			WRITEMEM(info.oid.data, info.oid.len);
@@ -3379,35 +3339,43 @@
 		8 /* eir_clientid */ +
 		4 /* eir_sequenceid */ +
 		4 /* eir_flags */ +
-		4 /* spr_how */ +
-		8 /* spo_must_enforce, spo_must_allow */ +
-		8 /* so_minor_id */ +
-		4 /* so_major_id.len */ +
-		(XDR_QUADLEN(major_id_sz) * 4) +
-		4 /* eir_server_scope.len */ +
-		(XDR_QUADLEN(server_scope_sz) * 4) +
-		4 /* eir_server_impl_id.count (0) */);
+		4 /* spr_how */);
 
 	WRITEMEM(&exid->clientid, 8);
 	WRITE32(exid->seqid);
 	WRITE32(exid->flags);
 
 	WRITE32(exid->spa_how);
+	ADJUST_ARGS();
+
 	switch (exid->spa_how) {
 	case SP4_NONE:
 		break;
 	case SP4_MACH_CRED:
+		/* spo_must_enforce, spo_must_allow */
+		RESERVE_SPACE(16);
+
 		/* spo_must_enforce bitmap: */
 		WRITE32(2);
 		WRITE32(nfs4_minimal_spo_must_enforce[0]);
 		WRITE32(nfs4_minimal_spo_must_enforce[1]);
 		/* empty spo_must_allow bitmap: */
 		WRITE32(0);
+
+		ADJUST_ARGS();
 		break;
 	default:
 		WARN_ON_ONCE(1);
 	}
 
+	RESERVE_SPACE(
+		8 /* so_minor_id */ +
+		4 /* so_major_id.len */ +
+		(XDR_QUADLEN(major_id_sz) * 4) +
+		4 /* eir_server_scope.len */ +
+		(XDR_QUADLEN(server_scope_sz) * 4) +
+		4 /* eir_server_impl_id.count (0) */);
+
 	/* The server_owner struct */
 	WRITE64(minor_id);      /* Minor id */
 	/* major id */
@@ -3474,28 +3442,6 @@
 }
 
 static __be32
-nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, __be32 nfserr,
-			     struct nfsd4_destroy_session *destroy_session)
-{
-	return nfserr;
-}
-
-static __be32
-nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
-			  struct nfsd4_free_stateid *free_stateid)
-{
-	__be32 *p;
-
-	if (nfserr)
-		return nfserr;
-
-	RESERVE_SPACE(4);
-	*p++ = nfserr;
-	ADJUST_ARGS();
-	return nfserr;
-}
-
-static __be32
 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 		      struct nfsd4_sequence *seq)
 {
@@ -3593,8 +3539,8 @@
 	[OP_BIND_CONN_TO_SESSION] = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
 	[OP_EXCHANGE_ID]	= (nfsd4_enc)nfsd4_encode_exchange_id,
 	[OP_CREATE_SESSION]	= (nfsd4_enc)nfsd4_encode_create_session,
-	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_destroy_session,
-	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_free_stateid,
+	[OP_DESTROY_SESSION]	= (nfsd4_enc)nfsd4_encode_noop,
+	[OP_FREE_STATEID]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_GET_DIR_DELEGATION]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_GETDEVICEINFO]	= (nfsd4_enc)nfsd4_encode_noop,
 	[OP_GETDEVICELIST]	= (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index b6af150..f8f060f 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -132,13 +132,6 @@
 }
 
 static void
-nfsd_reply_cache_unhash(struct svc_cacherep *rp)
-{
-	hlist_del_init(&rp->c_hash);
-	list_del_init(&rp->c_lru);
-}
-
-static void
 nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
 {
 	if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
@@ -416,22 +409,8 @@
 
 	/*
 	 * Since the common case is a cache miss followed by an insert,
-	 * preallocate an entry. First, try to reuse the first entry on the LRU
-	 * if it works, then go ahead and prune the LRU list.
+	 * preallocate an entry.
 	 */
-	spin_lock(&cache_lock);
-	if (!list_empty(&lru_head)) {
-		rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
-		if (nfsd_cache_entry_expired(rp) ||
-		    num_drc_entries >= max_drc_entries) {
-			nfsd_reply_cache_unhash(rp);
-			prune_cache_entries();
-			goto search_cache;
-		}
-	}
-
-	/* No expired ones available, allocate a new one. */
-	spin_unlock(&cache_lock);
 	rp = nfsd_reply_cache_alloc();
 	spin_lock(&cache_lock);
 	if (likely(rp)) {
@@ -439,7 +418,9 @@
 		drc_mem_usage += sizeof(*rp);
 	}
 
-search_cache:
+	/* go ahead and prune the cache */
+	prune_cache_entries();
+
 	found = nfsd_cache_search(rqstp, csum);
 	if (found) {
 		if (likely(rp))
@@ -453,15 +434,6 @@
 		goto out;
 	}
 
-	/*
-	 * We're keeping the one we just allocated. Are we now over the
-	 * limit? Prune one off the tip of the LRU in trade for the one we
-	 * just allocated if so.
-	 */
-	if (num_drc_entries >= max_drc_entries)
-		nfsd_reply_cache_free_locked(list_first_entry(&lru_head,
-						struct svc_cacherep, c_lru));
-
 	nfsdstats.rcmisses++;
 	rqstp->rq_cacherep = rp;
 	rp->c_state = RC_INPROG;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 760c85a..9a4a5f9 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -241,6 +241,15 @@
 	nfsd_racache_shutdown();
 }
 
+static bool nfsd_needs_lockd(void)
+{
+#if defined(CONFIG_NFSD_V3)
+	return (nfsd_versions[2] != NULL) || (nfsd_versions[3] != NULL);
+#else
+	return (nfsd_versions[2] != NULL);
+#endif
+}
+
 static int nfsd_startup_net(int nrservs, struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
@@ -255,9 +264,14 @@
 	ret = nfsd_init_socks(net);
 	if (ret)
 		goto out_socks;
-	ret = lockd_up(net);
-	if (ret)
-		goto out_socks;
+
+	if (nfsd_needs_lockd() && !nn->lockd_up) {
+		ret = lockd_up(net);
+		if (ret)
+			goto out_socks;
+		nn->lockd_up = 1;
+	}
+
 	ret = nfs4_state_start_net(net);
 	if (ret)
 		goto out_lockd;
@@ -266,7 +280,10 @@
 	return 0;
 
 out_lockd:
-	lockd_down(net);
+	if (nn->lockd_up) {
+		lockd_down(net);
+		nn->lockd_up = 0;
+	}
 out_socks:
 	nfsd_shutdown_generic();
 	return ret;
@@ -277,7 +294,10 @@
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	nfs4_state_shutdown_net(net);
-	lockd_down(net);
+	if (nn->lockd_up) {
+		lockd_down(net);
+		nn->lockd_up = 0;
+	}
 	nn->nfsd_net_up = false;
 	nfsd_shutdown_generic();
 }
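
The nfsd_needs_lockd() check above encodes the reason for the new
nn->lockd_up flag: NLM/lockd only serves NFSv2 and NFSv3 clients,
while NFSv4 carries locking operations in the protocol itself, so a
v4-only server no longer needs to bring lockd up and down with it.
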
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 9c769a4..b17d932 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -152,7 +152,7 @@
 	type = (stat->mode & S_IFMT);
 
 	*p++ = htonl(nfs_ftypes[type >> 12]);
-	*p++ = htonl((u32) stat->mode);
+	*p++ = htonl((u32) (stat->mode & S_IALLUGO));
 	*p++ = htonl((u32) stat->nlink);
 	*p++ = htonl((u32) from_kuid(&init_user_ns, stat->uid));
 	*p++ = htonl((u32) from_kgid(&init_user_ns, stat->gid));
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 1426eb6..017d3cb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -207,7 +207,12 @@
 				goto out_nfserr;
 		}
 	} else {
-		fh_lock(fhp);
+		/*
+		 * In the nfsd4_open() case, this lock may be held across
+		 * a subsequent open and delegation acquisition, which may
+		 * need to take the child's i_mutex:
+		 */
+		fh_lock_nested(fhp, I_MUTEX_PARENT);
 		dentry = lookup_one_len(name, dparent, len);
 		host_err = PTR_ERR(dentry);
 		if (IS_ERR(dentry))
@@ -273,13 +278,6 @@
 	return err;
 }
 
-static int nfsd_break_lease(struct inode *inode)
-{
-	if (!S_ISREG(inode->i_mode))
-		return 0;
-	return break_lease(inode, O_WRONLY | O_NONBLOCK);
-}
-
 /*
  * Commit metadata changes to stable storage.
  */
@@ -348,8 +346,7 @@
 
 	/* Revoke setuid/setgid on chown */
 	if (!S_ISDIR(inode->i_mode) &&
-	    (((iap->ia_valid & ATTR_UID) && !uid_eq(iap->ia_uid, inode->i_uid)) ||
-	     ((iap->ia_valid & ATTR_GID) && !gid_eq(iap->ia_gid, inode->i_gid)))) {
+	    ((iap->ia_valid & ATTR_UID) || (iap->ia_valid & ATTR_GID))) {
 		iap->ia_valid |= ATTR_KILL_PRIV;
 		if (iap->ia_valid & ATTR_MODE) {
 			/* we're setting mode too, just clear the s*id bits */
@@ -449,16 +446,10 @@
 		goto out_put_write_access;
 	}
 
-	host_err = nfsd_break_lease(inode);
-	if (host_err)
-		goto out_put_write_access_nfserror;
-
 	fh_lock(fhp);
 	host_err = notify_change(dentry, iap, NULL);
 	fh_unlock(fhp);
 
-out_put_write_access_nfserror:
-	err = nfserrno(host_err);
 out_put_write_access:
 	if (size_change)
 		put_write_access(inode);
@@ -1609,11 +1600,6 @@
 	err = nfserr_noent;
 	if (!dold->d_inode)
 		goto out_dput;
-	host_err = nfsd_break_lease(dold->d_inode);
-	if (host_err) {
-		err = nfserrno(host_err);
-		goto out_dput;
-	}
 	host_err = vfs_link(dold, dirp, dnew, NULL);
 	if (!host_err) {
 		err = nfserrno(commit_metadata(ffhp));
@@ -1707,14 +1693,6 @@
 	if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
 		goto out_dput_new;
 
-	host_err = nfsd_break_lease(odentry->d_inode);
-	if (host_err)
-		goto out_dput_new;
-	if (ndentry->d_inode) {
-		host_err = nfsd_break_lease(ndentry->d_inode);
-		if (host_err)
-			goto out_dput_new;
-	}
 	host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL);
 	if (!host_err) {
 		host_err = commit_metadata(tfhp);
@@ -1784,16 +1762,12 @@
 	if (!type)
 		type = rdentry->d_inode->i_mode & S_IFMT;
 
-	host_err = nfsd_break_lease(rdentry->d_inode);
-	if (host_err)
-		goto out_put;
 	if (type != S_IFDIR)
 		host_err = vfs_unlink(dirp, rdentry, NULL);
 	else
 		host_err = vfs_rmdir(dirp, rdentry);
 	if (!host_err)
 		host_err = commit_metadata(fhp);
-out_put:
 	dput(rdentry);
 
 out_nfserr:
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 1bc1d44..fbe90bd 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -86,8 +86,6 @@
 __be32		nfsd_rename(struct svc_rqst *,
 				struct svc_fh *, char *, int,
 				struct svc_fh *, char *, int);
-__be32		nfsd_remove(struct svc_rqst *,
-				struct svc_fh *, char *, int);
 __be32		nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
 				char *name, int len);
 __be32		nfsd_readdir(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index b6d5542..335e04a 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -174,6 +174,9 @@
 struct nfsd3_readdirres {
 	__be32			status;
 	struct svc_fh		fh;
+	/* Just to save kmalloc on every readdirplus entry (svc_fh is a
+	 * little large for the stack): */
+	struct svc_fh		scratch;
 	int			count;
 	__be32			verf[2];
 
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index b3ed644..d278a0d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -228,7 +228,7 @@
 	u32		op_create;     	    /* request */
 	u32		op_createmode;      /* request */
 	u32		op_bmval[3];        /* request */
-	struct iattr	iattr;              /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
+	struct iattr	op_iattr;           /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
 	nfs4_verifier	op_verf __attribute__((aligned(32)));
 					    /* EXCLUSIVE4 */
 	clientid_t	op_clientid;        /* request */
@@ -250,7 +250,6 @@
 	struct nfs4_acl *op_acl;
 	struct xdr_netobj op_label;
 };
-#define op_iattr	iattr
 
 struct nfsd4_open_confirm {
 	stateid_t	oc_req_stateid		/* request */;
@@ -374,7 +373,6 @@
 
 struct nfsd4_free_stateid {
 	stateid_t	fr_stateid;         /* request */
-	__be32		fr_status;          /* response */
 };
 
 /* also used for NVERIFY */
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2d8be51..dc3a9efd 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -416,7 +416,8 @@
 	}
 	if (likely(bio)) {
 		bio->bi_bdev = nilfs->ns_bdev;
-		bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9);
+		bio->bi_iter.bi_sector =
+			start << (nilfs->ns_blocksize_bits - 9);
 	}
 	return bio;
 }
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 73920ff..bf482df 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -413,7 +413,7 @@
 	}
 
 	/* Must put everything in 512 byte sectors for the bio... */
-	bio->bi_sector = (reg->hr_start_block + cs) << (bits - 9);
+	bio->bi_iter.bi_sector = (reg->hr_start_block + cs) << (bits - 9);
 	bio->bi_bdev = reg->hr_bdev;
 	bio->bi_private = wc;
 	bio->bi_end_io = o2hb_bio_end_io;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a267394..db2cfb0 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -407,7 +407,7 @@
 	struct bio		*bio = bio_alloc(GFP_NOIO, nvecs);
 
 	ASSERT(bio->bi_private == NULL);
-	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
 	return bio;
 }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 5175711..9c061ef 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1240,7 +1240,7 @@
 
 	bio = bio_alloc(GFP_NOIO, nr_pages);
 	bio->bi_bdev = bp->b_target->bt_bdev;
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	bio->bi_end_io = xfs_buf_bio_end_io;
 	bio->bi_private = bp;
 
@@ -1262,7 +1262,7 @@
 		total_nr_pages--;
 	}
 
-	if (likely(bio->bi_size)) {
+	if (likely(bio->bi_iter.bi_size)) {
 		if (xfs_buf_is_vmapped(bp)) {
 			flush_kernel_vmap_range(bp->b_addr,
 						xfs_buf_vmap_len(bp));
diff --git a/include/dt-bindings/clock/mpc512x-clock.h b/include/dt-bindings/clock/mpc512x-clock.h
new file mode 100644
index 0000000..4f94919
--- /dev/null
+++ b/include/dt-bindings/clock/mpc512x-clock.h
@@ -0,0 +1,76 @@
+/*
+ * This header provides constants for MPC512x clock specs in DT bindings.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_MPC512x_CLOCK_H
+#define _DT_BINDINGS_CLOCK_MPC512x_CLOCK_H
+
+#define MPC512x_CLK_DUMMY		0
+#define MPC512x_CLK_REF			1
+#define MPC512x_CLK_SYS			2
+#define MPC512x_CLK_DIU			3
+#define MPC512x_CLK_VIU			4
+#define MPC512x_CLK_CSB			5
+#define MPC512x_CLK_E300		6
+#define MPC512x_CLK_IPS			7
+#define MPC512x_CLK_FEC			8
+#define MPC512x_CLK_SATA		9
+#define MPC512x_CLK_PATA		10
+#define MPC512x_CLK_NFC			11
+#define MPC512x_CLK_LPC			12
+#define MPC512x_CLK_MBX_BUS		13
+#define MPC512x_CLK_MBX			14
+#define MPC512x_CLK_MBX_3D		15
+#define MPC512x_CLK_AXE			16
+#define MPC512x_CLK_USB1		17
+#define MPC512x_CLK_USB2		18
+#define MPC512x_CLK_I2C			19
+#define MPC512x_CLK_MSCAN0_MCLK		20
+#define MPC512x_CLK_MSCAN1_MCLK		21
+#define MPC512x_CLK_MSCAN2_MCLK		22
+#define MPC512x_CLK_MSCAN3_MCLK		23
+#define MPC512x_CLK_BDLC		24
+#define MPC512x_CLK_SDHC		25
+#define MPC512x_CLK_PCI			26
+#define MPC512x_CLK_PSC_MCLK_IN		27
+#define MPC512x_CLK_SPDIF_TX		28
+#define MPC512x_CLK_SPDIF_RX		29
+#define MPC512x_CLK_SPDIF_MCLK		30
+#define MPC512x_CLK_SPDIF		31
+#define MPC512x_CLK_AC97		32
+#define MPC512x_CLK_PSC0_MCLK		33
+#define MPC512x_CLK_PSC1_MCLK		34
+#define MPC512x_CLK_PSC2_MCLK		35
+#define MPC512x_CLK_PSC3_MCLK		36
+#define MPC512x_CLK_PSC4_MCLK		37
+#define MPC512x_CLK_PSC5_MCLK		38
+#define MPC512x_CLK_PSC6_MCLK		39
+#define MPC512x_CLK_PSC7_MCLK		40
+#define MPC512x_CLK_PSC8_MCLK		41
+#define MPC512x_CLK_PSC9_MCLK		42
+#define MPC512x_CLK_PSC10_MCLK		43
+#define MPC512x_CLK_PSC11_MCLK		44
+#define MPC512x_CLK_PSC_FIFO		45
+#define MPC512x_CLK_PSC0		46
+#define MPC512x_CLK_PSC1		47
+#define MPC512x_CLK_PSC2		48
+#define MPC512x_CLK_PSC3		49
+#define MPC512x_CLK_PSC4		50
+#define MPC512x_CLK_PSC5		51
+#define MPC512x_CLK_PSC6		52
+#define MPC512x_CLK_PSC7		53
+#define MPC512x_CLK_PSC8		54
+#define MPC512x_CLK_PSC9		55
+#define MPC512x_CLK_PSC10		56
+#define MPC512x_CLK_PSC11		57
+#define MPC512x_CLK_SDHC2		58
+#define MPC512x_CLK_FEC2		59
+#define MPC512x_CLK_OUT0_CLK		60
+#define MPC512x_CLK_OUT1_CLK		61
+#define MPC512x_CLK_OUT2_CLK		62
+#define MPC512x_CLK_OUT3_CLK		63
+#define MPC512x_CLK_CAN_CLK_IN		64
+
+#define MPC512x_CLK_LAST_PUBLIC		64
+
+#endif
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index c49e1a1..63d105c 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -640,6 +640,7 @@
 	spinlock_t gpio_lock;
 #ifdef CONFIG_BCMA_DRIVER_GPIO
 	struct gpio_chip gpio;
+	struct irq_domain *irq_domain;
 #endif
 };
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 060ff69..7065452 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -61,25 +61,87 @@
  * various member access, note that bio_data should of course not be used
  * on highmem page vectors
  */
-#define bio_iovec_idx(bio, idx)	(&((bio)->bi_io_vec[(idx)]))
-#define bio_iovec(bio)		bio_iovec_idx((bio), (bio)->bi_idx)
-#define bio_page(bio)		bio_iovec((bio))->bv_page
-#define bio_offset(bio)		bio_iovec((bio))->bv_offset
-#define bio_segments(bio)	((bio)->bi_vcnt - (bio)->bi_idx)
-#define bio_sectors(bio)	((bio)->bi_size >> 9)
-#define bio_end_sector(bio)	((bio)->bi_sector + bio_sectors((bio)))
+#define __bvec_iter_bvec(bvec, iter)	(&(bvec)[(iter).bi_idx])
+
+#define bvec_iter_page(bvec, iter)				\
+	(__bvec_iter_bvec((bvec), (iter))->bv_page)
+
+#define bvec_iter_len(bvec, iter)				\
+	min((iter).bi_size,					\
+	    __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done)
+
+#define bvec_iter_offset(bvec, iter)				\
+	(__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done)
+
+#define bvec_iter_bvec(bvec, iter)				\
+((struct bio_vec) {						\
+	.bv_page	= bvec_iter_page((bvec), (iter)),	\
+	.bv_len		= bvec_iter_len((bvec), (iter)),	\
+	.bv_offset	= bvec_iter_offset((bvec), (iter)),	\
+})
+
+#define bio_iter_iovec(bio, iter)				\
+	bvec_iter_bvec((bio)->bi_io_vec, (iter))
+
+#define bio_iter_page(bio, iter)				\
+	bvec_iter_page((bio)->bi_io_vec, (iter))
+#define bio_iter_len(bio, iter)					\
+	bvec_iter_len((bio)->bi_io_vec, (iter))
+#define bio_iter_offset(bio, iter)				\
+	bvec_iter_offset((bio)->bi_io_vec, (iter))
+
+#define bio_page(bio)		bio_iter_page((bio), (bio)->bi_iter)
+#define bio_offset(bio)		bio_iter_offset((bio), (bio)->bi_iter)
+#define bio_iovec(bio)		bio_iter_iovec((bio), (bio)->bi_iter)
+
+#define bio_multiple_segments(bio)				\
+	((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len)
+#define bio_sectors(bio)	((bio)->bi_iter.bi_size >> 9)
+#define bio_end_sector(bio)	((bio)->bi_iter.bi_sector + bio_sectors((bio)))
+
+/*
+ * Check whether this bio carries any data or not. A NULL bio is allowed.
+ */
+static inline bool bio_has_data(struct bio *bio)
+{
+	if (bio &&
+	    bio->bi_iter.bi_size &&
+	    !(bio->bi_rw & REQ_DISCARD))
+		return true;
+
+	return false;
+}
+
+static inline bool bio_is_rw(struct bio *bio)
+{
+	if (!bio_has_data(bio))
+		return false;
+
+	if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
+		return false;
+
+	return true;
+}
+
+static inline bool bio_mergeable(struct bio *bio)
+{
+	if (bio->bi_rw & REQ_NOMERGE_FLAGS)
+		return false;
+
+	return true;
+}
 
 static inline unsigned int bio_cur_bytes(struct bio *bio)
 {
-	if (bio->bi_vcnt)
-		return bio_iovec(bio)->bv_len;
+	if (bio_has_data(bio))
+		return bio_iovec(bio).bv_len;
 	else /* dataless requests such as discard */
-		return bio->bi_size;
+		return bio->bi_iter.bi_size;
 }
 
 static inline void *bio_data(struct bio *bio)
 {
-	if (bio->bi_vcnt)
+	if (bio_has_data(bio))
 		return page_address(bio_page(bio)) + bio_offset(bio);
 
 	return NULL;
@@ -97,19 +159,16 @@
  * permanent PIO fall back, user is probably better off disabling highmem
  * I/O completely on that queue (see ide-dma for example)
  */
-#define __bio_kmap_atomic(bio, idx)				\
-	(kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page) +	\
-		bio_iovec_idx((bio), (idx))->bv_offset)
+#define __bio_kmap_atomic(bio, iter)				\
+	(kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) +	\
+		bio_iter_iovec((bio), (iter)).bv_offset)
 
-#define __bio_kunmap_atomic(addr) kunmap_atomic(addr)
+#define __bio_kunmap_atomic(addr)	kunmap_atomic(addr)
 
 /*
  * merge helpers etc
  */
 
-#define __BVEC_END(bio)		bio_iovec_idx((bio), (bio)->bi_vcnt - 1)
-#define __BVEC_START(bio)	bio_iovec_idx((bio), (bio)->bi_idx)
-
 /* Default implementation of BIOVEC_PHYS_MERGEABLE */
 #define __BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
 	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
@@ -126,33 +185,76 @@
 	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
 	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
-#define BIO_SEG_BOUNDARY(q, b1, b2) \
-	BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))
 
 #define bio_io_error(bio) bio_endio((bio), -EIO)
 
 /*
- * drivers should not use the __ version unless they _really_ know what
- * they're doing
- */
-#define __bio_for_each_segment(bvl, bio, i, start_idx)			\
-	for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx);	\
-	     i < (bio)->bi_vcnt;					\
-	     bvl++, i++)
-
-/*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
  */
 #define bio_for_each_segment_all(bvl, bio, i)				\
-	for (i = 0;							\
-	     bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt;	\
-	     i++)
+	for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++)
 
-#define bio_for_each_segment(bvl, bio, i)				\
-	for (i = (bio)->bi_idx;						\
-	     bvl = bio_iovec_idx((bio), (i)), i < (bio)->bi_vcnt;	\
-	     i++)
+static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter,
+				     unsigned bytes)
+{
+	WARN_ONCE(bytes > iter->bi_size,
+		  "Attempted to advance past end of bvec iter\n");
+
+	while (bytes) {
+		unsigned len = min(bytes, bvec_iter_len(bv, *iter));
+
+		bytes -= len;
+		iter->bi_size -= len;
+		iter->bi_bvec_done += len;
+
+		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
+			iter->bi_bvec_done = 0;
+			iter->bi_idx++;
+		}
+	}
+}
+
+#define for_each_bvec(bvl, bio_vec, iter, start)			\
+	for ((iter) = start;						\
+	     (bvl) = bvec_iter_bvec((bio_vec), (iter)),			\
+		(iter).bi_size;						\
+	     bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
+
+
+static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
+				    unsigned bytes)
+{
+	iter->bi_sector += bytes >> 9;
+
+	if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK)
+		iter->bi_size -= bytes;
+	else
+		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
+}
+
+#define __bio_for_each_segment(bvl, bio, iter, start)			\
+	for (iter = (start);						\
+	     (iter).bi_size &&						\
+		((bvl = bio_iter_iovec((bio), (iter))), 1);		\
+	     bio_advance_iter((bio), &(iter), (bvl).bv_len))
+
+#define bio_for_each_segment(bvl, bio, iter)				\
+	__bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter)
+
+#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len)
+
+static inline unsigned bio_segments(struct bio *bio)
+{
+	unsigned segs = 0;
+	struct bio_vec bv;
+	struct bvec_iter iter;
+
+	bio_for_each_segment(bv, bio, iter)
+		segs++;
+
+	return segs;
+}
 
 /*
  * get a reference to a bio, so it won't disappear. the intended use is
@@ -177,16 +279,15 @@
 struct bio_integrity_payload {
 	struct bio		*bip_bio;	/* parent bio */
 
-	sector_t		bip_sector;	/* virtual start sector */
+	struct bvec_iter	bip_iter;
 
+	/* kill - should just use bip_vec */
 	void			*bip_buf;	/* generated integrity data */
-	bio_end_io_t		*bip_end_io;	/* saved I/O completion fn */
 
-	unsigned int		bip_size;
+	bio_end_io_t		*bip_end_io;	/* saved I/O completion fn */
 
 	unsigned short		bip_slab;	/* slab the bip came from */
 	unsigned short		bip_vcnt;	/* # of integrity bio_vecs */
-	unsigned short		bip_idx;	/* current bip_vec index */
 	unsigned		bip_owns_buf:1;	/* should free bip_buf */
 
 	struct work_struct	bip_work;	/* I/O completion */
@@ -196,29 +297,28 @@
 };
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
-/*
- * A bio_pair is used when we need to split a bio.
- * This can only happen for a bio that refers to just one
- * page of data, and in the unusual situation when the
- * page crosses a chunk/device boundary
- *
- * The address of the master bio is stored in bio1.bi_private
- * The address of the pool the pair was allocated from is stored
- *   in bio2.bi_private
- */
-struct bio_pair {
-	struct bio			bio1, bio2;
-	struct bio_vec			bv1, bv2;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-	struct bio_integrity_payload	bip1, bip2;
-	struct bio_vec			iv1, iv2;
-#endif
-	atomic_t			cnt;
-	int				error;
-};
-extern struct bio_pair *bio_split(struct bio *bi, int first_sectors);
-extern void bio_pair_release(struct bio_pair *dbio);
 extern void bio_trim(struct bio *bio, int offset, int size);
+extern struct bio *bio_split(struct bio *bio, int sectors,
+			     gfp_t gfp, struct bio_set *bs);
+
+/**
+ * bio_next_split - get next @sectors from a bio, splitting if necessary
+ * @bio:	bio to split
+ * @sectors:	number of sectors to split from the front of @bio
+ * @gfp:	gfp mask
+ * @bs:		bio set to allocate from
+ *
+ * Returns a bio representing the next @sectors of @bio - if the bio is smaller
+ * than @sectors, returns the original bio unchanged.
+ */
+static inline struct bio *bio_next_split(struct bio *bio, int sectors,
+					 gfp_t gfp, struct bio_set *bs)
+{
+	if (sectors >= bio_sectors(bio))
+		return bio;
+
+	return bio_split(bio, sectors, gfp, bs);
+}
 
 extern struct bio_set *bioset_create(unsigned int, unsigned int);
 extern void bioset_free(struct bio_set *);
@@ -227,7 +327,8 @@
 extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
 extern void bio_put(struct bio *);
 
-extern void __bio_clone(struct bio *, struct bio *);
+extern void __bio_clone_fast(struct bio *, struct bio *);
+extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *);
 extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
 
 extern struct bio_set *fs_bio_set;
@@ -254,6 +355,7 @@
 }
 
 extern void bio_endio(struct bio *, int);
+extern void bio_endio_nodec(struct bio *, int);
 struct request_queue;
 extern int bio_phys_segments(struct request_queue *, struct bio *);
 
@@ -262,12 +364,12 @@
 
 extern void bio_init(struct bio *);
 extern void bio_reset(struct bio *);
+void bio_chain(struct bio *, struct bio *);
 
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
 extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
 			   unsigned int, unsigned int);
 extern int bio_get_nr_vecs(struct block_device *);
-extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int);
 extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
 				unsigned long, unsigned int, int, gfp_t);
 struct sg_iovec;
@@ -357,48 +459,18 @@
 }
 #endif
 
-static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
+static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter,
 				   unsigned long *flags)
 {
-	return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags);
+	return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags);
 }
 #define __bio_kunmap_irq(buf, flags)	bvec_kunmap_irq(buf, flags)
 
 #define bio_kmap_irq(bio, flags) \
-	__bio_kmap_irq((bio), (bio)->bi_idx, (flags))
+	__bio_kmap_irq((bio), (bio)->bi_iter, (flags))
 #define bio_kunmap_irq(buf,flags)	__bio_kunmap_irq(buf, flags)
 
 /*
- * Check whether this bio carries any data or not. A NULL bio is allowed.
- */
-static inline bool bio_has_data(struct bio *bio)
-{
-	if (bio && bio->bi_vcnt)
-		return true;
-
-	return false;
-}
-
-static inline bool bio_is_rw(struct bio *bio)
-{
-	if (!bio_has_data(bio))
-		return false;
-
-	if (bio->bi_rw & REQ_WRITE_SAME)
-		return false;
-
-	return true;
-}
-
-static inline bool bio_mergeable(struct bio *bio)
-{
-	if (bio->bi_rw & REQ_NOMERGE_FLAGS)
-		return false;
-
-	return true;
-}
-
-/*
  * BIO list management for use by remapping drivers (e.g. DM or MD) and loop.
  *
  * A bio_list anchors a singly-linked list of bios chained through the bi_next
@@ -559,16 +631,12 @@
 
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 
+
+
 #define bip_vec_idx(bip, idx)	(&(bip->bip_vec[(idx)]))
-#define bip_vec(bip)		bip_vec_idx(bip, 0)
 
-#define __bip_for_each_vec(bvl, bip, i, start_idx)			\
-	for (bvl = bip_vec_idx((bip), (start_idx)), i = (start_idx);	\
-	     i < (bip)->bip_vcnt;					\
-	     bvl++, i++)
-
-#define bip_for_each_vec(bvl, bip, i)					\
-	__bip_for_each_vec(bvl, bip, i, (bip)->bip_idx)
+#define bip_for_each_vec(bvl, bip, iter)				\
+	for_each_bvec(bvl, (bip)->bip_vec, iter, (bip)->bip_iter)
 
 #define bio_for_each_integrity_vec(_bvl, _bio, _iter)			\
 	for_each_bio(_bio)						\
@@ -586,7 +654,6 @@
 extern void bio_integrity_endio(struct bio *, int);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
-extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
 extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
 extern int bioset_integrity_create(struct bio_set *, int);
 extern void bioset_integrity_free(struct bio_set *);
@@ -630,12 +697,6 @@
 	return 0;
 }
 
-static inline void bio_integrity_split(struct bio *bio, struct bio_pair *bp,
-				       int sectors)
-{
-	return;
-}
-
 static inline void bio_integrity_advance(struct bio *bio,
 					 unsigned int bytes_done)
 {
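
With bi_idx gone from struct bio, segment walks go through the bvec_iter
machinery above: bio_for_each_segment() now yields a struct bio_vec by value
and keeps all position state in a caller-owned iterator, so the walk never
writes to the bio itself. A minimal sketch of the new calling convention
(my_handle_segment() is a hypothetical driver hook):

#include <linux/bio.h>

static void my_handle_segment(struct page *page, unsigned off, unsigned len)
{
	/* driver-specific per-segment work goes here */
}

static void my_walk_bio(struct bio *bio)
{
	struct bio_vec bvec;	/* filled in by value on each pass */
	struct bvec_iter iter;	/* caller-owned cursor */

	bio_for_each_segment(bvec, bio, iter)
		my_handle_segment(bvec.bv_page, bvec.bv_offset, bvec.bv_len);
}
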
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ab0e9b2..161b231 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -113,7 +113,6 @@
 };
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *);
-void blk_mq_free_queue(struct request_queue *);
 int blk_mq_register_disk(struct gendisk *);
 void blk_mq_unregister_disk(struct gendisk *);
 void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data);
@@ -159,16 +158,16 @@
 }
 
 #define queue_for_each_hw_ctx(q, hctx, i)				\
-	for ((i) = 0, hctx = (q)->queue_hw_ctx[0];			\
-	     (i) < (q)->nr_hw_queues; (i)++, hctx = (q)->queue_hw_ctx[i])
+	for ((i) = 0; (i) < (q)->nr_hw_queues &&			\
+	     ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++)
 
 #define queue_for_each_ctx(q, ctx, i)					\
-	for ((i) = 0, ctx = per_cpu_ptr((q)->queue_ctx, 0);		\
-	     (i) < (q)->nr_queues; (i)++, ctx = per_cpu_ptr(q->queue_ctx, (i)))
+	for ((i) = 0; (i) < (q)->nr_queues &&				\
+	     ({ ctx = per_cpu_ptr((q)->queue_ctx, (i)); 1; }); (i)++)
 
 #define hctx_for_each_ctx(hctx, ctx, i)					\
-	for ((i) = 0, ctx = (hctx)->ctxs[0];				\
-	     (i) < (hctx)->nr_ctx; (i)++, ctx = (hctx)->ctxs[(i)])
+	for ((i) = 0; (i) < (hctx)->nr_ctx &&				\
+	     ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++)
 
 #define blk_ctx_sum(q, sum)						\
 ({									\
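
The three loop macros here were restructured so the bounds test runs before
the element load; the old form fetched queue_hw_ctx[i] (and friends) once
past the end on the final iteration. The rewrite leans on a GNU statement
expression that performs the assignment and then yields 1 inside the loop
condition. The same idiom in a generic, illustrative form:

/* elem is loaded only after i < n has been checked */
#define array_for_each(elem, arr, n, i)				\
	for ((i) = 0; (i) < (n) &&				\
	     ({ (elem) = (arr)[(i)]; 1; }); (i)++)
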
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 238ef0e..bbc3a6c 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -28,13 +28,22 @@
 	unsigned int	bv_offset;
 };
 
+struct bvec_iter {
+	sector_t		bi_sector;	/* device address in 512 byte
+						   sectors */
+	unsigned int		bi_size;	/* residual I/O count */
+
+	unsigned int		bi_idx;		/* current index into bvl_vec */
+
+	unsigned int            bi_bvec_done;	/* number of bytes completed in
+						   current bvec */
+};
+
 /*
  * main unit of I/O for the block layer and lower layers (ie drivers and
  * stacking drivers)
  */
 struct bio {
-	sector_t		bi_sector;	/* device address in 512 byte
-						   sectors */
 	struct bio		*bi_next;	/* request queue link */
 	struct block_device	*bi_bdev;
 	unsigned long		bi_flags;	/* status, command, etc */
@@ -42,16 +51,13 @@
 						 * top bits priority
 						 */
 
-	unsigned short		bi_vcnt;	/* how many bio_vec's */
-	unsigned short		bi_idx;		/* current index into bvl_vec */
+	struct bvec_iter	bi_iter;
 
 	/* Number of segments in this BIO after
 	 * physical address coalescing is performed.
 	 */
 	unsigned int		bi_phys_segments;
 
-	unsigned int		bi_size;	/* residual I/O count */
-
 	/*
 	 * To keep track of the max segment size, we account for the
 	 * sizes of the first and last mergeable segments in this bio.
@@ -59,6 +65,8 @@
 	unsigned int		bi_seg_front_size;
 	unsigned int		bi_seg_back_size;
 
+	atomic_t		bi_remaining;
+
 	bio_end_io_t		*bi_end_io;
 
 	void			*bi_private;
@@ -74,11 +82,13 @@
 	struct bio_integrity_payload *bi_integrity;  /* data integrity */
 #endif
 
+	unsigned short		bi_vcnt;	/* how many bio_vec's */
+
 	/*
 	 * Everything starting with bi_max_vecs will be preserved by bio_reset()
 	 */
 
-	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
+	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */
 
 	atomic_t		bi_cnt;		/* pin count */
 
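
Everything that a partial completion must mutate now lives in struct
bvec_iter, which is what makes the biovec array itself immutable: two bios
can share one bi_io_vec and differ only in their iterators. A rough sketch
of what the new __bio_clone_fast() relies on (pool assertions, flag setting
and refcounting omitted):

static void my_clone_cursor(struct bio *clone, struct bio *parent)
{
	clone->bi_bdev   = parent->bi_bdev;
	clone->bi_rw     = parent->bi_rw;
	clone->bi_io_vec = parent->bi_io_vec;	/* shared, never modified */
	clone->bi_iter   = parent->bi_iter;	/* private position/size */
}
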
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1b135d4..8678c43 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -95,10 +95,7 @@
  * as well!
  */
 struct request {
-	union {
-		struct list_head queuelist;
-		struct llist_node ll_list;
-	};
+	struct list_head queuelist;
 	union {
 		struct call_single_data csd;
 		struct work_struct mq_flush_data;
@@ -291,6 +288,7 @@
 	unsigned char		discard_misaligned;
 	unsigned char		cluster;
 	unsigned char		discard_zeroes_data;
+	unsigned char		raid_partial_stripes_expensive;
 };
 
 struct request_queue {
@@ -735,7 +733,7 @@
 };
 
 struct req_iterator {
-	int i;
+	struct bvec_iter iter;
 	struct bio *bio;
 };
 
@@ -748,10 +746,11 @@
 
 #define rq_for_each_segment(bvl, _rq, _iter)			\
 	__rq_for_each_bio(_iter.bio, _rq)			\
-		bio_for_each_segment(bvl, _iter.bio, _iter.i)
+		bio_for_each_segment(bvl, _iter.bio, _iter.iter)
 
-#define rq_iter_last(rq, _iter)					\
-		(_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1)
+#define rq_iter_last(bvec, _iter)				\
+		(_iter.bio->bi_next == NULL &&			\
+		 bio_iter_last(bvec, _iter.iter))
 
 #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 # error	"You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
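
For request-level drivers the conversion is mostly mechanical: req_iterator
carries a bvec_iter instead of an integer index, and rq_for_each_segment()
hands out struct bio_vec by value. A sketch of a driver transfer loop under
the new helpers (my_xfer() is hypothetical, and the page_address() use
assumes lowmem pages):

#include <linux/blkdev.h>

static void my_xfer(void *buf, unsigned len, bool last)
{
	/* push one segment to the hardware */
}

static void my_process_request(struct request *rq)
{
	struct req_iterator iter;
	struct bio_vec bvec;

	rq_for_each_segment(bvec, rq, iter)
		my_xfer(page_address(bvec.bv_page) + bvec.bv_offset,
			bvec.bv_len, rq_iter_last(bvec, iter));
}
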
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index b388223..db51fe4 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -264,7 +264,7 @@
 {
 	if (!align)
 		align = SMP_CACHE_BYTES;
-	return __alloc_bootmem_low(size, align, BOOTMEM_LOW_LIMIT);
+	return __alloc_bootmem_low(size, align, 0);
 }
 
 static inline void * __init memblock_virt_alloc_low_nopanic(
@@ -272,7 +272,7 @@
 {
 	if (!align)
 		align = SMP_CACHE_BYTES;
-	return __alloc_bootmem_low_nopanic(size, align, BOOTMEM_LOW_LIMIT);
+	return __alloc_bootmem_low_nopanic(size, align, 0);
 }
 
 static inline void * __init memblock_virt_alloc_from_nopanic(
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 20ee8b6..d21f2db 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -1,6 +1,7 @@
 #ifndef __FS_CEPH_MESSENGER_H
 #define __FS_CEPH_MESSENGER_H
 
+#include <linux/blk_types.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
 #include <linux/net.h>
@@ -119,8 +120,7 @@
 #ifdef CONFIG_BLOCK
 		struct {				/* bio */
 			struct bio	*bio;		/* bio from list */
-			unsigned int	vector_index;	/* vector from bio */
-			unsigned int	vector_offset;	/* bytes from vector */
+			struct bvec_iter bvec_iter;
 		};
 #endif /* CONFIG_BLOCK */
 		struct {				/* pages */
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 448b229..939533d 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -544,6 +544,20 @@
  * for improved portability across platforms
  */
 
+#if IS_ENABLED(CONFIG_PPC)
+
+static inline u32 clk_readl(u32 __iomem *reg)
+{
+	return ioread32be(reg);
+}
+
+static inline void clk_writel(u32 val, u32 __iomem *reg)
+{
+	iowrite32be(val, reg);
+}
+
+#else	/* platform dependent I/O accessors */
+
 static inline u32 clk_readl(u32 __iomem *reg)
 {
 	return readl(reg);
@@ -554,5 +568,7 @@
 	writel(val, reg);
 }
 
+#endif	/* platform dependent I/O accessors */
+
 #endif /* CONFIG_COMMON_CLK */
 #endif /* CLK_PROVIDER_H */
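
clk_readl()/clk_writel() now resolve to big-endian MMIO accessors when
CONFIG_PPC is set, selected at build time via IS_ENABLED() instead of
per-driver #ifdefs. Any clock driver that goes through these helpers is
endian-correct on both; for instance (the register layout is invented for
illustration):

#include <linux/clk-provider.h>

static unsigned int my_get_div(u32 __iomem *reg)
{
	/* hypothetical 8-bit divider field at bits 15:8 */
	return (clk_readl(reg) >> 8) & 0xff;
}
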
diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h
index a0f9280..2e6dce6 100644
--- a/include/linux/cmdline-parser.h
+++ b/include/linux/cmdline-parser.h
@@ -37,9 +37,9 @@
 struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
 					 const char *bdev);
 
-void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
-		       int slot,
-		       int (*add_part)(int, struct cmdline_subpart *, void *),
-		       void *param);
+int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
+		      int slot,
+		      int (*add_part)(int, struct cmdline_subpart *, void *),
+		      void *param);
 
 #endif /* CMDLINEPARSEH */
diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h
index f4b0aa3..a68cbe5 100644
--- a/include/linux/dm-io.h
+++ b/include/linux/dm-io.h
@@ -29,7 +29,7 @@
 
 enum dm_io_mem_type {
 	DM_IO_PAGE_LIST,/* Page list */
-	DM_IO_BVEC,	/* Bio vector */
+	DM_IO_BIO,	/* Bio vector */
 	DM_IO_VMA,	/* Virtual memory area */
 	DM_IO_KMEM,	/* Kernel memory */
 };
@@ -41,7 +41,7 @@
 
 	union {
 		struct page_list *pl;
-		struct bio_vec *bvec;
+		struct bio *bio;
 		void *vma;
 		void *addr;
 	} ptr;
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index d3e8ad2..a6a42dd 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -6,6 +6,11 @@
 #include <linux/export.h>
 #include <asm/linkage.h>
 
+/* Some toolchains use other characters (e.g. '`') to mark a new line in a macro */
+#ifndef ASM_NL
+#define ASM_NL		 ;
+#endif
+
 #ifdef __cplusplus
 #define CPP_ASMLINKAGE extern "C"
 #else
@@ -75,21 +80,21 @@
 
 #ifndef ENTRY
 #define ENTRY(name) \
-  .globl name; \
-  ALIGN; \
-  name:
+	.globl name ASM_NL \
+	ALIGN ASM_NL \
+	name:
 #endif
 #endif /* LINKER_SCRIPT */
 
 #ifndef WEAK
 #define WEAK(name)	   \
-	.weak name;	   \
+	.weak name ASM_NL   \
 	name:
 #endif
 
 #ifndef END
 #define END(name) \
-  .size name, .-name
+	.size name, .-name
 #endif
 
 /* If symbol 'name' is treated as a subroutine (gets called, and returns)
@@ -98,8 +103,8 @@
  */
 #ifndef ENDPROC
 #define ENDPROC(name) \
-  .type name, @function; \
-  END(name)
+	.type name, @function ASM_NL \
+	END(name)
 #endif
 
 #endif
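
ENTRY(), WEAK() and ENDPROC() previously hard-coded ';' as the assembler
statement separator, which breaks on toolchains where ';' means something
else (e.g. a comment character). A port whose assembler uses a different
separator can now override ASM_NL from its own <asm/linkage.h>; a
hypothetical override using the '`' separator the comment above mentions:

/* arch/foo/include/asm/linkage.h -- hypothetical port */
#define ASM_NL		`

/*
 * ENTRY(bar) then expands to
 *	.globl bar ` ALIGN ` bar:
 * rather than emitting ';', which this assembler would reject.
 */
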
diff --git a/include/linux/serial_bcm63xx.h b/include/linux/serial_bcm63xx.h
new file mode 100644
index 0000000..570e964
--- /dev/null
+++ b/include/linux/serial_bcm63xx.h
@@ -0,0 +1,119 @@
+#ifndef _LINUX_SERIAL_BCM63XX_H
+#define _LINUX_SERIAL_BCM63XX_H
+
+/* UART Control Register */
+#define UART_CTL_REG			0x0
+#define UART_CTL_RXTMOUTCNT_SHIFT	0
+#define UART_CTL_RXTMOUTCNT_MASK	(0x1f << UART_CTL_RXTMOUTCNT_SHIFT)
+#define UART_CTL_RSTTXDN_SHIFT		5
+#define UART_CTL_RSTTXDN_MASK		(1 << UART_CTL_RSTTXDN_SHIFT)
+#define UART_CTL_RSTRXFIFO_SHIFT		6
+#define UART_CTL_RSTRXFIFO_MASK		(1 << UART_CTL_RSTRXFIFO_SHIFT)
+#define UART_CTL_RSTTXFIFO_SHIFT		7
+#define UART_CTL_RSTTXFIFO_MASK		(1 << UART_CTL_RSTTXFIFO_SHIFT)
+#define UART_CTL_STOPBITS_SHIFT		8
+#define UART_CTL_STOPBITS_MASK		(0xf << UART_CTL_STOPBITS_SHIFT)
+#define UART_CTL_STOPBITS_1		(0x7 << UART_CTL_STOPBITS_SHIFT)
+#define UART_CTL_STOPBITS_2		(0xf << UART_CTL_STOPBITS_SHIFT)
+#define UART_CTL_BITSPERSYM_SHIFT	12
+#define UART_CTL_BITSPERSYM_MASK	(0x3 << UART_CTL_BITSPERSYM_SHIFT)
+#define UART_CTL_XMITBRK_SHIFT		14
+#define UART_CTL_XMITBRK_MASK		(1 << UART_CTL_XMITBRK_SHIFT)
+#define UART_CTL_RSVD_SHIFT		15
+#define UART_CTL_RSVD_MASK		(1 << UART_CTL_RSVD_SHIFT)
+#define UART_CTL_RXPAREVEN_SHIFT		16
+#define UART_CTL_RXPAREVEN_MASK		(1 << UART_CTL_RXPAREVEN_SHIFT)
+#define UART_CTL_RXPAREN_SHIFT		17
+#define UART_CTL_RXPAREN_MASK		(1 << UART_CTL_RXPAREN_SHIFT)
+#define UART_CTL_TXPAREVEN_SHIFT		18
+#define UART_CTL_TXPAREVEN_MASK		(1 << UART_CTL_TXPAREVEN_SHIFT)
+#define UART_CTL_TXPAREN_SHIFT		19
+#define UART_CTL_TXPAREN_MASK		(1 << UART_CTL_TXPAREN_SHIFT)
+#define UART_CTL_LOOPBACK_SHIFT		20
+#define UART_CTL_LOOPBACK_MASK		(1 << UART_CTL_LOOPBACK_SHIFT)
+#define UART_CTL_RXEN_SHIFT		21
+#define UART_CTL_RXEN_MASK		(1 << UART_CTL_RXEN_SHIFT)
+#define UART_CTL_TXEN_SHIFT		22
+#define UART_CTL_TXEN_MASK		(1 << UART_CTL_TXEN_SHIFT)
+#define UART_CTL_BRGEN_SHIFT		23
+#define UART_CTL_BRGEN_MASK		(1 << UART_CTL_BRGEN_SHIFT)
+
+/* UART Baudword register */
+#define UART_BAUD_REG			0x4
+
+/* UART Misc Control register */
+#define UART_MCTL_REG			0x8
+#define UART_MCTL_DTR_SHIFT		0
+#define UART_MCTL_DTR_MASK		(1 << UART_MCTL_DTR_SHIFT)
+#define UART_MCTL_RTS_SHIFT		1
+#define UART_MCTL_RTS_MASK		(1 << UART_MCTL_RTS_SHIFT)
+#define UART_MCTL_RXFIFOTHRESH_SHIFT	8
+#define UART_MCTL_RXFIFOTHRESH_MASK	(0xf << UART_MCTL_RXFIFOTHRESH_SHIFT)
+#define UART_MCTL_TXFIFOTHRESH_SHIFT	12
+#define UART_MCTL_TXFIFOTHRESH_MASK	(0xf << UART_MCTL_TXFIFOTHRESH_SHIFT)
+#define UART_MCTL_RXFIFOFILL_SHIFT	16
+#define UART_MCTL_RXFIFOFILL_MASK	(0x1f << UART_MCTL_RXFIFOFILL_SHIFT)
+#define UART_MCTL_TXFIFOFILL_SHIFT	24
+#define UART_MCTL_TXFIFOFILL_MASK	(0x1f << UART_MCTL_TXFIFOFILL_SHIFT)
+
+/* UART External Input Configuration register */
+#define UART_EXTINP_REG			0xc
+#define UART_EXTINP_RI_SHIFT		0
+#define UART_EXTINP_RI_MASK		(1 << UART_EXTINP_RI_SHIFT)
+#define UART_EXTINP_CTS_SHIFT		1
+#define UART_EXTINP_CTS_MASK		(1 << UART_EXTINP_CTS_SHIFT)
+#define UART_EXTINP_DCD_SHIFT		2
+#define UART_EXTINP_DCD_MASK		(1 << UART_EXTINP_DCD_SHIFT)
+#define UART_EXTINP_DSR_SHIFT		3
+#define UART_EXTINP_DSR_MASK		(1 << UART_EXTINP_DSR_SHIFT)
+#define UART_EXTINP_IRSTAT(x)		(1 << (x + 4))
+#define UART_EXTINP_IRMASK(x)		(1 << (x + 8))
+#define UART_EXTINP_IR_RI		0
+#define UART_EXTINP_IR_CTS		1
+#define UART_EXTINP_IR_DCD		2
+#define UART_EXTINP_IR_DSR		3
+#define UART_EXTINP_RI_NOSENSE_SHIFT	16
+#define UART_EXTINP_RI_NOSENSE_MASK	(1 << UART_EXTINP_RI_NOSENSE_SHIFT)
+#define UART_EXTINP_CTS_NOSENSE_SHIFT	17
+#define UART_EXTINP_CTS_NOSENSE_MASK	(1 << UART_EXTINP_CTS_NOSENSE_SHIFT)
+#define UART_EXTINP_DCD_NOSENSE_SHIFT	18
+#define UART_EXTINP_DCD_NOSENSE_MASK	(1 << UART_EXTINP_DCD_NOSENSE_SHIFT)
+#define UART_EXTINP_DSR_NOSENSE_SHIFT	19
+#define UART_EXTINP_DSR_NOSENSE_MASK	(1 << UART_EXTINP_DSR_NOSENSE_SHIFT)
+
+/* UART Interrupt register */
+#define UART_IR_REG			0x10
+#define UART_IR_MASK(x)			(1 << (x + 16))
+#define UART_IR_STAT(x)			(1 << (x))
+#define UART_IR_EXTIP			0
+#define UART_IR_TXUNDER			1
+#define UART_IR_TXOVER			2
+#define UART_IR_TXTRESH			3
+#define UART_IR_TXRDLATCH		4
+#define UART_IR_TXEMPTY			5
+#define UART_IR_RXUNDER			6
+#define UART_IR_RXOVER			7
+#define UART_IR_RXTIMEOUT		8
+#define UART_IR_RXFULL			9
+#define UART_IR_RXTHRESH		10
+#define UART_IR_RXNOTEMPTY		11
+#define UART_IR_RXFRAMEERR		12
+#define UART_IR_RXPARERR		13
+#define UART_IR_RXBRK			14
+#define UART_IR_TXDONE			15
+
+/* UART Fifo register */
+#define UART_FIFO_REG			0x14
+#define UART_FIFO_VALID_SHIFT		0
+#define UART_FIFO_VALID_MASK		0xff
+#define UART_FIFO_FRAMEERR_SHIFT	8
+#define UART_FIFO_FRAMEERR_MASK		(1 << UART_FIFO_FRAMEERR_SHIFT)
+#define UART_FIFO_PARERR_SHIFT		9
+#define UART_FIFO_PARERR_MASK		(1 << UART_FIFO_PARERR_SHIFT)
+#define UART_FIFO_BRKDET_SHIFT		10
+#define UART_FIFO_BRKDET_MASK		(1 << UART_FIFO_BRKDET_SHIFT)
+#define UART_FIFO_ANYERR_MASK		(UART_FIFO_FRAMEERR_MASK |	\
+					UART_FIFO_PARERR_MASK |		\
+					UART_FIFO_BRKDET_MASK)
+
+#endif /* _LINUX_SERIAL_BCM63XX_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 5da22ee..3834f43 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -11,12 +11,16 @@
 #include <linux/list.h>
 #include <linux/cpumask.h>
 #include <linux/init.h>
+#include <linux/llist.h>
 
 extern void cpu_idle(void);
 
 typedef void (*smp_call_func_t)(void *info);
 struct call_single_data {
-	struct list_head list;
+	union {
+		struct list_head list;
+		struct llist_node llist;
+	};
 	smp_call_func_t func;
 	void *info;
 	u16 flags;
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index c64999f..07ef9b8 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -486,6 +486,7 @@
 #endif /* EMBEDDED */
 #ifdef CONFIG_SSB_DRIVER_GPIO
 	struct gpio_chip gpio;
+	struct irq_domain *irq_domain;
 #endif /* DRIVER_GPIO */
 
 	/* Internal-only stuff follows. Do not touch. */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 6eecfc2..04e7632 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -368,7 +368,7 @@
 	struct svc_program *	pg_next;	/* other programs (same xprt) */
 	u32			pg_prog;	/* program number */
 	unsigned int		pg_lovers;	/* lowest version */
-	unsigned int		pg_hivers;	/* lowest version */
+	unsigned int		pg_hivers;	/* highest version */
 	unsigned int		pg_nvers;	/* number of versions */
 	struct svc_version **	pg_vers;	/* version array */
 	char *			pg_name;	/* service name */
@@ -386,8 +386,10 @@
 	struct svc_procedure *	vs_proc;	/* per-procedure info */
 	u32			vs_xdrsize;	/* xdrsize needed for this version */
 
-	unsigned int		vs_hidden : 1;	/* Don't register with portmapper.
+	unsigned int		vs_hidden : 1,	/* Don't register with portmapper.
 						 * Only used for nfsacl so far. */
+				vs_rpcb_optnl:1;/* Don't care about the result of
+						 * registration. Only used for nfsv4. */
 
 	/* Override dispatch function (e.g. when caching replies).
 	 * A return value of 0 means drop the request. 
diff --git a/drivers/staging/zsmalloc/zsmalloc.h b/include/linux/zsmalloc.h
similarity index 96%
rename from drivers/staging/zsmalloc/zsmalloc.h
rename to include/linux/zsmalloc.h
index c2eb174..e44d634 100644
--- a/drivers/staging/zsmalloc/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -2,6 +2,7 @@
  * zsmalloc memory allocator
  *
  * Copyright (C) 2011  Nitin Gupta
+ * Copyright (C) 2012, 2013 Minchan Kim
  *
  * This code is released using a dual license strategy: BSD/GPL
  * You can choose the license that better fits your requirements.
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index e2b9576..7110897 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -24,10 +24,10 @@
 		__entry->dev		= bio->bi_bdev->bd_dev;
 		__entry->orig_major	= d->disk->major;
 		__entry->orig_minor	= d->disk->first_minor;
-		__entry->sector		= bio->bi_sector;
-		__entry->orig_sector	= bio->bi_sector - 16;
-		__entry->nr_sector	= bio->bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		__entry->sector		= bio->bi_iter.bi_sector;
+		__entry->orig_sector	= bio->bi_iter.bi_sector - 16;
+		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
@@ -99,9 +99,9 @@
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
-		__entry->nr_sector	= bio->bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		__entry->sector		= bio->bi_iter.bi_sector;
+		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d  %s %llu + %u",
@@ -134,9 +134,9 @@
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
-		__entry->nr_sector	= bio->bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		__entry->sector		= bio->bi_iter.bi_sector;
+		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 		__entry->cache_hit = hit;
 		__entry->bypass = bypass;
 	),
@@ -162,9 +162,9 @@
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
-		__entry->nr_sector	= bio->bi_size >> 9;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		__entry->sector		= bio->bi_iter.bi_sector;
+		__entry->nr_sector	= bio->bi_iter.bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 		__entry->writeback = writeback;
 		__entry->bypass = bypass;
 	),
@@ -247,7 +247,7 @@
 	TP_fast_assign(
 		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
 		__entry->block	= b->written;
-		__entry->keys	= b->sets[b->nsets].data->keys;
+		__entry->keys	= b->keys.set[b->keys.nsets].data->keys;
 	),
 
 	TP_printk("bucket %zu", __entry->bucket)
@@ -411,7 +411,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->free		= fifo_used(&ca->free);
+		__entry->free		= fifo_used(&ca->free[RESERVE_NONE]);
 		__entry->free_inc	= fifo_used(&ca->free_inc);
 		__entry->free_inc_size	= ca->free_inc.size;
 		__entry->unused		= fifo_used(&ca->unused);
@@ -422,8 +422,8 @@
 );
 
 TRACE_EVENT(bcache_alloc_fail,
-	TP_PROTO(struct cache *ca),
-	TP_ARGS(ca),
+	TP_PROTO(struct cache *ca, unsigned reserve),
+	TP_ARGS(ca, reserve),
 
 	TP_STRUCT__entry(
 		__field(unsigned,	free			)
@@ -433,7 +433,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->free		= fifo_used(&ca->free);
+		__entry->free		= fifo_used(&ca->free[reserve]);
 		__entry->free_inc	= fifo_used(&ca->free_inc);
 		__entry->unused		= fifo_used(&ca->unused);
 		__entry->blocked	= atomic_read(&ca->set->prio_blocked);
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 4c2301d..e76ae19 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -243,9 +243,9 @@
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev ?
 					  bio->bi_bdev->bd_dev : 0;
-		__entry->sector		= bio->bi_sector;
+		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -280,10 +280,10 @@
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
+		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
 		__entry->error		= error;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u [%d]",
@@ -308,9 +308,9 @@
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
+		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -375,9 +375,9 @@
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
+		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -403,7 +403,7 @@
 
 	TP_fast_assign(
 		__entry->dev		= bio ? bio->bi_bdev->bd_dev : 0;
-		__entry->sector		= bio ? bio->bi_sector : 0;
+		__entry->sector		= bio ? bio->bi_iter.bi_sector : 0;
 		__entry->nr_sector	= bio ? bio_sectors(bio) : 0;
 		blk_fill_rwbs(__entry->rwbs,
 			      bio ? bio->bi_rw : 0, __entry->nr_sector);
@@ -538,9 +538,9 @@
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
+		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->new_sector	= new_sector;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
@@ -579,11 +579,11 @@
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->sector		= bio->bi_sector;
+		__entry->sector		= bio->bi_iter.bi_sector;
 		__entry->nr_sector	= bio_sectors(bio);
 		__entry->old_dev	= dev;
 		__entry->old_sector	= from;
-		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
 	),
 
 	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 3b9f28d..67f38fa 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -629,8 +629,8 @@
 		__entry->dev		= sb->s_dev;
 		__entry->rw		= rw;
 		__entry->type		= type;
-		__entry->sector		= bio->bi_sector;
-		__entry->size		= bio->bi_size;
+		__entry->sector		= bio->bi_iter.bi_sector;
+		__entry->size		= bio->bi_iter.bi_size;
 	),
 
 	TP_printk("dev = (%d,%d), %s%s, %s, sector = %lld, size = %u",
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 164a7e2..22b6ad3 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -39,6 +39,7 @@
 }
 
 #define KEY_SIZE_BITS		16
+#define KEY_MAX_U64S		8
 
 KEY_FIELD(KEY_PTRS,	high, 60, 3)
 KEY_FIELD(HEADER_SIZE,	high, 58, 2)
@@ -118,7 +119,7 @@
 	return (struct bkey *) (d + bkey_u64s(k));
 }
 
-static inline struct bkey *bkey_last(const struct bkey *k, unsigned nr_keys)
+static inline struct bkey *bkey_idx(const struct bkey *k, unsigned nr_keys)
 {
 	__u64 *d = (void *) k;
 	return (struct bkey *) (d + nr_keys);
diff --git a/include/uapi/linux/fd.h b/include/uapi/linux/fd.h
index f1f3dd5..84c517c 100644
--- a/include/uapi/linux/fd.h
+++ b/include/uapi/linux/fd.h
@@ -185,7 +185,8 @@
 				 * to clear media change status */
 	FD_UNUSED_BIT,
 	FD_DISK_CHANGED_BIT,	/* disk has been changed since last i/o */
-	FD_DISK_WRITABLE_BIT	/* disk is writable */
+	FD_DISK_WRITABLE_BIT,	/* disk is writable */
+	FD_OPEN_SHOULD_FAIL_BIT
 };
 
 #define FDSETDRVPRM _IOW(2, 0x90, struct floppy_drive_params)
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
index d09dd10..9a58bc2 100644
--- a/kernel/power/block_io.c
+++ b/kernel/power/block_io.c
@@ -32,7 +32,7 @@
 	struct bio *bio;
 
 	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
-	bio->bi_sector = sector;
+	bio->bi_iter.bi_sector = sector;
 	bio->bi_bdev = bdev;
 	bio->bi_end_io = end_swap_bio_read;
 
diff --git a/kernel/smp.c b/kernel/smp.c
index bd9f940..ffee35b 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -23,17 +23,11 @@
 struct call_function_data {
 	struct call_single_data	__percpu *csd;
 	cpumask_var_t		cpumask;
-	cpumask_var_t		cpumask_ipi;
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
 
-struct call_single_queue {
-	struct list_head	list;
-	raw_spinlock_t		lock;
-};
-
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_single_queue, call_single_queue);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
 
 static int
 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
@@ -47,14 +41,8 @@
 		if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
 				cpu_to_node(cpu)))
 			return notifier_from_errno(-ENOMEM);
-		if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
-				cpu_to_node(cpu))) {
-			free_cpumask_var(cfd->cpumask);
-			return notifier_from_errno(-ENOMEM);
-		}
 		cfd->csd = alloc_percpu(struct call_single_data);
 		if (!cfd->csd) {
-			free_cpumask_var(cfd->cpumask_ipi);
 			free_cpumask_var(cfd->cpumask);
 			return notifier_from_errno(-ENOMEM);
 		}
@@ -67,7 +55,6 @@
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		free_cpumask_var(cfd->cpumask);
-		free_cpumask_var(cfd->cpumask_ipi);
 		free_percpu(cfd->csd);
 		break;
 #endif
@@ -85,12 +72,8 @@
 	void *cpu = (void *)(long)smp_processor_id();
 	int i;
 
-	for_each_possible_cpu(i) {
-		struct call_single_queue *q = &per_cpu(call_single_queue, i);
-
-		raw_spin_lock_init(&q->lock);
-		INIT_LIST_HEAD(&q->list);
-	}
+	for_each_possible_cpu(i)
+		init_llist_head(&per_cpu(call_single_queue, i));
 
 	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
 	register_cpu_notifier(&hotplug_cfd_notifier);
@@ -141,18 +124,9 @@
  */
 static void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
 {
-	struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
-	unsigned long flags;
-	int ipi;
-
 	if (wait)
 		csd->flags |= CSD_FLAG_WAIT;
 
-	raw_spin_lock_irqsave(&dst->lock, flags);
-	ipi = list_empty(&dst->list);
-	list_add_tail(&csd->list, &dst->list);
-	raw_spin_unlock_irqrestore(&dst->lock, flags);
-
 	/*
 	 * The list addition should be visible before sending the IPI
 	 * handler locks the list to pull the entry off it because of
@@ -164,7 +138,7 @@
 	 * locking and barrier primitives. Generic code isn't really
 	 * equipped to do the right thing...
 	 */
-	if (ipi)
+	if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
 		arch_send_call_function_single_ipi(cpu);
 
 	if (wait)
@@ -177,27 +151,26 @@
  */
 void generic_smp_call_function_single_interrupt(void)
 {
-	struct call_single_queue *q = &__get_cpu_var(call_single_queue);
-	LIST_HEAD(list);
+	struct llist_node *entry, *next;
 
 	/*
 	 * Shouldn't receive this interrupt on a cpu that is not yet online.
 	 */
 	WARN_ON_ONCE(!cpu_online(smp_processor_id()));
 
-	raw_spin_lock(&q->lock);
-	list_replace_init(&q->list, &list);
-	raw_spin_unlock(&q->lock);
+	entry = llist_del_all(&__get_cpu_var(call_single_queue));
+	entry = llist_reverse_order(entry);
 
-	while (!list_empty(&list)) {
+	while (entry) {
 		struct call_single_data *csd;
 
-		csd = list_entry(list.next, struct call_single_data, list);
-		list_del(&csd->list);
+		next = entry->next;
 
+		csd = llist_entry(entry, struct call_single_data, llist);
 		csd->func(csd->info);
-
 		csd_unlock(csd);
+
+		entry = next;
 	}
 }
 
@@ -402,30 +375,17 @@
 	if (unlikely(!cpumask_weight(cfd->cpumask)))
 		return;
 
-	/*
-	 * After we put an entry into the list, cfd->cpumask may be cleared
-	 * again when another CPU sends another IPI for a SMP function call, so
-	 * cfd->cpumask will be zero.
-	 */
-	cpumask_copy(cfd->cpumask_ipi, cfd->cpumask);
-
 	for_each_cpu(cpu, cfd->cpumask) {
 		struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
-		struct call_single_queue *dst =
-					&per_cpu(call_single_queue, cpu);
-		unsigned long flags;
 
 		csd_lock(csd);
 		csd->func = func;
 		csd->info = info;
-
-		raw_spin_lock_irqsave(&dst->lock, flags);
-		list_add_tail(&csd->list, &dst->list);
-		raw_spin_unlock_irqrestore(&dst->lock, flags);
+		llist_add(&csd->llist, &per_cpu(call_single_queue, cpu));
 	}
 
 	/* Send a message to all CPUs in the map */
-	arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
+	arch_send_call_function_ipi_mask(cfd->cpumask);
 
 	if (wait) {
 		for_each_cpu(cpu, cfd->cpumask) {
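
The per-CPU call queue drops its spinlock in favour of a lock-free llist.
The key observation is that llist_add() reports whether the list was
previously empty, which is exactly the "does the remote CPU need an IPI"
test, while the consumer drains everything at once with llist_del_all() and
restores submission order with llist_reverse_order(). The same pattern in
miniature (the work item, handler and my_kick_consumer() are hypothetical):

#include <linux/llist.h>

struct my_work {
	struct llist_node node;
	void (*fn)(struct my_work *);
};

static LLIST_HEAD(my_queue);

static void my_kick_consumer(void)
{
	/* e.g. send an IPI or wake the consumer thread */
}

static void my_enqueue(struct my_work *w)
{
	/* true only when the list was empty: just the first producer
	 * pays for the notification */
	if (llist_add(&w->node, &my_queue))
		my_kick_consumer();
}

static void my_drain(void)
{
	struct llist_node *n = llist_del_all(&my_queue);

	n = llist_reverse_order(n);	/* del_all yields LIFO; restore FIFO */
	while (n) {
		struct my_work *w = llist_entry(n, struct my_work, node);

		n = n->next;		/* read next before fn() may free w */
		w->fn(w);
	}
}
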
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index f785aef..b418cb0 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -781,8 +781,8 @@
 	if (!error && !bio_flagged(bio, BIO_UPTODATE))
 		error = EIO;
 
-	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what,
-			error, 0, NULL);
+	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
+			bio->bi_rw, what, error, 0, NULL);
 }
 
 static void blk_add_trace_bio_bounce(void *ignore,
@@ -885,8 +885,9 @@
 	if (bt) {
 		__be64 rpdu = cpu_to_be64(pdu);
 
-		__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
-				BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE),
+		__blk_add_trace(bt, bio->bi_iter.bi_sector,
+				bio->bi_iter.bi_size, bio->bi_rw, BLK_TA_SPLIT,
+				!bio_flagged(bio, BIO_UPTODATE),
 				sizeof(rpdu), &rpdu);
 	}
 }
@@ -918,9 +919,9 @@
 	r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
 	r.sector_from = cpu_to_be64(from);
 
-	__blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
-			BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
-			sizeof(r), &r);
+	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
+			bio->bi_rw, BLK_TA_REMAP,
+			!bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
 }
 
 /**
diff --git a/mm/Kconfig b/mm/Kconfig
index 723bbe0..2d9f150 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -552,3 +552,28 @@
 	  it can be cleared by hands.
 
 	  See Documentation/vm/soft-dirty.txt for more details.
+
+config ZSMALLOC
+	bool "Memory allocator for compressed pages"
+	depends on MMU
+	default n
+	help
+	  zsmalloc is a slab-based memory allocator designed to store
+	  compressed RAM pages.  zsmalloc uses virtual memory mapping
+	  in order to reduce fragmentation.  However, this results in a
+	  non-standard allocator interface where a handle, not a pointer, is
+	  returned by an alloc().  This handle must be mapped in order to
+	  access the allocated space.
+
+config PGTABLE_MAPPING
+	bool "Use page table mapping to access object in zsmalloc"
+	depends on ZSMALLOC
+	help
+	  By default, zsmalloc uses a copy-based object mapping method to
+	  access allocations that span two pages. However, if a particular
+	  architecture (e.g. ARM) performs VM mapping faster than copying,
+	  then you should select this. This causes zsmalloc to use page table
+	  mapping rather than copying for object mapping.
+
+	  You can check its speed with the zsmalloc benchmark[1].
+	  [1] https://github.com/spartacus06/zsmalloc
diff --git a/mm/Makefile b/mm/Makefile
index 305d10a..310c90a 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -60,3 +60,4 @@
 obj-$(CONFIG_CLEANCACHE) += cleancache.o
 obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o
 obj-$(CONFIG_ZBUD)	+= zbud.o
+obj-$(CONFIG_ZSMALLOC)	+= zsmalloc.o
diff --git a/mm/bounce.c b/mm/bounce.c
index 5a7d58f..523918b 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -98,27 +98,24 @@
 static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
 {
 	unsigned char *vfrom;
-	struct bio_vec *tovec, *fromvec;
-	int i;
+	struct bio_vec tovec, *fromvec = from->bi_io_vec;
+	struct bvec_iter iter;
 
-	bio_for_each_segment(tovec, to, i) {
-		fromvec = from->bi_io_vec + i;
+	bio_for_each_segment(tovec, to, iter) {
+		if (tovec.bv_page != fromvec->bv_page) {
+			/*
+			 * fromvec->bv_offset and fromvec->bv_len might have
+			 * been modified by the block layer, so use the original
+			 * copy; bounce_copy_vec already uses tovec->bv_len
+			 */
+			vfrom = page_address(fromvec->bv_page) +
+				tovec.bv_offset;
 
-		/*
-		 * not bounced
-		 */
-		if (tovec->bv_page == fromvec->bv_page)
-			continue;
+			bounce_copy_vec(&tovec, vfrom);
+			flush_dcache_page(tovec.bv_page);
+		}
 
-		/*
-		 * fromvec->bv_offset and fromvec->bv_len might have been
-		 * modified by the block layer, so use the original copy,
-		 * bounce_copy_vec already uses tovec->bv_len
-		 */
-		vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
-
-		bounce_copy_vec(tovec, vfrom);
-		flush_dcache_page(tovec->bv_page);
+		fromvec++;
 	}
 }
 
@@ -201,13 +198,14 @@
 {
 	struct bio *bio;
 	int rw = bio_data_dir(*bio_orig);
-	struct bio_vec *to, *from;
+	struct bio_vec *to, from;
+	struct bvec_iter iter;
 	unsigned i;
 
 	if (force)
 		goto bounce;
-	bio_for_each_segment(from, *bio_orig, i)
-		if (page_to_pfn(from->bv_page) > queue_bounce_pfn(q))
+	bio_for_each_segment(from, *bio_orig, iter)
+		if (page_to_pfn(from.bv_page) > queue_bounce_pfn(q))
 			goto bounce;
 
 	return;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 19d5d42..53385cd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3400,7 +3400,7 @@
 static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 						  struct kmem_cache *s)
 {
-	struct kmem_cache *new;
+	struct kmem_cache *new = NULL;
 	static char *tmp_name = NULL;
 	static DEFINE_MUTEX(mutex);	/* protects tmp_name */
 
@@ -3416,7 +3416,7 @@
 	if (!tmp_name) {
 		tmp_name = kmalloc(PATH_MAX, GFP_KERNEL);
 		if (!tmp_name)
-			return NULL;
+			goto out;
 	}
 
 	rcu_read_lock();
@@ -3426,12 +3426,11 @@
 
 	new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
 				      (s->flags & ~SLAB_PANIC), s->ctor, s);
-
 	if (new)
 		new->allocflags |= __GFP_KMEMCG;
 	else
 		new = s;
-
+out:
 	mutex_unlock(&mutex);
 	return new;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 873de7e..ae3c8f3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2930,7 +2930,7 @@
 	unsigned short mode = MPOL_DEFAULT;
 	unsigned short flags = 0;
 
-	if (pol && pol != &default_policy) {
+	if (pol && pol != &default_policy && !(pol->flags & MPOL_F_MORON)) {
 		mode = pol->mode;
 		flags = pol->flags;
 	}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 37b1b19..3291e82 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -178,7 +178,7 @@
 	 * implementation used by LSMs.
 	 */
 	if (has_capability_noaudit(p, CAP_SYS_ADMIN))
-		adj -= 30;
+		points -= (points * 3) / 100;
 
 	/* Normalize to oom_score_adj units */
 	adj *= totalpages / 1000;
diff --git a/mm/page_io.c b/mm/page_io.c
index 7247be6..7c59ef6 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -31,13 +31,13 @@
 
 	bio = bio_alloc(gfp_flags, 1);
 	if (bio) {
-		bio->bi_sector = map_swap_page(page, &bio->bi_bdev);
-		bio->bi_sector <<= PAGE_SHIFT - 9;
+		bio->bi_iter.bi_sector = map_swap_page(page, &bio->bi_bdev);
+		bio->bi_iter.bi_sector <<= PAGE_SHIFT - 9;
 		bio->bi_io_vec[0].bv_page = page;
 		bio->bi_io_vec[0].bv_len = PAGE_SIZE;
 		bio->bi_io_vec[0].bv_offset = 0;
 		bio->bi_vcnt = 1;
-		bio->bi_size = PAGE_SIZE;
+		bio->bi_iter.bi_size = PAGE_SIZE;
 		bio->bi_end_io = end_io;
 	}
 	return bio;
@@ -62,7 +62,7 @@
 		printk(KERN_ALERT "Write-error on swap-device (%u:%u:%Lu)\n",
 				imajor(bio->bi_bdev->bd_inode),
 				iminor(bio->bi_bdev->bd_inode),
-				(unsigned long long)bio->bi_sector);
+				(unsigned long long)bio->bi_iter.bi_sector);
 		ClearPageReclaim(page);
 	}
 	end_page_writeback(page);
@@ -80,7 +80,7 @@
 		printk(KERN_ALERT "Read-error on swap-device (%u:%u:%Lu)\n",
 				imajor(bio->bi_bdev->bd_inode),
 				iminor(bio->bi_bdev->bd_inode),
-				(unsigned long long)bio->bi_sector);
+				(unsigned long long)bio->bi_iter.bi_sector);
 		goto out;
 	}
 
diff --git a/mm/slub.c b/mm/slub.c
index 545a170..2b1a697 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -355,6 +355,21 @@
 	__bit_spin_unlock(PG_locked, &page->flags);
 }
 
+static inline void set_page_slub_counters(struct page *page, unsigned long counters_new)
+{
+	struct page tmp;
+	tmp.counters = counters_new;
+	/*
+	 * page->counters can cover frozen/inuse/objects as well
+	 * as page->_count.  If we assign to ->counters directly
+	 * we run the risk of losing updates to page->_count, so
+	 * be careful and only assign to the fields we need.
+	 */
+	page->frozen  = tmp.frozen;
+	page->inuse   = tmp.inuse;
+	page->objects = tmp.objects;
+}
+
 /* Interrupts must be disabled (for the fallback code to work right) */
 static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 		void *freelist_old, unsigned long counters_old,
@@ -376,7 +391,7 @@
 		if (page->freelist == freelist_old &&
 					page->counters == counters_old) {
 			page->freelist = freelist_new;
-			page->counters = counters_new;
+			set_page_slub_counters(page, counters_new);
 			slab_unlock(page);
 			return 1;
 		}
@@ -415,7 +430,7 @@
 		if (page->freelist == freelist_old &&
 					page->counters == counters_old) {
 			page->freelist = freelist_new;
-			page->counters = counters_new;
+			set_page_slub_counters(page, counters_new);
 			slab_unlock(page);
 			local_irq_restore(flags);
 			return 1;
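
page->counters shares a union with page->_count, so a plain word-sized store
to ->counters can silently discard a concurrent refcount update;
set_page_slub_counters() therefore stages the new value in a temporary
struct page and copies only the slub-owned bitfields. The same defensive
pattern in miniature (all types invented for illustration):

#include <linux/types.h>

union overlay {
	u64 word;			/* whole-word view */
	struct {
		u32 mine;		/* owned by this code */
		u32 theirs;		/* updated concurrently elsewhere */
	};
};

static void set_mine(union overlay *o, u64 new_word)
{
	union overlay tmp = { .word = new_word };

	o->mine = tmp.mine;	/* a store to o->word would clobber a
				 * racing update of ->theirs */
}
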
diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/mm/zsmalloc.c
similarity index 98%
rename from drivers/staging/zsmalloc/zsmalloc-main.c
rename to mm/zsmalloc.c
index 7660c87..c03ca5e 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/mm/zsmalloc.c
@@ -2,6 +2,7 @@
  * zsmalloc memory allocator
  *
  * Copyright (C) 2011  Nitin Gupta
+ * Copyright (C) 2012, 2013 Minchan Kim
  *
  * This code is released using a dual license strategy: BSD/GPL
  * You can choose the license that better fits your requirements.
@@ -90,8 +91,7 @@
 #include <linux/hardirq.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
-
-#include "zsmalloc.h"
+#include <linux/zsmalloc.h>
 
 /*
 * This must be a power of 2 and greater than or equal to sizeof(link_free).
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 2ed1304..0e478a0 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -778,13 +778,12 @@
 
 	bio = data->bio;
 	BUG_ON(!bio);
-	BUG_ON(!bio->bi_vcnt);
 
 	cursor->resid = min(length, data->bio_length);
 	cursor->bio = bio;
-	cursor->vector_index = 0;
-	cursor->vector_offset = 0;
-	cursor->last_piece = length <= bio->bi_io_vec[0].bv_len;
+	cursor->bvec_iter = bio->bi_iter;
+	cursor->last_piece =
+		cursor->resid <= bio_iter_len(bio, cursor->bvec_iter);
 }
 
 static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
@@ -793,71 +792,63 @@
 {
 	struct ceph_msg_data *data = cursor->data;
 	struct bio *bio;
-	struct bio_vec *bio_vec;
-	unsigned int index;
+	struct bio_vec bio_vec;
 
 	BUG_ON(data->type != CEPH_MSG_DATA_BIO);
 
 	bio = cursor->bio;
 	BUG_ON(!bio);
 
-	index = cursor->vector_index;
-	BUG_ON(index >= (unsigned int) bio->bi_vcnt);
+	bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
 
-	bio_vec = &bio->bi_io_vec[index];
-	BUG_ON(cursor->vector_offset >= bio_vec->bv_len);
-	*page_offset = (size_t) (bio_vec->bv_offset + cursor->vector_offset);
+	*page_offset = (size_t) bio_vec.bv_offset;
 	BUG_ON(*page_offset >= PAGE_SIZE);
 	if (cursor->last_piece) /* pagelist offset is always 0 */
 		*length = cursor->resid;
 	else
-		*length = (size_t) (bio_vec->bv_len - cursor->vector_offset);
+		*length = (size_t) bio_vec.bv_len;
 	BUG_ON(*length > cursor->resid);
 	BUG_ON(*page_offset + *length > PAGE_SIZE);
 
-	return bio_vec->bv_page;
+	return bio_vec.bv_page;
 }
 
 static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
 					size_t bytes)
 {
 	struct bio *bio;
-	struct bio_vec *bio_vec;
-	unsigned int index;
+	struct bio_vec bio_vec;
 
 	BUG_ON(cursor->data->type != CEPH_MSG_DATA_BIO);
 
 	bio = cursor->bio;
 	BUG_ON(!bio);
 
-	index = cursor->vector_index;
-	BUG_ON(index >= (unsigned int) bio->bi_vcnt);
-	bio_vec = &bio->bi_io_vec[index];
+	bio_vec = bio_iter_iovec(bio, cursor->bvec_iter);
 
 	/* Advance the cursor offset */
 
 	BUG_ON(cursor->resid < bytes);
 	cursor->resid -= bytes;
-	cursor->vector_offset += bytes;
-	if (cursor->vector_offset < bio_vec->bv_len)
+
+	bio_advance_iter(bio, &cursor->bvec_iter, bytes);
+
+	if (bytes < bio_vec.bv_len)
 		return false;	/* more bytes to process in this segment */
-	BUG_ON(cursor->vector_offset != bio_vec->bv_len);
 
 	/* Move on to the next segment, and possibly the next bio */
 
-	if (++index == (unsigned int) bio->bi_vcnt) {
+	if (!cursor->bvec_iter.bi_size) {
 		bio = bio->bi_next;
-		index = 0;
+		cursor->bvec_iter = bio->bi_iter;
 	}
 	cursor->bio = bio;
-	cursor->vector_index = index;
-	cursor->vector_offset = 0;
 
 	if (!cursor->last_piece) {
 		BUG_ON(!cursor->resid);
 		BUG_ON(!bio);
 		/* A short read is OK, so use <= rather than == */
-		if (cursor->resid <= bio->bi_io_vec[index].bv_len)
+		if (cursor->resid <= bio_iter_len(bio, cursor->bvec_iter))
 			cursor->last_piece = true;
 	}
 
diff --git a/net/compat.c b/net/compat.c
index dd32e34..f50161f 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -780,21 +780,16 @@
 	if (flags & MSG_CMSG_COMPAT)
 		return -EINVAL;
 
-	if (COMPAT_USE_64BIT_TIME)
-		return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
-				      flags | MSG_CMSG_COMPAT,
-				      (struct timespec *) timeout);
-
 	if (timeout == NULL)
 		return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 				      flags | MSG_CMSG_COMPAT, NULL);
 
-	if (get_compat_timespec(&ktspec, timeout))
+	if (compat_get_timespec(&ktspec, timeout))
 		return -EFAULT;
 
 	datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 				   flags | MSG_CMSG_COMPAT, &ktspec);
-	if (datagrams > 0 && put_compat_timespec(&ktspec, timeout))
+	if (datagrams > 0 && compat_put_timespec(&ktspec, timeout))
 		datagrams = -EFAULT;
 
 	return datagrams;
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 76e42e6..24589bd 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -59,6 +59,7 @@
 #include <linux/crypto.h>
 #include <linux/sunrpc/gss_krb5.h>
 #include <linux/sunrpc/xdr.h>
+#include <linux/lcm.h>
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY        RPCDBG_AUTH
@@ -72,7 +73,7 @@
 static void krb5_nfold(u32 inbits, const u8 *in,
 		       u32 outbits, u8 *out)
 {
-	int a, b, c, lcm;
+	unsigned long ulcm;
 	int byte, i, msbit;
 
 	/* the code below is more readable if I make these bytes
@@ -82,17 +83,7 @@
 	outbits >>= 3;
 
 	/* first compute lcm(n,k) */
-
-	a = outbits;
-	b = inbits;
-
-	while (b != 0) {
-		c = b;
-		b = a%b;
-		a = c;
-	}
-
-	lcm = outbits*inbits/a;
+	ulcm = lcm(inbits, outbits);
 
 	/* now do the real work */
 
@@ -101,7 +92,7 @@
 
 	/* this will end up cycling through k lcm(k,n)/k times, which
 	   is correct */
-	for (i = lcm-1; i >= 0; i--) {
+	for (i = ulcm-1; i >= 0; i--) {
 		/* compute the msbit in k which gets added into this byte */
 		msbit = (
 			/* first, start with the msbit in the first,
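
The open-coded Euclid loop is replaced by the shared lcm() helper from
<linux/lcm.h>, which computes lcm(a, b) = a / gcd(a, b) * b. A minimal
equivalent, assuming only <linux/gcd.h>:

#include <linux/gcd.h>

static unsigned long my_lcm(unsigned long a, unsigned long b)
{
	if (a && b)
		return (a / gcd(a, b)) * b;	/* divide first: limits overflow */
	return 0;
}
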
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 458f85e..abbb7dc 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -137,7 +137,6 @@
 {
 	mutex_init(&sn->gssp_lock);
 	sn->gssp_clnt = NULL;
-	init_waitqueue_head(&sn->gssp_wq);
 }
 
 int set_gssp_clnt(struct net *net)
@@ -154,7 +153,6 @@
 		sn->gssp_clnt = clnt;
 	}
 	mutex_unlock(&sn->gssp_lock);
-	wake_up(&sn->gssp_wq);
 	return ret;
 }
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 008cdad..0f73f45 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1263,66 +1263,35 @@
 	return ret;
 }
 
-DEFINE_SPINLOCK(use_gssp_lock);
+/*
+ * Try to set the sn->use_gss_proxy variable to a new value. We only allow
+ * it to be changed if it's currently undefined (-1). If it's any other value
+ * then return -EBUSY unless the type wouldn't have changed anyway.
+ */
+static int set_gss_proxy(struct net *net, int type)
+{
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	int ret;
+
+	WARN_ON_ONCE(type != 0 && type != 1);
+	ret = cmpxchg(&sn->use_gss_proxy, -1, type);
+	if (ret != -1 && ret != type)
+		return -EBUSY;
+	return 0;
+}
 
 static bool use_gss_proxy(struct net *net)
 {
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
-	if (sn->use_gss_proxy != -1)
-		return sn->use_gss_proxy;
-	spin_lock(&use_gssp_lock);
-	/*
-	 * If you wanted gss-proxy, you should have said so before
-	 * starting to accept requests:
-	 */
-	sn->use_gss_proxy = 0;
-	spin_unlock(&use_gssp_lock);
-	return 0;
+	/* If use_gss_proxy is still undefined, then try to disable it */
+	if (sn->use_gss_proxy == -1)
+		set_gss_proxy(net, 0);
+	return sn->use_gss_proxy;
 }
 
 #ifdef CONFIG_PROC_FS
 
-static int set_gss_proxy(struct net *net, int type)
-{
-	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
-	int ret = 0;
-
-	WARN_ON_ONCE(type != 0 && type != 1);
-	spin_lock(&use_gssp_lock);
-	if (sn->use_gss_proxy == -1 || sn->use_gss_proxy == type)
-		sn->use_gss_proxy = type;
-	else
-		ret = -EBUSY;
-	spin_unlock(&use_gssp_lock);
-	wake_up(&sn->gssp_wq);
-	return ret;
-}
-
-static inline bool gssp_ready(struct sunrpc_net *sn)
-{
-	switch (sn->use_gss_proxy) {
-		case -1:
-			return false;
-		case 0:
-			return true;
-		case 1:
-			return sn->gssp_clnt;
-	}
-	WARN_ON_ONCE(1);
-	return false;
-}
-
-static int wait_for_gss_proxy(struct net *net, struct file *file)
-{
-	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
-
-	if (file->f_flags & O_NONBLOCK && !gssp_ready(sn))
-		return -EAGAIN;
-	return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn));
-}
-
-
 static ssize_t write_gssp(struct file *file, const char __user *buf,
 			 size_t count, loff_t *ppos)
 {
@@ -1342,10 +1311,10 @@
 		return res;
 	if (i != 1)
 		return -EINVAL;
-	res = set_gss_proxy(net, 1);
+	res = set_gssp_clnt(net);
 	if (res)
 		return res;
-	res = set_gssp_clnt(net);
+	res = set_gss_proxy(net, 1);
 	if (res)
 		return res;
 	return count;
@@ -1355,16 +1324,12 @@
 			 size_t count, loff_t *ppos)
 {
 	struct net *net = PDE_DATA(file_inode(file));
+	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 	unsigned long p = *ppos;
 	char tbuf[10];
 	size_t len;
-	int ret;
 
-	ret = wait_for_gss_proxy(net, file);
-	if (ret)
-		return ret;
-
-	snprintf(tbuf, sizeof(tbuf), "%d\n", use_gss_proxy(net));
+	snprintf(tbuf, sizeof(tbuf), "%d\n", sn->use_gss_proxy);
 	len = strlen(tbuf);
 	if (p >= len)
 		return 0;
@@ -1626,8 +1591,7 @@
 	BUG_ON(integ_len % 4);
 	*p++ = htonl(integ_len);
 	*p++ = htonl(gc->gc_seq);
-	if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
-				integ_len))
+	if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len))
 		BUG();
 	if (resbuf->tail[0].iov_base == NULL) {
 		if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
@@ -1635,10 +1599,8 @@
 		resbuf->tail[0].iov_base = resbuf->head[0].iov_base
 						+ resbuf->head[0].iov_len;
 		resbuf->tail[0].iov_len = 0;
-		resv = &resbuf->tail[0];
-	} else {
-		resv = &resbuf->tail[0];
 	}
+	resv = &resbuf->tail[0];
 	mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
 	if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic))
 		goto out_err;
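
The net effect of this rewrite is a lock-free latch: sn->use_gss_proxy starts
out as -1 (undefined), cmpxchg() moves it to 0 or 1 exactly once, and every
later caller just reads the settled value, which is why the old spinlock and
wait queue can go away. The same pattern in a minimal userspace sketch, using
the GCC/Clang builtin in place of the kernel's cmpxchg() (names here are
illustrative):

	#include <errno.h>

	static int mode = -1;	/* -1 = undefined, 0 = off, 1 = on */

	/* Latch 'mode' to 'type' if it is still undefined; fail with -EBUSY
	 * only if it already latched to the other value. */
	static int latch_mode(int type)
	{
		int old = __sync_val_compare_and_swap(&mode, -1, type);

		if (old != -1 && old != type)
			return -EBUSY;
		return 0;
	}

Note also that write_gssp() now creates the gss-proxy client *before*
flipping the switch to 1, so a reader that observes use_gss_proxy == 1 is
guaranteed to find sn->gssp_clnt in place; that ordering is what made the old
wait_for_gss_proxy()/gssp_wq machinery removable.
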
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index e521d20..ae333c1 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1111,9 +1111,7 @@
 		*bp++ = 'x';
 		len -= 2;
 		while (blen && len >= 2) {
-			unsigned char c = *buf++;
-			*bp++ = '0' + ((c&0xf0)>>4) + (c>=0xa0)*('a'-'9'-1);
-			*bp++ = '0' + (c&0x0f) + ((c&0x0f)>=0x0a)*('a'-'9'-1);
+			bp = hex_byte_pack(bp, *buf++);
 			len -= 2;
 			blen--;
 		}
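
The two deleted arithmetic lines hand-encoded one byte as two ASCII hex
digits; hex_byte_pack() from <linux/kernel.h> does the same via a lookup
table and returns the advanced buffer pointer so calls can be chained.
Roughly (a sketch of the in-tree helper):

	static const char hex_asc[] = "0123456789abcdef";

	/* Emit the high nibble, then the low one; return the new position. */
	static char *hex_byte_pack_sketch(char *buf, unsigned char byte)
	{
		*buf++ = hex_asc[(byte >> 4) & 0x0f];
		*buf++ = hex_asc[byte & 0x0f];
		return buf;
	}
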
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 94e506f..df58268 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -27,7 +27,6 @@
 	unsigned int rpcb_is_af_local : 1;
 
 	struct mutex gssp_lock;
-	wait_queue_head_t gssp_wq;
 	struct rpc_clnt *gssp_clnt;
 	int use_gss_proxy;
 	int pipe_version;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e7fbe36..5de6801 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -916,9 +916,6 @@
 #endif
 	}
 
-	if (error < 0)
-		printk(KERN_WARNING "svc: failed to register %sv%u RPC "
-			"service (errno %d).\n", progname, version, -error);
 	return error;
 }
 
@@ -937,6 +934,7 @@
 		 const unsigned short port)
 {
 	struct svc_program	*progp;
+	struct svc_version	*vers;
 	unsigned int		i;
 	int			error = 0;
 
@@ -946,7 +944,8 @@
 
 	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
 		for (i = 0; i < progp->pg_nvers; i++) {
-			if (progp->pg_vers[i] == NULL)
+			vers = progp->pg_vers[i];
+			if (vers == NULL)
 				continue;
 
 			dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
@@ -955,16 +954,26 @@
 					proto == IPPROTO_UDP?  "udp" : "tcp",
 					port,
 					family,
-					progp->pg_vers[i]->vs_hidden?
-						" (but not telling portmap)" : "");
+					vers->vs_hidden ?
+					" (but not telling portmap)" : "");
 
-			if (progp->pg_vers[i]->vs_hidden)
+			if (vers->vs_hidden)
 				continue;
 
 			error = __svc_register(net, progp->pg_name, progp->pg_prog,
 						i, family, proto, port);
-			if (error < 0)
+
+			if (vers->vs_rpcb_optnl) {
+				error = 0;
+				continue;
+			}
+
+			if (error < 0) {
+				printk(KERN_WARNING "svc: failed to register "
+					"%sv%u RPC service (errno %d).\n",
+					progp->pg_name, i, -error);
 				break;
+			}
 		}
 	}
 
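
Besides caching progp->pg_vers[i] in a local variable, this hunk moves the
failure printk from __svc_register() up into the loop, where the program name
and version number are at hand, and adds an escape hatch: a version whose
vs_rpcb_optnl bit is set may fail rpcbind registration without aborting
registration of the remaining versions. Assuming the struct svc_version
layout of this era, the flag is just a bitfield next to vs_hidden (abridged,
hypothetical sketch of include/linux/sunrpc/svc.h):

	struct svc_version {
		u32		vs_vers;	/* RPC version number */
		u32		vs_nproc;	/* number of procedures */
		/* ... */
		/* don't register with rpcbind */
		unsigned int	vs_hidden : 1;
		/* rpcbind registration is optional: ignore its errors */
		unsigned int	vs_rpcb_optnl : 1;
		/* ... */
	};
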
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 2a7ca8f..817a1e5 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2964,10 +2964,9 @@
 
 	/*
 	 * Once we've associated a backchannel xprt with a connection,
-	 * we want to keep it around as long as long as the connection
-	 * lasts, in case we need to start using it for a backchannel
-	 * again; this reference won't be dropped until bc_xprt is
-	 * destroyed.
+	 * we want to keep it around as long as the connection lasts,
+	 * in case we need to start using it for a backchannel again;
+	 * this reference won't be dropped until bc_xprt is destroyed.
 	 */
 	xprt_get(xprt);
 	args->bc_xprt->xpt_bc_xprt = xprt;
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index d105a44..63d91e2 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -43,7 +43,8 @@
 	fi
 
 	# Check for git and a git repo.
-	if test -d .git && head=`git rev-parse --verify --short HEAD 2>/dev/null`; then
+	if test -z "$(git rev-parse --show-cdup 2>/dev/null)" &&
+	   head=`git rev-parse --verify --short HEAD 2>/dev/null`; then
 
 		# If we are at a tagged commit (like "v2.6.30-rc6"), we ignore
 		# it, because this version is defined in the top level Makefile.
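
The old "test -d .git" only recognized a repository when .git was a directory
in the current working directory, which misses setups where .git is a file
pointing elsewhere (submodules, git-new-workdir style checkouts). "git
rev-parse --show-cdup" instead prints the relative path back up to the top of
the work tree, so empty output means "we are at the top level", whatever form
.git takes. An illustration (paths are hypothetical):

	$ cd ~/src/linux && git rev-parse --show-cdup
				# empty output: at the top level, test -z passes
	$ cd ~/src/linux/net && git rev-parse --show-cdup
	../			# non-empty: below the top level, test -z fails

Outside any repository the command prints nothing to stdout either, but then
the chained "git rev-parse --verify --short HEAD" fails, so the branch is
still skipped there.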